head 1.2; access; symbols pkgsrc-2019Q2:1.1.0.4 pkgsrc-2019Q2-base:1.1 pkgsrc-2019Q1:1.1.0.2 pkgsrc-2019Q1-base:1.1; locks; strict; comment @# @; 1.2 date 2019.08.30.13.16.27; author bouyer; state dead; branches; next 1.1; commitid Bhbqj9CPVWgYm3BB; 1.1 date 2019.03.07.11.13.27; author bouyer; state Exp; branches; next ; commitid Gzute5jK7xPyjqeB; desc @@ 1.2 log @Upgrade Xen 4.11 packages to 4.11.2. CHANGES since 4.11.1: - include security patches up to and including XSA297 - various performances improvements, code cleanup and bug fixes @ text @$NetBSD: patch-XSA290-1,v 1.1 2019/03/07 11:13:27 bouyer Exp $ From: Jan Beulich Subject: x86/mm: also allow L2 (un)validation to be preemptible Commit c612481d1c ("x86/mm: Plumbing to allow any PTE update to fail with -ERESTART") added assertions next to the {alloc,free}_l2_table() invocations to document (and validate in debug builds) that L2 (un)validations are always preemptible. The assertion in free_page_type() was now observed to trigger when recursive L2 page tables get cleaned up. In particular put_page_from_l2e()'s assumption that _put_page_type() would always succeed is now wrong, resulting in a partially un-validated page left in a domain, which has no other means of getting cleaned up later on. If not causing any problems earlier, this would ultimately trigger the check for ->u.inuse.type_info having a zero count when freeing the page during cleanup after the domain has died. As a result it should be considered a mistake to not have extended preemption fully to L2 when it was added to L3/L4 table handling, which this change aims to correct. The validation side additions are done just for symmetry. This is part of XSA-290. Reported-by: Manuel Bouyer Tested-by: Manuel Bouyer Signed-off-by: Jan Beulich Reviewed-by: Andrew Cooper --- xen/arch/x86/mm.c.orig +++ xen/arch/x86/mm.c @@@@ -1126,7 +1126,7 @@@@ get_page_from_l1e( define_get_linear_pagetable(l2); static int get_page_from_l2e( - l2_pgentry_t l2e, unsigned long pfn, struct domain *d) + l2_pgentry_t l2e, unsigned long pfn, struct domain *d, int partial) { unsigned long mfn = l2e_get_pfn(l2e); int rc; @@@@ -1141,7 +1141,8 @@@@ get_page_from_l2e( return -EINVAL; } - rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, 0, 0); + rc = get_page_and_type_from_mfn(_mfn(mfn), PGT_l1_page_table, d, + partial, false); if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) rc = 0; @@@@ -1295,8 +1296,11 @@@@ void put_page_from_l1e(l1_pgentry_t l1e, * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. * Note also that this automatically deals correctly with linear p.t.'s. */ -static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) +static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn, + int partial, bool defer) { + int rc = 0; + if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) ) return 1; @@@@ -1311,13 +1315,27 @@@@ static int put_page_from_l2e(l2_pgentry_ else { struct page_info *pg = l2e_get_page(l2e); - int rc = _put_page_type(pg, false, mfn_to_page(_mfn(pfn))); + struct page_info *ptpg = mfn_to_page(_mfn(pfn)); - ASSERT(!rc); - put_page(pg); + if ( unlikely(partial > 0) ) + { + ASSERT(!defer); + rc = _put_page_type(pg, true, ptpg); + } + else if ( defer ) + { + current->arch.old_guest_ptpg = ptpg; + current->arch.old_guest_table = pg; + } + else + { + rc = _put_page_type(pg, true, ptpg); + if ( likely(!rc) ) + put_page(pg); + } } - return 0; + return rc; } static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, @@@@ -1487,11 +1505,12 @@@@ static int alloc_l2_table(struct page_in unsigned long pfn = mfn_x(page_to_mfn(page)); l2_pgentry_t *pl2e; unsigned int i; - int rc = 0; + int rc = 0, partial = page->partial_pte; pl2e = map_domain_page(_mfn(pfn)); - for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ ) + for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; + i++, partial = 0 ) { if ( i > page->nr_validated_ptes && hypercall_preempt_check() ) { @@@@ -1501,23 +1520,33 @@@@ static int alloc_l2_table(struct page_in } if ( !is_guest_l2_slot(d, type, i) || - (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 ) + (rc = get_page_from_l2e(pl2e[i], pfn, d, partial)) > 0 ) continue; - if ( unlikely(rc == -ERESTART) ) + if ( rc == -ERESTART ) { page->nr_validated_ptes = i; - break; + page->partial_pte = partial ?: 1; } - - if ( rc < 0 ) + else if ( rc == -EINTR && i ) + { + page->nr_validated_ptes = i; + page->partial_pte = 0; + rc = -ERESTART; + } + else if ( rc < 0 && rc != -EINTR ) { gdprintk(XENLOG_WARNING, "Failure in alloc_l2_table: slot %#x\n", i); - while ( i-- > 0 ) - if ( is_guest_l2_slot(d, type, i) ) - put_page_from_l2e(pl2e[i], pfn); - break; + if ( i ) + { + page->nr_validated_ptes = i; + page->partial_pte = 0; + current->arch.old_guest_ptpg = NULL; + current->arch.old_guest_table = page; + } } + if ( rc < 0 ) + break; pl2e[i] = adjust_guest_l2e(pl2e[i], d); } @@@@ -1797,28 +1826,50 @@@@ static int free_l2_table(struct page_inf struct domain *d = page_get_owner(page); unsigned long pfn = mfn_x(page_to_mfn(page)); l2_pgentry_t *pl2e; - unsigned int i = page->nr_validated_ptes - 1; - int err = 0; + int rc = 0, partial = page->partial_pte; + unsigned int i = page->nr_validated_ptes - !partial; pl2e = map_domain_page(_mfn(pfn)); - ASSERT(page->nr_validated_ptes); - do { - if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) && - put_page_from_l2e(pl2e[i], pfn) == 0 && - i && hypercall_preempt_check() ) + for ( ; ; ) + { + if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) ) + rc = put_page_from_l2e(pl2e[i], pfn, partial, false); + if ( rc < 0 ) + break; + + partial = 0; + + if ( !i-- ) + break; + + if ( hypercall_preempt_check() ) { - page->nr_validated_ptes = i; - err = -ERESTART; + rc = -EINTR; + break; } - } while ( !err && i-- ); + } unmap_domain_page(pl2e); - if ( !err ) + if ( rc >= 0 ) + { page->u.inuse.type_info &= ~PGT_pae_xen_l2; + rc = 0; + } + else if ( rc == -ERESTART ) + { + page->nr_validated_ptes = i; + page->partial_pte = partial ?: -1; + } + else if ( rc == -EINTR && i < L2_PAGETABLE_ENTRIES - 1 ) + { + page->nr_validated_ptes = i + 1; + page->partial_pte = 0; + rc = -ERESTART; + } - return err; + return rc; } static int free_l3_table(struct page_info *page) @@@@ -2138,7 +2189,7 @@@@ static int mod_l2_entry(l2_pgentry_t *pl return -EBUSY; } - if ( unlikely((rc = get_page_from_l2e(nl2e, pfn, d)) < 0) ) + if ( unlikely((rc = get_page_from_l2e(nl2e, pfn, d, 0)) < 0) ) return rc; nl2e = adjust_guest_l2e(nl2e, d); @@@@ -2157,7 +2208,8 @@@@ static int mod_l2_entry(l2_pgentry_t *pl return -EBUSY; } - put_page_from_l2e(ol2e, pfn); + put_page_from_l2e(ol2e, pfn, 0, true); + return rc; } @ 1.1 log @Update to 4.11.1nb1 PKGREVISION set to 1 on purpose, because this is not a stock 4.11.1 kernel (it includes security patches). 4.11.1 includes all security patches up to XSA282. Apply official patches for XSA284, XSA285, XSA287, XSA288, XSA290, XSA291, XSA292, XSA293 and XSA294. Other changes since 4.11.0 are mostly bugfixes, no new features. @ text @d1 1 a1 1 $NetBSD: $ @