head 1.2; access; symbols pkgsrc-2013Q2:1.1.0.2 pkgsrc-2013Q2-base:1.1; locks; strict; comment @# @; 1.2 date 2013.09.11.18.00.33; author drochner; state dead; branches; next 1.1; commitid 76aqq1Rgu8ITY25x; 1.1 date 2013.05.03.16.48.38; author drochner; state Exp; branches; next ; desc @@ 1.2 log @update to 4.1.6.1 This release fixes the following critical vulnerabilities: CVE-2013-1918 / XSA-45: Several long latency operations are not preemptible CVE-2013-1952 / XSA-49: VT-d interrupt remapping source validation flaw for bridges CVE-2013-2076 / XSA-52: Information leak on XSAVE/XRSTOR capable AMD CPUs CVE-2013-2077 / XSA-53: Hypervisor crash due to missing exception recovery on XRSTOR CVE-2013-2078 / XSA-54: Hypervisor crash due to missing exception recovery on XSETBV CVE-2013-2194, CVE-2013-2195, CVE-2013-2196 / XSA-55: Multiple vulnerabilities in libelf PV kernel handling CVE-2013-2072 / XSA-56: Buffer overflow in xencontrol Python bindings affecting xend CVE-2013-2211 / XSA-57: libxl allows guest write access to sensitive console related xenstore keys CVE-2013-1432 / XSA-58: Page reference counting error due to XSA-45/CVE-2013-1918 fixes XSA-61: libxl partially sets up HVM passthrough even with disabled iommu This release contains many bug fixes and improvements. The highlights are: addressing a regression from the fix for XSA-21 addressing a regression from the fix for XSA-46 bug fixes to low level system state handling, including certain hardware errata workarounds (CVE-2013-1918 and CVE-2013-1952 were patched in pkgsrc before) @ text @$NetBSD: patch-CVE-2013-1918_9,v 1.1 2013/05/03 16:48:38 drochner Exp $ --- xen/arch/x86/mm.c.orig 2013-05-03 13:38:09.000000000 +0000 +++ xen/arch/x86/mm.c @@@@ -1183,7 +1183,16 @@@@ static int put_page_from_l3e(l3_pgentry_ #endif if ( unlikely(partial > 0) ) + { + ASSERT(preemptible >= 0); return __put_page_type(l3e_get_page(l3e), preemptible); + } + + if ( preemptible < 0 ) + { + current->arch.old_guest_table = l3e_get_page(l3e); + return 0; + } return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); } @@@@ -1196,7 +1205,17 @@@@ static int put_page_from_l4e(l4_pgentry_ (l4e_get_pfn(l4e) != pfn) ) { if ( unlikely(partial > 0) ) + { + ASSERT(preemptible >= 0); return __put_page_type(l4e_get_page(l4e), preemptible); + } + + if ( preemptible < 0 ) + { + current->arch.old_guest_table = l4e_get_page(l4e); + return 0; + } + return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); } return 1; @@@@ -1486,12 +1505,17 @@@@ static int alloc_l3_table(struct page_in if ( rc < 0 && rc != -EAGAIN && rc != -EINTR ) { MEM_LOG("Failure in alloc_l3_table: entry %d", i); + if ( i ) + { + page->nr_validated_ptes = i; + page->partial_pte = 0; + current->arch.old_guest_table = page; + } while ( i-- > 0 ) { if ( !is_guest_l3_slot(i) ) continue; unadjust_guest_l3e(pl3e[i], d); - put_page_from_l3e(pl3e[i], pfn, 0, 0); } } @@@@ -1521,22 +1545,24 @@@@ static int alloc_l4_table(struct page_in page->nr_validated_ptes = i; page->partial_pte = partial ?: 1; } - else if ( rc == -EINTR ) + else if ( rc < 0 ) { + if ( rc != -EINTR ) + MEM_LOG("Failure in alloc_l4_table: entry %d", i); if ( i ) { page->nr_validated_ptes = i; page->partial_pte = 0; - rc = -EAGAIN; + if ( rc == -EINTR ) + rc = -EAGAIN; + else + { + if ( current->arch.old_guest_table ) + page->nr_validated_ptes++; + current->arch.old_guest_table = page; + } } } - else if ( rc < 0 ) - { - MEM_LOG("Failure in alloc_l4_table: entry %d", i); - while ( i-- > 0 ) - if ( is_guest_l4_slot(d, i) ) - put_page_from_l4e(pl4e[i], pfn, 0, 0); - } if ( rc < 0 ) return rc; @@@@ -1966,7 +1992,7 @@@@ static int mod_l3_entry(l3_pgentry_t *pl pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e); } - put_page_from_l3e(ol3e, pfn, 0, 0); + put_page_from_l3e(ol3e, pfn, 0, -preemptible); return rc; } @@@@ -2029,7 +2055,7 @@@@ static int mod_l4_entry(l4_pgentry_t *pl return -EFAULT; } - put_page_from_l4e(ol4e, pfn, 0, 0); + put_page_from_l4e(ol4e, pfn, 0, -preemptible); return rc; } @@@@ -2187,7 +2213,15 @@@@ static int alloc_page_type(struct page_i PRtype_info ": caf=%08lx taf=%" PRtype_info, page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), type, page->count_info, page->u.inuse.type_info); - page->u.inuse.type_info = 0; + if ( page != current->arch.old_guest_table ) + page->u.inuse.type_info = 0; + else + { + ASSERT((page->u.inuse.type_info & + (PGT_count_mask | PGT_validated)) == 1); + get_page_light(page); + page->u.inuse.type_info |= PGT_partial; + } } else { @@@@ -2725,49 +2759,150 @@@@ static void put_superpage(unsigned long #endif +static int put_old_guest_table(struct vcpu *v) +{ + int rc; + + if ( !v->arch.old_guest_table ) + return 0; + + switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table, 1) ) + { + case -EINTR: + case -EAGAIN: + return -EAGAIN; + } + + v->arch.old_guest_table = NULL; + + return rc; +} + +int vcpu_destroy_pagetables(struct vcpu *v) +{ + unsigned long mfn = pagetable_get_pfn(v->arch.guest_table); + struct page_info *page; + int rc = put_old_guest_table(v); + + if ( rc ) + return rc; + +#ifdef __x86_64__ + if ( is_pv_32on64_vcpu(v) ) + mfn = l4e_get_pfn(*(l4_pgentry_t *)mfn_to_virt(mfn)); +#endif + + if ( mfn ) + { + page = mfn_to_page(mfn); + if ( paging_mode_refcounts(v->domain) ) + put_page(page); + else + rc = put_page_and_type_preemptible(page, 1); + } + +#ifdef __x86_64__ + if ( is_pv_32on64_vcpu(v) ) + { + if ( !rc ) + l4e_write( + (l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)), + l4e_empty()); + } + else +#endif + if ( !rc ) + { + v->arch.guest_table = pagetable_null(); + +#ifdef __x86_64__ + /* Drop ref to guest_table_user (from MMUEXT_NEW_USER_BASEPTR) */ + mfn = pagetable_get_pfn(v->arch.guest_table_user); + if ( mfn ) + { + page = mfn_to_page(mfn); + if ( paging_mode_refcounts(v->domain) ) + put_page(page); + else + rc = put_page_and_type_preemptible(page, 1); + } + if ( !rc ) + v->arch.guest_table_user = pagetable_null(); +#endif + } + + v->arch.cr3 = 0; + + return rc; +} int new_guest_cr3(unsigned long mfn) { struct vcpu *curr = current; struct domain *d = curr->domain; - int okay; + int rc; unsigned long old_base_mfn; #ifdef __x86_64__ if ( is_pv_32on64_domain(d) ) { - okay = paging_mode_refcounts(d) - ? 0 /* Old code was broken, but what should it be? */ - : mod_l4_entry( + rc = paging_mode_refcounts(d) + ? -EINVAL /* Old code was broken, but what should it be? */ + : mod_l4_entry( __va(pagetable_get_paddr(curr->arch.guest_table)), l4e_from_pfn( mfn, (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), - pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0; - if ( unlikely(!okay) ) + pagetable_get_pfn(curr->arch.guest_table), 0, 1, curr); + switch ( rc ) { + case 0: + break; + case -EINTR: + case -EAGAIN: + return -EAGAIN; + default: MEM_LOG("Error while installing new compat baseptr %lx", mfn); - return 0; + return rc; } invalidate_shadow_ldt(curr, 0); write_ptbase(curr); - return 1; + return 0; } #endif - okay = paging_mode_refcounts(d) - ? get_page_from_pagenr(mfn, d) - : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0); - if ( unlikely(!okay) ) + rc = put_old_guest_table(curr); + if ( unlikely(rc) ) + return rc; + + old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); + /* + * This is particularly important when getting restarted after the + * previous attempt got preempted in the put-old-MFN phase. + */ + if ( old_base_mfn == mfn ) { - MEM_LOG("Error while installing new baseptr %lx", mfn); + write_ptbase(curr); return 0; } - invalidate_shadow_ldt(curr, 0); + rc = paging_mode_refcounts(d) + ? (get_page_from_pagenr(mfn, d) ? 0 : -EINVAL) + : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 1); + switch ( rc ) + { + case 0: + break; + case -EINTR: + case -EAGAIN: + return -EAGAIN; + default: + MEM_LOG("Error while installing new baseptr %lx", mfn); + return rc; + } - old_base_mfn = pagetable_get_pfn(curr->arch.guest_table); + invalidate_shadow_ldt(curr, 0); curr->arch.guest_table = pagetable_from_pfn(mfn); update_cr3(curr); @@@@ -2776,13 +2911,25 @@@@ int new_guest_cr3(unsigned long mfn) if ( likely(old_base_mfn != 0) ) { + struct page_info *page = mfn_to_page(old_base_mfn); + if ( paging_mode_refcounts(d) ) - put_page(mfn_to_page(old_base_mfn)); + put_page(page); else - put_page_and_type(mfn_to_page(old_base_mfn)); + switch ( rc = put_page_and_type_preemptible(page, 1) ) + { + case -EINTR: + rc = -EAGAIN; + case -EAGAIN: + curr->arch.old_guest_table = page; + break; + default: + BUG_ON(rc); + break; + } } - return 1; + return rc; } static struct domain *get_pg_owner(domid_t domid) @@@@ -2911,12 +3058,29 @@@@ long do_mmuext_op( unsigned int foreigndom) { struct mmuext_op op; - int rc = 0, i = 0, okay; unsigned long type; - unsigned int done = 0; + unsigned int i = 0, done = 0; struct vcpu *curr = current; struct domain *d = curr->domain; struct domain *pg_owner; + int okay, rc = put_old_guest_table(curr); + + if ( unlikely(rc) ) + { + if ( likely(rc == -EAGAIN) ) + rc = hypercall_create_continuation( + __HYPERVISOR_mmuext_op, "hihi", uops, count, pdone, + foreigndom); + return rc; + } + + if ( unlikely(count == MMU_UPDATE_PREEMPTED) && + likely(guest_handle_is_null(uops)) ) + { + /* See the curr->arch.old_guest_table related + * hypercall_create_continuation() below. */ + return (int)foreigndom; + } if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { @@@@ -2941,7 +3105,7 @@@@ long do_mmuext_op( for ( i = 0; i < count; i++ ) { - if ( hypercall_preempt_check() ) + if ( curr->arch.old_guest_table || hypercall_preempt_check() ) { rc = -EAGAIN; break; @@@@ -3001,21 +3165,17 @@@@ long do_mmuext_op( page = mfn_to_page(mfn); if ( (rc = xsm_memory_pin_page(d, page)) != 0 ) - { - put_page_and_type(page); okay = 0; - break; - } - - if ( unlikely(test_and_set_bit(_PGT_pinned, - &page->u.inuse.type_info)) ) + else if ( unlikely(test_and_set_bit(_PGT_pinned, + &page->u.inuse.type_info)) ) { MEM_LOG("Mfn %lx already pinned", mfn); - put_page_and_type(page); okay = 0; - break; } + if ( unlikely(!okay) ) + goto pin_drop; + /* A page is dirtied when its pin status is set. */ paging_mark_dirty(pg_owner, mfn); @@@@ -3029,7 +3189,13 @@@@ long do_mmuext_op( &page->u.inuse.type_info)); spin_unlock(&pg_owner->page_alloc_lock); if ( drop_ref ) - put_page_and_type(page); + { + pin_drop: + if ( type == PGT_l1_page_table ) + put_page_and_type(page); + else + curr->arch.old_guest_table = page; + } } break; @@@@ -3059,7 +3225,17 @@@@ long do_mmuext_op( break; } - put_page_and_type(page); + switch ( rc = put_page_and_type_preemptible(page, 1) ) + { + case -EINTR: + case -EAGAIN: + curr->arch.old_guest_table = page; + rc = 0; + break; + default: + BUG_ON(rc); + break; + } put_page(page); /* A page is dirtied when its pin status is cleared. */ @@@@ -3069,7 +3245,8 @@@@ long do_mmuext_op( } case MMUEXT_NEW_BASEPTR: - okay = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn)); + rc = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn)); + okay = !rc; break; #ifdef __x86_64__ @@@@ -3077,29 +3254,55 @@@@ long do_mmuext_op( unsigned long old_mfn, mfn; mfn = gmfn_to_mfn(d, op.arg1.mfn); + old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); + /* + * This is particularly important when getting restarted after the + * previous attempt got preempted in the put-old-MFN phase. + */ + if ( old_mfn == mfn ) + break; + if ( mfn != 0 ) { if ( paging_mode_refcounts(d) ) okay = get_page_from_pagenr(mfn, d); else - okay = !get_page_and_type_from_pagenr( - mfn, PGT_root_page_table, d, 0, 0); + { + rc = get_page_and_type_from_pagenr( + mfn, PGT_root_page_table, d, 0, 1); + okay = !rc; + } if ( unlikely(!okay) ) { - MEM_LOG("Error while installing new mfn %lx", mfn); + if ( rc == -EINTR ) + rc = -EAGAIN; + else if ( rc != -EAGAIN ) + MEM_LOG("Error while installing new mfn %lx", mfn); break; } } - old_mfn = pagetable_get_pfn(curr->arch.guest_table_user); curr->arch.guest_table_user = pagetable_from_pfn(mfn); if ( old_mfn != 0 ) { + struct page_info *page = mfn_to_page(old_mfn); + if ( paging_mode_refcounts(d) ) - put_page(mfn_to_page(old_mfn)); + put_page(page); else - put_page_and_type(mfn_to_page(old_mfn)); + switch ( rc = put_page_and_type_preemptible(page, 1) ) + { + case -EINTR: + rc = -EAGAIN; + case -EAGAIN: + curr->arch.old_guest_table = page; + okay = 0; + break; + default: + BUG_ON(rc); + break; + } } break; @@@@ -3338,9 +3541,27 @@@@ long do_mmuext_op( } if ( rc == -EAGAIN ) + { + ASSERT(i < count); rc = hypercall_create_continuation( __HYPERVISOR_mmuext_op, "hihi", uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); + } + else if ( curr->arch.old_guest_table ) + { + XEN_GUEST_HANDLE(void) null; + + ASSERT(rc || i == count); + set_xen_guest_handle(null, NULL); + /* + * In order to have a way to communicate the final return value to + * our continuation, we pass this in place of "foreigndom", building + * on the fact that this argument isn't needed anymore. + */ + rc = hypercall_create_continuation( + __HYPERVISOR_mmuext_op, "hihi", null, + MMU_UPDATE_PREEMPTED, null, rc); + } put_pg_owner(pg_owner); @@@@ -3367,11 +3588,28 @@@@ long do_mmu_update( void *va; unsigned long gpfn, gmfn, mfn; struct page_info *page; - int rc = 0, okay = 1, i = 0; - unsigned int cmd, done = 0, pt_dom; - struct vcpu *v = current; + unsigned int cmd, i = 0, done = 0, pt_dom; + struct vcpu *curr = current, *v = curr; struct domain *d = v->domain, *pt_owner = d, *pg_owner; struct domain_mmap_cache mapcache; + int rc = put_old_guest_table(curr), okay = 1; + + if ( unlikely(rc) ) + { + if ( likely(rc == -EAGAIN) ) + rc = hypercall_create_continuation( + __HYPERVISOR_mmu_update, "hihi", ureqs, count, pdone, + foreigndom); + return rc; + } + + if ( unlikely(count == MMU_UPDATE_PREEMPTED) && + likely(guest_handle_is_null(ureqs)) ) + { + /* See the curr->arch.old_guest_table related + * hypercall_create_continuation() below. */ + return (int)foreigndom; + } if ( unlikely(count & MMU_UPDATE_PREEMPTED) ) { @@@@ -3420,7 +3658,7 @@@@ long do_mmu_update( for ( i = 0; i < count; i++ ) { - if ( hypercall_preempt_check() ) + if ( curr->arch.old_guest_table || hypercall_preempt_check() ) { rc = -EAGAIN; break; @@@@ -3685,9 +3923,27 @@@@ long do_mmu_update( } if ( rc == -EAGAIN ) + { + ASSERT(i < count); rc = hypercall_create_continuation( __HYPERVISOR_mmu_update, "hihi", ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); + } + else if ( curr->arch.old_guest_table ) + { + XEN_GUEST_HANDLE(void) null; + + ASSERT(rc || i == count); + set_xen_guest_handle(null, NULL); + /* + * In order to have a way to communicate the final return value to + * our continuation, we pass this in place of "foreigndom", building + * on the fact that this argument isn't needed anymore. + */ + rc = hypercall_create_continuation( + __HYPERVISOR_mmu_update, "hihi", null, + MMU_UPDATE_PREEMPTED, null, rc); + } put_pg_owner(pg_owner); @ 1.1 log @update to 4.1.5 This integrates fixes for all vulnerabilities which were patched in pkgsrc before. Among many bug fixes and improvements (around 50 since Xen 4.1.4): * ACPI APEI/ERST finally working on production systems * Bug fixes for other low level system state handling * Support for xz compressed Dom0 and DomU kernels @ text @d1 1 a1 1 $NetBSD$ @