head 1.2; access; symbols pkgsrc-2020Q2:1.1.0.8 pkgsrc-2020Q2-base:1.1 pkgsrc-2020Q1:1.1.0.4 pkgsrc-2020Q1-base:1.1 pkgsrc-2019Q4:1.1.0.6 pkgsrc-2019Q4-base:1.1 pkgsrc-2019Q3:1.1.0.2; locks; strict; comment @# @; 1.2 date 2020.08.24.10.35.35; author bouyer; state dead; branches; next 1.1; commitid MGsqrLPx72UHUilC; 1.1 date 2019.12.13.13.44.21; author bouyer; state Exp; branches 1.1.2.1; next ; commitid w6P0WFKdEprc9yOB; 1.1.2.1 date 2019.12.13.13.44.21; author bsiegert; state dead; branches; next 1.1.2.2; commitid TcQZmrJvhj3Y6WOB; 1.1.2.2 date 2019.12.16.13.51.58; author bsiegert; state Exp; branches; next ; commitid TcQZmrJvhj3Y6WOB; desc @@ 1.2 log @Update to 4.11.4nb1 Keep PKGREVISION at 1 to reflect that it's not a stock Xen 4.11.4 kernel, we have additional security fixes (all relevant patches from upstream to date). Changes: mostly bug fixes and improvements; better support for newer AMD CPUs. Full changelog at https://xenproject.org/downloads/xen-project-archives/xen-project-4-11-series/xen-project-4-11-4/ @ text @$NetBSD: patch-XSA311,v 1.1 2019/12/13 13:44:21 bouyer Exp $ From: Andrew Cooper Subject: AMD/IOMMU: Cease using a dynamic height for the IOMMU pagetables update_paging_mode() has multiple bugs: 1) Booting with iommu=debug will cause it to inform you that that it called without the pdev_list lock held. 2) When growing by more than a single level, it leaks the newly allocated table(s) in the case of a further error. Furthermore, the choice of default level for a domain has issues: 1) All HVM guests grow from 2 to 3 levels during construction because of the position of the VRAM just below the 4G boundary, so defaulting to 2 is a waste of effort. 2) The limit for PV guests doesn't take memory hotplug into account, and isn't dynamic at runtime like HVM guests. This means that a PV guest may get RAM which it can't map in the IOMMU. 
The dynamic height is a property unique to AMD, and adds a substantial quantity of complexity for what is a marginal performance improvement. Remove the complexity by removing the dynamic height. PV guests now get 3 or 4 levels based on any hotplug regions in the host. This only makes a difference for hardware which previously had all RAM below the 512G boundary, and a hotplug region above. HVM guests now get 4 levels (which will be sufficient until 256TB guests become a thing), because we don't currently have the information to know when 3 would be safe to use. The overhead of this extra level is not expected to be noticeable. It costs one page (4k) per domain, and one extra IO-TLB paging structure cache entry which is very hot and less likely to be evicted. This is XSA-311. Reported-by: XXX PERSON 3 Signed-off-by: Andrew Cooper Signed-off-by: Andrew Cooper Acked-by: Jan Beulich --- xen/drivers/passthrough/amd/iommu_map.c.orig +++ xen/drivers/passthrough/amd/iommu_map.c @@@@ -569,97 +569,6 @@@@ static int iommu_pde_from_gfn(struct dom return 0; } -static int update_paging_mode(struct domain *d, unsigned long gfn) -{ - u16 bdf; - void *device_entry; - unsigned int req_id, level, offset; - unsigned long flags; - struct pci_dev *pdev; - struct amd_iommu *iommu = NULL; - struct page_info *new_root = NULL; - struct page_info *old_root = NULL; - void *new_root_vaddr; - unsigned long old_root_mfn; - struct domain_iommu *hd = dom_iommu(d); - - if ( gfn == gfn_x(INVALID_GFN) ) - return -EADDRNOTAVAIL; - ASSERT(!(gfn >> DEFAULT_DOMAIN_ADDRESS_WIDTH)); - - level = hd->arch.paging_mode; - old_root = hd->arch.root_table; - offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1)); - - ASSERT(spin_is_locked(&hd->arch.mapping_lock) && is_hvm_domain(d)); - - while ( offset >= PTE_PER_TABLE_SIZE ) - { - /* Allocate and install a new root table. 
- * Only upper I/O page table grows, no need to fix next level bits */ - new_root = alloc_amd_iommu_pgtable(); - if ( new_root == NULL ) - { - AMD_IOMMU_DEBUG("%s Cannot allocate I/O page table\n", - __func__); - return -ENOMEM; - } - - new_root_vaddr = __map_domain_page(new_root); - old_root_mfn = mfn_x(page_to_mfn(old_root)); - set_iommu_pde_present(new_root_vaddr, old_root_mfn, level, - !!IOMMUF_writable, !!IOMMUF_readable); - level++; - old_root = new_root; - offset >>= PTE_PER_TABLE_SHIFT; - unmap_domain_page(new_root_vaddr); - } - - if ( new_root != NULL ) - { - hd->arch.paging_mode = level; - hd->arch.root_table = new_root; - - if ( !pcidevs_locked() ) - AMD_IOMMU_DEBUG("%s Try to access pdev_list " - "without aquiring pcidevs_lock.\n", __func__); - - /* Update device table entries using new root table and paging mode */ - for_each_pdev( d, pdev ) - { - bdf = PCI_BDF2(pdev->bus, pdev->devfn); - iommu = find_iommu_for_device(pdev->seg, bdf); - if ( !iommu ) - { - AMD_IOMMU_DEBUG("%s Fail to find iommu.\n", __func__); - return -ENODEV; - } - - spin_lock_irqsave(&iommu->lock, flags); - do { - req_id = get_dma_requestor_id(pdev->seg, bdf); - device_entry = iommu->dev_table.buffer + - (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); - - /* valid = 0 only works for dom0 passthrough mode */ - amd_iommu_set_root_page_table((u32 *)device_entry, - page_to_maddr(hd->arch.root_table), - d->domain_id, - hd->arch.paging_mode, 1); - - amd_iommu_flush_device(iommu, req_id); - bdf += pdev->phantom_stride; - } while ( PCI_DEVFN2(bdf) != pdev->devfn && - PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) ); - spin_unlock_irqrestore(&iommu->lock, flags); - } - - /* For safety, invalidate all entries */ - amd_iommu_flush_all_pages(d); - } - return 0; -} - int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int flags) { @@@@ -685,19 +594,6 @@@@ int amd_iommu_map_page(struct domain *d, return rc; } - /* Since HVM domain is initialized with 2 level IO page table, - * 
we might need a deeper page table for lager gfn now */ - if ( is_hvm_domain(d) ) - { - if ( update_paging_mode(d, gfn) ) - { - spin_unlock(&hd->arch.mapping_lock); - AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn); - domain_crash(d); - return -EFAULT; - } - } - if ( iommu_pde_from_gfn(d, gfn, pt_mfn, true) || (pt_mfn[1] == 0) ) { spin_unlock(&hd->arch.mapping_lock); --- xen/drivers/passthrough/amd/pci_amd_iommu.c.orig +++ xen/drivers/passthrough/amd/pci_amd_iommu.c @@@@ -242,11 +242,17 @@@@ static int amd_iommu_domain_init(struct { struct domain_iommu *hd = dom_iommu(d); - /* For pv and dom0, stick with get_paging_mode(max_page) - * For HVM dom0, use 2 level page table at first */ - hd->arch.paging_mode = is_hvm_domain(d) ? - IOMMU_PAGING_MODE_LEVEL_2 : - get_paging_mode(max_page); + /* + * Choose the number of levels for the IOMMU page tables. + * - PV needs 3 or 4, depending on whether there is RAM (including hotplug + * RAM) above the 512G boundary. + * - HVM could in principle use 3 or 4 depending on how much guest + * physical address space we give it, but this isn't known yet so use 4 + * unilaterally. + */ + hd->arch.paging_mode = is_hvm_domain(d) + ? IOMMU_PAGING_MODE_LEVEL_4 : get_paging_mode(get_upper_mfn_bound()); + return 0; } @ 1.1 log @Update xenkernel411 to 4.11.3nb1, and xentools411 to 4.11.3 (PKGREVISION not reset on xenkernel411 on purpose, to enphasis that it's not a stock Xen 4.11.3 kernel). 
Changes since 4.11.2: - includes all security patches up to XSA306 - other minor bug fixes, hardware support and performance improvements In addition, xenkernel411 includes all security patches released since 4.11.3, up to XSA311 @ text @d1 1 a1 1 $NetBSD: $ @ 1.1.2.1 log @file patch-XSA311 was added on branch pkgsrc-2019Q3 on 2019-12-16 13:51:58 +0000 @ text @d1 189 @ 1.1.2.2 log @Pullup ticket #6104 - requested by bouyer sysutils/xenkernel411, sysutils/xentools411: security fix Revisions pulled up: - sysutils/xenkernel411/Makefile 1.12 - sysutils/xenkernel411/distinfo 1.9 - sysutils/xenkernel411/patches/patch-XSA298 deleted - sysutils/xenkernel411/patches/patch-XSA299 deleted - sysutils/xenkernel411/patches/patch-XSA302 deleted - sysutils/xenkernel411/patches/patch-XSA304 deleted - sysutils/xenkernel411/patches/patch-XSA305 deleted - sysutils/xenkernel411/patches/patch-XSA306 deleted - sysutils/xenkernel411/patches/patch-XSA307 1.1 - sysutils/xenkernel411/patches/patch-XSA308 1.1 - sysutils/xenkernel411/patches/patch-XSA309 1.1 - sysutils/xenkernel411/patches/patch-XSA310 1.1 - sysutils/xenkernel411/patches/patch-XSA311 1.1 - sysutils/xentools411/Makefile 1.12 - sysutils/xentools411/distinfo 1.8 --- Module Name: pkgsrc Committed By: bouyer Date: Fri Dec 13 13:44:21 UTC 2019 Modified Files: pkgsrc/sysutils/xenkernel411: Makefile distinfo pkgsrc/sysutils/xentools411: Makefile distinfo Added Files: pkgsrc/sysutils/xenkernel411/patches: patch-XSA307 patch-XSA308 patch-XSA309 patch-XSA310 patch-XSA311 Removed Files: pkgsrc/sysutils/xenkernel411/patches: patch-XSA298 patch-XSA299 patch-XSA302 patch-XSA304 patch-XSA305 patch-XSA306 Log Message: Update xenkernel411 to 4.11.3nb1, and xentools411 to 4.11.3 (PKGREVISION not reset on xenkernel411 on purpose, to emphasize that it's not a stock Xen 4.11.3 kernel). 
Changes since 4.11.2: - includes all security patches up to XSA306 - other minor bug fixes, hardware support and performances improvements In addition, xenkernel411 includes all security patches released since 4.11.3, up to XSA311 @ text @a0 189 $NetBSD: patch-XSA311,v 1.1 2019/12/13 13:44:21 bouyer Exp $ From: Andrew Cooper Subject: AMD/IOMMU: Cease using a dynamic height for the IOMMU pagetables update_paging_mode() has multiple bugs: 1) Booting with iommu=debug will cause it to inform you that that it called without the pdev_list lock held. 2) When growing by more than a single level, it leaks the newly allocated table(s) in the case of a further error. Furthermore, the choice of default level for a domain has issues: 1) All HVM guests grow from 2 to 3 levels during construction because of the position of the VRAM just below the 4G boundary, so defaulting to 2 is a waste of effort. 2) The limit for PV guests doesn't take memory hotplug into account, and isn't dynamic at runtime like HVM guests. This means that a PV guest may get RAM which it can't map in the IOMMU. The dynamic height is a property unique to AMD, and adds a substantial quantity of complexity for what is a marginal performance improvement. Remove the complexity by removing the dynamic height. PV guests now get 3 or 4 levels based on any hotplug regions in the host. This only makes a difference for hardware which previously had all RAM below the 512G boundary, and a hotplug region above. HVM guests now get 4 levels (which will be sufficient until 256TB guests become a thing), because we don't currently have the information to know when 3 would be safe to use. The overhead of this extra level is not expected to be noticeable. It costs one page (4k) per domain, and one extra IO-TLB paging structure cache entry which is very hot and less likely to be evicted. This is XSA-311. 
Reported-by: XXX PERSON 3 Signed-off-by: Andrew Cooper Signed-off-by: Andrew Cooper Acked-by: Jan Beulich --- xen/drivers/passthrough/amd/iommu_map.c.orig +++ xen/drivers/passthrough/amd/iommu_map.c @@@@ -569,97 +569,6 @@@@ static int iommu_pde_from_gfn(struct dom return 0; } -static int update_paging_mode(struct domain *d, unsigned long gfn) -{ - u16 bdf; - void *device_entry; - unsigned int req_id, level, offset; - unsigned long flags; - struct pci_dev *pdev; - struct amd_iommu *iommu = NULL; - struct page_info *new_root = NULL; - struct page_info *old_root = NULL; - void *new_root_vaddr; - unsigned long old_root_mfn; - struct domain_iommu *hd = dom_iommu(d); - - if ( gfn == gfn_x(INVALID_GFN) ) - return -EADDRNOTAVAIL; - ASSERT(!(gfn >> DEFAULT_DOMAIN_ADDRESS_WIDTH)); - - level = hd->arch.paging_mode; - old_root = hd->arch.root_table; - offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1)); - - ASSERT(spin_is_locked(&hd->arch.mapping_lock) && is_hvm_domain(d)); - - while ( offset >= PTE_PER_TABLE_SIZE ) - { - /* Allocate and install a new root table. 
- * Only upper I/O page table grows, no need to fix next level bits */ - new_root = alloc_amd_iommu_pgtable(); - if ( new_root == NULL ) - { - AMD_IOMMU_DEBUG("%s Cannot allocate I/O page table\n", - __func__); - return -ENOMEM; - } - - new_root_vaddr = __map_domain_page(new_root); - old_root_mfn = mfn_x(page_to_mfn(old_root)); - set_iommu_pde_present(new_root_vaddr, old_root_mfn, level, - !!IOMMUF_writable, !!IOMMUF_readable); - level++; - old_root = new_root; - offset >>= PTE_PER_TABLE_SHIFT; - unmap_domain_page(new_root_vaddr); - } - - if ( new_root != NULL ) - { - hd->arch.paging_mode = level; - hd->arch.root_table = new_root; - - if ( !pcidevs_locked() ) - AMD_IOMMU_DEBUG("%s Try to access pdev_list " - "without aquiring pcidevs_lock.\n", __func__); - - /* Update device table entries using new root table and paging mode */ - for_each_pdev( d, pdev ) - { - bdf = PCI_BDF2(pdev->bus, pdev->devfn); - iommu = find_iommu_for_device(pdev->seg, bdf); - if ( !iommu ) - { - AMD_IOMMU_DEBUG("%s Fail to find iommu.\n", __func__); - return -ENODEV; - } - - spin_lock_irqsave(&iommu->lock, flags); - do { - req_id = get_dma_requestor_id(pdev->seg, bdf); - device_entry = iommu->dev_table.buffer + - (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE); - - /* valid = 0 only works for dom0 passthrough mode */ - amd_iommu_set_root_page_table((u32 *)device_entry, - page_to_maddr(hd->arch.root_table), - d->domain_id, - hd->arch.paging_mode, 1); - - amd_iommu_flush_device(iommu, req_id); - bdf += pdev->phantom_stride; - } while ( PCI_DEVFN2(bdf) != pdev->devfn && - PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) ); - spin_unlock_irqrestore(&iommu->lock, flags); - } - - /* For safety, invalidate all entries */ - amd_iommu_flush_all_pages(d); - } - return 0; -} - int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn, unsigned int flags) { @@@@ -685,19 +594,6 @@@@ int amd_iommu_map_page(struct domain *d, return rc; } - /* Since HVM domain is initialized with 2 level IO page table, - * 
we might need a deeper page table for lager gfn now */ - if ( is_hvm_domain(d) ) - { - if ( update_paging_mode(d, gfn) ) - { - spin_unlock(&hd->arch.mapping_lock); - AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn); - domain_crash(d); - return -EFAULT; - } - } - if ( iommu_pde_from_gfn(d, gfn, pt_mfn, true) || (pt_mfn[1] == 0) ) { spin_unlock(&hd->arch.mapping_lock); --- xen/drivers/passthrough/amd/pci_amd_iommu.c.orig +++ xen/drivers/passthrough/amd/pci_amd_iommu.c @@@@ -242,11 +242,17 @@@@ static int amd_iommu_domain_init(struct { struct domain_iommu *hd = dom_iommu(d); - /* For pv and dom0, stick with get_paging_mode(max_page) - * For HVM dom0, use 2 level page table at first */ - hd->arch.paging_mode = is_hvm_domain(d) ? - IOMMU_PAGING_MODE_LEVEL_2 : - get_paging_mode(max_page); + /* + * Choose the number of levels for the IOMMU page tables. + * - PV needs 3 or 4, depending on whether there is RAM (including hotplug + * RAM) above the 512G boundary. + * - HVM could in principle use 3 or 4 depending on how much guest + * physical address space we give it, but this isn't known yet so use 4 + * unilaterally. + */ + hd->arch.paging_mode = is_hvm_domain(d) + ? IOMMU_PAGING_MODE_LEVEL_4 : get_paging_mode(get_upper_mfn_bound()); + return 0; } @