From 147202aa772329a02c6e80bc2b7a6b8dd3deac0b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 7 Jul 2009 19:43:20 +0100 Subject: [PATCH 01/23] intel-iommu: Speed up map routines by using cached domain ASAP We did before, in the end -- but it was at the bottom of a long stack of functions. Add an inline wrapper get_valid_domain_for_dev() which will use the cached one _first_ and only make the out-of-line call if it's not already set. This takes the average time taken for a 1-page intel_map_sg() from 5961 cycles to 4812 cycles on my Lenovo x200s test box -- a modest 20%. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 360fb67a30d7..c5f7c73cbb55 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2455,8 +2455,7 @@ static struct iova *intel_alloc_iova(struct device *dev, return iova; } -static struct dmar_domain * -get_valid_domain_for_dev(struct pci_dev *pdev) +static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) { struct dmar_domain *domain; int ret; @@ -2484,6 +2483,18 @@ get_valid_domain_for_dev(struct pci_dev *pdev) return domain; } +static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev) +{ + struct device_domain_info *info; + + /* No lock here, assumes no domain exit in normal case */ + info = dev->dev.archdata.iommu; + if (likely(info)) + return info->domain; + + return __get_valid_domain_for_dev(dev); +} + static int iommu_dummy(struct pci_dev *pdev) { return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; From 3d39cecc4841e8d4c4abdb401d10180f5faaded0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 8 Jul 2009 15:23:30 +0100 Subject: [PATCH 02/23] intel-iommu: Remove superfluous iova_alloc_lock from IOVA code We only ever obtain this lock immediately before the iova_rbtree_lock, and release it immediately after the iova_rbtree_lock. So ditch it and just use iova_rbtree_lock. [v2: Remove the lockdep bits this time too] Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 --- drivers/pci/iova.c | 16 ++++------------ include/linux/iova.h | 1 - 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c5f7c73cbb55..d6a857397ec3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1309,7 +1309,6 @@ static void iommu_detach_domain(struct dmar_domain *domain, } static struct iova_domain reserved_iova_list; -static struct lock_class_key reserved_alloc_key; static struct lock_class_key reserved_rbtree_key; static void dmar_init_reserved_ranges(void) @@ -1320,8 +1319,6 @@ static void dmar_init_reserved_ranges(void) init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); - lockdep_set_class(&reserved_iova_list.iova_alloc_lock, - &reserved_alloc_key); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 46dd440e2315..7914951ef29a 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -22,7 +22,6 @@ void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) { - spin_lock_init(&iovad->iova_alloc_lock); spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; iovad->cached32_node = NULL; @@ -205,7 +204,6 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned) { - unsigned long flags; struct iova *new_iova; int ret; @@ -219,11 +217,9 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (size_aligned) size = __roundup_pow_of_two(size); - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, new_iova, size_aligned); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); if (ret) { free_iova_mem(new_iova); return NULL; @@ -381,8 +377,7 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); - spin_lock(&iovad->iova_rbtree_lock); + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { iova = container_of(node, struct iova, node); @@ -402,8 +397,7 @@ reserve_iova(struct iova_domain *iovad, iova = __insert_new_range(iovad, pfn_lo, pfn_hi); finish: - spin_unlock(&iovad->iova_rbtree_lock); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return iova; } @@ -420,8 +414,7 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) unsigned long flags; struct rb_node *node; - spin_lock_irqsave(&from->iova_alloc_lock, flags); - spin_lock(&from->iova_rbtree_lock); + spin_lock_irqsave(&from->iova_rbtree_lock, flags); for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { struct iova *iova = container_of(node, struct iova, node); struct iova *new_iova; @@ -430,6 +423,5 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", iova->pfn_lo, iova->pfn_lo); } - spin_unlock(&from->iova_rbtree_lock); - spin_unlock_irqrestore(&from->iova_alloc_lock, flags); + spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } diff --git a/include/linux/iova.h b/include/linux/iova.h index 228f6c94b69c..76a0759e88ec 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,7 +28,6 @@ struct iova { /* holds all the iova translations for a domain */ struct iova_domain { - spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ From acea0018a24b794e32afea4f3be4230c58f2f8e3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 14 Jul 2009 01:55:11 +0100 Subject: [PATCH 03/23] intel-iommu: Defer the iotlb flush and iova free for intel_unmap_sg() too. I see no reason why we did this _only_ in intel_unmap_page(). Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d6a857397ec3..ee48fd073140 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2815,11 +2815,18 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, /* free page tables */ dma_pte_free_pagetable(domain, start_pfn, last_pfn); - iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - (last_pfn - start_pfn + 1)); - - /* free iova */ - __free_iova(&domain->iovad, iova); + if (intel_iommu_strict) { + iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, + last_pfn - start_pfn + 1); + /* free iova */ + __free_iova(&domain->iovad, iova); + } else { + add_unmap(domain, iova); + /* + * queue up the release of the unmap to save the 1/6th of the + * cpu used up by the iotlb flush operation... + */ + } } static int intel_nontranslate_map_sg(struct device *hddev, From 0db9b7aebb6a1c2bba2d0636ae0b1f9ef729c827 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 14 Jul 2009 02:01:57 +0100 Subject: [PATCH 04/23] intel-iommu: Kill pointless intel_unmap_single() function Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ee48fd073140..86a83946a8f5 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2741,12 +2741,6 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, } } -static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, - int dir) -{ - intel_unmap_page(dev, dev_addr, size, dir, NULL); -} - static void *intel_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { @@ -2779,7 +2773,7 @@ static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, size = PAGE_ALIGN(size); order = get_order(size); - intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL); + intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long)vaddr, order); } From 86f4d0123b1fddb47d35b9a893f8c0b94bf89abe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sun, 19 Jul 2009 14:47:45 +0300 Subject: [PATCH 05/23] intel-iommu: double kfree() g_iommus is freed after we "goto error;". Found by smatch (http://repo.or.cz/w/smatch.git). Signed-off-by: Dan Carpenter Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 86a83946a8f5..097d5da2fae1 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2224,7 +2224,6 @@ int __init init_dmars(void) deferred_flush = kzalloc(g_num_of_iommus * sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { - kfree(g_iommus); ret = -ENOMEM; goto error; } From cfc65dd57967f2e0c7b3a8b73e6d12470b1cf1c1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 30 Jul 2009 16:15:18 -0600 Subject: [PATCH 06/23] iommu=pt is a valid early param This avoids a "Malformed early option 'iommu'" warning on boot when trying to use pass-through mode. Signed-off-by: Alex Williamson Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-dma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bcf506b..ae13e34f7248 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -212,10 +212,8 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "soft", 4)) swiotlb = 1; #endif - if (!strncmp(p, "pt", 2)) { + if (!strncmp(p, "pt", 2)) iommu_pass_through = 1; - return 1; - } gart_parse_options(p); From 0815565adfe3f4c369110c57d8ffe83caefeed68 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 4 Aug 2009 09:17:20 +0100 Subject: [PATCH 07/23] intel-iommu: Cope with broken HP DC7900 BIOS Yet another reason why trusting this stuff to the BIOS was a bad idea. The HP DC7900 BIOS reports an iommu at an address which just returns all ones, when VT-d is disabled in the BIOS. Fix up the missing iounmap in the error paths while we're at it. Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 7b287cb38b7a..380b60e677e0 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -632,20 +632,31 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); + if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) { + /* Promote an attitude of violence to a BIOS engineer today */ + WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + drhd->reg_base_addr, + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + goto err_unmap; + } + #ifdef CONFIG_DMAR agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { printk(KERN_ERR "Cannot get a valid agaw for iommu (seq_id = %d)\n", iommu->seq_id); - goto error; + goto err_unmap; } msagaw = iommu_calculate_max_sagaw(iommu); if (msagaw < 0) { printk(KERN_ERR "Cannot get a valid max agaw for iommu (seq_id = %d)\n", iommu->seq_id); - goto error; + goto err_unmap; } #endif iommu->agaw = agaw; @@ -665,7 +676,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) } ver = readl(iommu->reg + DMAR_VER_REG); - pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", + pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n", (unsigned long long)drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver), (unsigned long long)iommu->cap, @@ -675,7 +686,10 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) drhd->iommu = iommu; return 0; -error: + + err_unmap: + iounmap(iommu->reg); + error: kfree(iommu); return -1; } From 19943b0e30b05d42e494ae6fef78156ebc8c637e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 4 Aug 2009 16:19:20 +0100 Subject: [PATCH 08/23] intel-iommu: Unify hardware and software passthrough support This makes the hardware passthrough mode work a lot more like the software version, so that the behaviour of a kernel with 'iommu=pt' is the same whether the hardware supports passthrough or not. In particular: - We use a single si_domain for the pass-through devices. - 32-bit devices can be taken out of the pass-through domain so that they don't have to use swiotlb. - Devices will work again after being removed from a KVM guest. - A potential oops on OOM (in init_context_pass_through()) is fixed. Signed-off-by: David Woodhouse --- arch/x86/kernel/pci-swiotlb.c | 5 +- drivers/pci/intel-iommu.c | 178 +++++++++++++++------------------- 2 files changed, 78 insertions(+), 105 deletions(-) diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee44200..1e66b18f45cb 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -71,9 +71,8 @@ void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) || - iommu_pass_through) - swiotlb = 1; + if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) + swiotlb = 1; #endif if (swiotlb_force) swiotlb = 1; diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 097d5da2fae1..147b3b960b61 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -251,7 +251,8 @@ static inline int first_pte_in_page(struct dma_pte *pte) * 2. It maps to each iommu if successful. * 3. Each iommu mapps to this domain if successful. */ -struct dmar_domain *si_domain; +static struct dmar_domain *si_domain; +static int hw_pass_through = 1; /* devices under the same p2p bridge are owned in one domain */ #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) @@ -1948,14 +1949,24 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, struct dmar_domain *domain; int ret; - printk(KERN_INFO - "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", - pci_name(pdev), start, end); - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) return -ENOMEM; + /* For _hardware_ passthrough, don't bother. But for software + passthrough, we do it anyway -- it may indicate a memory + range which is reserved in E820, so which didn't get set + up to start with in si_domain */ + if (domain == si_domain && hw_pass_through) { + printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", + pci_name(pdev), start, end); + return 0; + } + + printk(KERN_INFO + "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", + pci_name(pdev), start, end); + ret = iommu_domain_identity_map(domain, start, end); if (ret) goto error; @@ -2006,23 +2017,6 @@ static inline void iommu_prepare_isa(void) } #endif /* !CONFIG_DMAR_FLPY_WA */ -/* Initialize each context entry as pass through.*/ -static int __init init_context_pass_through(void) -{ - struct pci_dev *pdev = NULL; - struct dmar_domain *domain; - int ret; - - for_each_pci_dev(pdev) { - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); - ret = domain_context_mapping(domain, pdev, - CONTEXT_TT_PASS_THROUGH); - if (ret) - return ret; - } - return 0; -} - static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_work_fn(unsigned long start_pfn, @@ -2037,7 +2031,7 @@ static int __init si_domain_work_fn(unsigned long start_pfn, } -static int si_domain_init(void) +static int si_domain_init(int hw) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; @@ -2064,6 +2058,9 @@ static int si_domain_init(void) si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; + if (hw) + return 0; + for_each_online_node(nid) { work_with_active_regions(nid, si_domain_work_fn, &ret); if (ret) @@ -2155,24 +2152,26 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) return 1; } -static int iommu_prepare_static_identity_mapping(void) +static int iommu_prepare_static_identity_mapping(int hw) { struct pci_dev *pdev = NULL; int ret; - ret = si_domain_init(); + ret = si_domain_init(hw); if (ret) return -EFAULT; for_each_pci_dev(pdev) { if (iommu_should_identity_map(pdev, 1)) { - printk(KERN_INFO "IOMMU: identity mapping for device %s\n", - pci_name(pdev)); + printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", + hw ? "hardware" : "software", pci_name(pdev)); ret = domain_context_mapping(si_domain, pdev, + hw ? CONTEXT_TT_PASS_THROUGH : CONTEXT_TT_MULTI_LEVEL); if (ret) return ret; + ret = domain_add_dev_info(si_domain, pdev); if (ret) return ret; @@ -2189,14 +2188,6 @@ int __init init_dmars(void) struct pci_dev *pdev; struct intel_iommu *iommu; int i, ret; - int pass_through = 1; - - /* - * In case pass through can not be enabled, iommu tries to use identity - * mapping. - */ - if (iommu_pass_through) - iommu_identity_mapping = 1; /* * for each drhd @@ -2250,14 +2241,8 @@ int __init init_dmars(void) goto error; } if (!ecap_pass_through(iommu->ecap)) - pass_through = 0; + hw_pass_through = 0; } - if (iommu_pass_through) - if (!pass_through) { - printk(KERN_INFO - "Pass Through is not supported by hardware.\n"); - iommu_pass_through = 0; - } /* * Start from the sane iommu hardware state. @@ -2312,63 +2297,56 @@ int __init init_dmars(void) } } - /* - * If pass through is set and enabled, context entries of all pci - * devices are intialized by pass through translation type. - */ - if (iommu_pass_through) { - ret = init_context_pass_through(); - if (ret) { - printk(KERN_ERR "IOMMU: Pass through init failed.\n"); - iommu_pass_through = 0; - } - } - + if (iommu_pass_through) + iommu_identity_mapping = 1; +#ifdef CONFIG_DMAR_BROKEN_GFX_WA + else + iommu_identity_mapping = 2; +#endif /* * If pass through is not set or not enabled, setup context entries for * identity mappings for rmrr, gfx, and isa and may fall back to static * identity mapping if iommu_identity_mapping is set. */ - if (!iommu_pass_through) { -#ifdef CONFIG_DMAR_BROKEN_GFX_WA - if (!iommu_identity_mapping) - iommu_identity_mapping = 2; -#endif - if (iommu_identity_mapping) - iommu_prepare_static_identity_mapping(); - /* - * For each rmrr - * for each dev attached to rmrr - * do - * locate drhd for dev, alloc domain for dev - * allocate free domain - * allocate page table entries for rmrr - * if context not allocated for bus - * allocate and init context - * set present in root table for this bus - * init context with domain, translation etc - * endfor - * endfor - */ - printk(KERN_INFO "IOMMU: Setting RMRR:\n"); - for_each_rmrr_units(rmrr) { - for (i = 0; i < rmrr->devices_cnt; i++) { - pdev = rmrr->devices[i]; - /* - * some BIOS lists non-exist devices in DMAR - * table. - */ - if (!pdev) - continue; - ret = iommu_prepare_rmrr_dev(rmrr, pdev); - if (ret) - printk(KERN_ERR - "IOMMU: mapping reserved region failed\n"); - } + if (iommu_identity_mapping) { + ret = iommu_prepare_static_identity_mapping(hw_pass_through); + if (ret) { + printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); + goto error; } - - iommu_prepare_isa(); } + /* + * For each rmrr + * for each dev attached to rmrr + * do + * locate drhd for dev, alloc domain for dev + * allocate free domain + * allocate page table entries for rmrr + * if context not allocated for bus + * allocate and init context + * set present in root table for this bus + * init context with domain, translation etc + * endfor + * endfor + */ + printk(KERN_INFO "IOMMU: Setting RMRR:\n"); + for_each_rmrr_units(rmrr) { + for (i = 0; i < rmrr->devices_cnt; i++) { + pdev = rmrr->devices[i]; + /* + * some BIOS lists non-exist devices in DMAR + * table. + */ + if (!pdev) + continue; + ret = iommu_prepare_rmrr_dev(rmrr, pdev); + if (ret) + printk(KERN_ERR + "IOMMU: mapping reserved region failed\n"); + } + } + + iommu_prepare_isa(); /* * for each drhd @@ -2536,7 +2514,10 @@ static int iommu_no_mapping(struct device *dev) ret = domain_add_dev_info(si_domain, pdev); if (ret) return 0; - ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL); + ret = domain_context_mapping(si_domain, pdev, + hw_pass_through ? + CONTEXT_TT_PASS_THROUGH : + CONTEXT_TT_MULTI_LEVEL); if (!ret) { printk(KERN_INFO "64bit %s uses identity mapping\n", pci_name(pdev)); @@ -3202,7 +3183,7 @@ int __init intel_iommu_init(void) * Check the need for DMA-remapping initialization now. * Above initialization will also be used by Interrupt-remapping. */ - if (no_iommu || (swiotlb && !iommu_pass_through) || dmar_disabled) + if (no_iommu || swiotlb || dmar_disabled) return -ENODEV; iommu_init_mempool(); @@ -3222,14 +3203,7 @@ int __init intel_iommu_init(void) init_timer(&unmap_timer); force_iommu = 1; - - if (!iommu_pass_through) { - printk(KERN_INFO - "Multi-level page-table translation for DMAR.\n"); - dma_ops = &intel_dma_ops; - } else - printk(KERN_INFO - "DMAR: Pass through translation for DMAR.\n"); + dma_ops = &intel_dma_ops; init_iommu_sysfs(); From 5fe60f4e5871b64e687229199fafd4ef13cd0886 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Aug 2009 10:53:41 +0100 Subject: [PATCH 09/23] intel-iommu: make domain_add_dev_info() call domain_context_mapping() All callers of the former were also calling the latter, in one order or the other, and failing to correctly clean up if the second returned failure. Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3f256b8d83c1..09606e9aedec 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2094,15 +2094,23 @@ static int identity_mapping(struct pci_dev *pdev) } static int domain_add_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev) + struct pci_dev *pdev, + int translation) { struct device_domain_info *info; unsigned long flags; + int ret; info = alloc_devinfo_mem(); if (!info) return -ENOMEM; + ret = domain_context_mapping(domain, pdev, translation); + if (ret) { + free_devinfo_mem(info); + return ret; + } + info->segment = pci_domain_nr(pdev->bus); info->bus = pdev->bus->number; info->devfn = pdev->devfn; @@ -2173,15 +2181,11 @@ static int iommu_prepare_static_identity_mapping(int hw) printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", hw ? "hardware" : "software", pci_name(pdev)); - ret = domain_context_mapping(si_domain, pdev, + ret = domain_add_dev_info(si_domain, pdev, hw ? CONTEXT_TT_PASS_THROUGH : CONTEXT_TT_MULTI_LEVEL); if (ret) return ret; - - ret = domain_add_dev_info(si_domain, pdev); - if (ret) - return ret; } } @@ -2510,13 +2514,10 @@ static int iommu_no_mapping(struct device *dev) */ if (iommu_should_identity_map(pdev, 0)) { int ret; - ret = domain_add_dev_info(si_domain, pdev); - if (ret) - return 0; - ret = domain_context_mapping(si_domain, pdev, - hw_pass_through ? - CONTEXT_TT_PASS_THROUGH : - CONTEXT_TT_MULTI_LEVEL); + ret = domain_add_dev_info(si_domain, pdev, + hw_pass_through ? + CONTEXT_TT_PASS_THROUGH : + CONTEXT_TT_MULTI_LEVEL); if (!ret) { printk(KERN_INFO "64bit %s uses identity mapping\n", pci_name(pdev)); @@ -3486,7 +3487,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct intel_iommu *iommu; int addr_width; u64 end; - int ret; /* normally pdev is not mapped */ if (unlikely(domain_context_mapped(pdev))) { @@ -3518,12 +3518,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, return -EFAULT; } - ret = domain_add_dev_info(dmar_domain, pdev); - if (ret) - return ret; - - ret = domain_context_mapping(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); - return ret; + return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); } static void intel_iommu_detach_device(struct iommu_domain *domain, From ba6c548701ef7a93b9ea05d1506d2b62f1628333 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 13 Aug 2009 18:18:00 +0100 Subject: [PATCH 10/23] ia64: IOMMU passthrough mode shouldn't trigger swiotlb init Since commit 19943b0e30b05d42e494ae6fef78156ebc8c637e ('intel-iommu: Unify hardware and software passthrough support'), hardware passthrough mode will do the same as software passthrough mode was doing -- it'll still use the IOMMU normally for devices which can't address all of memory. This means that we don't need to bother with swiotlb. Signed-off-by: David Woodhouse --- arch/ia64/kernel/pci-swiotlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 223abb134105..285aae8431c6 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -46,7 +46,7 @@ void __init swiotlb_dma_init(void) void __init pci_swiotlb_init(void) { - if (!iommu_detected || iommu_pass_through) { + if (!iommu_detected) { #ifdef CONFIG_IA64_GENERIC swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); From 132032274a594ee9ffb6b9c9e2e9698149a09ea9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 3 Aug 2009 12:40:27 +0100 Subject: [PATCH 11/23] USB: Work around BIOS bugs by quiescing USB controllers earlier We are seeing a number of crashes in SMM, when VT-d is enabled while 'Legacy USB support' is enabled in various BIOSes. The BIOS is supposed to indicate which addresses it uses for DMA in a special ACPI table ("RMRR"), so that we can punch a hole for it when we set up the IOMMU. The problem is, as usual, that BIOS engineers are totally incompetent. They write code which will crash if the DMA goes AWOL, and then they either neglect to provide an RMRR table at all, or they put the wrong addresses in it. And of course they don't do _any_ QA, since that would take too much time away from their crack-smoking habit. The real fix, of course, is for consumers to refuse to buy motherboards which only have closed-source firmware available. If we had _open_ firmware, bugs like this would be easy to fix. Since that's something I can only dream about, this patch implements an alternative -- ensuring that the USB controllers are handed off from the BIOS and quiesced _before_ the IOMMU is initialised. That would have been a much better design than this RMRR nonsense in the first place, of course. The bootloader has no business doing DMA after the OS has booted anyway. Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/pci-quirks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 83b5f9cea85a..23cf3bde4762 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -475,4 +475,4 @@ static void __devinit quirk_usb_early_handoff(struct pci_dev *pdev) else if (pdev->class == PCI_CLASS_SERIAL_USB_XHCI) quirk_usb_handoff_xhci(pdev); } -DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, quirk_usb_early_handoff); +DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_usb_early_handoff); From 071e13746f9ebb259987c71ea77f11e7656769a2 Mon Sep 17 00:00:00 2001 From: Matt Kraai Date: Sun, 23 Aug 2009 22:30:22 -0700 Subject: [PATCH 12/23] intel-iommu: Mark functions with __init Mark si_domain_init and iommu_prepare_static_identity_mapping with __init, to eliminate the following warnings: WARNING: drivers/pci/built-in.o(.text+0xf1f4): Section mismatch in reference from the function si_domain_init() to the function .init.text:si_domain_work_fn() The function si_domain_init() references the function __init si_domain_work_fn(). This is often because si_domain_init lacks a __init annotation or the annotation of si_domain_work_fn is wrong. WARNING: drivers/pci/built-in.o(.text+0xe340): Section mismatch in reference from the function iommu_prepare_static_identity_mapping() to the function .init.text:si_domain_init() The function iommu_prepare_static_identity_mapping() references the function __init si_domain_init(). This is often because iommu_prepare_static_identity_mapping lacks a __init annotation or the annotation of si_domain_init is wrong. Signed-off-by: Matt Kraai Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 09606e9aedec..e67335ba24a3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2038,7 +2038,7 @@ static int __init si_domain_work_fn(unsigned long start_pfn, } -static int si_domain_init(int hw) +static int __init si_domain_init(int hw) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; @@ -2167,7 +2167,7 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) return 1; } -static int iommu_prepare_static_identity_mapping(int hw) +static int __init iommu_prepare_static_identity_mapping(int hw) { struct pci_dev *pdev = NULL; int ret; From 94a91b5051a77d8a71d4f11a3240f0d9c51b6cf2 Mon Sep 17 00:00:00 2001 From: Donald Dutile Date: Thu, 20 Aug 2009 16:51:34 -0400 Subject: [PATCH 13/23] intel-iommu: iommu init error path bug fixes The kcalloc() failure path in iommu_init_domains() calls free_dmar_iommu(), which assumes that ->domains, ->domain_ids, and ->lock have been properly initialized. Add checks in free_[dmar]_iommu to not use ->domains,->domain_ids if not alloced. Move the lock init to prior to the kcalloc()'s, so it is valid in free_context_table() when free_dmar_iommu() invokes it at the end. Patch based on iommu-2.6, commit 132032274a594ee9ffb6b9c9e2e9698149a09ea9 Signed-off-by: Donald Dutile Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e67335ba24a3..beb5ccfd89bd 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1158,6 +1158,8 @@ static int iommu_init_domains(struct intel_iommu *iommu) pr_debug("Number of Domains supportd <%ld>\n", ndomains); nlongs = BITS_TO_LONGS(ndomains); + spin_lock_init(&iommu->lock); + /* TBD: there might be 64K domains, * consider other allocation for future chip */ @@ -1170,12 +1172,9 @@ static int iommu_init_domains(struct intel_iommu *iommu) GFP_KERNEL); if (!iommu->domains) { printk(KERN_ERR "Allocating domain array failed\n"); - kfree(iommu->domain_ids); return -ENOMEM; } - spin_lock_init(&iommu->lock); - /* * if Caching mode is set, then invalid translations are tagged * with domainid 0. Hence we need to pre-allocate it. @@ -1195,22 +1194,24 @@ void free_dmar_iommu(struct intel_iommu *iommu) int i; unsigned long flags; - i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); - for (; i < cap_ndoms(iommu->cap); ) { - domain = iommu->domains[i]; - clear_bit(i, iommu->domain_ids); + if ((iommu->domains) && (iommu->domain_ids)) { + i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); + for (; i < cap_ndoms(iommu->cap); ) { + domain = iommu->domains[i]; + clear_bit(i, iommu->domain_ids); - spin_lock_irqsave(&domain->iommu_lock, flags); - if (--domain->iommu_count == 0) { - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) - vm_domain_exit(domain); - else - domain_exit(domain); + spin_lock_irqsave(&domain->iommu_lock, flags); + if (--domain->iommu_count == 0) { + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) + vm_domain_exit(domain); + else + domain_exit(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags); + + i = find_next_bit(iommu->domain_ids, + cap_ndoms(iommu->cap), i+1); } - spin_unlock_irqrestore(&domain->iommu_lock, flags); - - i = find_next_bit(iommu->domain_ids, - cap_ndoms(iommu->cap), i+1); } if (iommu->gcmd & DMA_GCMD_TE) From 2ff729f5445cc47d1910386c36e53fc6b1c5e47a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 26 Aug 2009 14:25:41 +0100 Subject: [PATCH 14/23] intel-iommu: Cope with yet another BIOS screwup causing crashes Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index beb5ccfd89bd..d36fa80aaa52 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1974,6 +1974,17 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, printk(KERN_INFO "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", pci_name(pdev), start, end); + + if (end >> agaw_to_width(domain->agaw)) { + WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + agaw_to_width(domain->agaw), + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + ret = -EIO; + goto error; + } ret = iommu_domain_identity_map(domain, start, end); if (ret) From 8211a7b5857914058c52ae977c96463e419b37ab Mon Sep 17 00:00:00 2001 From: Troy Heber Date: Wed, 19 Aug 2009 15:26:11 -0600 Subject: [PATCH 15/23] pci/dmar: correct off-by-one error in dmar_fault() DMAR faults are recorded into a ring of "fault recording registers". fault_index is a 0-based index into the ring. The code allows the 0-based fault_index to be equal to the total number of fault registers available from the cap_num_fault_regs() macro, which causes access beyond the last available register. Signed-off-by Troy Heber Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 380b60e677e0..3264b626725a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -1226,7 +1226,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) source_id, guest_addr); fault_index++; - if (fault_index > cap_num_fault_regs(iommu->cap)) + if (fault_index >= cap_num_fault_regs(iommu->cap)) fault_index = 0; spin_lock_irqsave(&iommu->register_lock, flag); } From adb2fe0277607d50f4e9ef06e1d180051a609c25 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 31 Aug 2009 15:24:23 +1000 Subject: [PATCH 16/23] intel-iommu: include linux/dmi.h to use dmi_ routines This file needs to include linux/dmi.h directly rather than relying on it being pulled in from elsewhere. Signed-off-by: Stephen Rothwell Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index d36fa80aaa52..2ec5899207e3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include "pci.h" From e936d0773df172ec8600777fdd72bbc1f75f22ad Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Mon, 7 Sep 2009 10:58:07 -0400 Subject: [PATCH 17/23] intel-iommu: Disallow interrupt remapping if not all ioapics covered Current kernel enable interrupt remapping only when all the vt-d unit support interrupt remapping. So it is reasonable we should also disallow enabling intr-remapping if there any io-apics that are not listed under vt-d units. Otherwise we can run into issues. Acked-by: Suresh Siddha Signed-off-by: Youquan Song Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 3 --- drivers/pci/intr_remapping.c | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 3264b626725a..fba4f6891680 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -570,9 +570,6 @@ int __init dmar_table_init(void) printk(KERN_INFO PREFIX "No ATSR found\n"); #endif -#ifdef CONFIG_INTR_REMAP - parse_ioapics_under_ir(); -#endif return 0; } diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 4f5b8712931f..ebfa47b79c5b 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -626,6 +626,11 @@ int __init enable_intr_remapping(int eim) struct dmar_drhd_unit *drhd; int setup = 0; + if (parse_ioapics_under_ir() != 1) { + printk(KERN_INFO "Not enable interrupt remapping\n"); + return -1; + } + for_each_drhd_unit(drhd) { struct intel_iommu *iommu = drhd->iommu; From 074835f0143b83845af5044af2739c52c9f53808 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Wed, 9 Sep 2009 12:05:39 -0400 Subject: [PATCH 18/23] intel-iommu: Fix kernel hang if interrupt remapping disabled in BIOS BIOS clear DMAR table INTR_REMAP flag to disable interrupt remapping. Current kernel only check interrupt remapping(IR) flag in DRHD's extended capability register to decide interrupt remapping support or not. But IR flag will not change when BIOS disable/enable interrupt remapping. When user disable interrupt remapping in BIOS or BIOS often defaultly disable interrupt remapping feature when BIOS is not mature.Though BIOS disable interrupt remapping but intr_remapping_supported function will always report to OS support interrupt remapping if VT-d2 chipset populated. On this cases, kernel will continue enable interrupt remapping and result kernel panic. This bug exist on almost all platforms with interrupt remapping support. This patch add DMAR table INTR_REMAP flag check before enable interrupt remapping. Signed-off-by: Youquan Song Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 10 ++++++++++ drivers/pci/intr_remapping.c | 3 +++ include/linux/intel-iommu.h | 2 ++ 3 files changed, 15 insertions(+) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index fba4f6891680..270ed222a075 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -1316,3 +1316,13 @@ int dmar_reenable_qi(struct intel_iommu *iommu) return 0; } + +/* + * Check interrupt remapping support in DMAR table description. + */ +int dmar_ir_support(void) +{ + struct acpi_table_dmar *dmar; + dmar = (struct acpi_table_dmar *)dmar_tbl; + return dmar->flags & 0x1; +} diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index ebfa47b79c5b..ac065144c01c 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -611,6 +611,9 @@ int __init intr_remapping_supported(void) if (disable_intremap) return 0; + if (!dmar_ir_support()) + return 0; + for_each_drhd_unit(drhd) { struct intel_iommu *iommu = drhd->iommu; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 482dc91fd53a..4f0a72a9740c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -360,4 +360,6 @@ extern void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep, extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); +extern int dmar_ir_support(void); + #endif From 2ebe31513fcbe7a781f27002f065b50ae195022f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 19 Sep 2009 07:34:04 -0700 Subject: [PATCH 19/23] intel-iommu: Limit DOMAIN_MAX_PFN to fit in an 'unsigned long' This means we're limited to 44-bit addresses on 32-bit kernels, and makes it sane for us to use 'unsigned long' for PFNs throughout. Which is just as well, really, since we already do that. Reported-by: Benjamin LaHaise Tested-by: Benjamin LaHaise Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 2ec5899207e3..c9272a1fb691 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -56,8 +56,14 @@ #define MAX_AGAW_WIDTH 64 -#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) -#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1) +#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1) +#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1) + +/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR + to match. That way, we can use 'unsigned long' for PFNs with impunity. */ +#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \ + __DOMAIN_MAX_PFN(gaw), (unsigned long)-1)) +#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) From 59c36286b74ae6a8adebf6e133a83d7f2e3e6704 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 19 Sep 2009 07:36:28 -0700 Subject: [PATCH 20/23] intel-iommu: Fix integer overflow in dma_pte_{clear_range,free_pagetable}() If end_pfn is equal to (unsigned long)-1, then the loop will never end. Seen on 32-bit kernel, but could have happened on 64-bit too once we get hardware that supports 64-bit guest addresses. Change both functions to a 'do {} while' loop with the test at the end, and check for the PFN having wrapper round to zero. Reported-by: Benjamin LaHaise Tested-by: Benjamin LaHaise Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c9272a1fb691..5493c79dde47 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -785,9 +785,10 @@ static void dma_pte_clear_range(struct dmar_domain *domain, BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(start_pfn > last_pfn); /* we don't need lock here; nobody else touches the iova range */ - while (start_pfn <= last_pfn) { + do { first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); if (!pte) { start_pfn = align_to_level(start_pfn + 1, 2); @@ -801,7 +802,8 @@ static void dma_pte_clear_range(struct dmar_domain *domain, domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); - } + + } while (start_pfn && start_pfn <= last_pfn); } /* free page table pages. last level pte should already be cleared */ @@ -817,6 +819,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(start_pfn > last_pfn); /* We don't need lock here; nobody else touches the iova range */ level = 2; @@ -827,7 +830,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, if (tmp + level_size(level) - 1 > last_pfn) return; - while (tmp + level_size(level) - 1 <= last_pfn) { + do { first_pte = pte = dma_pfn_level_pte(domain, tmp, level); if (!pte) { tmp = align_to_level(tmp + 1, level + 1); @@ -846,7 +849,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); - } + } while (tmp && tmp + level_size(level) - 1 <= last_pfn); level++; } /* free pgd */ From 64de5af000e99f32dd49ff5dd9a0fd7db1f60305 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Wed, 16 Sep 2009 21:05:55 -0400 Subject: [PATCH 21/23] intel-iommu: Fix integer wrap on 32 bit kernels The following 64 bit promotions are necessary to handle memory above the 4GiB boundary correctly. [dwmw2: Fix the second part not to need 64-bit arithmetic at all] Signed-off-by: Benjamin LaHaise Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 5493c79dde47..52026d1797ec 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -735,7 +735,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return NULL; domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); - pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; if (cmpxchg64(&pte->val, 0ULL, pteval)) { /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); @@ -2648,10 +2648,9 @@ static void flush_unmaps(void) unsigned long mask; struct iova *iova = deferred_flush[i].iova[j]; - mask = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT; - mask = ilog2(mask >> VTD_PAGE_SHIFT); + mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); iommu_flush_dev_iotlb(deferred_flush[i].domain[j], - iova->pfn_lo << PAGE_SHIFT, mask); + (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); __free_iova(&deferred_flush[i].domain[j]->iovad, iova); } deferred_flush[i].next = 0; From 0c02a20ff7695f9c54cc7c013dda326270ccdac8 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 19 Sep 2009 09:37:23 -0700 Subject: [PATCH 22/23] intel-iommu: Kill DMAR_BROKEN_GFX_WA option. Just make it depend on BROKEN for now, in case people scream really loud about it (and because we might want to keep some of this logic for an upcoming BIOS workaround, so I don't just want to rip it out entirely just yet). But for graphics devices, it really ought to be unnecessary. Signed-off-by: David Woodhouse --- Documentation/Intel-IOMMU.txt | 6 +----- arch/x86/Kconfig | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt index 21bc416d887e..cf9431db8731 100644 --- a/Documentation/Intel-IOMMU.txt +++ b/Documentation/Intel-IOMMU.txt @@ -56,11 +56,7 @@ Graphics Problems? ------------------ If you encounter issues with graphics devices, you can try adding option intel_iommu=igfx_off to turn off the integrated graphics engine. - -If it happens to be a PCI device included in the INCLUDE_ALL Engine, -then try enabling CONFIG_DMAR_GFX_WA to setup a 1-1 map. We hear -graphics drivers may be in process of using DMA api's in the near -future and at that time this option can be yanked out. +If this fixes anything, please ensure you file a bug reporting the problem. Some exceptions to IOVA ----------------------- diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 738bdc6b0f8b..22a0fd132112 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1916,7 +1916,7 @@ config DMAR_DEFAULT_ON config DMAR_BROKEN_GFX_WA def_bool n prompt "Workaround broken graphics drivers (going away soon)" - depends on DMAR + depends on DMAR && BROKEN ---help--- Current Graphics drivers tend to use physical address for DMA and avoid using DMA APIs. Setting this config From b94996c99c8befed9cbbb8804a4625e203913318 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 19 Sep 2009 15:28:12 -0700 Subject: [PATCH 23/23] intel-iommu: Disable PMRs after we enable translation, not before Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 52026d1797ec..601c3278cf2a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2403,11 +2403,12 @@ int __init init_dmars(void) iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - iommu_disable_protect_mem_regions(iommu); ret = iommu_enable_translation(iommu); if (ret) goto error; + + iommu_disable_protect_mem_regions(iommu); } return 0; @@ -3066,8 +3067,8 @@ static int init_iommu_hw(void) DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - iommu_disable_protect_mem_regions(iommu); iommu_enable_translation(iommu); + iommu_disable_protect_mem_regions(iommu); } return 0;