From 5a5e02a614e59db7536cd11029e6674adc41b191 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 4 Jul 2009 09:35:44 +0100
Subject: [PATCH 1/8] intel-iommu: Fix dma vs. mm page confusion with
 aligned_nrpages()

The aligned_nrpages() function rounds up to the next VM page, but
returns its result as a number of DMA pages.

This is purely theoretical except on IA64, where the MM page size can
differ from the VT-d page size -- and IA64 doesn't boot with VT-d right
now anyway.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 53075424a434..ad85e95d2dcc 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2368,15 +2368,15 @@ int __init init_dmars(void)
 	return ret;
 }
 
+/* Returns a number of VTD pages, but aligned to MM page size */
 static inline unsigned long aligned_nrpages(unsigned long host_addr,
 					    size_t size)
 {
 	host_addr &= ~PAGE_MASK;
-	host_addr += size + PAGE_SIZE - 1;
-
-	return host_addr >> VTD_PAGE_SHIFT;
+	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
 }
 
+/* This takes a number of _MM_ pages, not VTD pages */
 static struct iova *intel_alloc_iova(struct device *dev,
 				     struct dmar_domain *domain,
 				     unsigned long nrpages, uint64_t dma_mask)
@@ -2506,7 +2506,8 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 	iommu = domain_get_iommu(domain);
 	size = aligned_nrpages(paddr, size);
 
-	iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
+	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
+				pdev->dma_mask);
 	if (!iova)
 		goto error;
 
@@ -2797,7 +2798,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
 	for_each_sg(sglist, sg, nelems, i)
 		size += aligned_nrpages(sg->offset, sg->length);
 
-	iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
+	iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
+				pdev->dma_mask);
 	if (!iova) {
 		sglist->dma_length = 0;
 		return 0;

From 1e4c64c46d413de84cc0b786bd6a9b555ba7d111 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 4 Jul 2009 10:40:38 +0100
Subject: [PATCH 2/8] intel-iommu: Don't set identity mapping for bypassed
 graphics devices

We should check iommu_dummy() _first_, because that means it's attached
to an iommu that we've just disabled completely. At the moment, we might
try to put the device into the identity mapping domain.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index ad85e95d2dcc..3e3910127fc1 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2447,8 +2447,11 @@ static int iommu_no_mapping(struct pci_dev *pdev)
 {
 	int found;
 
+	if (iommu_dummy(pdev))
+		return 1;
+
 	if (!iommu_identity_mapping)
-		return iommu_dummy(pdev);
+		return 0;
 
 	found = identity_mapping(pdev);
 	if (found) {
@@ -2480,7 +2483,7 @@ static int iommu_no_mapping(struct pci_dev *pdev)
 		}
 	}
 
-	return iommu_dummy(pdev);
+	return 0;
 }
 
 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,

From 1b7bc0a1618b4de1e6f55c6d95b790f4ab6fcd9e Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 4 Jul 2009 10:49:46 +0100
Subject: [PATCH 3/8] intel-iommu: Fix reattaching of devices to identity
 mapping domain

When we reattach a device to the si_domain (because it's been removed
from a VM), we weren't calling domain_context_mapping() to actually tell
the hardware about that.

We should really put the call to domain_context_mapping() into
domain_add_dev_info() -- we never call the latter without also doing the
former, and we can keep the error paths simple that way. But that's a
cleanup which can wait for 2.6.32 now.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 3e3910127fc1..73a5c71dd37d 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2475,6 +2475,9 @@ static int iommu_no_mapping(struct pci_dev *pdev)
 		if (pdev->dma_mask > DMA_BIT_MASK(32)) {
 			int ret;
 			ret = domain_add_dev_info(si_domain, pdev);
+			if (ret)
+				return 0;
+			ret = domain_context_mapping(si_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
 			if (!ret) {
 				printk(KERN_INFO "64bit %s uses identity mapping\n",
 				       pci_name(pdev));

From 40e4aa34324bff3938a900014254f88943d05e15 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 4 Jul 2009 10:55:41 +0100
Subject: [PATCH 4/8] intel-iommu: Add iommu_should_identity_map() function

We open-code the 'dma_mask > DMA_BIT_MASK(32)' check twice, and it's
about to get more complicated. Moving it into a helper function makes
the code slightly clearer about what it's doing, too.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 73a5c71dd37d..ae5ccdf8b19f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2442,6 +2442,11 @@ static int iommu_dummy(struct pci_dev *pdev)
 	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
 }
 
+static int iommu_should_identity_map(struct pci_dev *pdev)
+{
+	return pdev->dma_mask > DMA_BIT_MASK(32);
+}
+
 /* Check if the pdev needs to go through non-identity map and unmap process.*/
 static int iommu_no_mapping(struct pci_dev *pdev)
 {
@@ -2455,7 +2460,7 @@ static int iommu_no_mapping(struct pci_dev *pdev)
 
 	found = identity_mapping(pdev);
 	if (found) {
-		if (pdev->dma_mask > DMA_BIT_MASK(32))
+		if (iommu_should_identity_map(pdev))
 			return 1;
 		else {
 			/*
@@ -2472,7 +2477,7 @@ static int iommu_no_mapping(struct pci_dev *pdev)
 		 * In case of a detached 64 bit DMA device from vm, the device
 		 * is put into si_domain for identity mapping.
 		 */
-		if (pdev->dma_mask > DMA_BIT_MASK(32)) {
+		if (iommu_should_identity_map(pdev)) {
 			int ret;
 			ret = domain_add_dev_info(si_domain, pdev);
 			if (ret)

From 62edf5dc4a524e4cb525e6020b955a1ad593d9ba Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 4 Jul 2009 10:59:46 +0100
Subject: [PATCH 5/8] intel-iommu: Restore DMAR_BROKEN_GFX_WA option for broken
 graphics drivers

We need to give people a little more time to fix the broken drivers.
Re-introduce this, but tied in properly with the 'iommu=pt' support this
time. Change the config option name and make it default to 'no' too.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 arch/x86/Kconfig          | 12 ++++++++++++
 drivers/pci/intel-iommu.c | 29 +++++++++++++++++++----------
 2 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c07f72205909..738bdc6b0f8b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1913,6 +1913,18 @@ config DMAR_DEFAULT_ON
 	  recommended you say N here while the DMAR code remains
 	  experimental.
 
+config DMAR_BROKEN_GFX_WA
+	def_bool n
+	prompt "Workaround broken graphics drivers (going away soon)"
+	depends on DMAR
+	---help---
+	  Current Graphics drivers tend to use physical address
+	  for DMA and avoid using DMA APIs. Setting this config
+	  option permits the IOMMU driver to set a unity map for
+	  all the OS-visible memory. Hence the driver can continue
+	  to use physical addresses for DMA, at least until this
+	  option is removed in the 2.6.32 kernel.
+
 config DMAR_FLOPPY_WA
 	def_bool y
 	depends on DMAR
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index ae5ccdf8b19f..5ee8305257ea 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2127,16 +2127,18 @@ static int iommu_prepare_static_identity_mapping(void)
 		return -EFAULT;
 
 	for_each_pci_dev(pdev) {
-		printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
-		       pci_name(pdev));
+		if (iommu_identity_mapping == 1 || IS_GFX_DEVICE(pdev)) {
+			printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
+			       pci_name(pdev));
 
-		ret = domain_context_mapping(si_domain, pdev,
-					     CONTEXT_TT_MULTI_LEVEL);
-		if (ret)
-			return ret;
-		ret = domain_add_dev_info(si_domain, pdev);
-		if (ret)
-			return ret;
+			ret = domain_context_mapping(si_domain, pdev,
+						     CONTEXT_TT_MULTI_LEVEL);
+			if (ret)
+				return ret;
+			ret = domain_add_dev_info(si_domain, pdev);
+			if (ret)
+				return ret;
+		}
 	}
 
 	return 0;
@@ -2291,6 +2293,10 @@ int __init init_dmars(void)
 	 * identity mapping if iommu_identity_mapping is set.
 	 */
 	if (!iommu_pass_through) {
+#ifdef CONFIG_DMAR_BROKEN_GFX_WA
+		if (!iommu_identity_mapping)
+			iommu_identity_mapping = 2;
+#endif
 		if (iommu_identity_mapping)
 			iommu_prepare_static_identity_mapping();
 		/*
@@ -2444,7 +2450,10 @@ static int iommu_dummy(struct pci_dev *pdev)
 
 static int iommu_should_identity_map(struct pci_dev *pdev)
 {
-	return pdev->dma_mask > DMA_BIT_MASK(32);
+	if (iommu_identity_mapping == 2)
+		return IS_GFX_DEVICE(pdev);
+	else
+		return pdev->dma_mask > DMA_BIT_MASK(32);
 }
 
 /* Check if the pdev needs to go through non-identity map and unmap process.*/

From 736768325efcbee7b0861d62670d01a54c2d158b Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 4 Jul 2009 14:08:36 +0100
Subject: [PATCH 6/8] intel-iommu: No mapping for non-PCI devices

This should fix kernel.org bug #11821, where the dcdbas driver makes up
a platform device and then uses dma_alloc_coherent() on it, in an
attempt to get memory < 4GiB.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 5ee8305257ea..fa052dd89913 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2457,10 +2457,15 @@ static int iommu_should_identity_map(struct pci_dev *pdev)
 }
 
 /* Check if the pdev needs to go through non-identity map and unmap process.*/
-static int iommu_no_mapping(struct pci_dev *pdev)
+static int iommu_no_mapping(struct device *dev)
 {
+	struct pci_dev *pdev;
 	int found;
 
+	if (unlikely(dev->bus != &pci_bus_type))
+		return 1;
+
+	pdev = to_pci_dev(dev);
 	if (iommu_dummy(pdev))
 		return 1;
 
@@ -2516,7 +2521,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
 
 	BUG_ON(dir == DMA_NONE);
 
-	if (iommu_no_mapping(pdev))
+	if (iommu_no_mapping(hwdev))
 		return paddr;
 
 	domain = get_valid_domain_for_dev(pdev);
@@ -2656,7 +2661,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
 	struct iova *iova;
 	struct intel_iommu *iommu;
 
-	if (iommu_no_mapping(pdev))
+	if (iommu_no_mapping(dev))
 		return;
 
 	domain = find_domain(pdev);
@@ -2747,7 +2752,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
 	struct iova *iova;
 	struct intel_iommu *iommu;
 
-	if (iommu_no_mapping(pdev))
+	if (iommu_no_mapping(hwdev))
 		return;
 
 	domain = find_domain(pdev);
@@ -2806,7 +2811,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne
 	struct intel_iommu *iommu;
 
 	BUG_ON(dir == DMA_NONE);
-	if (iommu_no_mapping(pdev))
+	if (iommu_no_mapping(hwdev))
 		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
 
 	domain = get_valid_domain_for_dev(pdev);

From 6941af2810c6fc970b88f7c0d52ba4e286acbee5 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 4 Jul 2009 18:24:27 +0100
Subject: [PATCH 7/8] intel-iommu: Use iommu_should_identity_map() at startup
 time too.

At boot time, the dma_mask won't have been set on any devices, so we
assume that all devices will be 64-bit capable (and thus get a 1:1 map).

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index fa052dd89913..f9fc4f3bfa3f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2117,6 +2117,17 @@ static int domain_add_dev_info(struct dmar_domain *domain,
 	return 0;
 }
 
+static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
+{
+	if (iommu_identity_mapping == 2)
+		return IS_GFX_DEVICE(pdev);
+
+	if (!startup)
+		return pdev->dma_mask > DMA_BIT_MASK(32);
+
+	return 1;
+}
+
 static int iommu_prepare_static_identity_mapping(void)
 {
 	struct pci_dev *pdev = NULL;
@@ -2127,7 +2138,7 @@ static int iommu_prepare_static_identity_mapping(void)
 		return -EFAULT;
 
 	for_each_pci_dev(pdev) {
-		if (iommu_identity_mapping == 1 || IS_GFX_DEVICE(pdev)) {
+		if (iommu_should_identity_map(pdev, 1)) {
 			printk(KERN_INFO "IOMMU: identity mapping for device %s\n",
 			       pci_name(pdev));
 
@@ -2448,14 +2459,6 @@ static int iommu_dummy(struct pci_dev *pdev)
 	return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
 }
 
-static int iommu_should_identity_map(struct pci_dev *pdev)
-{
-	if (iommu_identity_mapping == 2)
-		return IS_GFX_DEVICE(pdev);
-	else
-		return pdev->dma_mask > DMA_BIT_MASK(32);
-}
-
 /* Check if the pdev needs to go through non-identity map and unmap process.*/
 static int iommu_no_mapping(struct device *dev)
 {
@@ -2474,7 +2477,7 @@ static int iommu_no_mapping(struct device *dev)
 
 	found = identity_mapping(pdev);
 	if (found) {
-		if (iommu_should_identity_map(pdev))
+		if (iommu_should_identity_map(pdev, 0))
 			return 1;
 		else {
 			/*
@@ -2491,7 +2494,7 @@ static int iommu_no_mapping(struct device *dev)
 		 * In case of a detached 64 bit DMA device from vm, the device
 		 * is put into si_domain for identity mapping.
 		 */
-		if (iommu_should_identity_map(pdev)) {
+		if (iommu_should_identity_map(pdev, 0)) {
 			int ret;
 			ret = domain_add_dev_info(si_domain, pdev);
 			if (ret)

From 3dfc813d94bba2046c6aed216e0fd69ac93a8e03 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sat, 4 Jul 2009 19:11:08 +0100
Subject: [PATCH 8/8] intel-iommu: Don't use identity mapping for PCI devices
 behind bridges

Our current strategy for pass-through mode is to put all devices into
the 1:1 domain at startup (which is before we know what their dma_mask
will be), and only _later_ take them out of that domain, if it turns out
that they really can't address all of memory.

However, when there are a bunch of PCI devices behind a bridge, they all
end up with the same source-id on their DMA transactions, and hence in
the same IOMMU domain. This means that we _can't_ easily move them from
the 1:1 domain into their own domain at runtime, because there might be DMA
in-flight from their siblings.

So we have to adjust our pass-through strategy: For PCI devices not on
the root bus, and for the bridges which will take responsibility for
their transactions, we have to start up _out_ of the 1:1 domain, just in
case.

This fixes the BUG() we see when we have 32-bit-capable devices behind a
PCI-PCI bridge, and use the software identity mapping.

It does mean that we might end up using 'normal' mapping mode for some
devices which could actually live with the faster 1:1 mapping -- but
this is only for PCI devices behind bridges, which presumably aren't the
devices for which people are most concerned about performance.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/pci/intel-iommu.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index f9fc4f3bfa3f..360fb67a30d7 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2122,6 +2122,36 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
 	if (iommu_identity_mapping == 2)
 		return IS_GFX_DEVICE(pdev);
 
+	/*
+	 * We want to start off with all devices in the 1:1 domain, and
+	 * take them out later if we find they can't access all of memory.
+	 *
+	 * However, we can't do this for PCI devices behind bridges,
+	 * because all PCI devices behind the same bridge will end up
+	 * with the same source-id on their transactions.
+	 *
+	 * Practically speaking, we can't change things around for these
+	 * devices at run-time, because we can't be sure there'll be no
+	 * DMA transactions in flight for any of their siblings.
+	 * 
+	 * So PCI devices (unless they're on the root bus) as well as
+	 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
+	 * the 1:1 domain, just in _case_ one of their siblings turns out
+	 * not to be able to map all of memory.
+	 */
+	if (!pdev->is_pcie) {
+		if (!pci_is_root_bus(pdev->bus))
+			return 0;
+		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
+			return 0;
+	} else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
+		return 0;
+
+	/* 
+	 * At boot time, we don't yet know if devices will be 64-bit capable.
+	 * Assume that they will -- if they turn out not to be, then we can 
+	 * take them out of the 1:1 domain later.
+	 */
 	if (!startup)
 		return pdev->dma_mask > DMA_BIT_MASK(32);