From 836bfa0d2930b7ec236aa488f01678c92eb5f0de Mon Sep 17 00:00:00 2001
From: YoungJun Cho <yj44.cho@samsung.com>
Date: Mon, 17 Jun 2013 13:18:52 +0900
Subject: [PATCH 1/5] ARM: dma-mapping: Get pages if the cpu_addr is out of
 atomic_pool

In __iommu_get_pages(), the cpu_addr is checked wheather in
atomic_pool range or not. So if the cpu_addr is in atomic_pool
range, it does not need to check twice.

Signed-off-by: YoungJun Cho <yj44.cho@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/mm/dma-mapping.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index ef3e0f3aac96..1d158c223ebb 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1372,19 +1372,20 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 			  dma_addr_t handle, struct dma_attrs *attrs)
 {
-	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+	struct page **pages;
 	size = PAGE_ALIGN(size);
 
-	if (!pages) {
-		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
-		return;
-	}
-
 	if (__in_atomic_pool(cpu_addr, size)) {
 		__iommu_free_atomic(dev, cpu_addr, handle, size);
 		return;
 	}
 
+	pages = __iommu_get_pages(cpu_addr, attrs);
+	if (!pages) {
+		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+		return;
+	}
+
 	if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs)) {
 		unmap_kernel_range((unsigned long)cpu_addr, size);
 		vunmap(cpu_addr);

From 13987d68bcf476a93fa864ea4e58755f1bc4bd30 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Jun 2013 19:34:39 +0100
Subject: [PATCH 2/5] ARM: dma-mapping: convert DMA direction into IOMMU
 protection attributes

IOMMU mappings take a prot parameter, identifying the protection bits
to enforce on the newly created mapping (READ or WRITE). The ARM
dma-mapping framework currently just passes 0 as the prot argument,
resulting in faulting mappings.

This patch infers the protection attributes based on the direction of
the DMA transfer.

Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/mm/dma-mapping.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1d158c223ebb..282aacd6f570 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1637,13 +1637,27 @@ static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *p
 {
 	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
 	dma_addr_t dma_addr;
-	int ret, len = PAGE_ALIGN(size + offset);
+	int ret, prot, len = PAGE_ALIGN(size + offset);
 
 	dma_addr = __alloc_iova(mapping, len);
 	if (dma_addr == DMA_ERROR_CODE)
 		return dma_addr;
 
-	ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0);
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+		prot = IOMMU_READ | IOMMU_WRITE;
+		break;
+	case DMA_TO_DEVICE:
+		prot = IOMMU_READ;
+		break;
+	case DMA_FROM_DEVICE:
+		prot = IOMMU_WRITE;
+		break;
+	default:
+		prot = 0;
+	}
+
+	ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);
 	if (ret < 0)
 		goto fail;
 

From 9e4b259d4fbf8a9c822117e734356e94b7f30927 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Jun 2013 19:34:40 +0100
Subject: [PATCH 3/5] ARM: dma-mapping: NULLify dev->archdata.mapping pointer
 on detach

The current code only clobbers a local variable, so the device is left
with a stale mapping pointer.

Cc: Hiroshi Doyu <hdoyu@nvidia.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Hiroshi Doyu <hdoyu@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/mm/dma-mapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 282aacd6f570..26a5833c9f29 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1922,7 +1922,7 @@ void arm_iommu_detach_device(struct device *dev)
 
 	iommu_detach_device(mapping->domain, dev);
 	kref_put(&mapping->kref, release_iommu_mapping);
-	mapping = NULL;
+	dev->archdata.mapping = NULL;
 	set_dma_ops(dev, NULL);
 
 	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));

From 63c181922f25371b7d24ef63d4e15887ff23a088 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Wed, 15 May 2013 12:09:49 +0800
Subject: [PATCH 4/5] ARM: DMA-mapping: mark all !DMA_TO_DEVICE pages in
 unmapping as clean

It is common for one sg to include many pages, so mark all these
pages as clean to avoid unnecessary flushing on them in
set_pte_at() or update_mmu_cache().

The patch might improve loading performance of applciation code a bit.

On the below test code to read file(~1GByte size) from usb mass storage
disk to buffer created with mmap(PROT_READ | PROT_EXEC) on
Pandaboard, average ~1% improvement can be observed with the patch on
10 times test.

unsigned int sum = 0;

static unsigned long tv_diff(struct timeval *tv1, struct timeval *tv2)
{
	return (tv2->tv_sec - tv1->tv_sec) * 1000000 +
		(tv2->tv_usec - tv1->tv_usec);
}

int main(int argc, char *argv[])
{
	char *mbuffer;
	int fd;
	int i;
	unsigned long page_size, size;
	struct stat stat;
	struct timeval t1, t2;

	page_size = getpagesize();
	fd = open(argv[1], O_RDONLY);
	assert(fd >= 0);

	fstat(fd, &stat);
	size = stat.st_size;
	printf("%s: file %s, file size %lu, page size %lu\n", argv[0],
		read_filename, size, page_size);

	gettimeofday(&t1, NULL);
	mbuffer = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
	for (i = 0 ; i < size ; i += page_size)
		sum += mbuffer[i];
	munmap(mbuffer, page_size);
	gettimeofday(&t2, NULL);
	printf("\tread mmaped time: %luus\n", tv_diff(&t1, &t2));

	close(fd);
}

Acked-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/mm/dma-mapping.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 26a5833c9f29..97926b1277b2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -880,10 +880,24 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
 	dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
 
 	/*
-	 * Mark the D-cache clean for this page to avoid extra flushing.
+	 * Mark the D-cache clean for these pages to avoid extra flushing.
 	 */
-	if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
-		set_bit(PG_dcache_clean, &page->flags);
+	if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
+		unsigned long pfn;
+		size_t left = size;
+
+		pfn = page_to_pfn(page) + off / PAGE_SIZE;
+		off %= PAGE_SIZE;
+		if (off) {
+			pfn++;
+			left -= PAGE_SIZE - off;
+		}
+		while (left >= PAGE_SIZE) {
+			page = pfn_to_page(pfn++);
+			set_bit(PG_dcache_clean, &page->flags);
+			left -= PAGE_SIZE;
+		}
+	}
 }
 
 /**

From 5b91a98c61abe914e6a80d7dc15e435c47ea0004 Mon Sep 17 00:00:00 2001
From: Richard Zhao <rizhao@nvidia.com>
Date: Thu, 20 Jun 2013 20:31:00 +0800
Subject: [PATCH 5/5] ARM: dma: Drop __GFP_COMP for iommu dma memory
 allocations

__iommu_alloc_buffer wants to split pages after allocation in order to
reduce the memory footprint. This does not work well with __GFP_COMP
pages, so drop this flag before allocation

One failure example is snd_malloc_dev_pages call dma_alloc_coherent with
__GFP_COMP.

Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/mm/dma-mapping.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 97926b1277b2..22c7d7eb35a0 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1328,6 +1328,15 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 	if (gfp & GFP_ATOMIC)
 		return __iommu_alloc_atomic(dev, size, handle);
 
+	/*
+	 * Following is a work-around (a.k.a. hack) to prevent pages
+	 * with __GFP_COMP being passed to split_page() which cannot
+	 * handle them.  The real problem is that this flag probably
+	 * should be 0 on ARM as it is not supported on this
+	 * platform; see CONFIG_HUGETLBFS.
+	 */
+	gfp &= ~(__GFP_COMP);
+
 	pages = __iommu_alloc_buffer(dev, size, gfp, attrs);
 	if (!pages)
 		return NULL;