iommu: dma-mapping: add DMA mapper for io-pgtable-fast on 32-bit

io-pgtable-fast was implemented to achieve
better performance for IOMMU map/unmap. Add
DMA API support that goes through io-pgtable-fast
for 32-bit targets.

Change-Id: Ib46157cabd1f9b31903837bcc0dcaad87037cdb6
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
[pdaly@codeaurora.org: Included only changes to general iommu files]
Signed-off-by: Patrick Daly <pdaly@codeaurora.org>
diff --git a/drivers/iommu/iommu-debug.c b/drivers/iommu/iommu-debug.c
index 45ffb40..5730126 100644
--- a/drivers/iommu/iommu-debug.c
+++ b/drivers/iommu/iommu-debug.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -822,7 +822,7 @@
 	if (!virt)
 		goto out;
 
-	mapping = arm_iommu_create_mapping(&platform_bus_type, 0, SZ_1G * 4UL);
+	mapping = arm_iommu_create_mapping(&platform_bus_type, 0, SZ_1G * 4ULL);
 	if (!mapping) {
 		seq_puts(s, "fast_smmu_create_mapping failed\n");
 		goto out_kfree;
@@ -922,8 +922,8 @@
 static int __tlb_stress_sweep(struct device *dev, struct seq_file *s)
 {
 	int i, ret = 0;
-	unsigned long iova;
-	const unsigned long max = SZ_1G * 4UL;
+	u64 iova;
+	const u64  max = SZ_1G * 4ULL - 1;
 	void *virt;
 	phys_addr_t phys;
 	dma_addr_t dma_addr;
@@ -995,8 +995,8 @@
 	}
 
 	/* we're all full again. unmap everything. */
-	for (dma_addr = 0; dma_addr < max; dma_addr += SZ_8K)
-		dma_unmap_single(dev, dma_addr, SZ_8K, DMA_TO_DEVICE);
+	for (iova = 0; iova < max; iova += SZ_8K)
+		dma_unmap_single(dev, (dma_addr_t)iova, SZ_8K, DMA_TO_DEVICE);
 
 out:
 	free_pages((unsigned long)virt, get_order(SZ_8K));
@@ -1029,7 +1029,7 @@
 			   const size_t size)
 {
 	u64 iova;
-	const unsigned long max = SZ_1G * 4UL;
+	const u64 max = SZ_1G * 4ULL - 1;
 	int i, remapped, unmapped, ret = 0;
 	void *virt;
 	dma_addr_t dma_addr, dma_addr2;
@@ -1061,9 +1061,9 @@
 	fib_init(&fib);
 	for (iova = get_next_fib(&fib) * size;
 	     iova < max - size;
-	     iova = get_next_fib(&fib) * size) {
-		dma_addr = iova;
-		dma_addr2 = max - size - iova;
+	     iova = (u64)get_next_fib(&fib) * size) {
+		dma_addr = (dma_addr_t)(iova);
+		dma_addr2 = (dma_addr_t)((max + 1) - size - iova);
 		if (dma_addr == dma_addr2) {
 			WARN(1,
 			"%s test needs update! The random number sequence is folding in on itself and should be changed.\n",
@@ -1089,8 +1089,8 @@
 		ret = -EINVAL;
 	}
 
-	for (dma_addr = 0; dma_addr < max; dma_addr += size)
-		dma_unmap_single(dev, dma_addr, size, DMA_TO_DEVICE);
+	for (iova = 0; iova < max; iova += size)
+		dma_unmap_single(dev, (dma_addr_t)iova, size, DMA_TO_DEVICE);
 
 out:
 	free_pages((unsigned long)virt, get_order(size));
@@ -1118,10 +1118,11 @@
 static int __full_va_sweep(struct device *dev, struct seq_file *s,
 			   const size_t size, struct iommu_domain *domain)
 {
-	unsigned long iova;
+	u64 iova;
 	dma_addr_t dma_addr;
 	void *virt;
 	phys_addr_t phys;
+	const u64 max = SZ_1G * 4ULL - 1;
 	int ret = 0, i;
 
 	virt = (void *)__get_free_pages(GFP_KERNEL, get_order(size));
@@ -1136,7 +1137,7 @@
 	}
 	phys = virt_to_phys(virt);
 
-	for (iova = 0, i = 0; iova < SZ_1G * 4UL; iova += size, ++i) {
+	for (iova = 0, i = 0; iova < max; iova += size, ++i) {
 		unsigned long expected = iova;
 
 		dma_addr = dma_map_single(dev, virt, size, DMA_TO_DEVICE);
@@ -1184,8 +1185,8 @@
 	}
 
 out:
-	for (dma_addr = 0; dma_addr < SZ_1G * 4UL; dma_addr += size)
-		dma_unmap_single(dev, dma_addr, size, DMA_TO_DEVICE);
+	for (iova = 0; iova < max; iova += size)
+		dma_unmap_single(dev, (dma_addr_t)iova, size, DMA_TO_DEVICE);
 
 	free_pages((unsigned long)virt, get_order(size));
 	return ret;
@@ -1374,7 +1375,8 @@
 	int ret = -EINVAL, fast = 1;
 	phys_addr_t pt_phys;
 
-	mapping = arm_iommu_create_mapping(&platform_bus_type, 0, SZ_1G * 4UL);
+	mapping = arm_iommu_create_mapping(&platform_bus_type, 0,
+						(SZ_1G * 4ULL));
 	if (!mapping)
 		goto out;
 
@@ -1443,7 +1445,9 @@
 	size_t sizes[] = {SZ_4K, SZ_64K, SZ_2M, SZ_1M * 12, 0};
 	int ret = -EINVAL;
 
-	mapping = arm_iommu_create_mapping(&platform_bus_type, 0, SZ_1G * 4UL);
+	/* Make the size 4GB - 1, i.e. ULONG_MAX on 32-bit targets */
+	mapping = arm_iommu_create_mapping(&platform_bus_type, 0,
+						(SZ_1G * 4ULL - 1));
 	if (!mapping)
 		goto out;