gpu: ion: Map everything into IOMMU with 64K pages.

Mapping with 4K pages causes performance problems with the IOMMU, so
use 64K pages instead. However, the minimum allocation from ION is
4K, so instead of mapping each buffer on demand we map the full heap
into the IOMMU when the first mapping request arrives, and only unmap
everything from the IOMMU when the last buffer is freed.
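
A rough sketch of the resulting logic in ion_cp_heap.c (simplified,
error handling omitted):

    /* First mapping request: map the whole heap with 64K pages. */
    if (cp_heap->iommu_map_all && !cp_heap->iommu_iova[domain_num])
        iommu_map_all(domain_num, cp_heap, partition_num, prot, align);

    /* Each buffer is handed an offset into the premapped range. */
    data->iova_addr = cp_heap->iommu_iova[domain_num] +
                      (buffer->priv_phys - cp_heap->base);

    /* On free of the last buffer, tear the whole mapping down. */
    if (!cp_heap->allocated_bytes)
        iommu_unmap_all(domain_num, cp_heap);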

CRs-fixed: 348606
Change-Id: Ic1793f5caaff2f69bf1fb7e8c8b3bd03378131b8
Signed-off-by: Olav Haugan <ohaugan@codeaurora.org>
diff --git a/drivers/gpu/ion/ion_carveout_heap.c b/drivers/gpu/ion/ion_carveout_heap.c
index c554e56..ca2380b 100644
--- a/drivers/gpu/ion/ion_carveout_heap.c
+++ b/drivers/gpu/ion/ion_carveout_heap.c
@@ -321,7 +321,8 @@
 
 	if (extra) {
 		unsigned long extra_iova_addr = data->iova_addr + buffer->size;
-		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, prot);
+		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra,
+					  SZ_4K, prot);
 		if (ret)
 			goto out2;
 	}
diff --git a/drivers/gpu/ion/ion_cp_heap.c b/drivers/gpu/ion/ion_cp_heap.c
index 4c10c64..d889c4d 100644
--- a/drivers/gpu/ion/ion_cp_heap.c
+++ b/drivers/gpu/ion/ion_cp_heap.c
@@ -60,8 +60,12 @@
  *			kernel space (un-cached).
  * @umap_count:	the total number of times this heap has been mapped in
  *		user space.
+ * @iommu_iova:	saved iova (per domain) when mapping the full heap
+ *			at once.
+ * @iommu_partition:	partition (per domain) used to map the full heap.
  * @reusable: indicates if the memory should be reused via fmem.
  * @reserved_vrange: reserved virtual address range for use with fmem
+ * @iommu_map_all:	Indicates whether the whole heap should be mapped
+ *			into the IOMMU on the first mapping request.
+ * @iommu_2x_map_domain: Indicates the domain that must be mapped at twice
+ *			the heap size (video prefetch workaround).
  */
 struct ion_cp_heap {
 	struct ion_heap heap;
@@ -80,8 +84,13 @@
 	unsigned long kmap_cached_count;
 	unsigned long kmap_uncached_count;
 	unsigned long umap_count;
+	unsigned long iommu_iova[MAX_DOMAINS];
+	unsigned long iommu_partition[MAX_DOMAINS];
 	int reusable;
 	void *reserved_vrange;
+	int iommu_map_all;
+	int iommu_2x_map_domain;
 };
 
 enum {
@@ -247,6 +256,30 @@
 	return offset;
 }
 
+static void iommu_unmap_all(unsigned long domain_num,
+			    struct ion_cp_heap *cp_heap)
+{
+	unsigned long left_to_unmap = cp_heap->total_size;
+	unsigned long order = get_order(SZ_64K);
+	unsigned long page_size = SZ_64K;
+	struct iommu_domain *domain = msm_get_iommu_domain(domain_num);
+
+	if (domain) {
+		unsigned long temp_iova = cp_heap->iommu_iova[domain_num];
+
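+		/* Undo the 64K mappings covering the whole heap. */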
+		while (left_to_unmap) {
+			iommu_unmap(domain, temp_iova, order);
+			temp_iova += page_size;
+			left_to_unmap -= page_size;
+		}
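+		/*
+		 * The 2x domain has an extra heap-sized mapping right after
+		 * the heap; remove that as well.
+		 */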
+		if (domain_num == cp_heap->iommu_2x_map_domain)
+			msm_iommu_unmap_extra(domain, temp_iova,
+					      cp_heap->total_size, SZ_64K);
+	} else {
+		pr_err("Unable to get IOMMU domain %lu\n", domain_num);
+	}
+}
+
 void ion_cp_free(struct ion_heap *heap, ion_phys_addr_t addr,
 		       unsigned long size)
 {
@@ -265,6 +298,26 @@
 			pr_err("%s: unable to transition heap to T-state\n",
 				__func__);
 	}
+
+	/* Unmap everything if we previously mapped the whole heap at once. */
+	if (!cp_heap->allocated_bytes) {
+		unsigned int i;
+		for (i = 0; i < MAX_DOMAINS; ++i) {
+			if (cp_heap->iommu_iova[i]) {
+				unsigned long vaddr_len = cp_heap->total_size;
+
+				if (i == cp_heap->iommu_2x_map_domain)
+					vaddr_len <<= 1;
+				iommu_unmap_all(i, cp_heap);
+
+				msm_free_iova_address(cp_heap->iommu_iova[i], i,
+						cp_heap->iommu_partition[i],
+						vaddr_len);
+			}
+			cp_heap->iommu_iova[i] = 0;
+			cp_heap->iommu_partition[i] = 0;
+		}
+	}
 	mutex_unlock(&cp_heap->lock);
 }
 
@@ -566,6 +619,75 @@
 	return ret_value;
 }
 
+static int iommu_map_all(unsigned long domain_num, struct ion_cp_heap *cp_heap,
+			int partition, unsigned long prot, unsigned long align)
+{
+	unsigned long left_to_map = cp_heap->total_size;
+	unsigned long order = get_order(SZ_64K);
+	unsigned long page_size = SZ_64K;
+	int ret_value = 0;
+	unsigned long virt_addr_len = cp_heap->total_size;
+	struct iommu_domain *domain = msm_get_iommu_domain(domain_num);
+
+	/*
+	 * If we are mapping into the video domain we need to map twice
+	 * the size of the heap to account for a prefetch issue in the
+	 * video core.
+	 */
+	if (domain_num == cp_heap->iommu_2x_map_domain)
+		virt_addr_len <<= 1;
+
+	if (cp_heap->total_size & (SZ_64K-1)) {
+		pr_err("Heap size is not aligned to 64K, cannot map into IOMMU\n");
+		ret_value = -EINVAL;
+	}
+	if (cp_heap->base & (SZ_64K-1)) {
+		pr_err("Heap physical address is not aligned to 64K, cannot map into IOMMU\n");
+		ret_value = -EINVAL;
+	}
+	if (!ret_value && domain) {
+		unsigned long temp_phys = cp_heap->base;
+		unsigned long temp_iova =
+				msm_allocate_iova_address(domain_num, partition,
+						virt_addr_len, SZ_64K);
+		if (!temp_iova) {
+			pr_err("%s: could not allocate iova from domain %lu, partition %d\n",
+				__func__, domain_num, partition);
+			ret_value = -ENOMEM;
+			goto out;
+		}
+		cp_heap->iommu_iova[domain_num] = temp_iova;
+
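+		/* Map the whole heap, 64K pages at a time. */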
+		while (left_to_map) {
+			int ret = iommu_map(domain, temp_iova, temp_phys,
+					    order, prot);
+			if (ret) {
+				pr_err("%s: could not map %lx in domain %p, error: %d\n",
+					__func__, temp_iova, domain, ret);
+				ret_value = -EAGAIN;
+				goto free_iova;
+			}
+			temp_iova += page_size;
+			temp_phys += page_size;
+			left_to_map -= page_size;
+		}
+		if (domain_num == cp_heap->iommu_2x_map_domain)
+			ret_value = msm_iommu_map_extra(domain, temp_iova,
+							cp_heap->total_size,
+							SZ_64K, prot);
+		if (ret_value)
+			goto free_iova;
+	} else if (!domain) {
+		pr_err("Unable to get IOMMU domain %lu\n", domain_num);
+		ret_value = -ENOMEM;
+	}
+	goto out;
+
+free_iova:
+	msm_free_iova_address(cp_heap->iommu_iova[domain_num], domain_num,
+			      partition, virt_addr_len);
+out:
+	return ret_value;
+}
+
 static int ion_cp_heap_map_iommu(struct ion_buffer *buffer,
 				struct ion_iommu_map *data,
 				unsigned int domain_num,
@@ -578,6 +700,8 @@
 	int ret = 0;
 	unsigned long extra;
 	struct scatterlist *sglist = 0;
+	struct ion_cp_heap *cp_heap =
+		container_of(buffer->heap, struct ion_cp_heap, heap);
 	int prot = IOMMU_WRITE | IOMMU_READ;
 	prot |= ION_IS_CACHED(flags) ? IOMMU_CACHE : 0;
 
@@ -588,6 +712,33 @@
 		return 0;
 	}
 
+	if (cp_heap->iommu_iova[domain_num]) {
+		/* Already mapped. */
+		unsigned long offset = buffer->priv_phys - cp_heap->base;
+		data->iova_addr = cp_heap->iommu_iova[domain_num] + offset;
+		return 0;
+	} else if (cp_heap->iommu_map_all) {
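+		/* First mapping request: map the entire heap at once. */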
+		ret = iommu_map_all(domain_num, cp_heap, partition_num,
+				    prot, align);
+		if (!ret) {
+			unsigned long offset =
+					buffer->priv_phys - cp_heap->base;
+			data->iova_addr =
+				cp_heap->iommu_iova[domain_num] + offset;
+			cp_heap->iommu_partition[domain_num] = partition_num;
+			/*
+			 * Clear the delayed-unmap flag so that we don't
+			 * interfere with that feature (we are already
+			 * delaying the unmap).
+			 */
+			data->flags &= ~ION_IOMMU_UNMAP_DELAYED;
+			return 0;
+		} else {
+			cp_heap->iommu_iova[domain_num] = 0;
+			cp_heap->iommu_partition[domain_num] = 0;
+			return ret;
+		}
+	}
+
 	extra = iova_length - buffer->size;
 
 	data->iova_addr = msm_allocate_iova_address(domain_num, partition_num,
@@ -620,7 +771,8 @@
 
 	if (extra) {
 		unsigned long extra_iova_addr = data->iova_addr + buffer->size;
-		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, prot);
+		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra,
+					  SZ_4K, prot);
 		if (ret)
 			goto out2;
 	}
@@ -643,11 +795,20 @@
 	unsigned int domain_num;
 	unsigned int partition_num;
 	struct iommu_domain *domain;
+	struct ion_cp_heap *cp_heap =
+		container_of(data->buffer->heap, struct ion_cp_heap, heap);
 
 	if (!msm_use_iommu())
 		return;
 
 	domain_num = iommu_map_domain(data);
+
+	/*
+	 * If we are mapping everything, wait to unmap until everything
+	 * is freed.
+	 */
+	if (cp_heap->iommu_iova[domain_num])
+		return;
+
 	partition_num = iommu_map_partition(data);
 
 	domain = msm_get_iommu_domain(domain_num);
@@ -728,7 +889,13 @@
 			cp_heap->request_region = extra_data->request_region;
 		if (extra_data->release_region)
 			cp_heap->release_region = extra_data->release_region;
+		cp_heap->iommu_map_all = extra_data->iommu_map_all;
+		cp_heap->iommu_2x_map_domain = extra_data->iommu_2x_map_domain;
 	}
 	return &cp_heap->heap;
 
 destroy_pool:
diff --git a/drivers/gpu/ion/ion_iommu_heap.c b/drivers/gpu/ion/ion_iommu_heap.c
index 6951603..312ca42 100644
--- a/drivers/gpu/ion/ion_iommu_heap.c
+++ b/drivers/gpu/ion/ion_iommu_heap.c
@@ -215,7 +215,8 @@
 
 	if (extra) {
 		unsigned long extra_iova_addr = data->iova_addr + buffer->size;
-		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, prot);
+		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, SZ_4K,
+					  prot);
 		if (ret)
 			goto out2;
 	}
diff --git a/drivers/gpu/ion/ion_system_heap.c b/drivers/gpu/ion/ion_system_heap.c
index 7e859ce..ed9ae27 100644
--- a/drivers/gpu/ion/ion_system_heap.c
+++ b/drivers/gpu/ion/ion_system_heap.c
@@ -282,7 +282,8 @@
 
 	extra_iova_addr = data->iova_addr + buffer->size;
 	if (extra) {
-		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, prot);
+		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, SZ_4K,
+					  prot);
 		if (ret)
 			goto out2;
 	}
@@ -490,7 +491,8 @@
 
 	if (extra) {
 		unsigned long extra_iova_addr = data->iova_addr + buffer->size;
-		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, prot);
+		ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, SZ_4K,
+					  prot);
 		if (ret)
 			goto out2;
 	}
diff --git a/drivers/staging/qcache/fmem.c b/drivers/staging/qcache/fmem.c
index 7a95880..1f78799 100644
--- a/drivers/staging/qcache/fmem.c
+++ b/drivers/staging/qcache/fmem.c
@@ -62,7 +62,7 @@
 
 	if (!pdata->phys)
 		pdata->phys = allocate_contiguous_ebi_nomap(pdata->size,
-			PAGE_SIZE);
+			pdata->align);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 	fmem_section_start = pdata->phys >> PA_SECTION_SHIFT;