iommu: arm-smmu: Move most memory allocations to GFP_KERNEL

Order-0 allocation failures were rarely seen during camera use cases,
which were estimated to map 800MB of memory. This corresponds to ~1.6MB
of pagetable memory allocated via GFP_ATOMIC.
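
Back-of-the-envelope, assuming a 4KB granule where each last-level
pagetable maps 2MB:

    800MB / 2MB per pagetable = 400 last-level pagetables
    400 pagetables * 4KB each = ~1.6MB of GFP_ATOMIC allocations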

page allocation failure: order:0, mode:0x2088020(GFP_ATOMIC|__GFP_ZERO)
warn_alloc+0x114/0x134
__alloc_pages_nodemask+0x3e8/0xd30
alloc_pages_exact+0x4c/0xa4
arm_smmu_alloc_pages_exact+0x128/0x15c
io_pgtable_alloc_pages_exact+0x30/0xa0
__arm_lpae_alloc_pages+0x40/0x1c8
__arm_lpae_map+0x224/0x3b4
__arm_lpae_map+0x108/0x3b4
arm_lpae_map_sg+0x1f8/0x314
arm_smmu_map_sg+0x108/0x204
iommu_map_sg+0x30/0xfc
arm_iommu_map_sg+0x1e0/0x3d0
msm_dma_map_sg_attrs+0x2ec/0x660
cam_smmu_map_buffer_validate+0xc4/0x698
cam_smmu_map_user_iova+0x154/0x304
cam_mem_util_map_hw_va+0xf8/0x1f8
cam_mem_mgr_map+0xa8/0x350
cam_private_ioctl+0x494/0x6f0
__video_do_ioctl+0xb8/0x2bc
video_usercopy+0x28c/0x658
video_ioctl2+0x18/0x28

Node 0 active_anon:521428kB inactive_anon:385400kB active_file:499900kB
inactive_file:459444kB unevictable:9532kB isolated(anon):0kB
isolated(file):128kB mapped:648252kB dirty:0kB writeback:0kB shmem:2544kB
writeback_tmp:0kB unstable:0kB all_unreclaimable? no

DMA free:12332kB min:2752kB low:24992kB high:26276kB active_anon:279808kB
inactive_anon:118280kB active_file:148280kB inactive_file:169896kB
unevictable:0kB writepending:0kB present:1729188kB managed:1286960kB
mlocked:0kB slab_reclaimable:14940kB slab_unreclaimable:57784kB
kernel_stack:20752kB pagetables:36284kB bounce:0kB free_pcp:4972kB
local_pcp:620kB free_cma:432kB lowmem_reserve[]: 0 1906 1906

Normal free:7460kB min:4260kB low:38640kB high:40628kB
active_anon:242612kB inactive_anon:266492kB active_file:351456kB
inactive_file:290540kB unevictable:9568kB writepending:0kB
present:2067200kB managed:1989364kB mlocked:128kB slab_reclaimable:46708kB
slab_unreclaimable:116340kB kernel_stack:23520kB pagetables:36948kB
bounce:0kB free_pcp:4580kB local_pcp:80kB free_cma:0kB lowmem_reserve[]: 0
0 0

DMA: 603*4kB (UMCH) 469*8kB (UMCH) 258*16kB (UCH) 69*32kB (CH) 0*64kB
0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 12500kB

Normal: 320*4kB (H) 426*8kB (H) 142*16kB (H) 19*32kB (H) 0*64kB 0*128kB
0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 7568kB

For iommu domains which allow blocking, estimate the amount of pagetable
memory a map operation will require, and preallocate it with GFP_KERNEL.
The behavior of iommu domains which cannot block is unchanged; they may
still encounter the failure above.
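
In outline, the resulting arm_smmu_map_sg() sequence (names as in the
diff below) is:

	arm_smmu_prealloc_memory(...);		/* GFP_KERNEL, may sleep */
	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	list_splice_init(&nonsecure_pool, &smmu_domain->nonsecure_pool);
	ret = ops->map_sg(...);			/* pops pool pages first */
	list_splice_init(&smmu_domain->nonsecure_pool, &nonsecure_pool);
	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
	arm_smmu_release_prealloc_memory(...);	/* frees any leftovers */

The pool is spliced onto the domain and back under pgtbl_lock, so the
pgtable allocator only ever sees it while the lock is held.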

Change-Id: I16804ef0876ab6e7acb76560f7398baacd3a30a8
Signed-off-by: Patrick Daly <pdaly@codeaurora.org>
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 85df514..b6979ed 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -535,6 +535,8 @@
 	struct list_head		unassign_list;
 	struct mutex			assign_lock;
 	struct list_head		secure_pool_list;
+	/* nonsecure pool protected by pgtbl_lock */
+	struct list_head		nonsecure_pool;
 	struct iommu_domain		domain;
 
 	bool				qsmmuv500_errata1_init;
@@ -1313,8 +1315,19 @@
 	void *page;
 	struct arm_smmu_domain *smmu_domain = cookie;
 
-	if (!arm_smmu_is_master_side_secure(smmu_domain))
+	if (!arm_smmu_is_master_side_secure(smmu_domain)) {
+		struct page *pg;
+		/* size is expected to be 4K with current configuration */
+		if (size == PAGE_SIZE) {
+			pg = list_first_entry_or_null(
+				&smmu_domain->nonsecure_pool, struct page, lru);
+			if (pg) {
+				list_del_init(&pg->lru);
+				return page_address(pg);
+			}
+		}
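+		/* Pool empty or non-4K: fall back to the gfp_mask */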
 		return alloc_pages_exact(size, gfp_mask);
+	}
 
 	page = arm_smmu_secure_pool_remove(smmu_domain, size);
 	if (page)
@@ -2080,6 +2093,7 @@
 	INIT_LIST_HEAD(&smmu_domain->unassign_list);
 	mutex_init(&smmu_domain->assign_lock);
 	INIT_LIST_HEAD(&smmu_domain->secure_pool_list);
+	INIT_LIST_HEAD(&smmu_domain->nonsecure_pool);
 	arm_smmu_domain_reinit(smmu_domain);
 
 	return &smmu_domain->domain;
@@ -2432,6 +2446,50 @@
 	return 0;
 }
 
+static void arm_smmu_prealloc_memory(struct arm_smmu_domain *smmu_domain,
+					struct scatterlist *sgl, int nents,
+					struct list_head *pool)
+{
+	u32 nr = 0;
+	int i;
+	size_t size = 0;
+	struct scatterlist *sg;
+	struct page *page;
+
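+	/*
+	 * Skip atomic domains, which may map from atomic context, and
+	 * secure domains, which use the secure pool instead.
+	 */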
+	if ((smmu_domain->attributes & (1 << DOMAIN_ATTR_ATOMIC)) ||
+			arm_smmu_has_secure_vmid(smmu_domain))
+		return;
+
+	for_each_sg(sgl, sg, nents, i)
+		size += sg->length;
+
+	/* one 2nd level pagetable per 1GB mapped */
+	nr += round_up(size, SZ_1G) >> 30;
+	/* one 3rd level pagetable per 2MB mapped */
+	nr += round_up(size, SZ_2M) >> 21;
+
+	/* Retry later with atomic allocation on error */
+	for (i = 0; i < nr; i++) {
+		page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
+		if (!page)
+			break;
+		list_add(&page->lru, pool);
+	}
+}
+
+static void arm_smmu_release_prealloc_memory(
+		struct arm_smmu_domain *smmu_domain, struct list_head *list)
+{
+	struct page *page, *tmp;
+
+	list_for_each_entry_safe(page, tmp, list, lru) {
+		list_del(&page->lru);
+		__free_pages(page, 0);
+	}
+}
+
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
 	int ret;
@@ -2589,10 +2647,12 @@
 	unsigned int idx_start, idx_end;
 	struct scatterlist *sg_start, *sg_end;
 	unsigned long __saved_iova_start;
+	LIST_HEAD(nonsecure_pool);
 
 	if (!ops)
 		return -ENODEV;
 
+	arm_smmu_prealloc_memory(smmu_domain, sg, nents, &nonsecure_pool);
 	arm_smmu_secure_domain_lock(smmu_domain);
 
 	__saved_iova_start = iova;
@@ -2611,8 +2671,10 @@
 		}
 
 		spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
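+		/* Lend the preallocated pages to the pgtable allocator */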
+		list_splice_init(&nonsecure_pool, &smmu_domain->nonsecure_pool);
 		ret = ops->map_sg(ops, iova, sg_start, idx_end - idx_start,
 				  prot, &size);
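+		/* Take back any pages map_sg did not consume */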
+		list_splice_init(&smmu_domain->nonsecure_pool, &nonsecure_pool);
 		spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
 		/* Returns 0 on error */
 		if (!ret) {
@@ -2633,6 +2695,7 @@
 		iova = __saved_iova_start;
 	}
 	arm_smmu_secure_domain_unlock(smmu_domain);
+	arm_smmu_release_prealloc_memory(smmu_domain, &nonsecure_pool);
 	return iova - __saved_iova_start;
 }