iommu/io-pgtable-arm: Flush entire TLB at end of unmap

Rather than issuing TLB maintenance operations throughout the course
of the unmap, flush the entire TLB for the context in question once,
at the very end of the unmap.  This significantly improves unmap
performance for large page tables, which are common for large buffers
on a heavily fragmented system.
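
In outline, the change replaces per-range maintenance inside the unmap
loop with a single whole-context flush after it.  The toy program
below is an illustration only: the stub functions tlb_inv_range() and
tlb_flush_all() and the op counter are invented for this sketch and
are not driver code.  It shows why the second shape scales better as
the number of unmapped blocks grows:

  #include <stdio.h>

  static unsigned long tlb_ops;  /* simulated maintenance operations */

  /* Stand-ins for a per-range invalidate and a whole-context flush. */
  static void tlb_inv_range(void) { tlb_ops++; }
  static void tlb_flush_all(void) { tlb_ops++; }

  int main(void)
  {
      unsigned int nblocks = 16;  /* e.g. 64K unmapped as 4K blocks */
      unsigned int i;

      /* Old shape: invalidate each block as it is unmapped. */
      tlb_ops = 0;
      for (i = 0; i < nblocks; i++)
          tlb_inv_range();
      printf("per-block invalidation: %lu ops\n", tlb_ops);

      /* New shape: unmap everything, then flush the context once. */
      tlb_ops = 0;
      tlb_flush_all();
      printf("single flush-all:       %lu ops\n", tlb_ops);

      return 0;
  }

The cost is that a full flush also discards still-live translations
for the same context, which must then be re-walked; the win comes when
the unmap touches many entries, as in the fragmented large-buffer case
above.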

In my testing, this optimization gave a ~10% speedup when unmapping 64K.

Change-Id: Iaa2b211e730dad6bd9235ef98dd2a89cf541e663
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
Signed-off-by: Patrick Daly <pdaly@codeaurora.org>
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 5bb3408..4fcef13 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -605,8 +605,6 @@
 	}
 
 	__arm_lpae_set_pte(ptep, table, &data->iop.cfg);
-	iova &= ~(blk_size - 1);
-	io_pgtable_tlb_add_flush(&data->iop, iova, blk_size, blk_size, true);
 	return size;
 }
 
@@ -633,13 +631,7 @@
 
 		if (!iopte_leaf(pte, lvl)) {
-			/* Also flush any partial walks */
-			io_pgtable_tlb_add_flush(iop, iova, size,
-						ARM_LPAE_GRANULE(data), false);
-			io_pgtable_tlb_sync(iop);
 			ptep = iopte_deref(pte, data);
 			__arm_lpae_free_pgtable(data, lvl + 1, ptep);
-		} else {
-			io_pgtable_tlb_add_flush(iop, iova, size, size, true);
 		}
 
 		return size;
@@ -726,7 +718,7 @@
 		iova += ret;
 	}
 	if (unmapped)
-		io_pgtable_tlb_sync(&data->iop);
+		io_pgtable_tlb_flush_all(&data->iop);
 
 	return unmapped;
 }