iommu: Introduce Interface for IOMMU TLB Flushing

With the current IOMMU-API the hardware TLBs have to be
flushed in every iommu_ops->unmap() call-back.

When unmapping large amounts of address space, as happens
when a KVM domain with assigned devices is destroyed, this
causes thousands of unnecessary TLB flushes in the IOMMU
hardware, because the unmap call-back runs for every
unmapped physical page.

With the TLB Flush Interface and the new iommu_unmap_fast()
function introduced here, the need to flush the hardware TLBs
is removed from the unmapping code-path. Users of
iommu_unmap_fast() have to explicitly call the TLB-Flush
functions to sync the page-table changes to the hardware
(a usage sketch follows the function list below).

Three functions for TLB-Flushes are introduced:

	* iommu_flush_tlb_all() - Flushes all TLB entries
	                          associated with that
				  domain. TLB entries are
				  flushed when this function
				  returns.

	* iommu_tlb_range_add() - This will add a given
				  range to the flush queue
				  for this domain.

	* iommu_tlb_sync() - Flushes all queued ranges from
			     the hardware TLBs. Returns when
			     the flush is finished.
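
As a rough usage sketch (struct my_mapping and
teardown_domain() are hypothetical caller-side bookkeeping,
not part of this patch), unmapping many ranges then comes
down to a single sync at the end:

	struct my_mapping {
		struct list_head list;
		unsigned long iova;
		size_t size;
	};

	static void teardown_domain(struct iommu_domain *domain,
				    struct list_head *mappings)
	{
		struct my_mapping *m;

		list_for_each_entry(m, mappings, list) {
			size_t unmapped;

			/* No hardware TLB flush per unmap anymore. */
			unmapped = iommu_unmap_fast(domain, m->iova, m->size);

			/* Queue the range; flushed in iommu_tlb_sync(). */
			iommu_tlb_range_add(domain, m->iova, unmapped);
		}

		/* One TLB flush for the whole tear-down. */
		iommu_tlb_sync(domain);
	}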

The semantics of this interface are intentionally similar
to those of the iommu_gather_ops from the io-pgtable code.
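
The include/linux/iommu.h side of the patch is not part of
this excerpt; judging from the call-back names used in the
diff below, the three functions are presumably thin wrappers
around new per-driver call-backs, roughly like this (the
flush_iotlb_all name is an assumption here; iotlb_range_add
and iotlb_sync appear in the diff):

	static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
	{
		if (domain->ops->flush_iotlb_all)
			domain->ops->flush_iotlb_all(domain);
	}

	static inline void iommu_tlb_range_add(struct iommu_domain *domain,
					       unsigned long iova, size_t size)
	{
		if (domain->ops->iotlb_range_add)
			domain->ops->iotlb_range_add(domain, iova, size);
	}

	static inline void iommu_tlb_sync(struct iommu_domain *domain)
	{
		if (domain->ops->iotlb_sync)
			domain->ops->iotlb_sync(domain);
	}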

Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 5499a03..31c2b1d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -527,6 +527,8 @@
 
 	}
 
+	iommu_flush_tlb_all(domain);
+
 out:
 	iommu_put_resv_regions(dev, &mappings);
 
@@ -1547,13 +1549,16 @@
 }
 EXPORT_SYMBOL_GPL(iommu_map);
 
-size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
+static size_t __iommu_unmap(struct iommu_domain *domain,
+			    unsigned long iova, size_t size,
+			    bool sync)
 {
+	const struct iommu_ops *ops = domain->ops;
 	size_t unmapped_page, unmapped = 0;
-	unsigned int min_pagesz;
 	unsigned long orig_iova = iova;
+	unsigned int min_pagesz;
 
-	if (unlikely(domain->ops->unmap == NULL ||
+	if (unlikely(ops->unmap == NULL ||
 		     domain->pgsize_bitmap == 0UL))
 		return -ENODEV;
 
@@ -1583,10 +1588,13 @@
 	while (unmapped < size) {
 		size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
 
-		unmapped_page = domain->ops->unmap(domain, iova, pgsize);
+		unmapped_page = ops->unmap(domain, iova, pgsize);
 		if (!unmapped_page)
 			break;
 
+		if (sync && ops->iotlb_range_add)
+			ops->iotlb_range_add(domain, iova, pgsize);
+
 		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
 			 iova, unmapped_page);
 
@@ -1594,11 +1602,27 @@
 		unmapped += unmapped_page;
 	}
 
+	if (sync && ops->iotlb_sync)
+		ops->iotlb_sync(domain);
+
 	trace_unmap(orig_iova, size, unmapped);
 	return unmapped;
 }
+
+size_t iommu_unmap(struct iommu_domain *domain,
+		   unsigned long iova, size_t size)
+{
+	return __iommu_unmap(domain, iova, size, true);
+}
 EXPORT_SYMBOL_GPL(iommu_unmap);
 
+size_t iommu_unmap_fast(struct iommu_domain *domain,
+			unsigned long iova, size_t size)
+{
+	return __iommu_unmap(domain, iova, size, false);
+}
+EXPORT_SYMBOL_GPL(iommu_unmap_fast);
+
 size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 			 struct scatterlist *sg, unsigned int nents, int prot)
 {