AMD IOMMU: implement lazy IO/TLB flushing

The IO/TLB flush on every unmapping operation is the most expensive
part of the AMD IOMMU code and is not strictly necessary. It is
sufficient to flush before any freed address range can be reused. This
patch implements lazy IO/TLB flushing, which does exactly that.
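
In other words: dma_ops_free_addresses() now only returns the range to
the allocation bitmap, and the invalidation command is deferred until
the address allocator wraps around (or restarts its search at 0) and
could hand out a previously used IO address again. The user-space
sketch below illustrates just that pattern; the names (map_page,
alloc_page, flush_tlb) and the toy bitmap allocator are illustrative
stand-ins, not the kernel code:

#include <stdbool.h>
#include <stdio.h>

#define APERTURE_PAGES 8

struct dma_dom {
        bool used[APERTURE_PAGES];  /* toy stand-in for the allocation bitmap */
        unsigned int next_bit;      /* next search position in the aperture */
        bool need_flush;            /* true when reused addresses may be cached */
};

/* Stand-in for queueing an INVALIDATE_IOMMU_PAGES command */
static void flush_tlb(struct dma_dom *dom)
{
        printf("full IO/TLB flush (next_bit=%u)\n", dom->next_bit);
}

static int alloc_page(struct dma_dom *dom)
{
        unsigned int i;

        if (dom->next_bit >= APERTURE_PAGES) {
                /* Wrap-around: stale IO/TLB entries become possible */
                dom->next_bit = 0;
                dom->need_flush = true;
        }
        for (i = dom->next_bit; i < APERTURE_PAGES; i++) {
                if (!dom->used[i]) {
                        dom->used[i] = true;
                        dom->next_bit = i + 1;
                        return (int)i;
                }
        }
        return -1;
}

static int map_page(struct dma_dom *dom)
{
        int page = alloc_page(dom);

        /* Flush once, before a recycled address can reach the device */
        if (page >= 0 && dom->need_flush) {
                flush_tlb(dom);
                dom->need_flush = false;
        }
        return page;
}

static void unmap_page(struct dma_dom *dom, int page)
{
        dom->used[page] = false;    /* no per-unmap flush anymore */
}

int main(void)
{
        struct dma_dom dom = { .next_bit = 0, .need_flush = false };
        int i, page;

        /* 12 map/unmap pairs over an 8-page aperture: one flush at the
         * wrap-around instead of one flush per unmap operation. */
        for (i = 0; i < 12; i++) {
                page = map_page(&dom);
                if (page >= 0)
                        unmap_page(&dom, page);
        }
        return 0;
}

When iommu_fullflush is set, the old behaviour (a flush on every
unmap) is kept, as the init-time printk below reports.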

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 691e023..679f2a8 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -203,6 +203,14 @@
 	return 0;
 }
 
+/* Flush the whole IO/TLB for a given protection domain */
+static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
+{
+	u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+
+	iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
+}
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
@@ -386,14 +394,18 @@
 			PAGE_SIZE) >> PAGE_SHIFT;
 	limit = limit < size ? limit : size;
 
-	if (dom->next_bit >= limit)
+	if (dom->next_bit >= limit) {
 		dom->next_bit = 0;
+		dom->need_flush = true;
+	}
 
 	address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
 			0 , boundary_size, 0);
-	if (address == -1)
+	if (address == -1) {
 		address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
 				0, boundary_size, 0);
+		dom->need_flush = true;
+	}
 
 	if (likely(address != -1)) {
 		dom->next_bit = address + pages;
@@ -553,6 +565,8 @@
 	dma_dom->bitmap[0] = 1;
 	dma_dom->next_bit = 0;
 
+	dma_dom->need_flush = false;
+
 	/* Intialize the exclusion range if necessary */
 	if (iommu->exclusion_start &&
 	    iommu->exclusion_start < dma_dom->aperture_size) {
@@ -795,7 +809,10 @@
 	}
 	address += offset;
 
-	if (unlikely(iommu_has_npcache(iommu)))
+	if (unlikely(dma_dom->need_flush && !iommu_fullflush)) {
+		iommu_flush_tlb(iommu, dma_dom->domain.id);
+		dma_dom->need_flush = false;
+	} else if (unlikely(iommu_has_npcache(iommu)))
 		iommu_flush_pages(iommu, dma_dom->domain.id, address, size);
 
 out:
@@ -829,7 +846,8 @@
 
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
-	iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
+	if (iommu_fullflush)
+		iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size);
 }
 
 /*
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index a69cc0f..f2fa8dc 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -995,6 +995,11 @@
 	else
 		printk("disabled\n");
 
+	if (iommu_fullflush)
+		printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n");
+	else
+		printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n");
+
 out:
 	return ret;
 
@@ -1057,7 +1062,7 @@
 static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
-		if (strcmp(str, "isolate") == 0)
+		if (strncmp(str, "isolate", 7) == 0)
 			amd_iommu_isolate = 1;
 	}
 
diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h
index dcc8120..dcc4724 100644
--- a/include/asm-x86/amd_iommu_types.h
+++ b/include/asm-x86/amd_iommu_types.h
@@ -196,6 +196,9 @@
 	 * just calculate its address in constant time.
 	 */
 	u64 **pte_pages;
+
+	/* This will be set to true when TLB needs to be flushed */
+	bool need_flush;
 };
 
 /*