[PATCH] AGP performance fixes

AGP allocation/deallocation is suffering major performance issues due to
the nature of global_flush_tlb() being called on every change_page_attr()
call.

For small allocations this isn't really seen, but when you start allocating
50000 pages of AGP space, for say, texture memory, then things can take
seconds to complete.

In some cases the situation is doubled or even quadrupled in the time due
to SMP, or a deallocation, then a new reallocation.  I've had a case of
upto 20 seconds wait time to deallocate and reallocate AGP space.

This patch fixes the problem by making it the caller's responsibility to
call global_flush_tlb(), and so removes it from every instance of mapping a
page into AGP space until the time that all change_page_attr() changes are
done.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 73f333f..4d5ed18 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c
@@ -147,6 +147,7 @@
 			printk(KERN_ERR PFX "unable to get memory for scratch page.\n");
 			return -ENOMEM;
 		}
+		global_flush_tlb();
 
 		bridge->scratch_page_real = virt_to_gart(addr);
 		bridge->scratch_page =
@@ -187,9 +188,11 @@
 	return 0;
 
 err_out:
-	if (bridge->driver->needs_scratch_page)
+	if (bridge->driver->needs_scratch_page) {
 		bridge->driver->agp_destroy_page(
 				gart_to_virt(bridge->scratch_page_real));
+		global_flush_tlb();
+	}
 	if (got_gatt)
 		bridge->driver->free_gatt_table(bridge);
 	if (got_keylist) {
@@ -211,9 +214,11 @@
 	bridge->key_list = NULL;
 
 	if (bridge->driver->agp_destroy_page &&
-	    bridge->driver->needs_scratch_page)
+	    bridge->driver->needs_scratch_page) {
 		bridge->driver->agp_destroy_page(
 				gart_to_virt(bridge->scratch_page_real));
+		global_flush_tlb();
+	}
 }
 
 /* When we remove the global variable agp_bridge from all drivers
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index c4a3871..19f242b 100644
--- a/drivers/char/agp/generic.c
+++ b/drivers/char/agp/generic.c
@@ -57,7 +57,8 @@
 {
 	int i;
 	i = change_page_attr(page, 1, PAGE_KERNEL_NOCACHE);
-	global_flush_tlb();
+	/* Caller's responsibility to call global_flush_tlb() for
+	 * performance reasons */
 	return i;
 }
 EXPORT_SYMBOL_GPL(map_page_into_agp);
@@ -66,7 +67,8 @@
 {
 	int i;
 	i = change_page_attr(page, 1, PAGE_KERNEL);
-	global_flush_tlb();
+	/* Caller's responsibility to call global_flush_tlb() for
+	 * performance reasons */
 	return i;
 }
 EXPORT_SYMBOL_GPL(unmap_page_from_agp);
@@ -153,6 +155,7 @@
 		for (i = 0; i < curr->page_count; i++) {
 			curr->bridge->driver->agp_destroy_page(gart_to_virt(curr->memory[i]));
 		}
+		global_flush_tlb();
 	}
 	agp_free_key(curr->key);
 	vfree(curr->memory);
@@ -210,7 +213,9 @@
 		new->memory[i] = virt_to_gart(addr);
 		new->page_count++;
 	}
-       new->bridge = bridge;
+	global_flush_tlb();
+
+	new->bridge = bridge;
 
 	flush_agp_mappings();
 
diff --git a/drivers/char/agp/i460-agp.c b/drivers/char/agp/i460-agp.c
index 58944cd..be2e569 100644
--- a/drivers/char/agp/i460-agp.c
+++ b/drivers/char/agp/i460-agp.c
@@ -514,9 +514,10 @@
 {
 	void *page;
 
-	if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT)
+	if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) {
 		page = agp_generic_alloc_page(agp_bridge);
-	else
+		global_flush_tlb();
+	} else
 		/* Returning NULL would cause problems */
 		/* AK: really dubious code. */
 		page = (void *)~0UL;
@@ -525,8 +526,10 @@
 
 static void i460_destroy_page (void *page)
 {
-	if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT)
+	if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) {
 		agp_generic_destroy_page(page);
+		global_flush_tlb();
+	}
 }
 
 #endif /* I460_LARGE_IO_PAGES */
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index bf4cc9f..027161a 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -270,6 +270,7 @@
 
 	switch (pg_count) {
 	case 1: addr = agp_bridge->driver->agp_alloc_page(agp_bridge);
+		global_flush_tlb();
 		break;
 	case 4:
 		/* kludge to get 4 physical pages for ARGB cursor */
@@ -330,9 +331,11 @@
 	if(curr->type == AGP_PHYS_MEMORY) {
 		if (curr->page_count == 4)
 			i8xx_destroy_pages(gart_to_virt(curr->memory[0]));
-		else
+		else {
 			agp_bridge->driver->agp_destroy_page(
 				 gart_to_virt(curr->memory[0]));
+			global_flush_tlb();
+		}
 		vfree(curr->memory);
 	}
 	kfree(curr);