[PATCH] Use Zoned VM Counters for NUMA statistics

The numa statistics are really event counters.  But they are per node and
so we have had special treatment for these counters through additional
fields on the pcp structure.  We can now use the per zone nature of the
zoned VM counters to realize these.

This will shrink the size of the pcp structure on NUMA systems.  We will
have some room to add additional per zone counters that will all still fit
in the same cacheline.

 Bits	Prior pcp size	  	Size after patch	We can add
 ------------------------------------------------------------------
 64	128 bytes (16 words)	80 bytes (10 words)	48
 32	 76 bytes (19 words)	56 bytes (14 words)	8 (64 byte cacheline)
							72 (128 byte)

Remove the special statistics for numa and replace them with zoned vm
counters.  This has the side effect that global sums of these events now
show up in /proc/vmstat.

Also take the opportunity to move the zone_statistics() function from
page_alloc.c into vmstat.c.

Discussions:
V2 http://marc.theaimsgroup.com/?t=115048227000002&r=1&w=2

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 06a6d10..ee7f896 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -185,9 +185,8 @@
  * in between and therefore the atomicity vs. interrupt cannot be exploited
  * in a useful way here.
  */
-void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
+static void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
-	struct zone *zone = page_zone(page);
 	s8 *p = diff_pointer(zone, item);
 
 	(*p)++;
@@ -197,6 +196,11 @@
 		*p = 0;
 	}
 }
+
+void __inc_zone_page_state(struct page *page, enum zone_stat_item item)
+{
+	__inc_zone_state(page_zone(page), item);
+}
 EXPORT_SYMBOL(__inc_zone_page_state);
 
 void __dec_zone_page_state(struct page *page, enum zone_stat_item item)
@@ -213,22 +217,23 @@
 }
 EXPORT_SYMBOL(__dec_zone_page_state);
 
+void inc_zone_state(struct zone *zone, enum zone_stat_item item)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__inc_zone_state(zone, item);
+	local_irq_restore(flags);
+}
+
 void inc_zone_page_state(struct page *page, enum zone_stat_item item)
 {
 	unsigned long flags;
 	struct zone *zone;
-	s8 *p;
 
 	zone = page_zone(page);
 	local_irq_save(flags);
-	p = diff_pointer(zone, item);
-
-	(*p)++;
-
-	if (unlikely(*p > STAT_THRESHOLD)) {
-		zone_page_state_add(*p, zone, item);
-		*p = 0;
-	}
+	__inc_zone_state(zone, item);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(inc_zone_page_state);
@@ -297,6 +302,28 @@
 
 #endif
 
+#ifdef CONFIG_NUMA
+/*
+ * zonelist = the list of zones passed to the allocator
+ * z 	    = the zone from which the allocation occurred.
+ *
+ * Must be called with interrupts disabled.
+ */
+void zone_statistics(struct zonelist *zonelist, struct zone *z)
+{
+	if (z->zone_pgdat == zonelist->zones[0]->zone_pgdat) {
+		__inc_zone_state(z, NUMA_HIT);
+	} else {
+		__inc_zone_state(z, NUMA_MISS);
+		__inc_zone_state(zonelist->zones[0], NUMA_FOREIGN);
+	}
+	if (z->zone_pgdat == NODE_DATA(numa_node_id()))
+		__inc_zone_state(z, NUMA_LOCAL);
+	else
+		__inc_zone_state(z, NUMA_OTHER);
+}
+#endif
+
 #ifdef CONFIG_PROC_FS
 
 #include <linux/seq_file.h>
@@ -369,6 +396,15 @@
 	"nr_unstable",
 	"nr_bounce",
 
+#ifdef CONFIG_NUMA
+	"numa_hit",
+	"numa_miss",
+	"numa_foreign",
+	"numa_interleave",
+	"numa_local",
+	"numa_other",
+#endif
+
 	/* Event counters */
 	"pgpgin",
 	"pgpgout",
@@ -490,21 +526,6 @@
 					   pageset->pcp[j].high,
 					   pageset->pcp[j].batch);
 			}
-#ifdef CONFIG_NUMA
-			seq_printf(m,
-				   "\n            numa_hit:       %lu"
-				   "\n            numa_miss:      %lu"
-				   "\n            numa_foreign:   %lu"
-				   "\n            interleave_hit: %lu"
-				   "\n            local_node:     %lu"
-				   "\n            other_node:     %lu",
-				   pageset->numa_hit,
-				   pageset->numa_miss,
-				   pageset->numa_foreign,
-				   pageset->interleave_hit,
-				   pageset->local_node,
-				   pageset->other_node);
-#endif
 		}
 		seq_printf(m,
 			   "\n  all_unreclaimable: %u"