[PATCH] zoned vm counters: conversion of nr_dirty to per zone counter

This makes nr_dirty a per-zone counter.  Determining the writeback state no
longer requires looping over all processors to sum up per-CPU counts.
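
For reference, the change in how the global value is obtained looks roughly
like this.  This is a simplified sketch of the old read_page_state()
summation versus the new per-zone read; the identifiers follow the kernel
code, but it is not a literal copy of either implementation:

	/* before: read_page_state(nr_dirty) walked every online CPU */
	unsigned long dirty = 0;
	int cpu;

	for_each_online_cpu(cpu)
		dirty += per_cpu(page_states, cpu).nr_dirty;

	/* after: a single read of the already-aggregated zone counter */
	unsigned long dirty = global_page_state(NR_FILE_DIRTY);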

The counter aggregation for nr_dirty had to be undone in the NFS layer: the
old code subtracted a summed page count in a single step, which no longer
works because the pages involved may belong to different zones, so the
counter is now adjusted per page.  Someone more familiar with NFS should
probably review what I have done.
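
In rough outline, the effect in the NFS dirty-request scanning path is
(simplified from the hunks below; req is an nfs_page being moved off the
per-inode dirty list):

	/* before: one bulk subtraction after the whole scan */
	sub_page_state(nr_dirty, res);

	/* after: drop the counter page by page, in each page's own zone */
	dec_zone_page_state(req->wb_page, NR_FILE_DIRTY);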

[akpm@osdl.org: bugfix]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 0bb1e5c..aa211dc 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -59,7 +59,7 @@
 	printk(KERN_INFO "%d pages swap cached\n", cached);
 
 	get_page_state(&ps);
-	printk(KERN_INFO "%lu pages dirty\n", ps.nr_dirty);
+	printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
 	printk(KERN_INFO "%lu pages writeback\n", ps.nr_writeback);
 	printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
 	printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
diff --git a/drivers/base/node.c b/drivers/base/node.c
index c22fb67..6fed520 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -50,8 +50,6 @@
 	__get_zone_counts(&active, &inactive, &free, NODE_DATA(nid));
 
 	/* Check for negative values in these approximate counters */
-	if ((long)ps.nr_dirty < 0)
-		ps.nr_dirty = 0;
 	if ((long)ps.nr_writeback < 0)
 		ps.nr_writeback = 0;
 
@@ -81,7 +79,7 @@
 		       nid, K(i.freehigh),
 		       nid, K(i.totalram - i.totalhigh),
 		       nid, K(i.freeram - i.freehigh),
-		       nid, K(ps.nr_dirty),
+		       nid, K(node_page_state(nid, NR_FILE_DIRTY)),
 		       nid, K(ps.nr_writeback),
 		       nid, K(node_page_state(nid, NR_FILE_PAGES)),
 		       nid, K(node_page_state(nid, NR_FILE_MAPPED)),
diff --git a/fs/buffer.c b/fs/buffer.c
index e999472..90e52e6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -852,7 +852,7 @@
 		write_lock_irq(&mapping->tree_lock);
 		if (page->mapping) {	/* Race with truncate? */
 			if (mapping_cap_account_dirty(mapping))
-				inc_page_state(nr_dirty);
+				__inc_zone_page_state(page, NR_FILE_DIRTY);
 			radix_tree_tag_set(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 031b27a..e5ad107 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -464,7 +464,7 @@
 		.range_start	= 0,
 		.range_end	= LLONG_MAX,
 	};
-	unsigned long nr_dirty = read_page_state(nr_dirty);
+	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = read_page_state(nr_unstable);
 
 	wbc.nr_to_write = nr_dirty + nr_unstable +
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d89f6fb..26b1fe9 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -315,6 +315,7 @@
 						req->wb_index, NFS_PAGE_TAG_DIRTY);
 				nfs_list_remove_request(req);
 				nfs_list_add_request(req, dst);
+				dec_zone_page_state(req->wb_page, NR_FILE_DIRTY);
 				res++;
 			}
 		}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8fccb9c..a6d1ca5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -497,7 +497,7 @@
 	nfs_list_add_request(req, &nfsi->dirty);
 	nfsi->ndirty++;
 	spin_unlock(&nfsi->req_lock);
-	inc_page_state(nr_dirty);
+	inc_zone_page_state(req->wb_page, NR_FILE_DIRTY);
 	mark_inode_dirty(inode);
 }
 
@@ -609,7 +609,6 @@
 	if (nfsi->ndirty != 0) {
 		res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
 		nfsi->ndirty -= res;
-		sub_page_state(nr_dirty,res);
 		if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
 			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
 	}
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 0eae68f..e23717d 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -190,7 +190,7 @@
 		K(i.freeram-i.freehigh),
 		K(i.totalswap),
 		K(i.freeswap),
-		K(ps.nr_dirty),
+		K(global_page_state(NR_FILE_DIRTY)),
 		K(ps.nr_writeback),
 		K(global_page_state(NR_ANON_PAGES)),
 		K(global_page_state(NR_FILE_MAPPED)),
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 15adb43..1cc8412 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -53,6 +53,7 @@
 	NR_FILE_PAGES,
 	NR_SLAB,	/* Pages used by slab allocator */
 	NR_PAGETABLE,	/* used for pagetables */
+	NR_FILE_DIRTY,
 	NR_VM_ZONE_STAT_ITEMS };
 
 struct per_cpu_pages {
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 5622044..b323ea2 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -22,7 +22,6 @@
  * commented here.
  */
 struct page_state {
-	unsigned long nr_dirty;		/* Dirty writeable pages */
 	unsigned long nr_writeback;	/* Pages under writeback */
 	unsigned long nr_unstable;	/* NFS unstable pages */
 #define GET_PAGE_STATE_LAST nr_unstable
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0faacfe..da85478 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -109,7 +109,7 @@
 
 static void get_writeback_state(struct writeback_state *wbs)
 {
-	wbs->nr_dirty = read_page_state(nr_dirty);
+	wbs->nr_dirty = global_page_state(NR_FILE_DIRTY);
 	wbs->nr_unstable = read_page_state(nr_unstable);
 	wbs->nr_mapped = global_page_state(NR_FILE_MAPPED) +
 				global_page_state(NR_ANON_PAGES);
@@ -641,7 +641,8 @@
 			if (mapping2) { /* Race with truncate? */
 				BUG_ON(mapping2 != mapping);
 				if (mapping_cap_account_dirty(mapping))
-					inc_page_state(nr_dirty);
+					__inc_zone_page_state(page,
+								NR_FILE_DIRTY);
 				radix_tree_tag_set(&mapping->page_tree,
 					page_index(page), PAGECACHE_TAG_DIRTY);
 			}
@@ -728,9 +729,9 @@
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-			write_unlock_irqrestore(&mapping->tree_lock, flags);
 			if (mapping_cap_account_dirty(mapping))
-				dec_page_state(nr_dirty);
+				__dec_zone_page_state(page, NR_FILE_DIRTY);
+			write_unlock_irqrestore(&mapping->tree_lock, flags);
 			return 1;
 		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -761,7 +762,7 @@
 	if (mapping) {
 		if (TestClearPageDirty(page)) {
 			if (mapping_cap_account_dirty(mapping))
-				dec_page_state(nr_dirty);
+				dec_zone_page_state(page, NR_FILE_DIRTY);
 			return 1;
 		}
 		return 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ed3f2a7..c2b9aa4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1314,7 +1314,7 @@
 		"unstable:%lu free:%u slab:%lu mapped:%lu pagetables:%lu\n",
 		active,
 		inactive,
-		ps.nr_dirty,
+		global_page_state(NR_FILE_DIRTY),
 		ps.nr_writeback,
 		ps.nr_unstable,
 		nr_free_pages(),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 292a35f..1982fb5 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -400,9 +400,9 @@
 	"nr_file_pages",
 	"nr_slab",
 	"nr_page_table_pages",
+	"nr_dirty",
 
 	/* Page state */
-	"nr_dirty",
 	"nr_writeback",
 	"nr_unstable",