[PATCH] Zone reclaim: proc override

proc support for zone reclaim

This patch creates a proc entry /proc/sys/vm/zone_reclaim_mode that may be
used to override the automatic determination of the zone reclaim made on
bootup.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 6910c01..391dd64 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -27,6 +27,7 @@
 - laptop_mode
 - block_dump
 - drop-caches
+- zone_reclaim_mode
 
 ==============================================================
 
@@ -120,3 +121,20 @@
 
 The initial value is zero.  Kernel does not use this value at boot time to set
 the high water marks for each per cpu page list.
+
+===============================================================
+
+zone_reclaim_mode:
+
+This is set during bootup to 1 if it is determined that pages from
+remote zones will cause a significant performance reduction. The
+page allocator will then reclaim easily reusable pages (those page
+cache pages that are currently not used) before going off node.
+
+The user can override this setting. It may be beneficial to switch
+off zone reclaim if the system is used for a file server and all
+of memory should be used for caching files from disk.
+
+It may be beneficial to switch this on if one wants to do zone
+reclaim regardless of the numa distances in the system.
+
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 7f47212..8352a7c 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -182,6 +182,7 @@
 	VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
 	VM_DROP_PAGECACHE=29,	/* int: nuke lots of pagecache */
 	VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
+	VM_ZONE_RECLAIM_MODE=31,/* reclaim local zone memory before going off node */
 };
 
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f5d69b6..cb99a42 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -870,6 +870,17 @@
 		.strategy	= &sysctl_jiffies,
 	},
 #endif
+#ifdef CONFIG_NUMA
+	{
+		.ctl_name	= VM_ZONE_RECLAIM_MODE,
+		.procname	= "zone_reclaim_mode",
+		.data		= &zone_reclaim_mode,
+		.maxlen		= sizeof(zone_reclaim_mode),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &zero,
+	},
+#endif
 	{ .ctl_name = 0 }
 };