[PATCH] Add NUMA policy support for huge pages.

The huge_zonelist() function in the memory policy layer provides a list of
zones ordered by NUMA distance.  The hugetlb layer will walk that list looking
for a zone that has available huge pages but is also in the nodeset of the
current cpuset.
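
As an illustration only, the walk described above could look roughly like the
sketch below.  The hugepage_freelists[] list heads and the
cpuset_zone_allowed() check are assumptions made for the sake of the example
and are not shown in the hunks of this patch; the counters mirror those used
by enqueue_huge_page() in the first hunk:

	struct page *page = NULL;
	struct zonelist *zonelist = huge_zonelist(vma, address);
	struct zone **z;
	int nid;

	/*
	 * Find the first zone, in policy order, whose node has free
	 * huge pages and is permitted by the current cpuset.
	 */
	for (z = zonelist->zones; *z; z++) {
		nid = (*z)->zone_pgdat->node_id;
		if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
		    !list_empty(&hugepage_freelists[nid]))
			break;
	}

	/* Dequeue a huge page from that node's free list, if any. */
	if (*z) {
		page = list_entry(hugepage_freelists[nid].next,
				  struct page, lru);
		list_del(&page->lru);
		free_huge_pages--;
		free_huge_pages_node[nid]--;
	}

Because placement now depends on the faulting vma and address, the allocation
path has to carry them down, which is why dequeue_huge_page(),
alloc_huge_page() and find_or_alloc_huge_page() all gain vma/addr parameters
in the hunks below.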

This patch does not contain the folding of find_or_alloc_huge_page() that was
controversial in the earlier discussion.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@muc.de>
Acked-by: William Lee Irwin III <wli@holomorphy.com>
Cc: Adam Litke <agl@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e93bd63..eb40556 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -11,6 +11,8 @@
 #include <linux/highmem.h>
 #include <linux/nodemask.h>
 #include <linux/pagemap.h>
+#include <linux/mempolicy.h>
+
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -36,11 +38,12 @@
 	free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(void)
+static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+				unsigned long address)
 {
 	int nid = numa_node_id();
 	struct page *page = NULL;
-	struct zonelist *zonelist = NODE_DATA(nid)->node_zonelists;
+	struct zonelist *zonelist = huge_zonelist(vma, address);
 	struct zone **z;
 
 	for (z = zonelist->zones; *z; z++) {
@@ -87,13 +90,13 @@
 	spin_unlock(&hugetlb_lock);
 }
 
-struct page *alloc_huge_page(void)
+struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
 	int i;
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page();
+	page = dequeue_huge_page(vma, addr);
 	if (!page) {
 		spin_unlock(&hugetlb_lock);
 		return NULL;
@@ -196,7 +199,7 @@
 	spin_lock(&hugetlb_lock);
 	try_to_free_low(count);
 	while (count < nr_huge_pages) {
-		struct page *page = dequeue_huge_page();
+		struct page *page = dequeue_huge_page(NULL, 0);
 		if (!page)
 			break;
 		update_and_free_page(page);
@@ -365,8 +368,9 @@
 	flush_tlb_range(vma, start, end);
 }
 
-static struct page *find_or_alloc_huge_page(struct address_space *mapping,
-				unsigned long idx, int shared)
+static struct page *find_or_alloc_huge_page(struct vm_area_struct *vma,
+			unsigned long addr, struct address_space *mapping,
+			unsigned long idx, int shared)
 {
 	struct page *page;
 	int err;
@@ -378,7 +382,7 @@
 
 	if (hugetlb_get_quota(mapping))
 		goto out;
-	page = alloc_huge_page();
+	page = alloc_huge_page(vma, addr);
 	if (!page) {
 		hugetlb_put_quota(mapping);
 		goto out;
@@ -418,7 +422,7 @@
 	}
 
 	page_cache_get(old_page);
-	new_page = alloc_huge_page();
+	new_page = alloc_huge_page(vma, address);
 
 	if (!new_page) {
 		page_cache_release(old_page);
@@ -467,7 +471,7 @@
 	 * Use page lock to guard against racing truncation
 	 * before we get page_table_lock.
 	 */
-	page = find_or_alloc_huge_page(mapping, idx,
+	page = find_or_alloc_huge_page(vma, address, mapping, idx,
 			vma->vm_flags & VM_SHARED);
 	if (!page)
 		goto out;