[PATCH] x86_64: Optimize NUMA node hash function

Compute the highest possible value for memnode_shift, in order to reduce
footprint of memnodemap[] to the minimum, thus making all users
(phys_to_nid(), kfree()), more cache friendly.

Before the patch :

 Node 0 MemBase 0000000000000000 Limit 00000001ffffffff
 Node 1 MemBase 0000000200000000 Limit 00000003ffffffff
 Using 23 for the hash shift. Max adder is 3ffffffff

After the patch :

 Node 0 MemBase 0000000000000000 Limit 00000001ffffffff
 Node 1 MemBase 0000000200000000 Limit 00000003ffffffff
 Using 33 for the hash shift.

In this case, only 2 bytes of memnodemap[] are used, instead of 2048

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 4bf6458..edd5559 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -38,38 +38,57 @@
 
 int numa_off __initdata;
 
-int __init compute_hash_shift(struct node *nodes, int numnodes)
+
+/*
+ * Given a shift value, try to populate memnodemap[]
+ * Returns :
+ * 1 if OK
+ * 0 if memnodmap[] too small (of shift too small)
+ * -1 if node overlap or lost ram (shift too big)
+ */
+static int __init populate_memnodemap(
+	const struct node *nodes, int numnodes, int shift)
 {
 	int i; 
-	int shift = 20;
-	unsigned long addr,maxend=0;
-	
-	for (i = 0; i < numnodes; i++)
-		if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend))
-				maxend = nodes[i].end;
+	int res = -1;
+	unsigned long addr, end;
 
-	while ((1UL << shift) <  (maxend / NODEMAPSIZE))
+	memset(memnodemap, 0xff, sizeof(memnodemap));
+	for (i = 0; i < numnodes; i++) {
+		addr = nodes[i].start;
+		end = nodes[i].end;
+		if (addr >= end)
+			continue;
+		if ((end >> shift) >= NODEMAPSIZE)
+			return 0;
+		do {
+			if (memnodemap[addr >> shift] != 0xff)
+				return -1;
+			memnodemap[addr >> shift] = i;
+			addr += (1 << shift);
+		} while (addr < end);
+		res = 1;
+	} 
+	return res;
+}
+
+int __init compute_hash_shift(struct node *nodes, int numnodes)
+{
+	int shift = 20;
+
+	while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
 		shift++;
 
-	printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n",
-			shift,maxend);
-	memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
-	for (i = 0; i < numnodes; i++) {
-		if (nodes[i].start == nodes[i].end)
-			continue;
-		for (addr = nodes[i].start;
-		     addr < nodes[i].end;
-		     addr += (1UL << shift)) {
-			if (memnodemap[addr >> shift] != 0xff) {
-				printk(KERN_INFO
+	printk(KERN_DEBUG "Using %d for the hash shift.\n",
+		shift);
+
+	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
+		printk(KERN_INFO
 	"Your memory is not aligned you need to rebuild your kernel "
-	"with a bigger NODEMAPSIZE shift=%d adder=%lu\n",
-					shift,addr);
-				return -1;
-			} 
-			memnodemap[addr >> shift] = i;
-		} 
-	} 
+	"with a bigger NODEMAPSIZE shift=%d\n",
+			shift);
+		return -1;
+	}
 	return shift;
 }