mm/nobootmem.c: have __free_pages_memory() free in larger chunks.

On large memory machines it can take a few minutes to get through
free_all_bootmem().

Currently, when free_all_bootmem() calls __free_pages_memory(), the number
of contiguous pages that __free_pages_memory() passes to the buddy
allocator is limited to BITS_PER_LONG.  BITS_PER_LONG was originally
chosen to keep things similar to mm/nobootmem.c.  But it is more efficient
to limit it to MAX_ORDER.

       base   new  change
8TB    202s  172s   30s
16TB   401s  351s   50s

That is around 1%-3% improvement on total boot time.

This patch was spun off from the boot time rfc Robin and I had been
working on.

Signed-off-by: Robin Holt <robin.m.holt@gmail.com>
Signed-off-by: Nathan Zimmer <nzimmer@sgi.com>
Cc: Robin Holt <robinmholt@linux.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Mike Travis <travis@sgi.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 61107cf..2c254d3 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -82,27 +82,18 @@
 
 static void __init __free_pages_memory(unsigned long start, unsigned long end)
 {
-	unsigned long i, start_aligned, end_aligned;
-	int order = ilog2(BITS_PER_LONG);
+	int order;
 
-	start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
-	end_aligned = end & ~(BITS_PER_LONG - 1);
+	while (start < end) {
+		order = min(MAX_ORDER - 1UL, __ffs(start));
 
-	if (end_aligned <= start_aligned) {
-		for (i = start; i < end; i++)
-			__free_pages_bootmem(pfn_to_page(i), 0);
+		while (start + (1UL << order) > end)
+			order--;
 
-		return;
+		__free_pages_bootmem(pfn_to_page(start), order);
+
+		start += (1UL << order);
 	}
-
-	for (i = start; i < start_aligned; i++)
-		__free_pages_bootmem(pfn_to_page(i), 0);
-
-	for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
-		__free_pages_bootmem(pfn_to_page(i), order);
-
-	for (i = end_aligned; i < end; i++)
-		__free_pages_bootmem(pfn_to_page(i), 0);
 }
 
 static unsigned long __init __free_memory_core(phys_addr_t start,