sh: bootmem refactoring.

This reworks much of the bootmem setup and initialization code allowing
us to get rid of duplicate work between the NUMA and non-NUMA cases. The
end result is that we end up with a much more flexible interface for
supporting more complex topologies (fake NUMA, highmem, etc, etc.) which
is entirely LMB backed. This is an incremental step for more NUMA work as
well as gradually enabling migration off of bootmem entirely.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/include/asm/mmzone.h b/arch/sh/include/asm/mmzone.h
index 94f04b2..8887baf 100644
--- a/arch/sh/include/asm/mmzone.h
+++ b/arch/sh/include/asm/mmzone.h
@@ -44,6 +44,8 @@
 /* arch/sh/kernel/setup.c */
 void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
 			       unsigned long end_pfn);
+/* arch/sh/mm/init.c */
+void __init allocate_pgdat(unsigned int nid);
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_MMZONE_H */
diff --git a/arch/sh/include/asm/setup.h b/arch/sh/include/asm/setup.h
index 4758325..01fa17a 100644
--- a/arch/sh/include/asm/setup.h
+++ b/arch/sh/include/asm/setup.h
@@ -19,6 +19,7 @@
 #define COMMAND_LINE ((char *) (PARAM+0x100))
 
 void sh_mv_setup(void);
+void check_for_initrd(void);
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index f6a2db1..61404ed 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -114,31 +114,7 @@
 }
 early_param("mem", early_parse_mem);
 
-/*
- * Register fully available low RAM pages with the bootmem allocator.
- */
-static void __init register_bootmem_low_pages(void)
-{
-	unsigned long curr_pfn, last_pfn, pages;
-
-	/*
-	 * We are rounding up the start address of usable memory:
-	 */
-	curr_pfn = PFN_UP(__MEMORY_START);
-
-	/*
-	 * ... and at the end of the usable range downwards:
-	 */
-	last_pfn = PFN_DOWN(__pa(memory_end));
-
-	if (last_pfn > max_low_pfn)
-		last_pfn = max_low_pfn;
-
-	pages = last_pfn - curr_pfn;
-	free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(pages));
-}
-
-static void __init check_for_initrd(void)
+void __init check_for_initrd(void)
 {
 #ifdef CONFIG_BLK_DEV_INITRD
 	unsigned long start, end;
@@ -240,85 +216,6 @@
 	add_active_range(nid, start_pfn, end_pfn);
 }
 
-void __init do_init_bootmem(void)
-{
-	unsigned long bootmap_size;
-	unsigned long bootmap_pages, bootmem_paddr;
-	u64 total_pages = lmb_phys_mem_size() >> PAGE_SHIFT;
-	int i;
-
-	bootmap_pages = bootmem_bootmap_pages(total_pages);
-
-	bootmem_paddr = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
-
-	/*
-	 * Find a proper area for the bootmem bitmap. After this
-	 * bootstrap step all allocations (until the page allocator
-	 * is intact) must be done via bootmem_alloc().
-	 */
-	bootmap_size = init_bootmem_node(NODE_DATA(0),
-					 bootmem_paddr >> PAGE_SHIFT,
-					 min_low_pfn, max_low_pfn);
-
-	/* Add active regions with valid PFNs. */
-	for (i = 0; i < lmb.memory.cnt; i++) {
-		unsigned long start_pfn, end_pfn;
-		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
-		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
-		__add_active_range(0, start_pfn, end_pfn);
-	}
-
-	/*
-	 * Add all physical memory to the bootmem map and mark each
-	 * area as present.
-	 */
-	register_bootmem_low_pages();
-
-	/* Reserve the sections we're already using. */
-	for (i = 0; i < lmb.reserved.cnt; i++)
-		reserve_bootmem(lmb.reserved.region[i].base,
-				lmb_size_bytes(&lmb.reserved, i),
-				BOOTMEM_DEFAULT);
-
-	node_set_online(0);
-
-	sparse_memory_present_with_active_regions(0);
-}
-
-static void __init early_reserve_mem(void)
-{
-	unsigned long start_pfn;
-
-	/*
-	 * Partially used pages are not usable - thus
-	 * we are rounding upwards:
-	 */
-	start_pfn = PFN_UP(__pa(_end));
-
-	/*
-	 * Reserve the kernel text and
-	 * Reserve the bootmem bitmap. We do this in two steps (first step
-	 * was init_bootmem()), because this catches the (definitely buggy)
-	 * case of us accidentally initializing the bootmem allocator with
-	 * an invalid RAM area.
-	 */
-	lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
-		    (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) -
-		    (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET));
-
-	/*
-	 * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET.
-	 */
-	if (CONFIG_ZERO_PAGE_OFFSET != 0)
-		lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET);
-
-	/*
-	 * Handle additional early reservations
-	 */
-	check_for_initrd();
-	reserve_crashkernel();
-}
-
 /*
  * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
  * is_kdump_kernel() to determine if we are booting after a panic. Hence
@@ -342,10 +239,6 @@
 {
 }
 
-void __init __weak plat_mem_setup(void)
-{
-}
-
 void __init setup_arch(char **cmdline_p)
 {
 	enable_mmu();
@@ -401,44 +294,16 @@
 
 	plat_early_device_setup();
 
+	sh_mv_setup();
+
 	/* Let earlyprintk output early console messages */
 	early_platform_driver_probe("earlyprintk", 1, 1);
 
-	lmb_init();
-
-	sh_mv_setup();
-	sh_mv.mv_mem_init();
-
-	early_reserve_mem();
-
-	lmb_enforce_memory_limit(memory_limit);
-	lmb_analyze();
-
-	lmb_dump_all();
-
-	/*
-	 * Determine low and high memory ranges:
-	 */
-	max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
-	min_low_pfn = __MEMORY_START >> PAGE_SHIFT;
-
-	nodes_clear(node_online_map);
-
-	memory_start = (unsigned long)__va(__MEMORY_START);
-	memory_end = memory_start + (memory_limit ?: lmb_phys_mem_size());
-
-	uncached_init();
-	pmb_init();
-	do_init_bootmem();
-	plat_mem_setup();
-	sparse_init();
+	paging_init();
 
 #ifdef CONFIG_DUMMY_CONSOLE
 	conswitchp = &dummy_con;
 #endif
-	paging_init();
-
-	ioremap_fixed_init();
 
 	/* Perform the machine specific initialisation */
 	if (likely(sh_mv.mv_setup))
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 9c5400b..7f3cb52 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -17,11 +17,14 @@
 #include <linux/percpu.h>
 #include <linux/io.h>
 #include <linux/lmb.h>
+#include <linux/kexec.h>
 #include <linux/dma-mapping.h>
 #include <asm/mmu_context.h>
+#include <asm/mmzone.h>
 #include <asm/tlb.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
+#include <asm/setup.h>
 #include <asm/cache.h>
 #include <asm/sizes.h>
 
@@ -33,6 +36,11 @@
 	lmb_add(__MEMORY_START, __MEMORY_SIZE);
 }
 
+void __init __weak plat_mem_setup(void)
+{
+	/* Nothing to see here, move along. */
+}
+
 #ifdef CONFIG_MMU
 static pte_t *__get_pte_phys(unsigned long addr)
 {
@@ -158,15 +166,166 @@
 }
 #endif	/* CONFIG_MMU */
 
-/*
- * paging_init() sets up the page tables
- */
+void __init allocate_pgdat(unsigned int nid)
+{
+	unsigned long start_pfn, end_pfn;
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	unsigned long phys;
+#endif
+
+	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	phys = __lmb_alloc_base(sizeof(struct pglist_data),
+				SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT);
+	/* Retry with all of system memory */
+	if (!phys)
+		phys = __lmb_alloc_base(sizeof(struct pglist_data),
+					SMP_CACHE_BYTES, lmb_end_of_DRAM());
+	if (!phys)
+		panic("Can't allocate pgdat for node %d\n", nid);
+
+	NODE_DATA(nid) = __va(phys);
+	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+	NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+#endif
+
+	NODE_DATA(nid)->node_start_pfn = start_pfn;
+	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+static void __init bootmem_init_one_node(unsigned int nid)
+{
+	unsigned long total_pages, paddr;
+	unsigned long end_pfn;
+	struct pglist_data *p;
+	int i;
+
+	p = NODE_DATA(nid);
+
+	/* Nothing to do.. */
+	if (!p->node_spanned_pages)
+		return;
+
+	end_pfn = p->node_start_pfn + p->node_spanned_pages;
+
+	total_pages = bootmem_bootmap_pages(p->node_spanned_pages);
+
+	paddr = lmb_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
+	if (!paddr)
+		panic("Can't allocate bootmap for nid[%d]\n", nid);
+
+	init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
+
+	free_bootmem_with_active_regions(nid, end_pfn);
+
+	/*
+	 * XXX Handle initial reservations for the system memory node
+	 * only for the moment, we'll refactor this later for handling
+	 * reservations in other nodes.
+	 */
+	if (nid == 0) {
+		/* Reserve the sections we're already using. */
+		for (i = 0; i < lmb.reserved.cnt; i++)
+			reserve_bootmem(lmb.reserved.region[i].base,
+					lmb_size_bytes(&lmb.reserved, i),
+					BOOTMEM_DEFAULT);
+	}
+
+	sparse_memory_present_with_active_regions(nid);
+}
+
+static void __init do_init_bootmem(void)
+{
+	int i;
+
+	/* Add active regions with valid PFNs. */
+	for (i = 0; i < lmb.memory.cnt; i++) {
+		unsigned long start_pfn, end_pfn;
+		start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
+		end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+		__add_active_range(0, start_pfn, end_pfn);
+	}
+
+	/* All of system RAM sits in node 0 for the non-NUMA case */
+	allocate_pgdat(0);
+	node_set_online(0);
+
+	plat_mem_setup();
+
+	for_each_online_node(i)
+		bootmem_init_one_node(i);
+
+	sparse_init();
+}
+
+static void __init early_reserve_mem(void)
+{
+	unsigned long start_pfn;
+
+	/*
+	 * Partially used pages are not usable - thus
+	 * we are rounding upwards:
+	 */
+	start_pfn = PFN_UP(__pa(_end));
+
+	/*
+	 * Reserve the kernel text and Reserve the bootmem bitmap. We do
+	 * this in two steps (first step was init_bootmem()), because
+	 * this catches the (definitely buggy) case of us accidentally
+	 * initializing the bootmem allocator with an invalid RAM area.
+	 */
+	lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET,
+		    (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) -
+		    (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET));
+
+	/*
+	 * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET.
+	 */
+	if (CONFIG_ZERO_PAGE_OFFSET != 0)
+		lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET);
+
+	/*
+	 * Handle additional early reservations
+	 */
+	check_for_initrd();
+	reserve_crashkernel();
+}
+
 void __init paging_init(void)
 {
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 	unsigned long vaddr, end;
 	int nid;
 
+	lmb_init();
+
+	sh_mv.mv_mem_init();
+
+	early_reserve_mem();
+
+	lmb_enforce_memory_limit(memory_limit);
+	lmb_analyze();
+
+	lmb_dump_all();
+
+	/*
+	 * Determine low and high memory ranges:
+	 */
+	max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	min_low_pfn = __MEMORY_START >> PAGE_SHIFT;
+
+	nodes_clear(node_online_map);
+
+	memory_start = (unsigned long)__va(__MEMORY_START);
+	memory_end = memory_start + (memory_limit ?: lmb_phys_mem_size());
+
+	uncached_init();
+	pmb_init();
+	do_init_bootmem();
+	ioremap_fixed_init();
+
 	/* We don't need to map the kernel through the TLB, as
 	 * it is permanatly mapped using P1. So clear the
 	 * entire pgd. */