x86: cleanup early per cpu variables/accesses v4

  * Introduce a new PER_CPU macro called "EARLY_PER_CPU".  This is
    used by some per_cpu variables that are initialized and accessed
    before there are per_cpu areas allocated.

    ["Early" in respect to per_cpu variables is "earlier than the per_cpu
    areas have been setup".]

    This patchset adds these new macros:

	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)
	EXPORT_EARLY_PER_CPU_SYMBOL(_name)
	DECLARE_EARLY_PER_CPU(_type, _name)

	early_per_cpu_ptr(_name)
	early_per_cpu_map(_name, _idx)
	early_per_cpu(_name, _cpu)

    The DEFINE macro defines the per_cpu variable as well as the early
    map and pointer.  It also initializes the per_cpu variable and map
    elements to "_initvalue".  The early_* macros provide access to
    the initial map (usually setup during system init) and the early
    pointer.  This pointer is initialized to point to the early map
    but is then NULL'ed when the actual per_cpu areas are setup.  After
    that the per_cpu variable is the correct access to the variable.

    The early_per_cpu() macro is not very efficient but does show how to
    access the variable if you have a function that can be called both
    "early" and "late".  It tests the early ptr to be NULL, and if not
    then it's still valid.  Otherwise, the per_cpu variable is used
    instead:

	#define early_per_cpu(_name, _cpu) 			\
		(early_per_cpu_ptr(_name) ?			\
			early_per_cpu_ptr(_name)[_cpu] :	\
			per_cpu(_name, _cpu))

    A better method is to actually check the pointer manually.  In the
    case below, numa_set_node can be called both "early" and "late":

	void __cpuinit numa_set_node(int cpu, int node)
	{
	    int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

	    if (cpu_to_node_map)
		    cpu_to_node_map[cpu] = node;
	    else
		    per_cpu(x86_cpu_to_node_map, cpu) = node;
	}

  * Add a flag "arch_provides_topology_pointers" that indicates pointers
    to topology cpumask_t maps are available.  Otherwise, use the function
    returning the cpumask_t value.  This is useful if cpumask_t set size
    is very large to avoid copying data on to/off of the stack.

  * The coverage of CONFIG_DEBUG_PER_CPU_MAPS has been increased while
    the non-debug case has been optimized a bit.

  * Remove an unreferenced compiler warning in drivers/base/topology.c

  * Clean up #ifdef in setup.c

For inclusion into sched-devel/latest tree.

Based on:
	git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
    +   sched-devel/latest  .../mingo/linux-2.6-sched-devel.git

Signed-off-by: Mike Travis <travis@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index c5066d5..970f867 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,16 +31,6 @@
 
 struct memnode memnode;
 
-#ifdef CONFIG_SMP
-int x86_cpu_to_node_map_init[NR_CPUS] = {
-	[0 ... NR_CPUS-1] = NUMA_NO_NODE
-};
-void *x86_cpu_to_node_map_early_ptr;
-EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
-#endif
-DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-
 s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
@@ -577,24 +567,6 @@
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
 }
 
-__cpuinit void numa_add_cpu(int cpu)
-{
-	set_bit(cpu,
-		(unsigned long *)&node_to_cpumask_map[early_cpu_to_node(cpu)]);
-}
-
-void __cpuinit numa_set_node(int cpu, int node)
-{
-	int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
-
-	if(cpu_to_node_map)
-		cpu_to_node_map[cpu] = node;
-	else if(per_cpu_offset(cpu))
-		per_cpu(x86_cpu_to_node_map, cpu) = node;
-	else
-		Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu);
-}
-
 unsigned long __init numa_free_all_bootmem(void)
 {
 	unsigned long pages = 0;
@@ -641,6 +613,7 @@
 }
 early_param("numa", numa_setup);
 
+#ifdef CONFIG_NUMA
 /*
  * Setup early cpu_to_node.
  *
@@ -652,14 +625,19 @@
  * is already initialized in a round robin manner at numa_init_array,
  * prior to this call, and this initialization is good enough
  * for the fake NUMA cases.
+ *
+ * Called before the per_cpu areas are setup.
  */
 void __init init_cpu_to_node(void)
 {
-	int i;
+	int cpu;
+	u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
 
-	for (i = 0; i < NR_CPUS; i++) {
+	BUG_ON(cpu_to_apicid == NULL);
+
+	for_each_possible_cpu(cpu) {
 		int node;
-		u16 apicid = x86_cpu_to_apicid_init[i];
+		u16 apicid = cpu_to_apicid[cpu];
 
 		if (apicid == BAD_APICID)
 			continue;
@@ -668,8 +646,9 @@
 			continue;
 		if (!node_online(node))
 			continue;
-		numa_set_node(i, node);
+		numa_set_node(cpu, node);
 	}
 }
+#endif