[IA64] Fix NUMA configuration issue
There is a NUMA memory configuration issue in 2.6.24:
A 2-node machine of ours has got the following memory layout:
Node 0: 0 - 2 Gbytes
Node 0: 4 - 8 Gbytes
Node 1: 8 - 16 Gbytes
Node 0: 16 - 18 Gbytes
"efi_memmap_init()" merges the three last ranges into one.
"register_active_ranges()" is called as follows:
efi_memmap_walk(register_active_ranges, NULL);
i.e. once for the 4 - 18 Gbytes range. It picks up the node
number from the start address, and registers all the memory for
the node #0.
"register_active_ranges()" should be called as follows to
make sure there is no merged address range at its entry:
efi_memmap_walk(filter_memory, register_active_ranges);
"filter_memory()" is similar to "filter_rsvd_memory()",
but the reserved memory ranges are not filtered out.
Signed-off-by: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
Signed-off-by: Tony Luck <tony.luck@intel.com>
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4aa9eae..c85b7dd 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -176,6 +176,29 @@
return 0;
}
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges
+ * are not filtered out.
+ */
+int __init
+filter_memory(unsigned long start, unsigned long end, void *arg)
+{
+ void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+ if (start == PAGE_OFFSET) {
+ printk(KERN_WARNING "warning: skipping physical page 0\n");
+ start += PAGE_SIZE;
+ if (start >= end)
+ return 0;
+ }
+#endif
+ func = arg;
+ if (start < end)
+ call_pernode_memory(__pa(start), end - start, func);
+ return 0;
+}
+
static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 0479661..798bf98 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -253,7 +253,7 @@
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_VIRTUAL_MEM_MAP
- efi_memmap_walk(register_active_ranges, NULL);
+ efi_memmap_walk(filter_memory, register_active_ranges);
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index ffee1ea..96d5fbf 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -444,7 +444,7 @@
mem_data[node].min_pfn = ~0UL;
}
- efi_memmap_walk(register_active_ranges, NULL);
+ efi_memmap_walk(filter_memory, register_active_ranges);
/*
* Initialize the boot memory maps in reverse order since that's
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index da05893..5c1de53 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -547,12 +547,10 @@
#endif /* CONFIG_VIRTUAL_MEM_MAP */
int __init
-register_active_ranges(u64 start, u64 end, void *arg)
+register_active_ranges(u64 start, u64 len, int nid)
{
- int nid = paddr_to_nid(__pa(start));
+ u64 end = start + len;
- if (nid < 0)
- nid = 0;
#ifdef CONFIG_KEXEC
if (start > crashk_res.start && start < crashk_res.end)
start = crashk_res.end;
diff --git a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
index f93308f..7245a57 100644
--- a/include/asm-ia64/meminit.h
+++ b/include/asm-ia64/meminit.h
@@ -35,6 +35,7 @@
extern void reserve_memory (void);
extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
+extern int filter_memory (unsigned long start, unsigned long end, void *arg);
extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
@@ -56,7 +57,7 @@
#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
-extern int register_active_ranges(u64 start, u64 end, void *arg);
+extern int register_active_ranges(u64 start, u64 len, int nid);
#ifdef CONFIG_VIRTUAL_MEM_MAP
# define LARGE_GAP 0x40000000 /* Use virtual mem map if hole is > than this */