blob: 4f9dbf066ca4454602761f7a59acb33230dfa205 [file] [log] [blame]
/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
Yinghai Lua9ce6bc2010-08-25 13:39:17 -070019#include <linux/memblock.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020020#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <asm/proto.h>
22#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010023#include <asm/e820.h>
Ingo Molnar7b6aa332009-02-17 13:58:15 +010024#include <asm/apic.h>
Ingo Molnar4ec71fa2009-01-21 10:24:27 +010025#include <asm/uv/uv.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026
Andi Kleenc31fbb12006-09-26 10:52:33 +020027int acpi_numa __initdata;
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029static struct acpi_table_slit *acpi_slit;
30
31static nodemask_t nodes_parsed __initdata;
Jack Steinerdc098552009-04-17 09:22:42 -050032static nodemask_t cpu_nodes_parsed __initdata;
Andi Kleenabe059e2006-03-25 16:29:12 +010033static struct bootnode nodes[MAX_NUMNODES] __initdata;
Keith Mannthey4942e992006-09-30 23:27:06 -070034static struct bootnode nodes_add[MAX_NUMNODES];
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070036static int num_node_memblks __initdata;
37static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
38static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
39
Linus Torvalds1da177e2005-04-16 15:20:36 -070040static __init int setup_node(int pxm)
41{
Yasunori Goto762834e2006-06-23 02:03:19 -070042 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070043}
44
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070045static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070046{
47 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070048 for (i = 0; i < num_node_memblks; i++) {
49 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070050 if (nd->start == nd->end)
51 continue;
52 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070053 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070054 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070055 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070056 }
57 return -1;
58}
59
60static __init void cutoff_node(int i, unsigned long start, unsigned long end)
61{
Andi Kleenabe059e2006-03-25 16:29:12 +010062 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020063
Linus Torvalds1da177e2005-04-16 15:20:36 -070064 if (nd->start < start) {
65 nd->start = start;
66 if (nd->end < nd->start)
67 nd->start = nd->end;
68 }
69 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 nd->end = end;
71 if (nd->start > nd->end)
72 nd->start = nd->end;
73 }
74}
75
76static __init void bad_srat(void)
77{
Andi Kleen2bce2b52005-09-12 18:49:25 +020078 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070079 printk(KERN_ERR "SRAT: SRAT not used.\n");
80 acpi_numa = -1;
Andi Kleen2bce2b52005-09-12 18:49:25 +020081 for (i = 0; i < MAX_LOCAL_APIC; i++)
Tejun Heobbc9e2f2011-01-23 14:37:39 +010082 set_apicid_to_node(i, NUMA_NO_NODE);
Andi Kleen429b2b32009-07-18 08:56:57 +020083 for (i = 0; i < MAX_NUMNODES; i++) {
84 nodes[i].start = nodes[i].end = 0;
85 nodes_add[i].start = nodes_add[i].end = 0;
86 }
Mel Gorman5cb248a2006-09-27 01:49:52 -070087 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070088}
89
90static __init inline int srat_disabled(void)
91{
92 return numa_off || acpi_numa < 0;
93}
94
95/* Callback for SLIT parsing */
96void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
97{
Yinghai Luf302a5bb2008-07-10 20:36:37 -070098 unsigned length;
99 unsigned long phys;
100
101 length = slit->header.length;
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700102 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700103 PAGE_SIZE);
104
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700105 if (phys == MEMBLOCK_ERROR)
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700106 panic(" Can not save slit!\n");
107
108 acpi_slit = __va(phys);
109 memcpy(acpi_slit, slit, length);
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700110 memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111}
112
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800113/* Callback for Proximity Domain -> x2APIC mapping */
114void __init
115acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
116{
117 int pxm, node;
118 int apic_id;
119
120 if (srat_disabled())
121 return;
122 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
123 bad_srat();
124 return;
125 }
126 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
127 return;
128 pxm = pa->proximity_domain;
129 node = setup_node(pxm);
130 if (node < 0) {
131 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
132 bad_srat();
133 return;
134 }
135
136 apic_id = pa->apic_id;
Yinghai Lud3bd0582010-12-16 19:09:58 -0800137 if (apic_id >= MAX_LOCAL_APIC) {
138 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
139 return;
140 }
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100141 set_apicid_to_node(apic_id, node);
Jack Steinerdc098552009-04-17 09:22:42 -0500142 node_set(node, cpu_nodes_parsed);
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800143 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -0800144 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800145 pxm, apic_id, node);
146}
147
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148/* Callback for Proximity Domain -> LAPIC mapping */
149void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300150acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151{
152 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100153 int apic_id;
154
Andi Kleend22fe802006-02-03 21:51:26 +0100155 if (srat_disabled())
156 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300157 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200158 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100159 return;
160 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300161 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300163 pxm = pa->proximity_domain_lo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 node = setup_node(pxm);
165 if (node < 0) {
166 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
167 bad_srat();
168 return;
169 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800170
Jack Steiner2e420602008-09-23 15:37:13 -0500171 if (get_uv_system_type() >= UV_X2APIC)
Jack Steinera65d1d62008-03-28 14:12:08 -0500172 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
173 else
174 apic_id = pa->apic_id;
Yinghai Lud3bd0582010-12-16 19:09:58 -0800175
176 if (apic_id >= MAX_LOCAL_APIC) {
177 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
178 return;
179 }
180
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100181 set_apicid_to_node(apic_id, node);
Jack Steinerdc098552009-04-17 09:22:42 -0500182 node_set(node, cpu_nodes_parsed);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -0800184 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100185 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186}
187
/* Returns 1 when hot-add areas should be recorded (sparse hotplug), else 0. */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	return 1;
#else
	return 0;
#endif
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200193/*
Yinghai Lu888a5892009-05-15 13:59:37 -0700194 * Update nodes_add[]
195 * This code supports one contiguous hot add area per node
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200196 */
Yinghai Lu888a5892009-05-15 13:59:37 -0700197static void __init
198update_nodes_add(int node, unsigned long start, unsigned long end)
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200199{
200 unsigned long s_pfn = start >> PAGE_SHIFT;
201 unsigned long e_pfn = end >> PAGE_SHIFT;
Yinghai Lu888a5892009-05-15 13:59:37 -0700202 int changed = 0;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200203 struct bootnode *nd = &nodes_add[node];
204
205 /* I had some trouble with strange memory hotadd regions breaking
206 the boot. Be very strict here and reject anything unexpected.
207 If you want working memory hotadd write correct SRATs.
208
209 The node size check is a basic sanity check to guard against
210 mistakes */
211 if ((signed long)(end - start) < NODE_MIN_SIZE) {
212 printk(KERN_ERR "SRAT: Hotplug area too small\n");
Yinghai Lu888a5892009-05-15 13:59:37 -0700213 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200214 }
215
216 /* This check might be a bit too strict, but I'm keeping it for now. */
Mel Gorman5cb248a2006-09-27 01:49:52 -0700217 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
Mel Gorman9c7cd682006-09-27 01:49:58 -0700218 printk(KERN_ERR
219 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
220 s_pfn, e_pfn);
Yinghai Lu888a5892009-05-15 13:59:37 -0700221 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200222 }
223
224 /* Looks good */
225
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200226 if (nd->start == nd->end) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300227 nd->start = start;
228 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200229 changed = 1;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300230 } else {
231 if (nd->start == end) {
232 nd->start = start;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200233 changed = 1;
234 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300235 if (nd->end == start) {
236 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200237 changed = 1;
238 }
239 if (!changed)
240 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300241 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200242
David Rientjes3a5fc0e2010-01-20 12:10:47 -0800243 if (changed) {
244 node_set(node, cpu_nodes_parsed);
Yinghai Lu888a5892009-05-15 13:59:37 -0700245 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
246 nd->start, nd->end);
David Rientjes3a5fc0e2010-01-20 12:10:47 -0800247 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200248}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200249
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
251void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300252acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253{
Tejun Heo13081df2011-02-16 12:13:06 +0100254 struct bootnode *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 unsigned long start, end;
256 int node, pxm;
257 int i;
258
Andi Kleend22fe802006-02-03 21:51:26 +0100259 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300261 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100262 bad_srat();
263 return;
264 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300265 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100266 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300267
268 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200269 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300270 start = ma->base_address;
271 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 pxm = ma->proximity_domain;
273 node = setup_node(pxm);
274 if (node < 0) {
275 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
276 bad_srat();
277 return;
278 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700279 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200280 if (i == node) {
281 printk(KERN_WARNING
282 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
283 pxm, start, end, nodes[i].start, nodes[i].end);
284 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200286 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
287 pxm, start, end, node_to_pxm(i),
288 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 bad_srat();
290 return;
291 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200292
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700293 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
294 start, end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200295
Tejun Heo13081df2011-02-16 12:13:06 +0100296 if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) {
297 nd = &nodes[node];
298 if (!node_test_and_set(node, nodes_parsed)) {
299 nd->start = start;
300 nd->end = end;
301 } else {
302 if (start < nd->start)
303 nd->start = start;
304 if (nd->end < end)
305 nd->end = end;
306 }
307 } else
Yinghai Lu888a5892009-05-15 13:59:37 -0700308 update_nodes_add(node, start, end);
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700309
310 node_memblk_range[num_node_memblks].start = start;
311 node_memblk_range[num_node_memblks].end = end;
312 memblk_nodeid[num_node_memblks] = node;
313 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314}
315
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100316/* Sanity check to catch more bad SRATs (they are amazingly common).
317 Make sure the PXMs cover all memory. */
David Rientjes3484d792007-07-21 17:10:32 +0200318static int __init nodes_cover_memory(const struct bootnode *nodes)
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100319{
320 int i;
321 unsigned long pxmram, e820ram;
322
323 pxmram = 0;
324 for_each_node_mask(i, nodes_parsed) {
325 unsigned long s = nodes[i].start >> PAGE_SHIFT;
326 unsigned long e = nodes[i].end >> PAGE_SHIFT;
327 pxmram += e - s;
Yinghai Lu32996252009-12-15 17:59:02 -0800328 pxmram -= __absent_pages_in_range(i, s, e);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200329 if ((long)pxmram < 0)
330 pxmram = 0;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100331 }
332
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700333 e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
Yinghai Lu0964b052009-05-08 00:37:34 -0700334 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
335 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100336 printk(KERN_ERR
337 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
338 (pxmram << PAGE_SHIFT) >> 20,
339 (e820ram << PAGE_SHIFT) >> 20);
340 return 0;
341 }
342 return 1;
343}
344
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345void __init acpi_numa_arch_fixup(void) {}
346
#ifdef CONFIG_NUMA_EMU
/*
 * Clamp every parsed node to [start, end] and copy the resulting range
 * into @physnodes at the index of the node id.
 */
void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start,
				unsigned long end)
{
	int nid;

	for_each_node_mask(nid, nodes_parsed) {
		const struct bootnode *src = &nodes[nid];

		cutoff_node(nid, start, end);
		physnodes[nid].start = src->start;
		physnodes[nid].end = src->end;
	}
}
#endif /* CONFIG_NUMA_EMU */
David Rientjes87162732009-09-25 15:20:04 -0700360
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361/* Use the information discovered above to actually set up the nodes. */
Tejun Heo940fed22011-02-16 12:13:06 +0100362int __init acpi_scan_nodes(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363{
364 int i;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100365
David Rientjesae2c6dc2007-07-21 17:09:56 +0200366 if (acpi_numa <= 0)
367 return -1;
368
Andi Kleen9391a3f2006-02-03 21:51:17 +0100369 /* First clean up the node list */
Yinghai Lu7c437692009-05-15 13:59:37 -0700370 for (i = 0; i < MAX_NUMNODES; i++)
Tejun Heo940fed22011-02-16 12:13:06 +0100371 cutoff_node(i, 0, max_pfn << PAGE_SHIFT);
Andi Kleen9391a3f2006-02-03 21:51:17 +0100372
Jan Beulich2e618782010-04-21 16:13:20 +0100373 /*
374 * Join together blocks on the same node, holes between
375 * which don't overlap with memory on other nodes.
376 */
377 for (i = 0; i < num_node_memblks; ++i) {
378 int j, k;
379
380 for (j = i + 1; j < num_node_memblks; ++j) {
381 unsigned long start, end;
382
383 if (memblk_nodeid[i] != memblk_nodeid[j])
384 continue;
385 start = min(node_memblk_range[i].end,
386 node_memblk_range[j].end);
387 end = max(node_memblk_range[i].start,
388 node_memblk_range[j].start);
389 for (k = 0; k < num_node_memblks; ++k) {
390 if (memblk_nodeid[i] == memblk_nodeid[k])
391 continue;
392 if (start < node_memblk_range[k].end &&
393 end > node_memblk_range[k].start)
394 break;
395 }
396 if (k < num_node_memblks)
397 continue;
398 start = min(node_memblk_range[i].start,
399 node_memblk_range[j].start);
400 end = max(node_memblk_range[i].end,
401 node_memblk_range[j].end);
402 printk(KERN_INFO "SRAT: Node %d "
403 "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
404 memblk_nodeid[i],
405 node_memblk_range[i].start,
406 node_memblk_range[i].end,
407 node_memblk_range[j].start,
408 node_memblk_range[j].end,
409 start, end);
410 node_memblk_range[i].start = start;
411 node_memblk_range[i].end = end;
412 k = --num_node_memblks - j;
413 memmove(memblk_nodeid + j, memblk_nodeid + j+1,
414 k * sizeof(*memblk_nodeid));
415 memmove(node_memblk_range + j, node_memblk_range + j+1,
416 k * sizeof(*node_memblk_range));
417 --j;
418 }
419 }
420
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700421 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
422 memblk_nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 if (memnode_shift < 0) {
424 printk(KERN_ERR
425 "SRAT: No NUMA node hash function found. Contact maintainer\n");
426 bad_srat();
427 return -1;
428 }
Andi Kleene58e0d02005-09-12 18:49:25 +0200429
Yinghai Lu73cf6242010-10-10 19:52:15 -0700430 for (i = 0; i < num_node_memblks; i++)
H. Peter Anvin8e4029e2010-10-11 17:05:11 -0700431 memblock_x86_register_active_regions(memblk_nodeid[i],
Yinghai Lu73cf6242010-10-10 19:52:15 -0700432 node_memblk_range[i].start >> PAGE_SHIFT,
433 node_memblk_range[i].end >> PAGE_SHIFT);
434
Yinghai Lu32996252009-12-15 17:59:02 -0800435 /* for out of order entries in SRAT */
436 sort_node_map();
David Rientjes87162732009-09-25 15:20:04 -0700437 if (!nodes_cover_memory(nodes)) {
438 bad_srat();
439 return -1;
440 }
441
Yinghai Lu1411e0e2010-12-27 16:48:17 -0800442 init_memory_mapping_high();
443
Jack Steinerdc098552009-04-17 09:22:42 -0500444 /* Account for nodes with cpus and no memory */
445 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200446
Andi Kleene58e0d02005-09-12 18:49:25 +0200447 /* Finally register nodes */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200448 for_each_node_mask(i, node_possible_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
Andi Kleena8062232006-04-07 19:49:21 +0200450 /* Try again in case setup_node_bootmem missed one due
451 to missing bootmem */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200452 for_each_node_mask(i, node_possible_map)
Andi Kleena8062232006-04-07 19:49:21 +0200453 if (!node_online(i))
454 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
455
Mike Travis168ef542008-12-16 17:34:01 -0800456 for (i = 0; i < nr_cpu_ids; i++) {
Mike Travis0164fe12008-01-30 13:33:21 +0100457 int node = early_cpu_to_node(i);
458
travis@sgi.com834beda12008-01-30 13:33:21 +0100459 if (node == NUMA_NO_NODE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 continue;
Yinghai Lu7c437692009-05-15 13:59:37 -0700461 if (!node_online(node))
Mike Travis23ca4bb2008-05-12 21:21:12 +0200462 numa_clear_node(i);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 }
464 numa_init_array();
465 return 0;
466}
467
David Rientjes3484d792007-07-21 17:10:32 +0200468#ifdef CONFIG_NUMA_EMU
travis@sgi.comef970012008-01-30 13:33:10 +0100469static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
470 [0 ... MAX_NUMNODES-1] = PXM_INVAL
471};
travis@sgi.com602a54a2008-01-30 13:33:21 +0100472static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
travis@sgi.comef970012008-01-30 13:33:10 +0100473 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
474};
David Rientjes3484d792007-07-21 17:10:32 +0200475static int __init find_node_by_addr(unsigned long addr)
476{
477 int ret = NUMA_NO_NODE;
478 int i;
479
480 for_each_node_mask(i, nodes_parsed) {
481 /*
482 * Find the real node that this emulated node appears on. For
483 * the sake of simplicity, we only use a real node's starting
484 * address to determine which emulated node it appears on.
485 */
486 if (addr >= nodes[i].start && addr < nodes[i].end) {
487 ret = i;
488 break;
489 }
490 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100491 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200492}
493
494/*
495 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
496 * mappings that respect the real ACPI topology but reflect our emulated
497 * environment. For each emulated node, we find which real node it appears on
498 * and create PXM to NID mappings for those fake nodes which mirror that
499 * locality. SLIT will now represent the correct distances between emulated
500 * nodes as a result of the real topology.
501 */
502void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
503{
David Rientjes08705b82007-07-21 17:10:33 +0200504 int i, j;
David Rientjes3484d792007-07-21 17:10:32 +0200505
David Rientjes3484d792007-07-21 17:10:32 +0200506 for (i = 0; i < num_nodes; i++) {
507 int nid, pxm;
508
509 nid = find_node_by_addr(fake_nodes[i].start);
510 if (nid == NUMA_NO_NODE)
511 continue;
512 pxm = node_to_pxm(nid);
513 if (pxm == PXM_INVAL)
514 continue;
515 fake_node_to_pxm_map[i] = pxm;
David Rientjes08705b82007-07-21 17:10:33 +0200516 /*
517 * For each apicid_to_node mapping that exists for this real
518 * node, it must now point to the fake node ID.
519 */
520 for (j = 0; j < MAX_LOCAL_APIC; j++)
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100521 if (__apicid_to_node[j] == nid &&
David Rientjesb0c4d952010-05-06 02:24:34 -0700522 fake_apicid_to_node[j] == NUMA_NO_NODE)
David Rientjes08705b82007-07-21 17:10:33 +0200523 fake_apicid_to_node[j] = i;
David Rientjes3484d792007-07-21 17:10:32 +0200524 }
David Rientjesa387e952010-12-22 17:23:56 -0800525
526 /*
527 * If there are apicid-to-node mappings for physical nodes that do not
528 * have a corresponding emulated node, it should default to a guaranteed
529 * value.
530 */
531 for (i = 0; i < MAX_LOCAL_APIC; i++)
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100532 if (__apicid_to_node[i] != NUMA_NO_NODE &&
David Rientjesa387e952010-12-22 17:23:56 -0800533 fake_apicid_to_node[i] == NUMA_NO_NODE)
534 fake_apicid_to_node[i] = 0;
535
David Rientjes3484d792007-07-21 17:10:32 +0200536 for (i = 0; i < num_nodes; i++)
537 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100538 memcpy(__apicid_to_node, fake_apicid_to_node, sizeof(__apicid_to_node));
David Rientjes3484d792007-07-21 17:10:32 +0200539
540 nodes_clear(nodes_parsed);
541 for (i = 0; i < num_nodes; i++)
542 if (fake_nodes[i].start != fake_nodes[i].end)
543 node_set(i, nodes_parsed);
David Rientjes3484d792007-07-21 17:10:32 +0200544}
545
/* Without a SLIT, two nodes count as local when their proximity domains match. */
static int null_slit_node_compare(int a, int b)
{
	int pxm_a = node_to_pxm(a);
	int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
550#else
/* Without a SLIT, a node is local only to itself. */
static int null_slit_node_compare(int a, int b)
{
	return (a == b) ? 1 : 0;
}
555#endif /* CONFIG_NUMA_EMU */
556
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557int __node_distance(int a, int b)
558{
559 int index;
560
561 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200562 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
563 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300564 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 return acpi_slit->entry[index + node_to_pxm(b)];
566}
567
568EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700569
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
/*
 * Find the node whose hot-add area contains the physical address @start.
 * When several areas match, the last one visited by for_each_node() wins;
 * node 0 is returned when none matches.
 */
int memory_add_physaddr_to_nid(u64 start)
{
	int nid, found = 0;

	for_each_node(nid) {
		const struct bootnode *area = &nodes_add[nid];

		if (area->start > start || area->end <= start)
			continue;
		found = nid;
	}

	return found;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif