/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
Yinghai Lua9ce6bc2010-08-25 13:39:17 -070019#include <linux/memblock.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020020#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <asm/proto.h>
22#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010023#include <asm/e820.h>
Ingo Molnar7b6aa332009-02-17 13:58:15 +010024#include <asm/apic.h>
Ingo Molnar4ec71fa2009-01-21 10:24:27 +010025#include <asm/uv/uv.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026
Andi Kleenc31fbb12006-09-26 10:52:33 +020027int acpi_numa __initdata;
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029static struct acpi_table_slit *acpi_slit;
30
31static nodemask_t nodes_parsed __initdata;
Jack Steinerdc098552009-04-17 09:22:42 -050032static nodemask_t cpu_nodes_parsed __initdata;
Andi Kleenabe059e2006-03-25 16:29:12 +010033static struct bootnode nodes[MAX_NUMNODES] __initdata;
Keith Mannthey4942e992006-09-30 23:27:06 -070034static struct bootnode nodes_add[MAX_NUMNODES];
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070036static int num_node_memblks __initdata;
37static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
38static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
39
Linus Torvalds1da177e2005-04-16 15:20:36 -070040static __init int setup_node(int pxm)
41{
Yasunori Goto762834e2006-06-23 02:03:19 -070042 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070043}
44
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070045static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070046{
47 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070048 for (i = 0; i < num_node_memblks; i++) {
49 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070050 if (nd->start == nd->end)
51 continue;
52 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070053 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070054 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070055 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070056 }
57 return -1;
58}
59
60static __init void cutoff_node(int i, unsigned long start, unsigned long end)
61{
Andi Kleenabe059e2006-03-25 16:29:12 +010062 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020063
Linus Torvalds1da177e2005-04-16 15:20:36 -070064 if (nd->start < start) {
65 nd->start = start;
66 if (nd->end < nd->start)
67 nd->start = nd->end;
68 }
69 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 nd->end = end;
71 if (nd->start > nd->end)
72 nd->start = nd->end;
73 }
74}
75
76static __init void bad_srat(void)
77{
Andi Kleen2bce2b52005-09-12 18:49:25 +020078 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070079 printk(KERN_ERR "SRAT: SRAT not used.\n");
80 acpi_numa = -1;
Andi Kleen2bce2b52005-09-12 18:49:25 +020081 for (i = 0; i < MAX_LOCAL_APIC; i++)
82 apicid_to_node[i] = NUMA_NO_NODE;
Andi Kleen429b2b32009-07-18 08:56:57 +020083 for (i = 0; i < MAX_NUMNODES; i++) {
84 nodes[i].start = nodes[i].end = 0;
85 nodes_add[i].start = nodes_add[i].end = 0;
86 }
Mel Gorman5cb248a2006-09-27 01:49:52 -070087 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070088}
89
90static __init inline int srat_disabled(void)
91{
92 return numa_off || acpi_numa < 0;
93}
94
95/* Callback for SLIT parsing */
96void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
97{
Yinghai Luf302a5bb2008-07-10 20:36:37 -070098 unsigned length;
99 unsigned long phys;
100
101 length = slit->header.length;
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700102 phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length,
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700103 PAGE_SIZE);
104
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700105 if (phys == MEMBLOCK_ERROR)
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700106 panic(" Can not save slit!\n");
107
108 acpi_slit = __va(phys);
109 memcpy(acpi_slit, slit, length);
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700110 memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111}
112
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800113/* Callback for Proximity Domain -> x2APIC mapping */
114void __init
115acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
116{
117 int pxm, node;
118 int apic_id;
119
120 if (srat_disabled())
121 return;
122 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
123 bad_srat();
124 return;
125 }
126 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
127 return;
128 pxm = pa->proximity_domain;
129 node = setup_node(pxm);
130 if (node < 0) {
131 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
132 bad_srat();
133 return;
134 }
135
136 apic_id = pa->apic_id;
137 apicid_to_node[apic_id] = node;
Jack Steinerdc098552009-04-17 09:22:42 -0500138 node_set(node, cpu_nodes_parsed);
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800139 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -0800140 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800141 pxm, apic_id, node);
142}
143
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144/* Callback for Proximity Domain -> LAPIC mapping */
145void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300146acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147{
148 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100149 int apic_id;
150
Andi Kleend22fe802006-02-03 21:51:26 +0100151 if (srat_disabled())
152 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300153 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200154 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100155 return;
156 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300157 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300159 pxm = pa->proximity_domain_lo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 node = setup_node(pxm);
161 if (node < 0) {
162 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
163 bad_srat();
164 return;
165 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800166
Jack Steiner2e420602008-09-23 15:37:13 -0500167 if (get_uv_system_type() >= UV_X2APIC)
Jack Steinera65d1d62008-03-28 14:12:08 -0500168 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
169 else
170 apic_id = pa->apic_id;
travis@sgi.comef970012008-01-30 13:33:10 +0100171 apicid_to_node[apic_id] = node;
Jack Steinerdc098552009-04-17 09:22:42 -0500172 node_set(node, cpu_nodes_parsed);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -0800174 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100175 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176}
177
/*
 * Tells whether hot-pluggable SRAT memory ranges should be recorded:
 * 1 when built with CONFIG_MEMORY_HOTPLUG_SPARSE, 0 otherwise.
 */
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static inline int save_add_info(void)
{
	return 1;
}
#else
static inline int save_add_info(void)
{
	return 0;
}
#endif
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200183/*
Yinghai Lu888a5892009-05-15 13:59:37 -0700184 * Update nodes_add[]
185 * This code supports one contiguous hot add area per node
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200186 */
Yinghai Lu888a5892009-05-15 13:59:37 -0700187static void __init
188update_nodes_add(int node, unsigned long start, unsigned long end)
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200189{
190 unsigned long s_pfn = start >> PAGE_SHIFT;
191 unsigned long e_pfn = end >> PAGE_SHIFT;
Yinghai Lu888a5892009-05-15 13:59:37 -0700192 int changed = 0;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200193 struct bootnode *nd = &nodes_add[node];
194
195 /* I had some trouble with strange memory hotadd regions breaking
196 the boot. Be very strict here and reject anything unexpected.
197 If you want working memory hotadd write correct SRATs.
198
199 The node size check is a basic sanity check to guard against
200 mistakes */
201 if ((signed long)(end - start) < NODE_MIN_SIZE) {
202 printk(KERN_ERR "SRAT: Hotplug area too small\n");
Yinghai Lu888a5892009-05-15 13:59:37 -0700203 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200204 }
205
206 /* This check might be a bit too strict, but I'm keeping it for now. */
Mel Gorman5cb248a2006-09-27 01:49:52 -0700207 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
Mel Gorman9c7cd682006-09-27 01:49:58 -0700208 printk(KERN_ERR
209 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
210 s_pfn, e_pfn);
Yinghai Lu888a5892009-05-15 13:59:37 -0700211 return;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200212 }
213
214 /* Looks good */
215
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200216 if (nd->start == nd->end) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300217 nd->start = start;
218 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200219 changed = 1;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300220 } else {
221 if (nd->start == end) {
222 nd->start = start;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200223 changed = 1;
224 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300225 if (nd->end == start) {
226 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200227 changed = 1;
228 }
229 if (!changed)
230 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300231 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200232
David Rientjes3a5fc0e2010-01-20 12:10:47 -0800233 if (changed) {
234 node_set(node, cpu_nodes_parsed);
Yinghai Lu888a5892009-05-15 13:59:37 -0700235 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
236 nd->start, nd->end);
David Rientjes3a5fc0e2010-01-20 12:10:47 -0800237 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200238}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200239
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
241void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300242acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243{
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200244 struct bootnode *nd, oldnode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 unsigned long start, end;
246 int node, pxm;
247 int i;
248
Andi Kleend22fe802006-02-03 21:51:26 +0100249 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300251 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100252 bad_srat();
253 return;
254 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300255 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100256 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300257
258 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200259 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300260 start = ma->base_address;
261 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 pxm = ma->proximity_domain;
263 node = setup_node(pxm);
264 if (node < 0) {
265 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
266 bad_srat();
267 return;
268 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700269 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200270 if (i == node) {
271 printk(KERN_WARNING
272 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
273 pxm, start, end, nodes[i].start, nodes[i].end);
274 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200276 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
277 pxm, start, end, node_to_pxm(i),
278 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700279 bad_srat();
280 return;
281 }
282 nd = &nodes[node];
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200283 oldnode = *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 if (!node_test_and_set(node, nodes_parsed)) {
285 nd->start = start;
286 nd->end = end;
287 } else {
288 if (start < nd->start)
289 nd->start = start;
290 if (nd->end < end)
291 nd->end = end;
292 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200293
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700294 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
295 start, end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200296
Yinghai Lu888a5892009-05-15 13:59:37 -0700297 if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
298 update_nodes_add(node, start, end);
299 /* restore nodes[node] */
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200300 *nd = oldnode;
301 if ((nd->start | nd->end) == 0)
302 node_clear(node, nodes_parsed);
303 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700304
305 node_memblk_range[num_node_memblks].start = start;
306 node_memblk_range[num_node_memblks].end = end;
307 memblk_nodeid[num_node_memblks] = node;
308 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309}
310
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100311/* Sanity check to catch more bad SRATs (they are amazingly common).
312 Make sure the PXMs cover all memory. */
David Rientjes3484d792007-07-21 17:10:32 +0200313static int __init nodes_cover_memory(const struct bootnode *nodes)
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100314{
315 int i;
316 unsigned long pxmram, e820ram;
317
318 pxmram = 0;
319 for_each_node_mask(i, nodes_parsed) {
320 unsigned long s = nodes[i].start >> PAGE_SHIFT;
321 unsigned long e = nodes[i].end >> PAGE_SHIFT;
322 pxmram += e - s;
Yinghai Lu32996252009-12-15 17:59:02 -0800323 pxmram -= __absent_pages_in_range(i, s, e);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200324 if ((long)pxmram < 0)
325 pxmram = 0;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100326 }
327
Yinghai Lua9ce6bc2010-08-25 13:39:17 -0700328 e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
Yinghai Lu0964b052009-05-08 00:37:34 -0700329 /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
330 if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100331 printk(KERN_ERR
332 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
333 (pxmram << PAGE_SHIFT) >> 20,
334 (e820ram << PAGE_SHIFT) >> 20);
335 return 0;
336 }
337 return 1;
338}
339
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340void __init acpi_numa_arch_fixup(void) {}
341
David Rientjes87162732009-09-25 15:20:04 -0700342int __init acpi_get_nodes(struct bootnode *physnodes)
343{
344 int i;
345 int ret = 0;
346
347 for_each_node_mask(i, nodes_parsed) {
348 physnodes[ret].start = nodes[i].start;
349 physnodes[ret].end = nodes[i].end;
350 ret++;
351 }
352 return ret;
353}
354
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355/* Use the information discovered above to actually set up the nodes. */
356int __init acpi_scan_nodes(unsigned long start, unsigned long end)
357{
358 int i;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100359
David Rientjesae2c6dc2007-07-21 17:09:56 +0200360 if (acpi_numa <= 0)
361 return -1;
362
Andi Kleen9391a3f2006-02-03 21:51:17 +0100363 /* First clean up the node list */
Yinghai Lu7c437692009-05-15 13:59:37 -0700364 for (i = 0; i < MAX_NUMNODES; i++)
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300365 cutoff_node(i, start, end);
Andi Kleen9391a3f2006-02-03 21:51:17 +0100366
Jan Beulich2e618782010-04-21 16:13:20 +0100367 /*
368 * Join together blocks on the same node, holes between
369 * which don't overlap with memory on other nodes.
370 */
371 for (i = 0; i < num_node_memblks; ++i) {
372 int j, k;
373
374 for (j = i + 1; j < num_node_memblks; ++j) {
375 unsigned long start, end;
376
377 if (memblk_nodeid[i] != memblk_nodeid[j])
378 continue;
379 start = min(node_memblk_range[i].end,
380 node_memblk_range[j].end);
381 end = max(node_memblk_range[i].start,
382 node_memblk_range[j].start);
383 for (k = 0; k < num_node_memblks; ++k) {
384 if (memblk_nodeid[i] == memblk_nodeid[k])
385 continue;
386 if (start < node_memblk_range[k].end &&
387 end > node_memblk_range[k].start)
388 break;
389 }
390 if (k < num_node_memblks)
391 continue;
392 start = min(node_memblk_range[i].start,
393 node_memblk_range[j].start);
394 end = max(node_memblk_range[i].end,
395 node_memblk_range[j].end);
396 printk(KERN_INFO "SRAT: Node %d "
397 "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
398 memblk_nodeid[i],
399 node_memblk_range[i].start,
400 node_memblk_range[i].end,
401 node_memblk_range[j].start,
402 node_memblk_range[j].end,
403 start, end);
404 node_memblk_range[i].start = start;
405 node_memblk_range[i].end = end;
406 k = --num_node_memblks - j;
407 memmove(memblk_nodeid + j, memblk_nodeid + j+1,
408 k * sizeof(*memblk_nodeid));
409 memmove(node_memblk_range + j, node_memblk_range + j+1,
410 k * sizeof(*node_memblk_range));
411 --j;
412 }
413 }
414
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700415 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
416 memblk_nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 if (memnode_shift < 0) {
418 printk(KERN_ERR
419 "SRAT: No NUMA node hash function found. Contact maintainer\n");
420 bad_srat();
421 return -1;
422 }
Andi Kleene58e0d02005-09-12 18:49:25 +0200423
Yinghai Lu73cf6242010-10-10 19:52:15 -0700424 for (i = 0; i < num_node_memblks; i++)
H. Peter Anvin8e4029e2010-10-11 17:05:11 -0700425 memblock_x86_register_active_regions(memblk_nodeid[i],
Yinghai Lu73cf6242010-10-10 19:52:15 -0700426 node_memblk_range[i].start >> PAGE_SHIFT,
427 node_memblk_range[i].end >> PAGE_SHIFT);
428
Yinghai Lu32996252009-12-15 17:59:02 -0800429 /* for out of order entries in SRAT */
430 sort_node_map();
David Rientjes87162732009-09-25 15:20:04 -0700431 if (!nodes_cover_memory(nodes)) {
432 bad_srat();
433 return -1;
434 }
435
Jack Steinerdc098552009-04-17 09:22:42 -0500436 /* Account for nodes with cpus and no memory */
437 nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed);
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200438
Andi Kleene58e0d02005-09-12 18:49:25 +0200439 /* Finally register nodes */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200440 for_each_node_mask(i, node_possible_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
Andi Kleena8062232006-04-07 19:49:21 +0200442 /* Try again in case setup_node_bootmem missed one due
443 to missing bootmem */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200444 for_each_node_mask(i, node_possible_map)
Andi Kleena8062232006-04-07 19:49:21 +0200445 if (!node_online(i))
446 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
447
Mike Travis168ef542008-12-16 17:34:01 -0800448 for (i = 0; i < nr_cpu_ids; i++) {
Mike Travis0164fe12008-01-30 13:33:21 +0100449 int node = early_cpu_to_node(i);
450
travis@sgi.com834beda12008-01-30 13:33:21 +0100451 if (node == NUMA_NO_NODE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 continue;
Yinghai Lu7c437692009-05-15 13:59:37 -0700453 if (!node_online(node))
Mike Travis23ca4bb2008-05-12 21:21:12 +0200454 numa_clear_node(i);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 }
456 numa_init_array();
457 return 0;
458}
459
David Rientjes3484d792007-07-21 17:10:32 +0200460#ifdef CONFIG_NUMA_EMU
travis@sgi.comef970012008-01-30 13:33:10 +0100461static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
462 [0 ... MAX_NUMNODES-1] = PXM_INVAL
463};
travis@sgi.com602a54a2008-01-30 13:33:21 +0100464static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
travis@sgi.comef970012008-01-30 13:33:10 +0100465 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
466};
David Rientjes3484d792007-07-21 17:10:32 +0200467static int __init find_node_by_addr(unsigned long addr)
468{
469 int ret = NUMA_NO_NODE;
470 int i;
471
472 for_each_node_mask(i, nodes_parsed) {
473 /*
474 * Find the real node that this emulated node appears on. For
475 * the sake of simplicity, we only use a real node's starting
476 * address to determine which emulated node it appears on.
477 */
478 if (addr >= nodes[i].start && addr < nodes[i].end) {
479 ret = i;
480 break;
481 }
482 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100483 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200484}
485
486/*
487 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
488 * mappings that respect the real ACPI topology but reflect our emulated
489 * environment. For each emulated node, we find which real node it appears on
490 * and create PXM to NID mappings for those fake nodes which mirror that
491 * locality. SLIT will now represent the correct distances between emulated
492 * nodes as a result of the real topology.
493 */
494void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
495{
David Rientjes08705b82007-07-21 17:10:33 +0200496 int i, j;
David Rientjes3484d792007-07-21 17:10:32 +0200497
498 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
499 "topology.\n");
500 for (i = 0; i < num_nodes; i++) {
501 int nid, pxm;
502
503 nid = find_node_by_addr(fake_nodes[i].start);
504 if (nid == NUMA_NO_NODE)
505 continue;
506 pxm = node_to_pxm(nid);
507 if (pxm == PXM_INVAL)
508 continue;
509 fake_node_to_pxm_map[i] = pxm;
David Rientjes08705b82007-07-21 17:10:33 +0200510 /*
511 * For each apicid_to_node mapping that exists for this real
512 * node, it must now point to the fake node ID.
513 */
514 for (j = 0; j < MAX_LOCAL_APIC; j++)
David Rientjesb0c4d952010-05-06 02:24:34 -0700515 if (apicid_to_node[j] == nid &&
516 fake_apicid_to_node[j] == NUMA_NO_NODE)
David Rientjes08705b82007-07-21 17:10:33 +0200517 fake_apicid_to_node[j] = i;
David Rientjes3484d792007-07-21 17:10:32 +0200518 }
519 for (i = 0; i < num_nodes; i++)
520 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
David Rientjes08705b82007-07-21 17:10:33 +0200521 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
David Rientjes3484d792007-07-21 17:10:32 +0200522
523 nodes_clear(nodes_parsed);
524 for (i = 0; i < num_nodes; i++)
525 if (fake_nodes[i].start != fake_nodes[i].end)
526 node_set(i, nodes_parsed);
David Rientjes3484d792007-07-21 17:10:32 +0200527}
528
/*
 * Without a SLIT, two (emulated) nodes count as local to each other when
 * they map to the same proximity domain. Returns non-zero in that case.
 */
static int null_slit_node_compare(int a, int b)
{
	int pxm_a = node_to_pxm(a);
	int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
533#else
/* Without a SLIT, a node is local only to itself. Returns 1 if a equals b. */
static int null_slit_node_compare(int a, int b)
{
	if (a != b)
		return 0;
	return 1;
}
538#endif /* CONFIG_NUMA_EMU */
539
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540int __node_distance(int a, int b)
541{
542 int index;
543
544 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200545 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
546 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300547 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548 return acpi_slit->entry[index + node_to_pxm(b)];
549}
550
551EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700552
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200553#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
Keith Mannthey4942e992006-09-30 23:27:06 -0700554int memory_add_physaddr_to_nid(u64 start)
555{
556 int i, ret = 0;
557
558 for_each_node(i)
559 if (nodes_add[i].start <= start && nodes_add[i].end > start)
560 ret = i;
561
562 return ret;
563}
Keith Mannthey8c2676a2006-09-30 23:27:07 -0700564EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200565#endif