blob: 3890234e5b2629f54f1449224510ba612d58ba68 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ACPI 3.0 based NUMA setup
3 * Copyright 2004 Andi Kleen, SuSE Labs.
4 *
5 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
6 *
7 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
8 * Assumes all memory regions belonging to a single proximity domain
9 * are in one chunk. Holes between them will be included in the node.
10 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
19#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/proto.h>
21#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010022#include <asm/e820.h>
Jack Steinera65d1d62008-03-28 14:12:08 -050023#include <asm/genapic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024
Andi Kleenc31fbb12006-09-26 10:52:33 +020025int acpi_numa __initdata;
26
Linus Torvalds1da177e2005-04-16 15:20:36 -070027static struct acpi_table_slit *acpi_slit;
28
29static nodemask_t nodes_parsed __initdata;
Andi Kleenabe059e2006-03-25 16:29:12 +010030static struct bootnode nodes[MAX_NUMNODES] __initdata;
Keith Mannthey4942e992006-09-30 23:27:06 -070031static struct bootnode nodes_add[MAX_NUMNODES];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020032static int found_add_area __initdata;
Andi Kleenfad79062006-05-15 18:19:44 +020033int hotadd_percent __initdata = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070034
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070035static int num_node_memblks __initdata;
36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
38
Andi Kleen9391a3f2006-02-03 21:51:17 +010039/* Too small nodes confuse the VM badly. Usually they result
40 from BIOS bugs. */
41#define NODE_MIN_SIZE (4*1024*1024)
42
Linus Torvalds1da177e2005-04-16 15:20:36 -070043static __init int setup_node(int pxm)
44{
Yasunori Goto762834e2006-06-23 02:03:19 -070045 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070046}
47
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070048static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070049{
50 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070051 for (i = 0; i < num_node_memblks; i++) {
52 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070053 if (nd->start == nd->end)
54 continue;
55 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070056 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070057 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070058 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 }
60 return -1;
61}
62
63static __init void cutoff_node(int i, unsigned long start, unsigned long end)
64{
Andi Kleenabe059e2006-03-25 16:29:12 +010065 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020066
67 if (found_add_area)
68 return;
69
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 if (nd->start < start) {
71 nd->start = start;
72 if (nd->end < nd->start)
73 nd->start = nd->end;
74 }
75 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070076 nd->end = end;
77 if (nd->start > nd->end)
78 nd->start = nd->end;
79 }
80}
81
82static __init void bad_srat(void)
83{
Andi Kleen2bce2b52005-09-12 18:49:25 +020084 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 printk(KERN_ERR "SRAT: SRAT not used.\n");
86 acpi_numa = -1;
Andi Kleenfad79062006-05-15 18:19:44 +020087 found_add_area = 0;
Andi Kleen2bce2b52005-09-12 18:49:25 +020088 for (i = 0; i < MAX_LOCAL_APIC; i++)
89 apicid_to_node[i] = NUMA_NO_NODE;
Andi Kleen68a3a7f2006-04-07 19:49:18 +020090 for (i = 0; i < MAX_NUMNODES; i++)
91 nodes_add[i].start = nodes[i].end = 0;
Mel Gorman5cb248a2006-09-27 01:49:52 -070092 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070093}
94
95static __init inline int srat_disabled(void)
96{
97 return numa_off || acpi_numa < 0;
98}
99
Andi Kleen1584b892006-01-11 22:43:42 +0100100/*
101 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
102 * up the NUMA heuristics which wants the local node to have a smaller
103 * distance than the others.
104 * Do some quick checks here and only use the SLIT if it passes.
105 */
106static __init int slit_valid(struct acpi_table_slit *slit)
107{
108 int i, j;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300109 int d = slit->locality_count;
Andi Kleen1584b892006-01-11 22:43:42 +0100110 for (i = 0; i < d; i++) {
111 for (j = 0; j < d; j++) {
112 u8 val = slit->entry[d*i + j];
113 if (i == j) {
David Rientjesa2e212d2007-07-21 17:09:55 +0200114 if (val != LOCAL_DISTANCE)
Andi Kleen1584b892006-01-11 22:43:42 +0100115 return 0;
David Rientjesa2e212d2007-07-21 17:09:55 +0200116 } else if (val <= LOCAL_DISTANCE)
Andi Kleen1584b892006-01-11 22:43:42 +0100117 return 0;
118 }
119 }
120 return 1;
121}
122
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123/* Callback for SLIT parsing */
124void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
125{
Andi Kleen1584b892006-01-11 22:43:42 +0100126 if (!slit_valid(slit)) {
127 printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
128 return;
129 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130 acpi_slit = slit;
131}
132
133/* Callback for Proximity Domain -> LAPIC mapping */
134void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300135acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136{
137 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100138 int apic_id;
139
Andi Kleend22fe802006-02-03 21:51:26 +0100140 if (srat_disabled())
141 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300142 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200143 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100144 return;
145 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300146 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300148 pxm = pa->proximity_domain_lo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149 node = setup_node(pxm);
150 if (node < 0) {
151 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
152 bad_srat();
153 return;
154 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800155
Jack Steinera65d1d62008-03-28 14:12:08 -0500156 if (is_uv_system())
157 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
158 else
159 apic_id = pa->apic_id;
travis@sgi.comef970012008-01-30 13:33:10 +0100160 apicid_to_node[apic_id] = node;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 acpi_numa = 1;
Andi Kleen0b07e982005-09-12 18:49:24 +0200162 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100163 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164}
165
/* Hot-add stub: always reports failure (-1), whatever @end is. */
static int update_end_of_memory(unsigned long end)
{
	return -1;
}

/* Hot-add stub: always reports that there is enough memory (1). */
static int hotadd_enough_memory(struct bootnode *nd)
{
	return 1;
}

/* 1 only when the kernel is built with CONFIG_MEMORY_HOTPLUG_SPARSE, else 0. */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	return 1;
#else
	return 0;
#endif
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200173/*
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700174 * Update nodes_add and decide if to include add are in the zone.
Joe Perchesab4a5742008-01-30 13:31:42 +0100175 * Both SPARSE and RESERVE need nodes_add information.
Simon Arlott676b1852007-10-20 01:25:36 +0200176 * This code supports one contiguous hot add area per node.
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200177 */
Sam Ravnborgd01b9ad2008-02-17 13:22:58 +0100178static int __init
179reserve_hotadd(int node, unsigned long start, unsigned long end)
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200180{
181 unsigned long s_pfn = start >> PAGE_SHIFT;
182 unsigned long e_pfn = end >> PAGE_SHIFT;
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700183 int ret = 0, changed = 0;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200184 struct bootnode *nd = &nodes_add[node];
185
186 /* I had some trouble with strange memory hotadd regions breaking
187 the boot. Be very strict here and reject anything unexpected.
188 If you want working memory hotadd write correct SRATs.
189
190 The node size check is a basic sanity check to guard against
191 mistakes */
192 if ((signed long)(end - start) < NODE_MIN_SIZE) {
193 printk(KERN_ERR "SRAT: Hotplug area too small\n");
194 return -1;
195 }
196
197 /* This check might be a bit too strict, but I'm keeping it for now. */
Mel Gorman5cb248a2006-09-27 01:49:52 -0700198 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
Mel Gorman9c7cd682006-09-27 01:49:58 -0700199 printk(KERN_ERR
200 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
201 s_pfn, e_pfn);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200202 return -1;
203 }
204
205 if (!hotadd_enough_memory(&nodes_add[node])) {
206 printk(KERN_ERR "SRAT: Hotplug area too large\n");
207 return -1;
208 }
209
210 /* Looks good */
211
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200212 if (nd->start == nd->end) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300213 nd->start = start;
214 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200215 changed = 1;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300216 } else {
217 if (nd->start == end) {
218 nd->start = start;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200219 changed = 1;
220 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300221 if (nd->end == start) {
222 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200223 changed = 1;
224 }
225 if (!changed)
226 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300227 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200228
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700229 ret = update_end_of_memory(nd->end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200230
231 if (changed)
232 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700233 return ret;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200234}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200235
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
237void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300238acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239{
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200240 struct bootnode *nd, oldnode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 unsigned long start, end;
242 int node, pxm;
243 int i;
244
Andi Kleend22fe802006-02-03 21:51:26 +0100245 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300247 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100248 bad_srat();
249 return;
250 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300251 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100252 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300253
254 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200255 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300256 start = ma->base_address;
257 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 pxm = ma->proximity_domain;
259 node = setup_node(pxm);
260 if (node < 0) {
261 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
262 bad_srat();
263 return;
264 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700265 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200266 if (i == node) {
267 printk(KERN_WARNING
268 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
269 pxm, start, end, nodes[i].start, nodes[i].end);
270 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200272 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
273 pxm, start, end, node_to_pxm(i),
274 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 bad_srat();
276 return;
277 }
278 nd = &nodes[node];
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200279 oldnode = *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 if (!node_test_and_set(node, nodes_parsed)) {
281 nd->start = start;
282 nd->end = end;
283 } else {
284 if (start < nd->start)
285 nd->start = start;
286 if (nd->end < end)
287 nd->end = end;
288 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200289
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700290 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
291 start, end);
292 e820_register_active_regions(node, start >> PAGE_SHIFT,
293 end >> PAGE_SHIFT);
Mel Gormanfb014392006-09-27 01:49:59 -0700294 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
295 nd->end >> PAGE_SHIFT);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200296
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300297 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
298 (reserve_hotadd(node, start, end) < 0)) {
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200299 /* Ignore hotadd region. Undo damage */
300 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
301 *nd = oldnode;
302 if ((nd->start | nd->end) == 0)
303 node_clear(node, nodes_parsed);
304 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700305
306 node_memblk_range[num_node_memblks].start = start;
307 node_memblk_range[num_node_memblks].end = end;
308 memblk_nodeid[num_node_memblks] = node;
309 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310}
311
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100312/* Sanity check to catch more bad SRATs (they are amazingly common).
313 Make sure the PXMs cover all memory. */
David Rientjes3484d792007-07-21 17:10:32 +0200314static int __init nodes_cover_memory(const struct bootnode *nodes)
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100315{
316 int i;
317 unsigned long pxmram, e820ram;
318
319 pxmram = 0;
320 for_each_node_mask(i, nodes_parsed) {
321 unsigned long s = nodes[i].start >> PAGE_SHIFT;
322 unsigned long e = nodes[i].end >> PAGE_SHIFT;
323 pxmram += e - s;
Mel Gorman5cb248a2006-09-27 01:49:52 -0700324 pxmram -= absent_pages_in_range(s, e);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200325 if ((long)pxmram < 0)
326 pxmram = 0;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100327 }
328
Mel Gorman5cb248a2006-09-27 01:49:52 -0700329 e820ram = end_pfn - absent_pages_in_range(0, end_pfn);
Andi Kleenfdb9df92006-02-16 23:42:13 +0100330 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
331 if ((long)(e820ram - pxmram) >= 1*1024*1024) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100332 printk(KERN_ERR
333 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
334 (pxmram << PAGE_SHIFT) >> 20,
335 (e820ram << PAGE_SHIFT) >> 20);
336 return 0;
337 }
338 return 1;
339}
340
Sam Ravnborg1e296f52008-01-30 13:33:37 +0100341static void __init unparse_node(int node)
Andi Kleen9391a3f2006-02-03 21:51:17 +0100342{
343 int i;
344 node_clear(node, nodes_parsed);
345 for (i = 0; i < MAX_LOCAL_APIC; i++) {
346 if (apicid_to_node[i] == node)
347 apicid_to_node[i] = NUMA_NO_NODE;
348 }
349}
350
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351void __init acpi_numa_arch_fixup(void) {}
352
353/* Use the information discovered above to actually set up the nodes. */
354int __init acpi_scan_nodes(unsigned long start, unsigned long end)
355{
356 int i;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100357
David Rientjesae2c6dc2007-07-21 17:09:56 +0200358 if (acpi_numa <= 0)
359 return -1;
360
Andi Kleen9391a3f2006-02-03 21:51:17 +0100361 /* First clean up the node list */
362 for (i = 0; i < MAX_NUMNODES; i++) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300363 cutoff_node(i, start, end);
Mike Travis693e3c52008-01-30 13:33:14 +0100364 /*
365 * don't confuse VM with a node that doesn't have the
366 * minimum memory.
367 */
368 if (nodes[i].end &&
369 (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
Andi Kleen9391a3f2006-02-03 21:51:17 +0100370 unparse_node(i);
Daniel Yeisley0d015322006-05-30 22:47:57 +0200371 node_set_offline(i);
372 }
Andi Kleen9391a3f2006-02-03 21:51:17 +0100373 }
374
David Rientjes3484d792007-07-21 17:10:32 +0200375 if (!nodes_cover_memory(nodes)) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100376 bad_srat();
377 return -1;
378 }
379
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700380 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
381 memblk_nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 if (memnode_shift < 0) {
383 printk(KERN_ERR
384 "SRAT: No NUMA node hash function found. Contact maintainer\n");
385 bad_srat();
386 return -1;
387 }
Andi Kleene58e0d02005-09-12 18:49:25 +0200388
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200389 node_possible_map = nodes_parsed;
390
Andi Kleene58e0d02005-09-12 18:49:25 +0200391 /* Finally register nodes */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200392 for_each_node_mask(i, node_possible_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
Andi Kleena8062232006-04-07 19:49:21 +0200394 /* Try again in case setup_node_bootmem missed one due
395 to missing bootmem */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200396 for_each_node_mask(i, node_possible_map)
Andi Kleena8062232006-04-07 19:49:21 +0200397 if (!node_online(i))
398 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
399
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300400 for (i = 0; i < NR_CPUS; i++) {
Mike Travis0164fe12008-01-30 13:33:21 +0100401 int node = early_cpu_to_node(i);
402
travis@sgi.com834beda2008-01-30 13:33:21 +0100403 if (node == NUMA_NO_NODE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 continue;
travis@sgi.com834beda2008-01-30 13:33:21 +0100405 if (!node_isset(node, node_possible_map))
Andi Kleen69d81fc2005-11-05 17:25:53 +0100406 numa_set_node(i, NUMA_NO_NODE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 }
408 numa_init_array();
409 return 0;
410}
411
David Rientjes3484d792007-07-21 17:10:32 +0200412#ifdef CONFIG_NUMA_EMU
travis@sgi.comef970012008-01-30 13:33:10 +0100413static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
414 [0 ... MAX_NUMNODES-1] = PXM_INVAL
415};
travis@sgi.com602a54a2008-01-30 13:33:21 +0100416static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
travis@sgi.comef970012008-01-30 13:33:10 +0100417 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
418};
David Rientjes3484d792007-07-21 17:10:32 +0200419static int __init find_node_by_addr(unsigned long addr)
420{
421 int ret = NUMA_NO_NODE;
422 int i;
423
424 for_each_node_mask(i, nodes_parsed) {
425 /*
426 * Find the real node that this emulated node appears on. For
427 * the sake of simplicity, we only use a real node's starting
428 * address to determine which emulated node it appears on.
429 */
430 if (addr >= nodes[i].start && addr < nodes[i].end) {
431 ret = i;
432 break;
433 }
434 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100435 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200436}
437
438/*
439 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
440 * mappings that respect the real ACPI topology but reflect our emulated
441 * environment. For each emulated node, we find which real node it appears on
442 * and create PXM to NID mappings for those fake nodes which mirror that
443 * locality. SLIT will now represent the correct distances between emulated
444 * nodes as a result of the real topology.
445 */
446void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
447{
David Rientjes08705b82007-07-21 17:10:33 +0200448 int i, j;
David Rientjes3484d792007-07-21 17:10:32 +0200449
450 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
451 "topology.\n");
452 for (i = 0; i < num_nodes; i++) {
453 int nid, pxm;
454
455 nid = find_node_by_addr(fake_nodes[i].start);
456 if (nid == NUMA_NO_NODE)
457 continue;
458 pxm = node_to_pxm(nid);
459 if (pxm == PXM_INVAL)
460 continue;
461 fake_node_to_pxm_map[i] = pxm;
David Rientjes08705b82007-07-21 17:10:33 +0200462 /*
463 * For each apicid_to_node mapping that exists for this real
464 * node, it must now point to the fake node ID.
465 */
466 for (j = 0; j < MAX_LOCAL_APIC; j++)
467 if (apicid_to_node[j] == nid)
468 fake_apicid_to_node[j] = i;
David Rientjes3484d792007-07-21 17:10:32 +0200469 }
470 for (i = 0; i < num_nodes; i++)
471 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
David Rientjes08705b82007-07-21 17:10:33 +0200472 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
David Rientjes3484d792007-07-21 17:10:32 +0200473
474 nodes_clear(nodes_parsed);
475 for (i = 0; i < num_nodes; i++)
476 if (fake_nodes[i].start != fake_nodes[i].end)
477 node_set(i, nodes_parsed);
478 WARN_ON(!nodes_cover_memory(fake_nodes));
479}
480
/*
 * Without a SLIT, two (emulated) nodes count as local to each other when
 * they map to the same proximity domain.
 */
static int null_slit_node_compare(int a, int b)
{
	int pxm_a = node_to_pxm(a);
	int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
485#else
/* Without a SLIT, a node is local only to itself: 1 if a == b, else 0. */
static int null_slit_node_compare(int a, int b)
{
	return (a == b) ? 1 : 0;
}
490#endif /* CONFIG_NUMA_EMU */
491
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200492void __init srat_reserve_add_area(int nodeid)
493{
494 if (found_add_area && nodes_add[nodeid].end) {
495 u64 total_mb;
496
497 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
498 "for node %d at %Lx-%Lx\n",
499 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
500 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
501 >> PAGE_SHIFT;
502 total_mb *= sizeof(struct page);
503 total_mb >>= 20;
504 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
505 "pre-allocated memory.\n", (unsigned long long)total_mb);
506 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
Bernhard Walle72a7fe32008-02-07 00:15:17 -0800507 nodes_add[nodeid].end - nodes_add[nodeid].start,
508 BOOTMEM_DEFAULT);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200509 }
510}
511
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512int __node_distance(int a, int b)
513{
514 int index;
515
516 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200517 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
518 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300519 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520 return acpi_slit->entry[index + node_to_pxm(b)];
521}
522
523EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700524
525int memory_add_physaddr_to_nid(u64 start)
526{
527 int i, ret = 0;
528
529 for_each_node(i)
530 if (nodes_add[i].start <= start && nodes_add[i].end > start)
531 ret = i;
532
533 return ret;
534}
Keith Mannthey8c2676a2006-09-30 23:27:07 -0700535EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
536