blob: 13d56f5b134925e7e2f68043c0ac9d8a6b4c4444 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ACPI 3.0 based NUMA setup
3 * Copyright 2004 Andi Kleen, SuSE Labs.
4 *
5 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
6 *
7 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
8 * Assumes all memory regions belonging to a single proximity domain
9 * are in one chunk. Holes between them will be included in the node.
10 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
19#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/proto.h>
21#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010022#include <asm/e820.h>
Jack Steinera65d1d62008-03-28 14:12:08 -050023#include <asm/genapic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024
Andi Kleenc31fbb12006-09-26 10:52:33 +020025int acpi_numa __initdata;
26
Linus Torvalds1da177e2005-04-16 15:20:36 -070027static struct acpi_table_slit *acpi_slit;
28
29static nodemask_t nodes_parsed __initdata;
Andi Kleenabe059e2006-03-25 16:29:12 +010030static struct bootnode nodes[MAX_NUMNODES] __initdata;
Keith Mannthey4942e992006-09-30 23:27:06 -070031static struct bootnode nodes_add[MAX_NUMNODES];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020032static int found_add_area __initdata;
Andi Kleenfad79062006-05-15 18:19:44 +020033int hotadd_percent __initdata = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070034
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070035static int num_node_memblks __initdata;
36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
38
Andi Kleen9391a3f2006-02-03 21:51:17 +010039/* Too small nodes confuse the VM badly. Usually they result
40 from BIOS bugs. */
41#define NODE_MIN_SIZE (4*1024*1024)
42
Linus Torvalds1da177e2005-04-16 15:20:36 -070043static __init int setup_node(int pxm)
44{
Yasunori Goto762834e2006-06-23 02:03:19 -070045 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070046}
47
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070048static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070049{
50 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070051 for (i = 0; i < num_node_memblks; i++) {
52 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070053 if (nd->start == nd->end)
54 continue;
55 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070056 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070057 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070058 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 }
60 return -1;
61}
62
63static __init void cutoff_node(int i, unsigned long start, unsigned long end)
64{
Andi Kleenabe059e2006-03-25 16:29:12 +010065 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020066
67 if (found_add_area)
68 return;
69
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 if (nd->start < start) {
71 nd->start = start;
72 if (nd->end < nd->start)
73 nd->start = nd->end;
74 }
75 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070076 nd->end = end;
77 if (nd->start > nd->end)
78 nd->start = nd->end;
79 }
80}
81
82static __init void bad_srat(void)
83{
Andi Kleen2bce2b52005-09-12 18:49:25 +020084 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 printk(KERN_ERR "SRAT: SRAT not used.\n");
86 acpi_numa = -1;
Andi Kleenfad79062006-05-15 18:19:44 +020087 found_add_area = 0;
Andi Kleen2bce2b52005-09-12 18:49:25 +020088 for (i = 0; i < MAX_LOCAL_APIC; i++)
89 apicid_to_node[i] = NUMA_NO_NODE;
Andi Kleen68a3a7f2006-04-07 19:49:18 +020090 for (i = 0; i < MAX_NUMNODES; i++)
91 nodes_add[i].start = nodes[i].end = 0;
Mel Gorman5cb248a2006-09-27 01:49:52 -070092 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070093}
94
95static __init inline int srat_disabled(void)
96{
97 return numa_off || acpi_numa < 0;
98}
99
100/* Callback for SLIT parsing */
101void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
102{
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700103 unsigned length;
104 unsigned long phys;
105
106 length = slit->header.length;
107 phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length,
108 PAGE_SIZE);
109
110 if (phys == -1L)
111 panic(" Can not save slit!\n");
112
113 acpi_slit = __va(phys);
114 memcpy(acpi_slit, slit, length);
115 reserve_early(phys, phys + length, "ACPI SLIT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116}
117
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800118/* Callback for Proximity Domain -> x2APIC mapping */
119void __init
120acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
121{
122 int pxm, node;
123 int apic_id;
124
125 if (srat_disabled())
126 return;
127 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
128 bad_srat();
129 return;
130 }
131 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
132 return;
133 pxm = pa->proximity_domain;
134 node = setup_node(pxm);
135 if (node < 0) {
136 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
137 bad_srat();
138 return;
139 }
140
141 apic_id = pa->apic_id;
142 apicid_to_node[apic_id] = node;
143 acpi_numa = 1;
144 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
145 pxm, apic_id, node);
146}
147
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148/* Callback for Proximity Domain -> LAPIC mapping */
149void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300150acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151{
152 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100153 int apic_id;
154
Andi Kleend22fe802006-02-03 21:51:26 +0100155 if (srat_disabled())
156 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300157 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200158 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100159 return;
160 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300161 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300163 pxm = pa->proximity_domain_lo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 node = setup_node(pxm);
165 if (node < 0) {
166 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
167 bad_srat();
168 return;
169 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800170
Jack Steiner2e420602008-09-23 15:37:13 -0500171 if (get_uv_system_type() >= UV_X2APIC)
Jack Steinera65d1d62008-03-28 14:12:08 -0500172 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
173 else
174 apic_id = pa->apic_id;
travis@sgi.comef970012008-01-30 13:33:10 +0100175 apicid_to_node[apic_id] = node;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176 acpi_numa = 1;
Andi Kleen0b07e982005-09-12 18:49:24 +0200177 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100178 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179}
180
/* Stub: ignores @end and always returns -1. */
static int update_end_of_memory(unsigned long end)
{
	return -1;
}

/* Stub: ignores @nd and always returns 1 (callers read 0 as "too large"). */
static int hotadd_enough_memory(struct bootnode *nd)
{
	return 1;
}

/*
 * Whether hot-pluggable memory ranges should be recorded at all:
 * 1 with CONFIG_MEMORY_HOTPLUG_SPARSE, 0 otherwise.
 */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	return 1;
#else
	return 0;
#endif
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200188/*
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700189 * Update nodes_add and decide if to include add are in the zone.
Joe Perchesab4a5742008-01-30 13:31:42 +0100190 * Both SPARSE and RESERVE need nodes_add information.
Simon Arlott676b1852007-10-20 01:25:36 +0200191 * This code supports one contiguous hot add area per node.
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200192 */
Sam Ravnborgd01b9ad2008-02-17 13:22:58 +0100193static int __init
194reserve_hotadd(int node, unsigned long start, unsigned long end)
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200195{
196 unsigned long s_pfn = start >> PAGE_SHIFT;
197 unsigned long e_pfn = end >> PAGE_SHIFT;
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700198 int ret = 0, changed = 0;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200199 struct bootnode *nd = &nodes_add[node];
200
201 /* I had some trouble with strange memory hotadd regions breaking
202 the boot. Be very strict here and reject anything unexpected.
203 If you want working memory hotadd write correct SRATs.
204
205 The node size check is a basic sanity check to guard against
206 mistakes */
207 if ((signed long)(end - start) < NODE_MIN_SIZE) {
208 printk(KERN_ERR "SRAT: Hotplug area too small\n");
209 return -1;
210 }
211
212 /* This check might be a bit too strict, but I'm keeping it for now. */
Mel Gorman5cb248a2006-09-27 01:49:52 -0700213 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
Mel Gorman9c7cd682006-09-27 01:49:58 -0700214 printk(KERN_ERR
215 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
216 s_pfn, e_pfn);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200217 return -1;
218 }
219
220 if (!hotadd_enough_memory(&nodes_add[node])) {
221 printk(KERN_ERR "SRAT: Hotplug area too large\n");
222 return -1;
223 }
224
225 /* Looks good */
226
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200227 if (nd->start == nd->end) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300228 nd->start = start;
229 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200230 changed = 1;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300231 } else {
232 if (nd->start == end) {
233 nd->start = start;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200234 changed = 1;
235 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300236 if (nd->end == start) {
237 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200238 changed = 1;
239 }
240 if (!changed)
241 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300242 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200243
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700244 ret = update_end_of_memory(nd->end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200245
246 if (changed)
247 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700248 return ret;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200249}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200250
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
252void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300253acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254{
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200255 struct bootnode *nd, oldnode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 unsigned long start, end;
257 int node, pxm;
258 int i;
259
Andi Kleend22fe802006-02-03 21:51:26 +0100260 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300262 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100263 bad_srat();
264 return;
265 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300266 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100267 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300268
269 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200270 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300271 start = ma->base_address;
272 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 pxm = ma->proximity_domain;
274 node = setup_node(pxm);
275 if (node < 0) {
276 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
277 bad_srat();
278 return;
279 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700280 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200281 if (i == node) {
282 printk(KERN_WARNING
283 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
284 pxm, start, end, nodes[i].start, nodes[i].end);
285 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200287 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
288 pxm, start, end, node_to_pxm(i),
289 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290 bad_srat();
291 return;
292 }
293 nd = &nodes[node];
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200294 oldnode = *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 if (!node_test_and_set(node, nodes_parsed)) {
296 nd->start = start;
297 nd->end = end;
298 } else {
299 if (start < nd->start)
300 nd->start = start;
301 if (nd->end < end)
302 nd->end = end;
303 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200304
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700305 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
306 start, end);
307 e820_register_active_regions(node, start >> PAGE_SHIFT,
308 end >> PAGE_SHIFT);
Mel Gormanfb014392006-09-27 01:49:59 -0700309 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
310 nd->end >> PAGE_SHIFT);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200311
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300312 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
313 (reserve_hotadd(node, start, end) < 0)) {
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200314 /* Ignore hotadd region. Undo damage */
315 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
316 *nd = oldnode;
317 if ((nd->start | nd->end) == 0)
318 node_clear(node, nodes_parsed);
319 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700320
321 node_memblk_range[num_node_memblks].start = start;
322 node_memblk_range[num_node_memblks].end = end;
323 memblk_nodeid[num_node_memblks] = node;
324 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325}
326
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100327/* Sanity check to catch more bad SRATs (they are amazingly common).
328 Make sure the PXMs cover all memory. */
David Rientjes3484d792007-07-21 17:10:32 +0200329static int __init nodes_cover_memory(const struct bootnode *nodes)
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100330{
331 int i;
332 unsigned long pxmram, e820ram;
333
334 pxmram = 0;
335 for_each_node_mask(i, nodes_parsed) {
336 unsigned long s = nodes[i].start >> PAGE_SHIFT;
337 unsigned long e = nodes[i].end >> PAGE_SHIFT;
338 pxmram += e - s;
Mel Gorman5cb248a2006-09-27 01:49:52 -0700339 pxmram -= absent_pages_in_range(s, e);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200340 if ((long)pxmram < 0)
341 pxmram = 0;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100342 }
343
Yinghai Luc987d122008-06-24 22:14:09 -0700344 e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
Andi Kleenfdb9df92006-02-16 23:42:13 +0100345 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
346 if ((long)(e820ram - pxmram) >= 1*1024*1024) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100347 printk(KERN_ERR
348 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
349 (pxmram << PAGE_SHIFT) >> 20,
350 (e820ram << PAGE_SHIFT) >> 20);
351 return 0;
352 }
353 return 1;
354}
355
Sam Ravnborg1e296f52008-01-30 13:33:37 +0100356static void __init unparse_node(int node)
Andi Kleen9391a3f2006-02-03 21:51:17 +0100357{
358 int i;
359 node_clear(node, nodes_parsed);
360 for (i = 0; i < MAX_LOCAL_APIC; i++) {
361 if (apicid_to_node[i] == node)
362 apicid_to_node[i] = NUMA_NO_NODE;
363 }
364}
365
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366void __init acpi_numa_arch_fixup(void) {}
367
368/* Use the information discovered above to actually set up the nodes. */
369int __init acpi_scan_nodes(unsigned long start, unsigned long end)
370{
371 int i;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100372
David Rientjesae2c6dc2007-07-21 17:09:56 +0200373 if (acpi_numa <= 0)
374 return -1;
375
Andi Kleen9391a3f2006-02-03 21:51:17 +0100376 /* First clean up the node list */
377 for (i = 0; i < MAX_NUMNODES; i++) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300378 cutoff_node(i, start, end);
Mike Travis693e3c52008-01-30 13:33:14 +0100379 /*
380 * don't confuse VM with a node that doesn't have the
381 * minimum memory.
382 */
383 if (nodes[i].end &&
384 (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
Andi Kleen9391a3f2006-02-03 21:51:17 +0100385 unparse_node(i);
Daniel Yeisley0d015322006-05-30 22:47:57 +0200386 node_set_offline(i);
387 }
Andi Kleen9391a3f2006-02-03 21:51:17 +0100388 }
389
David Rientjes3484d792007-07-21 17:10:32 +0200390 if (!nodes_cover_memory(nodes)) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100391 bad_srat();
392 return -1;
393 }
394
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700395 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
396 memblk_nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 if (memnode_shift < 0) {
398 printk(KERN_ERR
399 "SRAT: No NUMA node hash function found. Contact maintainer\n");
400 bad_srat();
401 return -1;
402 }
Andi Kleene58e0d02005-09-12 18:49:25 +0200403
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200404 node_possible_map = nodes_parsed;
405
Andi Kleene58e0d02005-09-12 18:49:25 +0200406 /* Finally register nodes */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200407 for_each_node_mask(i, node_possible_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
Andi Kleena8062232006-04-07 19:49:21 +0200409 /* Try again in case setup_node_bootmem missed one due
410 to missing bootmem */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200411 for_each_node_mask(i, node_possible_map)
Andi Kleena8062232006-04-07 19:49:21 +0200412 if (!node_online(i))
413 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
414
Mike Travis168ef542008-12-16 17:34:01 -0800415 for (i = 0; i < nr_cpu_ids; i++) {
Mike Travis0164fe12008-01-30 13:33:21 +0100416 int node = early_cpu_to_node(i);
417
travis@sgi.com834beda12008-01-30 13:33:21 +0100418 if (node == NUMA_NO_NODE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 continue;
travis@sgi.com834beda12008-01-30 13:33:21 +0100420 if (!node_isset(node, node_possible_map))
Mike Travis23ca4bb2008-05-12 21:21:12 +0200421 numa_clear_node(i);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 }
423 numa_init_array();
424 return 0;
425}
426
David Rientjes3484d792007-07-21 17:10:32 +0200427#ifdef CONFIG_NUMA_EMU
travis@sgi.comef970012008-01-30 13:33:10 +0100428static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
429 [0 ... MAX_NUMNODES-1] = PXM_INVAL
430};
travis@sgi.com602a54a2008-01-30 13:33:21 +0100431static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
travis@sgi.comef970012008-01-30 13:33:10 +0100432 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
433};
David Rientjes3484d792007-07-21 17:10:32 +0200434static int __init find_node_by_addr(unsigned long addr)
435{
436 int ret = NUMA_NO_NODE;
437 int i;
438
439 for_each_node_mask(i, nodes_parsed) {
440 /*
441 * Find the real node that this emulated node appears on. For
442 * the sake of simplicity, we only use a real node's starting
443 * address to determine which emulated node it appears on.
444 */
445 if (addr >= nodes[i].start && addr < nodes[i].end) {
446 ret = i;
447 break;
448 }
449 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100450 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200451}
452
453/*
454 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
455 * mappings that respect the real ACPI topology but reflect our emulated
456 * environment. For each emulated node, we find which real node it appears on
457 * and create PXM to NID mappings for those fake nodes which mirror that
458 * locality. SLIT will now represent the correct distances between emulated
459 * nodes as a result of the real topology.
460 */
461void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
462{
David Rientjes08705b82007-07-21 17:10:33 +0200463 int i, j;
David Rientjes3484d792007-07-21 17:10:32 +0200464
465 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
466 "topology.\n");
467 for (i = 0; i < num_nodes; i++) {
468 int nid, pxm;
469
470 nid = find_node_by_addr(fake_nodes[i].start);
471 if (nid == NUMA_NO_NODE)
472 continue;
473 pxm = node_to_pxm(nid);
474 if (pxm == PXM_INVAL)
475 continue;
476 fake_node_to_pxm_map[i] = pxm;
David Rientjes08705b82007-07-21 17:10:33 +0200477 /*
478 * For each apicid_to_node mapping that exists for this real
479 * node, it must now point to the fake node ID.
480 */
481 for (j = 0; j < MAX_LOCAL_APIC; j++)
482 if (apicid_to_node[j] == nid)
483 fake_apicid_to_node[j] = i;
David Rientjes3484d792007-07-21 17:10:32 +0200484 }
485 for (i = 0; i < num_nodes; i++)
486 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
David Rientjes08705b82007-07-21 17:10:33 +0200487 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
David Rientjes3484d792007-07-21 17:10:32 +0200488
489 nodes_clear(nodes_parsed);
490 for (i = 0; i < num_nodes; i++)
491 if (fake_nodes[i].start != fake_nodes[i].end)
492 node_set(i, nodes_parsed);
493 WARN_ON(!nodes_cover_memory(fake_nodes));
494}
495
/*
 * Locality test used when no SLIT is available (NUMA emulation build):
 * two nodes count as local when they map to the same proximity domain.
 * Returns non-zero for "local".
 */
static int null_slit_node_compare(int a, int b)
{
	int pxm_a = node_to_pxm(a);
	int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
500#else
/*
 * Locality test used when no SLIT is available (no NUMA emulation):
 * a node is local only to itself.  Returns 1 for "local", 0 otherwise.
 */
static int null_slit_node_compare(int a, int b)
{
	return (a == b) ? 1 : 0;
}
505#endif /* CONFIG_NUMA_EMU */
506
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200507void __init srat_reserve_add_area(int nodeid)
508{
509 if (found_add_area && nodes_add[nodeid].end) {
510 u64 total_mb;
511
512 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
513 "for node %d at %Lx-%Lx\n",
514 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
515 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
516 >> PAGE_SHIFT;
517 total_mb *= sizeof(struct page);
518 total_mb >>= 20;
519 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
520 "pre-allocated memory.\n", (unsigned long long)total_mb);
521 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
Bernhard Walle72a7fe32008-02-07 00:15:17 -0800522 nodes_add[nodeid].end - nodes_add[nodeid].start,
523 BOOTMEM_DEFAULT);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200524 }
525}
526
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527int __node_distance(int a, int b)
528{
529 int index;
530
531 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200532 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
533 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300534 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 return acpi_slit->entry[index + node_to_pxm(b)];
536}
537
538EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700539
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200540#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
Keith Mannthey4942e992006-09-30 23:27:06 -0700541int memory_add_physaddr_to_nid(u64 start)
542{
543 int i, ret = 0;
544
545 for_each_node(i)
546 if (nodes_add[i].start <= start && nodes_add[i].end > start)
547 ret = i;
548
549 return ret;
550}
Keith Mannthey8c2676a2006-09-30 23:27:07 -0700551EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200552#endif