blob: c7d272b8574cc1e55d9de9f10186dd1da5c75e85 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ACPI 3.0 based NUMA setup
3 * Copyright 2004 Andi Kleen, SuSE Labs.
4 *
5 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
6 *
7 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
8 * Assumes all memory regions belonging to a single proximity domain
9 * are in one chunk. Holes between them will be included in the node.
10 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
19#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/proto.h>
21#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010022#include <asm/e820.h>
Ingo Molnar7b6aa332009-02-17 13:58:15 +010023#include <asm/apic.h>
Ingo Molnar4ec71fa2009-01-21 10:24:27 +010024#include <asm/uv/uv.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070025
Andi Kleenc31fbb12006-09-26 10:52:33 +020026int acpi_numa __initdata;
27
Linus Torvalds1da177e2005-04-16 15:20:36 -070028static struct acpi_table_slit *acpi_slit;
29
30static nodemask_t nodes_parsed __initdata;
Andi Kleenabe059e2006-03-25 16:29:12 +010031static struct bootnode nodes[MAX_NUMNODES] __initdata;
Keith Mannthey4942e992006-09-30 23:27:06 -070032static struct bootnode nodes_add[MAX_NUMNODES];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020033static int found_add_area __initdata;
Andi Kleenfad79062006-05-15 18:19:44 +020034int hotadd_percent __initdata = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070036static int num_node_memblks __initdata;
37static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
38static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
39
Andi Kleen9391a3f2006-02-03 21:51:17 +010040/* Too small nodes confuse the VM badly. Usually they result
41 from BIOS bugs. */
42#define NODE_MIN_SIZE (4*1024*1024)
43
Linus Torvalds1da177e2005-04-16 15:20:36 -070044static __init int setup_node(int pxm)
45{
Yasunori Goto762834e2006-06-23 02:03:19 -070046 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070047}
48
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070049static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070050{
51 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070052 for (i = 0; i < num_node_memblks; i++) {
53 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070054 if (nd->start == nd->end)
55 continue;
56 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070057 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070059 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070060 }
61 return -1;
62}
63
64static __init void cutoff_node(int i, unsigned long start, unsigned long end)
65{
Andi Kleenabe059e2006-03-25 16:29:12 +010066 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020067
68 if (found_add_area)
69 return;
70
Linus Torvalds1da177e2005-04-16 15:20:36 -070071 if (nd->start < start) {
72 nd->start = start;
73 if (nd->end < nd->start)
74 nd->start = nd->end;
75 }
76 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070077 nd->end = end;
78 if (nd->start > nd->end)
79 nd->start = nd->end;
80 }
81}
82
83static __init void bad_srat(void)
84{
Andi Kleen2bce2b52005-09-12 18:49:25 +020085 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 printk(KERN_ERR "SRAT: SRAT not used.\n");
87 acpi_numa = -1;
Andi Kleenfad79062006-05-15 18:19:44 +020088 found_add_area = 0;
Andi Kleen2bce2b52005-09-12 18:49:25 +020089 for (i = 0; i < MAX_LOCAL_APIC; i++)
90 apicid_to_node[i] = NUMA_NO_NODE;
Andi Kleen68a3a7f2006-04-07 19:49:18 +020091 for (i = 0; i < MAX_NUMNODES; i++)
92 nodes_add[i].start = nodes[i].end = 0;
Mel Gorman5cb248a2006-09-27 01:49:52 -070093 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070094}
95
96static __init inline int srat_disabled(void)
97{
98 return numa_off || acpi_numa < 0;
99}
100
101/* Callback for SLIT parsing */
102void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
103{
Yinghai Luf302a5bb2008-07-10 20:36:37 -0700104 unsigned length;
105 unsigned long phys;
106
107 length = slit->header.length;
108 phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length,
109 PAGE_SIZE);
110
111 if (phys == -1L)
112 panic(" Can not save slit!\n");
113
114 acpi_slit = __va(phys);
115 memcpy(acpi_slit, slit, length);
116 reserve_early(phys, phys + length, "ACPI SLIT");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117}
118
Suresh Siddha7237d3d2009-03-30 13:55:30 -0800119/* Callback for Proximity Domain -> x2APIC mapping */
120void __init
121acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
122{
123 int pxm, node;
124 int apic_id;
125
126 if (srat_disabled())
127 return;
128 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
129 bad_srat();
130 return;
131 }
132 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
133 return;
134 pxm = pa->proximity_domain;
135 node = setup_node(pxm);
136 if (node < 0) {
137 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
138 bad_srat();
139 return;
140 }
141
142 apic_id = pa->apic_id;
143 apicid_to_node[apic_id] = node;
144 acpi_numa = 1;
145 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
146 pxm, apic_id, node);
147}
148
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149/* Callback for Proximity Domain -> LAPIC mapping */
150void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300151acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152{
153 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100154 int apic_id;
155
Andi Kleend22fe802006-02-03 21:51:26 +0100156 if (srat_disabled())
157 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300158 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200159 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100160 return;
161 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300162 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300164 pxm = pa->proximity_domain_lo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165 node = setup_node(pxm);
166 if (node < 0) {
167 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
168 bad_srat();
169 return;
170 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800171
Jack Steiner2e420602008-09-23 15:37:13 -0500172 if (get_uv_system_type() >= UV_X2APIC)
Jack Steinera65d1d62008-03-28 14:12:08 -0500173 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
174 else
175 apic_id = pa->apic_id;
travis@sgi.comef970012008-01-30 13:33:10 +0100176 apicid_to_node[apic_id] = node;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 acpi_numa = 1;
Andi Kleen0b07e982005-09-12 18:49:24 +0200178 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100179 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180}
181
/* Stub: always reports failure (-1) to its caller. */
static int update_end_of_memory(unsigned long end)
{
	return -1;
}

/* Stub: every hot-add area is considered to fit. */
static int hotadd_enough_memory(struct bootnode *nd)
{
	return 1;
}

/* Hot-add ranges are only tracked when sparse memory hotplug is built in. */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	return 1;
#else
	return 0;
#endif
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200189/*
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700190 * Update nodes_add and decide if to include add are in the zone.
Joe Perchesab4a5742008-01-30 13:31:42 +0100191 * Both SPARSE and RESERVE need nodes_add information.
Simon Arlott676b1852007-10-20 01:25:36 +0200192 * This code supports one contiguous hot add area per node.
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200193 */
Sam Ravnborgd01b9ad2008-02-17 13:22:58 +0100194static int __init
195reserve_hotadd(int node, unsigned long start, unsigned long end)
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200196{
197 unsigned long s_pfn = start >> PAGE_SHIFT;
198 unsigned long e_pfn = end >> PAGE_SHIFT;
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700199 int ret = 0, changed = 0;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200200 struct bootnode *nd = &nodes_add[node];
201
202 /* I had some trouble with strange memory hotadd regions breaking
203 the boot. Be very strict here and reject anything unexpected.
204 If you want working memory hotadd write correct SRATs.
205
206 The node size check is a basic sanity check to guard against
207 mistakes */
208 if ((signed long)(end - start) < NODE_MIN_SIZE) {
209 printk(KERN_ERR "SRAT: Hotplug area too small\n");
210 return -1;
211 }
212
213 /* This check might be a bit too strict, but I'm keeping it for now. */
Mel Gorman5cb248a2006-09-27 01:49:52 -0700214 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
Mel Gorman9c7cd682006-09-27 01:49:58 -0700215 printk(KERN_ERR
216 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
217 s_pfn, e_pfn);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200218 return -1;
219 }
220
221 if (!hotadd_enough_memory(&nodes_add[node])) {
222 printk(KERN_ERR "SRAT: Hotplug area too large\n");
223 return -1;
224 }
225
226 /* Looks good */
227
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200228 if (nd->start == nd->end) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300229 nd->start = start;
230 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200231 changed = 1;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300232 } else {
233 if (nd->start == end) {
234 nd->start = start;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200235 changed = 1;
236 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300237 if (nd->end == start) {
238 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200239 changed = 1;
240 }
241 if (!changed)
242 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300243 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200244
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700245 ret = update_end_of_memory(nd->end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200246
247 if (changed)
248 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700249 return ret;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200250}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200251
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
253void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300254acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255{
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200256 struct bootnode *nd, oldnode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 unsigned long start, end;
258 int node, pxm;
259 int i;
260
Andi Kleend22fe802006-02-03 21:51:26 +0100261 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300263 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100264 bad_srat();
265 return;
266 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300267 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100268 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300269
270 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200271 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300272 start = ma->base_address;
273 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 pxm = ma->proximity_domain;
275 node = setup_node(pxm);
276 if (node < 0) {
277 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
278 bad_srat();
279 return;
280 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700281 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200282 if (i == node) {
283 printk(KERN_WARNING
284 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
285 pxm, start, end, nodes[i].start, nodes[i].end);
286 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200288 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
289 pxm, start, end, node_to_pxm(i),
290 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 bad_srat();
292 return;
293 }
294 nd = &nodes[node];
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200295 oldnode = *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 if (!node_test_and_set(node, nodes_parsed)) {
297 nd->start = start;
298 nd->end = end;
299 } else {
300 if (start < nd->start)
301 nd->start = start;
302 if (nd->end < end)
303 nd->end = end;
304 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200305
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700306 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
307 start, end);
308 e820_register_active_regions(node, start >> PAGE_SHIFT,
309 end >> PAGE_SHIFT);
Mel Gormanfb014392006-09-27 01:49:59 -0700310 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
311 nd->end >> PAGE_SHIFT);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200312
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300313 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
314 (reserve_hotadd(node, start, end) < 0)) {
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200315 /* Ignore hotadd region. Undo damage */
316 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
317 *nd = oldnode;
318 if ((nd->start | nd->end) == 0)
319 node_clear(node, nodes_parsed);
320 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700321
322 node_memblk_range[num_node_memblks].start = start;
323 node_memblk_range[num_node_memblks].end = end;
324 memblk_nodeid[num_node_memblks] = node;
325 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326}
327
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100328/* Sanity check to catch more bad SRATs (they are amazingly common).
329 Make sure the PXMs cover all memory. */
David Rientjes3484d792007-07-21 17:10:32 +0200330static int __init nodes_cover_memory(const struct bootnode *nodes)
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100331{
332 int i;
333 unsigned long pxmram, e820ram;
334
335 pxmram = 0;
336 for_each_node_mask(i, nodes_parsed) {
337 unsigned long s = nodes[i].start >> PAGE_SHIFT;
338 unsigned long e = nodes[i].end >> PAGE_SHIFT;
339 pxmram += e - s;
Mel Gorman5cb248a2006-09-27 01:49:52 -0700340 pxmram -= absent_pages_in_range(s, e);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200341 if ((long)pxmram < 0)
342 pxmram = 0;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100343 }
344
Yinghai Luc987d122008-06-24 22:14:09 -0700345 e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
Andi Kleenfdb9df92006-02-16 23:42:13 +0100346 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
347 if ((long)(e820ram - pxmram) >= 1*1024*1024) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100348 printk(KERN_ERR
349 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
350 (pxmram << PAGE_SHIFT) >> 20,
351 (e820ram << PAGE_SHIFT) >> 20);
352 return 0;
353 }
354 return 1;
355}
356
Sam Ravnborg1e296f52008-01-30 13:33:37 +0100357static void __init unparse_node(int node)
Andi Kleen9391a3f2006-02-03 21:51:17 +0100358{
359 int i;
360 node_clear(node, nodes_parsed);
361 for (i = 0; i < MAX_LOCAL_APIC; i++) {
362 if (apicid_to_node[i] == node)
363 apicid_to_node[i] = NUMA_NO_NODE;
364 }
365}
366
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367void __init acpi_numa_arch_fixup(void) {}
368
369/* Use the information discovered above to actually set up the nodes. */
370int __init acpi_scan_nodes(unsigned long start, unsigned long end)
371{
372 int i;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100373
David Rientjesae2c6dc2007-07-21 17:09:56 +0200374 if (acpi_numa <= 0)
375 return -1;
376
Andi Kleen9391a3f2006-02-03 21:51:17 +0100377 /* First clean up the node list */
378 for (i = 0; i < MAX_NUMNODES; i++) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300379 cutoff_node(i, start, end);
Mike Travis693e3c52008-01-30 13:33:14 +0100380 /*
381 * don't confuse VM with a node that doesn't have the
382 * minimum memory.
383 */
384 if (nodes[i].end &&
385 (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
Andi Kleen9391a3f2006-02-03 21:51:17 +0100386 unparse_node(i);
Daniel Yeisley0d015322006-05-30 22:47:57 +0200387 node_set_offline(i);
388 }
Andi Kleen9391a3f2006-02-03 21:51:17 +0100389 }
390
David Rientjes3484d792007-07-21 17:10:32 +0200391 if (!nodes_cover_memory(nodes)) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100392 bad_srat();
393 return -1;
394 }
395
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700396 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
397 memblk_nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 if (memnode_shift < 0) {
399 printk(KERN_ERR
400 "SRAT: No NUMA node hash function found. Contact maintainer\n");
401 bad_srat();
402 return -1;
403 }
Andi Kleene58e0d02005-09-12 18:49:25 +0200404
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200405 node_possible_map = nodes_parsed;
406
Andi Kleene58e0d02005-09-12 18:49:25 +0200407 /* Finally register nodes */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200408 for_each_node_mask(i, node_possible_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
Andi Kleena8062232006-04-07 19:49:21 +0200410 /* Try again in case setup_node_bootmem missed one due
411 to missing bootmem */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200412 for_each_node_mask(i, node_possible_map)
Andi Kleena8062232006-04-07 19:49:21 +0200413 if (!node_online(i))
414 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
415
Mike Travis168ef542008-12-16 17:34:01 -0800416 for (i = 0; i < nr_cpu_ids; i++) {
Mike Travis0164fe12008-01-30 13:33:21 +0100417 int node = early_cpu_to_node(i);
418
travis@sgi.com834beda12008-01-30 13:33:21 +0100419 if (node == NUMA_NO_NODE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 continue;
travis@sgi.com834beda12008-01-30 13:33:21 +0100421 if (!node_isset(node, node_possible_map))
Mike Travis23ca4bb2008-05-12 21:21:12 +0200422 numa_clear_node(i);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 }
424 numa_init_array();
425 return 0;
426}
427
David Rientjes3484d792007-07-21 17:10:32 +0200428#ifdef CONFIG_NUMA_EMU
travis@sgi.comef970012008-01-30 13:33:10 +0100429static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
430 [0 ... MAX_NUMNODES-1] = PXM_INVAL
431};
travis@sgi.com602a54a2008-01-30 13:33:21 +0100432static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
travis@sgi.comef970012008-01-30 13:33:10 +0100433 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
434};
David Rientjes3484d792007-07-21 17:10:32 +0200435static int __init find_node_by_addr(unsigned long addr)
436{
437 int ret = NUMA_NO_NODE;
438 int i;
439
440 for_each_node_mask(i, nodes_parsed) {
441 /*
442 * Find the real node that this emulated node appears on. For
443 * the sake of simplicity, we only use a real node's starting
444 * address to determine which emulated node it appears on.
445 */
446 if (addr >= nodes[i].start && addr < nodes[i].end) {
447 ret = i;
448 break;
449 }
450 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100451 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200452}
453
454/*
455 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
456 * mappings that respect the real ACPI topology but reflect our emulated
457 * environment. For each emulated node, we find which real node it appears on
458 * and create PXM to NID mappings for those fake nodes which mirror that
459 * locality. SLIT will now represent the correct distances between emulated
460 * nodes as a result of the real topology.
461 */
462void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
463{
David Rientjes08705b82007-07-21 17:10:33 +0200464 int i, j;
David Rientjes3484d792007-07-21 17:10:32 +0200465
466 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
467 "topology.\n");
468 for (i = 0; i < num_nodes; i++) {
469 int nid, pxm;
470
471 nid = find_node_by_addr(fake_nodes[i].start);
472 if (nid == NUMA_NO_NODE)
473 continue;
474 pxm = node_to_pxm(nid);
475 if (pxm == PXM_INVAL)
476 continue;
477 fake_node_to_pxm_map[i] = pxm;
David Rientjes08705b82007-07-21 17:10:33 +0200478 /*
479 * For each apicid_to_node mapping that exists for this real
480 * node, it must now point to the fake node ID.
481 */
482 for (j = 0; j < MAX_LOCAL_APIC; j++)
483 if (apicid_to_node[j] == nid)
484 fake_apicid_to_node[j] = i;
David Rientjes3484d792007-07-21 17:10:32 +0200485 }
486 for (i = 0; i < num_nodes; i++)
487 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
David Rientjes08705b82007-07-21 17:10:33 +0200488 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
David Rientjes3484d792007-07-21 17:10:32 +0200489
490 nodes_clear(nodes_parsed);
491 for (i = 0; i < num_nodes; i++)
492 if (fake_nodes[i].start != fake_nodes[i].end)
493 node_set(i, nodes_parsed);
494 WARN_ON(!nodes_cover_memory(fake_nodes));
495}
496
/*
 * Without a SLIT, two nodes count as the same locality when they map to
 * the same proximity domain.
 */
static int null_slit_node_compare(int a, int b)
{
	int pxm_a = node_to_pxm(a);
	int pxm_b = node_to_pxm(b);

	return pxm_a == pxm_b;
}
501#else
/* Without a SLIT, a node is only local to itself: 1 if a == b, else 0. */
static int null_slit_node_compare(int a, int b)
{
	return !(a != b);
}
506#endif /* CONFIG_NUMA_EMU */
507
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200508void __init srat_reserve_add_area(int nodeid)
509{
510 if (found_add_area && nodes_add[nodeid].end) {
511 u64 total_mb;
512
513 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
514 "for node %d at %Lx-%Lx\n",
515 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
516 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
517 >> PAGE_SHIFT;
518 total_mb *= sizeof(struct page);
519 total_mb >>= 20;
520 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
521 "pre-allocated memory.\n", (unsigned long long)total_mb);
522 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
Bernhard Walle72a7fe32008-02-07 00:15:17 -0800523 nodes_add[nodeid].end - nodes_add[nodeid].start,
524 BOOTMEM_DEFAULT);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200525 }
526}
527
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528int __node_distance(int a, int b)
529{
530 int index;
531
532 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200533 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
534 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300535 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 return acpi_slit->entry[index + node_to_pxm(b)];
537}
538
539EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700540
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200541#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY)
Keith Mannthey4942e992006-09-30 23:27:06 -0700542int memory_add_physaddr_to_nid(u64 start)
543{
544 int i, ret = 0;
545
546 for_each_node(i)
547 if (nodes_add[i].start <= start && nodes_add[i].end > start)
548 ret = i;
549
550 return ret;
551}
Keith Mannthey8c2676a2006-09-30 23:27:07 -0700552EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
Thomas Gleixner6a1673a2008-05-12 15:43:38 +0200553#endif