blob: 99649dccad28cef2d1399b003dcdd39c04552b5b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ACPI 3.0 based NUMA setup
3 * Copyright 2004 Andi Kleen, SuSE Labs.
4 *
5 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
6 *
7 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
8 * Assumes all memory regions belonging to a single proximity domain
9 * are in one chunk. Holes between them will be included in the node.
10 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
19#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/proto.h>
21#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010022#include <asm/e820.h>
Jack Steinera65d1d62008-03-28 14:12:08 -050023#include <asm/genapic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024
Andi Kleenc31fbb12006-09-26 10:52:33 +020025int acpi_numa __initdata;
26
Linus Torvalds1da177e2005-04-16 15:20:36 -070027static struct acpi_table_slit *acpi_slit;
28
29static nodemask_t nodes_parsed __initdata;
Andi Kleenabe059e2006-03-25 16:29:12 +010030static struct bootnode nodes[MAX_NUMNODES] __initdata;
Keith Mannthey4942e992006-09-30 23:27:06 -070031static struct bootnode nodes_add[MAX_NUMNODES];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020032static int found_add_area __initdata;
Andi Kleenfad79062006-05-15 18:19:44 +020033int hotadd_percent __initdata = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070034
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070035static int num_node_memblks __initdata;
36static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
37static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
38
/*
 * Nodes that are too small confuse the VM badly.  They usually are the
 * result of BIOS bugs.  Size is in bytes.
 */
#define NODE_MIN_SIZE (4 * 1024 * 1024)
42
Linus Torvalds1da177e2005-04-16 15:20:36 -070043static __init int setup_node(int pxm)
44{
Yasunori Goto762834e2006-06-23 02:03:19 -070045 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070046}
47
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070048static __init int conflicting_memblks(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -070049{
50 int i;
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070051 for (i = 0; i < num_node_memblks; i++) {
52 struct bootnode *nd = &node_memblk_range[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070053 if (nd->start == nd->end)
54 continue;
55 if (nd->end > start && nd->start < end)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070056 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070057 if (nd->end == end && nd->start == start)
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -070058 return memblk_nodeid[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 }
60 return -1;
61}
62
63static __init void cutoff_node(int i, unsigned long start, unsigned long end)
64{
Andi Kleenabe059e2006-03-25 16:29:12 +010065 struct bootnode *nd = &nodes[i];
Andi Kleen68a3a7f2006-04-07 19:49:18 +020066
67 if (found_add_area)
68 return;
69
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 if (nd->start < start) {
71 nd->start = start;
72 if (nd->end < nd->start)
73 nd->start = nd->end;
74 }
75 if (nd->end > end) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070076 nd->end = end;
77 if (nd->start > nd->end)
78 nd->start = nd->end;
79 }
80}
81
82static __init void bad_srat(void)
83{
Andi Kleen2bce2b52005-09-12 18:49:25 +020084 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 printk(KERN_ERR "SRAT: SRAT not used.\n");
86 acpi_numa = -1;
Andi Kleenfad79062006-05-15 18:19:44 +020087 found_add_area = 0;
Andi Kleen2bce2b52005-09-12 18:49:25 +020088 for (i = 0; i < MAX_LOCAL_APIC; i++)
89 apicid_to_node[i] = NUMA_NO_NODE;
Andi Kleen68a3a7f2006-04-07 19:49:18 +020090 for (i = 0; i < MAX_NUMNODES; i++)
91 nodes_add[i].start = nodes[i].end = 0;
Mel Gorman5cb248a2006-09-27 01:49:52 -070092 remove_all_active_ranges();
Linus Torvalds1da177e2005-04-16 15:20:36 -070093}
94
95static __init inline int srat_disabled(void)
96{
97 return numa_off || acpi_numa < 0;
98}
99
100/* Callback for SLIT parsing */
101void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
102{
103 acpi_slit = slit;
104}
105
106/* Callback for Proximity Domain -> LAPIC mapping */
107void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300108acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109{
110 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100111 int apic_id;
112
Andi Kleend22fe802006-02-03 21:51:26 +0100113 if (srat_disabled())
114 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300115 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200116 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100117 return;
118 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300119 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300121 pxm = pa->proximity_domain_lo;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122 node = setup_node(pxm);
123 if (node < 0) {
124 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
125 bad_srat();
126 return;
127 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800128
Jack Steinera65d1d62008-03-28 14:12:08 -0500129 if (is_uv_system())
130 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
131 else
132 apic_id = pa->apic_id;
travis@sgi.comef970012008-01-30 13:33:10 +0100133 apicid_to_node[apic_id] = node;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 acpi_numa = 1;
Andi Kleen0b07e982005-09-12 18:49:24 +0200135 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100136 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137}
138
/*
 * Stub: always reports failure.  reserve_hotadd() hands this value back to
 * its caller, which treats a negative result as "ignore the region".
 */
static int update_end_of_memory(unsigned long end)
{
	return -1;
}

/* Stub: every hot-add area is considered to fit. */
static int hotadd_enough_memory(struct bootnode *nd)
{
	return 1;
}

/* Hot-pluggable SRAT entries are only processed with sparse hotplug. */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	return 1;
#else
	return 0;
#endif
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200146/*
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700147 * Update nodes_add and decide if to include add are in the zone.
Joe Perchesab4a5742008-01-30 13:31:42 +0100148 * Both SPARSE and RESERVE need nodes_add information.
Simon Arlott676b1852007-10-20 01:25:36 +0200149 * This code supports one contiguous hot add area per node.
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200150 */
Sam Ravnborgd01b9ad2008-02-17 13:22:58 +0100151static int __init
152reserve_hotadd(int node, unsigned long start, unsigned long end)
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200153{
154 unsigned long s_pfn = start >> PAGE_SHIFT;
155 unsigned long e_pfn = end >> PAGE_SHIFT;
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700156 int ret = 0, changed = 0;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200157 struct bootnode *nd = &nodes_add[node];
158
159 /* I had some trouble with strange memory hotadd regions breaking
160 the boot. Be very strict here and reject anything unexpected.
161 If you want working memory hotadd write correct SRATs.
162
163 The node size check is a basic sanity check to guard against
164 mistakes */
165 if ((signed long)(end - start) < NODE_MIN_SIZE) {
166 printk(KERN_ERR "SRAT: Hotplug area too small\n");
167 return -1;
168 }
169
170 /* This check might be a bit too strict, but I'm keeping it for now. */
Mel Gorman5cb248a2006-09-27 01:49:52 -0700171 if (absent_pages_in_range(s_pfn, e_pfn) != e_pfn - s_pfn) {
Mel Gorman9c7cd682006-09-27 01:49:58 -0700172 printk(KERN_ERR
173 "SRAT: Hotplug area %lu -> %lu has existing memory\n",
174 s_pfn, e_pfn);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200175 return -1;
176 }
177
178 if (!hotadd_enough_memory(&nodes_add[node])) {
179 printk(KERN_ERR "SRAT: Hotplug area too large\n");
180 return -1;
181 }
182
183 /* Looks good */
184
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200185 if (nd->start == nd->end) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300186 nd->start = start;
187 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200188 changed = 1;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300189 } else {
190 if (nd->start == end) {
191 nd->start = start;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200192 changed = 1;
193 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300194 if (nd->end == start) {
195 nd->end = end;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200196 changed = 1;
197 }
198 if (!changed)
199 printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300200 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200201
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700202 ret = update_end_of_memory(nd->end);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200203
204 if (changed)
205 printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
Keith Mannthey71efa8f2006-09-30 23:27:05 -0700206 return ret;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200207}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200208
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
210void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300211acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212{
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200213 struct bootnode *nd, oldnode;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214 unsigned long start, end;
215 int node, pxm;
216 int i;
217
Andi Kleend22fe802006-02-03 21:51:26 +0100218 if (srat_disabled())
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300220 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) {
Andi Kleend22fe802006-02-03 21:51:26 +0100221 bad_srat();
222 return;
223 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300224 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Andi Kleend22fe802006-02-03 21:51:26 +0100225 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300226
227 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info())
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200228 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300229 start = ma->base_address;
230 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231 pxm = ma->proximity_domain;
232 node = setup_node(pxm);
233 if (node < 0) {
234 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
235 bad_srat();
236 return;
237 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700238 i = conflicting_memblks(start, end);
Andi Kleen05d1fa42005-09-12 18:49:24 +0200239 if (i == node) {
240 printk(KERN_WARNING
241 "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
242 pxm, start, end, nodes[i].start, nodes[i].end);
243 } else if (i >= 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 printk(KERN_ERR
Andi Kleen05d1fa42005-09-12 18:49:24 +0200245 "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
246 pxm, start, end, node_to_pxm(i),
247 nodes[i].start, nodes[i].end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 bad_srat();
249 return;
250 }
251 nd = &nodes[node];
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200252 oldnode = *nd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 if (!node_test_and_set(node, nodes_parsed)) {
254 nd->start = start;
255 nd->end = end;
256 } else {
257 if (start < nd->start)
258 nd->start = start;
259 if (nd->end < end)
260 nd->end = end;
261 }
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200262
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700263 printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm,
264 start, end);
265 e820_register_active_regions(node, start >> PAGE_SHIFT,
266 end >> PAGE_SHIFT);
Mel Gormanfb014392006-09-27 01:49:59 -0700267 push_node_boundaries(node, nd->start >> PAGE_SHIFT,
268 nd->end >> PAGE_SHIFT);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200269
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300270 if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
271 (reserve_hotadd(node, start, end) < 0)) {
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200272 /* Ignore hotadd region. Undo damage */
273 printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
274 *nd = oldnode;
275 if ((nd->start | nd->end) == 0)
276 node_clear(node, nodes_parsed);
277 }
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700278
279 node_memblk_range[num_node_memblks].start = start;
280 node_memblk_range[num_node_memblks].end = end;
281 memblk_nodeid[num_node_memblks] = node;
282 num_node_memblks++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283}
284
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100285/* Sanity check to catch more bad SRATs (they are amazingly common).
286 Make sure the PXMs cover all memory. */
David Rientjes3484d792007-07-21 17:10:32 +0200287static int __init nodes_cover_memory(const struct bootnode *nodes)
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100288{
289 int i;
290 unsigned long pxmram, e820ram;
291
292 pxmram = 0;
293 for_each_node_mask(i, nodes_parsed) {
294 unsigned long s = nodes[i].start >> PAGE_SHIFT;
295 unsigned long e = nodes[i].end >> PAGE_SHIFT;
296 pxmram += e - s;
Mel Gorman5cb248a2006-09-27 01:49:52 -0700297 pxmram -= absent_pages_in_range(s, e);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200298 if ((long)pxmram < 0)
299 pxmram = 0;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100300 }
301
Mel Gorman5cb248a2006-09-27 01:49:52 -0700302 e820ram = end_pfn - absent_pages_in_range(0, end_pfn);
Andi Kleenfdb9df92006-02-16 23:42:13 +0100303 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
304 if ((long)(e820ram - pxmram) >= 1*1024*1024) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100305 printk(KERN_ERR
306 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
307 (pxmram << PAGE_SHIFT) >> 20,
308 (e820ram << PAGE_SHIFT) >> 20);
309 return 0;
310 }
311 return 1;
312}
313
Sam Ravnborg1e296f52008-01-30 13:33:37 +0100314static void __init unparse_node(int node)
Andi Kleen9391a3f2006-02-03 21:51:17 +0100315{
316 int i;
317 node_clear(node, nodes_parsed);
318 for (i = 0; i < MAX_LOCAL_APIC; i++) {
319 if (apicid_to_node[i] == node)
320 apicid_to_node[i] = NUMA_NO_NODE;
321 }
322}
323
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324void __init acpi_numa_arch_fixup(void) {}
325
326/* Use the information discovered above to actually set up the nodes. */
327int __init acpi_scan_nodes(unsigned long start, unsigned long end)
328{
329 int i;
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100330
David Rientjesae2c6dc2007-07-21 17:09:56 +0200331 if (acpi_numa <= 0)
332 return -1;
333
Andi Kleen9391a3f2006-02-03 21:51:17 +0100334 /* First clean up the node list */
335 for (i = 0; i < MAX_NUMNODES; i++) {
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300336 cutoff_node(i, start, end);
Mike Travis693e3c52008-01-30 13:33:14 +0100337 /*
338 * don't confuse VM with a node that doesn't have the
339 * minimum memory.
340 */
341 if (nodes[i].end &&
342 (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
Andi Kleen9391a3f2006-02-03 21:51:17 +0100343 unparse_node(i);
Daniel Yeisley0d015322006-05-30 22:47:57 +0200344 node_set_offline(i);
345 }
Andi Kleen9391a3f2006-02-03 21:51:17 +0100346 }
347
David Rientjes3484d792007-07-21 17:10:32 +0200348 if (!nodes_cover_memory(nodes)) {
Andi Kleen8a6fdd32006-01-11 22:44:39 +0100349 bad_srat();
350 return -1;
351 }
352
Suresh Siddha6ec6e0d2008-03-25 10:14:35 -0700353 memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
354 memblk_nodeid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355 if (memnode_shift < 0) {
356 printk(KERN_ERR
357 "SRAT: No NUMA node hash function found. Contact maintainer\n");
358 bad_srat();
359 return -1;
360 }
Andi Kleene58e0d02005-09-12 18:49:25 +0200361
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200362 node_possible_map = nodes_parsed;
363
Andi Kleene58e0d02005-09-12 18:49:25 +0200364 /* Finally register nodes */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200365 for_each_node_mask(i, node_possible_map)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
Andi Kleena8062232006-04-07 19:49:21 +0200367 /* Try again in case setup_node_bootmem missed one due
368 to missing bootmem */
Suresh Siddhae3f1cae2007-05-02 19:27:20 +0200369 for_each_node_mask(i, node_possible_map)
Andi Kleena8062232006-04-07 19:49:21 +0200370 if (!node_online(i))
371 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
372
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300373 for (i = 0; i < NR_CPUS; i++) {
Mike Travis0164fe12008-01-30 13:33:21 +0100374 int node = early_cpu_to_node(i);
375
travis@sgi.com834beda2008-01-30 13:33:21 +0100376 if (node == NUMA_NO_NODE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 continue;
travis@sgi.com834beda2008-01-30 13:33:21 +0100378 if (!node_isset(node, node_possible_map))
Andi Kleen69d81fc2005-11-05 17:25:53 +0100379 numa_set_node(i, NUMA_NO_NODE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380 }
381 numa_init_array();
382 return 0;
383}
384
David Rientjes3484d792007-07-21 17:10:32 +0200385#ifdef CONFIG_NUMA_EMU
travis@sgi.comef970012008-01-30 13:33:10 +0100386static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = {
387 [0 ... MAX_NUMNODES-1] = PXM_INVAL
388};
travis@sgi.com602a54a2008-01-30 13:33:21 +0100389static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = {
travis@sgi.comef970012008-01-30 13:33:10 +0100390 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
391};
David Rientjes3484d792007-07-21 17:10:32 +0200392static int __init find_node_by_addr(unsigned long addr)
393{
394 int ret = NUMA_NO_NODE;
395 int i;
396
397 for_each_node_mask(i, nodes_parsed) {
398 /*
399 * Find the real node that this emulated node appears on. For
400 * the sake of simplicity, we only use a real node's starting
401 * address to determine which emulated node it appears on.
402 */
403 if (addr >= nodes[i].start && addr < nodes[i].end) {
404 ret = i;
405 break;
406 }
407 }
Minoru Usui9a1b62f2008-01-30 13:33:35 +0100408 return ret;
David Rientjes3484d792007-07-21 17:10:32 +0200409}
410
411/*
412 * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
413 * mappings that respect the real ACPI topology but reflect our emulated
414 * environment. For each emulated node, we find which real node it appears on
415 * and create PXM to NID mappings for those fake nodes which mirror that
416 * locality. SLIT will now represent the correct distances between emulated
417 * nodes as a result of the real topology.
418 */
419void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
420{
David Rientjes08705b82007-07-21 17:10:33 +0200421 int i, j;
David Rientjes3484d792007-07-21 17:10:32 +0200422
423 printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
424 "topology.\n");
425 for (i = 0; i < num_nodes; i++) {
426 int nid, pxm;
427
428 nid = find_node_by_addr(fake_nodes[i].start);
429 if (nid == NUMA_NO_NODE)
430 continue;
431 pxm = node_to_pxm(nid);
432 if (pxm == PXM_INVAL)
433 continue;
434 fake_node_to_pxm_map[i] = pxm;
David Rientjes08705b82007-07-21 17:10:33 +0200435 /*
436 * For each apicid_to_node mapping that exists for this real
437 * node, it must now point to the fake node ID.
438 */
439 for (j = 0; j < MAX_LOCAL_APIC; j++)
440 if (apicid_to_node[j] == nid)
441 fake_apicid_to_node[j] = i;
David Rientjes3484d792007-07-21 17:10:32 +0200442 }
443 for (i = 0; i < num_nodes; i++)
444 __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
David Rientjes08705b82007-07-21 17:10:33 +0200445 memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
David Rientjes3484d792007-07-21 17:10:32 +0200446
447 nodes_clear(nodes_parsed);
448 for (i = 0; i < num_nodes; i++)
449 if (fake_nodes[i].start != fake_nodes[i].end)
450 node_set(i, nodes_parsed);
451 WARN_ON(!nodes_cover_memory(fake_nodes));
452}
453
454static int null_slit_node_compare(int a, int b)
455{
456 return node_to_pxm(a) == node_to_pxm(b);
457}
458#else
459static int null_slit_node_compare(int a, int b)
460{
461 return a == b;
462}
463#endif /* CONFIG_NUMA_EMU */
464
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200465void __init srat_reserve_add_area(int nodeid)
466{
467 if (found_add_area && nodes_add[nodeid].end) {
468 u64 total_mb;
469
470 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
471 "for node %d at %Lx-%Lx\n",
472 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
473 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
474 >> PAGE_SHIFT;
475 total_mb *= sizeof(struct page);
476 total_mb >>= 20;
477 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
478 "pre-allocated memory.\n", (unsigned long long)total_mb);
479 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
Bernhard Walle72a7fe32008-02-07 00:15:17 -0800480 nodes_add[nodeid].end - nodes_add[nodeid].start,
481 BOOTMEM_DEFAULT);
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200482 }
483}
484
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485int __node_distance(int a, int b)
486{
487 int index;
488
489 if (!acpi_slit)
David Rientjes3484d792007-07-21 17:10:32 +0200490 return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
491 REMOTE_DISTANCE;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300492 index = acpi_slit->locality_count * node_to_pxm(a);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 return acpi_slit->entry[index + node_to_pxm(b)];
494}
495
496EXPORT_SYMBOL(__node_distance);
Keith Mannthey4942e992006-09-30 23:27:06 -0700497
498int memory_add_physaddr_to_nid(u64 start)
499{
500 int i, ret = 0;
501
502 for_each_node(i)
503 if (nodes_add[i].start <= start && nodes_add[i].end > start)
504 ret = i;
505
506 return ret;
507}
Keith Mannthey8c2676a2006-09-30 23:27:07 -0700508EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
509