blob: 79836d01f78957639fd181102bf54756966f8f71 [file] [log] [blame]
/*
 * ACPI 3.0 based NUMA setup
 * Copyright 2004 Andi Kleen, SuSE Labs.
 *
 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
 *
 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
 * Assumes all memory regions belonging to a single proximity domain
 * are in one chunk. Holes between them will be included in the node.
 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020018#include <linux/bootmem.h>
Yinghai Lua9ce6bc2010-08-25 13:39:17 -070019#include <linux/memblock.h>
Andi Kleen68a3a7f2006-04-07 19:49:18 +020020#include <linux/mm.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <asm/proto.h>
22#include <asm/numa.h>
Andi Kleen8a6fdd32006-01-11 22:44:39 +010023#include <asm/e820.h>
Ingo Molnar7b6aa332009-02-17 13:58:15 +010024#include <asm/apic.h>
Ingo Molnar4ec71fa2009-01-21 10:24:27 +010025#include <asm/uv/uv.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026
/*
 * SRAT parsing state as used in this file:
 *   < 0  the SRAT was rejected (set to -1 by bad_srat()); the affinity
 *        callbacks below bail out early once this is the case,
 *     1  set after a CPU affinity entry has been accepted,
 *     0  the zero-initialized starting value.
 */
int acpi_numa __initdata;
28
Linus Torvalds1da177e2005-04-16 15:20:36 -070029static __init int setup_node(int pxm)
30{
Yasunori Goto762834e2006-06-23 02:03:19 -070031 return acpi_map_pxm_to_node(pxm);
Linus Torvalds1da177e2005-04-16 15:20:36 -070032}
33
Linus Torvalds1da177e2005-04-16 15:20:36 -070034static __init void bad_srat(void)
35{
36 printk(KERN_ERR "SRAT: SRAT not used.\n");
37 acpi_numa = -1;
38}
39
40static __init inline int srat_disabled(void)
41{
Tejun Heoffe77a42011-02-16 12:13:06 +010042 return acpi_numa < 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070043}
44
45/* Callback for SLIT parsing */
46void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
47{
Tejun Heoac7136b2011-02-16 17:11:09 +010048 int i, j;
Yinghai Luf302a5bb2008-07-10 20:36:37 -070049
Tejun Heoac7136b2011-02-16 17:11:09 +010050 for (i = 0; i < slit->locality_count; i++)
51 for (j = 0; j < slit->locality_count; j++)
52 numa_set_distance(pxm_to_node(i), pxm_to_node(j),
53 slit->entry[slit->locality_count * i + j]);
Linus Torvalds1da177e2005-04-16 15:20:36 -070054}
55
Suresh Siddha7237d3d2009-03-30 13:55:30 -080056/* Callback for Proximity Domain -> x2APIC mapping */
57void __init
58acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
59{
60 int pxm, node;
61 int apic_id;
62
63 if (srat_disabled())
64 return;
65 if (pa->header.length < sizeof(struct acpi_srat_x2apic_cpu_affinity)) {
66 bad_srat();
67 return;
68 }
69 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
70 return;
71 pxm = pa->proximity_domain;
Yinghai Lua35fd282011-12-21 17:45:16 -080072 apic_id = pa->apic_id;
Steffen Persvoldb7157ac2012-03-16 20:25:35 +010073 if (!apic->apic_id_valid(apic_id)) {
Yinghai Lua35fd282011-12-21 17:45:16 -080074 printk(KERN_INFO "SRAT: PXM %u -> X2APIC 0x%04x ignored\n",
75 pxm, apic_id);
76 return;
77 }
Suresh Siddha7237d3d2009-03-30 13:55:30 -080078 node = setup_node(pxm);
79 if (node < 0) {
80 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
81 bad_srat();
82 return;
83 }
84
Yinghai Lud3bd0582010-12-16 19:09:58 -080085 if (apic_id >= MAX_LOCAL_APIC) {
86 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
87 return;
88 }
Tejun Heobbc9e2f2011-01-23 14:37:39 +010089 set_apicid_to_node(apic_id, node);
Tejun Heo92d4a432011-02-16 17:11:09 +010090 node_set(node, numa_nodes_parsed);
Suresh Siddha7237d3d2009-03-30 13:55:30 -080091 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -080092 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n",
Suresh Siddha7237d3d2009-03-30 13:55:30 -080093 pxm, apic_id, node);
94}
95
Linus Torvalds1da177e2005-04-16 15:20:36 -070096/* Callback for Proximity Domain -> LAPIC mapping */
97void __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +030098acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
Linus Torvalds1da177e2005-04-16 15:20:36 -070099{
100 int pxm, node;
travis@sgi.comef970012008-01-30 13:33:10 +0100101 int apic_id;
102
Andi Kleend22fe802006-02-03 21:51:26 +0100103 if (srat_disabled())
104 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300105 if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
Andi Kleenfad79062006-05-15 18:19:44 +0200106 bad_srat();
Andi Kleend22fe802006-02-03 21:51:26 +0100107 return;
108 }
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300109 if ((pa->flags & ACPI_SRAT_CPU_ENABLED) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110 return;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300111 pxm = pa->proximity_domain_lo;
Kurt Garloffcd298f62012-01-17 04:20:31 -0500112 if (acpi_srat_revision >= 2)
113 pxm |= *((unsigned int*)pa->proximity_domain_hi) << 8;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114 node = setup_node(pxm);
115 if (node < 0) {
116 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
117 bad_srat();
118 return;
119 }
Yinghai Lubeafe912008-02-16 23:00:22 -0800120
Jack Steiner2e420602008-09-23 15:37:13 -0500121 if (get_uv_system_type() >= UV_X2APIC)
Jack Steinera65d1d62008-03-28 14:12:08 -0500122 apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
123 else
124 apic_id = pa->apic_id;
Yinghai Lud3bd0582010-12-16 19:09:58 -0800125
126 if (apic_id >= MAX_LOCAL_APIC) {
127 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node);
128 return;
129 }
130
Tejun Heobbc9e2f2011-01-23 14:37:39 +0100131 set_apicid_to_node(apic_id, node);
Tejun Heo92d4a432011-02-16 17:11:09 +0100132 node_set(node, numa_nodes_parsed);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 acpi_numa = 1;
Yinghai Lu163d3862009-11-21 00:23:37 -0800134 printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n",
travis@sgi.comef970012008-01-30 13:33:10 +0100135 pxm, apic_id, node);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136}
137
/*
 * Compile-time switch: returns 1 when CONFIG_MEMORY_HOTPLUG is defined and
 * 0 otherwise.  acpi_numa_memory_affinity_init() uses it to decide whether
 * an SRAT memory entry flagged hot-pluggable is accepted at all.
 */
static inline int save_add_info(void)
{
#ifdef CONFIG_MEMORY_HOTPLUG
	return 1;
#else
	return 0;
#endif
}
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200143
Tang Chen27168d32013-02-22 16:33:46 -0800144#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
Tang Chen01a178a2013-02-22 16:33:49 -0800145static void __init
146handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
Tang Chen27168d32013-02-22 16:33:46 -0800147{
Tang Chen01a178a2013-02-22 16:33:49 -0800148 int overlap, i;
Tang Chen27168d32013-02-22 16:33:46 -0800149 unsigned long start_pfn, end_pfn;
150
151 start_pfn = PFN_DOWN(start);
152 end_pfn = PFN_UP(end);
153
154 /*
Tang Chen01a178a2013-02-22 16:33:49 -0800155 * For movablemem_map=acpi:
156 *
157 * SRAT: |_____| |_____| |_________| |_________| ......
158 * node id: 0 1 1 2
159 * hotpluggable: n y y n
160 * movablemem_map: |_____| |_________|
161 *
162 * Using movablemem_map, we can prevent memblock from allocating memory
163 * on ZONE_MOVABLE at boot time.
164 *
165 * Before parsing SRAT, memblock has already reserve some memory ranges
166 * for other purposes, such as for kernel image. We cannot prevent
167 * kernel from using these memory, so we need to exclude these memory
168 * even if it is hotpluggable.
169 * Furthermore, to ensure the kernel has enough memory to boot, we make
170 * all the memory on the node which the kernel resides in
171 * un-hotpluggable.
172 */
173 if (hotpluggable && movablemem_map.acpi) {
174 /* Exclude ranges reserved by memblock. */
175 struct memblock_type *rgn = &memblock.reserved;
176
177 for (i = 0; i < rgn->cnt; i++) {
178 if (end <= rgn->regions[i].base ||
179 start >= rgn->regions[i].base +
180 rgn->regions[i].size)
181 continue;
182
183 /*
184 * If the memory range overlaps the memory reserved by
185 * memblock, then the kernel resides in this node.
186 */
187 node_set(node, movablemem_map.numa_nodes_kernel);
188
189 goto out;
190 }
191
192 /*
193 * If the kernel resides in this node, then the whole node
194 * should not be hotpluggable.
195 */
196 if (node_isset(node, movablemem_map.numa_nodes_kernel))
197 goto out;
198
199 insert_movablemem_map(start_pfn, end_pfn);
200
201 /*
202 * numa_nodes_hotplug nodemask represents which nodes are put
203 * into movablemem_map.map[].
204 */
205 node_set(node, movablemem_map.numa_nodes_hotplug);
206 goto out;
207 }
208
209 /*
210 * For movablemem_map=nn[KMG]@ss[KMG]:
Tang Chen27168d32013-02-22 16:33:46 -0800211 *
212 * SRAT: |_____| |_____| |_________| |_________| ......
213 * node id: 0 1 1 2
214 * user specified: |__| |___|
215 * movablemem_map: |___| |_________| |______| ......
216 *
217 * Using movablemem_map, we can prevent memblock from allocating memory
218 * on ZONE_MOVABLE at boot time.
Tang Chen01a178a2013-02-22 16:33:49 -0800219 *
220 * NOTE: In this case, SRAT info will be ingored.
Tang Chen27168d32013-02-22 16:33:46 -0800221 */
222 overlap = movablemem_map_overlap(start_pfn, end_pfn);
223 if (overlap >= 0) {
224 /*
225 * If part of this range is in movablemem_map, we need to
226 * add the range after it to extend the range to the end
227 * of the node, because from the min address specified to
228 * the end of the node will be ZONE_MOVABLE.
229 */
230 start_pfn = max(start_pfn,
231 movablemem_map.map[overlap].start_pfn);
232 insert_movablemem_map(start_pfn, end_pfn);
233
234 /*
235 * Set the nodemask, so that if the address range on one node
236 * is not continuse, we can add the subsequent ranges on the
237 * same node into movablemem_map.
238 */
239 node_set(node, movablemem_map.numa_nodes_hotplug);
240 } else {
241 if (node_isset(node, movablemem_map.numa_nodes_hotplug))
242 /*
243 * Insert the range if we already have movable ranges
244 * on the same node.
245 */
246 insert_movablemem_map(start_pfn, end_pfn);
247 }
Tang Chen01a178a2013-02-22 16:33:49 -0800248out:
249 return;
Tang Chen27168d32013-02-22 16:33:46 -0800250}
251#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
Tang Chen01a178a2013-02-22 16:33:49 -0800252static inline void
253handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable)
Tang Chen27168d32013-02-22 16:33:46 -0800254{
255}
256#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
257
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
Thomas Renninger095adbb2012-07-31 17:41:09 +0200259int __init
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300260acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261{
Tejun Heoeca9ad32011-05-02 14:18:52 +0200262 u64 start, end;
Tang Chen27168d32013-02-22 16:33:46 -0800263 u32 hotpluggable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 int node, pxm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265
Andi Kleend22fe802006-02-03 21:51:26 +0100266 if (srat_disabled())
Davidlohr Bueso479a99a2013-01-08 16:18:41 -0800267 goto out_err;
268 if (ma->header.length != sizeof(struct acpi_srat_mem_affinity))
269 goto out_err_bad_srat;
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300270 if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0)
Davidlohr Bueso479a99a2013-01-08 16:18:41 -0800271 goto out_err;
Tang Chen27168d32013-02-22 16:33:46 -0800272 hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE;
273 if (hotpluggable && !save_add_info())
Davidlohr Bueso479a99a2013-01-08 16:18:41 -0800274 goto out_err;
275
Alexey Starikovskiy15a58ed2007-02-02 19:48:22 +0300276 start = ma->base_address;
277 end = start + ma->length;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 pxm = ma->proximity_domain;
Kurt Garloffcd298f62012-01-17 04:20:31 -0500279 if (acpi_srat_revision <= 1)
280 pxm &= 0xff;
Davidlohr Bueso479a99a2013-01-08 16:18:41 -0800281
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 node = setup_node(pxm);
283 if (node < 0) {
284 printk(KERN_ERR "SRAT: Too many proximity domains.\n");
Davidlohr Bueso479a99a2013-01-08 16:18:41 -0800285 goto out_err_bad_srat;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286 }
Tejun Heoef396ec2011-02-16 17:11:07 +0100287
Davidlohr Bueso479a99a2013-01-08 16:18:41 -0800288 if (numa_add_memblk(node, start, end) < 0)
289 goto out_err_bad_srat;
Andi Kleen68a3a7f2006-04-07 19:49:18 +0200290
Yasuaki Ishimatsu4af463d2012-06-04 11:42:32 +0900291 node_set(node, numa_nodes_parsed);
292
Tang Chen27168d32013-02-22 16:33:46 -0800293 printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n",
Bjorn Helgaas365811d2012-05-29 15:06:29 -0700294 node, pxm,
Tang Chen27168d32013-02-22 16:33:46 -0800295 (unsigned long long) start, (unsigned long long) end - 1,
296 hotpluggable ? "Hot Pluggable": "");
297
Tang Chen01a178a2013-02-22 16:33:49 -0800298 handle_movablemem(node, start, end, hotpluggable);
Davidlohr Bueso479a99a2013-01-08 16:18:41 -0800299
Thomas Renninger095adbb2012-07-31 17:41:09 +0200300 return 0;
Davidlohr Bueso479a99a2013-01-08 16:18:41 -0800301out_err_bad_srat:
302 bad_srat();
303out_err:
304 return -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305}
306
307void __init acpi_numa_arch_fixup(void) {}
308
Tejun Heoa9aec562011-02-16 12:13:06 +0100309int __init x86_acpi_numa_init(void)
310{
311 int ret;
312
313 ret = acpi_numa_init();
314 if (ret < 0)
315 return ret;
316 return srat_disabled() ? -EINVAL : 0;
317}