/*
 * Simple NUMA memory policy for the Linux kernel.
 *
 * Copyright 2003,2004 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License, version 2.
 *
 * NUMA policy allows the user to give hints about which node(s) memory
 * should be allocated from.
 *
 * Support four policies per VMA and per process:
 *
 * The VMA policy has priority over the process policy for a page fault.
 *
 * interleave	Allocate memory interleaved over a set of nodes,
 *		with normal fallback if it fails.
 *		For VMA based allocations this interleaves based on the
 *		offset into the backing object or offset into the mapping
 *		for anonymous memory. For process policy a process counter
 *		is used.
 * bind		Only allocate memory on a specific set of nodes,
 *		no fallback.
 * preferred	Try a specific node first before normal fallback.
 *		As a special case node -1 here means do the allocation
 *		on the local CPU. This is normally identical to default,
 *		but useful to set in a VMA when you have a non-default
 *		process policy.
 * default	Allocate on the local node first, or when on a VMA
 *		use the process policy. This is what Linux always did
 *		in a NUMA aware kernel and still does by, ahem, default.
 *
 * The process policy is applied for most non-interrupt memory allocations
 * in that process' context. Interrupts ignore the policies and always
 * try to allocate on the local CPU. The VMA policy is only applied for
 * memory allocations for a VMA in the VM.
 *
 * Currently there are a few corner cases in swapping where the policy
 * is not applied, but the majority should be handled. When process policy
 * is used it is not remembered over swap outs/swap ins.
 *
 * Only the highest zone in the zone hierarchy gets policied. Allocations
 * requesting a lower zone just use default policy. This implies that
 * on systems with highmem, kernel lowmem allocations don't get policied.
 * Same with GFP_DMA allocations.
 *
 * For shmfs/tmpfs/hugetlbfs shared memory the policy is shared between
 * all users and remembered even when nobody has memory mapped.
 */
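
/*
 * Illustrative sketch, not authoritative: from user space the policies
 * described above are selected through the syscalls implemented in this
 * file, roughly:
 *
 *	unsigned long mask = (1UL << 0) | (1UL << 1);	 nodes 0 and 1
 *	set_mempolicy(MPOL_INTERLEAVE, &mask, 8 * sizeof(mask));
 *	mbind(addr, len, MPOL_BIND, &mask, 8 * sizeof(mask), MPOL_MF_STRICT);
 *
 * The exact maxnode convention (see get_nodes() below) and the libc/libnuma
 * wrappers differ in detail; this only connects the modes above to the
 * entry points below.
 */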

/* Notebook:
   fix mmap readahead to honour policy and enable policy for any page cache
   object
   statistics for bigpages
   global policy for page cache? currently it uses process policy. Requires
   first item above.
   handle mremap for shared memory (currently ignored for the policy)
   grows down?
   make bind policy root only? It can trigger oom much faster and the
   kernel is not always grateful with that.
   could replace all the switch()es with a mempolicy_ops structure.
*/

#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/nodemask.h>
#include <linux/cpuset.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/compat.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>

static kmem_cache_t *policy_cache;
static kmem_cache_t *sn_cache;

#define PDprintk(fmt...)

/* Highest zone. A specific allocation for a zone below that is not
   policied. */
static int policy_zone;

struct mempolicy default_policy = {
	.refcnt = ATOMIC_INIT(1), /* never free it */
	.policy = MPOL_DEFAULT,
};

/* Do sanity checking on a policy */
static int mpol_check_policy(int mode, nodemask_t *nodes)
{
	int empty = nodes_empty(*nodes);

	switch (mode) {
	case MPOL_DEFAULT:
		if (!empty)
			return -EINVAL;
		break;
	case MPOL_BIND:
	case MPOL_INTERLEAVE:
		/* Preferred will only use the first bit, but allow
		   more for now. */
		if (empty)
			return -EINVAL;
		break;
	}
	return nodes_subset(*nodes, node_online_map) ? 0 : -EINVAL;
}

/* Copy a node mask from user space. */
static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask,
		     unsigned long maxnode, int mode)
{
	unsigned long k;
	unsigned long nlongs;
	unsigned long endmask;

	--maxnode;
	nodes_clear(*nodes);
	if (maxnode == 0 || !nmask)
		return 0;

	nlongs = BITS_TO_LONGS(maxnode);
	if ((maxnode % BITS_PER_LONG) == 0)
		endmask = ~0UL;
	else
		endmask = (1UL << (maxnode % BITS_PER_LONG)) - 1;

	/* When the user specified more nodes than supported just check
	   that the unsupported part is all zero. */
	if (nlongs > BITS_TO_LONGS(MAX_NUMNODES)) {
		if (nlongs > PAGE_SIZE/sizeof(long))
			return -EINVAL;
		for (k = BITS_TO_LONGS(MAX_NUMNODES); k < nlongs; k++) {
			unsigned long t;
			if (get_user(t, nmask + k))
				return -EFAULT;
			if (k == nlongs - 1) {
				if (t & endmask)
					return -EINVAL;
			} else if (t)
				return -EINVAL;
		}
		nlongs = BITS_TO_LONGS(MAX_NUMNODES);
		endmask = ~0UL;
	}

	if (copy_from_user(nodes_addr(*nodes), nmask, nlongs*sizeof(unsigned long)))
		return -EFAULT;
	nodes_addr(*nodes)[nlongs-1] &= endmask;
	/* Update current mems_allowed */
	cpuset_update_current_mems_allowed();
	/* Ignore nodes not set in current->mems_allowed */
	/* AK: shouldn't this error out instead? */
	cpuset_restrict_to_mems_allowed(nodes_addr(*nodes));
	return mpol_check_policy(mode, nodes);
}

/* Generate a custom zonelist for the BIND policy. */
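/* Zones are added highest-first for each node in the mask, so the resulting
   fallback order walks all usable zones of one node before trying the next
   node in the mask. */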
static struct zonelist *bind_zonelist(nodemask_t *nodes)
{
	struct zonelist *zl;
	int num, max, nd;

	max = 1 + MAX_NR_ZONES * nodes_weight(*nodes);
	zl = kmalloc(sizeof(void *) * max, GFP_KERNEL);
	if (!zl)
		return NULL;
	num = 0;
	for_each_node_mask(nd, *nodes) {
		int k;
		for (k = MAX_NR_ZONES-1; k >= 0; k--) {
			struct zone *z = &NODE_DATA(nd)->node_zones[k];
			if (!z->present_pages)
				continue;
			zl->zones[num++] = z;
			if (k > policy_zone)
				policy_zone = k;
		}
	}
	zl->zones[num] = NULL;
	return zl;
}

/* Create a new policy */
static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
{
	struct mempolicy *policy;

	PDprintk("setting mode %d nodes[0] %lx\n", mode, nodes_addr(*nodes)[0]);
	if (mode == MPOL_DEFAULT)
		return NULL;
	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
	if (!policy)
		return ERR_PTR(-ENOMEM);
	atomic_set(&policy->refcnt, 1);
	switch (mode) {
	case MPOL_INTERLEAVE:
		policy->v.nodes = *nodes;
		break;
	case MPOL_PREFERRED:
		policy->v.preferred_node = first_node(*nodes);
		if (policy->v.preferred_node >= MAX_NUMNODES)
			policy->v.preferred_node = -1;
		break;
	case MPOL_BIND:
		policy->v.zonelist = bind_zonelist(nodes);
		if (policy->v.zonelist == NULL) {
			kmem_cache_free(policy_cache, policy);
			return ERR_PTR(-ENOMEM);
		}
		break;
	}
	policy->policy = mode;
	return policy;
}

/* Ensure all existing pages follow the policy. */
static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
		unsigned long addr, unsigned long end, nodemask_t *nodes)
{
	pte_t *orig_pte;
	pte_t *pte;

	spin_lock(&mm->page_table_lock);
	orig_pte = pte = pte_offset_map(pmd, addr);
	do {
		unsigned long pfn;
		unsigned int nid;

		if (!pte_present(*pte))
			continue;
		pfn = pte_pfn(*pte);
		if (!pfn_valid(pfn))
			continue;
		nid = pfn_to_nid(pfn);
		if (!node_isset(nid, *nodes))
			break;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap(orig_pte);
	spin_unlock(&mm->page_table_lock);
	return addr != end;
}

static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
		unsigned long addr, unsigned long end, nodemask_t *nodes)
{
	pmd_t *pmd;
	unsigned long next;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		if (check_pte_range(mm, pmd, addr, next, nodes))
			return -EIO;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
		unsigned long addr, unsigned long end, nodemask_t *nodes)
{
	pud_t *pud;
	unsigned long next;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		if (check_pmd_range(mm, pud, addr, next, nodes))
			return -EIO;
	} while (pud++, addr = next, addr != end);
	return 0;
}

static inline int check_pgd_range(struct mm_struct *mm,
		unsigned long addr, unsigned long end, nodemask_t *nodes)
{
	pgd_t *pgd;
	unsigned long next;

	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		if (check_pud_range(mm, pgd, addr, next, nodes))
			return -EIO;
	} while (pgd++, addr = next, addr != end);
	return 0;
}

/* Step 1: check the range */
static struct vm_area_struct *
check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
	    nodemask_t *nodes, unsigned long flags)
{
	int err;
	struct vm_area_struct *first, *vma, *prev;

	first = find_vma(mm, start);
	if (!first)
		return ERR_PTR(-EFAULT);
	prev = NULL;
	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
		if (!vma->vm_next && vma->vm_end < end)
			return ERR_PTR(-EFAULT);
		if (prev && prev->vm_end < vma->vm_start)
			return ERR_PTR(-EFAULT);
		if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) {
			unsigned long endvma = vma->vm_end;
			if (endvma > end)
				endvma = end;
			if (vma->vm_start > start)
				start = vma->vm_start;
			err = check_pgd_range(vma->vm_mm,
					      start, endvma, nodes);
			if (err) {
				first = ERR_PTR(err);
				break;
			}
		}
		prev = vma;
	}
	return first;
}

/* Apply policy to a single VMA */
static int policy_vma(struct vm_area_struct *vma, struct mempolicy *new)
{
	int err = 0;
	struct mempolicy *old = vma->vm_policy;

	PDprintk("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
		 vma->vm_ops, vma->vm_file,
		 vma->vm_ops ? vma->vm_ops->set_policy : NULL);

	if (vma->vm_ops && vma->vm_ops->set_policy)
		err = vma->vm_ops->set_policy(vma, new);
	if (!err) {
		mpol_get(new);
		vma->vm_policy = new;
		mpol_free(old);
	}
	return err;
}

/* Step 2: apply policy to a range and do splits. */
static int mbind_range(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end, struct mempolicy *new)
{
	struct vm_area_struct *next;
	int err;

	err = 0;
	for (; vma && vma->vm_start < end; vma = next) {
		next = vma->vm_next;
		if (vma->vm_start < start)
			err = split_vma(vma->vm_mm, vma, start, 1);
		if (!err && vma->vm_end > end)
			err = split_vma(vma->vm_mm, vma, end, 0);
		if (!err)
			err = policy_vma(vma, new);
		if (err)
			break;
	}
	return err;
}

/* Change policy for a memory range */
asmlinkage long sys_mbind(unsigned long start, unsigned long len,
			  unsigned long mode,
			  unsigned long __user *nmask, unsigned long maxnode,
			  unsigned flags)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm = current->mm;
	struct mempolicy *new;
	unsigned long end;
	nodemask_t nodes;
	int err;

	if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX)
		return -EINVAL;
	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (mode == MPOL_DEFAULT)
		flags &= ~MPOL_MF_STRICT;
	len = (len + PAGE_SIZE - 1) & PAGE_MASK;
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;

	err = get_nodes(&nodes, nmask, maxnode, mode);
	if (err)
		return err;

	new = mpol_new(mode, &nodes);
	if (IS_ERR(new))
		return PTR_ERR(new);

	PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n", start, start + len,
		 mode, nodes_addr(nodes)[0]);

	down_write(&mm->mmap_sem);
	vma = check_range(mm, start, end, &nodes, flags);
	err = PTR_ERR(vma);
	if (!IS_ERR(vma))
		err = mbind_range(vma, start, end, new);
	up_write(&mm->mmap_sem);
	mpol_free(new);
	return err;
}

/* Set the process memory policy */
asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
				  unsigned long maxnode)
{
	int err;
	struct mempolicy *new;
	nodemask_t nodes;

	if (mode < 0 || mode > MPOL_MAX)
		return -EINVAL;
	err = get_nodes(&nodes, nmask, maxnode, mode);
	if (err)
		return err;
	new = mpol_new(mode, &nodes);
	if (IS_ERR(new))
		return PTR_ERR(new);
	mpol_free(current->mempolicy);
	current->mempolicy = new;
	if (new && new->policy == MPOL_INTERLEAVE)
		current->il_next = first_node(new->v.nodes);
	return 0;
}

/* Fill a zone bitmap for a policy */
static void get_zonemask(struct mempolicy *p, nodemask_t *nodes)
{
	int i;

	nodes_clear(*nodes);
	switch (p->policy) {
	case MPOL_BIND:
		for (i = 0; p->v.zonelist->zones[i]; i++)
			node_set(p->v.zonelist->zones[i]->zone_pgdat->node_id, *nodes);
		break;
	case MPOL_DEFAULT:
		break;
	case MPOL_INTERLEAVE:
		*nodes = p->v.nodes;
		break;
	case MPOL_PREFERRED:
		/* or use current node instead of online map? */
		if (p->v.preferred_node < 0)
			*nodes = node_online_map;
		else
			node_set(p->v.preferred_node, *nodes);
		break;
	default:
		BUG();
	}
}

static int lookup_node(struct mm_struct *mm, unsigned long addr)
{
	struct page *p;
	int err;

	err = get_user_pages(current, mm, addr & PAGE_MASK, 1, 0, 0, &p, NULL);
	if (err >= 0) {
		err = page_to_nid(p);
		put_page(p);
	}
	return err;
}

/* Copy a kernel node mask to user space */
static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
			      nodemask_t *nodes)
{
	unsigned long copy = ALIGN(maxnode-1, 64) / 8;
	const int nbytes = BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long);

	if (copy > nbytes) {
		if (copy > PAGE_SIZE)
			return -EINVAL;
		if (clear_user((char __user *)mask + nbytes, copy - nbytes))
			return -EFAULT;
		copy = nbytes;
	}
	return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0;
}

/* Retrieve NUMA policy */
asmlinkage long sys_get_mempolicy(int __user *policy,
				  unsigned long __user *nmask,
				  unsigned long maxnode,
				  unsigned long addr, unsigned long flags)
{
	int err, pval;
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma = NULL;
	struct mempolicy *pol = current->mempolicy;

	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
		return -EINVAL;
	if (nmask != NULL && maxnode < MAX_NUMNODES)
		return -EINVAL;
	if (flags & MPOL_F_ADDR) {
		down_read(&mm->mmap_sem);
		vma = find_vma_intersection(mm, addr, addr+1);
		if (!vma) {
			up_read(&mm->mmap_sem);
			return -EFAULT;
		}
		if (vma->vm_ops && vma->vm_ops->get_policy)
			pol = vma->vm_ops->get_policy(vma, addr);
		else
			pol = vma->vm_policy;
	} else if (addr)
		return -EINVAL;

	if (!pol)
		pol = &default_policy;

	if (flags & MPOL_F_NODE) {
		if (flags & MPOL_F_ADDR) {
			err = lookup_node(mm, addr);
			if (err < 0)
				goto out;
			pval = err;
		} else if (pol == current->mempolicy &&
				pol->policy == MPOL_INTERLEAVE) {
			pval = current->il_next;
		} else {
			err = -EINVAL;
			goto out;
		}
	} else
		pval = pol->policy;

	if (vma) {
		up_read(&current->mm->mmap_sem);
		vma = NULL;
	}

	if (policy && put_user(pval, policy))
		return -EFAULT;

	err = 0;
	if (nmask) {
		nodemask_t nodes;
		get_zonemask(pol, &nodes);
		err = copy_nodes_to_user(nmask, maxnode, &nodes);
	}

 out:
	if (vma)
		up_read(&current->mm->mmap_sem);
	return err;
}

#ifdef CONFIG_COMPAT

asmlinkage long compat_sys_get_mempolicy(int __user *policy,
				     compat_ulong_t __user *nmask,
				     compat_ulong_t maxnode,
				     compat_ulong_t addr, compat_ulong_t flags)
{
	long err;
	unsigned long __user *nm = NULL;
	unsigned long nr_bits, alloc_size;
	DECLARE_BITMAP(bm, MAX_NUMNODES);

	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;

	if (nmask)
		nm = compat_alloc_user_space(alloc_size);

	err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags);

	if (!err && nmask) {
		err = copy_from_user(bm, nm, alloc_size);
		/* ensure entire bitmap is zeroed */
		err |= clear_user(nmask, ALIGN(maxnode-1, 8) / 8);
		err |= compat_put_bitmap(nmask, bm, nr_bits);
	}

	return err;
}

asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask,
				     compat_ulong_t maxnode)
{
	long err = 0;
	unsigned long __user *nm = NULL;
	unsigned long nr_bits, alloc_size;
	DECLARE_BITMAP(bm, MAX_NUMNODES);

	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;

	if (nmask) {
		err = compat_get_bitmap(bm, nmask, nr_bits);
		nm = compat_alloc_user_space(alloc_size);
		err |= copy_to_user(nm, bm, alloc_size);
	}

	if (err)
		return -EFAULT;

	return sys_set_mempolicy(mode, nm, nr_bits+1);
}

asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
			     compat_ulong_t mode, compat_ulong_t __user *nmask,
			     compat_ulong_t maxnode, compat_ulong_t flags)
{
	long err = 0;
	unsigned long __user *nm = NULL;
	unsigned long nr_bits, alloc_size;
	nodemask_t bm;

	nr_bits = min_t(unsigned long, maxnode-1, MAX_NUMNODES);
	alloc_size = ALIGN(nr_bits, BITS_PER_LONG) / 8;

	if (nmask) {
		err = compat_get_bitmap(nodes_addr(bm), nmask, nr_bits);
		nm = compat_alloc_user_space(alloc_size);
		err |= copy_to_user(nm, nodes_addr(bm), alloc_size);
	}

	if (err)
		return -EFAULT;

	return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
}

#endif

/* Return effective policy for a VMA */
struct mempolicy *
get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr)
{
	struct mempolicy *pol = task->mempolicy;

	if (vma) {
		if (vma->vm_ops && vma->vm_ops->get_policy)
			pol = vma->vm_ops->get_policy(vma, addr);
		else if (vma->vm_policy &&
				vma->vm_policy->policy != MPOL_DEFAULT)
			pol = vma->vm_policy;
	}
	if (!pol)
		pol = &default_policy;
	return pol;
}

/* Return a zonelist representing a mempolicy */
static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
{
	int nd;

	switch (policy->policy) {
	case MPOL_PREFERRED:
		nd = policy->v.preferred_node;
		if (nd < 0)
			nd = numa_node_id();
		break;
	case MPOL_BIND:
		/* Lower zones don't get a policy applied */
		/* Careful: current->mems_allowed might have moved */
		if (gfp_zone(gfp) >= policy_zone)
			if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
				return policy->v.zonelist;
		/*FALL THROUGH*/
	case MPOL_INTERLEAVE: /* should not happen */
	case MPOL_DEFAULT:
		nd = numa_node_id();
		break;
	default:
		nd = 0;
		BUG();
	}
	return NODE_DATA(nd)->node_zonelists + gfp_zone(gfp);
}

/* Do dynamic interleaving for a process */
static unsigned interleave_nodes(struct mempolicy *policy)
{
	unsigned nid, next;
	struct task_struct *me = current;

	nid = me->il_next;
	next = next_node(nid, policy->v.nodes);
	if (next >= MAX_NUMNODES)
		next = first_node(policy->v.nodes);
	me->il_next = next;
	return nid;
}

/* Do static interleaving for a VMA with known offset. */
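/* Example: with an interleave mask of nodes {0,2,3} and page offset 7,
   nnodes == 3 and target == 7 % 3 == 1, so the walk below returns the
   second node set in the mask, i.e. node 2. */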
static unsigned offset_il_node(struct mempolicy *pol,
		struct vm_area_struct *vma, unsigned long off)
{
	unsigned nnodes = nodes_weight(pol->v.nodes);
	unsigned target = (unsigned)off % nnodes;
	int c;
	int nid = -1;

	c = 0;
	do {
		nid = next_node(nid, pol->v.nodes);
		c++;
	} while (c <= target);
	return nid;
}

/* Allocate a page in interleaved policy.
   Own path because it needs to do special accounting. */
static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
					  unsigned nid)
{
	struct zonelist *zl;
	struct page *page;

	zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
	page = __alloc_pages(gfp, order, zl);
	if (page && page_zone(page) == zl->zones[0]) {
		zone_pcp(zl->zones[0], get_cpu())->interleave_hit++;
		put_cpu();
	}
	return page;
}

/**
 *	alloc_page_vma	- Allocate a page for a VMA.
 *
 *	@gfp:
 *	%GFP_USER     user allocation.
 *	%GFP_KERNEL   kernel allocations,
 *	%GFP_HIGHMEM  highmem/user allocations,
 *	%GFP_FS       allocation should not call back into a file system.
 *	%GFP_ATOMIC   don't sleep.
 *
 *	@vma:  Pointer to VMA or NULL if not available.
 *	@addr: Virtual address of the allocation. Must be inside the VMA.
 *
 *	This function allocates a page from the kernel page pool and applies
 *	the NUMA policy associated with the VMA or the current process.
 *	When VMA is not NULL the caller must hold down_read on the mmap_sem of
 *	the mm_struct of the VMA to prevent it from going away. Should be used
 *	for all allocations for pages that will be mapped into user space.
 *	Returns NULL when no page can be allocated.
 *
 *	Should be called with the mmap_sem of the vma held.
 */
struct page *
alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
{
	struct mempolicy *pol = get_vma_policy(current, vma, addr);

	cpuset_update_current_mems_allowed();

	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
		unsigned nid;
		if (vma) {
			unsigned long off;
			off = vma->vm_pgoff;
			off += (addr - vma->vm_start) >> PAGE_SHIFT;
			nid = offset_il_node(pol, vma, off);
		} else {
			/* fall back to process interleaving */
			nid = interleave_nodes(pol);
		}
		return alloc_page_interleave(gfp, 0, nid);
	}
	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
}
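
/*
 * Typical use of alloc_page_vma() in a fault path (illustrative sketch only;
 * the surrounding fault handler and locking shown here are not part of this
 * file):
 *
 *	down_read(&mm->mmap_sem);
 *	...
 *	page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 *	if (!page)
 *		return VM_FAULT_OOM;
 *	...
 *	up_read(&mm->mmap_sem);
 */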

/**
 *	alloc_pages_current - Allocate pages.
 *
 *	@gfp:
 *	%GFP_USER     user allocation,
 *	%GFP_KERNEL   kernel allocation,
 *	%GFP_HIGHMEM  highmem allocation,
 *	%GFP_FS       don't call back into a file system.
 *	%GFP_ATOMIC   don't sleep.
 *	@order: Power of two of allocation size in pages. 0 is a single page.
 *
 *	Allocate a page from the kernel page pool. When not in interrupt
 *	context, apply the current process' NUMA policy.
 *	Returns NULL when no page can be allocated.
 *
 *	Don't call cpuset_update_current_mems_allowed() unless
 *	1) it's ok to take cpuset_sem (can WAIT), and
 *	2) allocating for current task (not interrupt).
 */
struct page *alloc_pages_current(gfp_t gfp, unsigned order)
{
	struct mempolicy *pol = current->mempolicy;

	if ((gfp & __GFP_WAIT) && !in_interrupt())
		cpuset_update_current_mems_allowed();
	if (!pol || in_interrupt())
		pol = &default_policy;
	if (pol->policy == MPOL_INTERLEAVE)
		return alloc_page_interleave(gfp, order, interleave_nodes(pol));
	return __alloc_pages(gfp, order, zonelist_policy(gfp, pol));
}
EXPORT_SYMBOL(alloc_pages_current);

/* Slow path of a mempolicy copy */
struct mempolicy *__mpol_copy(struct mempolicy *old)
{
	struct mempolicy *new = kmem_cache_alloc(policy_cache, GFP_KERNEL);

	if (!new)
		return ERR_PTR(-ENOMEM);
	*new = *old;
	atomic_set(&new->refcnt, 1);
	if (new->policy == MPOL_BIND) {
		int sz = ksize(old->v.zonelist);
		new->v.zonelist = kmalloc(sz, SLAB_KERNEL);
		if (!new->v.zonelist) {
			kmem_cache_free(policy_cache, new);
			return ERR_PTR(-ENOMEM);
		}
		memcpy(new->v.zonelist, old->v.zonelist, sz);
	}
	return new;
}

/* Slow path of a mempolicy comparison */
int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
	if (!a || !b)
		return 0;
	if (a->policy != b->policy)
		return 0;
	switch (a->policy) {
	case MPOL_DEFAULT:
		return 1;
	case MPOL_INTERLEAVE:
		return nodes_equal(a->v.nodes, b->v.nodes);
	case MPOL_PREFERRED:
		return a->v.preferred_node == b->v.preferred_node;
	case MPOL_BIND: {
		int i;
		for (i = 0; a->v.zonelist->zones[i]; i++)
			if (a->v.zonelist->zones[i] != b->v.zonelist->zones[i])
				return 0;
		return b->v.zonelist->zones[i] == NULL;
	}
	default:
		BUG();
		return 0;
	}
}

/* Slow path of a mpol destructor. */
void __mpol_free(struct mempolicy *p)
{
	if (!atomic_dec_and_test(&p->refcnt))
		return;
	if (p->policy == MPOL_BIND)
		kfree(p->v.zonelist);
	p->policy = MPOL_DEFAULT;
	kmem_cache_free(policy_cache, p);
}

/*
 * Hugetlb policy. Same as above, just works with node numbers instead of
 * zonelists.
 */

/* Find first node suitable for an allocation */
int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
{
	struct mempolicy *pol = get_vma_policy(current, vma, addr);

	switch (pol->policy) {
	case MPOL_DEFAULT:
		return numa_node_id();
	case MPOL_BIND:
		return pol->v.zonelist->zones[0]->zone_pgdat->node_id;
	case MPOL_INTERLEAVE:
		return interleave_nodes(pol);
	case MPOL_PREFERRED:
		return pol->v.preferred_node >= 0 ?
				pol->v.preferred_node : numa_node_id();
	}
	BUG();
	return 0;
}

/* Find secondary valid nodes for an allocation */
int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
{
	struct mempolicy *pol = get_vma_policy(current, vma, addr);

	switch (pol->policy) {
	case MPOL_PREFERRED:
	case MPOL_DEFAULT:
	case MPOL_INTERLEAVE:
		return 1;
	case MPOL_BIND: {
		struct zone **z;
		for (z = pol->v.zonelist->zones; *z; z++)
			if ((*z)->zone_pgdat->node_id == nid)
				return 1;
		return 0;
	}
	default:
		BUG();
		return 0;
	}
}

/*
 * Shared memory backing store policy support.
 *
 * Remember policies even when nobody has shared memory mapped.
 * The policies are kept in a red-black tree linked from the inode.
 * They are protected by the sp->lock spinlock, which should be held
 * for any accesses to the tree.
 */
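
/*
 * Sketch of how a filesystem is expected to wire this up (illustrative
 * only; the shmem_* naming refers to tmpfs, not code in this file): the
 * per-inode struct shared_policy lives in the filesystem's inode info,
 * its vm_operations_struct ->set_policy hook calls mpol_set_shared_policy()
 * and ->get_policy calls mpol_shared_policy_lookup() with the faulting
 * page's pgoff, so a policy set through one mapping is seen by all
 * mappings of the same object.
 */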

/* lookup first element intersecting start-end */
/* Caller holds sp->lock */
static struct sp_node *
sp_lookup(struct shared_policy *sp, unsigned long start, unsigned long end)
{
	struct rb_node *n = sp->root.rb_node;

	while (n) {
		struct sp_node *p = rb_entry(n, struct sp_node, nd);

		if (start >= p->end)
			n = n->rb_right;
		else if (end <= p->start)
			n = n->rb_left;
		else
			break;
	}
	if (!n)
		return NULL;
	for (;;) {
		struct sp_node *w = NULL;
		struct rb_node *prev = rb_prev(n);
		if (!prev)
			break;
		w = rb_entry(prev, struct sp_node, nd);
		if (w->end <= start)
			break;
		n = prev;
	}
	return rb_entry(n, struct sp_node, nd);
}

/* Insert a new shared policy into the list. */
/* Caller holds sp->lock */
static void sp_insert(struct shared_policy *sp, struct sp_node *new)
{
	struct rb_node **p = &sp->root.rb_node;
	struct rb_node *parent = NULL;
	struct sp_node *nd;

	while (*p) {
		parent = *p;
		nd = rb_entry(parent, struct sp_node, nd);
		if (new->start < nd->start)
			p = &(*p)->rb_left;
		else if (new->end > nd->end)
			p = &(*p)->rb_right;
		else
			BUG();
	}
	rb_link_node(&new->nd, parent, p);
	rb_insert_color(&new->nd, &sp->root);
	PDprintk("inserting %lx-%lx: %d\n", new->start, new->end,
		 new->policy ? new->policy->policy : 0);
}

/* Find shared policy intersecting idx */
struct mempolicy *
mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
{
	struct mempolicy *pol = NULL;
	struct sp_node *sn;

	if (!sp->root.rb_node)
		return NULL;
	spin_lock(&sp->lock);
	sn = sp_lookup(sp, idx, idx+1);
	if (sn) {
		mpol_get(sn->policy);
		pol = sn->policy;
	}
	spin_unlock(&sp->lock);
	return pol;
}

static void sp_delete(struct shared_policy *sp, struct sp_node *n)
{
	PDprintk("deleting %lx-%lx\n", n->start, n->end);
	rb_erase(&n->nd, &sp->root);
	mpol_free(n->policy);
	kmem_cache_free(sn_cache, n);
}

struct sp_node *
sp_alloc(unsigned long start, unsigned long end, struct mempolicy *pol)
{
	struct sp_node *n = kmem_cache_alloc(sn_cache, GFP_KERNEL);

	if (!n)
		return NULL;
	n->start = start;
	n->end = end;
	mpol_get(pol);
	n->policy = pol;
	return n;
}

/* Replace a policy range. */
static int shared_policy_replace(struct shared_policy *sp, unsigned long start,
				 unsigned long end, struct sp_node *new)
{
	struct sp_node *n, *new2 = NULL;

restart:
	spin_lock(&sp->lock);
	n = sp_lookup(sp, start, end);
	/* Take care of old policies in the same range. */
	while (n && n->start < end) {
		struct rb_node *next = rb_next(&n->nd);
		if (n->start >= start) {
			if (n->end <= end)
				sp_delete(sp, n);
			else
				n->start = end;
		} else {
			/* Old policy spanning whole new range. */
			if (n->end > end) {
				if (!new2) {
					spin_unlock(&sp->lock);
					new2 = sp_alloc(end, n->end, n->policy);
					if (!new2)
						return -ENOMEM;
					goto restart;
				}
				n->end = start;
				sp_insert(sp, new2);
				new2 = NULL;
				break;
			} else
				n->end = start;
		}
		if (!next)
			break;
		n = rb_entry(next, struct sp_node, nd);
	}
	if (new)
		sp_insert(sp, new);
	spin_unlock(&sp->lock);
	if (new2) {
		mpol_free(new2->policy);
		kmem_cache_free(sn_cache, new2);
	}
	return 0;
}

int mpol_set_shared_policy(struct shared_policy *info,
			struct vm_area_struct *vma, struct mempolicy *npol)
{
	int err;
	struct sp_node *new = NULL;
	unsigned long sz = vma_pages(vma);

	PDprintk("set_shared_policy %lx sz %lu %d %lx\n",
		 vma->vm_pgoff,
		 sz, npol ? npol->policy : -1,
		 npol ? nodes_addr(npol->v.nodes)[0] : -1);

	if (npol) {
		new = sp_alloc(vma->vm_pgoff, vma->vm_pgoff + sz, npol);
		if (!new)
			return -ENOMEM;
	}
	err = shared_policy_replace(info, vma->vm_pgoff, vma->vm_pgoff+sz, new);
	if (err && new)
		kmem_cache_free(sn_cache, new);
	return err;
}

/* Free a backing policy store on inode delete. */
void mpol_free_shared_policy(struct shared_policy *p)
{
	struct sp_node *n;
	struct rb_node *next;

	if (!p->root.rb_node)
		return;
	spin_lock(&p->lock);
	next = rb_first(&p->root);
	while (next) {
		n = rb_entry(next, struct sp_node, nd);
		next = rb_next(&n->nd);
		rb_erase(&n->nd, &p->root);
		mpol_free(n->policy);
		kmem_cache_free(sn_cache, n);
	}
	spin_unlock(&p->lock);
}

/* assumes fs == KERNEL_DS */
void __init numa_policy_init(void)
{
	policy_cache = kmem_cache_create("numa_policy",
					 sizeof(struct mempolicy),
					 0, SLAB_PANIC, NULL, NULL);

	sn_cache = kmem_cache_create("shared_policy_node",
				     sizeof(struct sp_node),
				     0, SLAB_PANIC, NULL, NULL);

	/* Set interleaving policy for system init. This way not all
	   the data structures allocated at system boot end up in node zero. */

	if (sys_set_mempolicy(MPOL_INTERLEAVE, nodes_addr(node_online_map),
			      MAX_NUMNODES) < 0)
		printk("numa_policy_init: interleaving failed\n");
}

/* Reset policy of current process to default.
 * Assumes fs == KERNEL_DS */
void numa_default_policy(void)
{
	sys_set_mempolicy(MPOL_DEFAULT, NULL, 0);
}