/*
 * Generic hugetlb support.
 * (C) William Irwin, April 2004
 */
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>

#include <asm/page.h>
#include <asm/pgtable.h>

#include <linux/hugetlb.h>
#include "internal.h"

const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
static unsigned long surplus_huge_pages;
unsigned long max_huge_pages;
static struct list_head hugepage_freelists[MAX_NUMNODES];
static unsigned int nr_huge_pages_node[MAX_NUMNODES];
static unsigned int free_huge_pages_node[MAX_NUMNODES];
static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
unsigned long hugepages_treat_as_movable;

/*
 * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
 */
static DEFINE_SPINLOCK(hugetlb_lock);

static void clear_huge_page(struct page *page, unsigned long addr)
{
	int i;

	might_sleep();
	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
		cond_resched();
		clear_user_highpage(page + i, addr + i * PAGE_SIZE);
	}
}

static void copy_huge_page(struct page *dst, struct page *src,
			   unsigned long addr, struct vm_area_struct *vma)
{
	int i;

	might_sleep();
	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
		cond_resched();
		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE, vma);
	}
}

static void enqueue_huge_page(struct page *page)
{
	int nid = page_to_nid(page);
	list_add(&page->lru, &hugepage_freelists[nid]);
	free_huge_pages++;
	free_huge_pages_node[nid]++;
}

static struct page *dequeue_huge_page(struct vm_area_struct *vma,
				unsigned long address)
{
	int nid;
	struct page *page = NULL;
	struct mempolicy *mpol;
	struct zonelist *zonelist = huge_zonelist(vma, address,
					htlb_alloc_mask, &mpol);
	struct zone **z;

	for (z = zonelist->zones; *z; z++) {
		nid = zone_to_nid(*z);
		if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
		    !list_empty(&hugepage_freelists[nid])) {
			page = list_entry(hugepage_freelists[nid].next,
					  struct page, lru);
			list_del(&page->lru);
			free_huge_pages--;
			free_huge_pages_node[nid]--;
			break;
		}
	}
	mpol_free(mpol);	/* unref if mpol !NULL */
	return page;
}

static void update_and_free_page(struct page *page)
{
	int i;
	nr_huge_pages--;
	nr_huge_pages_node[page_to_nid(page)]--;
	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
		page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
				1 << PG_referenced | 1 << PG_dirty |
				1 << PG_active | 1 << PG_reserved |
				1 << PG_private | 1 << PG_writeback);
	}
	set_compound_page_dtor(page, NULL);
	set_page_refcounted(page);
	__free_pages(page, HUGETLB_PAGE_ORDER);
}

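/*
 * Compound page destructor, run when the last reference to a huge page is
 * dropped.  If the page's node is still accounted as holding surplus pages,
 * the page is handed back to the buddy allocator and the surplus counters
 * are decremented; otherwise it is returned to the hugepage free lists.
 */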
static void free_huge_page(struct page *page)
{
	int nid = page_to_nid(page);

	BUG_ON(page_count(page));
	INIT_LIST_HEAD(&page->lru);

	spin_lock(&hugetlb_lock);
	if (surplus_huge_pages_node[nid]) {
		update_and_free_page(page);
		surplus_huge_pages--;
		surplus_huge_pages_node[nid]--;
	} else {
		enqueue_huge_page(page);
	}
	spin_unlock(&hugetlb_lock);
}

/*
 * Increment or decrement surplus_huge_pages.  Keep node-specific counters
 * balanced by operating on them in a round-robin fashion.
 * Returns 1 if an adjustment was made.
 */
static int adjust_pool_surplus(int delta)
{
	static int prev_nid;
	int nid = prev_nid;
	int ret = 0;

	VM_BUG_ON(delta != -1 && delta != 1);
	do {
		nid = next_node(nid, node_online_map);
		if (nid == MAX_NUMNODES)
			nid = first_node(node_online_map);

		/* To shrink on this node, there must be a surplus page */
		if (delta < 0 && !surplus_huge_pages_node[nid])
			continue;
		/* Surplus cannot exceed the total number of pages */
		if (delta > 0 && surplus_huge_pages_node[nid] >=
						nr_huge_pages_node[nid])
			continue;

		surplus_huge_pages += delta;
		surplus_huge_pages_node[nid] += delta;
		ret = 1;
		break;
	} while (nid != prev_nid);

	prev_nid = nid;
	return ret;
}

static int alloc_fresh_huge_page(void)
{
	static int prev_nid;
	struct page *page;
	int nid;

	/*
	 * Copy static prev_nid to local nid, work on that, then copy it
	 * back to prev_nid afterwards: otherwise there's a window in which
	 * a racer might pass invalid nid MAX_NUMNODES to alloc_pages_node.
	 * But we don't need to use a spin_lock here: it really doesn't
	 * matter if occasionally a racer chooses the same nid as we do.
	 */
	nid = next_node(prev_nid, node_online_map);
	if (nid == MAX_NUMNODES)
		nid = first_node(node_online_map);
	prev_nid = nid;

	page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
					HUGETLB_PAGE_ORDER);
	if (page) {
		set_compound_page_dtor(page, free_huge_page);
		spin_lock(&hugetlb_lock);
		nr_huge_pages++;
		nr_huge_pages_node[page_to_nid(page)]++;
		spin_unlock(&hugetlb_lock);
		put_page(page); /* free it into the hugepage allocator */
		return 1;
	}
	return 0;
}

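/*
 * Allocate a "surplus" huge page directly from the buddy allocator and
 * account it in the global and per-node surplus counters.  Surplus pages
 * are used when a private mapping faults and the persistent pool is
 * exhausted.
 */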
static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
						unsigned long address)
{
	struct page *page;

	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
					HUGETLB_PAGE_ORDER);
	if (page) {
		set_compound_page_dtor(page, free_huge_page);
		spin_lock(&hugetlb_lock);
		nr_huge_pages++;
		nr_huge_pages_node[page_to_nid(page)]++;
		surplus_huge_pages++;
		surplus_huge_pages_node[page_to_nid(page)]++;
		spin_unlock(&hugetlb_lock);
	}

	return page;
}

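/*
 * Allocate a huge page for a fault in @vma at @addr.  Shared (VM_MAYSHARE)
 * mappings consume one of their reserved pages; private mappings may only
 * take pages beyond the reserve and, failing that, fall back to a surplus
 * page from the buddy allocator.
 */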
static struct page *alloc_huge_page(struct vm_area_struct *vma,
				    unsigned long addr)
{
	struct page *page = NULL;

	spin_lock(&hugetlb_lock);
	if (vma->vm_flags & VM_MAYSHARE)
		resv_huge_pages--;
	else if (free_huge_pages <= resv_huge_pages)
		goto fail;

	page = dequeue_huge_page(vma, addr);
	if (!page)
		goto fail;

	spin_unlock(&hugetlb_lock);
	set_page_refcounted(page);
	return page;

fail:
	if (vma->vm_flags & VM_MAYSHARE)
		resv_huge_pages++;
	spin_unlock(&hugetlb_lock);

	/*
	 * Private mappings do not use reserved huge pages so the allocation
	 * may have failed due to an undersized hugetlb pool.  Try to grab a
	 * surplus huge page from the buddy allocator.
	 */
	if (!(vma->vm_flags & VM_MAYSHARE))
		page = alloc_buddy_huge_page(vma, addr);

	return page;
}

static int __init hugetlb_init(void)
{
	unsigned long i;

	if (HPAGE_SHIFT == 0)
		return 0;

	for (i = 0; i < MAX_NUMNODES; ++i)
		INIT_LIST_HEAD(&hugepage_freelists[i]);

	for (i = 0; i < max_huge_pages; ++i) {
		if (!alloc_fresh_huge_page())
			break;
	}
	max_huge_pages = free_huge_pages = nr_huge_pages = i;
	printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);
	return 0;
}
module_init(hugetlb_init);

static int __init hugetlb_setup(char *s)
{
	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
		max_huge_pages = 0;
	return 1;
}
__setup("hugepages=", hugetlb_setup);

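/* Sum a per-node counter array over the nodes in the current task's cpuset. */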
static unsigned int cpuset_mems_nr(unsigned int *array)
{
	int node;
	unsigned int nr = 0;

	for_each_node_mask(node, cpuset_current_mems_allowed)
		nr += array[node];

	return nr;
}

#ifdef CONFIG_SYSCTL
#ifdef CONFIG_HIGHMEM
static void try_to_free_low(unsigned long count)
{
	int i;

	for (i = 0; i < MAX_NUMNODES; ++i) {
		struct page *page, *next;
		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
			if (PageHighMem(page))
				continue;
			list_del(&page->lru);
			update_and_free_page(page);
			free_huge_pages--;
			free_huge_pages_node[page_to_nid(page)]--;
			if (count >= nr_huge_pages)
				return;
		}
	}
}
#else
static inline void try_to_free_low(unsigned long count)
{
}
#endif

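/*
 * "Persistent" huge pages are those controlled by max_huge_pages (set via
 * the hugepages= boot option or hugetlb_sysctl_handler() below), as opposed
 * to surplus pages temporarily borrowed from the buddy allocator.
 */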
#define persistent_huge_pages (nr_huge_pages - surplus_huge_pages)
static unsigned long set_max_huge_pages(unsigned long count)
{
	unsigned long min_count, ret;

	/*
	 * Increase the pool size.
	 * First take pages out of surplus state.  Then make up the
	 * remaining difference by allocating fresh huge pages.
	 */
	spin_lock(&hugetlb_lock);
	while (surplus_huge_pages && count > persistent_huge_pages) {
		if (!adjust_pool_surplus(-1))
			break;
	}

	while (count > persistent_huge_pages) {
		int ret;
		/*
		 * If this allocation races such that we no longer need the
		 * page, free_huge_page will handle it by freeing the page
		 * and reducing the surplus.
		 */
		spin_unlock(&hugetlb_lock);
		ret = alloc_fresh_huge_page();
		spin_lock(&hugetlb_lock);
		if (!ret)
			goto out;
	}
	if (count >= persistent_huge_pages)
		goto out;

	/*
	 * Decrease the pool size.
	 * First return free pages to the buddy allocator (being careful
	 * to keep enough around to satisfy reservations).  Then place
	 * pages into surplus state as needed so the pool will shrink
	 * to the desired size as pages become free.
	 */
	min_count = max(count, resv_huge_pages);
	try_to_free_low(min_count);
	while (min_count < persistent_huge_pages) {
		struct page *page = dequeue_huge_page(NULL, 0);
		if (!page)
			break;
		update_and_free_page(page);
	}
	while (count < persistent_huge_pages) {
		if (!adjust_pool_surplus(1))
			break;
	}
out:
	ret = persistent_huge_pages;
	spin_unlock(&hugetlb_lock);
	return ret;
}

int hugetlb_sysctl_handler(struct ctl_table *table, int write,
			   struct file *file, void __user *buffer,
			   size_t *length, loff_t *ppos)
{
	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
	max_huge_pages = set_max_huge_pages(max_huge_pages);
	return 0;
}

int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
			struct file *file, void __user *buffer,
			size_t *length, loff_t *ppos)
{
	proc_dointvec(table, write, file, buffer, length, ppos);
	if (hugepages_treat_as_movable)
		htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
	else
		htlb_alloc_mask = GFP_HIGHUSER;
	return 0;
}

#endif /* CONFIG_SYSCTL */

int hugetlb_report_meminfo(char *buf)
{
	return sprintf(buf,
			"HugePages_Total: %5lu\n"
			"HugePages_Free:  %5lu\n"
			"HugePages_Rsvd:  %5lu\n"
			"HugePages_Surp:  %5lu\n"
			"Hugepagesize:    %5lu kB\n",
			nr_huge_pages,
			free_huge_pages,
			resv_huge_pages,
			surplus_huge_pages,
			HPAGE_SIZE/1024);
}

int hugetlb_report_node_meminfo(int nid, char *buf)
{
	return sprintf(buf,
		"Node %d HugePages_Total: %5u\n"
		"Node %d HugePages_Free:  %5u\n",
		nid, nr_huge_pages_node[nid],
		nid, free_huge_pages_node[nid]);
}

/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
}

/*
 * We cannot handle pagefaults against hugetlb pages at all.  They cause
 * handle_mm_fault() to try to instantiate regular-sized pages in the
 * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
 * this far.
 */
static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	BUG();
	return 0;
}

struct vm_operations_struct hugetlb_vm_ops = {
	.fault = hugetlb_vm_op_fault,
};

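/*
 * Build a huge pte for @page using the protection bits of @vma: writable
 * mappings get a dirty, writable entry, read-only mappings a write-protected
 * one; the entry is always marked young and huge.
 */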
static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
				int writable)
{
	pte_t entry;

	if (writable) {
		entry =
		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	} else {
		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
	}
	entry = pte_mkyoung(entry);
	entry = pte_mkhuge(entry);

	return entry;
}

static void set_huge_ptep_writable(struct vm_area_struct *vma,
				   unsigned long address, pte_t *ptep)
{
	pte_t entry;

	entry = pte_mkwrite(pte_mkdirty(*ptep));
	if (ptep_set_access_flags(vma, address, ptep, entry, 1)) {
		update_mmu_cache(vma, address, entry);
	}
}

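/*
 * Copy the huge-page mappings of @vma from the parent mm @src to the child
 * mm @dst at fork time.  For private, writable mappings the source pte is
 * write-protected as well, so both processes will COW on their next write.
 */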
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
			    struct vm_area_struct *vma)
{
	pte_t *src_pte, *dst_pte, entry;
	struct page *ptepage;
	unsigned long addr;
	int cow;

	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
		src_pte = huge_pte_offset(src, addr);
		if (!src_pte)
			continue;
		dst_pte = huge_pte_alloc(dst, addr);
		if (!dst_pte)
			goto nomem;
		spin_lock(&dst->page_table_lock);
		spin_lock(&src->page_table_lock);
		if (!pte_none(*src_pte)) {
			if (cow)
				ptep_set_wrprotect(src, addr, src_pte);
			entry = *src_pte;
			ptepage = pte_page(entry);
			get_page(ptepage);
			set_huge_pte_at(dst, addr, dst_pte, entry);
		}
		spin_unlock(&src->page_table_lock);
		spin_unlock(&dst->page_table_lock);
	}
	return 0;

nomem:
	return -ENOMEM;
}

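/*
 * Tear down all huge ptes in [start, end), gathering the pages on a local
 * list and dropping their references only after the TLB has been flushed.
 * Callers hold the file's i_mmap_lock (see unmap_hugepage_range() below).
 */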
void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	pte_t *ptep;
	pte_t pte;
	struct page *page;
	struct page *tmp;
	/*
	 * A page gathering list, protected by the per-file i_mmap_lock.  The
	 * lock is used to avoid list corruption from multiple unmappings of
	 * the same page, since we are using page->lru.
	 */
	LIST_HEAD(page_list);

	WARN_ON(!is_vm_hugetlb_page(vma));
	BUG_ON(start & ~HPAGE_MASK);
	BUG_ON(end & ~HPAGE_MASK);

	spin_lock(&mm->page_table_lock);
	for (address = start; address < end; address += HPAGE_SIZE) {
		ptep = huge_pte_offset(mm, address);
		if (!ptep)
			continue;

		if (huge_pmd_unshare(mm, &address, ptep))
			continue;

		pte = huge_ptep_get_and_clear(mm, address, ptep);
		if (pte_none(pte))
			continue;

		page = pte_page(pte);
		if (pte_dirty(pte))
			set_page_dirty(page);
		list_add(&page->lru, &page_list);
	}
	spin_unlock(&mm->page_table_lock);
	flush_tlb_range(vma, start, end);
	list_for_each_entry_safe(page, tmp, &page_list, lru) {
		list_del(&page->lru);
		put_page(page);
	}
}

void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			  unsigned long end)
{
	/*
	 * It is undesirable to test vma->vm_file as it should be non-null
	 * for a valid hugetlb area.  However, vm_file will be NULL in the
	 * error cleanup path of do_mmap_pgoff.  When the hugetlbfs ->mmap
	 * method fails, do_mmap_pgoff() nullifies vma->vm_file before
	 * calling this function to clean up.  Since no pte has actually
	 * been set up, it is safe to do nothing in this case.
	 */
	if (vma->vm_file) {
		spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
		__unmap_hugepage_range(vma, start, end);
		spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
	}
}

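/*
 * Handle a write fault on a huge page mapped read-only, with
 * mm->page_table_lock held.  If we hold the only reference, the existing
 * page is simply made writable; otherwise a fresh huge page is allocated,
 * the contents are copied, and the new page is mapped in its place.
 */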
static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pte_t *ptep, pte_t pte)
{
	struct page *old_page, *new_page;
	int avoidcopy;

	old_page = pte_page(pte);

	/* If no-one else is actually using this page, avoid the copy
	 * and just make the page writable */
	avoidcopy = (page_count(old_page) == 1);
	if (avoidcopy) {
		set_huge_ptep_writable(vma, address, ptep);
		return 0;
	}

	page_cache_get(old_page);
	new_page = alloc_huge_page(vma, address);

	if (!new_page) {
		page_cache_release(old_page);
		return VM_FAULT_OOM;
	}

	spin_unlock(&mm->page_table_lock);
	copy_huge_page(new_page, old_page, address, vma);
	spin_lock(&mm->page_table_lock);

	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
	if (likely(pte_same(*ptep, pte))) {
		/* Break COW */
		set_huge_pte_at(mm, address, ptep,
				make_huge_pte(vma, new_page, 1));
		/* Make the old page be freed below */
		new_page = old_page;
	}
	page_cache_release(new_page);
	page_cache_release(old_page);
	return 0;
}

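/*
 * Handle a fault on a huge page that has no pte yet: look the page up in
 * the page cache or allocate and zero a new one (adding it to the page
 * cache for shared mappings), then install the pte under
 * mm->page_table_lock, rechecking the file size against racing truncation.
 */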
static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pte_t *ptep, int write_access)
{
	int ret = VM_FAULT_SIGBUS;
	unsigned long idx;
	unsigned long size;
	struct page *page;
	struct address_space *mapping;
	pte_t new_pte;

	mapping = vma->vm_file->f_mapping;
	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));

	/*
	 * Use page lock to guard against racing truncation
	 * before we get page_table_lock.
	 */
retry:
	page = find_lock_page(mapping, idx);
	if (!page) {
		size = i_size_read(mapping->host) >> HPAGE_SHIFT;
		if (idx >= size)
			goto out;
		if (hugetlb_get_quota(mapping))
			goto out;
		page = alloc_huge_page(vma, address);
		if (!page) {
			hugetlb_put_quota(mapping);
			ret = VM_FAULT_OOM;
			goto out;
		}
		clear_huge_page(page, address);

		if (vma->vm_flags & VM_SHARED) {
			int err;

			err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
			if (err) {
				put_page(page);
				hugetlb_put_quota(mapping);
				if (err == -EEXIST)
					goto retry;
				goto out;
			}
		} else
			lock_page(page);
	}

	spin_lock(&mm->page_table_lock);
	size = i_size_read(mapping->host) >> HPAGE_SHIFT;
	if (idx >= size)
		goto backout;

	ret = 0;
	if (!pte_none(*ptep))
		goto backout;

	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
				&& (vma->vm_flags & VM_SHARED)));
	set_huge_pte_at(mm, address, ptep, new_pte);

	if (write_access && !(vma->vm_flags & VM_SHARED)) {
		/* Optimization, do the COW without a second fault */
		ret = hugetlb_cow(mm, vma, address, ptep, new_pte);
	}

	spin_unlock(&mm->page_table_lock);
	unlock_page(page);
out:
	return ret;

backout:
	spin_unlock(&mm->page_table_lock);
	hugetlb_put_quota(mapping);
	unlock_page(page);
	put_page(page);
	goto out;
}

int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, int write_access)
{
	pte_t *ptep;
	pte_t entry;
	int ret;
	static DEFINE_MUTEX(hugetlb_instantiation_mutex);

	ptep = huge_pte_alloc(mm, address);
	if (!ptep)
		return VM_FAULT_OOM;

	/*
	 * Serialize hugepage allocation and instantiation, so that we don't
	 * get spurious allocation failures if two CPUs race to instantiate
	 * the same page in the page cache.
	 */
	mutex_lock(&hugetlb_instantiation_mutex);
	entry = *ptep;
	if (pte_none(entry)) {
		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
		mutex_unlock(&hugetlb_instantiation_mutex);
		return ret;
	}

	ret = 0;

	spin_lock(&mm->page_table_lock);
	/* Check for a racing update before calling hugetlb_cow */
	if (likely(pte_same(entry, *ptep)))
		if (write_access && !pte_write(entry))
			ret = hugetlb_cow(mm, vma, address, ptep, entry);
	spin_unlock(&mm->page_table_lock);
	mutex_unlock(&hugetlb_instantiation_mutex);

	return ret;
}

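/*
 * get_user_pages() back end for hugetlb VMAs: walk the requested range,
 * faulting huge pages in as needed, and fill the pages[] and vmas[] arrays
 * one PAGE_SIZE-sized subpage at a time.
 */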
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i)
{
	unsigned long pfn_offset;
	unsigned long vaddr = *position;
	int remainder = *length;

	spin_lock(&mm->page_table_lock);
	while (vaddr < vma->vm_end && remainder) {
		pte_t *pte;
		struct page *page;

		/*
		 * Some archs (sparc64, sh*) have multiple pte_ts to
		 * each hugepage.  We have to make sure we get the
		 * first, for the page indexing below to work.
		 */
		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);

		if (!pte || pte_none(*pte)) {
			int ret;

			spin_unlock(&mm->page_table_lock);
			ret = hugetlb_fault(mm, vma, vaddr, 0);
			spin_lock(&mm->page_table_lock);
			if (!(ret & VM_FAULT_ERROR))
				continue;

			remainder = 0;
			if (!i)
				i = -EFAULT;
			break;
		}

		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
		page = pte_page(*pte);
same_page:
		if (pages) {
			get_page(page);
			pages[i] = page + pfn_offset;
		}

		if (vmas)
			vmas[i] = vma;

		vaddr += PAGE_SIZE;
		++pfn_offset;
		--remainder;
		++i;
		if (vaddr < vma->vm_end && remainder &&
				pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
			/*
			 * We use pfn_offset to avoid touching the pageframes
			 * of this compound page.
			 */
			goto same_page;
		}
	}
	spin_unlock(&mm->page_table_lock);
	*length = remainder;
	*position = vaddr;

	return i;
}

void hugetlb_change_protection(struct vm_area_struct *vma,
		unsigned long address, unsigned long end, pgprot_t newprot)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long start = address;
	pte_t *ptep;
	pte_t pte;

	BUG_ON(address >= end);
	flush_cache_range(vma, address, end);

	spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
	spin_lock(&mm->page_table_lock);
	for (; address < end; address += HPAGE_SIZE) {
		ptep = huge_pte_offset(mm, address);
		if (!ptep)
			continue;
		if (huge_pmd_unshare(mm, &address, ptep))
			continue;
		if (!pte_none(*ptep)) {
			pte = huge_ptep_get_and_clear(mm, address, ptep);
			pte = pte_mkhuge(pte_modify(pte, newprot));
			set_huge_pte_at(mm, address, ptep, pte);
		}
	}
	spin_unlock(&mm->page_table_lock);
	spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);

	flush_tlb_range(vma, start, end);
}

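/*
 * Reservation map: each hugetlbfs inode keeps on i_mapping->private_list a
 * list of file_region entries, in huge-page units, describing ranges of the
 * file that already carry a reservation.  region_chg() reports how many new
 * pages a range would need, region_add() records the range, and
 * region_truncate() trims the map when the file shrinks.
 */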
struct file_region {
	struct list_head link;
	long from;
	long to;
};

static long region_add(struct list_head *head, long f, long t)
{
	struct file_region *rg, *nrg, *trg;

	/* Locate the region we are either in or before. */
	list_for_each_entry(rg, head, link)
		if (f <= rg->to)
			break;

	/* Round our left edge to the current segment if it encloses us. */
	if (f > rg->from)
		f = rg->from;

	/* Check for and consume any regions we now overlap with. */
	nrg = rg;
	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		if (rg->from > t)
			break;

		/* If this area reaches higher, then extend our area to
		 * include it completely.  If this is not the first area
		 * which we intend to reuse, free it. */
		if (rg->to > t)
			t = rg->to;
		if (rg != nrg) {
			list_del(&rg->link);
			kfree(rg);
		}
	}
	nrg->from = f;
	nrg->to = t;
	return 0;
}

static long region_chg(struct list_head *head, long f, long t)
{
	struct file_region *rg, *nrg;
	long chg = 0;

	/* Locate the region we are before or in. */
	list_for_each_entry(rg, head, link)
		if (f <= rg->to)
			break;

	/* If we are below the current region then a new region is required.
	 * Subtle: allocate a new region at the position, but make it zero
	 * size so that we can guarantee to record the reservation. */
	if (&rg->link == head || t < rg->from) {
		nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
		if (!nrg)
			return -ENOMEM;
		nrg->from = f;
		nrg->to = f;
		INIT_LIST_HEAD(&nrg->link);
		list_add(&nrg->link, rg->link.prev);

		return t - f;
	}

	/* Round our left edge to the current segment if it encloses us. */
	if (f > rg->from)
		f = rg->from;
	chg = t - f;

	/* Check for and consume any regions we now overlap with. */
	list_for_each_entry(rg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		if (rg->from > t)
			return chg;

		/* We overlap with this area; if it extends further than
		 * us then we must extend ourselves.  Account for its
		 * existing reservation. */
		if (rg->to > t) {
			chg += rg->to - t;
			t = rg->to;
		}
		chg -= rg->to - rg->from;
	}
	return chg;
}

static long region_truncate(struct list_head *head, long end)
{
	struct file_region *rg, *trg;
	long chg = 0;

	/* Locate the region we are either in or before. */
	list_for_each_entry(rg, head, link)
		if (end <= rg->to)
			break;
	if (&rg->link == head)
		return 0;

	/* If we are in the middle of a region then adjust it. */
	if (end > rg->from) {
		chg = rg->to - end;
		rg->to = end;
		rg = list_entry(rg->link.next, typeof(*rg), link);
	}

	/* Drop any remaining regions. */
	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		chg += rg->to - rg->from;
		list_del(&rg->link);
		kfree(rg);
	}
	return chg;
}

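/*
 * Adjust the global reservation by @delta huge pages.  An increase fails
 * with -ENOMEM when the free pool cannot cover the new total.
 */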
static int hugetlb_acct_memory(long delta)
{
	int ret = -ENOMEM;

	spin_lock(&hugetlb_lock);
	if ((delta + resv_huge_pages) <= free_huge_pages) {
		resv_huge_pages += delta;
		ret = 0;
	}
	spin_unlock(&hugetlb_lock);
	return ret;
}

int hugetlb_reserve_pages(struct inode *inode, long from, long to)
{
	long ret, chg;

	chg = region_chg(&inode->i_mapping->private_list, from, to);
	if (chg < 0)
		return chg;
	/*
	 * When cpuset is configured, it breaks strict hugetlb page
	 * reservation, as the accounting is done on a global variable.
	 * Such a reservation is meaningless in the presence of cpusets,
	 * because the reservation is not checked against page availability
	 * for the current cpuset.  An application can still be OOM-killed
	 * by the kernel for lack of free htlb pages in the cpuset that the
	 * task is in.  Enforcing strict accounting with cpusets is almost
	 * impossible (or too ugly), because cpusets are too fluid: tasks
	 * and memory nodes can be moved between cpusets at any time.
	 *
	 * This change of semantics for shared hugetlb mappings with cpusets
	 * is undesirable.  However, in order to preserve some of the
	 * semantics, we fall back to checking against the current free page
	 * availability, as a best attempt and hopefully to minimize the
	 * impact of the changed semantics.
	 */
	if (chg > cpuset_mems_nr(free_huge_pages_node))
		return -ENOMEM;

	ret = hugetlb_acct_memory(chg);
	if (ret < 0)
		return ret;
	region_add(&inode->i_mapping->private_list, from, to);
	return 0;
}

void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
{
	long chg = region_truncate(&inode->i_mapping->private_list, offset);
	hugetlb_acct_memory(freed - chg);
}