/*
 * PPC64 (POWER4) Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2003 David Gibson, IBM Corporation.
 *
 * Based on the IA-32 version:
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/machdep.h>
#include <asm/cputable.h>

#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)

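/*
 * On this platform a hugepage PTE lives directly in the pmd: the pmd
 * entry covering the address is reinterpreted as a pte_t, so the walks
 * below stop at the pmd level.
 */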
/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	pte_t *pt;

	BUG_ON(!in_hugepage_area(mm->context, addr));

	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	if (!pgd_none(*pg)) {
		pu = pud_offset(pg, addr);
		if (!pud_none(*pu)) {
			pm = pmd_offset(pu, addr);
			pt = (pte_t *)pm;
			BUG_ON(!pmd_none(*pm)
			       && !(pte_present(*pt) && pte_huge(*pt)));
			return pt;
		}
	}

	return NULL;
}

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pg;
	pud_t *pu;
	pmd_t *pm;
	pte_t *pt;

	BUG_ON(!in_hugepage_area(mm->context, addr));

	addr &= HPAGE_MASK;

	pg = pgd_offset(mm, addr);
	pu = pud_alloc(mm, pg, addr);

	if (pu) {
		pm = pmd_alloc(mm, pu, addr);
		if (pm) {
			pt = (pte_t *)pm;
			BUG_ON(!pmd_none(*pm)
			       && !(pte_present(*pt) && pte_huge(*pt)));
			return pt;
		}
	}

	return NULL;
}

#define HUGEPTE_BATCH_SIZE	(HPAGE_SIZE / PMD_SIZE)

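/*
 * A huge page spans HPAGE_SIZE / PMD_SIZE pmd slots, so the pte value is
 * replicated into each of them.  The _PAGE_HPTEFLAGS bits are masked off
 * so no stale hash-table state is carried into the new mapping.
 */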
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	int i;

	if (pte_present(*ptep)) {
		pte_clear(mm, addr, ptep);
		flush_tlb_pending();
	}

	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
		*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
		ptep++;
	}
}

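/*
 * Clear the primary copy with pte_update() (letting hpte_update() flush
 * any hashed HPTE), then zero the remaining replicated slots.
 */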
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	unsigned long old = pte_update(ptep, ~0UL);
	int i;

	if (old & _PAGE_HASHPTE)
		hpte_update(mm, addr, old, 0);

	for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
		ptep[i] = __pte(0);

	return __pte(old);
}

/*
 * Check that addr and len are hugepage-aligned and that the range lies
 * within the low or high hugepage areas.
 */
int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
{
	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (addr & ~HPAGE_MASK)
		return -EINVAL;
	if (!(within_hugepage_low_range(addr, len)
	      || within_hugepage_high_range(addr, len)))
		return -EINVAL;
	return 0;
}

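/*
 * SLB flush callbacks, run on every CPU via on_each_cpu(): invalidate
 * the SLB entries covering each area whose bit is set in the mask passed
 * through 'parm', so subsequent SLB misses see the updated context.
 */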
static void flush_low_segments(void *parm)
{
	u16 areas = (unsigned long) parm;
	unsigned long i;

	asm volatile("isync" : : : "memory");

	BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS);

	for (i = 0; i < NUM_LOW_AREAS; i++) {
		if (!(areas & (1U << i)))
			continue;
		asm volatile("slbie %0"
			     : : "r" ((i << SID_SHIFT) | SLBIE_C));
	}

	asm volatile("isync" : : : "memory");
}

static void flush_high_segments(void *parm)
{
	u16 areas = (unsigned long) parm;
	unsigned long i, j;

	asm volatile("isync" : : : "memory");

	BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS);

	for (i = 0; i < NUM_HIGH_AREAS; i++) {
		if (!(areas & (1U << i)))
			continue;
		for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++)
			asm volatile("slbie %0"
				     : : "r" (((i << HTLB_AREA_SHIFT)
					       + (j << SID_SHIFT)) | SLBIE_C));
	}

	asm volatile("isync" : : : "memory");
}

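/*
 * An area can only be converted to hugepage use if no normal VMAs
 * currently overlap it; these helpers check for that.
 */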
static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << SID_SHIFT;
	unsigned long end = (area+1) << SID_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_LOW_AREAS);

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area)
{
	unsigned long start = area << HTLB_AREA_SHIFT;
	unsigned long end = (area+1) << HTLB_AREA_SHIFT;
	struct vm_area_struct *vma;

	BUG_ON(area >= NUM_HIGH_AREAS);

	/* Check no VMAs are in the region */
	vma = find_vma(mm, start);
	if (vma && (vma->vm_start < end))
		return -EBUSY;

	return 0;
}

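/*
 * Mark additional areas as available for hugepages: verify they are
 * empty, record them in the mm context, propagate the context to the
 * paca, and then flush the stale SLB entries on all CPUs.
 */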
static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	unsigned long i;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS);

	newareas &= ~(mm->context.low_htlb_areas);
	if (!newareas)
		return 0; /* The segments we want are already open */

	for (i = 0; i < NUM_LOW_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_low_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.low_htlb_areas |= newareas;

	/* update the paca copy of the context struct */
	get_paca()->context = mm->context;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();
	on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1);

	return 0;
}

static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas)
{
	unsigned long i;

	BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS);
	BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8)
		     != NUM_HIGH_AREAS);

	newareas &= ~(mm->context.high_htlb_areas);
	if (!newareas)
		return 0; /* The areas we want are already open */

	for (i = 0; i < NUM_HIGH_AREAS; i++)
		if ((1 << i) & newareas)
			if (prepare_high_area_for_htlb(mm, i) != 0)
				return -EBUSY;

	mm->context.high_htlb_areas |= newareas;

	/* update the paca copy of the context struct */
	get_paca()->context = mm->context;

	/* the context change must make it to memory before the flush,
	 * so that further SLB misses do the right thing. */
	mb();
	on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1);

	return 0;
}

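/*
 * Open whichever low (below 4GB) or high areas the [addr, addr+len)
 * hugepage range requires, rejecting ranges that wrap around.
 */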
int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
	int err;

	if ((addr + len) < addr)
		return -EINVAL;

	if ((addr + len) < 0x100000000UL)
		err = open_low_hpage_areas(current->mm,
					   LOW_ESID_MASK(addr, len));
	else
		err = open_high_hpage_areas(current->mm,
					    HTLB_AREA_MASK(addr, len));
	if (err) {
		printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)"
		       " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n",
		       addr, len,
		       LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len));
		return err;
	}

	return 0;
}

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	pte_t *ptep;
	struct page *page;

	if (!in_hugepage_area(mm->context, address))
		return ERR_PTR(-EINVAL);

	ptep = huge_pte_offset(mm, address);
	page = pte_page(*ptep);
	if (page)
		page += (address % HPAGE_SIZE) / PAGE_SIZE;

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	BUG();
	return NULL;
}

/* Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions. */
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
				     unsigned long len, unsigned long pgoff,
				     unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > TASK_SIZE)
		return -ENOMEM;

	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (((TASK_SIZE - len) >= addr)
		    && (!vma || (addr + len) <= vma->vm_start)
		    && !is_hugepage_only_range(mm, addr, len))
			return addr;
	}
	if (len > mm->cached_hole_size) {
		start_addr = addr = mm->free_area_cache;
	} else {
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	vma = find_vma(mm, addr);
	while (TASK_SIZE - len >= addr) {
		BUG_ON(vma && (addr >= vma->vm_end));

		if (touches_hugepage_low_range(mm, addr, len)) {
			addr = ALIGN(addr+1, 1<<SID_SHIFT);
			vma = find_vma(mm, addr);
			continue;
		}
		if (touches_hugepage_high_range(mm, addr, len)) {
			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
			vma = find_vma(mm, addr);
			continue;
		}
		if (!vma || addr + len <= vma->vm_start) {
			/*
			 * Remember the place where we stopped the search:
			 */
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = vma->vm_end;
		vma = vma->vm_next;
	}

	/* Make sure we didn't miss any holes */
	if (start_addr != TASK_UNMAPPED_BASE) {
		start_addr = addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
		goto full_search;
	}
	return -ENOMEM;
}

/*
 * This mmap-allocator allocates new areas top-down from below the
 * stack's low limit (the base):
 *
 * Because we have an exclusive hugepage region which lies within the
 * normal user address space, we have to take special measures to make
 * non-huge mmap()s evade the hugepage reserved regions.
 */
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
			       const unsigned long len, const unsigned long pgoff,
			       const unsigned long flags)
{
	struct vm_area_struct *vma, *prev_vma;
	struct mm_struct *mm = current->mm;
	unsigned long base = mm->mmap_base, addr = addr0;
	unsigned long largest_hole = mm->cached_hole_size;
	int first_time = 1;

	/* requested length too big for entire address space */
	if (len > TASK_SIZE)
		return -ENOMEM;

	/* don't allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	/* requesting a specific address */
	if (addr) {
		addr = PAGE_ALIGN(addr);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start)
		    && !is_hugepage_only_range(mm, addr, len))
			return addr;
	}

	if (len <= largest_hole) {
		largest_hole = 0;
		mm->free_area_cache = base;
	}
try_again:
	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or can't fit in requested address hole */
	addr = (mm->free_area_cache - len) & PAGE_MASK;
	do {
hugepage_recheck:
		if (touches_hugepage_low_range(mm, addr, len)) {
			addr = (addr & ((~0) << SID_SHIFT)) - len;
			goto hugepage_recheck;
		} else if (touches_hugepage_high_range(mm, addr, len)) {
			addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len;
			goto hugepage_recheck;
		}

		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
		if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
			return addr;

		/*
		 * new region fits between prev_vma->vm_end and
		 * vma->vm_start, use it:
		 */
		if (addr + len <= vma->vm_start &&
		    (!prev_vma || (addr >= prev_vma->vm_end))) {
			/* remember the address as a hint for next time */
			mm->cached_hole_size = largest_hole;
			return (mm->free_area_cache = addr);
		} else {
			/* pull free_area_cache down to the first hole */
			if (mm->free_area_cache == vma->vm_end) {
				mm->free_area_cache = vma->vm_start;
				mm->cached_hole_size = largest_hole;
			}
		}

		/* remember the largest hole we saw so far */
		if (addr + largest_hole < vma->vm_start)
			largest_hole = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = vma->vm_start - len;
	} while (len <= vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (first_time) {
		mm->free_area_cache = base;
		largest_hole = 0;
		first_time = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;
	mm->cached_hole_size = ~0UL;

	return addr;
}

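/*
 * Find the lowest VMA-free range of 'len' bytes below 4GB that lies
 * entirely within the low areas permitted by 'segmask'.
 */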
static unsigned long htlb_get_low_area(unsigned long len, u16 segmask)
{
	unsigned long addr = 0;
	struct vm_area_struct *vma;

	vma = find_vma(current->mm, addr);
	while (addr + len <= 0x100000000UL) {
		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

		if (!__within_hugepage_low_range(addr, len, segmask)) {
			addr = ALIGN(addr+1, 1<<SID_SHIFT);
			vma = find_vma(current->mm, addr);
			continue;
		}

		if (!vma || (addr + len) <= vma->vm_start)
			return addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
		/* Depending on segmask this might not be a confirmed
		 * hugepage region, so the ALIGN could have skipped
		 * some VMAs */
		vma = find_vma(current->mm, addr);
	}

	return -ENOMEM;
}

static unsigned long htlb_get_high_area(unsigned long len, u16 areamask)
{
	unsigned long addr = 0x100000000UL;
	struct vm_area_struct *vma;

	vma = find_vma(current->mm, addr);
	while (addr + len <= TASK_SIZE_USER64) {
		BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */

		if (!__within_hugepage_high_range(addr, len, areamask)) {
			addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT);
			vma = find_vma(current->mm, addr);
			continue;
		}

		if (!vma || (addr + len) <= vma->vm_start)
			return addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
		/* Depending on areamask this might not be a confirmed
		 * hugepage region, so the ALIGN could have skipped
		 * some VMAs */
		vma = find_vma(current->mm, addr);
	}

	return -ENOMEM;
}

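/*
 * First try to place the mapping in areas that are already open; failing
 * that, slide a candidate area mask down from the top of the usable
 * range, opening new areas once a fit is found.
 */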
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags)
{
	int lastshift;
	u16 areamask, curareas;

	if (len & ~HPAGE_MASK)
		return -EINVAL;

	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
		return -EINVAL;

	if (test_thread_flag(TIF_32BIT)) {
		curareas = current->mm->context.low_htlb_areas;

		/* First see if we can do the mapping in the existing
		 * low areas */
		addr = htlb_get_low_area(len, curareas);
		if (addr != -ENOMEM)
			return addr;

		lastshift = 0;
		for (areamask = LOW_ESID_MASK(0x100000000UL-len, len);
		     !lastshift; areamask >>= 1) {
			if (areamask & 1)
				lastshift = 1;

			addr = htlb_get_low_area(len, curareas | areamask);
			if ((addr != -ENOMEM)
			    && open_low_hpage_areas(current->mm, areamask) == 0)
				return addr;
		}
	} else {
		curareas = current->mm->context.high_htlb_areas;

		/* First see if we can do the mapping in the existing
		 * high areas */
		addr = htlb_get_high_area(len, curareas);
		if (addr != -ENOMEM)
			return addr;

		lastshift = 0;
		for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len);
		     !lastshift; areamask >>= 1) {
			if (areamask & 1)
				lastshift = 1;

			addr = htlb_get_high_area(len, curareas | areamask);
			if ((addr != -ENOMEM)
			    && open_high_hpage_areas(current->mm, areamask) == 0)
				return addr;
		}
	}
	printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open"
	       " enough areas\n");
	return -ENOMEM;
}

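/*
 * Hugepage equivalent of the hash-fault path: locate the Linux pte for
 * 'ea', check the access permissions, and then update or insert the
 * corresponding HPTE in the hash table (retrying if the group is full).
 */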
int hash_huge_page(struct mm_struct *mm, unsigned long access,
		   unsigned long ea, unsigned long vsid, int local)
{
	pte_t *ptep;
	unsigned long va, vpn;
	pte_t old_pte, new_pte;
	unsigned long rflags, prpn;
	long slot;
	int err = 1;

	spin_lock(&mm->page_table_lock);

	ptep = huge_pte_offset(mm, ea);

	/* Search the Linux page table for a match with va */
	va = (vsid << 28) | (ea & 0x0fffffff);
	vpn = va >> HPAGE_SHIFT;

	/*
	 * If no pte found or not present, send the problem up to
	 * do_page_fault
	 */
	if (unlikely(!ptep || pte_none(*ptep)))
		goto out;

/*	BUG_ON(pte_bad(*ptep)); */

	/*
	 * Check the user's access rights to the page. If access should be
	 * prevented then send the problem up to do_page_fault.
	 */
	if (unlikely(access & ~pte_val(*ptep)))
		goto out;
	/*
	 * At this point, we have a pte (old_pte) which can be used to build
	 * or update an HPTE. There are 2 cases:
	 *
	 * 1. There is a valid (present) pte with no associated HPTE (this is
	 *	the most common case)
	 * 2. There is a valid (present) pte with an associated HPTE. The
	 *	current values of the pp bits in the HPTE prevent access
	 *	because we are doing software DIRTY bit management and the
	 *	page is currently not DIRTY.
	 */

	old_pte = *ptep;
	new_pte = old_pte;

	rflags = 0x2 | (!(pte_val(new_pte) & _PAGE_RW));
	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
	rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);

	/* Check if pte already has an hpte (case 2) */
	if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
		/* There MIGHT be an HPTE for this pte */
		unsigned long hash, slot;

		hash = hpt_hash(vpn, 1);
		if (pte_val(old_pte) & _PAGE_SECONDARY)
			hash = ~hash;
		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;

		if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
			pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
	}

	if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
		unsigned long hash = hpt_hash(vpn, 1);
		unsigned long hpte_group;

		prpn = pte_pfn(old_pte);

repeat:
		hpte_group = ((hash & htab_hash_mask) *
			      HPTES_PER_GROUP) & ~0x7UL;

		/* Update the linux pte with the HPTE slot */
		pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
		pte_val(new_pte) |= _PAGE_HASHPTE;

		/* Add in WIMG bits */
		/* XXX We should store these in the pte */
		rflags |= _PAGE_COHERENT;

		slot = ppc_md.hpte_insert(hpte_group, va, prpn,
					  HPTE_V_LARGE, rflags);

		/* Primary is full, try the secondary */
		if (unlikely(slot == -1)) {
			pte_val(new_pte) |= _PAGE_SECONDARY;
			hpte_group = ((~hash & htab_hash_mask) *
				      HPTES_PER_GROUP) & ~0x7UL;
			slot = ppc_md.hpte_insert(hpte_group, va, prpn,
						  HPTE_V_LARGE |
						  HPTE_V_SECONDARY,
						  rflags);
			if (slot == -1) {
				if (mftb() & 0x1)
					hpte_group = ((hash & htab_hash_mask) *
						      HPTES_PER_GROUP) & ~0x7UL;

				ppc_md.hpte_remove(hpte_group);
				goto repeat;
			}
		}

		if (unlikely(slot == -2))
			panic("hash_huge_page: pte_insert failed\n");

		pte_val(new_pte) |= (slot << 12) & _PAGE_GROUP_IX;

		/*
		 * No need to use ldarx/stdcx here because all who
		 * might be updating the pte will hold the
		 * page_table_lock
		 */
		*ptep = new_pte;
	}

	err = 0;

 out:
	spin_unlock(&mm->page_table_lock);

	return err;
}