blob: 464d8fc21ce69b67d8d8bf057f5a57ef6a84c677 [file] [log] [blame]
Ingo Molnar9f4c8152008-01-30 13:33:41 +01001/*
2 * Copyright 2002 Andi Kleen, SuSE Labs.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 * Thanks to Ben LaHaise for precious feedback.
Ingo Molnar9f4c8152008-01-30 13:33:41 +01004 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005#include <linux/highmem.h>
Ingo Molnar81922062008-01-30 13:34:04 +01006#include <linux/bootmem.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007#include <linux/module.h>
Ingo Molnar9f4c8152008-01-30 13:33:41 +01008#include <linux/sched.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -07009#include <linux/slab.h>
Ingo Molnar9f4c8152008-01-30 13:33:41 +010010#include <linux/mm.h>
Thomas Gleixner76ebd052008-02-09 23:24:09 +010011#include <linux/interrupt.h>
Ingo Molnar9f4c8152008-01-30 13:33:41 +010012
Thomas Gleixner950f9d92008-01-30 13:34:06 +010013#include <asm/e820.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <asm/processor.h>
15#include <asm/tlbflush.h>
Dave Jonesf8af0952006-01-06 00:12:10 -080016#include <asm/sections.h>
Ingo Molnar9f4c8152008-01-30 13:33:41 +010017#include <asm/uaccess.h>
18#include <asm/pgalloc.h>
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +010019#include <asm/proto.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020
Ingo Molnar9df84992008-02-04 16:48:09 +010021/*
22 * The current flushing context - we pass it instead of 5 arguments:
23 */
Thomas Gleixner72e458d2008-02-04 16:48:07 +010024struct cpa_data {
25 unsigned long vaddr;
Thomas Gleixner72e458d2008-02-04 16:48:07 +010026 pgprot_t mask_set;
27 pgprot_t mask_clr;
Thomas Gleixner65e074d2008-02-04 16:48:07 +010028 int numpages;
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +010029 int flushtlb;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +010030 unsigned long pfn;
Thomas Gleixner72e458d2008-02-04 16:48:07 +010031};
32
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +010033#ifdef CONFIG_X86_64
34
35static inline unsigned long highmap_start_pfn(void)
36{
37 return __pa(_text) >> PAGE_SHIFT;
38}
39
40static inline unsigned long highmap_end_pfn(void)
41{
42 return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
43}
44
45#endif
46
/*
 * Return non-zero when addr lies in the half-open interval
 * [start, end), zero otherwise.
 */
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	if (addr < start)
		return 0;

	return addr < end;
}
52
53/*
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010054 * Flushing functions
55 */
Thomas Gleixnercd8ddf12008-01-30 13:34:08 +010056
Thomas Gleixnercd8ddf12008-01-30 13:34:08 +010057/**
58 * clflush_cache_range - flush a cache range with clflush
59 * @addr: virtual start address
60 * @size: number of bytes to flush
61 *
62 * clflush is an unordered instruction which needs fencing with mfence
63 * to avoid ordering issues.
64 */
Ingo Molnar4c61afc2008-01-30 13:34:09 +010065void clflush_cache_range(void *vaddr, unsigned int size)
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010066{
Ingo Molnar4c61afc2008-01-30 13:34:09 +010067 void *vend = vaddr + size - 1;
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010068
Thomas Gleixnercd8ddf12008-01-30 13:34:08 +010069 mb();
Ingo Molnar4c61afc2008-01-30 13:34:09 +010070
71 for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
72 clflush(vaddr);
73 /*
74 * Flush any possible final partial cacheline:
75 */
76 clflush(vend);
77
Thomas Gleixnercd8ddf12008-01-30 13:34:08 +010078 mb();
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010079}
80
Thomas Gleixneraf1e6842008-01-30 13:34:08 +010081static void __cpa_flush_all(void *arg)
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010082{
Andi Kleen6bb83832008-02-04 16:48:06 +010083 unsigned long cache = (unsigned long)arg;
84
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010085 /*
86 * Flush all to work around Errata in early athlons regarding
87 * large page flushing.
88 */
89 __flush_tlb_all();
90
Andi Kleen6bb83832008-02-04 16:48:06 +010091 if (cache && boot_cpu_data.x86_model >= 4)
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010092 wbinvd();
93}
94
Andi Kleen6bb83832008-02-04 16:48:06 +010095static void cpa_flush_all(unsigned long cache)
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010096{
97 BUG_ON(irqs_disabled());
98
Andi Kleen6bb83832008-02-04 16:48:06 +010099 on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +0100100}
101
/*
 * Per-CPU worker of cpa_flush_range(). @arg is unused.
 */
static void __cpa_flush_range(void *arg)
{
	/*
	 * A full TLB flush is the simple choice. Invalidating single
	 * pages for small ranges would be a possible optimization, but
	 * on 64bit the high aliases would have to be flushed as well.
	 */
	__flush_tlb_all();
}
111
Andi Kleen6bb83832008-02-04 16:48:06 +0100112static void cpa_flush_range(unsigned long start, int numpages, int cache)
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100113{
Ingo Molnar4c61afc2008-01-30 13:34:09 +0100114 unsigned int i, level;
115 unsigned long addr;
116
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100117 BUG_ON(irqs_disabled());
Ingo Molnar4c61afc2008-01-30 13:34:09 +0100118 WARN_ON(PAGE_ALIGN(start) != start);
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100119
Thomas Gleixner3b233e52008-01-30 13:34:08 +0100120 on_each_cpu(__cpa_flush_range, NULL, 1, 1);
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100121
Andi Kleen6bb83832008-02-04 16:48:06 +0100122 if (!cache)
123 return;
124
Thomas Gleixner3b233e52008-01-30 13:34:08 +0100125 /*
126 * We only need to flush on one CPU,
127 * clflush is a MESI-coherent instruction that
128 * will cause all other CPUs to flush the same
129 * cachelines:
130 */
Ingo Molnar4c61afc2008-01-30 13:34:09 +0100131 for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
132 pte_t *pte = lookup_address(addr, &level);
133
134 /*
135 * Only flush present addresses:
136 */
Thomas Gleixner7bfb72e2008-02-04 16:48:08 +0100137 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
Ingo Molnar4c61afc2008-01-30 13:34:09 +0100138 clflush_cache_range((void *) addr, PAGE_SIZE);
139 }
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100140}
141
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +0100142/*
Arjan van de Vened724be2008-01-30 13:34:04 +0100143 * Certain areas of memory on x86 require very specific protection flags,
144 * for example the BIOS area or kernel text. Callers don't always get this
145 * right (again, ioremap() on BIOS memory is not uncommon) so this function
146 * checks and fixes these known static required protection bits.
147 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100148static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
149 unsigned long pfn)
Arjan van de Vened724be2008-01-30 13:34:04 +0100150{
151 pgprot_t forbidden = __pgprot(0);
152
Ingo Molnar687c4822008-01-30 13:34:04 +0100153 /*
Arjan van de Vened724be2008-01-30 13:34:04 +0100154 * The BIOS area between 640k and 1Mb needs to be executable for
155 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
Ingo Molnar687c4822008-01-30 13:34:04 +0100156 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100157 if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
Arjan van de Vened724be2008-01-30 13:34:04 +0100158 pgprot_val(forbidden) |= _PAGE_NX;
159
160 /*
161 * The kernel text needs to be executable for obvious reasons
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100162 * Does not cover __inittext since that is gone later on. On
163 * 64bit we do not enforce !NX on the low mapping
Arjan van de Vened724be2008-01-30 13:34:04 +0100164 */
165 if (within(address, (unsigned long)_text, (unsigned long)_etext))
166 pgprot_val(forbidden) |= _PAGE_NX;
Arjan van de Vencc0f21b2008-02-04 16:48:05 +0100167
Arjan van de Vencc0f21b2008-02-04 16:48:05 +0100168 /*
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100169 * The .rodata section needs to be read-only. Using the pfn
170 * catches all aliases.
Arjan van de Vencc0f21b2008-02-04 16:48:05 +0100171 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100172 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
173 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
Arjan van de Vencc0f21b2008-02-04 16:48:05 +0100174 pgprot_val(forbidden) |= _PAGE_RW;
Arjan van de Vened724be2008-01-30 13:34:04 +0100175
176 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
Ingo Molnar687c4822008-01-30 13:34:04 +0100177
178 return prot;
179}
180
Thomas Gleixner9a14aef2008-02-04 16:48:07 +0100181/*
182 * Lookup the page table entry for a virtual address. Return a pointer
183 * to the entry and the level of the mapping.
184 *
185 * Note: We return pud and pmd either when the entry is marked large
186 * or when the present bit is not set. Otherwise we would return a
187 * pointer to a nonexisting mapping.
188 */
Harvey Harrisonda7bfc52008-02-09 23:24:08 +0100189pte_t *lookup_address(unsigned long address, unsigned int *level)
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100190{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 pgd_t *pgd = pgd_offset_k(address);
192 pud_t *pud;
193 pmd_t *pmd;
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100194
Thomas Gleixner30551bb2008-01-30 13:34:04 +0100195 *level = PG_LEVEL_NONE;
196
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 if (pgd_none(*pgd))
198 return NULL;
Ingo Molnar9df84992008-02-04 16:48:09 +0100199
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 pud = pud_offset(pgd, address);
201 if (pud_none(*pud))
202 return NULL;
Andi Kleenc2f71ee2008-02-04 16:48:09 +0100203
204 *level = PG_LEVEL_1G;
205 if (pud_large(*pud) || !pud_present(*pud))
206 return (pte_t *)pud;
207
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 pmd = pmd_offset(pud, address);
209 if (pmd_none(*pmd))
210 return NULL;
Thomas Gleixner30551bb2008-01-30 13:34:04 +0100211
212 *level = PG_LEVEL_2M;
Thomas Gleixner9a14aef2008-02-04 16:48:07 +0100213 if (pmd_large(*pmd) || !pmd_present(*pmd))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214 return (pte_t *)pmd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215
Thomas Gleixner30551bb2008-01-30 13:34:04 +0100216 *level = PG_LEVEL_4K;
Ingo Molnar9df84992008-02-04 16:48:09 +0100217
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100218 return pte_offset_kernel(pmd, address);
219}
220
Ingo Molnar9df84992008-02-04 16:48:09 +0100221/*
222 * Set the new pmd in all the pgds we know about:
223 */
Ingo Molnar9a3dc782008-01-30 13:33:57 +0100224static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100225{
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100226 /* change init_mm */
227 set_pte_atomic(kpte, pte);
Ingo Molnar44af6c42008-01-30 13:34:03 +0100228#ifdef CONFIG_X86_32
Ingo Molnare4b71dc2008-01-30 13:34:04 +0100229 if (!SHARED_KERNEL_PMD) {
Ingo Molnar44af6c42008-01-30 13:34:03 +0100230 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231
Jeremy Fitzhardingee3ed9102008-01-30 13:34:11 +0100232 list_for_each_entry(page, &pgd_list, lru) {
Ingo Molnar44af6c42008-01-30 13:34:03 +0100233 pgd_t *pgd;
234 pud_t *pud;
235 pmd_t *pmd;
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100236
Ingo Molnar44af6c42008-01-30 13:34:03 +0100237 pgd = (pgd_t *)page_address(page) + pgd_index(address);
238 pud = pud_offset(pgd, address);
239 pmd = pmd_offset(pud, address);
240 set_pte_atomic((pte_t *)pmd, pte);
241 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 }
Ingo Molnar44af6c42008-01-30 13:34:03 +0100243#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244}
245
Ingo Molnar9df84992008-02-04 16:48:09 +0100246static int
247try_preserve_large_page(pte_t *kpte, unsigned long address,
248 struct cpa_data *cpa)
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100249{
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100250 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100251 pte_t new_pte, old_pte, *tmp;
252 pgprot_t old_prot, new_prot;
Thomas Gleixnerfac84932008-02-09 23:24:09 +0100253 int i, do_split = 1;
Harvey Harrisonda7bfc52008-02-09 23:24:08 +0100254 unsigned int level;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100255
256 spin_lock_irqsave(&pgd_lock, flags);
257 /*
258 * Check for races, another CPU might have split this page
259 * up already:
260 */
261 tmp = lookup_address(address, &level);
262 if (tmp != kpte)
263 goto out_unlock;
264
265 switch (level) {
266 case PG_LEVEL_2M:
Andi Kleen31422c52008-02-04 16:48:08 +0100267 psize = PMD_PAGE_SIZE;
268 pmask = PMD_PAGE_MASK;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100269 break;
Andi Kleenf07333f2008-02-04 16:48:09 +0100270#ifdef CONFIG_X86_64
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100271 case PG_LEVEL_1G:
Andi Kleen5d3c8b22008-02-13 16:20:35 +0100272 psize = PUD_PAGE_SIZE;
273 pmask = PUD_PAGE_MASK;
Andi Kleenf07333f2008-02-04 16:48:09 +0100274 break;
275#endif
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100276 default:
Ingo Molnarbeaff632008-02-04 16:48:09 +0100277 do_split = -EINVAL;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100278 goto out_unlock;
279 }
280
281 /*
282 * Calculate the number of pages, which fit into this large
283 * page starting at address:
284 */
285 nextpage_addr = (address + psize) & pmask;
286 numpages = (nextpage_addr - address) >> PAGE_SHIFT;
287 if (numpages < cpa->numpages)
288 cpa->numpages = numpages;
289
290 /*
291 * We are safe now. Check whether the new pgprot is the same:
292 */
293 old_pte = *kpte;
294 old_prot = new_prot = pte_pgprot(old_pte);
295
296 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
297 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100298
299 /*
300 * old_pte points to the large page base address. So we need
301 * to add the offset of the virtual address:
302 */
303 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
304 cpa->pfn = pfn;
305
306 new_prot = static_protections(new_prot, address, pfn);
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100307
308 /*
Thomas Gleixnerfac84932008-02-09 23:24:09 +0100309 * We need to check the full range, whether
310 * static_protection() requires a different pgprot for one of
311 * the pages in the range we try to preserve:
312 */
313 addr = address + PAGE_SIZE;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100314 pfn++;
315 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
316 pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
Thomas Gleixnerfac84932008-02-09 23:24:09 +0100317
318 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
319 goto out_unlock;
320 }
321
322 /*
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100323 * If there are no changes, return. maxpages has been updated
324 * above:
325 */
326 if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
Ingo Molnarbeaff632008-02-04 16:48:09 +0100327 do_split = 0;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100328 goto out_unlock;
329 }
330
331 /*
332 * We need to change the attributes. Check, whether we can
333 * change the large page in one go. We request a split, when
334 * the address is not aligned and the number of pages is
335 * smaller than the number of pages in the large page. Note
336 * that we limited the number of possible pages already to
337 * the number of pages in the large page.
338 */
339 if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
340 /*
341 * The address is aligned and the number of pages
342 * covers the full page.
343 */
344 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
345 __set_pmd_pte(kpte, address, new_pte);
346 cpa->flushtlb = 1;
Ingo Molnarbeaff632008-02-04 16:48:09 +0100347 do_split = 0;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100348 }
349
350out_unlock:
351 spin_unlock_irqrestore(&pgd_lock, flags);
Ingo Molnar9df84992008-02-04 16:48:09 +0100352
Ingo Molnarbeaff632008-02-04 16:48:09 +0100353 return do_split;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100354}
355
/*
 * Pool of preallocated pages for splitting large pages. The list and
 * pool_pages are modified under pgd_lock.
 */
static LIST_HEAD(page_pool);
static unsigned long pool_size;		/* number of pages the pool should hold */
static unsigned long pool_pages;	/* number of pages currently in the pool */
static unsigned long pool_low;		/* lowest fill level seen so far */
static unsigned long pool_used;		/* pages consumed by large page splits */
static unsigned long pool_failed;	/* failed refill allocations */
static unsigned long pool_refill;	/* bit 0: refill in progress (bit lock) */
359
360static void cpa_fill_pool(void)
361{
362 struct page *p;
363 gfp_t gfp = GFP_KERNEL;
364
365 /* Do not allocate from interrupt context */
366 if (in_irq() || irqs_disabled())
367 return;
368 /*
369 * Check unlocked. I does not matter when we have one more
370 * page in the pool. The bit lock avoids recursive pool
371 * allocations:
372 */
373 if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
374 return;
375
376#ifdef CONFIG_DEBUG_PAGEALLOC
377 /*
378 * We could do:
379 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
380 * but this fails on !PREEMPT kernels
381 */
382 gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
383#endif
384
385 while (pool_pages < pool_size) {
386 p = alloc_pages(gfp, 0);
387 if (!p) {
388 pool_failed++;
389 break;
390 }
391 spin_lock_irq(&pgd_lock);
392 list_add(&p->lru, &page_pool);
393 pool_pages++;
394 spin_unlock_irq(&pgd_lock);
395 }
396 clear_bit_unlock(0, &pool_refill);
397}
398
399#define SHIFT_MB (20 - PAGE_SHIFT)
400#define ROUND_MB_GB ((1 << 10) - 1)
401#define SHIFT_MB_GB 10
402#define POOL_PAGES_PER_GB 16
403
404void __init cpa_init(void)
405{
406 struct sysinfo si;
407 unsigned long gb;
408
409 si_meminfo(&si);
410 /*
411 * Calculate the number of pool pages:
412 *
413 * Convert totalram (nr of pages) to MiB and round to the next
414 * GiB. Shift MiB to Gib and multiply the result by
415 * POOL_PAGES_PER_GB:
416 */
417 gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
418 pool_size = POOL_PAGES_PER_GB * gb;
419 pool_low = pool_size;
420
421 cpa_fill_pool();
422 printk(KERN_DEBUG
423 "CPA: page pool initialized %lu of %lu pages preallocated\n",
424 pool_pages, pool_size);
425}
426
Ingo Molnar7afe15b2008-01-30 13:33:57 +0100427static int split_large_page(pte_t *kpte, unsigned long address)
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100428{
Thomas Gleixner7b610ee2008-02-04 16:48:10 +0100429 unsigned long flags, pfn, pfninc = 1;
Ingo Molnar86f03982008-01-30 13:34:09 +0100430 unsigned int i, level;
Ingo Molnar9df84992008-02-04 16:48:09 +0100431 pte_t *pbase, *tmp;
432 pgprot_t ref_prot;
433 struct page *base;
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100434
Thomas Gleixnereb5b5f02008-02-09 23:24:09 +0100435 /*
436 * Get a page from the pool. The pool list is protected by the
437 * pgd_lock, which we have to take anyway for the split
438 * operation:
439 */
Ingo Molnar9a3dc782008-01-30 13:33:57 +0100440 spin_lock_irqsave(&pgd_lock, flags);
Thomas Gleixnereb5b5f02008-02-09 23:24:09 +0100441 if (list_empty(&page_pool)) {
442 spin_unlock_irqrestore(&pgd_lock, flags);
443 return -ENOMEM;
444 }
445
446 base = list_first_entry(&page_pool, struct page, lru);
447 list_del(&base->lru);
448 pool_pages--;
449
450 if (pool_pages < pool_low)
451 pool_low = pool_pages;
452
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100453 /*
454 * Check for races, another CPU might have split this page
455 * up for us already:
456 */
457 tmp = lookup_address(address, &level);
Ingo Molnar6ce9fc12008-02-04 16:48:08 +0100458 if (tmp != kpte)
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100459 goto out_unlock;
460
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100461 pbase = (pte_t *)page_address(base);
Ingo Molnar44af6c42008-01-30 13:34:03 +0100462#ifdef CONFIG_X86_32
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100463 paravirt_alloc_pt(&init_mm, page_to_pfn(base));
Ingo Molnar44af6c42008-01-30 13:34:03 +0100464#endif
Thomas Gleixner07cf89c2008-02-04 16:48:08 +0100465 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100466
Andi Kleenf07333f2008-02-04 16:48:09 +0100467#ifdef CONFIG_X86_64
468 if (level == PG_LEVEL_1G) {
469 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
470 pgprot_val(ref_prot) |= _PAGE_PSE;
Andi Kleenf07333f2008-02-04 16:48:09 +0100471 }
472#endif
473
Thomas Gleixner63c1dcf2008-02-04 16:48:05 +0100474 /*
475 * Get the target pfn from the original entry:
476 */
477 pfn = pte_pfn(*kpte);
Andi Kleenf07333f2008-02-04 16:48:09 +0100478 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
Thomas Gleixner63c1dcf2008-02-04 16:48:05 +0100479 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100480
481 /*
Thomas Gleixner07cf89c2008-02-04 16:48:08 +0100482 * Install the new, split up pagetable. Important details here:
Huang, Ying4c881ca2008-01-30 13:34:04 +0100483 *
484 * On Intel the NX bit of all levels must be cleared to make a
485 * page executable. See section 4.13.2 of Intel 64 and IA-32
486 * Architectures Software Developer's Manual).
Thomas Gleixner07cf89c2008-02-04 16:48:08 +0100487 *
488 * Mark the entry present. The current mapping might be
489 * set to not present, which we preserved above.
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100490 */
Huang, Ying4c881ca2008-01-30 13:34:04 +0100491 ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
Thomas Gleixner07cf89c2008-02-04 16:48:08 +0100492 pgprot_val(ref_prot) |= _PAGE_PRESENT;
Ingo Molnar9a3dc782008-01-30 13:33:57 +0100493 __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100494 base = NULL;
495
496out_unlock:
Thomas Gleixnereb5b5f02008-02-09 23:24:09 +0100497 /*
498 * If we dropped out via the lookup_address check under
499 * pgd_lock then stick the page back into the pool:
500 */
501 if (base) {
502 list_add(&base->lru, &page_pool);
503 pool_pages++;
504 } else
505 pool_used++;
Ingo Molnar9a3dc782008-01-30 13:33:57 +0100506 spin_unlock_irqrestore(&pgd_lock, flags);
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100507
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100508 return 0;
509}
510
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100511static int __change_page_attr(struct cpa_data *cpa, int primary)
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100512{
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100513 unsigned long address = cpa->vaddr;
Harvey Harrisonda7bfc52008-02-09 23:24:08 +0100514 int do_split, err;
515 unsigned int level;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100516 pte_t *kpte, old_pte;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517
Ingo Molnar97f99fe2008-01-30 13:33:55 +0100518repeat:
Ingo Molnarf0646e42008-01-30 13:33:43 +0100519 kpte = lookup_address(address, &level);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520 if (!kpte)
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100521 return primary ? -EINVAL : 0;
522
523 old_pte = *kpte;
524 if (!pte_val(old_pte)) {
525 if (!primary)
526 return 0;
527 printk(KERN_WARNING "CPA: called for zero pte. "
528 "vaddr = %lx cpa->vaddr = %lx\n", address,
529 cpa->vaddr);
530 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 return -EINVAL;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100532 }
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100533
Thomas Gleixner30551bb2008-01-30 13:34:04 +0100534 if (level == PG_LEVEL_4K) {
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100535 pte_t new_pte;
Arjan van de Ven626c2c92008-02-04 16:48:05 +0100536 pgprot_t new_prot = pte_pgprot(old_pte);
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100537 unsigned long pfn = pte_pfn(old_pte);
Thomas Gleixnera72a08a2008-01-30 13:34:07 +0100538
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100539 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
540 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
Ingo Molnar86f03982008-01-30 13:34:09 +0100541
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100542 new_prot = static_protections(new_prot, address, pfn);
Ingo Molnar86f03982008-01-30 13:34:09 +0100543
Arjan van de Ven626c2c92008-02-04 16:48:05 +0100544 /*
545 * We need to keep the pfn from the existing PTE,
546 * after all we're only going to change it's attributes
547 * not the memory it points to
548 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100549 new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
550 cpa->pfn = pfn;
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +0100551 /*
552 * Do we really change anything ?
553 */
554 if (pte_val(old_pte) != pte_val(new_pte)) {
555 set_pte_atomic(kpte, new_pte);
556 cpa->flushtlb = 1;
557 }
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100558 cpa->numpages = 1;
559 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560 }
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100561
562 /*
563 * Check, whether we can keep the large page intact
564 * and just change the pte:
565 */
Ingo Molnarbeaff632008-02-04 16:48:09 +0100566 do_split = try_preserve_large_page(kpte, address, cpa);
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100567 /*
568 * When the range fits into the existing large page,
569 * return. cp->numpages and cpa->tlbflush have been updated in
570 * try_large_page:
571 */
Ingo Molnar87f7f8f2008-02-04 16:48:10 +0100572 if (do_split <= 0)
573 return do_split;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100574
575 /*
576 * We have to split the large page:
577 */
Ingo Molnar87f7f8f2008-02-04 16:48:10 +0100578 err = split_large_page(kpte, address);
579 if (!err) {
580 cpa->flushtlb = 1;
581 goto repeat;
582 }
Ingo Molnarbeaff632008-02-04 16:48:09 +0100583
Ingo Molnar87f7f8f2008-02-04 16:48:10 +0100584 return err;
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100585}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100587static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
588
589static int cpa_process_alias(struct cpa_data *cpa)
Ingo Molnar44af6c42008-01-30 13:34:03 +0100590{
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100591 struct cpa_data alias_cpa;
Thomas Gleixnerf34b4392008-02-15 22:17:57 +0100592 int ret = 0;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100593
594 if (cpa->pfn > max_pfn_mapped)
595 return 0;
596
Thomas Gleixnerf34b4392008-02-15 22:17:57 +0100597 /*
598 * No need to redo, when the primary call touched the direct
599 * mapping already:
600 */
601 if (!within(cpa->vaddr, PAGE_OFFSET,
602 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100603
Thomas Gleixnerf34b4392008-02-15 22:17:57 +0100604 alias_cpa = *cpa;
605 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
606
607 ret = __change_page_attr_set_clr(&alias_cpa, 0);
608 }
Ingo Molnar44af6c42008-01-30 13:34:03 +0100609
Arjan van de Ven488fd992008-01-30 13:34:07 +0100610#ifdef CONFIG_X86_64
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100611 if (ret)
612 return ret;
Thomas Gleixner08797502008-01-30 13:34:09 +0100613 /*
Thomas Gleixnerf34b4392008-02-15 22:17:57 +0100614 * No need to redo, when the primary call touched the high
615 * mapping already:
616 */
617 if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
618 return 0;
619
620 /*
Thomas Gleixner08797502008-01-30 13:34:09 +0100621 * If the physical address is inside the kernel map, we need
622 * to touch the high mapped kernel as well:
623 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100624 if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
625 return 0;
Thomas Gleixner08797502008-01-30 13:34:09 +0100626
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100627 alias_cpa = *cpa;
628 alias_cpa.vaddr =
629 (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
630
631 /*
632 * The high mapping range is imprecise, so ignore the return value.
633 */
634 __change_page_attr_set_clr(&alias_cpa, 0);
Thomas Gleixner08797502008-01-30 13:34:09 +0100635#endif
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100636 return ret;
Ingo Molnar44af6c42008-01-30 13:34:03 +0100637}
638
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100639static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
Thomas Gleixnerff314522008-01-30 13:34:08 +0100640{
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100641 int ret, numpages = cpa->numpages;
Thomas Gleixnerff314522008-01-30 13:34:08 +0100642
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100643 while (numpages) {
644 /*
645 * Store the remaining nr of pages for the large page
646 * preservation check.
647 */
648 cpa->numpages = numpages;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100649
650 ret = __change_page_attr(cpa, checkalias);
Thomas Gleixnerff314522008-01-30 13:34:08 +0100651 if (ret)
652 return ret;
Thomas Gleixnerff314522008-01-30 13:34:08 +0100653
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100654 if (checkalias) {
655 ret = cpa_process_alias(cpa);
656 if (ret)
657 return ret;
658 }
659
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100660 /*
661 * Adjust the number of pages with the result of the
662 * CPA operation. Either a large page has been
663 * preserved or a single page update happened.
664 */
665 BUG_ON(cpa->numpages > numpages);
666 numpages -= cpa->numpages;
667 cpa->vaddr += cpa->numpages * PAGE_SIZE;
668 }
Thomas Gleixnerff314522008-01-30 13:34:08 +0100669 return 0;
670}
671
Andi Kleen6bb83832008-02-04 16:48:06 +0100672static inline int cache_attr(pgprot_t attr)
673{
674 return pgprot_val(attr) &
675 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
676}
677
Thomas Gleixnerff314522008-01-30 13:34:08 +0100678static int change_page_attr_set_clr(unsigned long addr, int numpages,
679 pgprot_t mask_set, pgprot_t mask_clr)
680{
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100681 struct cpa_data cpa;
Thomas Gleixneraf96e442008-02-15 21:49:46 +0100682 int ret, cache, checkalias;
Thomas Gleixner331e4062008-02-04 16:48:06 +0100683
684 /*
685 * Check, if we are requested to change a not supported
686 * feature:
687 */
688 mask_set = canon_pgprot(mask_set);
689 mask_clr = canon_pgprot(mask_clr);
690 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
691 return 0;
692
Thomas Gleixner69b14152008-02-13 11:04:50 +0100693 /* Ensure we are PAGE_SIZE aligned */
694 if (addr & ~PAGE_MASK) {
695 addr &= PAGE_MASK;
696 /*
697 * People should not be passing in unaligned addresses:
698 */
699 WARN_ON_ONCE(1);
700 }
701
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100702 cpa.vaddr = addr;
703 cpa.numpages = numpages;
704 cpa.mask_set = mask_set;
705 cpa.mask_clr = mask_clr;
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +0100706 cpa.flushtlb = 0;
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100707
Thomas Gleixneraf96e442008-02-15 21:49:46 +0100708 /* No alias checking for _NX bit modifications */
709 checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
710
711 ret = __change_page_attr_set_clr(&cpa, checkalias);
Thomas Gleixnerff314522008-01-30 13:34:08 +0100712
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100713 /*
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +0100714 * Check whether we really changed something:
715 */
716 if (!cpa.flushtlb)
Thomas Gleixner76ebd052008-02-09 23:24:09 +0100717 goto out;
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +0100718
719 /*
Andi Kleen6bb83832008-02-04 16:48:06 +0100720 * No need to flush, when we did not set any of the caching
721 * attributes:
722 */
723 cache = cache_attr(mask_set);
724
725 /*
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100726 * On success we use clflush, when the CPU supports it to
727 * avoid the wbindv. If the CPU does not support it and in the
Thomas Gleixneraf1e6842008-01-30 13:34:08 +0100728 * error case we fall back to cpa_flush_all (which uses
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100729 * wbindv):
730 */
731 if (!ret && cpu_has_clflush)
Andi Kleen6bb83832008-02-04 16:48:06 +0100732 cpa_flush_range(addr, numpages, cache);
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100733 else
Andi Kleen6bb83832008-02-04 16:48:06 +0100734 cpa_flush_all(cache);
Thomas Gleixnerff314522008-01-30 13:34:08 +0100735
Thomas Gleixner76ebd052008-02-09 23:24:09 +0100736out:
737 cpa_fill_pool();
Thomas Gleixnerff314522008-01-30 13:34:08 +0100738 return ret;
739}
740
Thomas Gleixner56744542008-01-30 13:34:08 +0100741static inline int change_page_attr_set(unsigned long addr, int numpages,
742 pgprot_t mask)
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100743{
Thomas Gleixner56744542008-01-30 13:34:08 +0100744 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100745}
746
Thomas Gleixner56744542008-01-30 13:34:08 +0100747static inline int change_page_attr_clear(unsigned long addr, int numpages,
748 pgprot_t mask)
Thomas Gleixner72932c72008-01-30 13:34:08 +0100749{
Huang, Ying58270402008-01-31 22:05:43 +0100750 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
Thomas Gleixner72932c72008-01-30 13:34:08 +0100751}
752
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100753int set_memory_uc(unsigned long addr, int numpages)
754{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100755 return change_page_attr_set(addr, numpages,
756 __pgprot(_PAGE_PCD | _PAGE_PWT));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100757}
758EXPORT_SYMBOL(set_memory_uc);
759
760int set_memory_wb(unsigned long addr, int numpages)
761{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100762 return change_page_attr_clear(addr, numpages,
763 __pgprot(_PAGE_PCD | _PAGE_PWT));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100764}
765EXPORT_SYMBOL(set_memory_wb);
766
767int set_memory_x(unsigned long addr, int numpages)
768{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100769 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100770}
771EXPORT_SYMBOL(set_memory_x);
772
773int set_memory_nx(unsigned long addr, int numpages)
774{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100775 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100776}
777EXPORT_SYMBOL(set_memory_nx);
778
779int set_memory_ro(unsigned long addr, int numpages)
780{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100781 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100782}
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100783
784int set_memory_rw(unsigned long addr, int numpages)
785{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100786 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100787}
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100788
789int set_memory_np(unsigned long addr, int numpages)
790{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100791 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100792}
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100793
/*
 * struct page flavour of set_memory_uc(): the address is obtained
 * from page_address(@page) and forwarded together with @numpages.
 */
int set_pages_uc(struct page *page, int numpages)
{
	return set_memory_uc((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_uc);
801
/*
 * struct page flavour of set_memory_wb(): the address is obtained
 * from page_address(@page) and forwarded together with @numpages.
 */
int set_pages_wb(struct page *page, int numpages)
{
	return set_memory_wb((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_wb);
809
/*
 * struct page flavour of set_memory_x(): the address is obtained
 * from page_address(@page) and forwarded together with @numpages.
 */
int set_pages_x(struct page *page, int numpages)
{
	return set_memory_x((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_x);
817
/*
 * struct page flavour of set_memory_nx(): the address is obtained
 * from page_address(@page) and forwarded together with @numpages.
 */
int set_pages_nx(struct page *page, int numpages)
{
	return set_memory_nx((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_nx);
825
/*
 * struct page flavour of set_memory_ro(): the address is obtained
 * from page_address(@page) and forwarded together with @numpages.
 */
int set_pages_ro(struct page *page, int numpages)
{
	return set_memory_ro((unsigned long)page_address(page), numpages);
}
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100832
/*
 * struct page flavour of set_memory_rw(): the address is obtained
 * from page_address(@page) and forwarded together with @numpages.
 */
int set_pages_rw(struct page *page, int numpages)
{
	return set_memory_rw((unsigned long)page_address(page), numpages);
}
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100839
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840#ifdef CONFIG_DEBUG_PAGEALLOC
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100841
842static int __set_pages_p(struct page *page, int numpages)
843{
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100844 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
845 .numpages = numpages,
846 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
847 .mask_clr = __pgprot(0)};
Thomas Gleixner72932c72008-01-30 13:34:08 +0100848
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100849 return __change_page_attr_set_clr(&cpa, 1);
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100850}
851
852static int __set_pages_np(struct page *page, int numpages)
853{
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100854 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
855 .numpages = numpages,
856 .mask_set = __pgprot(0),
857 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
Thomas Gleixner72932c72008-01-30 13:34:08 +0100858
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100859 return __change_page_attr_set_clr(&cpa, 1);
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100860}
861
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862void kernel_map_pages(struct page *page, int numpages, int enable)
863{
864 if (PageHighMem(page))
865 return;
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100866 if (!enable) {
Ingo Molnarf9b84042006-06-27 02:54:49 -0700867 debug_check_no_locks_freed(page_address(page),
868 numpages * PAGE_SIZE);
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100869 }
Ingo Molnarde5097c2006-01-09 15:59:21 -0800870
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100871 /*
Ingo Molnar12d6f212008-01-30 13:33:58 +0100872 * If page allocator is not up yet then do not call c_p_a():
873 */
874 if (!debug_pagealloc_enabled)
875 return;
876
877 /*
Ingo Molnarf8d84062008-02-13 14:09:53 +0100878 * The return value is ignored as the calls cannot fail.
879 * Large pages are kept enabled at boot time, and are
880 * split up quickly with DEBUG_PAGEALLOC. If a splitup
881 * fails here (due to temporary memory shortage) no damage
882 * is done because we just keep the largepage intact up
883 * to the next attempt when it will likely be split up:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 */
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100885 if (enable)
886 __set_pages_p(page, numpages);
887 else
888 __set_pages_np(page, numpages);
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100889
890 /*
Ingo Molnare4b71dc2008-01-30 13:34:04 +0100891 * We should perform an IPI and flush all tlbs,
892 * but that can deadlock->flush only current cpu:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 */
894 __flush_tlb_all();
Thomas Gleixner76ebd052008-02-09 23:24:09 +0100895
896 /*
897 * Try to refill the page pool here. We can do this only after
898 * the tlb flush.
899 */
900 cpa_fill_pool();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901}
Rafael J. Wysocki8a235ef2008-02-20 01:47:44 +0100902
903#ifdef CONFIG_HIBERNATION
904
905bool kernel_page_present(struct page *page)
906{
907 unsigned int level;
908 pte_t *pte;
909
910 if (PageHighMem(page))
911 return false;
912
913 pte = lookup_address((unsigned long)page_address(page), &level);
914 return (pte_val(*pte) & _PAGE_PRESENT);
915}
916
917#endif /* CONFIG_HIBERNATION */
918
919#endif /* CONFIG_DEBUG_PAGEALLOC */
Arjan van de Vend1028a12008-01-30 13:34:07 +0100920
921/*
922 * The testcases use internal knowledge of the implementation that shouldn't
923 * be exposed to the rest of the kernel. Include these directly here.
924 */
925#ifdef CONFIG_CPA_DEBUG
926#include "pageattr-test.c"
927#endif