blob: 7b4a17236972bc90815204f766e9e8bfc389dc10 [file] [log] [blame]
Ingo Molnar9f4c8152008-01-30 13:33:41 +01001/*
2 * Copyright 2002 Andi Kleen, SuSE Labs.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 * Thanks to Ben LaHaise for precious feedback.
Ingo Molnar9f4c8152008-01-30 13:33:41 +01004 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005#include <linux/highmem.h>
Ingo Molnar81922062008-01-30 13:34:04 +01006#include <linux/bootmem.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007#include <linux/module.h>
Ingo Molnar9f4c8152008-01-30 13:33:41 +01008#include <linux/sched.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -07009#include <linux/slab.h>
Ingo Molnar9f4c8152008-01-30 13:33:41 +010010#include <linux/mm.h>
Thomas Gleixner76ebd052008-02-09 23:24:09 +010011#include <linux/interrupt.h>
Ingo Molnar9f4c8152008-01-30 13:33:41 +010012
Thomas Gleixner950f9d92008-01-30 13:34:06 +010013#include <asm/e820.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <asm/processor.h>
15#include <asm/tlbflush.h>
Dave Jonesf8af0952006-01-06 00:12:10 -080016#include <asm/sections.h>
Ingo Molnar9f4c8152008-01-30 13:33:41 +010017#include <asm/uaccess.h>
18#include <asm/pgalloc.h>
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +010019#include <asm/proto.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020
Ingo Molnar9df84992008-02-04 16:48:09 +010021/*
22 * The current flushing context - we pass it instead of 5 arguments:
23 */
Thomas Gleixner72e458d2008-02-04 16:48:07 +010024struct cpa_data {
25 unsigned long vaddr;
Thomas Gleixner72e458d2008-02-04 16:48:07 +010026 pgprot_t mask_set;
27 pgprot_t mask_clr;
Thomas Gleixner65e074d2008-02-04 16:48:07 +010028 int numpages;
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +010029 int flushtlb;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +010030 unsigned long pfn;
Thomas Gleixner72e458d2008-02-04 16:48:07 +010031};
32
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +010033#ifdef CONFIG_X86_64
34
35static inline unsigned long highmap_start_pfn(void)
36{
37 return __pa(_text) >> PAGE_SHIFT;
38}
39
40static inline unsigned long highmap_end_pfn(void)
41{
42 return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
43}
44
45#endif
46
/*
 * Return non-zero when @addr lies in the half-open interval
 * [@start, @end), zero otherwise.
 */
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
	if (addr < start)
		return 0;

	return addr < end;
}
52
53/*
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010054 * Flushing functions
55 */
Thomas Gleixnercd8ddf12008-01-30 13:34:08 +010056
Thomas Gleixnercd8ddf12008-01-30 13:34:08 +010057/**
58 * clflush_cache_range - flush a cache range with clflush
59 * @addr: virtual start address
60 * @size: number of bytes to flush
61 *
62 * clflush is an unordered instruction which needs fencing with mfence
63 * to avoid ordering issues.
64 */
Ingo Molnar4c61afc2008-01-30 13:34:09 +010065void clflush_cache_range(void *vaddr, unsigned int size)
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010066{
Ingo Molnar4c61afc2008-01-30 13:34:09 +010067 void *vend = vaddr + size - 1;
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010068
Thomas Gleixnercd8ddf12008-01-30 13:34:08 +010069 mb();
Ingo Molnar4c61afc2008-01-30 13:34:09 +010070
71 for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
72 clflush(vaddr);
73 /*
74 * Flush any possible final partial cacheline:
75 */
76 clflush(vend);
77
Thomas Gleixnercd8ddf12008-01-30 13:34:08 +010078 mb();
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010079}
80
Thomas Gleixneraf1e6842008-01-30 13:34:08 +010081static void __cpa_flush_all(void *arg)
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010082{
Andi Kleen6bb83832008-02-04 16:48:06 +010083 unsigned long cache = (unsigned long)arg;
84
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010085 /*
86 * Flush all to work around Errata in early athlons regarding
87 * large page flushing.
88 */
89 __flush_tlb_all();
90
Andi Kleen6bb83832008-02-04 16:48:06 +010091 if (cache && boot_cpu_data.x86_model >= 4)
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010092 wbinvd();
93}
94
Andi Kleen6bb83832008-02-04 16:48:06 +010095static void cpa_flush_all(unsigned long cache)
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +010096{
97 BUG_ON(irqs_disabled());
98
Andi Kleen6bb83832008-02-04 16:48:06 +010099 on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +0100100}
101
/*
 * Per-CPU callback of cpa_flush_range(); @arg is unused.
 *
 * Currently this simply flushes the whole TLB. We could optimize that
 * further and do individual per page tlb invalidates for a low number
 * of pages. Caveat: we must flush the high aliases on 64bit as well.
 */
static void __cpa_flush_range(void *arg)
{
	__flush_tlb_all();
}
111
Andi Kleen6bb83832008-02-04 16:48:06 +0100112static void cpa_flush_range(unsigned long start, int numpages, int cache)
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100113{
Ingo Molnar4c61afc2008-01-30 13:34:09 +0100114 unsigned int i, level;
115 unsigned long addr;
116
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100117 BUG_ON(irqs_disabled());
Ingo Molnar4c61afc2008-01-30 13:34:09 +0100118 WARN_ON(PAGE_ALIGN(start) != start);
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100119
Thomas Gleixner3b233e52008-01-30 13:34:08 +0100120 on_each_cpu(__cpa_flush_range, NULL, 1, 1);
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100121
Andi Kleen6bb83832008-02-04 16:48:06 +0100122 if (!cache)
123 return;
124
Thomas Gleixner3b233e52008-01-30 13:34:08 +0100125 /*
126 * We only need to flush on one CPU,
127 * clflush is a MESI-coherent instruction that
128 * will cause all other CPUs to flush the same
129 * cachelines:
130 */
Ingo Molnar4c61afc2008-01-30 13:34:09 +0100131 for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
132 pte_t *pte = lookup_address(addr, &level);
133
134 /*
135 * Only flush present addresses:
136 */
Thomas Gleixner7bfb72e2008-02-04 16:48:08 +0100137 if (pte && (pte_val(*pte) & _PAGE_PRESENT))
Ingo Molnar4c61afc2008-01-30 13:34:09 +0100138 clflush_cache_range((void *) addr, PAGE_SIZE);
139 }
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100140}
141
Thomas Gleixnerd7c8f212008-01-30 13:34:07 +0100142/*
Arjan van de Vened724be2008-01-30 13:34:04 +0100143 * Certain areas of memory on x86 require very specific protection flags,
144 * for example the BIOS area or kernel text. Callers don't always get this
145 * right (again, ioremap() on BIOS memory is not uncommon) so this function
146 * checks and fixes these known static required protection bits.
147 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100148static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
149 unsigned long pfn)
Arjan van de Vened724be2008-01-30 13:34:04 +0100150{
151 pgprot_t forbidden = __pgprot(0);
152
Ingo Molnar687c4822008-01-30 13:34:04 +0100153 /*
Arjan van de Vened724be2008-01-30 13:34:04 +0100154 * The BIOS area between 640k and 1Mb needs to be executable for
155 * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
Ingo Molnar687c4822008-01-30 13:34:04 +0100156 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100157 if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT))
Arjan van de Vened724be2008-01-30 13:34:04 +0100158 pgprot_val(forbidden) |= _PAGE_NX;
159
160 /*
161 * The kernel text needs to be executable for obvious reasons
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100162 * Does not cover __inittext since that is gone later on. On
163 * 64bit we do not enforce !NX on the low mapping
Arjan van de Vened724be2008-01-30 13:34:04 +0100164 */
165 if (within(address, (unsigned long)_text, (unsigned long)_etext))
166 pgprot_val(forbidden) |= _PAGE_NX;
Arjan van de Vencc0f21b2008-02-04 16:48:05 +0100167
Arjan van de Vencc0f21b2008-02-04 16:48:05 +0100168 /*
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100169 * The .rodata section needs to be read-only. Using the pfn
170 * catches all aliases.
Arjan van de Vencc0f21b2008-02-04 16:48:05 +0100171 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100172 if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
173 __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
Arjan van de Vencc0f21b2008-02-04 16:48:05 +0100174 pgprot_val(forbidden) |= _PAGE_RW;
Arjan van de Vened724be2008-01-30 13:34:04 +0100175
176 prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
Ingo Molnar687c4822008-01-30 13:34:04 +0100177
178 return prot;
179}
180
Thomas Gleixner9a14aef2008-02-04 16:48:07 +0100181/*
182 * Lookup the page table entry for a virtual address. Return a pointer
183 * to the entry and the level of the mapping.
184 *
185 * Note: We return pud and pmd either when the entry is marked large
186 * or when the present bit is not set. Otherwise we would return a
187 * pointer to a nonexisting mapping.
188 */
Harvey Harrisonda7bfc52008-02-09 23:24:08 +0100189pte_t *lookup_address(unsigned long address, unsigned int *level)
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100190{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 pgd_t *pgd = pgd_offset_k(address);
192 pud_t *pud;
193 pmd_t *pmd;
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100194
Thomas Gleixner30551bb2008-01-30 13:34:04 +0100195 *level = PG_LEVEL_NONE;
196
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 if (pgd_none(*pgd))
198 return NULL;
Ingo Molnar9df84992008-02-04 16:48:09 +0100199
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 pud = pud_offset(pgd, address);
201 if (pud_none(*pud))
202 return NULL;
Andi Kleenc2f71ee2008-02-04 16:48:09 +0100203
204 *level = PG_LEVEL_1G;
205 if (pud_large(*pud) || !pud_present(*pud))
206 return (pte_t *)pud;
207
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 pmd = pmd_offset(pud, address);
209 if (pmd_none(*pmd))
210 return NULL;
Thomas Gleixner30551bb2008-01-30 13:34:04 +0100211
212 *level = PG_LEVEL_2M;
Thomas Gleixner9a14aef2008-02-04 16:48:07 +0100213 if (pmd_large(*pmd) || !pmd_present(*pmd))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214 return (pte_t *)pmd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215
Thomas Gleixner30551bb2008-01-30 13:34:04 +0100216 *level = PG_LEVEL_4K;
Ingo Molnar9df84992008-02-04 16:48:09 +0100217
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100218 return pte_offset_kernel(pmd, address);
219}
220
Ingo Molnar9df84992008-02-04 16:48:09 +0100221/*
222 * Set the new pmd in all the pgds we know about:
223 */
Ingo Molnar9a3dc782008-01-30 13:33:57 +0100224static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100225{
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100226 /* change init_mm */
227 set_pte_atomic(kpte, pte);
Ingo Molnar44af6c42008-01-30 13:34:03 +0100228#ifdef CONFIG_X86_32
Ingo Molnare4b71dc2008-01-30 13:34:04 +0100229 if (!SHARED_KERNEL_PMD) {
Ingo Molnar44af6c42008-01-30 13:34:03 +0100230 struct page *page;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231
Jeremy Fitzhardingee3ed9102008-01-30 13:34:11 +0100232 list_for_each_entry(page, &pgd_list, lru) {
Ingo Molnar44af6c42008-01-30 13:34:03 +0100233 pgd_t *pgd;
234 pud_t *pud;
235 pmd_t *pmd;
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100236
Ingo Molnar44af6c42008-01-30 13:34:03 +0100237 pgd = (pgd_t *)page_address(page) + pgd_index(address);
238 pud = pud_offset(pgd, address);
239 pmd = pmd_offset(pud, address);
240 set_pte_atomic((pte_t *)pmd, pte);
241 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 }
Ingo Molnar44af6c42008-01-30 13:34:03 +0100243#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244}
245
Ingo Molnar9df84992008-02-04 16:48:09 +0100246static int
247try_preserve_large_page(pte_t *kpte, unsigned long address,
248 struct cpa_data *cpa)
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100249{
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100250 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100251 pte_t new_pte, old_pte, *tmp;
252 pgprot_t old_prot, new_prot;
Thomas Gleixnerfac84932008-02-09 23:24:09 +0100253 int i, do_split = 1;
Harvey Harrisonda7bfc52008-02-09 23:24:08 +0100254 unsigned int level;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100255
256 spin_lock_irqsave(&pgd_lock, flags);
257 /*
258 * Check for races, another CPU might have split this page
259 * up already:
260 */
261 tmp = lookup_address(address, &level);
262 if (tmp != kpte)
263 goto out_unlock;
264
265 switch (level) {
266 case PG_LEVEL_2M:
Andi Kleen31422c52008-02-04 16:48:08 +0100267 psize = PMD_PAGE_SIZE;
268 pmask = PMD_PAGE_MASK;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100269 break;
Andi Kleenf07333f2008-02-04 16:48:09 +0100270#ifdef CONFIG_X86_64
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100271 case PG_LEVEL_1G:
Andi Kleen5d3c8b22008-02-13 16:20:35 +0100272 psize = PUD_PAGE_SIZE;
273 pmask = PUD_PAGE_MASK;
Andi Kleenf07333f2008-02-04 16:48:09 +0100274 break;
275#endif
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100276 default:
Ingo Molnarbeaff632008-02-04 16:48:09 +0100277 do_split = -EINVAL;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100278 goto out_unlock;
279 }
280
281 /*
282 * Calculate the number of pages, which fit into this large
283 * page starting at address:
284 */
285 nextpage_addr = (address + psize) & pmask;
286 numpages = (nextpage_addr - address) >> PAGE_SHIFT;
287 if (numpages < cpa->numpages)
288 cpa->numpages = numpages;
289
290 /*
291 * We are safe now. Check whether the new pgprot is the same:
292 */
293 old_pte = *kpte;
294 old_prot = new_prot = pte_pgprot(old_pte);
295
296 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
297 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100298
299 /*
300 * old_pte points to the large page base address. So we need
301 * to add the offset of the virtual address:
302 */
303 pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT);
304 cpa->pfn = pfn;
305
306 new_prot = static_protections(new_prot, address, pfn);
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100307
308 /*
Thomas Gleixnerfac84932008-02-09 23:24:09 +0100309 * We need to check the full range, whether
310 * static_protection() requires a different pgprot for one of
311 * the pages in the range we try to preserve:
312 */
313 addr = address + PAGE_SIZE;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100314 pfn++;
315 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
316 pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
Thomas Gleixnerfac84932008-02-09 23:24:09 +0100317
318 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
319 goto out_unlock;
320 }
321
322 /*
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100323 * If there are no changes, return. maxpages has been updated
324 * above:
325 */
326 if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
Ingo Molnarbeaff632008-02-04 16:48:09 +0100327 do_split = 0;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100328 goto out_unlock;
329 }
330
331 /*
332 * We need to change the attributes. Check, whether we can
333 * change the large page in one go. We request a split, when
334 * the address is not aligned and the number of pages is
335 * smaller than the number of pages in the large page. Note
336 * that we limited the number of possible pages already to
337 * the number of pages in the large page.
338 */
339 if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
340 /*
341 * The address is aligned and the number of pages
342 * covers the full page.
343 */
344 new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
345 __set_pmd_pte(kpte, address, new_pte);
346 cpa->flushtlb = 1;
Ingo Molnarbeaff632008-02-04 16:48:09 +0100347 do_split = 0;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100348 }
349
350out_unlock:
351 spin_unlock_irqrestore(&pgd_lock, flags);
Ingo Molnar9df84992008-02-04 16:48:09 +0100352
Ingo Molnarbeaff632008-02-04 16:48:09 +0100353 return do_split;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100354}
355
/*
 * Pool of preallocated pages which are used for splitting large pages.
 * The list and pool_pages are modified under pgd_lock.
 */
static LIST_HEAD(page_pool);
/* Number of pages the pool should hold */
static unsigned long pool_size;
/* Number of pages currently in the pool */
static unsigned long pool_pages;
/* Lowest pool_pages value seen so far */
static unsigned long pool_low;
/* Number of pool pages consumed by large page splits */
static unsigned long pool_used;
/* Number of failed page allocations while filling the pool */
static unsigned long pool_failed;
/* Bit 0 serves as a lock against concurrent/recursive refills */
static unsigned long pool_refill;
359
360static void cpa_fill_pool(void)
361{
362 struct page *p;
363 gfp_t gfp = GFP_KERNEL;
364
365 /* Do not allocate from interrupt context */
366 if (in_irq() || irqs_disabled())
367 return;
368 /*
369 * Check unlocked. I does not matter when we have one more
370 * page in the pool. The bit lock avoids recursive pool
371 * allocations:
372 */
373 if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
374 return;
375
376#ifdef CONFIG_DEBUG_PAGEALLOC
377 /*
378 * We could do:
379 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
380 * but this fails on !PREEMPT kernels
381 */
382 gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
383#endif
384
385 while (pool_pages < pool_size) {
386 p = alloc_pages(gfp, 0);
387 if (!p) {
388 pool_failed++;
389 break;
390 }
391 spin_lock_irq(&pgd_lock);
392 list_add(&p->lru, &page_pool);
393 pool_pages++;
394 spin_unlock_irq(&pgd_lock);
395 }
396 clear_bit_unlock(0, &pool_refill);
397}
398
/* Shift which converts a number of pages into MiB: */
#define SHIFT_MB		(20 - PAGE_SHIFT)
/* Shift which converts MiB into GiB: */
#define SHIFT_MB_GB		10
/* Added to a MiB value before shifting, to round up to the next GiB: */
#define ROUND_MB_GB		((1 << SHIFT_MB_GB) - 1)
/* Number of pool pages per GiB of RAM: */
#define POOL_PAGES_PER_GB	16
403
404void __init cpa_init(void)
405{
406 struct sysinfo si;
407 unsigned long gb;
408
409 si_meminfo(&si);
410 /*
411 * Calculate the number of pool pages:
412 *
413 * Convert totalram (nr of pages) to MiB and round to the next
414 * GiB. Shift MiB to Gib and multiply the result by
415 * POOL_PAGES_PER_GB:
416 */
417 gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
418 pool_size = POOL_PAGES_PER_GB * gb;
419 pool_low = pool_size;
420
421 cpa_fill_pool();
422 printk(KERN_DEBUG
423 "CPA: page pool initialized %lu of %lu pages preallocated\n",
424 pool_pages, pool_size);
425}
426
Ingo Molnar7afe15b2008-01-30 13:33:57 +0100427static int split_large_page(pte_t *kpte, unsigned long address)
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100428{
Thomas Gleixner7b610ee2008-02-04 16:48:10 +0100429 unsigned long flags, pfn, pfninc = 1;
Ingo Molnar86f03982008-01-30 13:34:09 +0100430 unsigned int i, level;
Ingo Molnar9df84992008-02-04 16:48:09 +0100431 pte_t *pbase, *tmp;
432 pgprot_t ref_prot;
433 struct page *base;
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100434
Thomas Gleixnereb5b5f02008-02-09 23:24:09 +0100435 /*
436 * Get a page from the pool. The pool list is protected by the
437 * pgd_lock, which we have to take anyway for the split
438 * operation:
439 */
Ingo Molnar9a3dc782008-01-30 13:33:57 +0100440 spin_lock_irqsave(&pgd_lock, flags);
Thomas Gleixnereb5b5f02008-02-09 23:24:09 +0100441 if (list_empty(&page_pool)) {
442 spin_unlock_irqrestore(&pgd_lock, flags);
443 return -ENOMEM;
444 }
445
446 base = list_first_entry(&page_pool, struct page, lru);
447 list_del(&base->lru);
448 pool_pages--;
449
450 if (pool_pages < pool_low)
451 pool_low = pool_pages;
452
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100453 /*
454 * Check for races, another CPU might have split this page
455 * up for us already:
456 */
457 tmp = lookup_address(address, &level);
Ingo Molnar6ce9fc12008-02-04 16:48:08 +0100458 if (tmp != kpte)
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100459 goto out_unlock;
460
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100461 pbase = (pte_t *)page_address(base);
Ingo Molnar44af6c42008-01-30 13:34:03 +0100462#ifdef CONFIG_X86_32
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100463 paravirt_alloc_pt(&init_mm, page_to_pfn(base));
Ingo Molnar44af6c42008-01-30 13:34:03 +0100464#endif
Thomas Gleixner07cf89c2008-02-04 16:48:08 +0100465 ref_prot = pte_pgprot(pte_clrhuge(*kpte));
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100466
Andi Kleenf07333f2008-02-04 16:48:09 +0100467#ifdef CONFIG_X86_64
468 if (level == PG_LEVEL_1G) {
469 pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
470 pgprot_val(ref_prot) |= _PAGE_PSE;
Andi Kleenf07333f2008-02-04 16:48:09 +0100471 }
472#endif
473
Thomas Gleixner63c1dcf2008-02-04 16:48:05 +0100474 /*
475 * Get the target pfn from the original entry:
476 */
477 pfn = pte_pfn(*kpte);
Andi Kleenf07333f2008-02-04 16:48:09 +0100478 for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
Thomas Gleixner63c1dcf2008-02-04 16:48:05 +0100479 set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100480
481 /*
Thomas Gleixner07cf89c2008-02-04 16:48:08 +0100482 * Install the new, split up pagetable. Important details here:
Huang, Ying4c881ca2008-01-30 13:34:04 +0100483 *
484 * On Intel the NX bit of all levels must be cleared to make a
485 * page executable. See section 4.13.2 of Intel 64 and IA-32
486 * Architectures Software Developer's Manual).
Thomas Gleixner07cf89c2008-02-04 16:48:08 +0100487 *
488 * Mark the entry present. The current mapping might be
489 * set to not present, which we preserved above.
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100490 */
Huang, Ying4c881ca2008-01-30 13:34:04 +0100491 ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
Thomas Gleixner07cf89c2008-02-04 16:48:08 +0100492 pgprot_val(ref_prot) |= _PAGE_PRESENT;
Ingo Molnar9a3dc782008-01-30 13:33:57 +0100493 __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100494 base = NULL;
495
496out_unlock:
Thomas Gleixnereb5b5f02008-02-09 23:24:09 +0100497 /*
498 * If we dropped out via the lookup_address check under
499 * pgd_lock then stick the page back into the pool:
500 */
501 if (base) {
502 list_add(&base->lru, &page_pool);
503 pool_pages++;
504 } else
505 pool_used++;
Ingo Molnar9a3dc782008-01-30 13:33:57 +0100506 spin_unlock_irqrestore(&pgd_lock, flags);
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100507
Ingo Molnarbb5c2db2008-01-30 13:33:56 +0100508 return 0;
509}
510
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100511static int __change_page_attr(struct cpa_data *cpa, int primary)
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100512{
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100513 unsigned long address = cpa->vaddr;
Harvey Harrisonda7bfc52008-02-09 23:24:08 +0100514 int do_split, err;
515 unsigned int level;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 struct page *kpte_page;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100517 pte_t *kpte, old_pte;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518
Ingo Molnar97f99fe2008-01-30 13:33:55 +0100519repeat:
Ingo Molnarf0646e42008-01-30 13:33:43 +0100520 kpte = lookup_address(address, &level);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521 if (!kpte)
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100522 return primary ? -EINVAL : 0;
523
524 old_pte = *kpte;
525 if (!pte_val(old_pte)) {
526 if (!primary)
527 return 0;
528 printk(KERN_WARNING "CPA: called for zero pte. "
529 "vaddr = %lx cpa->vaddr = %lx\n", address,
530 cpa->vaddr);
531 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 return -EINVAL;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100533 }
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100534
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 kpte_page = virt_to_page(kpte);
Andi Kleen65d2f0b2007-07-21 17:09:51 +0200536 BUG_ON(PageLRU(kpte_page));
537 BUG_ON(PageCompound(kpte_page));
538
Thomas Gleixner30551bb2008-01-30 13:34:04 +0100539 if (level == PG_LEVEL_4K) {
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100540 pte_t new_pte;
Arjan van de Ven626c2c92008-02-04 16:48:05 +0100541 pgprot_t new_prot = pte_pgprot(old_pte);
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100542 unsigned long pfn = pte_pfn(old_pte);
Thomas Gleixnera72a08a2008-01-30 13:34:07 +0100543
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100544 pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
545 pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
Ingo Molnar86f03982008-01-30 13:34:09 +0100546
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100547 new_prot = static_protections(new_prot, address, pfn);
Ingo Molnar86f03982008-01-30 13:34:09 +0100548
Arjan van de Ven626c2c92008-02-04 16:48:05 +0100549 /*
550 * We need to keep the pfn from the existing PTE,
551 * after all we're only going to change it's attributes
552 * not the memory it points to
553 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100554 new_pte = pfn_pte(pfn, canon_pgprot(new_prot));
555 cpa->pfn = pfn;
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +0100556 /*
557 * Do we really change anything ?
558 */
559 if (pte_val(old_pte) != pte_val(new_pte)) {
560 set_pte_atomic(kpte, new_pte);
561 cpa->flushtlb = 1;
562 }
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100563 cpa->numpages = 1;
564 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 }
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100566
567 /*
568 * Check, whether we can keep the large page intact
569 * and just change the pte:
570 */
Ingo Molnarbeaff632008-02-04 16:48:09 +0100571 do_split = try_preserve_large_page(kpte, address, cpa);
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100572 /*
573 * When the range fits into the existing large page,
574 * return. cp->numpages and cpa->tlbflush have been updated in
575 * try_large_page:
576 */
Ingo Molnar87f7f8f2008-02-04 16:48:10 +0100577 if (do_split <= 0)
578 return do_split;
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100579
580 /*
581 * We have to split the large page:
582 */
Ingo Molnar87f7f8f2008-02-04 16:48:10 +0100583 err = split_large_page(kpte, address);
584 if (!err) {
585 cpa->flushtlb = 1;
586 goto repeat;
587 }
Ingo Molnarbeaff632008-02-04 16:48:09 +0100588
Ingo Molnar87f7f8f2008-02-04 16:48:10 +0100589 return err;
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100590}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100592static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
593
594static int cpa_process_alias(struct cpa_data *cpa)
Ingo Molnar44af6c42008-01-30 13:34:03 +0100595{
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100596 struct cpa_data alias_cpa;
597 int ret;
598
599 if (cpa->pfn > max_pfn_mapped)
600 return 0;
601
602 alias_cpa = *cpa;
603 alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
604
605 ret = __change_page_attr_set_clr(&alias_cpa, 0);
Ingo Molnar44af6c42008-01-30 13:34:03 +0100606
Arjan van de Ven488fd992008-01-30 13:34:07 +0100607#ifdef CONFIG_X86_64
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100608 if (ret)
609 return ret;
Thomas Gleixner08797502008-01-30 13:34:09 +0100610 /*
611 * If the physical address is inside the kernel map, we need
612 * to touch the high mapped kernel as well:
613 */
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100614 if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
615 return 0;
Thomas Gleixner08797502008-01-30 13:34:09 +0100616
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100617 alias_cpa = *cpa;
618 alias_cpa.vaddr =
619 (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
620
621 /*
622 * The high mapping range is imprecise, so ignore the return value.
623 */
624 __change_page_attr_set_clr(&alias_cpa, 0);
Thomas Gleixner08797502008-01-30 13:34:09 +0100625#endif
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100626 return ret;
Ingo Molnar44af6c42008-01-30 13:34:03 +0100627}
628
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100629static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
Thomas Gleixnerff314522008-01-30 13:34:08 +0100630{
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100631 int ret, numpages = cpa->numpages;
Thomas Gleixnerff314522008-01-30 13:34:08 +0100632
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100633 while (numpages) {
634 /*
635 * Store the remaining nr of pages for the large page
636 * preservation check.
637 */
638 cpa->numpages = numpages;
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100639
640 ret = __change_page_attr(cpa, checkalias);
Thomas Gleixnerff314522008-01-30 13:34:08 +0100641 if (ret)
642 return ret;
Thomas Gleixnerff314522008-01-30 13:34:08 +0100643
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100644 if (checkalias) {
645 ret = cpa_process_alias(cpa);
646 if (ret)
647 return ret;
648 }
649
Thomas Gleixner65e074d2008-02-04 16:48:07 +0100650 /*
651 * Adjust the number of pages with the result of the
652 * CPA operation. Either a large page has been
653 * preserved or a single page update happened.
654 */
655 BUG_ON(cpa->numpages > numpages);
656 numpages -= cpa->numpages;
657 cpa->vaddr += cpa->numpages * PAGE_SIZE;
658 }
Thomas Gleixnerff314522008-01-30 13:34:08 +0100659 return 0;
660}
661
Andi Kleen6bb83832008-02-04 16:48:06 +0100662static inline int cache_attr(pgprot_t attr)
663{
664 return pgprot_val(attr) &
665 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
666}
667
Thomas Gleixnerff314522008-01-30 13:34:08 +0100668static int change_page_attr_set_clr(unsigned long addr, int numpages,
669 pgprot_t mask_set, pgprot_t mask_clr)
670{
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100671 struct cpa_data cpa;
Andi Kleen6bb83832008-02-04 16:48:06 +0100672 int ret, cache;
Thomas Gleixner331e4062008-02-04 16:48:06 +0100673
674 /*
675 * Check, if we are requested to change a not supported
676 * feature:
677 */
678 mask_set = canon_pgprot(mask_set);
679 mask_clr = canon_pgprot(mask_clr);
680 if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
681 return 0;
682
Thomas Gleixner69b14152008-02-13 11:04:50 +0100683 /* Ensure we are PAGE_SIZE aligned */
684 if (addr & ~PAGE_MASK) {
685 addr &= PAGE_MASK;
686 /*
687 * People should not be passing in unaligned addresses:
688 */
689 WARN_ON_ONCE(1);
690 }
691
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100692 cpa.vaddr = addr;
693 cpa.numpages = numpages;
694 cpa.mask_set = mask_set;
695 cpa.mask_clr = mask_clr;
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +0100696 cpa.flushtlb = 0;
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100697
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100698 ret = __change_page_attr_set_clr(&cpa, 1);
Thomas Gleixnerff314522008-01-30 13:34:08 +0100699
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100700 /*
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +0100701 * Check whether we really changed something:
702 */
703 if (!cpa.flushtlb)
Thomas Gleixner76ebd052008-02-09 23:24:09 +0100704 goto out;
Thomas Gleixnerf4ae5da2008-02-04 16:48:07 +0100705
706 /*
Andi Kleen6bb83832008-02-04 16:48:06 +0100707 * No need to flush, when we did not set any of the caching
708 * attributes:
709 */
710 cache = cache_attr(mask_set);
711
712 /*
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100713 * On success we use clflush, when the CPU supports it to
714 * avoid the wbindv. If the CPU does not support it and in the
Thomas Gleixneraf1e6842008-01-30 13:34:08 +0100715 * error case we fall back to cpa_flush_all (which uses
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100716 * wbindv):
717 */
718 if (!ret && cpu_has_clflush)
Andi Kleen6bb83832008-02-04 16:48:06 +0100719 cpa_flush_range(addr, numpages, cache);
Thomas Gleixner57a6a462008-01-30 13:34:08 +0100720 else
Andi Kleen6bb83832008-02-04 16:48:06 +0100721 cpa_flush_all(cache);
Thomas Gleixnerff314522008-01-30 13:34:08 +0100722
Thomas Gleixner76ebd052008-02-09 23:24:09 +0100723out:
724 cpa_fill_pool();
Thomas Gleixnerff314522008-01-30 13:34:08 +0100725 return ret;
726}
727
Thomas Gleixner56744542008-01-30 13:34:08 +0100728static inline int change_page_attr_set(unsigned long addr, int numpages,
729 pgprot_t mask)
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100730{
Thomas Gleixner56744542008-01-30 13:34:08 +0100731 return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100732}
733
Thomas Gleixner56744542008-01-30 13:34:08 +0100734static inline int change_page_attr_clear(unsigned long addr, int numpages,
735 pgprot_t mask)
Thomas Gleixner72932c72008-01-30 13:34:08 +0100736{
Huang, Ying58270402008-01-31 22:05:43 +0100737 return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
Thomas Gleixner72932c72008-01-30 13:34:08 +0100738}
739
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100740int set_memory_uc(unsigned long addr, int numpages)
741{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100742 return change_page_attr_set(addr, numpages,
743 __pgprot(_PAGE_PCD | _PAGE_PWT));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100744}
745EXPORT_SYMBOL(set_memory_uc);
746
747int set_memory_wb(unsigned long addr, int numpages)
748{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100749 return change_page_attr_clear(addr, numpages,
750 __pgprot(_PAGE_PCD | _PAGE_PWT));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100751}
752EXPORT_SYMBOL(set_memory_wb);
753
754int set_memory_x(unsigned long addr, int numpages)
755{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100756 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100757}
758EXPORT_SYMBOL(set_memory_x);
759
760int set_memory_nx(unsigned long addr, int numpages)
761{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100762 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100763}
764EXPORT_SYMBOL(set_memory_nx);
765
766int set_memory_ro(unsigned long addr, int numpages)
767{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100768 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100769}
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100770
771int set_memory_rw(unsigned long addr, int numpages)
772{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100773 return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100774}
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100775
776int set_memory_np(unsigned long addr, int numpages)
777{
Thomas Gleixner72932c72008-01-30 13:34:08 +0100778 return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100779}
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100780
/*
 * struct page flavour of set_memory_uc(): the address is obtained with
 * page_address(@page) and forwarded together with @numpages.
 */
int set_pages_uc(struct page *page, int numpages)
{
	return set_memory_uc((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_uc);
788
/*
 * struct page flavour of set_memory_wb(): the address is obtained with
 * page_address(@page) and forwarded together with @numpages.
 */
int set_pages_wb(struct page *page, int numpages)
{
	return set_memory_wb((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_wb);
796
/*
 * struct page flavour of set_memory_x(): the address is obtained with
 * page_address(@page) and forwarded together with @numpages.
 */
int set_pages_x(struct page *page, int numpages)
{
	return set_memory_x((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_x);
804
/*
 * struct page flavour of set_memory_nx(): the address is obtained with
 * page_address(@page) and forwarded together with @numpages.
 */
int set_pages_nx(struct page *page, int numpages)
{
	return set_memory_nx((unsigned long)page_address(page), numpages);
}
EXPORT_SYMBOL(set_pages_nx);
812
/*
 * struct page flavour of set_memory_ro(): the address is obtained with
 * page_address(@page) and forwarded together with @numpages.
 */
int set_pages_ro(struct page *page, int numpages)
{
	return set_memory_ro((unsigned long)page_address(page), numpages);
}
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100819
/*
 * struct page flavour of set_memory_rw(): the address is obtained with
 * page_address(@page) and forwarded together with @numpages.
 */
int set_pages_rw(struct page *page, int numpages)
{
	return set_memory_rw((unsigned long)page_address(page), numpages);
}
Arjan van de Ven75cbade2008-01-30 13:34:06 +0100826
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827#ifdef CONFIG_DEBUG_PAGEALLOC
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100828
829static int __set_pages_p(struct page *page, int numpages)
830{
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100831 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
832 .numpages = numpages,
833 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
834 .mask_clr = __pgprot(0)};
Thomas Gleixner72932c72008-01-30 13:34:08 +0100835
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100836 return __change_page_attr_set_clr(&cpa, 1);
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100837}
838
839static int __set_pages_np(struct page *page, int numpages)
840{
Thomas Gleixner72e458d2008-02-04 16:48:07 +0100841 struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
842 .numpages = numpages,
843 .mask_set = __pgprot(0),
844 .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
Thomas Gleixner72932c72008-01-30 13:34:08 +0100845
Thomas Gleixnerc31c7d42008-02-18 20:54:14 +0100846 return __change_page_attr_set_clr(&cpa, 1);
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100847}
848
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849void kernel_map_pages(struct page *page, int numpages, int enable)
850{
851 if (PageHighMem(page))
852 return;
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100853 if (!enable) {
Ingo Molnarf9b84042006-06-27 02:54:49 -0700854 debug_check_no_locks_freed(page_address(page),
855 numpages * PAGE_SIZE);
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100856 }
Ingo Molnarde5097c2006-01-09 15:59:21 -0800857
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100858 /*
Ingo Molnar12d6f212008-01-30 13:33:58 +0100859 * If page allocator is not up yet then do not call c_p_a():
860 */
861 if (!debug_pagealloc_enabled)
862 return;
863
864 /*
Ingo Molnarf8d84062008-02-13 14:09:53 +0100865 * The return value is ignored as the calls cannot fail.
866 * Large pages are kept enabled at boot time, and are
867 * split up quickly with DEBUG_PAGEALLOC. If a splitup
868 * fails here (due to temporary memory shortage) no damage
869 * is done because we just keep the largepage intact up
870 * to the next attempt when it will likely be split up:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871 */
Ingo Molnarf62d0f02008-01-30 13:34:07 +0100872 if (enable)
873 __set_pages_p(page, numpages);
874 else
875 __set_pages_np(page, numpages);
Ingo Molnar9f4c8152008-01-30 13:33:41 +0100876
877 /*
Ingo Molnare4b71dc2008-01-30 13:34:04 +0100878 * We should perform an IPI and flush all tlbs,
879 * but that can deadlock->flush only current cpu:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 */
881 __flush_tlb_all();
Thomas Gleixner76ebd052008-02-09 23:24:09 +0100882
883 /*
884 * Try to refill the page pool here. We can do this only after
885 * the tlb flush.
886 */
887 cpa_fill_pool();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888}
889#endif
Arjan van de Vend1028a12008-01-30 13:34:07 +0100890
891/*
892 * The testcases use internal knowledge of the implementation that shouldn't
893 * be exposed to the rest of the kernel. Include these directly here.
894 */
895#ifdef CONFIG_CPA_DEBUG
896#include "pageattr-test.c"
897#endif