blob: 5337ca0ea7b9733b0ac91c6a323362079ba88f4b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/x86_64/mm/init.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
7 */
8
9#include <linux/config.h>
10#include <linux/signal.h>
11#include <linux/sched.h>
12#include <linux/kernel.h>
13#include <linux/errno.h>
14#include <linux/string.h>
15#include <linux/types.h>
16#include <linux/ptrace.h>
17#include <linux/mman.h>
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/smp.h>
21#include <linux/init.h>
22#include <linux/pagemap.h>
23#include <linux/bootmem.h>
24#include <linux/proc_fs.h>
Andi Kleen59170892005-11-05 17:25:53 +010025#include <linux/pci.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026
27#include <asm/processor.h>
28#include <asm/system.h>
29#include <asm/uaccess.h>
30#include <asm/pgtable.h>
31#include <asm/pgalloc.h>
32#include <asm/dma.h>
33#include <asm/fixmap.h>
34#include <asm/e820.h>
35#include <asm/apic.h>
36#include <asm/tlb.h>
37#include <asm/mmu_context.h>
38#include <asm/proto.h>
39#include <asm/smp.h>
Andi Kleen2bc04142005-11-05 17:25:53 +010040#include <asm/sections.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041
42#ifndef Dprintk
43#define Dprintk(x...)
44#endif
45
Andi Kleene18c6872005-11-05 17:25:53 +010046static unsigned long dma_reserve __initdata;
47
Linus Torvalds1da177e2005-04-16 15:20:36 -070048DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
49
50/*
51 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
52 * physical space, so we can cache the location of the first one and move
53 * around without checking the pgd every time.
54 */
55
56void show_mem(void)
57{
Andi Kleene92343c2005-09-12 18:49:24 +020058 long i, total = 0, reserved = 0;
59 long shared = 0, cached = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070060 pg_data_t *pgdat;
61 struct page *page;
62
Andi Kleene92343c2005-09-12 18:49:24 +020063 printk(KERN_INFO "Mem-info:\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -070064 show_free_areas();
Andi Kleene92343c2005-09-12 18:49:24 +020065 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
Linus Torvalds1da177e2005-04-16 15:20:36 -070066
67 for_each_pgdat(pgdat) {
68 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
69 page = pfn_to_page(pgdat->node_start_pfn + i);
70 total++;
Andi Kleene92343c2005-09-12 18:49:24 +020071 if (PageReserved(page))
72 reserved++;
73 else if (PageSwapCache(page))
74 cached++;
75 else if (page_count(page))
76 shared += page_count(page) - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -070077 }
78 }
Andi Kleene92343c2005-09-12 18:49:24 +020079 printk(KERN_INFO "%lu pages of RAM\n", total);
80 printk(KERN_INFO "%lu reserved pages\n",reserved);
81 printk(KERN_INFO "%lu pages shared\n",shared);
82 printk(KERN_INFO "%lu pages swap cached\n",cached);
Linus Torvalds1da177e2005-04-16 15:20:36 -070083}
84
85/* References to section boundaries */
86
Linus Torvalds1da177e2005-04-16 15:20:36 -070087int after_bootmem;
88
89static void *spp_getpage(void)
90{
91 void *ptr;
92 if (after_bootmem)
93 ptr = (void *) get_zeroed_page(GFP_ATOMIC);
94 else
95 ptr = alloc_bootmem_pages(PAGE_SIZE);
96 if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
97 panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
98
99 Dprintk("spp_getpage %p\n", ptr);
100 return ptr;
101}
102
103static void set_pte_phys(unsigned long vaddr,
104 unsigned long phys, pgprot_t prot)
105{
106 pgd_t *pgd;
107 pud_t *pud;
108 pmd_t *pmd;
109 pte_t *pte, new_pte;
110
111 Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
112
113 pgd = pgd_offset_k(vaddr);
114 if (pgd_none(*pgd)) {
115 printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
116 return;
117 }
118 pud = pud_offset(pgd, vaddr);
119 if (pud_none(*pud)) {
120 pmd = (pmd_t *) spp_getpage();
121 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
122 if (pmd != pmd_offset(pud, 0)) {
123 printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
124 return;
125 }
126 }
127 pmd = pmd_offset(pud, vaddr);
128 if (pmd_none(*pmd)) {
129 pte = (pte_t *) spp_getpage();
130 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
131 if (pte != pte_offset_kernel(pmd, 0)) {
132 printk("PAGETABLE BUG #02!\n");
133 return;
134 }
135 }
136 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
137
138 pte = pte_offset_kernel(pmd, vaddr);
139 if (!pte_none(*pte) &&
140 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
141 pte_ERROR(*pte);
142 set_pte(pte, new_pte);
143
144 /*
145 * It's enough to flush this one mapping.
146 * (PGE mappings get flushed as well)
147 */
148 __flush_tlb_one(vaddr);
149}
150
151/* NOTE: this is meant to be run only at boot */
152void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
153{
154 unsigned long address = __fix_to_virt(idx);
155
156 if (idx >= __end_of_fixed_addresses) {
157 printk("Invalid __set_fixmap\n");
158 return;
159 }
160 set_pte_phys(address, phys, prot);
161}
162
163unsigned long __initdata table_start, table_end;
164
165extern pmd_t temp_boot_pmds[];
166
167static struct temp_map {
168 pmd_t *pmd;
169 void *address;
170 int allocated;
171} temp_mappings[] __initdata = {
172 { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
173 { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
174 {}
175};
176
177static __init void *alloc_low_page(int *index, unsigned long *phys)
178{
179 struct temp_map *ti;
180 int i;
181 unsigned long pfn = table_end++, paddr;
182 void *adr;
183
184 if (pfn >= end_pfn)
185 panic("alloc_low_page: ran out of memory");
186 for (i = 0; temp_mappings[i].allocated; i++) {
187 if (!temp_mappings[i].pmd)
188 panic("alloc_low_page: ran out of temp mappings");
189 }
190 ti = &temp_mappings[i];
191 paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
192 set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
193 ti->allocated = 1;
194 __flush_tlb();
195 adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
196 *index = i;
197 *phys = pfn * PAGE_SIZE;
198 return adr;
199}
200
201static __init void unmap_low_page(int i)
202{
203 struct temp_map *ti = &temp_mappings[i];
204 set_pmd(ti->pmd, __pmd(0));
205 ti->allocated = 0;
206}
207
208static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
209{
210 long i, j;
211
212 i = pud_index(address);
213 pud = pud + i;
214 for (; i < PTRS_PER_PUD; pud++, i++) {
215 int map;
216 unsigned long paddr, pmd_phys;
217 pmd_t *pmd;
218
219 paddr = address + i*PUD_SIZE;
220 if (paddr >= end) {
221 for (; i < PTRS_PER_PUD; i++, pud++)
222 set_pud(pud, __pud(0));
223 break;
224 }
225
226 if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
227 set_pud(pud, __pud(0));
228 continue;
229 }
230
231 pmd = alloc_low_page(&map, &pmd_phys);
232 set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
233 for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
234 unsigned long pe;
235
236 if (paddr >= end) {
237 for (; j < PTRS_PER_PMD; j++, pmd++)
238 set_pmd(pmd, __pmd(0));
239 break;
240 }
241 pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
242 pe &= __supported_pte_mask;
243 set_pmd(pmd, __pmd(pe));
244 }
245 unmap_low_page(map);
246 }
247 __flush_tlb();
248}
249
250static void __init find_early_table_space(unsigned long end)
251{
252 unsigned long puds, pmds, tables;
253
254 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
255 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
256 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
257 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
258
259 table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables);
260 if (table_start == -1UL)
261 panic("Cannot find space for the kernel page tables");
262
263 table_start >>= PAGE_SHIFT;
264 table_end = table_start;
265}
266
267/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
268 This runs before bootmem is initialized and gets pages directly from the
269 physical memory. To access them they are temporarily mapped. */
270void __init init_memory_mapping(unsigned long start, unsigned long end)
271{
272 unsigned long next;
273
274 Dprintk("init_memory_mapping\n");
275
276 /*
277 * Find space for the kernel direct mapping tables.
278 * Later we should allocate these tables in the local node of the memory
279 * mapped. Unfortunately this is done currently before the nodes are
280 * discovered.
281 */
282 find_early_table_space(end);
283
284 start = (unsigned long)__va(start);
285 end = (unsigned long)__va(end);
286
287 for (; start < end; start = next) {
288 int map;
289 unsigned long pud_phys;
290 pud_t *pud = alloc_low_page(&map, &pud_phys);
291 next = start + PGDIR_SIZE;
292 if (next > end)
293 next = end;
294 phys_pud_init(pud, __pa(start), __pa(next));
295 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
296 unmap_low_page(map);
297 }
298
299 asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
300 __flush_tlb_all();
301 early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
302 table_start<<PAGE_SHIFT,
303 table_end<<PAGE_SHIFT);
304}
305
Siddha, Suresh Bf6c2e332005-11-05 17:25:53 +0100306void __cpuinit zap_low_mappings(int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307{
Siddha, Suresh Bf6c2e332005-11-05 17:25:53 +0100308 if (cpu == 0) {
309 pgd_t *pgd = pgd_offset_k(0UL);
310 pgd_clear(pgd);
311 } else {
312 /*
313 * For AP's, zap the low identity mappings by changing the cr3
314 * to init_level4_pgt and doing local flush tlb all
315 */
316 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
317 }
318 __flush_tlb_all();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319}
320
Andi Kleena2f1b422005-11-05 17:25:53 +0100321/* Compute zone sizes for the DMA and DMA32 zones in a node. */
322__init void
323size_zones(unsigned long *z, unsigned long *h,
324 unsigned long start_pfn, unsigned long end_pfn)
325{
326 int i;
327 unsigned long w;
328
329 for (i = 0; i < MAX_NR_ZONES; i++)
330 z[i] = 0;
331
332 if (start_pfn < MAX_DMA_PFN)
333 z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
334 if (start_pfn < MAX_DMA32_PFN) {
335 unsigned long dma32_pfn = MAX_DMA32_PFN;
336 if (dma32_pfn > end_pfn)
337 dma32_pfn = end_pfn;
338 z[ZONE_DMA32] = dma32_pfn - start_pfn;
339 }
340 z[ZONE_NORMAL] = end_pfn - start_pfn;
341
342 /* Remove lower zones from higher ones. */
343 w = 0;
344 for (i = 0; i < MAX_NR_ZONES; i++) {
345 if (z[i])
346 z[i] -= w;
347 w += z[i];
348 }
349
350 /* Compute holes */
Ravikiran G Thirumalai576fc092005-12-29 13:06:11 +0100351 w = start_pfn;
Andi Kleena2f1b422005-11-05 17:25:53 +0100352 for (i = 0; i < MAX_NR_ZONES; i++) {
353 unsigned long s = w;
354 w += z[i];
355 h[i] = e820_hole_size(s, w);
356 }
Andi Kleene18c6872005-11-05 17:25:53 +0100357
358 /* Add the space pace needed for mem_map to the holes too. */
359 for (i = 0; i < MAX_NR_ZONES; i++)
360 h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
361
362 /* The 16MB DMA zone has the kernel and other misc mappings.
363 Account them too */
364 if (h[ZONE_DMA]) {
365 h[ZONE_DMA] += dma_reserve;
366 if (h[ZONE_DMA] >= z[ZONE_DMA]) {
367 printk(KERN_WARNING
368 "Kernel too large and filling up ZONE_DMA?\n");
369 h[ZONE_DMA] = z[ZONE_DMA];
370 }
371 }
Andi Kleena2f1b422005-11-05 17:25:53 +0100372}
373
Matt Tolentino2b976902005-06-23 00:08:06 -0700374#ifndef CONFIG_NUMA
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375void __init paging_init(void)
376{
Andi Kleena2f1b422005-11-05 17:25:53 +0100377 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
378 size_zones(zones, holes, 0, end_pfn);
379 free_area_init_node(0, NODE_DATA(0), zones,
380 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381}
382#endif
383
384/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
385 from the CPU leading to inconsistent cache lines. address and size
386 must be aligned to 2MB boundaries.
387 Does nothing when the mapping doesn't exist. */
388void __init clear_kernel_mapping(unsigned long address, unsigned long size)
389{
390 unsigned long end = address + size;
391
392 BUG_ON(address & ~LARGE_PAGE_MASK);
393 BUG_ON(size & ~LARGE_PAGE_MASK);
394
395 for (; address < end; address += LARGE_PAGE_SIZE) {
396 pgd_t *pgd = pgd_offset_k(address);
397 pud_t *pud;
398 pmd_t *pmd;
399 if (pgd_none(*pgd))
400 continue;
401 pud = pud_offset(pgd, address);
402 if (pud_none(*pud))
403 continue;
404 pmd = pmd_offset(pud, address);
405 if (!pmd || pmd_none(*pmd))
406 continue;
407 if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
408 /* Could handle this, but it should not happen currently. */
409 printk(KERN_ERR
410 "clear_kernel_mapping: mapping has been split. will leak memory\n");
411 pmd_ERROR(*pmd);
412 }
413 set_pmd(pmd, __pmd(0));
414 }
415 __flush_tlb_all();
416}
417
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
419 kcore_vsyscall;
420
421void __init mem_init(void)
422{
Andi Kleen0a43e4b2005-09-12 18:49:24 +0200423 long codesize, reservedpages, datasize, initsize;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424
425#ifdef CONFIG_SWIOTLB
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 if (!iommu_aperture &&
Andi Kleenca8642f2006-01-11 22:44:27 +0100427 ((end_pfn-1) >= 0xffffffff>>PAGE_SHIFT || force_iommu))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428 swiotlb = 1;
429 if (swiotlb)
430 swiotlb_init();
431#endif
432
433 /* How many end-of-memory variables you have, grandma! */
434 max_low_pfn = end_pfn;
435 max_pfn = end_pfn;
436 num_physpages = end_pfn;
437 high_memory = (void *) __va(end_pfn * PAGE_SIZE);
438
439 /* clear the zero-page */
440 memset(empty_zero_page, 0, PAGE_SIZE);
441
442 reservedpages = 0;
443
444 /* this will put all low memory onto the freelists */
Matt Tolentino2b976902005-06-23 00:08:06 -0700445#ifdef CONFIG_NUMA
Andi Kleen0a43e4b2005-09-12 18:49:24 +0200446 totalram_pages = numa_free_all_bootmem();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447#else
Andi Kleen0a43e4b2005-09-12 18:49:24 +0200448 totalram_pages = free_all_bootmem();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449#endif
Andi Kleen0a43e4b2005-09-12 18:49:24 +0200450 reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451
452 after_bootmem = 1;
453
454 codesize = (unsigned long) &_etext - (unsigned long) &_text;
455 datasize = (unsigned long) &_edata - (unsigned long) &_etext;
456 initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
457
458 /* Register memory areas for /proc/kcore */
459 kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
460 kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
461 VMALLOC_END-VMALLOC_START);
462 kclist_add(&kcore_kernel, &_stext, _end - _stext);
463 kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
464 kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
465 VSYSCALL_END - VSYSCALL_START);
466
Andi Kleen0a43e4b2005-09-12 18:49:24 +0200467 printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
469 end_pfn << (PAGE_SHIFT-10),
470 codesize >> 10,
471 reservedpages << (PAGE_SHIFT-10),
472 datasize >> 10,
473 initsize >> 10);
474
Siddha, Suresh Bf6c2e332005-11-05 17:25:53 +0100475#ifdef CONFIG_SMP
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 /*
Siddha, Suresh Bf6c2e332005-11-05 17:25:53 +0100477 * Sync boot_level4_pgt mappings with the init_level4_pgt
478 * except for the low identity mappings which are already zapped
479 * in init_level4_pgt. This sync-up is essential for AP's bringup
Linus Torvalds1da177e2005-04-16 15:20:36 -0700480 */
Siddha, Suresh Bf6c2e332005-11-05 17:25:53 +0100481 memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482#endif
483}
484
Linus Torvalds1da177e2005-04-16 15:20:36 -0700485void free_initmem(void)
486{
487 unsigned long addr;
488
489 addr = (unsigned long)(&__init_begin);
490 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
491 ClearPageReserved(virt_to_page(addr));
492 set_page_count(virt_to_page(addr), 1);
493 memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
494 free_page(addr);
495 totalram_pages++;
496 }
497 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
Andi Kleen2bc04142005-11-05 17:25:53 +0100498 printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499}
500
#ifdef CONFIG_DEBUG_RODATA

extern char __start_rodata, __end_rodata;

/*
 * Change every page between __start_rodata and __end_rodata to
 * PAGE_KERNEL_RO, then flush the TLB globally.
 */
void mark_rodata_ro(void)
{
	unsigned long page_addr = (unsigned long)&__start_rodata;
	unsigned long stop = (unsigned long)&__end_rodata;

	while (page_addr < stop) {
		change_page_attr_addr(page_addr, 1, PAGE_KERNEL_RO);
		page_addr += PAGE_SIZE;
	}

	printk ("Write protecting the kernel read-only data: %luk\n",
			(&__end_rodata - &__start_rodata) >> 10);

	/*
	 * change_page_attr_addr() requires a global_flush_tlb() call after it.
	 * We do this after the printk so that if something went wrong in the
	 * change, the printk gets out at least to give a better debug hint
	 * of who is the culprit.
	 */
	global_flush_tlb();
}
#endif
523
#ifdef CONFIG_BLK_DEV_INITRD
/*
 * Free the initrd pages in the virtual range [start, end) and add them to
 * totalram_pages.  Nothing is done when @start lies below _end.
 */
void free_initrd_mem(unsigned long start, unsigned long end)
{
	unsigned long va;

	if (start < (unsigned long)&_end)
		return;

	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);

	va = start;
	while (va < end) {
		ClearPageReserved(virt_to_page(va));
		set_page_count(virt_to_page(va), 1);
		free_page(va);
		totalram_pages++;
		va += PAGE_SIZE;
	}
}
#endif
538
539void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
540{
541 /* Should check here against the e820 map to avoid double free */
Matt Tolentino2b976902005-06-23 00:08:06 -0700542#ifdef CONFIG_NUMA
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 int nid = phys_to_nid(phys);
544 reserve_bootmem_node(NODE_DATA(nid), phys, len);
545#else
546 reserve_bootmem(phys, len);
547#endif
Andi Kleene18c6872005-11-05 17:25:53 +0100548 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
549 dma_reserve += len / PAGE_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550}
551
552int kern_addr_valid(unsigned long addr)
553{
554 unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
555 pgd_t *pgd;
556 pud_t *pud;
557 pmd_t *pmd;
558 pte_t *pte;
559
560 if (above != 0 && above != -1UL)
561 return 0;
562
563 pgd = pgd_offset_k(addr);
564 if (pgd_none(*pgd))
565 return 0;
566
567 pud = pud_offset(pgd, addr);
568 if (pud_none(*pud))
569 return 0;
570
571 pmd = pmd_offset(pud, addr);
572 if (pmd_none(*pmd))
573 return 0;
574 if (pmd_large(*pmd))
575 return pfn_valid(pmd_pfn(*pmd));
576
577 pte = pte_offset_kernel(pmd, addr);
578 if (pte_none(*pte))
579 return 0;
580 return pfn_valid(pte_pfn(*pte));
581}
582
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>

extern int exception_trace, page_fault_trace;

/* Entries placed under the "debug" sysctl directory. */
static ctl_table debug_table2[] = {
	{
		.ctl_name	= 99,
		.procname	= "exception-trace",
		.data		= &exception_trace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.child		= NULL,
		.proc_handler	= proc_dointvec,
	},
	{ 0, }
};

/* Root table holding the "debug" directory. */
static ctl_table debug_root_table2[] = {
	{
		.ctl_name	= CTL_DEBUG,
		.procname	= "debug",
		.mode		= 0555,
		.child		= debug_table2,
	},
	{ 0 },
};

/* Register the tables above at initcall time. */
static __init int x8664_sysctl_init(void)
{
	register_sysctl_table(debug_root_table2, 1);
	return 0;
}
__initcall(x8664_sysctl_init);
#endif
607
Andi Kleen1e014412005-04-16 15:24:55 -0700608/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only
609 covers the 64bit vsyscall page now. 32bit has a real VMA now and does
610 not need special handling anymore. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611
612static struct vm_area_struct gate_vma = {
613 .vm_start = VSYSCALL_START,
614 .vm_end = VSYSCALL_END,
615 .vm_page_prot = PAGE_READONLY
616};
617
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
619{
620#ifdef CONFIG_IA32_EMULATION
Andi Kleen1e014412005-04-16 15:24:55 -0700621 if (test_tsk_thread_flag(tsk, TIF_IA32))
622 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623#endif
624 return &gate_vma;
625}
626
627int in_gate_area(struct task_struct *task, unsigned long addr)
628{
629 struct vm_area_struct *vma = get_gate_vma(task);
Andi Kleen1e014412005-04-16 15:24:55 -0700630 if (!vma)
631 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 return (addr >= vma->vm_start) && (addr < vma->vm_end);
633}
634
635/* Use this when you have no reliable task/vma, typically from interrupt
636 * context. It is less reliable than using the task's vma and may give
637 * false positives.
638 */
639int in_gate_area_no_task(unsigned long addr)
640{
Andi Kleen1e014412005-04-16 15:24:55 -0700641 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642}