blob: e5f7f1c344624126314f3aa450edfe717e989036 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/x86_64/mm/init.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
6 * Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
7 */
8
9#include <linux/config.h>
10#include <linux/signal.h>
11#include <linux/sched.h>
12#include <linux/kernel.h>
13#include <linux/errno.h>
14#include <linux/string.h>
15#include <linux/types.h>
16#include <linux/ptrace.h>
17#include <linux/mman.h>
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/smp.h>
21#include <linux/init.h>
22#include <linux/pagemap.h>
23#include <linux/bootmem.h>
24#include <linux/proc_fs.h>
Andi Kleen59170892005-11-05 17:25:53 +010025#include <linux/pci.h>
Muli Ben-Yehuda17a941d2006-01-11 22:44:42 +010026#include <linux/dma-mapping.h>
Matt Tolentino44df75e2006-01-17 07:03:41 +010027#include <linux/module.h>
28#include <linux/memory_hotplug.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070029
30#include <asm/processor.h>
31#include <asm/system.h>
32#include <asm/uaccess.h>
33#include <asm/pgtable.h>
34#include <asm/pgalloc.h>
35#include <asm/dma.h>
36#include <asm/fixmap.h>
37#include <asm/e820.h>
38#include <asm/apic.h>
39#include <asm/tlb.h>
40#include <asm/mmu_context.h>
41#include <asm/proto.h>
42#include <asm/smp.h>
Andi Kleen2bc04142005-11-05 17:25:53 +010043#include <asm/sections.h>
Muli Ben-Yehuda17a941d2006-01-11 22:44:42 +010044#include <asm/dma-mapping.h>
45#include <asm/swiotlb.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#ifndef Dprintk
48#define Dprintk(x...)
49#endif
50
Muli Ben-Yehuda17a941d2006-01-11 22:44:42 +010051struct dma_mapping_ops* dma_ops;
52EXPORT_SYMBOL(dma_ops);
53
Andi Kleene18c6872005-11-05 17:25:53 +010054static unsigned long dma_reserve __initdata;
55
Linus Torvalds1da177e2005-04-16 15:20:36 -070056DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
57
/*
 * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
 * physical memory, so we can cache the location of the first one and move
 * around without checking the pgd every time.
 */
63
64void show_mem(void)
65{
Andi Kleene92343c2005-09-12 18:49:24 +020066 long i, total = 0, reserved = 0;
67 long shared = 0, cached = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -070068 pg_data_t *pgdat;
69 struct page *page;
70
Andi Kleene92343c2005-09-12 18:49:24 +020071 printk(KERN_INFO "Mem-info:\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -070072 show_free_areas();
Andi Kleene92343c2005-09-12 18:49:24 +020073 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
Linus Torvalds1da177e2005-04-16 15:20:36 -070074
KAMEZAWA Hiroyukiec936fc2006-03-27 01:15:59 -080075 for_each_online_pgdat(pgdat) {
Linus Torvalds1da177e2005-04-16 15:20:36 -070076 for (i = 0; i < pgdat->node_spanned_pages; ++i) {
77 page = pfn_to_page(pgdat->node_start_pfn + i);
78 total++;
Andi Kleene92343c2005-09-12 18:49:24 +020079 if (PageReserved(page))
80 reserved++;
81 else if (PageSwapCache(page))
82 cached++;
83 else if (page_count(page))
84 shared += page_count(page) - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 }
86 }
Andi Kleene92343c2005-09-12 18:49:24 +020087 printk(KERN_INFO "%lu pages of RAM\n", total);
88 printk(KERN_INFO "%lu reserved pages\n",reserved);
89 printk(KERN_INFO "%lu pages shared\n",shared);
90 printk(KERN_INFO "%lu pages swap cached\n",cached);
Linus Torvalds1da177e2005-04-16 15:20:36 -070091}
92
93/* References to section boundaries */
94
Linus Torvalds1da177e2005-04-16 15:20:36 -070095int after_bootmem;
96
Andi Kleen5f44a662006-03-25 16:30:25 +010097static __init void *spp_getpage(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -070098{
99 void *ptr;
100 if (after_bootmem)
101 ptr = (void *) get_zeroed_page(GFP_ATOMIC);
102 else
103 ptr = alloc_bootmem_pages(PAGE_SIZE);
104 if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
105 panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
106
107 Dprintk("spp_getpage %p\n", ptr);
108 return ptr;
109}
110
Andi Kleen5f44a662006-03-25 16:30:25 +0100111static __init void set_pte_phys(unsigned long vaddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112 unsigned long phys, pgprot_t prot)
113{
114 pgd_t *pgd;
115 pud_t *pud;
116 pmd_t *pmd;
117 pte_t *pte, new_pte;
118
119 Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
120
121 pgd = pgd_offset_k(vaddr);
122 if (pgd_none(*pgd)) {
123 printk("PGD FIXMAP MISSING, it should be setup in head.S!\n");
124 return;
125 }
126 pud = pud_offset(pgd, vaddr);
127 if (pud_none(*pud)) {
128 pmd = (pmd_t *) spp_getpage();
129 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
130 if (pmd != pmd_offset(pud, 0)) {
131 printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
132 return;
133 }
134 }
135 pmd = pmd_offset(pud, vaddr);
136 if (pmd_none(*pmd)) {
137 pte = (pte_t *) spp_getpage();
138 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
139 if (pte != pte_offset_kernel(pmd, 0)) {
140 printk("PAGETABLE BUG #02!\n");
141 return;
142 }
143 }
144 new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
145
146 pte = pte_offset_kernel(pmd, vaddr);
147 if (!pte_none(*pte) &&
148 pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
149 pte_ERROR(*pte);
150 set_pte(pte, new_pte);
151
152 /*
153 * It's enough to flush this one mapping.
154 * (PGE mappings get flushed as well)
155 */
156 __flush_tlb_one(vaddr);
157}
158
159/* NOTE: this is meant to be run only at boot */
Andi Kleen5f44a662006-03-25 16:30:25 +0100160void __init
161__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162{
163 unsigned long address = __fix_to_virt(idx);
164
165 if (idx >= __end_of_fixed_addresses) {
166 printk("Invalid __set_fixmap\n");
167 return;
168 }
169 set_pte_phys(address, phys, prot);
170}
171
172unsigned long __initdata table_start, table_end;
173
174extern pmd_t temp_boot_pmds[];
175
/*
 * Temporary mapping slots used while the direct mapping is being built
 * (see alloc_low_page/unmap_low_page and early_ioremap/early_iounmap).
 * Each slot pairs a boot-time PMD entry with the virtual address that
 * entry is reached through.  The table ends with an all-zero sentinel.
 * It lives in __initdata, so it may only be touched before init memory
 * is freed.
 */
static struct temp_map {
	pmd_t *pmd;		/* PMD entry that gets pointed at the target */
	void  *address;		/* virtual address used to access the mapping */
	int    allocated;	/* non-zero while the slot is in use */
} temp_mappings[] __initdata = {
	{ &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) },
	{ &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) },
	{}	/* sentinel: pmd == NULL */
};
185
Matt Tolentino44df75e2006-01-17 07:03:41 +0100186static __meminit void *alloc_low_page(int *index, unsigned long *phys)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187{
188 struct temp_map *ti;
189 int i;
190 unsigned long pfn = table_end++, paddr;
191 void *adr;
192
Matt Tolentino44df75e2006-01-17 07:03:41 +0100193 if (after_bootmem) {
194 adr = (void *)get_zeroed_page(GFP_ATOMIC);
195 *phys = __pa(adr);
196 return adr;
197 }
198
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 if (pfn >= end_pfn)
200 panic("alloc_low_page: ran out of memory");
201 for (i = 0; temp_mappings[i].allocated; i++) {
202 if (!temp_mappings[i].pmd)
203 panic("alloc_low_page: ran out of temp mappings");
204 }
205 ti = &temp_mappings[i];
206 paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
207 set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
208 ti->allocated = 1;
209 __flush_tlb();
210 adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
Matt Tolentino44df75e2006-01-17 07:03:41 +0100211 memset(adr, 0, PAGE_SIZE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 *index = i;
213 *phys = pfn * PAGE_SIZE;
214 return adr;
215}
216
Matt Tolentino44df75e2006-01-17 07:03:41 +0100217static __meminit void unmap_low_page(int i)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218{
Matt Tolentino44df75e2006-01-17 07:03:41 +0100219 struct temp_map *ti;
220
221 if (after_bootmem)
222 return;
223
224 ti = &temp_mappings[i];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 set_pmd(ti->pmd, __pmd(0));
226 ti->allocated = 0;
227}
228
Andi Kleenf2d3efe2006-03-25 16:30:22 +0100229/* Must run before zap_low_mappings */
230__init void *early_ioremap(unsigned long addr, unsigned long size)
231{
232 unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
233
234 /* actually usually some more */
235 if (size >= LARGE_PAGE_SIZE) {
236 printk("SMBIOS area too long %lu\n", size);
237 return NULL;
238 }
239 set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
240 map += LARGE_PAGE_SIZE;
241 set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
242 __flush_tlb();
243 return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
244}
245
246/* To avoid virtual aliases later */
247__init void early_iounmap(void *addr, unsigned long size)
248{
249 if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
250 printk("early_iounmap: bad address %p\n", addr);
251 set_pmd(temp_mappings[0].pmd, __pmd(0));
252 set_pmd(temp_mappings[1].pmd, __pmd(0));
253 __flush_tlb();
254}
255
Matt Tolentino44df75e2006-01-17 07:03:41 +0100256static void __meminit
257phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
258{
259 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260
Matt Tolentino44df75e2006-01-17 07:03:41 +0100261 for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
262 unsigned long entry;
263
264 if (address > end) {
265 for (; i < PTRS_PER_PMD; i++, pmd++)
266 set_pmd(pmd, __pmd(0));
267 break;
268 }
269 entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
270 entry &= __supported_pte_mask;
271 set_pmd(pmd, __pmd(entry));
272 }
273}
274
275static void __meminit
276phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
277{
278 pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
279
280 if (pmd_none(*pmd)) {
281 spin_lock(&init_mm.page_table_lock);
282 phys_pmd_init(pmd, address, end);
283 spin_unlock(&init_mm.page_table_lock);
284 __flush_tlb_all();
285 }
286}
287
/*
 * Populate the PUD table @pud with direct mappings for the physical
 * range starting at @address and ending at @end, allocating one PMD
 * table per PUD entry via alloc_low_page().
 *
 * After bootmem, if the first PUD entry for @address is already
 * populated, only that entry's PMD table is updated and the function
 * returns.
 */
static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
	long i = pud_index(address);

	/* step to the PUD entry covering @address */
	pud = pud + i;

	if (after_bootmem && pud_val(*pud)) {
		phys_pmd_update(pud, address, end);
		return;
	}

	for (; i < PTRS_PER_PUD; pud++, i++) {
		int map;
		unsigned long paddr, pmd_phys;
		pmd_t *pmd;

		/* physical base address covered by PUD entry i */
		paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
		if (paddr >= end)
			break;

		/* early boot only: leave ranges the e820 map does not cover unmapped */
		if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
			set_pud(pud, __pud(0));
			continue;
		}

		pmd = alloc_low_page(&map, &pmd_phys);
		spin_lock(&init_mm.page_table_lock);
		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
		phys_pmd_init(pmd, paddr, end);
		spin_unlock(&init_mm.page_table_lock);
		/* map is only set by alloc_low_page() before bootmem; unmap_low_page() ignores it afterwards */
		unmap_low_page(map);
	}
	__flush_tlb();
}
322
323static void __init find_early_table_space(unsigned long end)
324{
Andi Kleen6c5acd12006-01-11 22:46:57 +0100325 unsigned long puds, pmds, tables, start;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326
327 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
328 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
329 tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
330 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
331
Andi Kleenee408c72006-01-16 01:56:51 +0100332 /* RED-PEN putting page tables only on node 0 could
333 cause a hotspot and fill up ZONE_DMA. The page tables
334 need roughly 0.5KB per GB. */
335 start = 0x8000;
336 table_start = find_e820_area(start, end, tables);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 if (table_start == -1UL)
338 panic("Cannot find space for the kernel page tables");
339
340 table_start >>= PAGE_SHIFT;
341 table_end = table_start;
Matt Tolentino44df75e2006-01-17 07:03:41 +0100342
343 early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
344 end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345}
346
347/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
348 This runs before bootmem is initialized and gets pages directly from the
349 physical memory. To access them they are temporarily mapped. */
Matt Tolentino44df75e2006-01-17 07:03:41 +0100350void __meminit init_memory_mapping(unsigned long start, unsigned long end)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351{
352 unsigned long next;
353
354 Dprintk("init_memory_mapping\n");
355
356 /*
357 * Find space for the kernel direct mapping tables.
358 * Later we should allocate these tables in the local node of the memory
359 * mapped. Unfortunately this is done currently before the nodes are
360 * discovered.
361 */
Matt Tolentino44df75e2006-01-17 07:03:41 +0100362 if (!after_bootmem)
363 find_early_table_space(end);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364
365 start = (unsigned long)__va(start);
366 end = (unsigned long)__va(end);
367
368 for (; start < end; start = next) {
369 int map;
370 unsigned long pud_phys;
Matt Tolentino44df75e2006-01-17 07:03:41 +0100371 pgd_t *pgd = pgd_offset_k(start);
372 pud_t *pud;
373
374 if (after_bootmem)
Jan Beulichc7ea1a92006-03-25 16:29:03 +0100375 pud = pud_offset_k(pgd, start & PGDIR_MASK);
Matt Tolentino44df75e2006-01-17 07:03:41 +0100376 else
377 pud = alloc_low_page(&map, &pud_phys);
378
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 next = start + PGDIR_SIZE;
380 if (next > end)
381 next = end;
382 phys_pud_init(pud, __pa(start), __pa(next));
Matt Tolentino44df75e2006-01-17 07:03:41 +0100383 if (!after_bootmem)
384 set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 unmap_low_page(map);
386 }
387
Matt Tolentino44df75e2006-01-17 07:03:41 +0100388 if (!after_bootmem)
389 asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 __flush_tlb_all();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391}
392
Siddha, Suresh Bf6c2e332005-11-05 17:25:53 +0100393void __cpuinit zap_low_mappings(int cpu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394{
Siddha, Suresh Bf6c2e332005-11-05 17:25:53 +0100395 if (cpu == 0) {
396 pgd_t *pgd = pgd_offset_k(0UL);
397 pgd_clear(pgd);
398 } else {
399 /*
400 * For AP's, zap the low identity mappings by changing the cr3
401 * to init_level4_pgt and doing local flush tlb all
402 */
403 asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
404 }
405 __flush_tlb_all();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406}
407
Andi Kleena2f1b422005-11-05 17:25:53 +0100408/* Compute zone sizes for the DMA and DMA32 zones in a node. */
409__init void
410size_zones(unsigned long *z, unsigned long *h,
411 unsigned long start_pfn, unsigned long end_pfn)
412{
413 int i;
414 unsigned long w;
415
416 for (i = 0; i < MAX_NR_ZONES; i++)
417 z[i] = 0;
418
419 if (start_pfn < MAX_DMA_PFN)
420 z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
421 if (start_pfn < MAX_DMA32_PFN) {
422 unsigned long dma32_pfn = MAX_DMA32_PFN;
423 if (dma32_pfn > end_pfn)
424 dma32_pfn = end_pfn;
425 z[ZONE_DMA32] = dma32_pfn - start_pfn;
426 }
427 z[ZONE_NORMAL] = end_pfn - start_pfn;
428
429 /* Remove lower zones from higher ones. */
430 w = 0;
431 for (i = 0; i < MAX_NR_ZONES; i++) {
432 if (z[i])
433 z[i] -= w;
434 w += z[i];
435 }
436
437 /* Compute holes */
Ravikiran G Thirumalai576fc092005-12-29 13:06:11 +0100438 w = start_pfn;
Andi Kleena2f1b422005-11-05 17:25:53 +0100439 for (i = 0; i < MAX_NR_ZONES; i++) {
440 unsigned long s = w;
441 w += z[i];
442 h[i] = e820_hole_size(s, w);
443 }
Andi Kleene18c6872005-11-05 17:25:53 +0100444
445 /* Add the space pace needed for mem_map to the holes too. */
446 for (i = 0; i < MAX_NR_ZONES; i++)
447 h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
448
449 /* The 16MB DMA zone has the kernel and other misc mappings.
450 Account them too */
451 if (h[ZONE_DMA]) {
452 h[ZONE_DMA] += dma_reserve;
453 if (h[ZONE_DMA] >= z[ZONE_DMA]) {
454 printk(KERN_WARNING
455 "Kernel too large and filling up ZONE_DMA?\n");
456 h[ZONE_DMA] = z[ZONE_DMA];
457 }
458 }
Andi Kleena2f1b422005-11-05 17:25:53 +0100459}
460
Matt Tolentino2b976902005-06-23 00:08:06 -0700461#ifndef CONFIG_NUMA
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462void __init paging_init(void)
463{
Andi Kleena2f1b422005-11-05 17:25:53 +0100464 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
Matt Tolentino44df75e2006-01-17 07:03:41 +0100465
466 memory_present(0, 0, end_pfn);
467 sparse_init();
Andi Kleena2f1b422005-11-05 17:25:53 +0100468 size_zones(zones, holes, 0, end_pfn);
469 free_area_init_node(0, NODE_DATA(0), zones,
470 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471}
472#endif
473
474/* Unmap a kernel mapping if it exists. This is useful to avoid prefetches
475 from the CPU leading to inconsistent cache lines. address and size
476 must be aligned to 2MB boundaries.
477 Does nothing when the mapping doesn't exist. */
478void __init clear_kernel_mapping(unsigned long address, unsigned long size)
479{
480 unsigned long end = address + size;
481
482 BUG_ON(address & ~LARGE_PAGE_MASK);
483 BUG_ON(size & ~LARGE_PAGE_MASK);
484
485 for (; address < end; address += LARGE_PAGE_SIZE) {
486 pgd_t *pgd = pgd_offset_k(address);
487 pud_t *pud;
488 pmd_t *pmd;
489 if (pgd_none(*pgd))
490 continue;
491 pud = pud_offset(pgd, address);
492 if (pud_none(*pud))
493 continue;
494 pmd = pmd_offset(pud, address);
495 if (!pmd || pmd_none(*pmd))
496 continue;
497 if (0 == (pmd_val(*pmd) & _PAGE_PSE)) {
498 /* Could handle this, but it should not happen currently. */
499 printk(KERN_ERR
500 "clear_kernel_mapping: mapping has been split. will leak memory\n");
501 pmd_ERROR(*pmd);
502 }
503 set_pmd(pmd, __pmd(0));
504 }
505 __flush_tlb_all();
506}
507
Matt Tolentino44df75e2006-01-17 07:03:41 +0100508/*
509 * Memory hotplug specific functions
510 * These are only for non-NUMA machines right now.
511 */
512#ifdef CONFIG_MEMORY_HOTPLUG
513
/*
 * Hand a hot-added page over to the page allocator: clear its reserved
 * flag, initialise its reference count, free it, and account for it in
 * the global totalram_pages and num_physpages counters.
 */
void online_page(struct page *page)
{
	ClearPageReserved(page);
	init_page_count(page);
	__free_page(page);
	totalram_pages++;
	num_physpages++;
}
522
523int add_memory(u64 start, u64 size)
524{
525 struct pglist_data *pgdat = NODE_DATA(0);
526 struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
527 unsigned long start_pfn = start >> PAGE_SHIFT;
528 unsigned long nr_pages = size >> PAGE_SHIFT;
529 int ret;
530
531 ret = __add_pages(zone, start_pfn, nr_pages);
532 if (ret)
533 goto error;
534
535 init_memory_mapping(start, (start + size -1));
536
537 return ret;
538error:
539 printk("%s: Problem encountered in __add_pages!\n", __func__);
540 return ret;
541}
542EXPORT_SYMBOL_GPL(add_memory);
543
/*
 * Memory hot-remove is not implemented here: always fails with -EINVAL.
 * @start and @size are ignored.
 */
int remove_memory(u64 start, u64 size)
{
	return -EINVAL;
}
548EXPORT_SYMBOL_GPL(remove_memory);
549
550#endif
551
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
553 kcore_vsyscall;
554
/*
 * Late boot memory initialisation: set up the DMA mapping backends,
 * publish the end-of-memory variables, release all bootmem pages to the
 * page allocator, register the /proc/kcore regions and print the memory
 * summary line.  Sets after_bootmem, which switches the page-table
 * helpers in this file over to the page allocator.
 */
void __init mem_init(void)
{
	long codesize, reservedpages, datasize, initsize;

#ifdef CONFIG_SWIOTLB
	pci_swiotlb_init();
#endif
	no_iommu_init();

	/* How many end-of-memory variables you have, grandma! */
	max_low_pfn = end_pfn;
	max_pfn = end_pfn;
	num_physpages = end_pfn;
	high_memory = (void *) __va(end_pfn * PAGE_SIZE);

	/* clear the zero-page */
	memset(empty_zero_page, 0, PAGE_SIZE);

	reservedpages = 0;

	/* this will put all low memory onto the freelists */
#ifdef CONFIG_NUMA
	totalram_pages = numa_free_all_bootmem();
#else
	totalram_pages = free_all_bootmem();
#endif
	/* reserved = all frames minus freed pages minus e820 holes */
	reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);

	after_bootmem = 1;

	/* section sizes in bytes, taken from the linker-provided symbols */
	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;

	/* Register memory areas for /proc/kcore */
	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
		   VMALLOC_END-VMALLOC_START);
	kclist_add(&kcore_kernel, &_stext, _end - _stext);
	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
				 VSYSCALL_END - VSYSCALL_START);

	printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n",
		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
		end_pfn << (PAGE_SHIFT-10),
		codesize >> 10,
		reservedpages << (PAGE_SHIFT-10),
		datasize >> 10,
		initsize >> 10);

#ifdef CONFIG_SMP
	/*
	 * Sync boot_level4_pgt mappings with the init_level4_pgt
	 * except for the low identity mappings which are already zapped
	 * in init_level4_pgt. This sync-up is essential for AP's bringup
	 */
	memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
#endif
}
615
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616void free_initmem(void)
617{
618 unsigned long addr;
619
620 addr = (unsigned long)(&__init_begin);
621 for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
622 ClearPageReserved(virt_to_page(addr));
Nick Piggin7835e982006-03-22 00:08:40 -0800623 init_page_count(virt_to_page(addr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
625 free_page(addr);
626 totalram_pages++;
627 }
628 memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
Andi Kleen2bc04142005-11-05 17:25:53 +0100629 printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630}
631
Arjan van de Ven67df1972006-01-06 00:12:04 -0800632#ifdef CONFIG_DEBUG_RODATA
633
634extern char __start_rodata, __end_rodata;
635void mark_rodata_ro(void)
636{
637 unsigned long addr = (unsigned long)&__start_rodata;
638
639 for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE)
640 change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
641
642 printk ("Write protecting the kernel read-only data: %luk\n",
643 (&__end_rodata - &__start_rodata) >> 10);
644
645 /*
646 * change_page_attr_addr() requires a global_flush_tlb() call after it.
647 * We do this after the printk so that if something went wrong in the
648 * change, the printk gets out at least to give a better debug hint
649 * of who is the culprit.
650 */
651 global_flush_tlb();
652}
653#endif
654
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655#ifdef CONFIG_BLK_DEV_INITRD
656void free_initrd_mem(unsigned long start, unsigned long end)
657{
Linus Torvaldsf74e6672006-01-16 11:33:09 -0800658 if (start >= end)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 return;
660 printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
661 for (; start < end; start += PAGE_SIZE) {
662 ClearPageReserved(virt_to_page(start));
Nick Piggin7835e982006-03-22 00:08:40 -0800663 init_page_count(virt_to_page(start));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 free_page(start);
665 totalram_pages++;
666 }
667}
668#endif
669
670void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
671{
672 /* Should check here against the e820 map to avoid double free */
Matt Tolentino2b976902005-06-23 00:08:06 -0700673#ifdef CONFIG_NUMA
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 int nid = phys_to_nid(phys);
675 reserve_bootmem_node(NODE_DATA(nid), phys, len);
676#else
677 reserve_bootmem(phys, len);
678#endif
Andi Kleene18c6872005-11-05 17:25:53 +0100679 if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
680 dma_reserve += len / PAGE_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681}
682
683int kern_addr_valid(unsigned long addr)
684{
685 unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
686 pgd_t *pgd;
687 pud_t *pud;
688 pmd_t *pmd;
689 pte_t *pte;
690
691 if (above != 0 && above != -1UL)
692 return 0;
693
694 pgd = pgd_offset_k(addr);
695 if (pgd_none(*pgd))
696 return 0;
697
698 pud = pud_offset(pgd, addr);
699 if (pud_none(*pud))
700 return 0;
701
702 pmd = pmd_offset(pud, addr);
703 if (pmd_none(*pmd))
704 return 0;
705 if (pmd_large(*pmd))
706 return pfn_valid(pmd_pfn(*pmd));
707
708 pte = pte_offset_kernel(pmd, addr);
709 if (pte_none(*pte))
710 return 0;
711 return pfn_valid(pte_pfn(*pte));
712}
713
714#ifdef CONFIG_SYSCTL
715#include <linux/sysctl.h>
716
717extern int exception_trace, page_fault_trace;
718
/*
 * Entries placed under the "debug" sysctl directory: a single integer
 * knob, "exception-trace", backed by exception_trace and handled by
 * proc_dointvec.  The zero entry terminates the table.
 */
static ctl_table debug_table2[] = {
	{ 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
	  proc_dointvec },
	{ 0, }
};
724
/*
 * Root table that hangs debug_table2 under the "debug" directory
 * (CTL_DEBUG).  The zero entry terminates the table.
 */
static ctl_table debug_root_table2[] = {
	{ .ctl_name = CTL_DEBUG, .procname = "debug", .mode = 0555,
	   .child = debug_table2 },
	{ 0 },
};
730
/*
 * Initcall that registers debug_root_table2 with the sysctl core.
 * The result of register_sysctl_table() is not checked; this always
 * returns 0.
 */
static __init int x8664_sysctl_init(void)
{
	register_sysctl_table(debug_root_table2, 1);
	return 0;
}
__initcall(x8664_sysctl_init);
737#endif
738
/* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
   not need special handling anymore. */

static struct vm_area_struct gate_vma = {
	.vm_start = VSYSCALL_START,	/* first address of the vsyscall area */
	.vm_end = VSYSCALL_END,		/* end of the vsyscall area (exclusive, see in_gate_area) */
	.vm_page_prot = PAGE_READONLY
};
748
/*
 * Return the gate (vsyscall) pseudo VMA for task @tsk, or NULL when IA32
 * emulation is configured and the task has TIF_IA32 set.
 */
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
#ifdef CONFIG_IA32_EMULATION
	/* tasks flagged TIF_IA32 get no gate VMA from here */
	if (test_tsk_thread_flag(tsk, TIF_IA32))
		return NULL;
#endif
	return &gate_vma;
}
757
758int in_gate_area(struct task_struct *task, unsigned long addr)
759{
760 struct vm_area_struct *vma = get_gate_vma(task);
Andi Kleen1e014412005-04-16 15:24:55 -0700761 if (!vma)
762 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 return (addr >= vma->vm_start) && (addr < vma->vm_end);
764}
765
766/* Use this when you have no reliable task/vma, typically from interrupt
767 * context. It is less reliable than using the task's vma and may give
768 * false positives.
769 */
770int in_gate_area_no_task(unsigned long addr)
771{
Andi Kleen1e014412005-04-16 15:24:55 -0700772 return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773}