/*
 * PowerPC version
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Derived from "arch/i386/mm/fault.c"
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Modified by Cort Dougan and Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
15
16#include <linux/config.h>
17#include <linux/signal.h>
18#include <linux/sched.h>
19#include <linux/kernel.h>
20#include <linux/errno.h>
21#include <linux/string.h>
22#include <linux/types.h>
23#include <linux/ptrace.h>
24#include <linux/mman.h>
25#include <linux/mm.h>
26#include <linux/interrupt.h>
27#include <linux/highmem.h>
28#include <linux/module.h>
29
30#include <asm/page.h>
31#include <asm/pgtable.h>
32#include <asm/mmu.h>
33#include <asm/mmu_context.h>
34#include <asm/system.h>
35#include <asm/uaccess.h>
36#include <asm/tlbflush.h>
37
#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
/*
 * Hooks into the kernel debugger.  They are only declared here; where
 * (and whether) they get assigned is not visible from this file, so
 * callers should be prepared for a NULL pointer.
 */
extern void (*debugger)(struct pt_regs *);
extern void (*debugger_fault_handler)(struct pt_regs *);
extern int (*debugger_dabr_match)(struct pt_regs *);

/* When non-zero, bad_page_fault() enters the debugger before die(). */
int debugger_kernel_faults = 1;
#endif
44
/*
 * MMU statistics counters.  The htab_* counters are maintained by the
 * assembly code named next to each one; pte_misses and pte_errors are
 * bumped by do_page_fault() on its success and bad_area paths.
 */
unsigned long htab_reloads;	/* updated by hashtable.S:hash_page() */
unsigned long htab_evicts;	/* updated by hashtable.S:hash_page() */
unsigned long htab_preloads;	/* updated by hashtable.S:add_hash_page() */
unsigned long pte_misses;	/* updated by do_page_fault() */
unsigned long pte_errors;	/* updated by do_page_fault() */
/* NOTE(review): not referenced anywhere in this file; users live elsewhere. */
unsigned int probingmem;
51
52/*
53 * Check whether the instruction at regs->nip is a store using
54 * an update addressing form which will update r1.
55 */
56static int store_updates_sp(struct pt_regs *regs)
57{
58 unsigned int inst;
59
60 if (get_user(inst, (unsigned int __user *)regs->nip))
61 return 0;
62 /* check for 1 in the rA field */
63 if (((inst >> 16) & 0x1f) != 1)
64 return 0;
65 /* check major opcode */
66 switch (inst >> 26) {
67 case 37: /* stwu */
68 case 39: /* stbu */
69 case 45: /* sthu */
70 case 53: /* stfsu */
71 case 55: /* stfdu */
72 return 1;
73 case 31:
74 /* check minor opcode */
75 switch ((inst >> 1) & 0x3ff) {
76 case 183: /* stwux */
77 case 247: /* stbux */
78 case 439: /* sthux */
79 case 695: /* stfsux */
80 case 759: /* stfdux */
81 return 1;
82 }
83 }
84 return 0;
85}
86
87/*
88 * For 600- and 800-family processors, the error_code parameter is DSISR
89 * for a data fault, SRR1 for an instruction fault. For 400-family processors
90 * the error_code parameter is ESR for a data fault, 0 for an instruction
91 * fault.
92 */
93int do_page_fault(struct pt_regs *regs, unsigned long address,
94 unsigned long error_code)
95{
96 struct vm_area_struct * vma;
97 struct mm_struct *mm = current->mm;
98 siginfo_t info;
99 int code = SEGV_MAPERR;
100#if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
101 int is_write = error_code & ESR_DST;
102#else
103 int is_write = 0;
104
105 /*
106 * Fortunately the bit assignments in SRR1 for an instruction
107 * fault and DSISR for a data fault are mostly the same for the
108 * bits we are interested in. But there are some bits which
109 * indicate errors in DSISR but can validly be set in SRR1.
110 */
111 if (TRAP(regs) == 0x400)
112 error_code &= 0x48200000;
113 else
114 is_write = error_code & 0x02000000;
115#endif /* CONFIG_4xx || CONFIG_BOOKE */
116
117#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
118 if (debugger_fault_handler && TRAP(regs) == 0x300) {
119 debugger_fault_handler(regs);
120 return 0;
121 }
122#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
123 if (error_code & 0x00400000) {
124 /* DABR match */
125 if (debugger_dabr_match(regs))
126 return 0;
127 }
128#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
129#endif /* CONFIG_XMON || CONFIG_KGDB */
130
131 if (in_atomic() || mm == NULL)
132 return SIGSEGV;
133
134 down_read(&mm->mmap_sem);
135 vma = find_vma(mm, address);
136 if (!vma)
137 goto bad_area;
138 if (vma->vm_start <= address)
139 goto good_area;
140 if (!(vma->vm_flags & VM_GROWSDOWN))
141 goto bad_area;
142 if (!is_write)
143 goto bad_area;
144
145 /*
146 * N.B. The rs6000/xcoff ABI allows programs to access up to
147 * a few hundred bytes below the stack pointer.
148 * The kernel signal delivery code writes up to about 1.5kB
149 * below the stack pointer (r1) before decrementing it.
150 * The exec code can write slightly over 640kB to the stack
151 * before setting the user r1. Thus we allow the stack to
152 * expand to 1MB without further checks.
153 */
154 if (address + 0x100000 < vma->vm_end) {
155 /* get user regs even if this fault is in kernel mode */
156 struct pt_regs *uregs = current->thread.regs;
157 if (uregs == NULL)
158 goto bad_area;
159
160 /*
161 * A user-mode access to an address a long way below
162 * the stack pointer is only valid if the instruction
163 * is one which would update the stack pointer to the
164 * address accessed if the instruction completed,
165 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
166 * (or the byte, halfword, float or double forms).
167 *
168 * If we don't check this then any write to the area
169 * between the last mapped region and the stack will
170 * expand the stack rather than segfaulting.
171 */
172 if (address + 2048 < uregs->gpr[1]
173 && (!user_mode(regs) || !store_updates_sp(regs)))
174 goto bad_area;
175 }
176 if (expand_stack(vma, address))
177 goto bad_area;
178
179good_area:
180 code = SEGV_ACCERR;
181#if defined(CONFIG_6xx)
182 if (error_code & 0x95700000)
183 /* an error such as lwarx to I/O controller space,
184 address matching DABR, eciwx, etc. */
185 goto bad_area;
186#endif /* CONFIG_6xx */
187#if defined(CONFIG_8xx)
188 /* The MPC8xx seems to always set 0x80000000, which is
189 * "undefined". Of those that can be set, this is the only
190 * one which seems bad.
191 */
192 if (error_code & 0x10000000)
193 /* Guarded storage error. */
194 goto bad_area;
195#endif /* CONFIG_8xx */
196
197 /* a write */
198 if (is_write) {
199 if (!(vma->vm_flags & VM_WRITE))
200 goto bad_area;
201#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
202 /* an exec - 4xx/Book-E allows for per-page execute permission */
203 } else if (TRAP(regs) == 0x400) {
204 pte_t *ptep;
205
206#if 0
207 /* It would be nice to actually enforce the VM execute
208 permission on CPUs which can do so, but far too
209 much stuff in userspace doesn't get the permissions
210 right, so we let any page be executed for now. */
211 if (! (vma->vm_flags & VM_EXEC))
212 goto bad_area;
213#endif
214
215 /* Since 4xx/Book-E supports per-page execute permission,
216 * we lazily flush dcache to icache. */
217 ptep = NULL;
218 if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) {
219 struct page *page = pte_page(*ptep);
220
221 if (! test_bit(PG_arch_1, &page->flags)) {
222 flush_dcache_icache_page(page);
223 set_bit(PG_arch_1, &page->flags);
224 }
225 pte_update(ptep, 0, _PAGE_HWEXEC);
226 _tlbie(address);
227 pte_unmap(ptep);
228 up_read(&mm->mmap_sem);
229 return 0;
230 }
231 if (ptep != NULL)
232 pte_unmap(ptep);
233#endif
234 /* a read */
235 } else {
236 /* protection fault */
237 if (error_code & 0x08000000)
238 goto bad_area;
239 if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
240 goto bad_area;
241 }
242
243 /*
244 * If for any reason at all we couldn't handle the fault,
245 * make sure we exit gracefully rather than endlessly redo
246 * the fault.
247 */
248 survive:
249 switch (handle_mm_fault(mm, vma, address, is_write)) {
250 case VM_FAULT_MINOR:
251 current->min_flt++;
252 break;
253 case VM_FAULT_MAJOR:
254 current->maj_flt++;
255 break;
256 case VM_FAULT_SIGBUS:
257 goto do_sigbus;
258 case VM_FAULT_OOM:
259 goto out_of_memory;
260 default:
261 BUG();
262 }
263
264 up_read(&mm->mmap_sem);
265 /*
266 * keep track of tlb+htab misses that are good addrs but
267 * just need pte's created via handle_mm_fault()
268 * -- Cort
269 */
270 pte_misses++;
271 return 0;
272
273bad_area:
274 up_read(&mm->mmap_sem);
275 pte_errors++;
276
277 /* User mode accesses cause a SIGSEGV */
278 if (user_mode(regs)) {
Paul Mackerrasbb0bb3b2005-09-10 21:13:11 +1000279 _exception(SIGSEGV, regs, code, address);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 return 0;
281 }
282
283 return SIGSEGV;
284
285/*
286 * We ran out of memory, or some other thing happened to us that made
287 * us unable to handle the page fault gracefully.
288 */
289out_of_memory:
290 up_read(&mm->mmap_sem);
291 if (current->pid == 1) {
292 yield();
293 down_read(&mm->mmap_sem);
294 goto survive;
295 }
296 printk("VM: killing process %s\n", current->comm);
297 if (user_mode(regs))
298 do_exit(SIGKILL);
299 return SIGKILL;
300
301do_sigbus:
302 up_read(&mm->mmap_sem);
303 info.si_signo = SIGBUS;
304 info.si_errno = 0;
305 info.si_code = BUS_ADRERR;
306 info.si_addr = (void __user *)address;
307 force_sig_info (SIGBUS, &info, current);
308 if (!user_mode(regs))
309 return SIGBUS;
310 return 0;
311}
312
313/*
314 * bad_page_fault is called when we have a bad access from the kernel.
315 * It is called from the DSI and ISI handlers in head.S and from some
316 * of the procedures in traps.c.
317 */
318void
319bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
320{
321 const struct exception_table_entry *entry;
322
323 /* Are we prepared to handle this fault? */
324 if ((entry = search_exception_tables(regs->nip)) != NULL) {
325 regs->nip = entry->fixup;
326 return;
327 }
328
329 /* kernel has accessed a bad area */
330#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
331 if (debugger_kernel_faults)
332 debugger(regs);
333#endif
334 die("kernel access of bad area", regs, sig);
335}
336
337#ifdef CONFIG_8xx
338
339/* The pgtable.h claims some functions generically exist, but I
340 * can't find them......
341 */
342pte_t *va_to_pte(unsigned long address)
343{
344 pgd_t *dir;
345 pmd_t *pmd;
346 pte_t *pte;
347
348 if (address < TASK_SIZE)
349 return NULL;
350
351 dir = pgd_offset(&init_mm, address);
352 if (dir) {
353 pmd = pmd_offset(dir, address & PAGE_MASK);
354 if (pmd && pmd_present(*pmd)) {
355 pte = pte_offset_kernel(pmd, address & PAGE_MASK);
356 if (pte && pte_present(*pte))
357 return(pte);
358 }
359 }
360 return NULL;
361}
362
363unsigned long va_to_phys(unsigned long address)
364{
365 pte_t *pte;
366
367 pte = va_to_pte(address);
368 if (pte)
369 return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
370 return (0);
371}
372
373void
374print_8xx_pte(struct mm_struct *mm, unsigned long addr)
375{
376 pgd_t * pgd;
377 pmd_t * pmd;
378 pte_t * pte;
379
380 printk(" pte @ 0x%8lx: ", addr);
381 pgd = pgd_offset(mm, addr & PAGE_MASK);
382 if (pgd) {
383 pmd = pmd_offset(pgd, addr & PAGE_MASK);
384 if (pmd && pmd_present(*pmd)) {
385 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
386 if (pte) {
387 printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
388 (long)pgd, (long)pte, (long)pte_val(*pte));
389#define pp ((long)pte_val(*pte))
390 printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
391 "CI: %lx v: %lx\n",
392 pp>>12, /* rpn */
393 (pp>>10)&3, /* pp */
394 (pp>>3)&1, /* small */
395 (pp>>2)&1, /* shared */
396 (pp>>1)&1, /* cache inhibit */
397 pp&1 /* valid */
398 );
399#undef pp
400 }
401 else {
402 printk("no pte\n");
403 }
404 }
405 else {
406 printk("no pmd\n");
407 }
408 }
409 else {
410 printk("no pgd\n");
411 }
412}
413
414int
415get_8xx_pte(struct mm_struct *mm, unsigned long addr)
416{
417 pgd_t * pgd;
418 pmd_t * pmd;
419 pte_t * pte;
420 int retval = 0;
421
422 pgd = pgd_offset(mm, addr & PAGE_MASK);
423 if (pgd) {
424 pmd = pmd_offset(pgd, addr & PAGE_MASK);
425 if (pmd && pmd_present(*pmd)) {
426 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
427 if (pte) {
428 retval = (int)pte_val(*pte);
429 }
430 }
431 }
432 return(retval);
433}
434#endif /* CONFIG_8xx */