blob: 465f451f3bc3a8a95908dcaa4d31757e04bc9f26 [file] [log] [blame]
/*
 * PowerPC version
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Derived from "arch/i386/mm/fault.c"
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Modified by Cort Dougan and Paul Mackerras.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/signal.h>
17#include <linux/sched.h>
18#include <linux/kernel.h>
19#include <linux/errno.h>
20#include <linux/string.h>
21#include <linux/types.h>
22#include <linux/ptrace.h>
23#include <linux/mman.h>
24#include <linux/mm.h>
25#include <linux/interrupt.h>
26#include <linux/highmem.h>
27#include <linux/module.h>
28
29#include <asm/page.h>
30#include <asm/pgtable.h>
31#include <asm/mmu.h>
32#include <asm/mmu_context.h>
33#include <asm/system.h>
34#include <asm/uaccess.h>
35#include <asm/tlbflush.h>
36
#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
/*
 * Debugger hooks, defined elsewhere.  They are function pointers and are
 * only declared when a kernel debugger is configured.
 */
extern void (*debugger)(struct pt_regs *);
extern void (*debugger_fault_handler)(struct pt_regs *);
extern int (*debugger_dabr_match)(struct pt_regs *);
/* Non-zero: bad_page_fault() calls the debugger hook before die(). */
int debugger_kernel_faults = 1;
#endif

/* Statistics counters. */
unsigned long htab_reloads;	/* updated by hashtable.S:hash_page() */
unsigned long htab_evicts;	/* updated by hashtable.S:hash_page() */
unsigned long htab_preloads;	/* updated by hashtable.S:add_hash_page() */
unsigned long pte_misses;	/* updated by do_page_fault() */
unsigned long pte_errors;	/* updated by do_page_fault() */
/* NOTE(review): not referenced in the visible part of this file. */
unsigned int probingmem;
50
51/*
52 * Check whether the instruction at regs->nip is a store using
53 * an update addressing form which will update r1.
54 */
55static int store_updates_sp(struct pt_regs *regs)
56{
57 unsigned int inst;
58
59 if (get_user(inst, (unsigned int __user *)regs->nip))
60 return 0;
61 /* check for 1 in the rA field */
62 if (((inst >> 16) & 0x1f) != 1)
63 return 0;
64 /* check major opcode */
65 switch (inst >> 26) {
66 case 37: /* stwu */
67 case 39: /* stbu */
68 case 45: /* sthu */
69 case 53: /* stfsu */
70 case 55: /* stfdu */
71 return 1;
72 case 31:
73 /* check minor opcode */
74 switch ((inst >> 1) & 0x3ff) {
75 case 183: /* stwux */
76 case 247: /* stbux */
77 case 439: /* sthux */
78 case 695: /* stfsux */
79 case 759: /* stfdux */
80 return 1;
81 }
82 }
83 return 0;
84}
85
86/*
87 * For 600- and 800-family processors, the error_code parameter is DSISR
88 * for a data fault, SRR1 for an instruction fault. For 400-family processors
89 * the error_code parameter is ESR for a data fault, 0 for an instruction
90 * fault.
91 */
92int do_page_fault(struct pt_regs *regs, unsigned long address,
93 unsigned long error_code)
94{
95 struct vm_area_struct * vma;
96 struct mm_struct *mm = current->mm;
97 siginfo_t info;
98 int code = SEGV_MAPERR;
99#if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
100 int is_write = error_code & ESR_DST;
101#else
102 int is_write = 0;
103
104 /*
105 * Fortunately the bit assignments in SRR1 for an instruction
106 * fault and DSISR for a data fault are mostly the same for the
107 * bits we are interested in. But there are some bits which
108 * indicate errors in DSISR but can validly be set in SRR1.
109 */
110 if (TRAP(regs) == 0x400)
111 error_code &= 0x48200000;
112 else
113 is_write = error_code & 0x02000000;
114#endif /* CONFIG_4xx || CONFIG_BOOKE */
115
116#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
117 if (debugger_fault_handler && TRAP(regs) == 0x300) {
118 debugger_fault_handler(regs);
119 return 0;
120 }
121#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
122 if (error_code & 0x00400000) {
123 /* DABR match */
124 if (debugger_dabr_match(regs))
125 return 0;
126 }
127#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
128#endif /* CONFIG_XMON || CONFIG_KGDB */
129
130 if (in_atomic() || mm == NULL)
131 return SIGSEGV;
132
133 down_read(&mm->mmap_sem);
134 vma = find_vma(mm, address);
135 if (!vma)
136 goto bad_area;
137 if (vma->vm_start <= address)
138 goto good_area;
139 if (!(vma->vm_flags & VM_GROWSDOWN))
140 goto bad_area;
141 if (!is_write)
142 goto bad_area;
143
144 /*
145 * N.B. The rs6000/xcoff ABI allows programs to access up to
146 * a few hundred bytes below the stack pointer.
147 * The kernel signal delivery code writes up to about 1.5kB
148 * below the stack pointer (r1) before decrementing it.
149 * The exec code can write slightly over 640kB to the stack
150 * before setting the user r1. Thus we allow the stack to
151 * expand to 1MB without further checks.
152 */
153 if (address + 0x100000 < vma->vm_end) {
154 /* get user regs even if this fault is in kernel mode */
155 struct pt_regs *uregs = current->thread.regs;
156 if (uregs == NULL)
157 goto bad_area;
158
159 /*
160 * A user-mode access to an address a long way below
161 * the stack pointer is only valid if the instruction
162 * is one which would update the stack pointer to the
163 * address accessed if the instruction completed,
164 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
165 * (or the byte, halfword, float or double forms).
166 *
167 * If we don't check this then any write to the area
168 * between the last mapped region and the stack will
169 * expand the stack rather than segfaulting.
170 */
171 if (address + 2048 < uregs->gpr[1]
172 && (!user_mode(regs) || !store_updates_sp(regs)))
173 goto bad_area;
174 }
175 if (expand_stack(vma, address))
176 goto bad_area;
177
178good_area:
179 code = SEGV_ACCERR;
180#if defined(CONFIG_6xx)
181 if (error_code & 0x95700000)
182 /* an error such as lwarx to I/O controller space,
183 address matching DABR, eciwx, etc. */
184 goto bad_area;
185#endif /* CONFIG_6xx */
186#if defined(CONFIG_8xx)
187 /* The MPC8xx seems to always set 0x80000000, which is
188 * "undefined". Of those that can be set, this is the only
189 * one which seems bad.
190 */
191 if (error_code & 0x10000000)
192 /* Guarded storage error. */
193 goto bad_area;
194#endif /* CONFIG_8xx */
195
196 /* a write */
197 if (is_write) {
198 if (!(vma->vm_flags & VM_WRITE))
199 goto bad_area;
200#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
201 /* an exec - 4xx/Book-E allows for per-page execute permission */
202 } else if (TRAP(regs) == 0x400) {
203 pte_t *ptep;
Eugene Suroveginbab70a42006-03-28 10:13:12 -0800204 pmd_t *pmdp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205
206#if 0
207 /* It would be nice to actually enforce the VM execute
208 permission on CPUs which can do so, but far too
209 much stuff in userspace doesn't get the permissions
210 right, so we let any page be executed for now. */
211 if (! (vma->vm_flags & VM_EXEC))
212 goto bad_area;
213#endif
214
215 /* Since 4xx/Book-E supports per-page execute permission,
216 * we lazily flush dcache to icache. */
217 ptep = NULL;
Eugene Suroveginbab70a42006-03-28 10:13:12 -0800218 if (get_pteptr(mm, address, &ptep, &pmdp)) {
219 spinlock_t *ptl = pte_lockptr(mm, pmdp);
220 spin_lock(ptl);
221 if (pte_present(*ptep)) {
222 struct page *page = pte_page(*ptep);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223
Eugene Suroveginbab70a42006-03-28 10:13:12 -0800224 if (!test_bit(PG_arch_1, &page->flags)) {
225 flush_dcache_icache_page(page);
226 set_bit(PG_arch_1, &page->flags);
227 }
228 pte_update(ptep, 0, _PAGE_HWEXEC);
229 _tlbie(address);
230 pte_unmap_unlock(ptep, ptl);
231 up_read(&mm->mmap_sem);
232 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 }
Eugene Suroveginbab70a42006-03-28 10:13:12 -0800234 pte_unmap_unlock(ptep, ptl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236#endif
237 /* a read */
238 } else {
239 /* protection fault */
240 if (error_code & 0x08000000)
241 goto bad_area;
Jason Barondf67b3d2006-09-29 01:58:58 -0700242 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 goto bad_area;
244 }
245
246 /*
247 * If for any reason at all we couldn't handle the fault,
248 * make sure we exit gracefully rather than endlessly redo
249 * the fault.
250 */
251 survive:
252 switch (handle_mm_fault(mm, vma, address, is_write)) {
253 case VM_FAULT_MINOR:
254 current->min_flt++;
255 break;
256 case VM_FAULT_MAJOR:
257 current->maj_flt++;
258 break;
259 case VM_FAULT_SIGBUS:
260 goto do_sigbus;
261 case VM_FAULT_OOM:
262 goto out_of_memory;
263 default:
264 BUG();
265 }
266
267 up_read(&mm->mmap_sem);
268 /*
269 * keep track of tlb+htab misses that are good addrs but
270 * just need pte's created via handle_mm_fault()
271 * -- Cort
272 */
273 pte_misses++;
274 return 0;
275
276bad_area:
277 up_read(&mm->mmap_sem);
278 pte_errors++;
279
280 /* User mode accesses cause a SIGSEGV */
281 if (user_mode(regs)) {
Paul Mackerrasbb0bb3b2005-09-10 21:13:11 +1000282 _exception(SIGSEGV, regs, code, address);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 return 0;
284 }
285
286 return SIGSEGV;
287
288/*
289 * We ran out of memory, or some other thing happened to us that made
290 * us unable to handle the page fault gracefully.
291 */
292out_of_memory:
293 up_read(&mm->mmap_sem);
Sukadev Bhattiproluf400e192006-09-29 02:00:07 -0700294 if (is_init(current)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 yield();
296 down_read(&mm->mmap_sem);
297 goto survive;
298 }
299 printk("VM: killing process %s\n", current->comm);
300 if (user_mode(regs))
301 do_exit(SIGKILL);
302 return SIGKILL;
303
304do_sigbus:
305 up_read(&mm->mmap_sem);
306 info.si_signo = SIGBUS;
307 info.si_errno = 0;
308 info.si_code = BUS_ADRERR;
309 info.si_addr = (void __user *)address;
310 force_sig_info (SIGBUS, &info, current);
311 if (!user_mode(regs))
312 return SIGBUS;
313 return 0;
314}
315
316/*
317 * bad_page_fault is called when we have a bad access from the kernel.
318 * It is called from the DSI and ISI handlers in head.S and from some
319 * of the procedures in traps.c.
320 */
321void
322bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
323{
324 const struct exception_table_entry *entry;
325
326 /* Are we prepared to handle this fault? */
327 if ((entry = search_exception_tables(regs->nip)) != NULL) {
328 regs->nip = entry->fixup;
329 return;
330 }
331
332 /* kernel has accessed a bad area */
333#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
334 if (debugger_kernel_faults)
335 debugger(regs);
336#endif
337 die("kernel access of bad area", regs, sig);
338}
339
340#ifdef CONFIG_8xx
341
342/* The pgtable.h claims some functions generically exist, but I
343 * can't find them......
344 */
345pte_t *va_to_pte(unsigned long address)
346{
347 pgd_t *dir;
348 pmd_t *pmd;
349 pte_t *pte;
350
351 if (address < TASK_SIZE)
352 return NULL;
353
354 dir = pgd_offset(&init_mm, address);
355 if (dir) {
356 pmd = pmd_offset(dir, address & PAGE_MASK);
357 if (pmd && pmd_present(*pmd)) {
358 pte = pte_offset_kernel(pmd, address & PAGE_MASK);
359 if (pte && pte_present(*pte))
360 return(pte);
361 }
362 }
363 return NULL;
364}
365
366unsigned long va_to_phys(unsigned long address)
367{
368 pte_t *pte;
369
370 pte = va_to_pte(address);
371 if (pte)
372 return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
373 return (0);
374}
375
376void
377print_8xx_pte(struct mm_struct *mm, unsigned long addr)
378{
379 pgd_t * pgd;
380 pmd_t * pmd;
381 pte_t * pte;
382
383 printk(" pte @ 0x%8lx: ", addr);
384 pgd = pgd_offset(mm, addr & PAGE_MASK);
385 if (pgd) {
386 pmd = pmd_offset(pgd, addr & PAGE_MASK);
387 if (pmd && pmd_present(*pmd)) {
388 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
389 if (pte) {
390 printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
391 (long)pgd, (long)pte, (long)pte_val(*pte));
392#define pp ((long)pte_val(*pte))
393 printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
394 "CI: %lx v: %lx\n",
395 pp>>12, /* rpn */
396 (pp>>10)&3, /* pp */
397 (pp>>3)&1, /* small */
398 (pp>>2)&1, /* shared */
399 (pp>>1)&1, /* cache inhibit */
400 pp&1 /* valid */
401 );
402#undef pp
403 }
404 else {
405 printk("no pte\n");
406 }
407 }
408 else {
409 printk("no pmd\n");
410 }
411 }
412 else {
413 printk("no pgd\n");
414 }
415}
416
417int
418get_8xx_pte(struct mm_struct *mm, unsigned long addr)
419{
420 pgd_t * pgd;
421 pmd_t * pmd;
422 pte_t * pte;
423 int retval = 0;
424
425 pgd = pgd_offset(mm, addr & PAGE_MASK);
426 if (pgd) {
427 pmd = pmd_offset(pgd, addr & PAGE_MASK);
428 if (pmd && pmd_present(*pmd)) {
429 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
430 if (pte) {
431 retval = (int)pte_val(*pte);
432 }
433 }
434 }
435 return(retval);
436}
437#endif /* CONFIG_8xx */