/*
 * MMU fault handling support.
 *
 * Copyright (C) 1998-2002 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 */
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/kdebug.h>

extern void die (char *, struct pt_regs *, long);

/*
 * This routine is analogous to expand_stack() but instead grows the
 * register backing store (which grows towards higher addresses).
 * Since the register backing store is accessed sequentially, we
 * disallow growing the RBS by more than a page at a time.  Note that
 * the VM_GROWSUP flag can be set on any VM area but that's fine
 * because the total process size is still limited by RLIMIT_STACK and
 * RLIMIT_AS.
 */
static inline long
expand_backing_store (struct vm_area_struct *vma, unsigned long address)
{
	unsigned long grow;

	grow = PAGE_SIZE >> PAGE_SHIFT;
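	/*
	 * The backing store is charged against the same limits as the
	 * memory stack: RLIMIT_STACK bounds how far this vma may grow from
	 * its start, and RLIMIT_AS bounds the total address space.
	 */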
	if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur
	    || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur))
		return -ENOMEM;
	vma->vm_end += PAGE_SIZE;
	vma->vm_mm->total_vm += grow;
	if (vma->vm_flags & VM_LOCKED)
		vma->vm_mm->locked_vm += grow;
	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
	return 0;
}

/*
 * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
 * (inside region 5, on ia64) and that page is present.
 */
static int
mapped_kernel_page_is_present (unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;

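	/*
	 * Walk the kernel page table top-down (pgd -> pud -> pmd -> pte),
	 * bailing out at the first level that is missing or bad.
	 */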
	pgd = pgd_offset_k(address);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		return 0;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || pud_bad(*pud))
		return 0;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return 0;

	ptep = pte_offset_kernel(pmd, address);
	if (!ptep)
		return 0;

	pte = *ptep;
	return pte_present(pte);
}

void
ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs)
{
	int signal = SIGSEGV, code = SEGV_MAPERR;
	struct vm_area_struct *vma, *prev_vma;
	struct mm_struct *mm = current->mm;
	struct siginfo si;
	unsigned long mask;

	/*
	 * If we're in an interrupt or have no user context, we must not take the fault.
	 */
	if (in_atomic() || !mm)
		goto no_context;

#ifdef CONFIG_VIRTUAL_MEM_MAP
	/*
	 * If the fault is in region 5 and we are in the kernel, we may already
	 * hold the mmap_sem (the pfn_valid macro is called during mmap).  There
	 * is no vma for region 5 addresses anyway, so skip getting the semaphore
	 * and go directly to the exception handling code.
	 */

	if ((REGION_NUMBER(address) == 5) && !user_mode(regs))
		goto bad_area_no_up;
#endif

	/*
	 * This is to handle kprobes on user-space access instructions.
	 */
	if (notify_die(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT,
		       SIGSEGV) == NOTIFY_STOP)
		return;

	down_read(&mm->mmap_sem);

	vma = find_vma_prev(mm, address, &prev_vma);
	if (!vma)
		goto bad_area;

	/* find_vma_prev() returns vma such that address < vma->vm_end or NULL */
	if (address < vma->vm_start)
		goto check_expansion;

  good_area:
	code = SEGV_ACCERR;

	/* OK, we've got a good vm_area for this memory area.  Check the access permissions: */

#	define VM_READ_BIT	0
#	define VM_WRITE_BIT	1
#	define VM_EXEC_BIT	2

#	if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \
	    || (1 << VM_EXEC_BIT) != VM_EXEC)
#		error File is out of sync with <linux/mm.h>.  Please update.
#	endif

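	/*
	 * The ISR says which access types faulted (execute, write, read);
	 * shift those bits into the matching VM_* flag positions so the
	 * permission check below is a single mask comparison.
	 */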
	mask = (  (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
		| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)
		| (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT));

	if ((vma->vm_flags & mask) != mask)
		goto bad_area;

  survive:
	/*
	 * If for any reason at all we couldn't handle the fault, make
	 * sure we exit gracefully rather than endlessly redo the
	 * fault.
	 */
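	/* The last argument tells the VM whether the faulting access was a write. */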
	switch (handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0)) {
	case VM_FAULT_MINOR:
		++current->min_flt;
		break;
	case VM_FAULT_MAJOR:
		++current->maj_flt;
		break;
	case VM_FAULT_SIGBUS:
		/*
		 * We ran out of memory, or some other thing happened
		 * to us that made us unable to handle the page fault
		 * gracefully.
		 */
		signal = SIGBUS;
		goto bad_area;
	case VM_FAULT_OOM:
		goto out_of_memory;
	default:
		BUG();
	}
	up_read(&mm->mmap_sem);
	return;

  check_expansion:
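	/*
	 * The address fell in a hole between VMAs.  Two kinds of growth may
	 * still cover it: the memory stack growing down into this vma, or
	 * the register backing store growing up out of prev_vma.  Either
	 * way, the grown vma must stay inside one ia64 region and below
	 * that region's RGN_MAP_LIMIT.
	 */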
	if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto bad_area;
		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
		    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
			goto bad_area;
		if (expand_stack(vma, address))
			goto bad_area;
	} else {
		vma = prev_vma;
		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
		    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
			goto bad_area;
		if (expand_backing_store(vma, address))
			goto bad_area;
	}
	goto good_area;

  bad_area:
	up_read(&mm->mmap_sem);
#ifdef CONFIG_VIRTUAL_MEM_MAP
  bad_area_no_up:
#endif
	if ((isr & IA64_ISR_SP)
	    || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
	{
		/*
		 * This fault was due to a speculative load or lfetch.fault, set the "ed"
		 * bit in the psr to ensure forward progress.  (Target register will get a
		 * NaT for ld.s, lfetch will be canceled.)
		 */
		ia64_psr(regs)->ed = 1;
		return;
	}
	if (user_mode(regs)) {
		si.si_signo = signal;
		si.si_errno = 0;
		si.si_code = code;
		si.si_addr = (void __user *) address;
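		/*
		 * si_isr and si_flags are ia64-specific siginfo fields;
		 * __ISR_VALID tells user space that si_isr holds the
		 * interruption status register for this fault.
		 */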
		si.si_isr = isr;
		si.si_flags = __ISR_VALID;
		force_sig_info(signal, &si, current);
		return;
	}

  no_context:
	if ((isr & IA64_ISR_SP)
	    || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
	{
		/*
		 * This fault was due to a speculative load or lfetch.fault, set the "ed"
		 * bit in the psr to ensure forward progress.  (Target register will get a
		 * NaT for ld.s, lfetch will be canceled.)
		 */
		ia64_psr(regs)->ed = 1;
		return;
	}

	/*
	 * Since we have no VMAs for region 5, we might get here even if the address is
	 * valid, due to the VHPT walker inserting a non-present translation that becomes
	 * stale.  If that happens, the non-present fault handler already purged the stale
	 * translation, which fixed the problem.  So, we check to see if the translation is
	 * valid, and return if it is.
	 */
	if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address))
		return;

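	/*
	 * If the faulting instruction has an exception-table fixup (as the
	 * user-copy accessors do), apply it and resume at the fixup.
	 */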
	if (ia64_done_with_exception(regs))
		return;

	/*
	 * Oops.  The kernel tried to access some bad page.  We'll have to terminate things
	 * with extreme prejudice.
	 */
	bust_spinlocks(1);

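	/* A fault in the first page is almost certainly a NULL pointer dereference. */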
	if (address < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address);
	else
		printk(KERN_ALERT "Unable to handle kernel paging request at "
		       "virtual address %016lx\n", address);
	die("Oops", regs, isr);
	bust_spinlocks(0);
	do_exit(SIGKILL);
	return;

  out_of_memory:
	up_read(&mm->mmap_sem);
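	/* Never OOM-kill init (pid 1): yield, retake the semaphore, and retry the fault. */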
	if (current->pid == 1) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk(KERN_CRIT "VM: killing process %s\n", current->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	goto no_context;
}