/**
 * kmemcheck - a heavyweight memory checker for the linux kernel
 * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no>
 * (With a lot of help from Ingo Molnar and Pekka Enberg.)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2) as
 * published by the Free Software Foundation.
 */

#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/percpu.h>
#include <linux/ptrace.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/cacheflush.h>
#include <asm/kmemcheck.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#include "error.h"
#include "opcode.h"
#include "pte.h"
#include "shadow.h"

#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 0
#endif

#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT
# define KMEMCHECK_ENABLED 1
#endif

#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT
# define KMEMCHECK_ENABLED 2
#endif

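/*
 * kmemcheck_enabled selects the runtime mode: 0 = disabled, 1 = enabled,
 * 2 = one-shot (disable again after the first report). The default comes
 * from the CONFIG_KMEMCHECK_*_BY_DEFAULT options above and can be
 * overridden with the kmemcheck= boot parameter below.
 */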
int kmemcheck_enabled = KMEMCHECK_ENABLED;

int __init kmemcheck_init(void)
{
        printk(KERN_INFO "kmemcheck: \"Bugs, beware!\"\n");

#ifdef CONFIG_SMP
        /*
         * Limit SMP to use a single CPU. We rely on the fact that this code
         * runs before SMP is set up.
         */
        if (setup_max_cpus > 1) {
                printk(KERN_INFO
                        "kmemcheck: Limiting number of CPUs to 1.\n");
                setup_max_cpus = 1;
        }
#endif

        return 0;
}

early_initcall(kmemcheck_init);

/*
 * We need to parse the kmemcheck= option before any memory is allocated.
 */
static int __init param_kmemcheck(char *str)
{
        if (!str)
                return -EINVAL;

        sscanf(str, "%d", &kmemcheck_enabled);
        return 0;
}

early_param("kmemcheck", param_kmemcheck);

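/*
 * Make the page containing @address present again so that the faulting
 * instruction can access it. Returns 1 if the address is tracked by
 * kmemcheck, 0 otherwise.
 */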
int kmemcheck_show_addr(unsigned long address)
{
        pte_t *pte;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return 0;

        set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
        __flush_tlb_one(address);
        return 1;
}

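/*
 * Hide the page containing @address again by clearing _PAGE_PRESENT, so
 * that the next access faults. Returns 1 if the address is tracked by
 * kmemcheck, 0 otherwise.
 */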
int kmemcheck_hide_addr(unsigned long address)
{
        pte_t *pte;

        pte = kmemcheck_pte_lookup(address);
        if (!pte)
                return 0;

        set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
        __flush_tlb_one(address);
        return 1;
}

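/*
 * Per-CPU bookkeeping for the show/hide cycle: the addresses revealed by
 * the current fault, the show/hide balance, and the flags that must be
 * restored once the faulting instruction has been single-stepped.
 */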
struct kmemcheck_context {
        bool busy;
        int balance;

        /*
         * There can be at most two memory operands to an instruction, but
         * each address can cross a page boundary -- so we may need up to
         * four addresses that must be hidden/revealed for each fault.
         */
        unsigned long addr[4];
        unsigned long n_addrs;
        unsigned long flags;

        /* Data size of the instruction that caused a fault. */
        unsigned int size;
};

static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context);

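/*
 * True while a page has been revealed but not yet hidden again, i.e.
 * between kmemcheck_show() and the corresponding kmemcheck_hide().
 */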
bool kmemcheck_active(struct pt_regs *regs)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        return data->balance > 0;
}

/* Save an address that needs to be shown/hidden */
static void kmemcheck_save_addr(unsigned long addr)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr));
        data->addr[data->n_addrs++] = addr;
}

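/*
 * Reveal (or, below, hide) every address saved for the current fault. Both
 * helpers return the number of addresses that were actually tracked by
 * kmemcheck.
 */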
static unsigned int kmemcheck_show_all(void)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
        unsigned int i;
        unsigned int n;

        n = 0;
        for (i = 0; i < data->n_addrs; ++i)
                n += kmemcheck_show_addr(data->addr[i]);

        return n;
}

static unsigned int kmemcheck_hide_all(void)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
        unsigned int i;
        unsigned int n;

        n = 0;
        for (i = 0; i < data->n_addrs; ++i)
                n += kmemcheck_hide_addr(data->addr[i]);

        return n;
}

/*
 * Called from the #PF handler.
 */
void kmemcheck_show(struct pt_regs *regs)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        BUG_ON(!irqs_disabled());

        if (unlikely(data->balance != 0)) {
                kmemcheck_show_all();
                kmemcheck_error_save_bug(regs);
                data->balance = 0;
                return;
        }

        /*
         * None of the addresses actually belonged to kmemcheck. Note that
         * this is not an error.
         */
        if (kmemcheck_show_all() == 0)
                return;

        ++data->balance;

        /*
         * The IF needs to be cleared as well, so that the faulting
         * instruction can run "uninterrupted". Otherwise, we might take
         * an interrupt and start executing that before we've had a chance
         * to hide the page again.
         *
         * NOTE: In the rare case of multiple faults, we must not override
         * the original flags:
         */
        if (!(regs->flags & X86_EFLAGS_TF))
                data->flags = regs->flags;

        regs->flags |= X86_EFLAGS_TF;
        regs->flags &= ~X86_EFLAGS_IF;
}

/*
 * Called from the #DB handler.
 */
void kmemcheck_hide(struct pt_regs *regs)
{
        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);
        int n;

        BUG_ON(!irqs_disabled());

        if (data->balance == 0)
                return;

        if (unlikely(data->balance != 1)) {
                kmemcheck_show_all();
                kmemcheck_error_save_bug(regs);
                data->n_addrs = 0;
                data->balance = 0;

                if (!(data->flags & X86_EFLAGS_TF))
                        regs->flags &= ~X86_EFLAGS_TF;
                if (data->flags & X86_EFLAGS_IF)
                        regs->flags |= X86_EFLAGS_IF;
                return;
        }

        if (kmemcheck_enabled)
                n = kmemcheck_hide_all();
        else
                n = kmemcheck_show_all();

        if (n == 0)
                return;

        --data->balance;

        data->n_addrs = 0;

        if (!(data->flags & X86_EFLAGS_TF))
                regs->flags &= ~X86_EFLAGS_TF;
        if (data->flags & X86_EFLAGS_IF)
                regs->flags |= X86_EFLAGS_IF;
}

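/*
 * Reveal @n pages starting at @p. Unlike kmemcheck_show_addr(), this also
 * clears the _PAGE_HIDDEN bit, so the pages are no longer tracked at all.
 */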
void kmemcheck_show_pages(struct page *p, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; ++i) {
                unsigned long address;
                pte_t *pte;
                unsigned int level;

                address = (unsigned long) page_address(&p[i]);
                pte = lookup_address(address, &level);
                BUG_ON(!pte);
                BUG_ON(level != PG_LEVEL_4K);

                set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
                set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN));
                __flush_tlb_one(address);
        }
}

bool kmemcheck_page_is_tracked(struct page *p)
{
        /* This will also check the "hidden" flag of the PTE. */
        return kmemcheck_pte_lookup((unsigned long) page_address(p));
}

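/*
 * Counterpart of kmemcheck_show_pages(): hide @n pages starting at @p by
 * clearing _PAGE_PRESENT and setting _PAGE_HIDDEN, so that every access
 * to these pages faults into kmemcheck.
 */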
void kmemcheck_hide_pages(struct page *p, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; ++i) {
                unsigned long address;
                pte_t *pte;
                unsigned int level;

                address = (unsigned long) page_address(&p[i]);
                pte = lookup_address(address, &level);
                BUG_ON(!pte);
                BUG_ON(level != PG_LEVEL_4K);

                set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
                set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN));
                __flush_tlb_one(address);
        }
}

/* Access may NOT cross page boundary */
static void kmemcheck_read_strict(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        void *shadow;
        enum kmemcheck_shadow status;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return;

        kmemcheck_save_addr(addr);
        status = kmemcheck_shadow_test(shadow, size);
        if (status == KMEMCHECK_SHADOW_INITIALIZED)
                return;

        if (kmemcheck_enabled)
                kmemcheck_error_save(status, addr, size, regs);

        if (kmemcheck_enabled == 2)
                kmemcheck_enabled = 0;

        /* Don't warn about it again. */
        kmemcheck_shadow_set(shadow, size);
}

/* Access may cross page boundary */
static void kmemcheck_read(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        unsigned long page = addr & PAGE_MASK;
        unsigned long next_addr = addr + size - 1;
        unsigned long next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                kmemcheck_read_strict(regs, addr, size);
                return;
        }

        /*
         * What we do is basically to split the access across the
         * two pages and handle each part separately. Yes, this means
         * that we may now see reads that are 3 + 5 bytes, for
         * example (and if both are uninitialized, there will be two
         * reports), but it makes the code a lot simpler.
         */
        kmemcheck_read_strict(regs, addr, next_page - addr);
        kmemcheck_read_strict(regs, next_page, next_addr - next_page + 1);
}

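/*
 * Access may NOT cross page boundary. A write makes the destination bytes
 * initialized, so we only need to update the shadow memory.
 */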
static void kmemcheck_write_strict(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        void *shadow;

        shadow = kmemcheck_shadow_lookup(addr);
        if (!shadow)
                return;

        kmemcheck_save_addr(addr);
        kmemcheck_shadow_set(shadow, size);
}

static void kmemcheck_write(struct pt_regs *regs,
        unsigned long addr, unsigned int size)
{
        unsigned long page = addr & PAGE_MASK;
        unsigned long next_addr = addr + size - 1;
        unsigned long next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                kmemcheck_write_strict(regs, addr, size);
                return;
        }

        /* See comment in kmemcheck_read(). */
        kmemcheck_write_strict(regs, addr, next_page - addr);
        kmemcheck_write_strict(regs, next_page, next_addr - next_page + 1);
}

/*
 * Copying is hard. We have two addresses, each of which may be split across
 * a page (and each page will have different shadow addresses).
 */
static void kmemcheck_copy(struct pt_regs *regs,
        unsigned long src_addr, unsigned long dst_addr, unsigned int size)
{
        uint8_t shadow[8];
        enum kmemcheck_shadow status;

        unsigned long page;
        unsigned long next_addr;
        unsigned long next_page;

        uint8_t *x;
        unsigned int i;
        unsigned int n;

        BUG_ON(size > sizeof(shadow));

        page = src_addr & PAGE_MASK;
        next_addr = src_addr + size - 1;
        next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                /* Same page */
                x = kmemcheck_shadow_lookup(src_addr);
                if (x) {
                        kmemcheck_save_addr(src_addr);
                        for (i = 0; i < size; ++i)
                                shadow[i] = x[i];
                } else {
                        for (i = 0; i < size; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }
        } else {
                n = next_page - src_addr;
                BUG_ON(n > sizeof(shadow));

                /* First page */
                x = kmemcheck_shadow_lookup(src_addr);
                if (x) {
                        kmemcheck_save_addr(src_addr);
                        for (i = 0; i < n; ++i)
                                shadow[i] = x[i];
                } else {
                        /* Not tracked */
                        for (i = 0; i < n; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }

                /* Second page */
                x = kmemcheck_shadow_lookup(next_page);
                if (x) {
                        kmemcheck_save_addr(next_page);
                        for (i = n; i < size; ++i)
                                shadow[i] = x[i - n];
                } else {
                        /* Not tracked */
                        for (i = n; i < size; ++i)
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                }
        }

        page = dst_addr & PAGE_MASK;
        next_addr = dst_addr + size - 1;
        next_page = next_addr & PAGE_MASK;

        if (likely(page == next_page)) {
                /* Same page */
                x = kmemcheck_shadow_lookup(dst_addr);
                if (x) {
                        kmemcheck_save_addr(dst_addr);
                        for (i = 0; i < size; ++i) {
                                x[i] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }
        } else {
                n = next_page - dst_addr;
                BUG_ON(n > sizeof(shadow));

                /* First page */
                x = kmemcheck_shadow_lookup(dst_addr);
                if (x) {
                        kmemcheck_save_addr(dst_addr);
                        for (i = 0; i < n; ++i) {
                                x[i] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }

                /* Second page */
                x = kmemcheck_shadow_lookup(next_page);
                if (x) {
                        kmemcheck_save_addr(next_page);
                        for (i = n; i < size; ++i) {
                                x[i - n] = shadow[i];
                                shadow[i] = KMEMCHECK_SHADOW_INITIALIZED;
                        }
                }
        }

        status = kmemcheck_shadow_test(shadow, size);
        if (status == KMEMCHECK_SHADOW_INITIALIZED)
                return;

        if (kmemcheck_enabled)
                kmemcheck_error_save(status, src_addr, size, regs);

        if (kmemcheck_enabled == 2)
                kmemcheck_enabled = 0;
}

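/*
 * Access type reported by the page fault handler; used as a fallback when
 * the faulting opcode does not need special treatment.
 */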
enum kmemcheck_method {
        KMEMCHECK_READ,
        KMEMCHECK_WRITE,
};

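/*
 * Decode the instruction at regs->ip and check/update the shadow memory
 * for the address(es) it touches. Reads are checked against the shadow,
 * writes mark their destination as initialized, and the string
 * instructions (MOVS/CMPS) are handled specially since they reference two
 * addresses.
 */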
static void kmemcheck_access(struct pt_regs *regs,
        unsigned long fallback_address, enum kmemcheck_method fallback_method)
{
        const uint8_t *insn;
        const uint8_t *insn_primary;
        unsigned int size;

        struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context);

        /* Recursive fault -- ouch. */
        if (data->busy) {
                kmemcheck_show_addr(fallback_address);
                kmemcheck_error_save_bug(regs);
                return;
        }

        data->busy = true;

        insn = (const uint8_t *) regs->ip;
        insn_primary = kmemcheck_opcode_get_primary(insn);

        kmemcheck_opcode_decode(insn, &size);

        switch (insn_primary[0]) {
#ifdef CONFIG_KMEMCHECK_BITOPS_OK
                /* AND, OR, XOR */
                /*
                 * Unfortunately, these instructions have to be excluded from
                 * our regular checking since they access only some (and not
                 * all) bits. This clears out "bogus" bitfield-access warnings.
                 */
        case 0x80:
        case 0x81:
        case 0x82:
        case 0x83:
                switch ((insn_primary[1] >> 3) & 7) {
                        /* OR */
                case 1:
                        /* AND */
                case 4:
                        /* XOR */
                case 6:
                        kmemcheck_write(regs, fallback_address, size);
                        goto out;

                        /* ADD */
                case 0:
                        /* ADC */
                case 2:
                        /* SBB */
                case 3:
                        /* SUB */
                case 5:
                        /* CMP */
                case 7:
                        break;
                }
                break;
#endif

                /* MOVS, MOVSB, MOVSW, MOVSD */
        case 0xa4:
        case 0xa5:
                /*
                 * These instructions are special because they take two
                 * addresses, but we only get one page fault.
                 */
                kmemcheck_copy(regs, regs->si, regs->di, size);
                goto out;

                /* CMPS, CMPSB, CMPSW, CMPSD */
        case 0xa6:
        case 0xa7:
                kmemcheck_read(regs, regs->si, size);
                kmemcheck_read(regs, regs->di, size);
                goto out;
        }

        /*
         * If the opcode isn't special in any way, we use the data from the
         * page fault handler to determine the address and type of memory
         * access.
         */
        switch (fallback_method) {
        case KMEMCHECK_READ:
                kmemcheck_read(regs, fallback_address, size);
                goto out;
        case KMEMCHECK_WRITE:
                kmemcheck_write(regs, fallback_address, size);
                goto out;
        }

out:
        data->busy = false;
}

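/*
 * Entry point from the page fault handler. Returns true if the fault was
 * caused by an access to a page hidden by kmemcheck; in that case the
 * access has been checked and the page temporarily revealed so that the
 * faulting instruction can be single-stepped.
 */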
bool kmemcheck_fault(struct pt_regs *regs, unsigned long address,
        unsigned long error_code)
{
        pte_t *pte;
        unsigned int level;

        /*
         * XXX: Is it safe to assume that memory accesses from virtual 86
         * mode or non-kernel code segments will _never_ access kernel
         * memory (e.g. tracked pages)? For now, we need this to avoid
         * invoking kmemcheck for PnP BIOS calls.
         */
        if (regs->flags & X86_VM_MASK)
                return false;
        if (regs->cs != __KERNEL_CS)
                return false;

        pte = lookup_address(address, &level);
        if (!pte)
                return false;
        if (level != PG_LEVEL_4K)
                return false;
        if (!pte_hidden(*pte))
                return false;

        if (error_code & 2)
                kmemcheck_access(regs, address, KMEMCHECK_WRITE);
        else
                kmemcheck_access(regs, address, KMEMCHECK_READ);

        kmemcheck_show(regs);
        return true;
}

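/*
 * Entry point from the debug (single-step) trap handler; hides the page(s)
 * again once the faulting instruction has been executed.
 */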
bool kmemcheck_trap(struct pt_regs *regs)
{
        if (!kmemcheck_active(regs))
                return false;

        /* We're done. */
        kmemcheck_hide(regs);
        return true;
}