#!/usr/bin/env python
#
# oomkill   Trace oom_kill_process(). For Linux, uses BCC, eBPF.
#
# This traces the kernel out-of-memory killer, and prints basic details,
# including the system load averages. This can provide more context on the
# system state at the time of OOM: was it getting busier or steady, based
# on the load averages? This tool may also be useful to customize for
# investigations; for example, by adding other task_struct details at the time
# of OOM.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 09-Feb-2016   Brendan Gregg   Created this.

from bcc import BPF
from time import strftime

# linux stats: file read on every OOM event to report the load averages
loadavg = "/proc/loadavg"

# initialize BPF
#
# The embedded C program is a kprobe on oom_kill_process() (BCC attaches
# functions named kprobe__<kernel_function> automatically). On each call it
# writes one trace line holding the PID and command name of task `p` plus
# the `totalpages` argument.
# NOTE(review): the probe's parameter list has to match the running kernel's
# oom_kill_process() signature -- confirm against the target kernel version.
b = BPF(text="""
#include <uapi/linux/ptrace.h>
#include <linux/oom.h>
void kprobe__oom_kill_process(struct pt_regs *ctx, struct oom_control *oc,
    struct task_struct *p, unsigned int points, unsigned long totalpages)
{
    // totalpages is an unsigned long: print it with %lu rather than %d so
    // large values are not truncated or shown as negative numbers.
    bpf_trace_printk("OOM kill of PID %d (\\"%s\\"), %lu pages\\n", p->pid,
        p->comm, totalpages);
}
""")

def _text(field):
    """Return a trace field as native text.

    trace_fields() may hand back bytes (Python 3); decoding keeps the report
    free of b'...' literals. Values that are already str pass through as-is.
    """
    if isinstance(field, str):
        return field
    return field.decode("utf-8", "replace")


# print output
print("Tracing oom_kill_process()... Ctrl-C to end.")
try:
    while 1:
        try:
            (task, pid, cpu, flags, ts, msg) = b.trace_fields()
        except ValueError:
            # A trace line that cannot be split into the expected fields
            # is skipped instead of terminating the tool.
            continue
        # Sample the load averages at the moment the OOM event is reported.
        with open(loadavg) as stats:
            avgline = stats.read().rstrip()
        print("%s Triggered by PID %d (\"%s\"), %s, loadavg: %s" % (
            strftime("%H:%M:%S"), pid, _text(task), _text(msg), avgline))
except KeyboardInterrupt:
    # Ctrl-C is the documented way to stop tracing: end quietly, no traceback.
    pass