#!/usr/bin/python
#
# oomkill   Trace oom_kill_process(). For Linux, uses BCC, eBPF.
#
# This traces the kernel out-of-memory killer, and prints basic details,
# including the system load averages. This can provide more context on the
# system state at the time of OOM: was it getting busier or steady, based
# on the load averages? This tool may also be useful to customize for
# investigations; for example, by adding other task_struct details at the time
# of OOM.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 09-Feb-2016   Brendan Gregg   Created this.

import sys
from time import strftime

from bcc import BPF

# linux stats: path of the file the event handler reads the load averages from
loadavg = "/proc/loadavg"

# define BPF program
#
# C source handed to BPF(text=...). The probe function is named after
# oom_kill_process() with BCC's kprobe__ prefix and pushes one data_t record
# per call through the "events" perf buffer:
#   fpid / fcomm - PID and command name of the current task, i.e. the one
#                  whose allocation triggered the OOM killer
#   tpid / tcomm - tgid and command name of oc->chosen, the task picked to die
#   pages        - oc->totalpages
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/oom.h>

struct data_t {
    u32 fpid;
    u32 tpid;
    u64 pages;
    char fcomm[TASK_COMM_LEN];
    char tcomm[TASK_COMM_LEN];
};

BPF_PERF_OUTPUT(events);

void kprobe__oom_kill_process(struct pt_regs *ctx, struct oom_control *oc, const char *message)
{
    struct task_struct *p = oc->chosen;
    struct data_t data = {};
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    data.fpid = pid;
    data.tpid = p->tgid;
    data.pages = oc->totalpages;
    bpf_get_current_comm(&data.fcomm, sizeof(data.fcomm));
    bpf_probe_read_kernel(&data.tcomm, sizeof(data.tcomm), p->comm);
    events.perf_submit(ctx, &data, sizeof(data));
}
"""

# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: print a one-line summary of an OOM kill.

    cpu and size are part of the callback signature and are not used here.
    data is the raw record; it is decoded through b["events"].event().
    """
    record = b["events"].event(data)

    # Sample the load averages at the time the event is handled.
    with open(loadavg) as stats:
        avgline = stats.read().rstrip()

    template = ("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\")"
                ", %d pages, loadavg: %s")
    # comm fields arrive as bytes; undecodable bytes are replaced, not raised.
    trigger_comm = record.fcomm.decode('utf-8', 'replace')
    victim_comm = record.tcomm.decode('utf-8', 'replace')
    fields = (strftime("%H:%M:%S"), record.fpid, trigger_comm,
              record.tpid, victim_comm, record.pages, avgline)
    print(template % fields)

# initialize BPF: build the BPF object from the program text above
b = BPF(text=bpf_text)
print("Tracing OOM kills... Ctrl-C to stop.")

# hand every record from the "events" perf buffer to print_event()
b["events"].open_perf_buffer(print_event)

# Poll until interrupted; Ctrl-C is the normal way to stop the tool, so a
# single handler around the whole loop is enough.
try:
    while True:
        b.perf_buffer_poll()
except KeyboardInterrupt:
    # sys.exit() instead of the bare exit() helper: exit() is injected by the
    # site module for interactive sessions and is not meant for programs.
    sys.exit()