#!/usr/bin/python
#
# oomkill   Trace oom_kill_process(). For Linux, uses BCC, eBPF.
#
# This traces the kernel out-of-memory killer, and prints basic details,
# including the system load averages. This can provide more context on the
# system state at the time of OOM: was it getting busier or steady, based
# on the load averages? This tool may also be useful to customize for
# investigations; for example, by adding other task_struct details at the time
# of OOM.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 09-Feb-2016   Brendan Gregg   Created this.

import sys
from time import strftime

from bcc import BPF

# linux stats
# Path of the file the load averages are read from; print_event() opens it
# each time an OOM kill event is printed.
loadavg = "/proc/loadavg"
# define BPF program
#
# C source handed to BCC. The function is named kprobe__oom_kill_process so
# that BCC attaches it as a kprobe on the kernel's oom_kill_process() (see the
# BCC reference guide for the kprobe__ naming convention). For every call it
# fills a data_t record and submits it on the "events" perf output:
#   fpid / fcomm - process ID (upper 32 bits of bpf_get_current_pid_tgid())
#                  and command name of the task running when the probe fires
#   tpid / tcomm - tgid and command name of oc->chosen, the task picked to
#                  be killed
#   pages        - oc->totalpages
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/oom.h>

struct data_t {
    u32 fpid;
    u32 tpid;
    u64 pages;
    char fcomm[TASK_COMM_LEN];
    char tcomm[TASK_COMM_LEN];
};

BPF_PERF_OUTPUT(events);

void kprobe__oom_kill_process(struct pt_regs *ctx, struct oom_control *oc, const char *message)
{
    struct task_struct *p = oc->chosen;
    struct data_t data = {};
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    data.fpid = pid;
    data.tpid = p->tgid;
    data.pages = oc->totalpages;
    bpf_get_current_comm(&data.fcomm, sizeof(data.fcomm));
    bpf_probe_read_kernel(&data.tcomm, sizeof(data.tcomm), p->comm);
    events.perf_submit(ctx, &data, sizeof(data));
}
"""
# process event
def print_event(cpu, data, size):
    """Print one line describing an OOM kill reported by the BPF program.

    Used as the callback passed to ``b["events"].open_perf_buffer``.

    Args:
        cpu: first callback argument; not used here.
        data: raw event payload, decoded with ``b["events"].event``.
        size: third callback argument; not used here.
    """
    event = b["events"].event(data)
    # Read the load averages at print time so the line carries some context
    # about how busy the system was around the kill.
    with open(loadavg) as stats:
        avgline = stats.read().rstrip()
    # Command names arrive as bytes; undecodable bytes are replaced rather
    # than raising.
    print(("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\")"
        ", %d pages, loadavg: %s") % (strftime("%H:%M:%S"), event.fpid,
        event.fcomm.decode('utf-8', 'replace'), event.tpid,
        event.tcomm.decode('utf-8', 'replace'), event.pages, avgline))
# initialize BPF
b = BPF(text=bpf_text)
print("Tracing OOM kills... Ctrl-C to stop.")
b["events"].open_perf_buffer(print_event)

# Poll the perf buffer until interrupted; print_event() runs for each event.
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # sys.exit() rather than exit(): the latter is a site-module helper
        # intended for the interactive interpreter.
        sys.exit()
71 exit()