#!/usr/bin/env python
#
# oomkill   Trace oom_kill_process(). For Linux, uses BCC, eBPF.
#
# This traces the kernel out-of-memory killer, and prints basic details,
# including the system load averages. This can provide more context on the
# system state at the time of OOM: was it getting busier or steady, based
# on the load averages? This tool may also be useful to customize for
# investigations; for example, by adding other task_struct details at the time
# of OOM.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 09-Feb-2016   Brendan Gregg   Created this.

import ctypes as ct
import sys
from time import strftime

from bcc import BPF

# linux stats
loadavg = "/proc/loadavg"

# define BPF program
#
# The embedded C is handed to BCC as program text. The kprobe__ prefix names
# the kernel function being probed (oom_kill_process); each invocation fills
# one data_t record and submits it through the "events" perf buffer.
# The layout of struct data_t must stay in sync with the ctypes structure
# used on the Python side to decode the records.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/oom.h>

struct data_t {
    u64 fpid;
    u64 tpid;
    u64 pages;
    char fcomm[TASK_COMM_LEN];
    char tcomm[TASK_COMM_LEN];
};

BPF_PERF_OUTPUT(events);

void kprobe__oom_kill_process(struct pt_regs *ctx, struct oom_control *oc, const char *message)
{
    struct task_struct *p = oc->chosen;
    struct data_t data = {};
    // bpf_get_current_pid_tgid() returns (tgid << 32) | tid. The user-space
    // notion of "PID" is the tgid, so take the upper half; truncating to
    // u32 without the shift would report the thread ID instead.
    u32 pid = bpf_get_current_pid_tgid() >> 32;

    data.fpid = pid;
    // report the victim by tgid as well, so both IDs are process IDs
    data.tpid = p->tgid;
    data.pages = oc->totalpages;
    bpf_get_current_comm(&data.fcomm, sizeof(data.fcomm));
    bpf_probe_read(&data.tcomm, sizeof(data.tcomm), p->comm);
    events.perf_submit(ctx, &data, sizeof(data));
}
"""

# kernel->user event data: struct data_t
TASK_COMM_LEN = 16    # linux/sched.h


class Data(ct.Structure):
    """ctypes mirror of the BPF program's ``struct data_t``.

    Field order and types must match the C struct exactly, because raw
    perf-buffer records are cast directly to this structure.
    """
    _fields_ = [
        # PID of the task that triggered the OOM kill
        ("fpid", ct.c_ulonglong),
        # PID of the task chosen to be killed
        ("tpid", ct.c_ulonglong),
        # totalpages value taken from the kernel's oom_control
        ("pages", ct.c_ulonglong),
        # command name of the triggering task
        ("fcomm", ct.c_char * TASK_COMM_LEN),
        # command name of the task chosen to be killed
        ("tcomm", ct.c_char * TASK_COMM_LEN),
    ]

# process event
def print_event(cpu, data, size):
    """Print one line describing an OOM kill event.

    Perf-buffer callback. ``data`` is cast to a pointer to ``Data`` and the
    decoded fields are printed together with the current wall-clock time
    and the contents of the load-average file.

    Args:
        cpu: supplied by the perf-buffer callback; not used here.
        data: raw pointer to the record submitted by the BPF program.
        size: supplied by the perf-buffer callback; not used here.
    """
    event = ct.cast(data, ct.POINTER(Data)).contents
    # Re-read the load averages for every event so the line reflects the
    # system state at the time the event is printed.
    with open(loadavg) as stats:
        avgline = stats.read().rstrip()
    # Command names are raw bytes from the kernel; decode leniently so an
    # unusual name cannot raise.
    print(("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\")"
        ", %d pages, loadavg: %s") % (strftime("%H:%M:%S"), event.fpid,
        event.fcomm.decode('utf-8', 'replace'), event.tpid,
        event.tcomm.decode('utf-8', 'replace'), event.pages, avgline))

# initialize BPF
b = BPF(text=bpf_text)
print("Tracing OOM kills... Ctrl-C to stop.")
b["events"].open_perf_buffer(print_event)

# Poll the perf buffer until interrupted; print_event runs for each record.
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # sys.exit() rather than the site-provided exit(), which is meant
        # for interactive sessions and is absent when site is not loaded.
        sys.exit()