blob: db0d40087ef4d7fa88148b01f1c5d7b08220212f [file] [log] [blame]
amdnd51f4af2019-05-28 16:09:01 -05001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3from __future__ import print_function
4
5import argparse
6import ctypes as ct
7import os
8import platform
9import re
10import signal
11import sys
12
13from bcc import BPF
14from datetime import datetime
15from time import strftime
16
17#
18# exitsnoop Trace all process termination (exit, fatal signal)
19# For Linux, uses BCC, eBPF. Embedded C.
20#
# USAGE: exitsnoop [-h] [-x] [-t] [--utc] [--label[=LABEL]] [-p PID] [--per-thread]
22#
# Epilog shown by --help: one example invocation per line.  The text is
# emitted verbatim (RawDescriptionHelpFormatter), so it must contain nothing
# but the examples themselves.
_examples = """examples:
    exitsnoop                # trace all process termination
    exitsnoop -x             # trace only fails, exclude exit(0)
    exitsnoop -t             # include timestamps (local time)
    exitsnoop --utc          # include timestamps (UTC)
    exitsnoop -p 181         # only trace PID 181
    exitsnoop --label=exit   # label each output line with 'exit'
    exitsnoop --per-thread   # trace per thread termination
"""
32"""
33 Exit status (from <include/sysexits.h>):
34
35 0 EX_OK Success
36 2 argparse error
37 70 EX_SOFTWARE syntax error detected by compiler, or
38 verifier error from kernel
39 77 EX_NOPERM Need sudo (CAP_SYS_ADMIN) for BPF() system call
40
41 The template for this script was Brendan Gregg's execsnoop
42 https://github.com/iovisor/bcc/blob/master/tools/execsnoop.py
43
44 More information about this script is in bcc/tools/exitsnoop_example.txt
45
46 Copyright 2016 Netflix, Inc.
47 Copyright 2019 Instana, Inc.
48 Licensed under the Apache License, Version 2.0 (the "License")
49
50 07-Feb-2016 Brendan Gregg (Netflix) Created execsnoop
51 04-May-2019 Arturo Martin-de-Nicolas (Instana) Created exitsnoop
52 13-May-2019 Jeroen Soeters (Instana) Refactor to import as module
53"""
54
def _getParser():
    """Build the argument parser and return its bound ``parse_args`` method.

    The parser object itself is not returned: callers invoke the result with
    an argument list and get an argparse Namespace back.  The --ebpf and
    --timespec options are accepted but hidden from --help.

    The -p/--pid value is checked to be a positive decimal integer because
    the generated C program embeds it textually.  It is still delivered as a
    string, so code reading ``args.pid`` sees the same type as before.
    """
    def pid_text(value):
        """argparse ``type=`` hook: return value as a canonical PID string."""
        try:
            pid = int(value, 10)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid PID: %r" % value)
        if pid <= 0:
            raise argparse.ArgumentTypeError("PID must be positive: %r" % value)
        return str(pid)

    parser = argparse.ArgumentParser(
        description="Trace all process termination (exit, fatal signal)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_examples)
    a = parser.add_argument
    a("-t", "--timestamp", action="store_true", help="include timestamp (local time default)")
    a("--utc", action="store_true", help="include timestamp in UTC (-t implied)")
    a("-p", "--pid", type=pid_text, help="trace this PID only")
    a("--label", help="label each line")
    a("-x", "--failed", action="store_true", help="trace only fails, exclude exit(0)")
    a("--per-thread", action="store_true", help="trace per thread termination")
    # print the embedded C program and exit, for debugging
    a("--ebpf", action="store_true", help=argparse.SUPPRESS)
    # RHEL 7.6 keeps task->start_time as struct timespec, convert to u64 nanoseconds
    a("--timespec", action="store_true", help=argparse.SUPPRESS)
    return parser.parse_args
72
73
class Global():
    """Namespace for state shared by the functions of this module."""

    # Bound parse_args of the parser, built once when the class is defined.
    parse_args = _getParser()

    # Parsed options and the raw argument list; both start out unset.
    args = None
    argv = None

    # Signal number -> signal name with the leading "SIG" removed, taken
    # from every all-caps SIGxxx attribute of the signal module.
    SIGNUM_TO_SIGNAME = {
        number: re.sub("^SIG", "", name)
        for name, number in signal.__dict__.items()
        if re.match("^SIG[A-Z]+$", name)
    }
80
81
class Data(ct.Structure):
    """ctypes view of one exit event; mirrors struct data_t in _embedded_c()."""

    # Size of the command-name buffer (linux/sched.h).
    _TASK_COMM_LEN = 16

    # Byte packing: no padding between fields, like the packed C struct.
    _pack_ = 1

    _fields_ = [
        # task->start_time, see --timespec arg
        ("start_time", ct.c_ulonglong),
        # bpf_ktime_get_ns()
        ("exit_time", ct.c_ulonglong),
        # task->tgid, thread group id == sys_getpid()
        ("pid", ct.c_uint),
        # task->pid, thread id == sys_gettid()
        ("tid", ct.c_uint),
        # task->parent->tgid, notified of exit
        ("ppid", ct.c_uint),
        ("exit_code", ct.c_int),
        ("sig_info", ct.c_uint),
        ("task", ct.c_char * _TASK_COMM_LEN),
    ]
96
def _embedded_c(args):
    """Generate C program for sched_process_exit tracepoint in kernel/exit.c."""
    # NOTE: the docstring above is runtime text.  _ebpf_comment() copies
    # _embedded_c.__doc__ into the header of the --ebpf listing, so it is
    # kept to that single line and further documentation lives in comments.
    #
    # The `args` parameter is not read by this function; every option comes
    # from Global.args.  (The `args` that appears inside the C text below is
    # part of the C program, not this parameter.)
    #
    # Returns the C source as a string, with the upper-case placeholder
    # tokens in the template replaced according to the parsed options.
    c = """
    EBPF_COMMENT
    #include <linux/sched.h>
    BPF_STATIC_ASSERT_DEF

    struct data_t {
        u64 start_time;
        u64 exit_time;
        u32 pid;
        u32 tid;
        u32 ppid;
        int exit_code;
        u32 sig_info;
        char task[TASK_COMM_LEN];
    } __attribute__((packed));

    BPF_STATIC_ASSERT(sizeof(struct data_t) == CTYPES_SIZEOF_DATA);
    BPF_PERF_OUTPUT(events);

    TRACEPOINT_PROBE(sched, sched_process_exit)
    {
        struct task_struct *task = (typeof(task))bpf_get_current_task();
        if (FILTER_PID || FILTER_EXIT_CODE) { return 0; }

        struct data_t data = {
            .start_time = PROCESS_START_TIME_NS,
            .exit_time = bpf_ktime_get_ns(),
            .pid = task->tgid,
            .tid = task->pid,
            .ppid = task->parent->tgid,
            .exit_code = task->exit_code >> 8,
            .sig_info = task->exit_code & 0xFF,
        };
        bpf_get_current_comm(&data.task, sizeof(data.task));

        events.perf_submit(args, &data, sizeof(data));
        return 0;
    }
    """
    # TODO: this macro belongs in bcc/src/cc/export/helpers.h
    bpf_static_assert_def = r"""
    #ifndef BPF_STATIC_ASSERT
    #define BPF_STATIC_ASSERT(condition) __attribute__((unused)) \
    extern int bpf_static_assert[(condition) ? 1 : -1]
    #endif
    """

    # FILTER_PID is a C expression that is true for events to be skipped.
    if Global.args.pid:
        if Global.args.per_thread:
            # every thread of the requested process
            filter_pid = "task->tgid != %s" % Global.args.pid
        else:
            # only the thread whose id equals the thread group id
            filter_pid = "!(task->tgid == %s && task->pid == task->tgid)" % Global.args.pid
    else:
        filter_pid = '0' if Global.args.per_thread else 'task->pid != task->tgid'

    # Applied in order with plain str.replace().
    code_substitutions = [
        ('EBPF_COMMENT', '' if not Global.args.ebpf else _ebpf_comment()),
        ("BPF_STATIC_ASSERT_DEF", bpf_static_assert_def),
        # lets the C compiler check that both sides agree on the struct size
        ("CTYPES_SIZEOF_DATA", str(ct.sizeof(Data))),
        ('FILTER_PID', filter_pid),
        ('FILTER_EXIT_CODE', '0' if not Global.args.failed else 'task->exit_code == 0'),
        ('PROCESS_START_TIME_NS', 'task->start_time' if not Global.args.timespec else
         '(task->start_time.tv_sec * 1000000000L) + task->start_time.tv_nsec'),
    ]
    for old, new in code_substitutions:
        c = c.replace(old, new)
    return c
166
def _ebpf_comment():
    """Build the C block comment that describes the generated program.

    The text records the script name (sys.argv[0]), the current local time,
    the docstring of _embedded_c, the argument list in Global.argv and the
    parsed options in Global.args.
    """
    created = 'Created by %s at %s:\n\t%s' % (
        sys.argv[0], strftime("%Y-%m-%d %H:%M:%S %Z"), _embedded_c.__doc__)

    # Break the printed options dict across lines.
    options = str(vars(Global.args))
    for old, new in (('{', '{\n\t'), (', ', ',\n\t'), ('}', ',\n }\n\n')):
        options = options.replace(old, new)

    body = "\n %s\n\n ARGV = %s\n\n ARGS = %s/" % (
        created, ' '.join(Global.argv), options)
    # Start every line with '*', then turn each tab into a space.  The
    # opening "/*" is prepended afterwards, so it is not rewritten.
    body = body.replace('\n', '\n\t*').replace('\t', ' ')
    return "\n /*" + body
175
def _print_header():
    """Print the column headings as a single line.

    A TIME-<zone> column comes first when timestamps are enabled, and a
    LABEL column follows when a label was given.
    """
    opts = Global.args
    columns = []
    if opts.timestamp:
        zone = 'UTC' if opts.utc else strftime("%Z")
        columns.append("%-13s" % ('TIME-' + zone))
    if opts.label is not None:
        columns.append("%-6s" % "LABEL")
    columns.append("%-16s %-6s %-6s %-6s %-7s %-10s" %
                   ("PCOMM", "PID", "PPID", "TID", "AGE(s)", "EXIT_CODE"))
    print("".join(columns))
184
def _print_event(cpu, data, size): # callback
    """Print one exit event as a single output line.

    cpu  - not used here
    data - pointer to the raw event, reinterpreted as a Data structure
    size - not used here

    Output columns match _print_header(): optional timestamp, optional
    label, then command name, PID, PPID, TID, age in seconds and the exit
    status ("0", "code N", or "signal N (NAME)" with an optional
    ", core dumped" suffix).
    """
    e = ct.cast(data, ct.POINTER(Data)).contents
    if Global.args.timestamp:
        now = datetime.utcnow() if Global.args.utc else datetime.now()
        # %f is microseconds; dropping three digits leaves milliseconds
        print("%-13s" % (now.strftime("%H:%M:%S.%f")[:-3]), end="")
    if Global.args.label is not None:
        # an empty --label= falls back to 'exit'
        label = Global.args.label if len(Global.args.label) else 'exit'
        print("%-6s" % label, end="")
    age = (e.exit_time - e.start_time) / 1e9  # nanoseconds -> seconds
    # The command name is raw bytes copied from the kernel.  A strict decode
    # raises on anything outside the default codec and would abort this
    # callback, so undecodable bytes are replaced instead.
    comm = e.task.decode('utf-8', 'replace')
    print("%-16s %-6d %-6d %-6d %-7.2f " %
          (comm, e.pid, e.ppid, e.tid, age), end="")
    if e.sig_info == 0:
        print("0" if e.exit_code == 0 else "code %d" % e.exit_code)
    else:
        # low seven bits: signal number; 0x80: core-dump flag
        sig = e.sig_info & 0x7F
        if sig:
            print("signal %d (%s)" % (sig, signum_to_signame(sig)), end="")
        if e.sig_info & 0x80:
            print(", core dumped ", end="")
        print()
206
207# =============================
208# Module: These functions are available for import
209# =============================
def initialize(arg_list = sys.argv[1:]):
    """Trace all process termination.

    arg_list - list of args, if omitted then uses command line args
               (the default is captured from sys.argv when this module is
               imported); arg_list is passed to
               argparse.ArgumentParser.parse_args()

    For example, if arg_list = [ '-x', '-t' ]
       args.failed == True
       args.timestamp == True

    Side effects: stores arg_list in Global.argv and the parsed options in
    Global.args; --utc turns args.timestamp on; args.timespec is turned on
    for 3.10 el7 kernels and when the first BPF() attempt is retried.

    Returns a tuple (return_code, result)
       0 = Ok, result is the return value from BPF()
       1 = args.ebpf is requested, result is the generated C code
       os.EX_NOPERM: need CAP_SYS_ADMIN, result is error message
       os.EX_SOFTWARE: internal software error, result is error message
    """
    Global.argv = arg_list
    Global.args = Global.parse_args(arg_list)
    if Global.args.utc and not Global.args.timestamp:
        Global.args.timestamp = True
    if not Global.args.ebpf and os.geteuid() != 0:
        return (os.EX_NOPERM, "Need sudo (CAP_SYS_ADMIN) for BPF() system call")
    # Raw string: '\.' in an ordinary literal is an invalid escape sequence.
    if re.match(r'^3\.10\..*el7.*$', platform.release()): # Centos/Red Hat
        Global.args.timespec = True
    # At most two attempts: as configured, then once more with --timespec.
    for _ in range(2):
        c = _embedded_c(Global.args)
        if Global.args.ebpf:
            return (1, c)
        try:
            return (os.EX_OK, BPF(text=c))
        except Exception as e:
            error = format(e)
            # str.find() returns -1 (truthy) when the text is absent, so it
            # must not be used as a boolean; test membership instead.
            names_timespec = ('struct timespec' in error
                              and 'start_time' in error)
            # NOTE(review): BPF() appears to raise only a generic "Failed to
            # compile" message while the compiler output goes to stderr -
            # confirm.  When the message carries no "error:" diagnostic
            # there is nothing to inspect, so the retry is still attempted
            # rather than losing the fallback.
            undiagnosed = 'error:' not in error
            if (not Global.args.timespec
                    and (names_timespec or undiagnosed)):
                print('This kernel keeps task->start_time in a struct timespec.\n' +
                      'Retrying with --timespec')
                Global.args.timespec = True
                continue
            return (os.EX_SOFTWARE, "BPF error: " + error)
        except:
            # anything that is not an Exception subclass is reported, too
            return (os.EX_SOFTWARE, "Unexpected error: {0}".format(sys.exc_info()[0]))
252
def snoop(bpf, event_handler):
    """Deliver process termination events to event_handler.

    bpf           - result returned by successful initialize()
    event_handler - callback function to handle termination event

    Polls the "events" perf buffer repeatedly.  When args.pid is set the
    function returns as soon as the first poll call completes; otherwise it
    keeps polling and never returns on its own.
    """
    bpf["events"].open_perf_buffer(event_handler)
    bpf.perf_buffer_poll()
    while not Global.args.pid:
        bpf.perf_buffer_poll()
264 return
265
def signum_to_signame(signum):
    """Map signum to its signal name without "SIG", or "unknown" if absent."""
    try:
        return Global.SIGNUM_TO_SIGNAME[signum]
    except KeyError:
        return "unknown"
269
270# =============================
271# Script: invoked as a script
272# =============================
def main():
    """Run the tool from the command line.

    On a non-zero code from initialize() the accompanying text is printed
    and the process exits: with status 0 when --ebpf was requested, with
    that code otherwise.  On success the header is printed and events are
    traced.  Ctrl-C prints a newline and exits.  Returns 0 if snoop()
    returns.
    """
    try:
        status, result = initialize()
        if not status:
            _print_header()
            snoop(result, _print_event)
        else:
            print(result)
            sys.exit(0 if Global.args.ebpf else status)
    except KeyboardInterrupt:
        print()
        sys.exit()

    return 0


if __name__ == '__main__':
    main()