amdn | d51f4af | 2019-05-28 16:09:01 -0500 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # @lint-avoid-python-3-compatibility-imports |
| 3 | from __future__ import print_function |
| 4 | |
| 5 | import argparse |
| 6 | import ctypes as ct |
| 7 | import os |
| 8 | import platform |
| 9 | import re |
| 10 | import signal |
| 11 | import sys |
| 12 | |
| 13 | from bcc import BPF |
| 14 | from datetime import datetime |
| 15 | from time import strftime |
| 16 | |
| 17 | # |
| 18 | # exitsnoop Trace all process termination (exit, fatal signal) |
| 19 | # For Linux, uses BCC, eBPF. Embedded C. |
| 20 | # |
| 21 | # USAGE: exitsnoop [-h] [-x] [-t] [--utc] [--label[=LABEL]] [-p PID] [--per-thread] |
| 22 | # |
| 23 | _examples = """examples: |
| 24 | exitsnoop # trace all process termination |
| 25 | exitsnoop -x # trace only fails, exclude exit(0) |
| 26 | exitsnoop -t # include timestamps (local time) |
amdn | 471f6ab | 2019-05-28 17:51:41 -0500 | [diff] [blame] | 27 | exitsnoop --utc # include timestamps (UTC) |
amdn | d51f4af | 2019-05-28 16:09:01 -0500 | [diff] [blame] | 28 | exitsnoop -p 181 # only trace PID 181 |
| 29 | exitsnoop --label=exit # label each output line with 'exit' |
Shohei YOSHIDA | a28337a | 2020-05-22 22:13:01 +0900 | [diff] [blame] | 30 | exitsnoop --per-thread # trace per thread termination |
amdn | d51f4af | 2019-05-28 16:09:01 -0500 | [diff] [blame] | 31 | """ |
| 32 | """ |
| 33 | Exit status (from <include/sysexits.h>): |
| 34 | |
| 35 | 0 EX_OK Success |
| 36 | 2 argparse error |
| 37 | 70 EX_SOFTWARE syntax error detected by compiler, or |
| 38 | verifier error from kernel |
| 39 | 77 EX_NOPERM Need sudo (CAP_SYS_ADMIN) for BPF() system call |
| 40 | |
| 41 | The template for this script was Brendan Gregg's execsnoop |
| 42 | https://github.com/iovisor/bcc/blob/master/tools/execsnoop.py |
| 43 | |
| 44 | More information about this script is in bcc/tools/exitsnoop_example.txt |
| 45 | |
| 46 | Copyright 2016 Netflix, Inc. |
| 47 | Copyright 2019 Instana, Inc. |
| 48 | Licensed under the Apache License, Version 2.0 (the "License") |
| 49 | |
| 50 | 07-Feb-2016 Brendan Gregg (Netflix) Created execsnoop |
| 51 | 04-May-2019 Arturo Martin-de-Nicolas (Instana) Created exitsnoop |
| 52 | 13-May-2019 Jeroen Soeters (Instana) Refactor to import as module |
| 53 | """ |
| 54 | |
def _getParser():
    """Build the command-line parser and return its bound ``parse_args``.

    The returned callable takes a list of argument strings and yields an
    ``argparse.Namespace`` with the attributes ``timestamp``, ``utc``,
    ``pid``, ``label``, ``failed``, ``per_thread``, ``ebpf`` and
    ``timespec``.  ``pid`` is ``None`` when not given, otherwise a positive
    ``int``.
    """
    def positive_pid(text):
        # The PID ends up inside the generated C program (see _embedded_c),
        # so anything that is not a plain positive integer is rejected here
        # as an ordinary argparse error instead of reaching the compiler.
        try:
            pid = int(text)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid PID: %r" % text)
        if pid <= 0:
            raise argparse.ArgumentTypeError(
                "PID must be a positive integer: %r" % text)
        return pid

    parser = argparse.ArgumentParser(
        description="Trace all process termination (exit, fatal signal)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_examples)
    a = parser.add_argument
    a("-t", "--timestamp", action="store_true",
      help="include timestamp (local time default)")
    a("--utc", action="store_true",
      help="include timestamp in UTC (-t implied)")
    a("-p", "--pid", type=positive_pid, help="trace this PID only")
    a("--label", help="label each line")
    a("-x", "--failed", action="store_true",
      help="trace only fails, exclude exit(0)")
    a("--per-thread", action="store_true",
      help="trace per thread termination")
    # print the embedded C program and exit, for debugging
    a("--ebpf", action="store_true", help=argparse.SUPPRESS)
    # RHEL 7.6 keeps task->start_time as struct timespec, convert to u64 nanoseconds
    a("--timespec", action="store_true", help=argparse.SUPPRESS)
    return parser.parse_args
| 72 | |
| 73 | |
class Global:
    """Module-wide state shared by the functions in this file."""
    # Bound parse_args() of the parser built by _getParser().
    parse_args = _getParser()
    # Parsed options; None until initialize() assigns them.
    args = None
    # Argument list that was handed to initialize(); None until then.
    argv = None
    # Maps a signal number to its name with the leading "SIG" removed,
    # built from every SIG<LETTERS> attribute of the signal module.
    SIGNUM_TO_SIGNAME = {
        number: re.sub("^SIG", "", name)
        for name, number in vars(signal).items()
        if re.match("^SIG[A-Z]+$", name)
    }
| 80 | |
| 81 | |
class Data(ct.Structure):
    """Event data matching struct data_t in _embedded_c()."""
    _TASK_COMM_LEN = 16  # linux/sched.h
    # Packed, so the layout has no padding between fields.
    _pack_ = 1
    _fields_ = [
        # task->start_time, see --timespec arg
        ("start_time", ct.c_uint64),
        # bpf_ktime_get_ns()
        ("exit_time", ct.c_uint64),
        # task->tgid, thread group id == sys_getpid()
        ("pid", ct.c_uint32),
        # task->pid, thread id == sys_gettid()
        ("tid", ct.c_uint32),
        # task->parent->tgid, notified of exit
        ("ppid", ct.c_uint32),
        ("exit_code", ct.c_int32),
        ("sig_info", ct.c_uint32),
        ("task", ct.c_char * _TASK_COMM_LEN),
    ]
| 96 | |
def _embedded_c(args):
    """Generate C program for sched_process_exit tracepoint in kernel/exit.c."""
    # The docstring above is read at runtime through _embedded_c.__doc__ by
    # _ebpf_comment(), so its text is part of the --ebpf output.
    # NOTE(review): the `args` parameter is never read; every option below
    # comes from Global.args.
    program = """
    EBPF_COMMENT
    #include <linux/sched.h>
    BPF_STATIC_ASSERT_DEF

    struct data_t {
        u64 start_time;
        u64 exit_time;
        u32 pid;
        u32 tid;
        u32 ppid;
        int exit_code;
        u32 sig_info;
        char task[TASK_COMM_LEN];
    } __attribute__((packed));

    BPF_STATIC_ASSERT(sizeof(struct data_t) == CTYPES_SIZEOF_DATA);
    BPF_PERF_OUTPUT(events);

    TRACEPOINT_PROBE(sched, sched_process_exit)
    {
        struct task_struct *task = (typeof(task))bpf_get_current_task();
        if (FILTER_PID || FILTER_EXIT_CODE) { return 0; }

        struct data_t data = {
            .start_time = PROCESS_START_TIME_NS,
            .exit_time = bpf_ktime_get_ns(),
            .pid = task->tgid,
            .tid = task->pid,
            .ppid = task->parent->tgid,
            .exit_code = task->exit_code >> 8,
            .sig_info = task->exit_code & 0xFF,
        };
        bpf_get_current_comm(&data.task, sizeof(data.task));

        events.perf_submit(args, &data, sizeof(data));
        return 0;
    }
    """
    # TODO: this macro belongs in bcc/src/cc/export/helpers.h
    static_assert_macro = r"""
    #ifndef BPF_STATIC_ASSERT
    #define BPF_STATIC_ASSERT(condition) __attribute__((unused)) \
    extern int bpf_static_assert[(condition) ? 1 : -1]
    #endif
    """

    # FILTER_PID is a C expression that is true for events to be dropped.
    wanted_pid = Global.args.pid
    per_thread = Global.args.per_thread
    if wanted_pid and per_thread:
        # any thread of the requested thread group
        filter_pid = "task->tgid != %s" % wanted_pid
    elif wanted_pid:
        # only the thread whose id equals the requested thread group id
        filter_pid = ("!(task->tgid == %s && task->pid == task->tgid)"
                      % wanted_pid)
    elif per_thread:
        filter_pid = '0'
    else:
        filter_pid = 'task->pid != task->tgid'

    # FILTER_EXIT_CODE drops exit_code == 0 events when -x was given.
    filter_exit_code = 'task->exit_code == 0' if Global.args.failed else '0'

    # With --timespec the start time is assembled from tv_sec/tv_nsec.
    if Global.args.timespec:
        start_time_ns = ('(task->start_time.tv_sec * 1000000000L)'
                         ' + task->start_time.tv_nsec')
    else:
        start_time_ns = 'task->start_time'

    # Placeholders are replaced one after another, in this order.
    replacements = (
        ('EBPF_COMMENT', _ebpf_comment() if Global.args.ebpf else ''),
        ("BPF_STATIC_ASSERT_DEF", static_assert_macro),
        ("CTYPES_SIZEOF_DATA", str(ct.sizeof(Data))),
        ('FILTER_PID', filter_pid),
        ('FILTER_EXIT_CODE', filter_exit_code),
        ('PROCESS_START_TIME_NS', start_time_ns),
    )
    for placeholder, text in replacements:
        program = program.replace(placeholder, text)
    return program
| 166 | |
def _ebpf_comment():
    """Return a C-style comment with information about the generated code.

    The comment names the invoking script (sys.argv[0]), the current local
    time, the docstring of _embedded_c, the argument list stored in
    Global.argv and the parsed options in Global.args.
    """
    created = 'Created by %s at %s:\n\t%s' % (
        sys.argv[0], strftime("%Y-%m-%d %H:%M:%S %Z"), _embedded_c.__doc__)

    # Spread the dict repr of the options over several lines.
    options = str(vars(Global.args))
    for old, new in (('{', '{\n\t'), (', ', ',\n\t'), ('}', ',\n }\n\n')):
        options = options.replace(old, new)

    body = "\n %s\n\n ARGV = %s\n\n ARGS = %s/" % (
        created, ' '.join(Global.argv), options)
    # Start every line with '*', then turn tabs into spaces.
    body = body.replace('\n', '\n\t*')
    body = body.replace('\t', ' ')
    return "\n /*" + body
| 175 | |
def _print_header():
    """Print the column titles for the event lines.

    A TIME-<zone> column comes first when timestamps are enabled and a LABEL
    column follows when a label was requested; the fixed columns are always
    printed.
    """
    options = Global.args
    leading = []
    if options.timestamp:
        zone = 'UTC' if options.utc else strftime("%Z")
        leading.append("%-13s" % ('TIME-' + zone))
    if options.label is not None:
        leading.append("%-6s" % "LABEL")
    titles = ("PCOMM", "PID", "PPID", "TID", "AGE(s)", "EXIT_CODE")
    print("".join(leading) + "%-16s %-6s %-6s %-6s %-7s %-10s" % titles)
| 184 | |
def _print_event(cpu, data, size):  # callback
    """Print the exit event.

    cpu  - not used
    data - address of the raw event, interpreted as a Data structure
    size - not used

    One line is written: optional timestamp, optional label, then command
    name, pid, ppid, tid, age in seconds and the exit status or signal.
    """
    e = ct.cast(data, ct.POINTER(Data)).contents
    if Global.args.timestamp:
        now = datetime.utcnow() if Global.args.utc else datetime.now()
        # %f is microseconds; dropping three digits leaves milliseconds
        print("%-13s" % (now.strftime("%H:%M:%S.%f")[:-3]), end="")
    if Global.args.label is not None:
        # an empty label falls back to 'exit'
        label = Global.args.label or 'exit'
        print("%-6s" % label, end="")
    age = (e.exit_time - e.start_time) / 1e9
    # The command name is a fixed 16-byte buffer and need not be valid
    # UTF-8 (it can also be cut in the middle of a multi-byte character),
    # so undecodable bytes are replaced rather than raising
    # UnicodeDecodeError inside the callback.
    comm = e.task.decode('utf-8', 'replace')
    print("%-16s %-6d %-6d %-6d %-7.2f " %
          (comm, e.pid, e.ppid, e.tid, age), end="")
    if e.sig_info == 0:
        print("0" if e.exit_code == 0 else "code %d" % e.exit_code)
    else:
        # low 7 bits: signal number; bit 0x80: core dump flag
        sig = e.sig_info & 0x7F
        if sig:
            print("signal %d (%s)" % (sig, signum_to_signame(sig)), end="")
        if e.sig_info & 0x80:
            print(", core dumped ", end="")
        print()
| 206 | |
| 207 | # ============================= |
| 208 | # Module: These functions are available for import |
| 209 | # ============================= |
def initialize(arg_list=None):
    """Trace all process termination.

    arg_list - list of args, if omitted then uses command line args
               arg_list is passed to argparse.ArgumentParser.parse_args()

    For example, if arg_list = [ '-x', '-t' ]
       args.failed == True
       args.timestamp == True

    The parsed options are stored in Global.args and the argument list in
    Global.argv.

    Returns a tuple (return_code, result)
       0 = Ok, result is the return value from BPF()
       1 = args.ebpf is requested, result is the generated C code
       os.EX_NOPERM: need CAP_SYS_ADMIN, result is error message
       os.EX_SOFTWARE: internal software error, result is error message
    """
    if arg_list is None:
        # Resolve the default at call time: a default of sys.argv[1:] in the
        # signature would be evaluated once at import and shared by all calls.
        arg_list = sys.argv[1:]
    Global.argv = arg_list
    Global.args = Global.parse_args(arg_list)
    if Global.args.utc and not Global.args.timestamp:
        Global.args.timestamp = True  # --utc implies -t
    if not Global.args.ebpf and os.geteuid() != 0:
        return (os.EX_NOPERM, "Need sudo (CAP_SYS_ADMIN) for BPF() system call")
    if re.match(r'^3\.10\..*el7.*$', platform.release()):  # Centos/Red Hat
        Global.args.timespec = True
    # At most two attempts: the second one only after switching to --timespec.
    for _ in range(2):
        c = _embedded_c(Global.args)
        if Global.args.ebpf:
            return (1, c)
        try:
            return (os.EX_OK, BPF(text=c))
        except Exception as e:
            error = format(e)
            # NOTE(review): str.find() returns -1 (truthy) when the text is
            # absent and 0 (falsy) when it is at the very start, so these two
            # tests hold for almost any message and the retry effectively
            # runs on any first failure.  Kept as is on purpose: confirm
            # what text BPF() errors actually carry before tightening this
            # to `in`, otherwise the retry could stop happening.
            if (not Global.args.timespec
                    and error.find('struct timespec')
                    and error.find('start_time')):
                print('This kernel keeps task->start_time in a struct timespec.\n' +
                      'Retrying with --timespec')
                Global.args.timespec = True
                continue
            return (os.EX_SOFTWARE, "BPF error: " + error)
        except BaseException:
            # Anything that is not an Exception is also reported through the
            # returned tuple rather than propagated.
            return (os.EX_SOFTWARE, "Unexpected error: {0}".format(sys.exc_info()[0]))
| 252 | |
def snoop(bpf, event_handler):
    """Call event_handler for process termination events.

    bpf - result returned by successful initialize()
    event_handler - callback function to handle termination event
    args.pid - Return after event_handler is called, only monitoring this pid

    Without args.pid the function polls forever and does not return.
    """
    bpf["events"].open_perf_buffer(event_handler)
    # Poll once unconditionally, then keep polling unless a PID was requested.
    bpf.perf_buffer_poll()
    while not Global.args.pid:
        bpf.perf_buffer_poll()
| 265 | |
def signum_to_signame(signum):
    """Return the name of the signal corresponding to signum.

    The name comes from Global.SIGNUM_TO_SIGNAME; "unknown" is returned for
    a number that table does not contain.
    """
    names = Global.SIGNUM_TO_SIGNAME
    return names[signum] if signum in names else "unknown"
| 269 | |
| 270 | # ============================= |
| 271 | # Script: invoked as a script |
| 272 | # ============================= |
def main():
    """Run exitsnoop as a command-line tool.

    On a zero return code from initialize() the header is printed and events
    are traced.  Otherwise the result text is printed and the process exits:
    with status 0 when --ebpf was requested, else with the return code.
    Ctrl-C prints a newline and exits.
    """
    try:
        status, result = initialize()
        if not status:
            _print_header()
            snoop(result, _print_event)
        else:
            print(result)
            sys.exit(0 if Global.args.ebpf else status)
    except KeyboardInterrupt:
        print()
        sys.exit()

    return 0


if __name__ == "__main__":
    main()