#!/usr/bin/python
# @lint-avoid-python-3-compatibility-imports
#
# execsnoop Trace new processes via exec() syscalls.
#           For Linux, uses BCC, eBPF. Embedded C.
#
# USAGE: execsnoop [-h] [-t] [-x] [-q] [-n NAME] [-l LINE]
#                  [--max-args MAX_ARGS]
#
# By default this prints up to a maximum of 19 arguments, plus the process
# name, so 20 fields in total (MAXARG); use --max-args to change the limit.
#
# This won't catch all new processes: an application may fork() but not exec().
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 07-Feb-2016   Brendan Gregg   Created this.
18
from __future__ import print_function

import argparse
import ctypes as ct
import re
import sys
import time
from collections import defaultdict

import bcc.utils as utils
from bcc import BPF
from bcc.utils import ArgString, printb
Brendan Greggaf18bb32016-02-07 15:28:50 -080028
# arguments
examples = """examples:
    ./execsnoop           # trace all exec() syscalls
    ./execsnoop -x        # include failed exec()s
    ./execsnoop -t        # include timestamps
    ./execsnoop -q        # add "quotemarks" around arguments
    ./execsnoop -n main   # only print command lines containing "main"
    ./execsnoop -l tpkg   # only print command where arguments contains "tpkg"
"""


def _max_args_value(value):
    """Validate a --max-args value and return it as a decimal string.

    The value is later pasted textually into the C source in place of
    MAXARG, so it must be a plain positive decimal integer. Normalising
    through int() also avoids surprises such as "020" being read as an
    octal constant by the C compiler.

    Raises:
        argparse.ArgumentTypeError: if value is not an integer >= 1.
    """
    try:
        count = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            "invalid argument count: %r" % value)
    if count < 1:
        raise argparse.ArgumentTypeError(
            "argument count must be at least 1: %r" % value)
    # Stay a string: the substitution into the C source expects text.
    return str(count)


parser = argparse.ArgumentParser(
    description="Trace exec() syscalls",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-t", "--timestamp", action="store_true",
    help="include timestamp on output")
parser.add_argument("-x", "--fails", action="store_true",
    help="include failed exec()s")
parser.add_argument("-q", "--quote", action="store_true",
    help="Add quotemarks (\") around arguments.")
parser.add_argument("-n", "--name",
    type=ArgString,
    help="only print commands matching this name (regex), any arg")
parser.add_argument("-l", "--line",
    type=ArgString,
    help="only print commands where arg contains this line (regex)")
# argparse also runs string defaults through `type`, so "20" is validated too.
parser.add_argument("--max-args", default="20", type=_max_args_value,
    help="maximum number of arguments parsed and displayed, defaults to 20")
# Hidden debugging switch: dump the generated C program instead of tracing.
parser.add_argument("--ebpf", action="store_true",
    help=argparse.SUPPRESS)
args = parser.parse_args()
60
# define BPF program
#
# Embedded C source that is handed to BCC further down. It declares one perf
# output channel ("events") and two probe handlers that both fill in pid,
# ppid and comm before submitting struct data_t records:
#   - syscall__execve:   submits one EVENT_ARG record for the filename, then
#                        one per argv entry starting at argv[1], stopping at
#                        the first NULL pointer; if MAXARG is reached first
#                        it submits a final "..." record.
#   - do_ret_sys_execve: submits a single EVENT_RET record carrying the
#                        syscall return value.
# MAXARG is not a C macro: it is a placeholder that is textually replaced with
# the --max-args value before the program is compiled.
# The layout of struct data_t must stay in sync with the ctypes Data class
# (and ARGSIZE constant) defined on the Python side of this file.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
#include <linux/fs.h>

#define ARGSIZE 128

enum event_type {
    EVENT_ARG,
    EVENT_RET,
};

struct data_t {
    u32 pid; // PID as in the userspace term (i.e. task->tgid in kernel)
    u32 ppid; // Parent PID as in the userspace term (i.e task->real_parent->tgid in kernel)
    char comm[TASK_COMM_LEN];
    enum event_type type;
    char argv[ARGSIZE];
    int retval;
};

BPF_PERF_OUTPUT(events);

static int __submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
{
    bpf_probe_read(data->argv, sizeof(data->argv), ptr);
    events.perf_submit(ctx, data, sizeof(struct data_t));
    return 1;
}

static int submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data)
{
    const char *argp = NULL;
    bpf_probe_read(&argp, sizeof(argp), ptr);
    if (argp) {
        return __submit_arg(ctx, (void *)(argp), data);
    }
    return 0;
}

int syscall__execve(struct pt_regs *ctx,
    const char __user *filename,
    const char __user *const __user *__argv,
    const char __user *const __user *__envp)
{
    // create data here and pass to submit_arg to save stack space (#555)
    struct data_t data = {};
    struct task_struct *task;

    data.pid = bpf_get_current_pid_tgid() >> 32;

    task = (struct task_struct *)bpf_get_current_task();
    // Some kernels, like Ubuntu 4.13.0-generic, return 0
    // as the real_parent->tgid.
    // We use the get_ppid function as a fallback in those cases. (#1883)
    data.ppid = task->real_parent->tgid;

    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    data.type = EVENT_ARG;

    __submit_arg(ctx, (void *)filename, &data);

    // skip first arg, as we submitted filename
    #pragma unroll
    for (int i = 1; i < MAXARG; i++) {
        if (submit_arg(ctx, (void *)&__argv[i], &data) == 0)
             goto out;
    }

    // handle truncated argument list
    char ellipsis[] = "...";
    __submit_arg(ctx, (void *)ellipsis, &data);
out:
    return 0;
}

int do_ret_sys_execve(struct pt_regs *ctx)
{
    struct data_t data = {};
    struct task_struct *task;

    data.pid = bpf_get_current_pid_tgid() >> 32;

    task = (struct task_struct *)bpf_get_current_task();
    // Some kernels, like Ubuntu 4.13.0-generic, return 0
    // as the real_parent->tgid.
    // We use the get_ppid function as a fallback in those cases. (#1883)
    data.ppid = task->real_parent->tgid;

    bpf_get_current_comm(&data.comm, sizeof(data.comm));
    data.type = EVENT_RET;
    data.retval = PT_REGS_RC(ctx);
    events.perf_submit(ctx, &data, sizeof(data));

    return 0;
}
"""
159
# Substitute the MAXARG placeholder in the C source with the --max-args
# value. str() keeps the substitution working whether argparse delivered
# the option as text or as a number.
bpf_text = bpf_text.replace("MAXARG", str(args.max_args))
if args.ebpf:
    # --ebpf: dump the generated program and stop before loading anything.
    print(bpf_text)
    # sys.exit() instead of the bare exit() helper: exit() is installed by
    # the site module for interactive use and is not guaranteed to exist
    # (e.g. python -S or embedded interpreters).
    sys.exit()

# initialize BPF
b = BPF(text=bpf_text)
# Attach the entry and return handlers to the kernel symbol that
# get_syscall_fnname() reports for the execve syscall.
execve_fnname = b.get_syscall_fnname("execve")
b.attach_kprobe(event=execve_fnname, fn_name="syscall__execve")
b.attach_kretprobe(event=execve_fnname, fn_name="do_ret_sys_execve")
Brendan Greggaf18bb32016-02-07 15:28:50 -0800170
# header
# Assemble the column titles first, then emit them as one line; the optional
# timestamp column goes in front of the fixed columns.
header_line = "%-16s %-6s %-6s %3s %s" % ("PCOMM", "PID", "PPID", "RET", "ARGS")
if args.timestamp:
    header_line = "%-8s" % ("TIME(s)") + header_line
print(header_line)
Brendan Greggaf18bb32016-02-07 15:28:50 -0800175
# Sizes of the fixed-length character buffers inside struct data_t.
TASK_COMM_LEN = 16      # linux/sched.h
ARGSIZE = 128           # should match #define in C above

# Array types for the two character buffers of the record.
_CommBuffer = ct.c_char * TASK_COMM_LEN
_ArgvBuffer = ct.c_char * ARGSIZE


class Data(ct.Structure):
    """ctypes mirror of ``struct data_t`` from the embedded C program.

    Records read from the perf buffer are cast to this type, so the field
    order and types below have to match the C declaration exactly.
    """
    _fields_ = [
        ("pid", ct.c_uint),         # u32 pid
        ("ppid", ct.c_uint),        # u32 ppid
        ("comm", _CommBuffer),      # char comm[TASK_COMM_LEN]
        ("type", ct.c_int),         # enum event_type type
        ("argv", _ArgvBuffer),      # char argv[ARGSIZE]
        ("retval", ct.c_int),       # int retval
    ]
188
class EventType(object):
    """Python-side names for the values of ``enum event_type`` in the C code.

    The numbering follows the C enum's declaration order: EVENT_ARG is 0 and
    EVENT_RET is 1.
    """
    EVENT_ARG, EVENT_RET = range(2)
192
# Argument strings collected so far for each PID, filled by EVENT_ARG records
# and consumed when the matching EVENT_RET record arrives.
argv = defaultdict(list)
# Reference point for the relative timestamps printed with -t.
start_ts = time.time()
195
# Best-effort PPID lookup through procfs, used as a fallback when reading
# task->real_parent->tgid in the BPF program yields 0 (seen on some kernel
# versions). Short-lived processes may already be gone by the time we look,
# in which case 0 is returned.
def get_ppid(pid):
    """Return the parent PID recorded in /proc/<pid>/status, or 0.

    0 is returned when the status file cannot be opened or read, or when it
    contains no "PPid:" line.
    """
    try:
        with open("/proc/%d/status" % pid) as status_file:
            # First "PPid:" line split into fields, or None if there is none.
            ppid_fields = next(
                (entry.split() for entry in status_file
                 if entry.startswith("PPid:")),
                None)
    except IOError:
        return 0
    return int(ppid_fields[1]) if ppid_fields else 0
209
# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: gather exec() arguments and print one row per exec.

    EVENT_ARG records only append their argument string to the per-PID list
    in ``argv``. An EVENT_RET record ends the exec: the collected arguments
    are removed from ``argv``, the -x/-n/-l filters are applied, and if the
    event passes, one output row is written.

    Args:
        cpu: CPU index supplied by the perf buffer (unused).
        data: pointer to the raw record; interpreted as a ``Data`` struct.
        size: record size in bytes supplied by the perf buffer (unused).
    """
    event = ct.cast(data, ct.POINTER(Data)).contents

    if event.type == EventType.EVENT_ARG:
        argv[event.pid].append(event.argv)
        return
    if event.type != EventType.EVENT_RET:
        return

    # The exec is over either way, so take the arguments out of the table
    # now; pop() with a default replaces the old try/del/except-pass cleanup.
    exec_args = argv.pop(event.pid, [])

    if event.retval != 0 and not args.fails:
        return
    if args.name and not re.search(bytes(args.name), event.comm):
        return
    if args.line and not re.search(bytes(args.line), b' '.join(exec_args)):
        return

    if args.quote:
        # The arguments are bytes, so the quoting must use bytes literals;
        # mixing in str raises TypeError on Python 3.
        exec_args = [
            b"\"" + arg.replace(b"\"", b"\\\"") + b"\""
            for arg in exec_args
        ]

    # Prefer the PPID reported by the kernel; fall back to procfs when it is 0.
    ppid = event.ppid if event.ppid > 0 else get_ppid(event.pid)
    ppid = b"%d" % ppid if ppid > 0 else b"?"
    argv_text = b' '.join(exec_args).replace(b'\n', b'\\n')
    row = b"%-16s %-6d %-6s %3d %s" % (event.comm, event.pid,
                                        ppid, event.retval, argv_text)
    if args.timestamp:
        # Emit the timestamp through the same printb() call as the row:
        # a text-layer print() followed by a bytes-layer write can reach the
        # terminal out of order.
        row = b"%-8.3f" % (time.time() - start_ts) + row
    printb(row)
Mark Drayton5b47e0f2016-06-02 10:53:20 +0100244
Mark Drayton5b47e0f2016-06-02 10:53:20 +0100245
# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
while True:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop tracing: leave the loop quietly
        # instead of ending the tool with a traceback.
        break