blob: a807458ecf1b833a53c88d66e0fc1e388ba6ecf1 [file] [log] [blame]
Sasha Goldshtein77893182017-03-30 05:05:10 -04001#!/usr/bin/python
2# @lint-avoid-python-3-compatibility-imports
3#
4# funcslower Trace slow kernel or user function calls.
5# For Linux, uses BCC, eBPF.
6#
7# USAGE: funcslower [-h] [-p PID] [-m MIN_MS] [-u MIN_US] [-a ARGUMENTS]
8# [-T] [-t] [-v] function [function ...]
9#
10# WARNING: This tool traces function calls by instrumenting the entry and
11# return from each function. For commonly-invoked functions like memory allocs
12# or file writes, this can be extremely expensive. Mind the overhead.
13#
Ivan Babrou525f3e92017-11-21 16:03:57 -080014# NOTE: This tool cannot trace nested functions in the same invocation
15# due to instrumentation specifics, only innermost calls will be visible.
16#
Sasha Goldshtein77893182017-03-30 05:05:10 -040017# By default, a minimum millisecond threshold of 1 is used.
18#
19# Copyright 2017, Sasha Goldshtein
20# Licensed under the Apache License, Version 2.0 (the "License")
21#
22# 30-Mar-2017 Sasha Goldshtein Created this.
23
24from __future__ import print_function
25from bcc import BPF
26import argparse
27import ctypes as ct
28import time
29
30examples = """examples:
31 ./funcslower vfs_write # trace vfs_write calls slower than 1ms
32 ./funcslower -m 10 vfs_write # same, but slower than 10ms
33 ./funcslower -u 10 c:open # trace open calls slower than 10us
34 ./funcslower -p 135 c:open # trace pid 135 only
35 ./funcslower c:malloc c:free # trace both malloc and free slower than 1ms
36 ./funcslower -a 2 c:open # show first two arguments to open
37"""
38parser = argparse.ArgumentParser(
39 description="Trace slow kernel or user function calls.",
40 formatter_class=argparse.RawDescriptionHelpFormatter,
41 epilog=examples)
42parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="tgid",
43 help="trace this PID only")
44parser.add_argument("-m", "--min-ms", type=float, dest="min_ms",
45 help="minimum duration to trace (ms)")
46parser.add_argument("-u", "--min-us", type=float, dest="min_us",
47 help="minimum duration to trace (us)")
48parser.add_argument("-a", "--arguments", type=int,
49 help="print this many entry arguments, as hex")
50parser.add_argument("-T", "--time", action="store_true",
51 help="show HH:MM:SS timestamp")
52parser.add_argument("-t", "--timestamp", action="store_true",
53 help="show timestamp in seconds at us resolution")
54parser.add_argument("-v", "--verbose", action="store_true",
55 help="print the BPF program for debugging purposes")
56parser.add_argument(metavar="function", nargs="+", dest="functions",
57 help="function(s) to trace")
58
59args = parser.parse_args()
60# fractions are allowed, but rounded to an integer nanosecond
61if args.min_ms:
62 duration_ns = int(args.min_ms * 1000000)
63elif args.min_us:
64 duration_ns = int(args.min_us * 1000)
65else:
66 duration_ns = 1000000 # default to 1ms
67
68bpf_text = """
69#include <uapi/linux/ptrace.h>
70#include <linux/sched.h> // for TASK_COMM_LEN
71
72struct entry_t {
73 u64 id;
74 u64 start_ns;
75#ifdef GRAB_ARGS
76 u64 args[6];
77#endif
78};
79
80struct data_t {
81 u64 id;
82 u64 tgid_pid;
83 u64 start_ns;
84 u64 duration_ns;
85 u64 retval;
86 char comm[TASK_COMM_LEN];
87#ifdef GRAB_ARGS
88 u64 args[6];
89#endif
90};
91
92BPF_HASH(entryinfo, u64, struct entry_t);
93BPF_PERF_OUTPUT(events);
94
95static int trace_entry(struct pt_regs *ctx, int id)
96{
97 u64 tgid_pid = bpf_get_current_pid_tgid();
98 u32 tgid = tgid_pid >> 32;
99 if (TGID_FILTER)
100 return 0;
101
102 u32 pid = tgid_pid;
103
104 struct entry_t entry = {};
105 entry.start_ns = bpf_ktime_get_ns();
106 entry.id = id;
107#ifdef GRAB_ARGS
108 entry.args[0] = PT_REGS_PARM1(ctx);
109 entry.args[1] = PT_REGS_PARM2(ctx);
110 entry.args[2] = PT_REGS_PARM3(ctx);
111 entry.args[3] = PT_REGS_PARM4(ctx);
112 entry.args[4] = PT_REGS_PARM5(ctx);
113 entry.args[5] = PT_REGS_PARM6(ctx);
114#endif
115
116 entryinfo.update(&tgid_pid, &entry);
117
118 return 0;
119}
120
121int trace_return(struct pt_regs *ctx)
122{
123 struct entry_t *entryp;
124 u64 tgid_pid = bpf_get_current_pid_tgid();
125
126 entryp = entryinfo.lookup(&tgid_pid);
127 if (entryp == 0) {
128 return 0;
129 }
130
131 u64 delta_ns = bpf_ktime_get_ns() - entryp->start_ns;
132 entryinfo.delete(&tgid_pid);
133
134 if (delta_ns < DURATION_NS)
135 return 0;
136
137 struct data_t data = {};
138 data.id = entryp->id;
139 data.tgid_pid = tgid_pid;
140 data.start_ns = entryp->start_ns;
141 data.duration_ns = delta_ns;
142 data.retval = PT_REGS_RC(ctx);
143#ifdef GRAB_ARGS
144 bpf_probe_read(&data.args[0], sizeof(data.args), entryp->args);
145#endif
146 bpf_get_current_comm(&data.comm, sizeof(data.comm));
147 events.perf_submit(ctx, &data, sizeof(data));
148
149 return 0;
150}
151"""
152
153bpf_text = bpf_text.replace('DURATION_NS', str(duration_ns))
154if args.arguments:
155 bpf_text = "#define GRAB_ARGS\n" + bpf_text
156if args.tgid:
Ivan Babrou95c20b02017-11-21 15:35:42 -0800157 bpf_text = bpf_text.replace('TGID_FILTER', 'tgid != %d' % args.tgid)
Sasha Goldshtein77893182017-03-30 05:05:10 -0400158else:
159 bpf_text = bpf_text.replace('TGID_FILTER', '0')
160
161for i in range(len(args.functions)):
162 bpf_text += """
163int trace_%d(struct pt_regs *ctx) {
164 return trace_entry(ctx, %d);
165}
166""" % (i, i)
167
168if args.verbose:
169 print(bpf_text)
170
171b = BPF(text=bpf_text)
172
173for i, function in enumerate(args.functions):
174 if ":" in function:
175 library, func = function.split(":")
176 b.attach_uprobe(name=library, sym=func, fn_name="trace_%d" % i)
177 b.attach_uretprobe(name=library, sym=func, fn_name="trace_return")
178 else:
179 b.attach_kprobe(event=function, fn_name="trace_%d" % i)
180 b.attach_kretprobe(event=function, fn_name="trace_return")
181
182TASK_COMM_LEN = 16 # linux/sched.h
183
184class Data(ct.Structure):
185 _fields_ = [
186 ("id", ct.c_ulonglong),
187 ("tgid_pid", ct.c_ulonglong),
188 ("start_ns", ct.c_ulonglong),
189 ("duration_ns", ct.c_ulonglong),
190 ("retval", ct.c_ulonglong),
191 ("comm", ct.c_char * TASK_COMM_LEN)
192 ] + ([("args", ct.c_ulonglong * 6)] if args.arguments else [])
193
194time_designator = "us" if args.min_us else "ms"
195time_value = args.min_us or args.min_ms or 1
196time_multiplier = 1000 if args.min_us else 1000000
197time_col = args.time or args.timestamp
198print("Tracing function calls slower than %g %s... Ctrl+C to quit." %
199 (time_value, time_designator))
200print((("%-10s " % "TIME" if time_col else "") + "%-14s %-6s %7s %16s %s") %
201 ("COMM", "PID", "LAT(%s)" % time_designator, "RVAL",
202 "FUNC" + (" ARGS" if args.arguments else "")))
203
204earliest_ts = 0
205
206def time_str(event):
207 if args.time:
208 return "%-10s " % time.strftime("%H:%M:%S")
209 if args.timestamp:
210 global earliest_ts
211 if earliest_ts == 0:
212 earliest_ts = event.start_ns
213 return "%-10.6f " % ((event.start_ns - earliest_ts) / 1000000000.0)
214 return ""
215
216def args_str(event):
217 if not args.arguments:
218 return ""
219 return str.join(" ", ["0x%x" % arg for arg in event.args[:args.arguments]])
220
221def print_event(cpu, data, size):
222 event = ct.cast(data, ct.POINTER(Data)).contents
223 ts = float(event.duration_ns) / time_multiplier
224 print((time_str(event) + "%-14.14s %-6s %7.2f %16x %s %s") %
225 (event.comm.decode(), event.tgid_pid >> 32,
226 ts, event.retval, args.functions[event.id], args_str(event)))
227
228b["events"].open_perf_buffer(print_event, page_cnt=64)
229while True:
230 b.kprobe_poll()