#!/usr/bin/env python
#
# argdist.py   Trace a function and display a distribution of its
#              parameter values as a histogram or frequency count.
#
# USAGE: argdist.py [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL]
#                   [-n COUNT] [-v] [-C specifier [specifier ...]]
#                   [-H specifier [specifier ...]]
#
# Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein.

import argparse
import sys
from time import sleep, strftime

from bcc import BPF

class Specifier(object):
    """A single probe parsed from a command-line specifier string.

    A specifier has the form

        {p,r}:[library]:function(signature)[:type:expr[:filter]][;label]

    Each instance owns one generated probe function and one BPF table, both
    named after the traced function plus a class-wide running index.  It can
    generate its C source (generate_text), attach itself to a BPF object
    (attach) and print the contents of its table (display).
    """

    # Skeleton of the generated probe function.  The upper-case placeholders
    # are substituted, in order, by generate_text().
    text = """
DATA_DECL

int PROBENAME(struct pt_regs *ctx SIGNATURE)
{
        PID_FILTER
        KEY_EXPR
        if (!(FILTER)) return 0;
        COLLECT
        return 0;
}
"""
    # Running counter that keeps the generated probe and table names unique
    # when several specifiers trace the same function.
    next_probe_index = 0

    @staticmethod
    def _split_label(specifier):
        """Split 'spec;label' into (spec, label); label is None if absent.

        Only a ';' that is outside every (), [] and {} pair separates the
        label, so a signature such as
        'nanosleep(struct timespec { time_t tv_sec; long tv_nsec; } *req)'
        is left intact.  Everything after the first such ';' is the label.
        """
        depth = 0
        for index, char in enumerate(specifier):
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth -= 1
            elif char == ";" and depth <= 0:
                return specifier[:index], specifier[index + 1:]
        return specifier, None

    def __init__(self, type, specifier, pid):
        """Parse a probe specifier.

        type      -- "hist" (log2 histogram) or "freq" (frequency count)
        specifier -- the specifier string, see the class docstring
        pid       -- process id to restrict tracing to, or None for all

        Raises ValueError if the specifier is malformed, the probe type is
        unknown, or the combination cannot be rendered (a histogram with no
        expression at function entry, or a histogram of a string).
        """
        self.raw_spec = specifier
        spec, self.label = self._split_label(specifier)
        parts = spec.strip().split(':')
        if len(parts) < 3 or len(parts) > 6:
            raise ValueError("invalid specifier format")
        self.type = type        # hist or freq
        self.is_ret_probe = parts[0] == "r"
        if self.type not in ("hist", "freq"):
            raise ValueError("unrecognized probe type")
        if parts[0] not in ("r", "p"):
            raise ValueError("unrecognized probe type")
        self.library = parts[1]
        # An empty library field denotes a kernel function.
        self.is_user = len(self.library) > 0
        # Split at the first '(' only, so that the signature itself may
        # contain parentheses, and insist on the closing ')' instead of
        # blindly dropping the last character.
        function, paren, rest = parts[2].partition('(')
        if not function or not paren or not rest.endswith(')'):
            raise ValueError("invalid specifier format")
        self.function = function
        self.signature = rest[:-1]
        self.is_default_expr = len(parts) < 5
        if not self.is_default_expr:
            self.expr_type = parts[3]
            self.expr = parts[4]
        else:
            if not self.is_ret_probe and self.type == "hist":
                raise ValueError("dist probes must have expr")
            # Default: count calls at entry, collect the return value
            # (as an int) at exit.
            self.expr_type = "u64" if not self.is_ret_probe else "int"
            self.expr = "1" if not self.is_ret_probe else "$retval"
        if self.type == "hist" and self._is_string_probe():
            # String expressions are keyed by a char-array struct in a
            # hash table, which cannot be bucketed into a log2 histogram.
            raise ValueError("histogram probes require a numeric expr")
        # NOTE(review): the return value is read from ctx->ax, which looks
        # x86-specific -- confirm before using on other architectures.
        retval = "(%s)ctx->ax" % self.expr_type
        self.expr = self.expr.replace("$retval", retval)
        self.filter = None if len(parts) != 6 else parts[5]
        if self.filter is not None:
            self.filter = self.filter.replace("$retval", retval)
        self.pid = pid
        self.probe_func_name = "%s_probe%d" % \
            (self.function, Specifier.next_probe_index)
        self.probe_hash_name = "%s_hash%d" % \
            (self.function, Specifier.next_probe_index)
        Specifier.next_probe_index += 1

    def _is_string_probe(self):
        """Return True if the collected expression is a C string."""
        return self.expr_type in ("char*", "char *")

    def generate_text(self, string_size):
        """Return the C source of this probe and of its table declaration.

        string_size -- size in bytes of the char array used as the table
                       key when the collected expression is a string
        """
        program = self.text.replace("PROBENAME", self.probe_func_name)
        signature = "" if len(self.signature) == 0 \
            else "," + self.signature
        program = program.replace("SIGNATURE", signature)
        if self.pid is not None and not self.is_user:
            # Kernel probes need to explicitly filter pid.  The process id
            # (tgid) is the upper half of bpf_get_current_pid_tgid(); the
            # lower half is the thread id.  The local is named __pid so it
            # cannot clash with a traced parameter called 'pid'.
            pid_filter = ("u32 __pid = bpf_get_current_pid_tgid() >> 32;\n"
                          "if (__pid != %d) { return 0; }" % self.pid)
        else:
            pid_filter = ""
        program = program.replace("PID_FILTER", pid_filter)
        if self._is_string_probe():
            # Strings are copied into a fixed-size struct used as hash key.
            decl = """
struct %s_key_t { char key[%d]; };
BPF_HASH(%s, struct %s_key_t, u64);
""" \
                % (self.function, string_size,
                   self.probe_hash_name, self.function)
            collect = "%s.increment(__key);" % self.probe_hash_name
            key_expr = """
struct %s_key_t __key = {0};
bpf_probe_read(&__key.key, sizeof(__key.key), %s);
""" \
                % (self.function, self.expr)
        elif self.type == "freq":
            decl = "BPF_HASH(%s, %s, u64);" % \
                (self.probe_hash_name, self.expr_type)
            collect = "%s.increment(__key);" % self.probe_hash_name
            key_expr = "%s __key = %s;" % (self.expr_type, self.expr)
        elif self.type == "hist":
            decl = "BPF_HISTOGRAM(%s, %s);" % \
                (self.probe_hash_name, self.expr_type)
            collect = "%s.increment(bpf_log2l(__key));" % \
                self.probe_hash_name
            key_expr = "%s __key = %s;" % (self.expr_type, self.expr)
        program = program.replace("DATA_DECL", decl)
        program = program.replace("KEY_EXPR", key_expr)
        # No filter means "always true".
        program = program.replace("FILTER", self.filter or "1")
        program = program.replace("COLLECT", collect)
        return program

    def attach(self, bpf):
        """Attach the generated probe function to its target.

        bpf -- the BPF object compiled from the generated program; it is
               remembered so that display() can read the table later.

        Library functions use uprobes/uretprobes (restricted to self.pid,
        or -1 when no pid was given); kernel functions use
        kprobes/kretprobes.
        """
        self.bpf = bpf
        if self.is_user:
            attach_fn = bpf.attach_uretprobe if self.is_ret_probe \
                else bpf.attach_uprobe
            attach_fn(name=self.library,
                      sym=self.function,
                      fn_name=self.probe_func_name,
                      pid=self.pid or -1)
        else:
            attach_fn = bpf.attach_kretprobe if self.is_ret_probe \
                else bpf.attach_kprobe
            attach_fn(event=self.function,
                      fn_name=self.probe_func_name)

    def _describe_key(self, key):
        """Return the text shown in the EVENT column for a table key."""
        if self.is_default_expr and not self.is_ret_probe:
            return "total calls"
        if self._is_string_probe():
            key_val = key.key
            # ctypes hands back bytes on Python 3; decode so the output is
            # not rendered as b'...'.  On Python 2 this is already a str.
            if not isinstance(key_val, str):
                key_val = key_val.decode("utf-8", "replace")
        else:
            key_val = str(key.value)
        if self.is_default_expr:
            return "retval = %s" % key_val
        return "%s = %s" % (self.expr, key_val)

    def display(self):
        """Print the label (or raw specifier) followed by the table data.

        Frequency probes print one COUNT/EVENT row per key in ascending
        count order; histogram probes print a log2 histogram.
        """
        print(self.label or self.raw_spec)
        data = self.bpf.get_table(self.probe_hash_name)
        if self.type == "freq":
            print("\t%-10s %s" % ("COUNT", "EVENT"))
            for key, value in sorted(data.items(),
                                     key=lambda kv: kv[1].value):
                print("\t%-10s %s" %
                      (str(value.value), self._describe_key(key)))
        elif self.type == "hist":
            label = self.expr if not self.is_default_expr \
                else "retval"
            data.print_log2_hist(val_type=label)

# Help epilog shown by argparse after the option list: documents the probe
# specifier syntax and gives sample invocations.  The doubled backslashes
# produce a literal '\' line continuation in the printed shell examples.
examples = """
Probe specifier syntax:
        {p,r}:[library]:function(signature)[:type:expr[:filter]][;label]
Where:
        p,r        -- probe at function entry or at function exit
                      in exit probes, only $retval is accessible
        library    -- the library that contains the function
                      (leave empty for kernel functions)
        function   -- the function name to trace
        signature  -- the function's parameters, as in the C header
        type       -- the type of the expression to collect
        expr       -- the expression to collect
        filter     -- the filter that is applied to collected values
        label      -- the label for this probe in the resulting output

EXAMPLES:

argdist.py -H 'p::__kmalloc(u64 size):u64:size'
        Print a histogram of allocation sizes passed to kmalloc

argdist.py -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
        Print a frequency count of how many times process 1005 called malloc
        with an allocation size of 16 bytes

argdist.py -C 'r:c:gets():char*:$retval;snooped strings'
        Snoop on all strings returned by gets()

argdist.py -p 1005 -C 'p:c:write(int fd):int:fd'
        Print frequency counts of how many times writes were issued to a
        particular file descriptor number, in process 1005

argdist.py -p 1005 -H 'r:c:read()'
        Print a histogram of error codes returned by read() in process 1005

argdist.py -H \\
        'p:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1'
        Print a histogram of buffer sizes passed to write() across all
        processes, where the file descriptor was 1 (STDOUT)

argdist.py -C 'p:c:fork();fork calls'
        Count fork() calls in libc across all processes
        Can also use funccount.py, which is easier and more flexible

argdist.py \\
        -H 'p:c:sleep(u32 seconds):u32:seconds' \\
        -H 'p:c:nanosleep(struct timespec { time_t tv_sec; long tv_nsec; } *req):long:req->tv_nsec'
        Print histograms of sleep() and nanosleep() parameter values

argdist.py -p 2780 -z 120 \\
        -C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1'
        Spy on writes to STDOUT performed by process 2780, up to a string size
        of 120 characters
"""

# Command-line interface.  -H and -C use action="append" together with
# nargs="*" so that every occurrence of the option contributes its
# specifiers; with the default "store" action a repeated option would
# silently replace the specifiers given earlier.
parser = argparse.ArgumentParser(
    description="Trace a function and display a summary of its "
                "parameter values.",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", type=int,
    help="id of the process to trace (optional)")
parser.add_argument("-z", "--string-size", default=80, type=int,
    help="maximum string size to read from char* arguments")
parser.add_argument("-i", "--interval", default=1, type=int,
    help="output interval, in seconds")
parser.add_argument("-n", "--number", type=int, dest="count",
    help="number of outputs")
parser.add_argument("-H", "--histogram", nargs="*", action="append",
    dest="histspecifier",
    help="probe specifier to capture histogram of (see examples below)")
parser.add_argument("-C", "--count", nargs="*", action="append",
    dest="countspecifier",
    help="probe specifier to capture count of (see examples below)")
parser.add_argument("-v", "--verbose", action="store_true",
    help="print resulting BPF program code before executing")
args = parser.parse_args()

# Each option occurrence yields one list of specifiers; flatten them while
# keeping the original order: all frequency probes first, then histograms.
requested = [("freq", text)
             for group in (args.countspecifier or []) for text in group]
requested += [("hist", text)
              for group in (args.histspecifier or []) for text in group]

specifiers = []
for probe_type, text in requested:
    try:
        specifiers.append(Specifier(probe_type, text, args.pid))
    except ValueError as error:
        # Report a malformed specifier without a traceback.
        print("%s: %s" % (text, error))
        sys.exit(1)
if len(specifiers) == 0:
    print("at least one specifier is required")
    sys.exit(1)

# All probes are compiled together as a single BPF program.
bpf_source = "#include <uapi/linux/ptrace.h>\n"
for specifier in specifiers:
    bpf_source += specifier.generate_text(args.string_size)

if args.verbose:
    print(bpf_source)

bpf = BPF(text=bpf_source)

for specifier in specifiers:
    specifier.attach(bpf)

# Print every probe's data once per interval until the requested number of
# outputs is reached or the user interrupts with Ctrl-C (at any point of
# the cycle, not only while sleeping).
count_so_far = 0
try:
    while True:
        sleep(args.interval)
        print("[%s]" % strftime("%H:%M:%S"))
        for specifier in specifiers:
            specifier.display()
        count_so_far += 1
        if args.count is not None and count_so_far >= args.count:
            break
except KeyboardInterrupt:
    pass