Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # |
| 3 | # argdist.py Trace a function and display a distribution of its |
| 4 | # parameter values as a histogram or frequency count. |
| 5 | # |
| 6 | # USAGE: argdist.py [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL] |
| 7 | # [-n COUNT] [-C specifier [specifier ...]] |
| 8 | # [-H specifier [specifier ...]] |
| 9 | # |
| 10 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 11 | # Copyright (C) 2016 Sasha Goldshtein. |
| 12 | |
| 13 | from bcc import BPF |
| 14 | from time import sleep, strftime |
| 15 | import argparse |
| 16 | |
class Specifier(object):
    """A single probe parsed from a command-line probe specifier.

    The specifier syntax is:

        {p,r}:[library]:function(signature)[:type:expr[:filter]][;label]

    An instance knows how to generate the C text of its BPF probe function,
    attach that function to the traced symbol, and print the collected data.
    """

    # C template for one probe. The upper-case placeholders are substituted
    # by generate_text(); PID_FILTER must be replaced before FILTER because
    # the former contains the latter as a substring.
    text = """
DATA_DECL

int PROBENAME(struct pt_regs *ctx SIGNATURE)
{
        PID_FILTER
        KEY_EXPR
        if (!(FILTER)) return 0;
        COLLECT
        return 0;
}
"""
    # Monotonic counter used to give every probe unique C identifiers.
    next_probe_index = 0

    @staticmethod
    def _split_label(specifier):
        """Split *specifier* into (probe text, label or None).

        Only a ';' that sits outside every bracket pair separates the label,
        so signatures that embed a struct definition (which contains ';'
        inside braces) are left intact.
        """
        depth = 0
        for index, char in enumerate(specifier):
            if char in "({[":
                depth += 1
            elif char in ")}]":
                depth -= 1
            elif char == ";" and depth <= 0:
                return specifier[:index], specifier[index + 1:]
        return specifier, None

    def __init__(self, type, specifier, pid):
        """Parse *specifier* into its components.

        type      -- "hist" for a histogram or "freq" for a frequency count
        specifier -- the raw probe specifier string from the command line
        pid       -- process id to restrict tracing to, or None for all

        Raises ValueError when the specifier is malformed, when the probe
        kind is not "p" or "r", when *type* is not "hist"/"freq", or when an
        entry-probe histogram is requested without an expression.
        """
        self.raw_spec = specifier
        spec_text, self.label = self._split_label(specifier)
        parts = spec_text.strip().split(':')
        if len(parts) < 3 or len(parts) > 6:
            raise ValueError("invalid specifier format")
        self.type = type        # hist or freq
        self.is_ret_probe = parts[0] == "r"
        if self.type != "hist" and self.type != "freq":
            raise ValueError("unrecognized probe type")
        if parts[0] not in ["r", "p"]:
            raise ValueError("unrecognized probe type")
        self.library = parts[1]
        # An empty library means the function lives in the kernel.
        self.is_user = len(self.library) > 0
        fparts = parts[2].split('(')
        if len(fparts) != 2:
            raise ValueError("invalid specifier format")
        self.function = fparts[0]
        raw_signature = fparts[1].rstrip()
        if not raw_signature.endswith(")"):
            # Without this check the slice below would silently drop the
            # last character of an unterminated signature.
            raise ValueError("invalid specifier format")
        self.signature = raw_signature[:-1]
        self.is_default_expr = len(parts) < 5
        if not self.is_default_expr:
            self.expr_type = parts[3]
            self.expr = parts[4]
        else:
            if not self.is_ret_probe and self.type == "hist":
                raise ValueError("dist probes must have expr")
            # Defaults: count calls for entry probes, collect the return
            # value for exit probes.
            self.expr_type = "u64" if not self.is_ret_probe else "int"
            self.expr = "1" if not self.is_ret_probe else "$retval"
        retval_expr = "(%s)ctx->ax" % self.expr_type
        self.expr = self.expr.replace("$retval", retval_expr)
        self.filter = None if len(parts) != 6 else parts[5]
        if self.filter is not None:
            self.filter = self.filter.replace("$retval", retval_expr)
        self.pid = pid
        self.probe_func_name = "%s_probe%d" % \
            (self.function, Specifier.next_probe_index)
        self.probe_hash_name = "%s_hash%d" % \
            (self.function, Specifier.next_probe_index)
        Specifier.next_probe_index += 1

    def _is_string_probe(self):
        """Return True when the collected expression is a C string."""
        return self.expr_type == "char*" or self.expr_type == "char *"

    def generate_text(self, string_size):
        """Return the C source of this probe.

        string_size -- number of bytes copied for char* expressions
        """
        program = self.text.replace("PROBENAME", self.probe_func_name)
        signature = "" if len(self.signature) == 0 \
            else "," + self.signature
        program = program.replace("SIGNATURE", signature)
        if self.pid is not None and not self.is_user:
            # Kernel probes need to explicitly filter by pid. The process id
            # (tgid) is in the upper 32 bits of the helper's return value;
            # the lower 32 bits hold the thread id. The "__" prefix keeps the
            # local from clashing with a traced parameter named "pid".
            pid_filter = (
                "u32 __pid = bpf_get_current_pid_tgid() >> 32;\n"
                "if (__pid != %d) { return 0; }" % self.pid)
            program = program.replace("PID_FILTER", pid_filter)
        else:
            program = program.replace("PID_FILTER", "")
        if self._is_string_probe():
            # Name the key struct after the unique hash name so that two
            # string probes on the same function do not redefine it.
            key_struct = "%s_key_t" % self.probe_hash_name
            decl = """
struct %s { char key[%d]; };
BPF_HASH(%s, struct %s, u64);
""" % (key_struct, string_size, self.probe_hash_name, key_struct)
            collect = "%s.increment(__key);" % self.probe_hash_name
            key_expr = """
struct %s __key = {0};
bpf_probe_read(&__key.key, sizeof(__key.key), %s);
""" % (key_struct, self.expr)
        elif self.type == "freq":
            decl = "BPF_HASH(%s, %s, u64);" % \
                (self.probe_hash_name, self.expr_type)
            collect = "%s.increment(__key);" % self.probe_hash_name
            key_expr = "%s __key = %s;" % \
                (self.expr_type, self.expr)
        elif self.type == "hist":
            decl = "BPF_HISTOGRAM(%s, %s);" % \
                (self.probe_hash_name, self.expr_type)
            collect = "%s.increment(bpf_log2l(__key));" % \
                self.probe_hash_name
            key_expr = "%s __key = %s;" % \
                (self.expr_type, self.expr)
        program = program.replace("DATA_DECL", decl)
        program = program.replace("KEY_EXPR", key_expr)
        program = program.replace("FILTER", self.filter or "1")
        program = program.replace("COLLECT", collect)
        return program

    def attach(self, bpf):
        """Attach this probe's function to its target using *bpf*.

        The bpf object is remembered so display() can read the table later.
        """
        self.bpf = bpf
        if self.is_user:
            if self.is_ret_probe:
                bpf.attach_uretprobe(name=self.library,
                                     sym=self.function,
                                     fn_name=self.probe_func_name,
                                     pid=self.pid or -1)
            else:
                bpf.attach_uprobe(name=self.library,
                                  sym=self.function,
                                  fn_name=self.probe_func_name,
                                  pid=self.pid or -1)
        else:
            if self.is_ret_probe:
                bpf.attach_kretprobe(event=self.function,
                                     fn_name=self.probe_func_name)
            else:
                bpf.attach_kprobe(event=self.function,
                                  fn_name=self.probe_func_name)

    def display(self):
        """Print the label (or raw specifier) followed by the collected data.

        Frequency probes print one COUNT/EVENT row per key in ascending
        count order; histogram probes print a log2 histogram.
        """
        print(self.label or self.raw_spec)
        data = self.bpf.get_table(self.probe_hash_name)
        if self.type == "freq":
            print("\t%-10s %s" % ("COUNT", "EVENT"))
            for key, value in sorted(data.items(),
                                     key=lambda kv: kv[1].value):
                key_val = key.key if self._is_string_probe() \
                    else str(key.value)
                if self.is_default_expr:
                    if not self.is_ret_probe:
                        key_str = "total calls"
                    else:
                        key_str = "retval = %s" % key_val
                else:
                    key_str = "%s = %s" % (self.expr, key_val)
                print("\t%-10s %s" %
                      (str(value.value), key_str))
        elif self.type == "hist":
            label = self.expr if not self.is_default_expr \
                else "retval"
            data.print_log2_hist(val_type=label)
| 168 | |
| 169 | examples = """ |
| 170 | Probe specifier syntax: |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 171 | {p,r}:[library]:function(signature)[:type:expr[:filter]][;label] |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 172 | Where: |
| 173 | p,r -- probe at function entry or at function exit |
| 174 | in exit probes, only $retval is accessible |
| 175 | library -- the library that contains the function |
| 176 | (leave empty for kernel functions) |
| 177 | function -- the function name to trace |
| 178 | signature -- the function's parameters, as in the C header |
| 179 | type -- the type of the expression to collect |
| 180 | expr -- the expression to collect |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 181 | filter -- the filter that is applied to collected values |
| 182 | label -- the label for this probe in the resulting output |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 183 | |
| 184 | EXAMPLES: |
| 185 | |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 186 | argdist.py -H 'p::__kmalloc(u64 size):u64:size' |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 187 | Print a histogram of allocation sizes passed to kmalloc |
| 188 | |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 189 | argdist.py -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16' |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 190 | Print a frequency count of how many times process 1005 called malloc |
| 191 | with an allocation size of 16 bytes |
| 192 | |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 193 | argdist.py -C 'r:c:gets():char*:$retval;snooped strings' |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 194 | Snoop on all strings returned by gets() |
| 195 | |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 196 | argdist.py -p 1005 -C 'p:c:write(int fd):int:fd' |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 197 | Print frequency counts of how many times writes were issued to a |
| 198 | particular file descriptor number, in process 1005 |
| 199 | |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 200 | argdist.py -p 1005 -H 'r:c:read()' |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 201 | Print a histogram of error codes returned by read() in process 1005 |
| 202 | |
| 203 | argdist.py -H \\ |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 204 | 'p:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1' |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 205 | Print a histogram of buffer sizes passed to write() across all |
| 206 | processes, where the file descriptor was 1 (STDOUT) |
| 207 | |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 208 | argdist.py -C 'p:c:fork();fork calls' |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 209 | Count fork() calls in libc across all processes |
| 210 | Can also use funccount.py, which is easier and more flexible |
| 211 | |
| 212 | argdist.py \\ |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 213 | -H 'p:c:sleep(u32 seconds):u32:seconds' \\ |
| 214 | -H 'p:c:nanosleep(struct timespec { time_t tv_sec; long tv_nsec; } *req):long:req->tv_nsec' |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 215 | Print histograms of sleep() and nanosleep() parameter values |
| 216 | |
| 217 | argdist.py -p 2780 -z 120 \\ |
Sasha Goldshtein | ed21adf | 2016-02-12 03:04:53 -0800 | [diff] [blame^] | 218 | -C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1' |
Sasha Goldshtein | 8538485 | 2016-02-12 01:29:39 -0800 | [diff] [blame] | 219 | Spy on writes to STDOUT performed by process 2780, up to a string size |
| 220 | of 120 characters |
| 221 | """ |
| 222 | |
# Command-line interface. -H and -C may each be given several times and may
# each take several specifiers; every occurrence is kept.
parser = argparse.ArgumentParser(
    description="Trace a function and display a summary of its parameter values.",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
parser.add_argument("-p", "--pid", type=int,
    help="id of the process to trace (optional)")
parser.add_argument("-z", "--string-size", default=80, type=int,
    help="maximum string size to read from char* arguments")
parser.add_argument("-i", "--interval", default=1, type=int,
    help="output interval, in seconds")
parser.add_argument("-n", "--number", type=int, dest="count",
    help="number of outputs")
# action="append" collects one list per occurrence of the flag; with the
# default "store" action a repeated flag would discard the earlier ones.
parser.add_argument("-H", "--histogram", nargs="*", action="append",
    dest="histspecifier",
    help="probe specifier to capture histogram of (see examples below)")
parser.add_argument("-C", "--count", nargs="*", action="append",
    dest="countspecifier",
    help="probe specifier to capture count of (see examples below)")
parser.add_argument("-v", "--verbose", action="store_true",
    help="print resulting BPF program code before executing")
args = parser.parse_args()

# Flatten the per-occurrence lists; frequency probes come first, then
# histogram probes, each in command-line order.
specifiers = []
for group in (args.countspecifier or []):
    for specifier in group:
        specifiers.append(Specifier("freq", specifier, args.pid))
for group in (args.histspecifier or []):
    for histspecifier in group:
        specifiers.append(Specifier("hist", histspecifier, args.pid))
if len(specifiers) == 0:
    print("at least one specifier is required")
    exit(1)
| 251 | |
# Build one BPF program: the shared header followed by the generated C text
# of every probe, in the order the probes were created.
bpf_source = "#include <uapi/linux/ptrace.h>\n" + "".join(
    probe.generate_text(args.string_size) for probe in specifiers)

if args.verbose:
    print(bpf_source)

# Load the program, then attach each probe to its target function.
bpf = BPF(text=bpf_source)

for probe in specifiers:
    probe.attach(bpf)
| 263 | |
# Main loop: wait one interval, print a timestamp and every probe's data,
# and stop after the requested number of outputs (or on Ctrl-C while waiting).
outputs_printed = 0
while True:
    try:
        sleep(args.interval)
    except KeyboardInterrupt:
        exit()
    print("[%s]" % strftime("%H:%M:%S"))
    for probe in specifiers:
        probe.display()
    outputs_printed += 1
    limit_reached = args.count is not None and \
        outputs_printed >= args.count
    if limit_reached:
        exit()