Sasha Goldshtein | 07175d0 | 2016-10-06 01:11:55 +0300 | [diff] [blame] | 1 | #!/usr/bin/env python |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 2 | # |
Sasha Goldshtein | 07175d0 | 2016-10-06 01:11:55 +0300 | [diff] [blame] | 3 | # stackcount Count events and their stack traces. |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 4 | # For Linux, uses BCC, eBPF. |
| 5 | # |
# USAGE: stackcount [-h] [-p PID] [-i INTERVAL] [-T] [-r] [-s]
#                   [-P] [-v] [-d] pattern
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 8 | # |
| 9 | # The pattern is a string with optional '*' wildcards, similar to file |
| 10 | # globbing. If you'd prefer to use regular expressions, use the -r option. |
| 11 | # |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 12 | # Copyright 2016 Netflix, Inc. |
| 13 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 14 | # |
Sasha Goldshtein | 07175d0 | 2016-10-06 01:11:55 +0300 | [diff] [blame] | 15 | # 12-Jan-2016 Brendan Gregg Created this. |
| 16 | # 09-Jul-2016 Sasha Goldshtein Generalized for uprobes and tracepoints. |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 17 | |
| 18 | from __future__ import print_function |
Sasha Goldshtein | 07175d0 | 2016-10-06 01:11:55 +0300 | [diff] [blame] | 19 | from bcc import BPF, USDT |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 20 | from time import sleep, strftime |
| 21 | import argparse |
Sasha Goldshtein | 07175d0 | 2016-10-06 01:11:55 +0300 | [diff] [blame] | 22 | import re |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 23 | import signal |
Sasha Goldshtein | 07175d0 | 2016-10-06 01:11:55 +0300 | [diff] [blame] | 24 | import sys |
| 25 | import traceback |
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 26 | |
# Module-wide debug switch. Tool.__init__ overwrites it from the -d/--debug
# command-line flag; when true, the generated BPF program text is printed
# before loading and top-level errors are reported with a full traceback.
debug = False
Brendan Gregg | 38cef48 | 2016-01-15 17:26:30 -0800 | [diff] [blame] | 28 | |
class Probe(object):
    """One user-specified event to count stack traces for.

    A Probe parses the pattern given on the command line, generates the BPF
    program text that counts (pid, stack id) pairs (`load`), and attaches
    that program to every event matching the pattern (`attach`).

    Attributes set by __init__:
        type     -- "p" (kprobe/uprobe), "t" (tracepoint) or "u" (USDT)
        library  -- resolved path of the library/executable, or "" for
                    kernel probes
        pattern  -- regular expression (as a string) matched against event
                    names
        pid      -- PID to restrict tracing to, or None
        per_pid  -- whether counts are kept separately per process
        matched  -- number of events the pattern matched (filled in by
                    `load` for USDT probes, by `attach` otherwise)
        bpf      -- the BPF object, created by `load`
        usdt     -- the USDT context for "u" probes, created by `load`
    """

    def __init__(self, pattern, use_regex=False, pid=None, per_pid=False):
        """Init a new probe.

        Init the probe from the pattern provided by the user. The supported
        patterns mimic the 'trace' and 'argdist' tools, but are simpler because
        we don't have to distinguish between probes and retprobes.

            func            -- probe a kernel function
            lib:func        -- probe a user-space function in the library 'lib'
            p::func         -- same thing as 'func'
            p:lib:func      -- same thing as 'lib:func'
            t:cat:event     -- probe a kernel tracepoint
            u:lib:probe     -- probe a USDT tracepoint

        Unless use_regex is true, the event part of the pattern is treated
        as a glob in which only '*' is special; it is converted to an
        anchored regular expression. With use_regex the pattern is used
        verbatim (no anchors are added).

        Raises Exception if the pattern has an unknown type prefix, has more
        than three ':'-separated components, or names a library that cannot
        be found.
        """
        parts = pattern.split(':')
        if len(parts) == 1:
            parts = ["p", "", parts[0]]
        elif len(parts) == 2:
            parts = ["p", parts[0], parts[1]]
        elif len(parts) == 3:
            if parts[0] == "t":
                # Tracepoints are matched as a single "category:event" name.
                parts = ["t", "", "%s:%s" % tuple(parts[1:])]
            if parts[0] not in ["p", "t", "u"]:
                raise Exception("Type must be 'p', 't', or 'u', but got %s" %
                                parts[0])
        else:
            raise Exception("Too many ':'-separated components in pattern %s" %
                            pattern)

        (self.type, self.library, self.pattern) = parts
        if not use_regex:
            self.pattern = self._glob_to_regex(self.pattern)

        if (self.type == "p" and self.library) or self.type == "u":
            libpath = BPF.find_library(self.library)
            if libpath is None:
                # This might be an executable (e.g. 'bash')
                libpath = BPF.find_exe(self.library)
            if libpath is None or len(libpath) == 0:
                raise Exception("unable to find library %s" % self.library)
            self.library = libpath

        self.pid = pid
        self.per_pid = per_pid
        self.matched = 0
        # Populated by load(); declared here so the attributes always exist.
        self.bpf = None
        self.usdt = None

    @staticmethod
    def _glob_to_regex(glob):
        """Translate a '*'-only glob into an anchored regular expression.

        Every character other than '*' is escaped, so symbol names that
        contain regex metacharacters (such as the '.' in 'foo.isra.0') are
        matched literally instead of being interpreted as regex syntax.
        """
        literal_pieces = [re.escape(piece) for piece in glob.split('*')]
        return '^' + '.*'.join(literal_pieces) + '$'

    def is_kernel_probe(self):
        """Return True for tracepoints and for 'p' probes with no library."""
        return self.type == "t" or (self.type == "p" and self.library == "")

    def attach(self):
        """Attach the loaded BPF program to every event the pattern matches.

        Must be called after `load`. For USDT probes the attachment already
        happened in `load`, so only the match count is checked here.

        Raises Exception if the pattern matched nothing.
        """
        # `self.pid or -1` hands BCC -1 when no PID was requested
        # (presumably BCC's "all processes" value -- see the BCC reference).
        if self.type == "p":
            if self.library:
                self.bpf.attach_uprobe(name=self.library,
                                       sym_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_uprobes()
            else:
                self.bpf.attach_kprobe(event_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
                self.matched = self.bpf.num_open_kprobes()
        elif self.type == "t":
            self.bpf.attach_tracepoint(tp_re=self.pattern,
                                       fn_name="trace_count",
                                       pid=self.pid or -1)
            self.matched = self.bpf.num_open_tracepoints()
        elif self.type == "u":
            pass    # Nothing to do -- attach already happened in `load`

        if self.matched == 0:
            raise Exception("No functions matched by pattern %s" %
                            self.pattern)

    def load(self):
        """Generate the BPF program text and compile it into `self.bpf`.

        The program consists of a fixed prologue (key struct and the two
        maps) followed by one `trace_count` function, or, for USDT probes,
        one renamed copy of it per matching probe.
        """
        trace_count_text = """
int trace_count(void *ctx) {
    FILTER
    struct key_t key = {};
    key.pid = GET_PID;
    key.stackid = stack_traces.get_stackid(ctx, STACK_FLAGS);
    u64 zero = 0;
    u64 *val = counts.lookup_or_init(&key, &zero);
    (*val)++;
    return 0;
}
"""
        bpf_text = """#include <uapi/linux/ptrace.h>

struct key_t {
    u32 pid;
    int stackid;
};

BPF_HASH(counts, struct key_t);
BPF_STACK_TRACE(stack_traces, 1024);

"""

        # We really mean the tgid from the kernel's perspective, which is in
        # the top 32 bits of bpf_get_current_pid_tgid().
        # The in-program filter is only generated for kernel probes; for
        # user-space probes the PID is passed at attach time instead.
        if self.is_kernel_probe() and self.pid:
            trace_count_text = trace_count_text.replace('FILTER',
                ('u32 pid; pid = bpf_get_current_pid_tgid() >> 32; ' +
                 'if (pid != %d) { return 0; }') % (self.pid))
        else:
            trace_count_text = trace_count_text.replace('FILTER', '')

        # We need per-pid statistics when tracing a user-space process, because
        # the meaning of the symbols depends on the pid. We also need them if
        # per-pid statistics were requested with -P.
        if self.per_pid or not self.is_kernel_probe():
            trace_count_text = trace_count_text.replace('GET_PID',
                'bpf_get_current_pid_tgid() >> 32')
        else:
            # Sentinel key meaning "not tracked per process".
            trace_count_text = trace_count_text.replace(
                'GET_PID', '0xffffffff')

        stack_flags = 'BPF_F_REUSE_STACKID'
        if not self.is_kernel_probe():
            stack_flags += '| BPF_F_USER_STACK'     # can't do both U *and* K
        trace_count_text = trace_count_text.replace('STACK_FLAGS', stack_flags)

        self.usdt = None
        if self.type == "u":
            self.usdt = USDT(path=self.library, pid=self.pid)
            # Compile once instead of re-parsing the pattern for each probe.
            name_matcher = re.compile(self.pattern)
            for probe in self.usdt.enumerate_probes():
                if not self.pid and (probe.bin_path != self.library):
                    continue
                if name_matcher.match(probe.name):
                    # This hack is required because the bpf_usdt_readarg
                    # functions generated need different function names for
                    # each attached probe. If we just stick to trace_count,
                    # we'd get multiple bpf_usdt_readarg helpers with the same
                    # name when enabling more than one USDT probe.
                    new_func = "trace_count_%d" % self.matched
                    bpf_text += trace_count_text.replace(
                        "trace_count", new_func)
                    self.usdt.enable_probe(probe.name, new_func)
                    self.matched += 1
            if debug:
                print(self.usdt.get_text())
        else:
            bpf_text += trace_count_text

        if debug:
            print(bpf_text)
        self.bpf = BPF(text=bpf_text,
                       usdt_contexts=[self.usdt] if self.usdt else [])
Sasha Goldshtein | 07175d0 | 2016-10-06 01:11:55 +0300 | [diff] [blame] | 180 | |
class Tool(object):
    """Command-line driver for stackcount.

    Parses the command line, builds the Probe it describes, and then
    periodically prints the counted stack traces until interrupted.
    """

    def __init__(self):
        """Parse sys.argv and construct the Probe for the given pattern.

        Also sets the module-level `debug` flag from -d/--debug.
        """
        global debug
        self.args = self._build_parser().parse_args()
        debug = self.args.debug
        self.probe = Probe(self.args.pattern, self.args.regexp,
                           self.args.pid, self.args.perpid)
        # pid -> formatted "comm [pid]" line; reset for every summary.
        self.comm_cache = {}

    @staticmethod
    def _build_parser():
        """Return the argparse parser describing the tool's options."""
        examples = """examples:
    ./stackcount submit_bio # count kernel stack traces for submit_bio
    ./stackcount -s ip_output # show symbol offsets
    ./stackcount -sv ip_output # show offsets and raw addresses (verbose)
    ./stackcount 'tcp_send*' # count stacks for funcs matching tcp_send*
    ./stackcount -r '^tcp_send.*' # same as above, using regular expressions
    ./stackcount -Ti 5 ip_output # output every 5 seconds, with timestamps
    ./stackcount -p 185 ip_output # count ip_output stacks for PID 185 only
    ./stackcount -p 185 c:malloc # count stacks for malloc in PID 185
    ./stackcount t:sched:sched_fork # count stacks for sched_fork tracepoint
    ./stackcount -p 185 u:node:* # count stacks for all USDT probes in node
"""
        parser = argparse.ArgumentParser(
            description="Count events and their stack traces",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog=examples)
        parser.add_argument("-p", "--pid", type=int,
            help="trace this PID only")
        # type=int makes argparse reject non-numeric intervals up front and
        # lets "-i 0" be recognised as falsy by run().
        parser.add_argument("-i", "--interval", type=int, default=99999999,
            help="summary interval, seconds")
        parser.add_argument("-T", "--timestamp", action="store_true",
            help="include timestamp on output")
        parser.add_argument("-r", "--regexp", action="store_true",
            help="use regular expressions. Default is \"*\" wildcards only.")
        parser.add_argument("-s", "--offset", action="store_true",
            help="show address offsets")
        parser.add_argument("-P", "--perpid", action="store_true",
            help="display stacks separately for each process")
        parser.add_argument("-v", "--verbose", action="store_true",
            help="show raw addresses")
        parser.add_argument("-d", "--debug", action="store_true",
            help="print BPF program before starting (for debugging purposes)")
        parser.add_argument("pattern",
            help="search expression for events")
        return parser

    def _print_frame(self, addr, pid):
        """Print one stack frame: optional raw address, then the symbol.

        `pid` is forwarded to the BPF object's sym() lookup; run() passes
        -1 for kernel probes and the traced process id otherwise.
        """
        print(" ", end="")
        if self.args.verbose:
            print("%-16x " % addr, end="")
        if self.args.offset:
            print("%s" % self.probe.bpf.sym(addr, pid, show_offset=True))
        else:
            print("%s" % self.probe.bpf.sym(addr, pid))

    @staticmethod
    def _signal_ignore(signum, frame):
        """SIGINT handler installed during shutdown: just emit a newline."""
        print()

    def _comm_for_pid(self, pid):
        """Return the ' comm [pid]' line for a process, caching successes.

        Falls back to ' unknown process [pid]' (not cached) when
        /proc/<pid>/comm cannot be read, e.g. because the process exited.
        """
        if pid in self.comm_cache:
            return self.comm_cache[pid]

        try:
            with open("/proc/%d/comm" % pid) as comm_file:
                name = comm_file.read().strip()
        except (IOError, OSError):
            return " unknown process [%d]" % pid

        comm = " %s [%d]" % (name, pid)
        self.comm_cache[pid] = comm
        return comm

    def _print_summary(self):
        """Print every counted stack (least frequent first) and reset counts."""
        print()
        if self.args.timestamp:
            print("%-8s\n" % strftime("%H:%M:%S"), end="")

        counts = self.probe.bpf["counts"]
        stack_traces = self.probe.bpf["stack_traces"]
        # Start each summary with a fresh cache: it is rebuilt from /proc
        # rather than carried over between intervals.
        self.comm_cache = {}
        kernel_probe = self.probe.is_kernel_probe()
        for k, v in sorted(counts.items(), key=lambda item: item[1].value):
            sym_pid = -1 if kernel_probe else k.pid
            for addr in stack_traces.walk(k.stackid):
                self._print_frame(addr, sym_pid)
            # 0xffffffff is the key used when counts are not kept per pid.
            if not self.args.pid and k.pid != 0xffffffff:
                print(self._comm_for_pid(k.pid))
            print(" %d\n" % v.value)
        counts.clear()

    def run(self):
        """Load and attach the probe, then print a summary per interval.

        Loops until Ctrl-C (or immediately after the first summary when the
        interval is 0), prints a final summary, and exits the process.
        """
        self.probe.load()
        self.probe.attach()
        print("Tracing %d functions for \"%s\"... Hit Ctrl-C to end." %
              (self.probe.matched, self.args.pattern))
        exiting = not self.args.interval
        while True:
            try:
                sleep(self.args.interval)
            except KeyboardInterrupt:
                exiting = True
                # as cleanup can take many seconds, trap Ctrl-C:
                signal.signal(signal.SIGINT, Tool._signal_ignore)

            self._print_summary()

            if exiting:
                print("Detaching...")
                sys.exit()
| 283 | |
if __name__ == "__main__":
    # Top-level boundary: any failure while parsing arguments or tracing is
    # reported as a one-line message, or as a full traceback when the
    # -d/--debug flag has set the module-level `debug` switch.
    try:
        Tool().run()
    except Exception as err:
        # SystemExit is not an Exception subclass, so it is never seen here.
        if debug:
            traceback.print_exc()
        else:
            print(err)