Alexey Ivanov | cc01a9c | 2019-01-16 09:50:46 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
Sasha Goldshtein | 1cba422 | 2016-10-25 11:52:39 -0700 | [diff] [blame] | 2 | # @lint-avoid-python-3-compatibility-imports |
| 3 | # |
| 4 | # ustat Activity stats from high-level languages, including exceptions, |
| 5 | # method calls, class loads, garbage collections, and more. |
| 6 | # For Linux, uses BCC, eBPF. |
| 7 | # |
Marko Myllynen | 9f3662e | 2018-10-10 21:48:53 +0300 | [diff] [blame] | 8 | # USAGE: ustat [-l {java,node,perl,php,python,ruby,tcl}] [-C] |
Sasha Goldshtein | 1cba422 | 2016-10-25 11:52:39 -0700 | [diff] [blame] | 9 | # [-S {cload,excp,gc,method,objnew,thread}] [-r MAXROWS] [-d] |
| 10 | # [interval [count]] |
| 11 | # |
| 12 | # This uses in-kernel eBPF maps to store per process summaries for efficiency. |
| 13 | # Newly-created processes might only be traced at the next interval, if the |
| 14 | # relevant USDT probe requires enabling through a semaphore. |
| 15 | # |
| 16 | # Copyright 2016 Sasha Goldshtein |
| 17 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 18 | # |
| 19 | # 26-Oct-2016 Sasha Goldshtein Created this. |
| 20 | |
| 21 | from __future__ import print_function |
| 22 | import argparse |
Alexey Ivanov | 82f2b9a | 2019-01-02 18:18:05 -0800 | [diff] [blame] | 23 | from bcc import BPF, USDT, USDTException |
Sasha Goldshtein | 1cba422 | 2016-10-25 11:52:39 -0700 | [diff] [blame] | 24 | import os |
Alexey Ivanov | 82f2b9a | 2019-01-02 18:18:05 -0800 | [diff] [blame] | 25 | import sys |
Sasha Goldshtein | 1cba422 | 2016-10-25 11:52:39 -0700 | [diff] [blame] | 26 | from subprocess import call |
| 27 | from time import sleep, strftime |
| 28 | |
class Category(object):
    """
    Names of the event categories this tool reports.

    Each attribute's value equals its own name. The upper-case attribute
    names are also enumerated (via dir()) to build the choices accepted by
    the -S/--sort command-line option, so only category constants should be
    spelled in upper case here.
    """
    # Code-execution related categories.
    METHOD = "METHOD"
    THREAD = "THREAD"
    EXCP = "EXCP"
    # Memory and loading related categories.
    OBJNEW = "OBJNEW"
    CLOAD = "CLOAD"
    GC = "GC"
| 36 | |
class Probe(object):
    """
    The set of USDT probes traced for one language runtime.

    A Probe discovers the running processes of its language, enables the
    relevant USDT probes in each of them, generates the BPF tables and
    handler functions that count events per process, and reads those counts
    back from a loaded BPF module.
    """

    def __init__(self, language, procnames, events):
        """
        Initialize a new probe object with a specific language, set of process
        names to monitor for that language, and a dictionary of events and
        categories. The dictionary is a mapping of USDT probe names (such as
        'gc__start') to event categories supported by this tool -- from the
        Category class.
        """
        self.language = language
        self.procnames = procnames
        self.events = events
        # Populated by get_program(); initialised here so that get_counts()
        # and get_usdts() are safe to call on a freshly constructed probe.
        self.targets = {}
        self.usdts = []

    def _find_targets(self):
        """Find pids where the comm is one of the specified list.

        Rebuilds self.targets as a mapping of pid to its command line, with
        the NUL argument separators replaced by spaces.
        """
        self.targets = {}
        for entry in os.listdir('/proc'):
            if not entry.isdigit():
                continue
            pid = int(entry)
            try:
                with open('/proc/%d/comm' % pid) as comm_file:
                    comm = comm_file.read().strip()
                if comm not in self.procnames:
                    continue
                with open('/proc/%d/cmdline' % pid) as cmdline_file:
                    cmdline = cmdline_file.read()
            except IOError:
                continue        # process may already have terminated
            self.targets[pid] = cmdline.replace('\0', ' ')

    def _enable_probes(self):
        """Create a USDT context per target pid and enable this probe's events.

        Rebuilds self.usdts. Pids for which a USDT context cannot be created
        are reported on stderr and skipped.
        """
        self.usdts = []
        for pid in self.targets:
            try:
                usdt = USDT(pid=pid)
            except USDTException:
                # avoid race condition on pid going away.
                print("failed to instrument %d" % pid, file=sys.stderr)
                continue
            for event in self.events:
                try:
                    usdt.enable_probe(event, "%s_%s" % (self.language, event))
                except Exception:
                    # This process might not have a recent version of the USDT
                    # probes enabled, or might have been compiled without USDT
                    # probes at all. The process could even have been shut down
                    # and the pid been recycled. We have to gracefully handle
                    # the possibility that we can't attach probes to it at all.
                    pass
            self.usdts.append(usdt)

    def _generate_tables(self):
        """Return BPF source declaring one pid -> count hash per event."""
        text = """
BPF_HASH(%s_%s_counts, u32, u64);   // pid to event count
"""
        return ''.join(text % (self.language, event)
                       for event in self.events)

    def _generate_functions(self):
        """Return BPF source with one counting handler function per event.

        Each handler is named <language>_<event> and increments the entry
        for the current tgid in the matching <language>_<event>_counts table.
        """
        text = """
int %s_%s(void *ctx) {
    u64 *valp, zero = 0;
    u32 tgid = bpf_get_current_pid_tgid() >> 32;
    valp = %s_%s_counts.lookup_or_try_init(&tgid, &zero);
    if (valp) {
        ++(*valp);
    }
    return 0;
}
"""
        lang = self.language
        return ''.join(text % (lang, event, lang, event)
                       for event in self.events)

    def get_program(self):
        """Refresh targets and USDT contexts, then return the BPF source."""
        self._find_targets()
        self._enable_probes()
        return self._generate_tables() + self._generate_functions()

    def get_usdts(self):
        """Return the USDT contexts created by the last get_program() call."""
        return self.usdts

    def get_counts(self, bpf):
        """Return a map of event counts per process.

        bpf is indexed by table name ("<language>_<event>_counts"); each
        table yields (pid, count) pairs whose members expose .value. The
        result maps every known target pid to a {category: count} dict.
        Every table is cleared after it has been read.
        """
        zeroes = dict.fromkeys(self.events.values(), 0)
        result = dict((pid, zeroes.copy()) for pid in self.targets)
        for event, category in self.events.items():
            counts = bpf["%s_%s_counts" % (self.language, event)]
            for pid, count in counts.items():
                if pid.value not in result:
                    print("result was not found for %d" % pid.value,
                          file=sys.stderr)
                    continue
                # Several events may share one category (for example two
                # different method-entry probes), so accumulate rather than
                # overwrite -- otherwise only the last event read would count.
                result[pid.value][category] += count.value
            counts.clear()
        return result

    def cleanup(self):
        """Drop the references to the USDT contexts."""
        self.usdts = None
| 131 | |
class Tool(object):
    """
    Command-line driver: parses arguments, builds the per-language probes,
    and on every interval attaches them, sleeps, prints a per-process
    summary table and detaches again.
    """

    def _parse_args(self):
        """Parse the command line into self.args.

        Exits with a usage error if the interval is not a positive integer,
        since it is used both as a sleep duration and as a divisor.
        """
        examples = """examples:
    ./ustat              # stats for all languages, 1 second refresh
    ./ustat -C           # don't clear the screen
    ./ustat -l java      # Java processes only
    ./ustat 5            # 5 second summaries
    ./ustat 5 10         # 5 second summaries, 10 times only
"""
        parser = argparse.ArgumentParser(
            description="Activity stats from high-level languages.",
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog=examples)
        parser.add_argument("-l", "--language",
            choices=["java", "node", "perl", "php", "python", "ruby", "tcl"],
            help="language to trace (default: all languages)")
        parser.add_argument("-C", "--noclear", action="store_true",
            help="don't clear the screen")
        parser.add_argument("-S", "--sort",
            choices=[cat.lower() for cat in dir(Category) if cat.isupper()],
            help="sort by this field (descending order)")
        parser.add_argument("-r", "--maxrows", default=20, type=int,
            help="maximum rows to print, default 20")
        parser.add_argument("-d", "--debug", action="store_true",
            help="Print the resulting BPF program (for debugging purposes)")
        parser.add_argument("interval", nargs="?", default=1, type=int,
            help="output interval, in seconds")
        parser.add_argument("count", nargs="?", default=99999999, type=int,
            help="number of outputs")
        parser.add_argument("--ebpf", action="store_true",
            help=argparse.SUPPRESS)
        self.args = parser.parse_args()
        if self.args.interval <= 0:
            parser.error("interval must be a positive number of seconds")

    def _create_probes(self):
        """Build self.probes: one Probe per language, or only the one
        selected with -l/--language."""
        probes_by_lang = {
            "java": Probe("java", ["java"], {
                "gc__begin": Category.GC,
                "mem__pool__gc__begin": Category.GC,
                "thread__start": Category.THREAD,
                "class__loaded": Category.CLOAD,
                "object__alloc": Category.OBJNEW,
                "method__entry": Category.METHOD,
                "ExceptionOccurred__entry": Category.EXCP
                }),
            "node": Probe("node", ["node"], {
                "gc__start": Category.GC
                }),
            "perl": Probe("perl", ["perl"], {
                "sub__entry": Category.METHOD
                }),
            "php": Probe("php", ["php"], {
                "function__entry": Category.METHOD,
                "compile__file__entry": Category.CLOAD,
                "exception__thrown": Category.EXCP
                }),
            "python": Probe("python", ["python"], {
                "function__entry": Category.METHOD,
                "gc__start": Category.GC
                }),
            "ruby": Probe("ruby", ["ruby", "irb"], {
                "method__entry": Category.METHOD,
                "cmethod__entry": Category.METHOD,
                "gc__mark__begin": Category.GC,
                "gc__sweep__begin": Category.GC,
                "object__create": Category.OBJNEW,
                "hash__create": Category.OBJNEW,
                "string__create": Category.OBJNEW,
                "array__create": Category.OBJNEW,
                "require__entry": Category.CLOAD,
                "load__entry": Category.CLOAD,
                "raise": Category.EXCP
                }),
            "tcl": Probe("tcl", ["tclsh", "wish"], {
                "proc__entry": Category.METHOD,
                "obj__create": Category.OBJNEW
                }),
            }

        if self.args.language:
            self.probes = [probes_by_lang[self.args.language]]
        else:
            # Materialise the view so self.probes is a plain list in both
            # branches and on both Python 2 and Python 3.
            self.probes = list(probes_by_lang.values())

    def _attach_probes(self):
        """Generate and load the BPF program, then attach every active
        USDT uprobe. With --ebpf, print the program and exit instead."""
        program = '\n'.join(p.get_program() for p in self.probes)
        if self.args.debug or self.args.ebpf:
            print(program)
            if self.args.ebpf:
                sys.exit()
        for probe in self.probes:
            print("Attached to %s processes:" % probe.language,
                  ', '.join(map(str, probe.targets)))
        self.bpf = BPF(text=program)
        usdts = [usdt for probe in self.probes for usdt in probe.get_usdts()]
        # Filter out duplicates when we have multiple processes with the same
        # uprobe. We are attaching to these probes manually instead of using
        # the USDT support from the bcc module, because the USDT class attaches
        # to each uprobe with a specific pid. When there is more than one
        # process from some language, we end up attaching more than once to the
        # same uprobe (albeit with different pids), which is not allowed.
        # Instead, we use a global attach (with pid=-1).
        uprobes = set((path, func, addr) for usdt in usdts
                      for (path, func, addr, _)
                      in usdt.enumerate_active_probes())
        for (path, func, addr) in uprobes:
            self.bpf.attach_uprobe(name=path, fn_name=func, addr=addr, pid=-1)

    def _detach_probes(self):
        """Release the USDT contexts and the loaded BPF module."""
        for probe in self.probes:
            probe.cleanup()     # Cleans up USDT contexts
        self.bpf.cleanup()      # Cleans up all attached probes
        self.bpf = None

    def _print_stats(self):
        """Print the load average, the table header and one row per traced
        process, honouring the --sort and --maxrows options."""
        if not self.args.noclear:
            call("clear")
        else:
            print()
        with open("/proc/loadavg") as loadavg:
            print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), loadavg.read()))
        print("%-6s %-20s %-10s %-6s %-10s %-8s %-6s %-6s" % (
            "PID", "CMDLINE", "METHOD/s", "GC/s", "OBJNEW/s",
            "CLOAD/s", "EXC/s", "THR/s"))

        counts = {}
        targets = {}
        for probe in self.probes:
            counts.update(probe.get_counts(self.bpf))
            targets.update(probe.targets)
        if self.args.sort:
            sort_field = self.args.sort.upper()
            rows = sorted(counts.items(),
                          key=lambda kv: -kv[1].get(sort_field, 0))
        else:
            rows = sorted(counts.items(), key=lambda kv: kv[0])

        interval = self.args.interval
        # Floor division keeps the rates integral on both Python 2 and 3.
        for pid, stats in rows[:max(self.args.maxrows, 0)]:
            print("%-6d %-20s %-10d %-6d %-10d %-8d %-6d %-6d" % (
                pid, targets[pid][:20],
                stats.get(Category.METHOD, 0) // interval,
                stats.get(Category.GC, 0) // interval,
                stats.get(Category.OBJNEW, 0) // interval,
                stats.get(Category.CLOAD, 0) // interval,
                stats.get(Category.EXCP, 0) // interval,
                stats.get(Category.THREAD, 0) // interval
                ))

    def _loop_iter(self):
        """Run one attach / sleep / report / detach cycle.

        A Ctrl-C during the sleep sets self.exiting so that the summary for
        the partial interval is still printed before the tool stops.
        """
        self._attach_probes()
        try:
            try:
                sleep(self.args.interval)
            except KeyboardInterrupt:
                self.exiting = True
            self._print_stats()
        finally:
            # Always detach, even if reporting failed part-way through.
            self._detach_probes()

    def run(self):
        """Entry point: parse arguments, then report until the requested
        number of outputs has been produced or the user interrupts."""
        self._parse_args()
        self._create_probes()
        print('Tracing... Output every %d secs. Hit Ctrl-C to end' %
              self.args.interval)
        countdown = self.args.count
        self.exiting = False
        while True:
            self._loop_iter()
            countdown -= 1
            # "<= 0" so that a zero or negative count cannot loop forever.
            if self.exiting or countdown <= 0:
                print("Detaching...")
                sys.exit()
| 302 | |
if __name__ == "__main__":
    # Script entry point. A Ctrl-C that arrives outside the interval sleep
    # (where the tool handles it itself) ends the program quietly instead of
    # printing a traceback.
    try:
        tool = Tool()
        tool.run()
    except KeyboardInterrupt:
        pass