Alexey Ivanov | cc01a9c | 2019-01-16 09:50:46 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 2 | # @lint-avoid-python-3-compatibility-imports |
| 3 | # |
| 4 | # ugc Summarize garbage collection events in high-level languages. |
| 5 | # For Linux, uses BCC, eBPF. |
| 6 | # |
Marko Myllynen | 27e7aea | 2018-09-26 20:09:07 +0300 | [diff] [blame] | 7 | # USAGE: ugc [-v] [-m] [-M MSEC] [-F FILTER] {java,node,python,ruby} pid |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 8 | # |
| 9 | # Copyright 2016 Sasha Goldshtein |
| 10 | # Licensed under the Apache License, Version 2.0 (the "License") |
| 11 | # |
| 12 | # 19-Oct-2016 Sasha Goldshtein Created this. |
| 13 | |
| 14 | from __future__ import print_function |
| 15 | import argparse |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 16 | from bcc import BPF, USDT, utils |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 17 | import ctypes as ct |
| 18 | import time |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 19 | import os |
| 20 | |
Marko Myllynen | 27e7aea | 2018-09-26 20:09:07 +0300 | [diff] [blame] | 21 | languages = ["java", "node", "python", "ruby"] |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 22 | |
| 23 | examples = """examples: |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 24 | ./ugc -l java 185 # trace Java GCs in process 185 |
| 25 | ./ugc -l ruby 1344 -m # trace Ruby GCs reporting in ms |
| 26 | ./ugc -M 10 -l java 185 # trace only Java GCs longer than 10ms |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 27 | """ |
| 28 | parser = argparse.ArgumentParser( |
| 29 | description="Summarize garbage collection events in high-level languages.", |
| 30 | formatter_class=argparse.RawDescriptionHelpFormatter, |
| 31 | epilog=examples) |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 32 | parser.add_argument("-l", "--language", choices=languages, |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 33 | help="language to trace") |
| 34 | parser.add_argument("pid", type=int, help="process id to attach to") |
| 35 | parser.add_argument("-v", "--verbose", action="store_true", |
| 36 | help="verbose mode: print the BPF program (for debugging purposes)") |
| 37 | parser.add_argument("-m", "--milliseconds", action="store_true", |
| 38 | help="report times in milliseconds (default is microseconds)") |
Sasha Goldshtein | c325200 | 2017-02-08 13:22:40 -0500 | [diff] [blame] | 39 | parser.add_argument("-M", "--minimum", type=int, default=0, |
| 40 | help="display only GCs longer than this many milliseconds") |
| 41 | parser.add_argument("-F", "--filter", type=str, |
| 42 | help="display only GCs whose description contains this text") |
Marko Myllynen | 27e7aea | 2018-09-26 20:09:07 +0300 | [diff] [blame] | 43 | parser.add_argument("--ebpf", action="store_true", |
| 44 | help=argparse.SUPPRESS) |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 45 | args = parser.parse_args() |
| 46 | |
| 47 | usdt = USDT(pid=args.pid) |
| 48 | |
| 49 | program = """ |
| 50 | struct gc_event_t { |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 51 | u64 probe_index; |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 52 | u64 elapsed_ns; |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 53 | u64 field1; |
| 54 | u64 field2; |
| 55 | u64 field3; |
| 56 | u64 field4; |
| 57 | char string1[32]; |
| 58 | char string2[32]; |
| 59 | }; |
| 60 | struct entry_t { |
| 61 | u64 start_ns; |
| 62 | u64 field1; |
| 63 | u64 field2; |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 64 | }; |
| 65 | |
| 66 | BPF_PERF_OUTPUT(gcs); |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 67 | BPF_HASH(entry, u64, struct entry_t); |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 68 | """ |
| 69 | |
| 70 | class Probe(object): |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 71 | def __init__(self, begin, end, begin_save, end_save, formatter): |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 72 | self.begin = begin |
| 73 | self.end = end |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 74 | self.begin_save = begin_save |
| 75 | self.end_save = end_save |
| 76 | self.formatter = formatter |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 77 | |
| 78 | def generate(self): |
| 79 | text = """ |
| 80 | int trace_%s(struct pt_regs *ctx) { |
| 81 | u64 pid = bpf_get_current_pid_tgid(); |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 82 | struct entry_t e = {}; |
| 83 | e.start_ns = bpf_ktime_get_ns(); |
| 84 | %s |
| 85 | entry.update(&pid, &e); |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 86 | return 0; |
| 87 | } |
| 88 | int trace_%s(struct pt_regs *ctx) { |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 89 | u64 elapsed; |
| 90 | struct entry_t *e; |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 91 | struct gc_event_t event = {}; |
| 92 | u64 pid = bpf_get_current_pid_tgid(); |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 93 | e = entry.lookup(&pid); |
| 94 | if (!e) { |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 95 | return 0; // missed the entry event on this thread |
| 96 | } |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 97 | elapsed = bpf_ktime_get_ns() - e->start_ns; |
Sasha Goldshtein | c325200 | 2017-02-08 13:22:40 -0500 | [diff] [blame] | 98 | if (elapsed < %d) { |
| 99 | return 0; |
| 100 | } |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 101 | event.elapsed_ns = elapsed; |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 102 | %s |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 103 | gcs.perf_submit(ctx, &event, sizeof(event)); |
| 104 | return 0; |
| 105 | } |
Sasha Goldshtein | c325200 | 2017-02-08 13:22:40 -0500 | [diff] [blame] | 106 | """ % (self.begin, self.begin_save, self.end, |
| 107 | args.minimum * 1000000, self.end_save) |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 108 | return text |
| 109 | |
| 110 | def attach(self): |
Sasha Goldshtein | dc3a57c | 2017-02-08 16:02:11 -0500 | [diff] [blame] | 111 | usdt.enable_probe_or_bail(self.begin, "trace_%s" % self.begin) |
| 112 | usdt.enable_probe_or_bail(self.end, "trace_%s" % self.end) |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 113 | |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 114 | def format(self, data): |
| 115 | return self.formatter(data) |
| 116 | |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 117 | probes = [] |
| 118 | |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 119 | language = args.language |
| 120 | if not language: |
| 121 | language = utils.detect_language(languages, args.pid) |
| 122 | |
Sasha Goldshtein | bee71b2 | 2016-10-26 06:34:06 -0700 | [diff] [blame] | 123 | # |
| 124 | # Java |
| 125 | # |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 126 | if language == "java": |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 127 | # Oddly, the gc__begin/gc__end probes don't really have any useful |
| 128 | # information, while the mem__pool* ones do. There's also a bunch of |
| 129 | # probes described in the hotspot_gc*.stp file which aren't there |
| 130 | # when looking at a live Java process. |
| 131 | begin_save = """ |
| 132 | bpf_usdt_readarg(6, ctx, &e.field1); // used bytes |
| 133 | bpf_usdt_readarg(8, ctx, &e.field2); // max bytes |
| 134 | """ |
| 135 | end_save = """ |
| 136 | event.field1 = e->field1; // used bytes at start |
| 137 | event.field2 = e->field2; // max bytes at start |
| 138 | bpf_usdt_readarg(6, ctx, &event.field3); // used bytes at end |
| 139 | bpf_usdt_readarg(8, ctx, &event.field4); // max bytes at end |
| 140 | u64 manager = 0, pool = 0; |
| 141 | bpf_usdt_readarg(1, ctx, &manager); // ptr to manager name |
| 142 | bpf_usdt_readarg(3, ctx, &pool); // ptr to pool name |
Sumanth Korikkar | 023154c | 2020-04-20 05:54:57 -0500 | [diff] [blame] | 143 | bpf_probe_read_user(&event.string1, sizeof(event.string1), (void *)manager); |
| 144 | bpf_probe_read_user(&event.string2, sizeof(event.string2), (void *)pool); |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 145 | """ |
Paul Chaignon | 956ca1c | 2017-03-04 20:07:56 +0100 | [diff] [blame] | 146 | |
| 147 | def formatter(e): |
| 148 | "%s %s used=%d->%d max=%d->%d" % \ |
| 149 | (e.string1, e.string2, e.field1, e.field3, e.field2, e.field4) |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 150 | probes.append(Probe("mem__pool__gc__begin", "mem__pool__gc__end", |
| 151 | begin_save, end_save, formatter)) |
| 152 | probes.append(Probe("gc__begin", "gc__end", |
| 153 | "", "", lambda _: "no additional info available")) |
Sasha Goldshtein | bee71b2 | 2016-10-26 06:34:06 -0700 | [diff] [blame] | 154 | # |
Marko Myllynen | 27e7aea | 2018-09-26 20:09:07 +0300 | [diff] [blame] | 155 | # Node |
| 156 | # |
| 157 | elif language == "node": |
| 158 | end_save = """ |
| 159 | u32 gc_type = 0; |
| 160 | bpf_usdt_readarg(1, ctx, &gc_type); |
| 161 | event.field1 = gc_type; |
| 162 | """ |
| 163 | descs = {"GC scavenge": 1, "GC mark-sweep-compact": 2, |
| 164 | "GC incremental mark": 4, "GC weak callbacks": 8} |
| 165 | probes.append(Probe("gc__start", "gc__done", "", end_save, |
| 166 | lambda e: str.join(", ", |
| 167 | [desc for desc, val in descs.items() |
| 168 | if e.field1 & val != 0]))) |
| 169 | # |
Sasha Goldshtein | bee71b2 | 2016-10-26 06:34:06 -0700 | [diff] [blame] | 170 | # Python |
| 171 | # |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 172 | elif language == "python": |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 173 | begin_save = """ |
| 174 | int gen = 0; |
| 175 | bpf_usdt_readarg(1, ctx, &gen); |
| 176 | e.field1 = gen; |
| 177 | """ |
| 178 | end_save = """ |
| 179 | long objs = 0; |
| 180 | bpf_usdt_readarg(1, ctx, &objs); |
| 181 | event.field1 = e->field1; |
| 182 | event.field2 = objs; |
| 183 | """ |
Paul Chaignon | 956ca1c | 2017-03-04 20:07:56 +0100 | [diff] [blame] | 184 | |
| 185 | def formatter(event): |
| 186 | "gen %d GC collected %d objects" % \ |
| 187 | (event.field1, event.field2) |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 188 | probes.append(Probe("gc__start", "gc__done", |
| 189 | begin_save, end_save, formatter)) |
Sasha Goldshtein | bee71b2 | 2016-10-26 06:34:06 -0700 | [diff] [blame] | 190 | # |
| 191 | # Ruby |
| 192 | # |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 193 | elif language == "ruby": |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 194 | # Ruby GC probes do not have any additional information available. |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 195 | probes.append(Probe("gc__mark__begin", "gc__mark__end", |
| 196 | "", "", lambda _: "GC mark stage")) |
| 197 | probes.append(Probe("gc__sweep__begin", "gc__sweep__end", |
| 198 | "", "", lambda _: "GC sweep stage")) |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 199 | |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 200 | else: |
| 201 | print("No language detected; use -l to trace a language.") |
| 202 | exit(1) |
| 203 | |
| 204 | |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 205 | for probe in probes: |
| 206 | program += probe.generate() |
| 207 | probe.attach() |
| 208 | |
Marko Myllynen | 27e7aea | 2018-09-26 20:09:07 +0300 | [diff] [blame] | 209 | if args.ebpf or args.verbose: |
| 210 | if args.verbose: |
| 211 | print(usdt.get_text()) |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 212 | print(program) |
Marko Myllynen | 27e7aea | 2018-09-26 20:09:07 +0300 | [diff] [blame] | 213 | if args.ebpf: |
| 214 | exit() |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 215 | |
| 216 | bpf = BPF(text=program, usdt_contexts=[usdt]) |
| 217 | print("Tracing garbage collections in %s process %d... Ctrl-C to quit." % |
Paul Chaignon | 4bb6d7f | 2017-03-30 19:05:40 +0200 | [diff] [blame] | 218 | (language, args.pid)) |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 219 | time_col = "TIME (ms)" if args.milliseconds else "TIME (us)" |
Sasha Goldshtein | c325200 | 2017-02-08 13:22:40 -0500 | [diff] [blame] | 220 | print("%-8s %-8s %-40s" % ("START", time_col, "DESCRIPTION")) |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 221 | |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 222 | class GCEvent(ct.Structure): |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 223 | _fields_ = [ |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 224 | ("probe_index", ct.c_ulonglong), |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 225 | ("elapsed_ns", ct.c_ulonglong), |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 226 | ("field1", ct.c_ulonglong), |
| 227 | ("field2", ct.c_ulonglong), |
| 228 | ("field3", ct.c_ulonglong), |
| 229 | ("field4", ct.c_ulonglong), |
| 230 | ("string1", ct.c_char * 32), |
| 231 | ("string2", ct.c_char * 32) |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 232 | ] |
| 233 | |
| 234 | start_ts = time.time() |
| 235 | |
| 236 | def print_event(cpu, data, size): |
Sasha Goldshtein | ddef09a | 2016-10-20 16:07:23 -0700 | [diff] [blame] | 237 | event = ct.cast(data, ct.POINTER(GCEvent)).contents |
Paul Chaignon | 956ca1c | 2017-03-04 20:07:56 +0100 | [diff] [blame] | 238 | elapsed = event.elapsed_ns / 1000000 if args.milliseconds else \ |
| 239 | event.elapsed_ns / 1000 |
Sasha Goldshtein | c325200 | 2017-02-08 13:22:40 -0500 | [diff] [blame] | 240 | description = probes[event.probe_index].format(event) |
Paul Chaignon | 956ca1c | 2017-03-04 20:07:56 +0100 | [diff] [blame] | 241 | if args.filter and args.filter not in description: |
Sasha Goldshtein | c325200 | 2017-02-08 13:22:40 -0500 | [diff] [blame] | 242 | return |
| 243 | print("%-8.3f %-8.2f %s" % (time.time() - start_ts, elapsed, description)) |
Sasha Goldshtein | e5d9d99 | 2016-10-17 09:04:37 -0700 | [diff] [blame] | 244 | |
| 245 | bpf["gcs"].open_perf_buffer(print_event) |
| 246 | while 1: |
Jerome Marchand | 5167127 | 2018-12-19 01:57:24 +0100 | [diff] [blame] | 247 | try: |
| 248 | bpf.perf_buffer_poll() |
| 249 | except KeyboardInterrupt: |
| 250 | exit() |