ugc: Add description messages for GC events For runtimes that make it available, we now extract GC event information such as heap sizes and print this information in addition to the GC duration and timestamp.

commit: ddef09a1c99a344905c8268ccc80555a2e341555 [log] [tgz]
author: Sasha Goldshtein <goldshtn@gmail.com> Thu Oct 20 16:07:23 2016 -0700
committer: Sasha Goldshtein <goldshtn@gmail.com> Mon Dec 19 09:46:05 2016 +0000
tree: 1a3c581ea32c758b0e87993113d391253ea493ca
parent: e5d9d99a134901a3af2fb78821eb6989c7f3a17d [diff] [blame]
diff --git a/tools/ugc.py b/tools/ugc.py
index 93464f4..395f9a7 100755
--- a/tools/ugc.py
+++ b/tools/ugc.py

@@ -11,6 +11,8 @@
 #
 # 19-Oct-2016   Sasha Goldshtein   Created this.
 
+# TODO Add Node: gc__start, gc__done (with arguments)
+
 from __future__ import print_function
 import argparse
 from bcc import BPF, USDT
@@ -38,68 +40,118 @@
 
 program = """
 struct gc_event_t {
+    u64 probe_index;
     u64 elapsed_ns;
-    char description[80];
+    u64 field1;
+    u64 field2;
+    u64 field3;
+    u64 field4;
+    char string1[32];
+    char string2[32];
+};
+struct entry_t {
+    u64 start_ns;
+    u64 field1;
+    u64 field2;
 };
 
 BPF_PERF_OUTPUT(gcs);
-BPF_HASH(entry, u64, u64);  // pid to start timestamp
+BPF_HASH(entry, u64, struct entry_t);
 """
 
 class Probe(object):
-    def __init__(self, begin, end, description):
+    def __init__(self, begin, end, begin_save, end_save, formatter):
         self.begin = begin
         self.end = end
-        self.description = description
+        self.begin_save = begin_save
+        self.end_save = end_save
+        self.formatter = formatter
 
     def generate(self):
         text = """
 int trace_%s(struct pt_regs *ctx) {
     u64 pid = bpf_get_current_pid_tgid();
-    u64 timestamp = bpf_ktime_get_ns();
-    entry.update(&pid, &timestamp);
+    struct entry_t e = {};
+    e.start_ns = bpf_ktime_get_ns();
+    %s
+    entry.update(&pid, &e);
     return 0;
 }
 int trace_%s(struct pt_regs *ctx) {
-    u64 *start, elapsed;
-    char description[] = "%s";
+    u64 elapsed;
+    struct entry_t *e;
     struct gc_event_t event = {};
     u64 pid = bpf_get_current_pid_tgid();
-    start = entry.lookup(&pid);
-    if (!start) {
+    e = entry.lookup(&pid);
+    if (!e) {
         return 0;   // missed the entry event on this thread
     }
-    elapsed = bpf_ktime_get_ns() - *start;
-    __builtin_memcpy(&event.description, description, sizeof(description));
+    elapsed = bpf_ktime_get_ns() - e->start_ns;
     event.elapsed_ns = elapsed;
+    %s
     gcs.perf_submit(ctx, &event, sizeof(event));
     return 0;
 }
-        """ % (self.begin, self.end, self.description)
+        """ % (self.begin, self.begin_save, self.end, self.end_save)
         return text
 
     def attach(self):
         usdt.enable_probe(self.begin, "trace_%s" % self.begin)
         usdt.enable_probe(self.end, "trace_%s" % self.end)
 
+    def format(self, data):
+        return self.formatter(data)
+
 probes = []
 
 if args.language == "java":
-    # TODO Extract additional info like mark/sweep/compact/generation etc.
-    #      Oddly, the gc__begin/gc__end probes don't really have any useful
-    #      information, while the mem__pool* ones do. There's also a bunch of
-    #      probes described in the hotspot_gc*.stp file which aren't there
-    #      when looking at a live Java process.
-    probes.append(Probe("mem__pool__gc__begin", "mem__pool__gc__end", "TODO"))
-    probes.append(Probe("gc__begin", "gc__end", "TODO"))
+    # Oddly, the gc__begin/gc__end probes don't really have any useful
+    # information, while the mem__pool* ones do. There's also a bunch of
+    # probes described in the hotspot_gc*.stp file which aren't there
+    # when looking at a live Java process.
+    begin_save = """
+    bpf_usdt_readarg(6, ctx, &e.field1);    // used bytes
+    bpf_usdt_readarg(8, ctx, &e.field2);    // max bytes
+    """
+    end_save = """
+    event.field1 = e->field1;                  // used bytes at start
+    event.field2 = e->field2;                  // max bytes at start
+    bpf_usdt_readarg(6, ctx, &event.field3);   // used bytes at end
+    bpf_usdt_readarg(8, ctx, &event.field4);   // max bytes at end
+    u64 manager = 0, pool = 0;
+    bpf_usdt_readarg(1, ctx, &manager);        // ptr to manager name
+    bpf_usdt_readarg(3, ctx, &pool);           // ptr to pool name
+    bpf_probe_read(&event.string1, sizeof(event.string1), (void *)manager);
+    bpf_probe_read(&event.string2, sizeof(event.string2), (void *)pool);
+    """
+    formatter = lambda e: "%s %s used=%d->%d max=%d->%d" % \
+                (e.string1, e.string2, e.field1, e.field3, e.field2, e.field4)
+    probes.append(Probe("mem__pool__gc__begin", "mem__pool__gc__end",
+                        begin_save, end_save, formatter))
+    probes.append(Probe("gc__begin", "gc__end",
+                        "", "", lambda _: "no additional info available"))
 elif args.language == "python":
-    # TODO In gc__start, arg1 is the generation to collect. In gc__end,
-    #      arg1 is the number of collected objects.
-    probes.append(Probe("gc__start", "gc__done", "GC"))
+    begin_save = """
+    int gen = 0;
+    bpf_usdt_readarg(1, ctx, &gen);
+    e.field1 = gen;
+    """
+    end_save = """
+    long objs = 0;
+    bpf_usdt_readarg(1, ctx, &objs);
+    event.field1 = e->field1;
+    event.field2 = objs;
+    """
+    formatter = lambda event: "gen %d GC collected %d objects" % \
+                              (event.field1, event.field2)
+    probes.append(Probe("gc__start", "gc__done",
+                        begin_save, end_save, formatter))
 elif args.language == "ruby":
     # Ruby GC probes do not have any additional information available.
-    probes.append(Probe("gc__mark__begin", "gc__mark__end", "mark"))
-    probes.append(Probe("gc__sweep__begin", "gc__sweep__end", "sweep"))
+    probes.append(Probe("gc__mark__begin", "gc__mark__end",
+                        "", "", lambda _: "GC mark stage"))
+    probes.append(Probe("gc__sweep__begin", "gc__sweep__end",
+                        "", "", lambda _: "GC sweep stage"))
 
 for probe in probes:
     program += probe.generate()
@@ -113,22 +165,29 @@
 print("Tracing garbage collections in %s process %d... Ctrl-C to quit." %
       (args.language, args.pid))
 time_col = "TIME (ms)" if args.milliseconds else "TIME (us)"
-print("%-8s %-30s %-8s" % ("START", "DESCRIPTION", time_col))
+print("%-8s %-40s %-8s" % ("START", "DESCRIPTION", time_col))
 
-class Data(ct.Structure):
+class GCEvent(ct.Structure):
     _fields_ = [
+        ("probe_index", ct.c_ulonglong),
         ("elapsed_ns", ct.c_ulonglong),
-        ("description", ct.c_char * 80)
+        ("field1", ct.c_ulonglong),
+        ("field2", ct.c_ulonglong),
+        ("field3", ct.c_ulonglong),
+        ("field4", ct.c_ulonglong),
+        ("string1", ct.c_char * 32),
+        ("string2", ct.c_char * 32)
         ]
 
 start_ts = time.time()
 
 def print_event(cpu, data, size):
-    event = ct.cast(data, ct.POINTER(Data)).contents
+    event = ct.cast(data, ct.POINTER(GCEvent)).contents
     elapsed = event.elapsed_ns/1000000 if args.milliseconds else \
               event.elapsed_ns/1000
-    print("%-8.3f %-30s %-8.2f" % (time.time() - start_ts,
-                                   event.description, elapsed))
+    print("%-8.3f %-40s %-8.2f" % (time.time() - start_ts,
+                                   probes[event.probe_index].format(event),
+                                   elapsed))
 
 bpf["gcs"].open_perf_buffer(print_event)
 while 1:
commit	ddef09a1c99a344905c8268ccc80555a2e341555	[log] [tgz]
author	Sasha Goldshtein <goldshtn@gmail.com>	Thu Oct 20 16:07:23 2016 -0700
committer	Sasha Goldshtein <goldshtn@gmail.com>	Mon Dec 19 09:46:05 2016 +0000
tree	1a3c581ea32c758b0e87993113d391253ea493ca
parent	e5d9d99a134901a3af2fb78821eb6989c7f3a17d [diff] [blame]