Merge pull request #835 from ColinIanKing/master
Add snapcraft script to package up bcc as a snap
diff --git a/README.md b/README.md
index 719ee12..3fe4dc9 100644
--- a/README.md
+++ b/README.md
@@ -129,6 +129,12 @@
- tools/[tplist](tools/tplist.py): Display kernel tracepoints or USDT probes and their formats. [Examples](tools/tplist_example.txt).
- tools/[trace](tools/trace.py): Trace arbitrary functions, with filters. [Examples](tools/trace_example.txt)
- tools/[ttysnoop](tools/ttysnoop.py): Watch live output from a tty or pts device. [Examples](tools/ttysnoop_example.txt)
+- tools/[ucalls](tools/ucalls.py): Summarize method calls or Linux syscalls in high-level languages. [Examples](tools/ucalls_example.txt)
+- tools/[uflow](tools/uflow.py): Print a method flow graph in high-level languages. [Examples](tools/uflow_example.txt)
+- tools/[ugc](tools/ugc.py): Trace garbage collection events in high-level languages. [Examples](tools/ugc_example.txt)
+- tools/[uobjnew](tools/uobjnew.py): Summarize object allocation events by object type and number of bytes allocated. [Examples](tools/uobjnew_example.txt)
+- tools/[ustat](tools/ustat.py): Collect events such as GCs, thread creations, object allocations, exceptions and more in high-level languages. [Examples](tools/ustat_example.txt)
+- tools/[uthreads](tools/uthreads.py): Trace thread creation events in Java and raw pthreads. [Examples](tools/uthreads_example.txt)
- tools/[vfscount](tools/vfscount.py) tools/[vfscount.c](tools/vfscount.c): Count VFS calls. [Examples](tools/vfscount_example.txt).
- tools/[vfsstat](tools/vfsstat.py) tools/[vfsstat.c](tools/vfsstat.c): Count some VFS calls, with column output. [Examples](tools/vfsstat_example.txt).
- tools/[wakeuptime](tools/wakeuptime.py): Summarize sleep to wakeup time by waker kernel stack. [Examples](tools/wakeuptime_example.txt).
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index b720d5f..998315c 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -26,3 +26,7 @@
add_executable(LLCStat LLCStat.cc)
target_link_libraries(LLCStat bcc-static)
install (TARGETS LLCStat DESTINATION share/bcc/examples/cpp)
+
+add_executable(FollyRequestContextSwitch FollyRequestContextSwitch.cc)
+target_link_libraries(FollyRequestContextSwitch bcc-static)
+install (TARGETS FollyRequestContextSwitch DESTINATION share/bcc/examples/cpp)
diff --git a/examples/cpp/FollyRequestContextSwitch.cc b/examples/cpp/FollyRequestContextSwitch.cc
new file mode 100644
index 0000000..bf3493e
--- /dev/null
+++ b/examples/cpp/FollyRequestContextSwitch.cc
@@ -0,0 +1,105 @@
+/*
+ * FollyRequestContextSwitch Monitor RequestContext switch events for any binary
+ * that uses the class from [folly](http://bit.ly/2h6S1yx).
+ * For Linux, uses BCC, eBPF. Embedded C.
+ *
+ * Basic example of using USDT with BCC.
+ *
+ * USAGE: FollyRequestContextSwitch PATH_TO_BINARY
+ *
+ * Copyright (c) Facebook, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ */
+
+#include <signal.h>
+#include <iostream>
+#include <vector>
+
+#include "BPF.h"
+
+const std::string BPF_PROGRAM = R"(
+#include <linux/sched.h>
+#include <uapi/linux/ptrace.h>
+
+struct event_t {
+ int pid;
+ char name[16];
+ uint64_t old_addr;
+ uint64_t new_addr;
+};
+
+BPF_PERF_OUTPUT(events);
+
+int on_context_switch(struct pt_regs *ctx) {
+ struct event_t event = {};
+
+ event.pid = bpf_get_current_pid_tgid();
+ bpf_get_current_comm(&event.name, sizeof(event.name));
+
+ bpf_usdt_readarg(1, ctx, &event.old_addr);
+ bpf_usdt_readarg(2, ctx, &event.new_addr);
+
+ events.perf_submit(ctx, &event, sizeof(event));
+ return 0;
+}
+)";
+
+// Define the same struct to use in user space.
+struct event_t {
+ int pid;
+ char name[16];
+ uint64_t old_addr;
+ uint64_t new_addr;
+};
+
+void handle_output(void* cb_cookie, void* data, int data_size) {
+ auto event = static_cast<event_t*>(data);
+ std::cout << "PID " << event->pid << " (" << event->name << ") ";
+ std::cout << "folly::RequestContext switch from " << event->old_addr << " to "
+ << event->new_addr << std::endl;
+}
+
+ebpf::BPF* bpf;
+
+void signal_handler(int s) {
+ std::cerr << "Terminating..." << std::endl;
+ delete bpf;
+ exit(0);
+}
+
+int main(int argc, char** argv) {
+ if (argc != 2) {
+ std::cout << "USAGE: FollyRequestContextSwitch PATH_TO_BINARY" << std::endl;
+ exit(1);
+ }
+ std::string binary_path(argv[1]);
+
+ bpf = new ebpf::BPF();
+ std::vector<ebpf::USDT> u;
+ u.emplace_back(binary_path, "folly", "request_context_switch_before",
+ "on_context_switch");
+ auto init_res = bpf->init(BPF_PROGRAM, {}, u);
+ if (init_res.code() != 0) {
+ std::cerr << init_res.msg() << std::endl;
+ return 1;
+ }
+
+ auto attach_res = bpf->attach_usdt(u[0]);
+ if (attach_res.code() != 0) {
+ std::cerr << attach_res.msg() << std::endl;
+ return 1;
+ }
+
+ auto open_res = bpf->open_perf_buffer("events", &handle_output);
+ if (open_res.code() != 0) {
+ std::cerr << open_res.msg() << std::endl;
+ return 1;
+ }
+
+ signal(SIGINT, signal_handler);
+ std::cout << "Started tracing, hit Ctrl-C to terminate." << std::endl;
+ while (true)
+ bpf->poll_perf_buffer("events");
+
+ return 0;
+}
diff --git a/man/man8/trace.8 b/man/man8/trace.8
index c65f849..536bbb8 100644
--- a/man/man8/trace.8
+++ b/man/man8/trace.8
@@ -2,8 +2,8 @@
.SH NAME
trace \- Trace a function and print its arguments or return value, optionally evaluating a filter. Uses Linux eBPF/bcc.
.SH SYNOPSIS
-.B trace [-h] [-p PID] [-t TID] [-v] [-Z STRING_SIZE] [-S]
- [-M MAX_EVENTS] [-o] [-K] [-U] [-I header]
+.B trace [-h] [-p PID] [-L TID] [-v] [-Z STRING_SIZE] [-S]
+ [-M MAX_EVENTS] [-t] [-T] [-K] [-U] [-I header]
probe [probe ...]
.SH DESCRIPTION
trace probes functions you specify and displays trace messages if a particular
@@ -21,7 +21,7 @@
\-p PID
Trace only functions in the process PID.
.TP
-\-t TID
+\-L TID
Trace only functions in the thread TID.
.TP
\-v
@@ -39,9 +39,11 @@
\-M MAX_EVENTS
Print up to MAX_EVENTS trace messages and then exit.
.TP
-\-o
-Print times relative to the beginning of the trace (offsets), in seconds. The
-default is to print absolute time.
+\-t
+Print times relative to the beginning of the trace (offsets), in seconds.
+.TP
+\-T
+Print the time column.
.TP
\-K
Print the kernel stack for each event.
diff --git a/man/man8/ucalls.8 b/man/man8/ucalls.8
new file mode 100644
index 0000000..b1f4710
--- /dev/null
+++ b/man/man8/ucalls.8
@@ -0,0 +1,84 @@
+.TH ucalls 8 "2016-11-07" "USER COMMANDS"
+.SH NAME
+ucalls \- Summarize method calls from high-level languages and Linux syscalls.
+.SH SYNOPSIS
+.B ucalls [-l {java,python,ruby}] [-h] [-T TOP] [-L] [-S] [-v] [-m] pid [interval]
+.SH DESCRIPTION
+This tool summarizes method calls from high-level languages such as Python,
+Java, and Ruby. It can also trace Linux system calls. Whenever a method is
+invoked, ucalls records the call count and optionally the method's execution
+time (latency) and displays a summary.
+
+This uses in-kernel eBPF maps to store per process summaries for efficiency.
+
+This tool relies on USDT probes embedded in many high-level languages, such as
+Node, Java, Python, and Ruby. It requires a runtime instrumented with these
+probes, which in some cases requires building from source with a USDT-specific
+flag, such as "--enable-dtrace" or "--with-dtrace". For Java, method probes are
+not enabled by default, and can be turned on by running the Java process with
+the "-XX:+ExtendedDTraceProbes" flag.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-l {java,python,ruby}
+The language to trace. If not provided, only syscalls are traced (when the \-S
+option is used).
+.TP
+\-T TOP
+Print only the top methods by frequency or latency.
+.TP
+\-L
+Collect method invocation latency (duration).
+.TP
+\-S
+Collect Linux syscalls frequency and timing.
+.TP
+\-v
+Print the resulting BPF program, for debugging purposes.
+.TP
+\-m
+Print times in milliseconds (the default is microseconds).
+.TP
+pid
+The process id to trace.
+.TP
+interval
+Print summary after this number of seconds and then exit. By default, wait for
+Ctrl+C to terminate.
+.SH EXAMPLES
+.TP
+Trace the top 10 Ruby method calls:
+#
+.B ucalls -T 10 -l ruby 1344
+.TP
+Trace Python method calls and Linux syscalls including latency in milliseconds:
+#
+.B ucalls -l python -mL 2020
+.TP
+Trace only syscalls and print a summary after 10 seconds:
+#
+.B ucalls -S 788 10
+.SH OVERHEAD
+Tracing individual method calls will produce a considerable overhead in all
+high-level languages. For languages with just-in-time compilation, such as
+Java, the overhead can be more considerable than for interpreted languages.
+On the other hand, syscall tracing will typically be tolerable for most
+processes, unless they have a very unusual rate of system calls.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _example.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Sasha Goldshtein
+.SH SEE ALSO
+ustat(8), argdist(8)
diff --git a/man/man8/uflow.8 b/man/man8/uflow.8
new file mode 100644
index 0000000..35daff2
--- /dev/null
+++ b/man/man8/uflow.8
@@ -0,0 +1,84 @@
+.TH uflow 8 "2016-11-07" "USER COMMANDS"
+.SH NAME
+uflow \- Print a flow graph of method calls in high-level languages.
+.SH SYNOPSIS
+.B uflow [-h] [-M METHOD] [-C CLAZZ] [-v] {java,python,ruby} pid
+.SH DESCRIPTION
+uflow traces method calls and prints them in a flow graph that can facilitate
+debugging and diagnostics by following the program's execution (method flow).
+
+This tool relies on USDT probes embedded in many high-level languages, such as
+Node, Java, Python, and Ruby. It requires a runtime instrumented with these
+probes, which in some cases requires building from source with a USDT-specific
+flag, such as "--enable-dtrace" or "--with-dtrace". For Java processes, the
+startup flag "-XX:+ExtendedDTraceProbes" is required.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-M METHOD
+Print only method calls where the method name begins with this string.
+.TP
+\-C CLAZZ
+Print only method calls where the class name begins with this string. The class
+name interpretation strongly depends on the language. For example, in Java use
+"package/subpackage/ClassName" to refer to classes.
+.TP
+\-v
+Print the resulting BPF program, for debugging purposes.
+.TP
+{java,python,ruby}
+The language to trace.
+.TP
+pid
+The process id to trace.
+.SH EXAMPLES
+.TP
+Follow method flow in a Ruby process:
+#
+.B uflow ruby 148
+.TP
+Follow method flow in a Java process where the class name is java.lang.Thread:
+#
+.B uflow -C java/lang/Thread java 1802
+.SH FIELDS
+.TP
+CPU
+The CPU number on which the method was invoked. This is useful to easily see
+where the output skips to a different CPU.
+.TP
+PID
+The process id.
+.TP
+TID
+The thread id.
+.TP
+TIME
+The duration of the method call.
+.TP
+METHOD
+The method name.
+.SH OVERHEAD
+This tool has extremely high overhead because it prints every method call. For
+some scenarios, you might see lost samples in the output as the tool is unable
+to keep up with the rate of data coming from the kernel. Filtering by class
+or method prefix can help reduce the amount of data printed, but there is still
+a very high overhead in the collection mechanism. Do not use for performance-
+sensitive production scenarios, and always test first.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _example.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Sasha Goldshtein
+.SH SEE ALSO
+trace(8), ustat(8)
diff --git a/man/man8/ugc.8 b/man/man8/ugc.8
new file mode 100644
index 0000000..2629fd9
--- /dev/null
+++ b/man/man8/ugc.8
@@ -0,0 +1,71 @@
+.TH ugc 8 "2016-11-07" "USER COMMANDS"
+.SH NAME
+ugc \- Trace garbage collection events in high-level languages.
+.SH SYNOPSIS
+.B ugc [-h] [-v] [-m] {java,python,ruby,node} pid
+.SH DESCRIPTION
+This traces garbage collection events as they occur, including their duration
+and any additional information (such as generation collected or type of GC)
+provided by the respective language's runtime.
+
+This tool relies on USDT probes embedded in many high-level languages, such as
+Node, Java, Python, and Ruby. It requires a runtime instrumented with these
+probes, which in some cases requires building from source with a USDT-specific
+flag, such as "--enable-dtrace" or "--with-dtrace".
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-v
+Print the resulting BPF program, for debugging purposes.
+.TP
+\-m
+Print times in milliseconds. The default is microseconds.
+.TP
+{java,python,ruby,node}
+The language to trace.
+.TP
+pid
+The process id to trace.
+.SH EXAMPLES
+.TP
+Trace garbage collections in a specific Node process:
+#
+.B ugc node 148
+.TP
+Trace garbage collections in a specific Java process, and print GC times in
+milliseconds:
+#
+.B ugc -m java 6004
+.SH FIELDS
+.TP
+START
+The start time of the GC, in seconds from the beginning of the trace.
+.TP
+DESCRIPTION
+The runtime-provided description of this garbage collection event.
+.TP
+TIME
+The duration of the garbage collection event.
+.SH OVERHEAD
+Garbage collection events, even if frequent, should not produce a considerable
+overhead when traced because they are still not very common. Even hundreds of
+GCs per second (which is a very high rate) will still produce a fairly
+negligible overhead.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _example.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Sasha Goldshtein
+.SH SEE ALSO
+trace(8), ustat(8), uobjnew(8)
diff --git a/man/man8/uobjnew.8 b/man/man8/uobjnew.8
new file mode 100644
index 0000000..1abaec4
--- /dev/null
+++ b/man/man8/uobjnew.8
@@ -0,0 +1,79 @@
+.TH uobjnew 8 "2016-11-07" "USER COMMANDS"
+.SH NAME
+uobjnew \- Summarize object allocations in high-level languages.
+.SH SYNOPSIS
+.B uobjnew [-h] [-C TOP_COUNT] [-S TOP_SIZE] [-v] {java,ruby,c} pid [interval]
+.SH DESCRIPTION
+uobjnew traces object allocations in high-level languages (including "malloc")
+and prints summaries of the most frequently allocated types by number of
+objects or number of bytes.
+
+This tool relies on USDT probes embedded in many high-level languages, such as
+Node, Java, Python, and Ruby. It requires a runtime instrumented with these
+probes, which in some cases requires building from source with a USDT-specific
+flag, such as "--enable-dtrace" or "--with-dtrace". For Java, the Java process
+must be started with the "-XX:+ExtendedDTraceProbes" flag.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-C TOP_COUNT
+Print the top object types sorted by number of instances.
+.TP
+\-S TOP_SIZE
+Print the top object types sorted by size.
+.TP
+\-v
+Print the resulting BPF program, for debugging purposes.
+.TP
+{java,ruby,c}
+The language to trace.
+.TP
+pid
+The process id to trace.
+.TP
+interval
+Wait this many seconds and then print the summary and exit. By default, wait
+for Ctrl+C to exit.
+.SH EXAMPLES
+.TP
+Trace object allocations in a Ruby process:
+#
+.B uobjnew ruby 148
+.TP
+Trace object allocations from "malloc" and print the top 10 by total size:
+#
+.B uobjnew -S 10 c 1788
+.SH FIELDS
+.TP
+TYPE
+The object type being allocated. For C (malloc), this is the block size.
+.TP
+ALLOCS
+The number of objects allocated.
+.TP
+BYTES
+The number of bytes allocated.
+.SH OVERHEAD
+Object allocation events are quite frequent, and therefore the overhead from
+running this tool can be considerable. Use with caution and make sure to
+test before using in a production environment. Nonetheless, even thousands of
+allocations per second will likely produce a reasonable overhead when
+investigating a problem.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _example.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Sasha Goldshtein
+.SH SEE ALSO
+ustat(8), ugc(8), memleak(8)
diff --git a/man/man8/ustat.8 b/man/man8/ustat.8
new file mode 100644
index 0000000..a55ee09
--- /dev/null
+++ b/man/man8/ustat.8
@@ -0,0 +1,116 @@
+.TH ustat 8 "2016-11-07" "USER COMMANDS"
+.SH NAME
+ustat \- Activity stats from high-level languages.
+.SH SYNOPSIS
+.B ustat [-l {java,python,ruby,node}] [-C] [-S {cload,excp,gc,method,objnew,thread}] [-r MAXROWS] [-d] [interval [count]]
+.SH DESCRIPTION
+This is "top" for high-level language events, such as garbage collections,
+exceptions, thread creations, object allocations, method calls, and more. The
+events are aggregated for each process and printed in a top-like table, which
+can be sorted by various fields.
+
+This uses in-kernel eBPF maps to store per process summaries for efficiency.
+
+This tool relies on USDT probes embedded in many high-level languages, such as
+Node, Java, Python, and Ruby. It requires a runtime instrumented with these
+probes, which in some cases requires building from source with a USDT-specific
+flag, such as "--enable-dtrace" or "--with-dtrace". For Java, some probes are
+not enabled by default, and can be turned on by running the Java process with
+the "-XX:+ExtendedDTraceProbes" flag.
+
+Newly-created processes will only be traced at the next interval. If you run
+this tool with a short interval (say, 1-5 seconds), this should be virtually
+unnoticeable. For longer intervals, you might miss processes that were started
+and terminated during the interval window.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-l {java,python,ruby,node}
+The language to trace. By default, all languages are traced.
+.TP
+\-C
+Do not clear the screen between updates.
+.TP
+\-S {cload,excp,gc,method,objnew,thread}
+Sort the output by the specified field.
+.TP
+\-r MAXROWS
+Do not print more than this number of rows.
+.TP
+\-d
+Print the resulting BPF program, for debugging purposes.
+.TP
+interval
+Interval between updates, in seconds.
+.TP
+count
+Number of interval summaries.
+.SH EXAMPLES
+.TP
+Summarize activity in high-level languages, 1 second refresh:
+#
+.B ustat
+.TP
+Don't clear the screen, and top 8 rows only:
+#
+.B ustat -Cr 8
+.TP
+5 second summaries, 10 times only:
+#
+.B ustat 5 10
+.SH FIELDS
+.TP
+loadavg
+The contents of /proc/loadavg.
+.TP
+PID
+Process ID.
+.TP
+CMDLINE
+Process command line (often the second and following arguments will give you a
+hint as to which application is being run).
+.TP
+METHOD/s
+Count of method invocations during interval.
+.TP
+GC/s
+Count of garbage collections during interval.
+.TP
+OBJNEW/s
+Count of objects allocated during interval.
+.TP
+CLOAD/s
+Count of classes loaded during interval.
+.TP
+EXC/s
+Count of exceptions thrown during interval.
+.TP
+THR/s
+Count of threads created during interval.
+.SH OVERHEAD
+When using this tool with high-frequency events, such as method calls, a very
+significant slow-down can be expected. However, many of the high-level
+languages covered by this tool already have a fairly high per-method invocation
+cost, especially when running in interpreted mode. For the lower-frequency
+events, such as garbage collections or thread creations, the overhead should
+not be significant. Specifically, when probing Java processes and not using the
+"-XX:+ExtendedDTraceProbes" flag, the most expensive probes are not emitted,
+and the overhead should be acceptable.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _example.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Sasha Goldshtein
+.SH SEE ALSO
+trace(8), argdist(8), tplist(8)
diff --git a/man/man8/uthreads.8 b/man/man8/uthreads.8
new file mode 100644
index 0000000..8d4d2bb
--- /dev/null
+++ b/man/man8/uthreads.8
@@ -0,0 +1,64 @@
+.TH uthreads 8 "2016-11-07" "USER COMMANDS"
+.SH NAME
+uthreads \- Trace thread creation events in Java or pthreads.
+.SH SYNOPSIS
+.B uthreads [-h] [-l {java}] [-v] pid
+.SH DESCRIPTION
+This traces thread creation events in Java processes, or pthread creation
+events in any process. When a thread is created, its name or start address
+is printed.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-l {java}
+The language to trace (currently only Java is supported). When no language is
+specified, only pthread creations are traced.
+.TP
+\-v
+Print the resulting BPF program, for debugging purposes.
+.TP
+pid
+The process id to trace.
+.SH EXAMPLES
+.TP
+Trace Java thread creations:
+#
+.B uthreads -l java 148
+.TP
+Trace pthread creations:
+#
+.B uthreads 1802
+.SH FIELDS
+.TP
+TIME
+The event's time in seconds from the beginning of the trace.
+.TP
+ID
+The thread's ID. The information in this column depends on the runtime.
+.TP
+TYPE
+Event type -- thread start, stop, or pthread event.
+.TP
+DESCRIPTION
+The thread's name, or the name of the function at its start address.
+.SH OVERHEAD
+Thread start and stop events are usually not very frequent, which makes this
+tool's overhead negligible.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _example.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Sasha Goldshtein
+.SH SEE ALSO
+ustat(8), trace(8)
diff --git a/src/cc/BPF.cc b/src/cc/BPF.cc
index 265b4df..4a7ca2c 100644
--- a/src/cc/BPF.cc
+++ b/src/cc/BPF.cc
@@ -30,6 +30,7 @@
#include "bpf_module.h"
#include "libbpf.h"
#include "perf_reader.h"
+#include "usdt.h"
#include "BPF.h"
@@ -50,13 +51,25 @@
}
StatusTuple BPF::init(const std::string& bpf_program,
- std::vector<std::string> cflags) {
+ std::vector<std::string> cflags, std::vector<USDT> usdt) {
+ std::string all_bpf_program;
+
+ for (auto u : usdt) {
+ if (!u.initialized_)
+ TRY2(u.init());
+ all_bpf_program += u.program_text_;
+ usdt_.push_back(std::move(u));
+ }
+
auto flags_len = cflags.size();
const char* flags[flags_len];
for (size_t i = 0; i < flags_len; i++)
flags[i] = cflags[i].c_str();
- if (bpf_module_->load_string(bpf_program, flags, flags_len) != 0)
+
+ all_bpf_program += bpf_program;
+ if (bpf_module_->load_string(all_bpf_program, flags, flags_len) != 0)
return StatusTuple(-1, "Unable to initialize BPF program");
+
return StatusTuple(0);
};
@@ -206,6 +219,37 @@
return StatusTuple(0);
}
+StatusTuple BPF::attach_usdt(const USDT& usdt, pid_t pid, int cpu,
+ int group_fd) {
+ for (auto& u : usdt_)
+ if (u == usdt) {
+ bool failed = false;
+ std::string err_msg;
+ int cnt = 0;
+ for (auto addr : u.addresses_) {
+ auto res =
+ attach_uprobe(u.binary_path_, std::string(), u.probe_func_, addr);
+ if (res.code() != 0) {
+ failed = true;
+ err_msg += "USDT " + u.print_name() + " at " + std::to_string(addr);
+ err_msg += ": " + res.msg() + "\n";
+ break;
+ }
+ cnt++;
+ }
+ if (failed) {
+ for (int i = 0; i < cnt; i++) {
+ auto res =
+ detach_uprobe(u.binary_path_, std::string(), u.addresses_[i]);
+ err_msg += "During clean up: " + res.msg() + "\n";
+ }
+ return StatusTuple(-1, err_msg);
+ } else
+ return StatusTuple(0);
+ }
+ return StatusTuple(-1, "USDT %s not found", usdt.print_name().c_str());
+}
+
StatusTuple BPF::attach_tracepoint(const std::string& tracepoint,
const std::string& probe_func,
pid_t pid, int cpu, int group_fd,
@@ -311,6 +355,27 @@
return StatusTuple(0);
}
+StatusTuple BPF::detach_usdt(const USDT& usdt) {
+ for (auto& u : usdt_)
+ if (u == usdt) {
+ bool failed = false;
+ std::string err_msg;
+ for (auto addr : u.addresses_) {
+ auto res = detach_uprobe(u.binary_path_, std::string(), addr);
+ if (res.code() != 0) {
+ failed = true;
+ err_msg += "USDT " + u.print_name() + " at " + std::to_string(addr);
+ err_msg += ": " + res.msg() + "\n";
+ }
+ }
+ if (failed)
+ return StatusTuple(-1, err_msg);
+ else
+ return StatusTuple(0);
+ }
+ return StatusTuple(-1, "USDT %s not found", usdt.print_name().c_str());
+}
+
StatusTuple BPF::detach_tracepoint(const std::string& tracepoint) {
auto it = tracepoints_.find(tracepoint);
if (it == tracepoints_.end())
@@ -383,7 +448,7 @@
StatusTuple BPF::unload_func(const std::string& func_name) {
auto it = funcs_.find(func_name);
if (it == funcs_.end())
- return StatusTuple(-1, "Probe function %s not loaded", func_name.c_str());
+ return StatusTuple(0);
int res = close(it->second);
if (res != 0)
@@ -478,4 +543,28 @@
return StatusTuple(0);
}
+StatusTuple USDT::init() {
+ auto ctx =
+ std::unique_ptr<::USDT::Context>(new ::USDT::Context(binary_path_));
+ if (!ctx->loaded())
+ return StatusTuple(-1, "Unable to load USDT " + print_name());
+ auto probe = ctx->get(name_);
+ if (probe == nullptr)
+ return StatusTuple(-1, "Unable to find USDT " + print_name());
+
+ if (!probe->enable(probe_func_))
+ return StatusTuple(-1, "Failed to enable USDT " + print_name());
+ std::ostringstream stream;
+ if (!probe->usdt_getarg(stream))
+ return StatusTuple(
+ -1, "Unable to generate program text for USDT " + print_name());
+ program_text_ = ::USDT::USDT_PROGRAM_HEADER + stream.str();
+
+ for (size_t i = 0; i < probe->num_locations(); i++)
+ addresses_.push_back(probe->address(i));
+
+ initialized_ = true;
+ return StatusTuple(0);
+}
+
} // namespace ebpf
diff --git a/src/cc/BPF.h b/src/cc/BPF.h
index 420dd6b..b96c66f 100644
--- a/src/cc/BPF.h
+++ b/src/cc/BPF.h
@@ -40,13 +40,16 @@
std::map<int, int>* per_cpu_fd;
};
+class USDT;
+
class BPF {
public:
static const int BPF_MAX_STACK_DEPTH = 127;
explicit BPF(unsigned int flag = 0) : bpf_module_(new BPFModule(flag)) {}
StatusTuple init(const std::string& bpf_program,
- std::vector<std::string> cflags = {});
+ std::vector<std::string> cflags = {},
+ std::vector<USDT> usdt = {});
~BPF();
StatusTuple detach_all();
@@ -70,6 +73,9 @@
const std::string& binary_path, const std::string& symbol,
uint64_t symbol_addr = 0,
bpf_attach_type attach_type = bpf_attach_type::probe_entry);
+ StatusTuple attach_usdt(const USDT& usdt, pid_t pid = -1, int cpu = 0,
+ int group_fd = -1);
+ StatusTuple detach_usdt(const USDT& usdt);
StatusTuple attach_tracepoint(const std::string& tracepoint,
const std::string& probe_func,
@@ -151,6 +157,8 @@
std::map<std::string, int> funcs_;
+ std::vector<USDT> usdt_;
+
std::map<std::string, open_probe_t> kprobes_;
std::map<std::string, open_probe_t> uprobes_;
std::map<std::string, open_probe_t> tracepoints_;
@@ -158,4 +166,40 @@
std::map<std::pair<uint32_t, uint32_t>, open_probe_t> perf_events_;
};
+class USDT {
+public:
+ USDT(const std::string& binary_path, const std::string& provider,
+ const std::string& name, const std::string& probe_func)
+ : initialized_(false),
+ binary_path_(binary_path),
+ provider_(provider),
+ name_(name),
+ probe_func_(probe_func) {}
+
+ bool operator==(const USDT& other) const {
+ return (provider_ == other.provider_) && (name_ == other.name_) &&
+ (binary_path_ == other.binary_path_) &&
+ (probe_func_ == other.probe_func_);
+ }
+
+ std::string print_name() const {
+ return provider_ + ":" + name_ + " from " + binary_path_;
+ }
+
+private:
+ StatusTuple init();
+ bool initialized_;
+
+ std::string binary_path_;
+ std::string provider_;
+ std::string name_;
+ std::string probe_func_;
+
+ std::vector<intptr_t> addresses_;
+
+ std::string program_text_;
+
+ friend class BPF;
+};
+
} // namespace ebpf
diff --git a/src/cc/CMakeLists.txt b/src/cc/CMakeLists.txt
index febcee2..fed6d3a 100644
--- a/src/cc/CMakeLists.txt
+++ b/src/cc/CMakeLists.txt
@@ -67,7 +67,7 @@
install(TARGETS bcc-shared LIBRARY COMPONENT libbcc
DESTINATION ${CMAKE_INSTALL_LIBDIR})
-install(FILES bpf_common.h bpf_module.h bcc_syms.h bcc_exception.h libbpf.h perf_reader.h BPF.h BPFTable.h COMPONENT libbcc
+install(FILES bpf_common.h bpf_module.h bcc_syms.h bcc_exception.h libbpf.h perf_reader.h BPF.h BPFTable.h shared_table.h COMPONENT libbcc
DESTINATION include/bcc)
install(DIRECTORY compat/linux/ COMPONENT libbcc
DESTINATION include/bcc/compat/linux
diff --git a/src/cc/bpf_module.cc b/src/cc/bpf_module.cc
index be0a524..ee39c00 100644
--- a/src/cc/bpf_module.cc
+++ b/src/cc/bpf_module.cc
@@ -118,10 +118,11 @@
ctx_.reset();
if (tables_) {
for (auto table : *tables_) {
- if (table.is_shared)
+ if (table.is_shared) {
SharedTables::instance()->remove_fd(table.name);
- else
+ } else if (!table.is_extern) {
close(table.fd);
+ }
}
}
}
diff --git a/src/cc/frontends/clang/b_frontend_action.cc b/src/cc/frontends/clang/b_frontend_action.cc
index 9370386..397ecc6 100644
--- a/src/cc/frontends/clang/b_frontend_action.cc
+++ b/src/cc/frontends/clang/b_frontend_action.cc
@@ -336,6 +336,12 @@
// to:
// bpf_table_foo_elem(bpf_pseudo_fd(table), &key [,&leaf])
bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
+ // Get rewritten text given a source range, w/ expansion range applied
+ auto getRewrittenText = [this] (SourceRange R) {
+ auto r = rewriter_.getSourceMgr().getExpansionRange(R);
+ return rewriter_.getRewrittenText(r);
+ };
+
// make sure node is a reference to a bpf table, which is assured by the
// presence of the section("maps/<typename>") GNU __attribute__
if (MemberExpr *Memb = dyn_cast<MemberExpr>(Call->getCallee()->IgnoreImplicit())) {
@@ -345,9 +351,8 @@
if (!A->getName().startswith("maps"))
return true;
- SourceRange argRange(Call->getArg(0)->getLocStart(),
- Call->getArg(Call->getNumArgs()-1)->getLocEnd());
- string args = rewriter_.getRewrittenText(argRange);
+ string args = getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
+ Call->getArg(Call->getNumArgs() - 1)->getLocEnd()));
// find the table fd, which was opened at declaration time
auto table_it = tables_.begin();
@@ -366,10 +371,8 @@
if (memb_name == "lookup_or_init") {
map_update_policy = "BPF_NOEXIST";
string name = Ref->getDecl()->getName();
- string arg0 = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
- Call->getArg(0)->getLocEnd()));
- string arg1 = rewriter_.getRewrittenText(SourceRange(Call->getArg(1)->getLocStart(),
- Call->getArg(1)->getLocEnd()));
+ string arg0 = getRewrittenText(Call->getArg(0)->getSourceRange());
+ string arg1 = getRewrittenText(Call->getArg(1)->getSourceRange());
string lookup = "bpf_map_lookup_elem_(bpf_pseudo_fd(1, " + fd + ")";
string update = "bpf_map_update_elem_(bpf_pseudo_fd(1, " + fd + ")";
txt = "({typeof(" + name + ".leaf) *leaf = " + lookup + ", " + arg0 + "); ";
@@ -381,8 +384,7 @@
txt += "leaf;})";
} else if (memb_name == "increment") {
string name = Ref->getDecl()->getName();
- string arg0 = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
- Call->getArg(0)->getLocEnd()));
+ string arg0 = getRewrittenText(Call->getArg(0)->getSourceRange());
string lookup = "bpf_map_lookup_elem_(bpf_pseudo_fd(1, " + fd + ")";
string update = "bpf_map_update_elem_(bpf_pseudo_fd(1, " + fd + ")";
txt = "({ typeof(" + name + ".key) _key = " + arg0 + "; ";
@@ -394,21 +396,16 @@
txt += "if (_leaf) (*_leaf)++; })";
} else if (memb_name == "perf_submit") {
string name = Ref->getDecl()->getName();
- string arg0 = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
- Call->getArg(0)->getLocEnd()));
- string args_other = rewriter_.getRewrittenText(SourceRange(Call->getArg(1)->getLocStart(),
- Call->getArg(2)->getLocEnd()));
+ string arg0 = getRewrittenText(Call->getArg(0)->getSourceRange());
+ string args_other = getRewrittenText(SourceRange(Call->getArg(1)->getLocStart(),
+ Call->getArg(2)->getLocEnd()));
txt = "bpf_perf_event_output(" + arg0 + ", bpf_pseudo_fd(1, " + fd + ")";
txt += ", bpf_get_smp_processor_id(), " + args_other + ")";
} else if (memb_name == "perf_submit_skb") {
- string skb = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
- Call->getArg(0)->getLocEnd()));
- string skb_len = rewriter_.getRewrittenText(SourceRange(Call->getArg(1)->getLocStart(),
- Call->getArg(1)->getLocEnd()));
- string meta = rewriter_.getRewrittenText(SourceRange(Call->getArg(2)->getLocStart(),
- Call->getArg(2)->getLocEnd()));
- string meta_len = rewriter_.getRewrittenText(SourceRange(Call->getArg(3)->getLocStart(),
- Call->getArg(3)->getLocEnd()));
+ string skb = getRewrittenText(Call->getArg(0)->getSourceRange());
+ string skb_len = getRewrittenText(Call->getArg(1)->getSourceRange());
+ string meta = getRewrittenText(Call->getArg(2)->getSourceRange());
+ string meta_len = getRewrittenText(Call->getArg(3)->getSourceRange());
txt = "bpf_perf_event_output(" +
skb + ", " +
"bpf_pseudo_fd(1, " + fd + "), " +
@@ -417,8 +414,7 @@
meta_len + ");";
} else if (memb_name == "get_stackid") {
if (table_it->type == BPF_MAP_TYPE_STACK_TRACE) {
- string arg0 = rewriter_.getRewrittenText(SourceRange(Call->getArg(0)->getLocStart(),
- Call->getArg(0)->getLocEnd()));
+ string arg0 = getRewrittenText(Call->getArg(0)->getSourceRange());
txt = "bpf_get_stackid(";
txt += "bpf_pseudo_fd(1, " + fd + "), " + arg0;
rewrite_end = Call->getArg(0)->getLocEnd();
@@ -474,7 +470,7 @@
vector<string> args;
for (auto arg : Call->arguments())
- args.push_back(rewriter_.getRewrittenText(SourceRange(arg->getLocStart(), arg->getLocEnd())));
+ args.push_back(getRewrittenText(arg->getSourceRange()));
string text;
if (Decl->getName() == "incr_cksum_l3") {
@@ -635,7 +631,6 @@
++i;
}
- bool is_extern = false;
bpf_map_type map_type = BPF_MAP_TYPE_UNSPEC;
if (A->getName() == "maps/hash") {
map_type = BPF_MAP_TYPE_HASH;
@@ -670,8 +665,9 @@
} else if (A->getName() == "maps/stacktrace") {
map_type = BPF_MAP_TYPE_STACK_TRACE;
} else if (A->getName() == "maps/extern") {
- is_extern = true;
+ table.is_extern = true;
table.fd = SharedTables::instance()->lookup_fd(table.name);
+ table.type = SharedTables::instance()->lookup_type(table.name);
} else if (A->getName() == "maps/export") {
if (table.name.substr(0, 2) == "__")
table.name = table.name.substr(2);
@@ -682,7 +678,7 @@
error(Decl->getLocStart(), "reference to undefined table");
return false;
}
- if (!SharedTables::instance()->insert_fd(table.name, table_it->fd)) {
+ if (!SharedTables::instance()->insert_fd(table.name, table_it->fd, table_it->type)) {
error(Decl->getLocStart(), "could not export bpf map %0: %1") << table.name << "already in use";
return false;
}
@@ -690,7 +686,7 @@
return true;
}
- if (!is_extern) {
+ if (!table.is_extern) {
if (map_type == BPF_MAP_TYPE_UNSPEC) {
error(Decl->getLocStart(), "unsupported map type: %0") << A->getName();
return false;
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index 24967b9..4d34c62 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -631,3 +631,22 @@
// callers to detach anything they attach.
return 0;
}
+
+// Issue the bpf(2) BPF_OBJ_PIN command with bpf_fd = fd and
+// pathname = pathname, and return the raw syscall() result unchanged.
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ // NOTE(review): only pathname and bpf_fd are named in the initializer;
+ // confirm the remaining bytes of the union are zero when passed down.
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ .bpf_fd = fd,
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+// Issue the bpf(2) BPF_OBJ_GET command for pathname and return the raw
+// syscall() result unchanged.
+int bpf_obj_get(const char *pathname)
+{
+ // NOTE(review): only pathname is named in the initializer; confirm the
+ // remaining bytes of the union are zero when passed down.
+ union bpf_attr attr = {
+ .pathname = ptr_to_u64((void *)pathname),
+ };
+
+ return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
+}
diff --git a/src/cc/libbpf.h b/src/cc/libbpf.h
index cc4e0f3..b4499ec 100644
--- a/src/cc/libbpf.h
+++ b/src/cc/libbpf.h
@@ -71,6 +71,9 @@
pid_t pid, int cpu, int group_fd);
int bpf_detach_perf_event(uint32_t ev_type, uint32_t ev_config);
+int bpf_obj_pin(int fd, const char *pathname);
+int bpf_obj_get(const char *pathname);
+
#define LOG_BUF_SIZE 65536
// Put non-static/inline functions in their own section with this prefix +
diff --git a/src/cc/shared_table.cc b/src/cc/shared_table.cc
index c27f768..f389fad 100644
--- a/src/cc/shared_table.cc
+++ b/src/cc/shared_table.cc
@@ -17,6 +17,7 @@
#include <unistd.h>
#include "shared_table.h"
+#include "compat/linux/bpf.h"
namespace ebpf {
@@ -35,13 +36,20 @@
auto table = tables_.find(name);
if (table == tables_.end())
return -1;
- return table->second;
+ return table->second.first;
}
-bool SharedTables::insert_fd(const string &name, int fd) {
+// Returns the map type stored alongside the fd for `name`, or
+// BPF_MAP_TYPE_UNSPEC when no table with that name is registered.
+int SharedTables::lookup_type(const string &name) const {
+  const auto entry = tables_.find(name);
+  const bool known = entry != tables_.end();
+  return known ? entry->second.second : BPF_MAP_TYPE_UNSPEC;
+}
+
+bool SharedTables::insert_fd(const string &name, int fd, int type) {
if (tables_.find(name) != tables_.end())
return false;
- tables_[name] = fd;
+ tables_[name] = std::make_pair(fd, type);
return true;
}
@@ -49,7 +57,7 @@
auto table = tables_.find(name);
if (table == tables_.end())
return false;
- close(table->second);
+ close(table->second.first);
tables_.erase(table);
return true;
}
diff --git a/src/cc/shared_table.h b/src/cc/shared_table.h
index 051dfbd..7b92914 100644
--- a/src/cc/shared_table.h
+++ b/src/cc/shared_table.h
@@ -27,14 +27,16 @@
public:
static SharedTables * instance();
// add an fd to the shared table, return true if successfully inserted
- bool insert_fd(const std::string &name, int fd);
+ bool insert_fd(const std::string &name, int fd, int type);
// lookup an fd in the shared table, or -1 if not found
int lookup_fd(const std::string &name) const;
+ // lookup a map type in the shared table, or BPF_MAP_TYPE_UNSPEC if not found
+ int lookup_type(const std::string &name) const;
// close and remove a shared fd. return true if the value was found
bool remove_fd(const std::string &name);
private:
static SharedTables *instance_;
- std::map<std::string, int> tables_;
+ std::map<std::string, std::pair<int, int>> tables_;
};
}
diff --git a/src/cc/table_desc.h b/src/cc/table_desc.h
index a5196e2..d299f5d 100644
--- a/src/cc/table_desc.h
+++ b/src/cc/table_desc.h
@@ -40,6 +40,7 @@
llvm::Function *key_snprintf;
llvm::Function *leaf_snprintf;
bool is_shared;
+ bool is_extern;
};
} // namespace ebpf
diff --git a/src/cc/usdt.cc b/src/cc/usdt.cc
index 0bbc9dc..4f1b00a 100644
--- a/src/cc/usdt.cc
+++ b/src/cc/usdt.cc
@@ -239,7 +239,7 @@
}
bool Context::generate_usdt_args(std::ostream &stream) {
- stream << "#include <uapi/linux/ptrace.h>\n";
+ stream << USDT_PROGRAM_HEADER;
for (auto &p : probes_) {
if (p->enabled() && !p->usdt_getarg(stream))
return false;
diff --git a/src/cc/usdt.h b/src/cc/usdt.h
index bdf9412..49251f6 100644
--- a/src/cc/usdt.h
+++ b/src/cc/usdt.h
@@ -31,6 +31,9 @@
using std::experimental::nullopt;
class ArgumentParser;
+static const std::string USDT_PROGRAM_HEADER =
+ "#include <uapi/linux/ptrace.h>\n";
+
class Argument {
private:
optional<int> arg_size_;
diff --git a/src/python/bcc/__init__.py b/src/python/bcc/__init__.py
index d95dc77..347f491 100644
--- a/src/python/bcc/__init__.py
+++ b/src/python/bcc/__init__.py
@@ -221,7 +221,7 @@
"possible cause is missing pid when a " +
"probe in a shared object has multiple " +
"locations")
- text = usdt_context.get_text() + text
+ text = usdt_text + text
if text:
self.module = lib.bpf_module_create_c_from_string(text.encode("ascii"),
@@ -1058,5 +1058,11 @@
lib.bpf_module_destroy(self.module)
self.module = None
+ def __enter__(self):
+ # Context-manager entry: `with BPF(...) as b` binds this same object.
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ # Context-manager exit: run cleanup(). Nothing is returned, so an
+ # exception raised inside the `with` body is not suppressed.
+ self.cleanup()
+
from .usdt import USDT
diff --git a/src/python/bcc/usdt.py b/src/python/bcc/usdt.py
index adcd3d7..19a3a98 100644
--- a/src/python/bcc/usdt.py
+++ b/src/python/bcc/usdt.py
@@ -145,13 +145,15 @@
# This is called by the BPF module's __init__ when it realizes that there
# is a USDT context and probes need to be attached.
def attach_uprobes(self, bpf):
+ probes = self.enumerate_active_probes()
+ for (binpath, fn_name, addr, pid) in probes:
+ bpf.attach_uprobe(name=binpath, fn_name=fn_name,
+ addr=addr, pid=pid)
+
+ def enumerate_active_probes(self):
probes = []
def _add_probe(binpath, fn_name, addr, pid):
probes.append((binpath, fn_name, addr, pid))
lib.bcc_usdt_foreach_uprobe(self.context, _USDT_PROBE_CB(_add_probe))
-
- for (binpath, fn_name, addr, pid) in probes:
- bpf.attach_uprobe(name=binpath, fn_name=fn_name,
- addr=addr, pid=pid)
-
+ return probes
diff --git a/tests/python/test_clang.py b/tests/python/test_clang.py
index 2d6e5bf..4725a84 100755
--- a/tests/python/test_clang.py
+++ b/tests/python/test_clang.py
@@ -352,5 +352,45 @@
with self.assertRaises(Exception):
b = BPF(text=text)
+ def test_call_macro_arg(self):
+ text = """
+BPF_TABLE("prog", u32, u32, jmp, 32);
+
+#define JMP_IDX_PIPE (1U << 1)
+
+enum action {
+ ACTION_PASS
+};
+
+int process(struct xdp_md *ctx) {
+ jmp.call((void *)ctx, ACTION_PASS);
+ jmp.call((void *)ctx, JMP_IDX_PIPE);
+ return XDP_PASS;
+}
+ """
+ b = BPF(text=text)
+ t = b["jmp"]
+ self.assertEquals(len(t), 32);
+
+ def test_update_macro_arg(self):
+ text = """
+BPF_TABLE("array", u32, u32, act, 32);
+
+#define JMP_IDX_PIPE (1U << 1)
+
+enum action {
+ ACTION_PASS
+};
+
+int process(struct xdp_md *ctx) {
+ act.increment(ACTION_PASS);
+ act.increment(JMP_IDX_PIPE);
+ return XDP_PASS;
+}
+ """
+ b = BPF(text=text)
+ t = b["act"]
+ self.assertEquals(len(t), 32);
+
if __name__ == "__main__":
main()
diff --git a/tests/python/test_shared_table.py b/tests/python/test_shared_table.py
new file mode 100644
index 0000000..10dd63f
--- /dev/null
+++ b/tests/python/test_shared_table.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# Copyright (c) 2016 Facebook, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+
+import ctypes as ct
+import unittest
+from bcc import BPF
+
+class TestSharedTable(unittest.TestCase):
+ # Checks that a table declared with BPF_TABLE_PUBLIC in one module stays
+ # usable through that module after a second module has referenced it as
+ # "extern".
+ def test_close_extern(self):
+ # b1 declares table1 with BPF_TABLE_PUBLIC.
+ b1 = BPF(text="""BPF_TABLE_PUBLIC("array", int, int, table1, 10);""")
+
+ # b2 declares the same name as an "extern" table and is used as a
+ # context manager, so its __exit__ runs when the with block ends.
+ with BPF(text="""BPF_TABLE("extern", int, int, table1, 10);""") as b2:
+ t2 = b2["table1"]
+ t2[ct.c_int(1)] = ct.c_int(10)
+ self.assertEqual(len(t2), 10)
+
+ # The value written through b2 must be readable through b1's handle.
+ t1 = b1["table1"]
+ self.assertEqual(t1[ct.c_int(1)].value, 10)
+ self.assertEqual(len(t1), 10)
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tools/trace.py b/tools/trace.py
index ba93998..cd211fd 100755
--- a/tools/trace.py
+++ b/tools/trace.py
@@ -3,8 +3,8 @@
# trace Trace a function and print a trace message based on its
# parameters, with an optional filter.
#
-# usage: trace [-h] [-p PID] [-t TID] [-v] [-Z STRING_SIZE] [-S]
-# [-M MAX_EVENTS] [-o] [-K] [-U] [-I header]
+# usage: trace [-h] [-p PID] [-L TID] [-v] [-Z STRING_SIZE] [-S]
+# [-M MAX_EVENTS] [-T] [-t] [-K] [-U] [-I header]
# probe [probe ...]
#
# Licensed under the Apache License, Version 2.0 (the "License")
@@ -58,7 +58,8 @@
@classmethod
def configure(cls, args):
cls.max_events = args.max_events
- cls.use_localtime = not args.offset
+ cls.print_time = args.timestamp or args.time
+ cls.use_localtime = not args.timestamp
cls.first_ts = Time.monotonic_time()
cls.tgid = args.tgid or -1
cls.pid = args.pid or -1
@@ -485,11 +486,16 @@
values = map(lambda i: getattr(event, "v%d" % i),
range(0, len(self.values)))
msg = self._format_message(bpf, event.tgid, values)
- time = strftime("%H:%M:%S") if Probe.use_localtime else \
- Probe._time_off_str(event.timestamp_ns)
- print("%-8s %-6d %-6d %-12s %-16s %s" %
- (time[:8], event.tgid, event.pid, event.comm,
- self._display_function(), msg))
+ if not Probe.print_time:
+ print("%-6d %-6d %-12s %-16s %s" %
+ (event.tgid, event.pid, event.comm,
+ self._display_function(), msg))
+ else:
+ time = strftime("%H:%M:%S") if Probe.use_localtime else \
+ Probe._time_off_str(event.timestamp_ns)
+ print("%-8s %-6d %-6d %-12s %-16s %s" %
+ (time[:8], event.tgid, event.pid, event.comm,
+ self._display_function(), msg))
if self.kernel_stack:
self.print_stack(bpf, event.kernel_stack_id, -1)
@@ -579,7 +585,7 @@
# their kernel names -- tgid and pid -- inside the script
parser.add_argument("-p", "--pid", type=int, metavar="PID",
dest="tgid", help="id of the process to trace (optional)")
- parser.add_argument("-t", "--tid", type=int, metavar="TID",
+ parser.add_argument("-L", "--tid", type=int, metavar="TID",
dest="pid", help="id of the thread to trace (optional)")
parser.add_argument("-v", "--verbose", action="store_true",
help="print resulting BPF program code before executing")
@@ -590,8 +596,10 @@
help="do not filter trace's own pid from the trace")
parser.add_argument("-M", "--max-events", type=int,
help="number of events to print before quitting")
- parser.add_argument("-o", "--offset", action="store_true",
- help="use relative time from first traced message")
+ parser.add_argument("-t", "--timestamp", action="store_true",
+ help="print timestamp column (offset from trace start)")
+ parser.add_argument("-T", "--time", action="store_true",
+ help="print time column")
parser.add_argument("-K", "--kernel-stack",
action="store_true", help="output kernel stack trace")
parser.add_argument("-U", "--user-stack",
@@ -653,9 +661,14 @@
self.probes))
# Print header
- print("%-8s %-6s %-6s %-12s %-16s %s" %
- ("TIME", "PID", "TID", "COMM", "FUNC",
- "-" if not all_probes_trivial else ""))
+ if self.args.timestamp or self.args.time:
+ print("%-8s %-6s %-6s %-12s %-16s %s" %
+ ("TIME", "PID", "TID", "COMM", "FUNC",
+ "-" if not all_probes_trivial else ""))
+ else:
+ print("%-6s %-6s %-12s %-16s %s" %
+ ("PID", "TID", "COMM", "FUNC",
+ "-" if not all_probes_trivial else ""))
while True:
self.bpf.kprobe_poll()
diff --git a/tools/trace_example.txt b/tools/trace_example.txt
index 08b9061..46dc843 100644
--- a/tools/trace_example.txt
+++ b/tools/trace_example.txt
@@ -9,20 +9,20 @@
system:
# trace 'sys_execve "%s", arg1'
-TIME PID COMM FUNC -
-05:11:51 4402 bash sys_execve /usr/bin/man
-05:11:51 4411 man sys_execve /usr/local/bin/less
-05:11:51 4411 man sys_execve /usr/bin/less
-05:11:51 4410 man sys_execve /usr/local/bin/nroff
-05:11:51 4410 man sys_execve /usr/bin/nroff
-05:11:51 4409 man sys_execve /usr/local/bin/tbl
-05:11:51 4409 man sys_execve /usr/bin/tbl
-05:11:51 4408 man sys_execve /usr/local/bin/preconv
-05:11:51 4408 man sys_execve /usr/bin/preconv
-05:11:51 4415 nroff sys_execve /usr/bin/locale
-05:11:51 4416 nroff sys_execve /usr/bin/groff
-05:11:51 4418 groff sys_execve /usr/bin/grotty
-05:11:51 4417 groff sys_execve /usr/bin/troff
+PID COMM FUNC -
+4402 bash sys_execve /usr/bin/man
+4411 man sys_execve /usr/local/bin/less
+4411 man sys_execve /usr/bin/less
+4410 man sys_execve /usr/local/bin/nroff
+4410 man sys_execve /usr/bin/nroff
+4409 man sys_execve /usr/local/bin/tbl
+4409 man sys_execve /usr/bin/tbl
+4408 man sys_execve /usr/local/bin/preconv
+4408 man sys_execve /usr/bin/preconv
+4415 nroff sys_execve /usr/bin/locale
+4416 nroff sys_execve /usr/bin/groff
+4418 groff sys_execve /usr/bin/grotty
+4417 groff sys_execve /usr/bin/troff
^C
The ::sys_execve syntax specifies that you want an entry probe (which is the
@@ -38,11 +38,11 @@
bytes to be read:
# trace 'sys_read (arg3 > 20000) "read %d bytes", arg3'
-TIME PID COMM FUNC -
-05:18:23 4490 dd sys_read read 1048576 bytes
-05:18:23 4490 dd sys_read read 1048576 bytes
-05:18:23 4490 dd sys_read read 1048576 bytes
-05:18:23 4490 dd sys_read read 1048576 bytes
+PID COMM FUNC -
+4490 dd sys_read read 1048576 bytes
+4490 dd sys_read read 1048576 bytes
+4490 dd sys_read read 1048576 bytes
+4490 dd sys_read read 1048576 bytes
^C
During the trace, I executed "dd if=/dev/zero of=/dev/null bs=1M count=4".
@@ -55,9 +55,9 @@
value, effectively snooping all bash shell input across the system:
# trace 'r:bash:readline "%s", retval'
-TIME PID COMM FUNC -
-05:24:50 2740 bash readline echo hi!
-05:24:53 2740 bash readline man ls
+PID COMM FUNC -
+2740 bash readline echo hi!
+2740 bash readline man ls
^C
The special retval keywords stands for the function's return value, and can
@@ -67,10 +67,10 @@
can specify the full path to the executable (e.g. "/usr/bin/bash").
Multiple probes can be combined on the same command line. For example, let's
-trace failed read and write calls on the libc level:
+trace failed read and write calls on the libc level, and include a time column:
# trace 'r:c:read ((int)retval < 0) "read failed: %d", retval' \
- 'r:c:write ((int)retval < 0) "write failed: %d", retval'
+ 'r:c:write ((int)retval < 0) "write failed: %d", retval' -T
TIME PID COMM FUNC -
05:31:57 3388 bash write write failed: -1
05:32:00 3388 bash write write failed: -1
@@ -84,7 +84,7 @@
trace the block:block_rq_complete tracepoint and print out the number of sectors
transferred:
-# trace 't:block:block_rq_complete "sectors=%d", args->nr_sector'
+# trace 't:block:block_rq_complete "sectors=%d", args->nr_sector' -T
TIME PID COMM FUNC -
01:23:51 0 swapper/0 block_rq_complete sectors=8
01:23:55 10017 kworker/u64: block_rq_complete sectors=1
@@ -110,7 +110,7 @@
These probes can be traced by trace just like kernel tracepoints. For example,
trace new threads being created and their function name:
-# trace 'u:pthread:pthread_create "%U", arg3'
+# trace 'u:pthread:pthread_create "%U", arg3' -T
TIME PID COMM FUNC -
02:07:29 4051 contentions pthread_create primes_thread+0x0
02:07:29 4051 contentions pthread_create primes_thread+0x0
@@ -125,7 +125,7 @@
trace Ruby methods being called (this requires a version of Ruby built with
the --enable-dtrace configure flag):
-# trace 'u:ruby:method__entry "%s.%s", arg1, arg2' -p $(pidof irb)
+# trace 'u:ruby:method__entry "%s.%s", arg1, arg2' -p $(pidof irb) -T
TIME PID COMM FUNC -
12:08:43 18420 irb method__entry IRB::Context.verbose?
12:08:43 18420 irb method__entry RubyLex.ungetc
@@ -139,7 +139,7 @@
Occasionally, it can be useful to filter specific strings. For example, you
might be interested in open() calls that open a specific file:
-# trace 'p:c:open (STRCMP("test.txt", arg1)) "opening %s", arg1'
+# trace 'p:c:open (STRCMP("test.txt", arg1)) "opening %s", arg1' -T
TIME PID COMM FUNC -
01:43:15 10938 cat open opening test.txt
01:43:20 10939 cat open opening test.txt
@@ -149,7 +149,7 @@
As a final example, let's trace open syscalls for a specific process. By
default, tracing is system-wide, but the -p switch overrides this:
-# trace -p 2740 'do_sys_open "%s", arg2'
+# trace -p 2740 'do_sys_open "%s", arg2' -T
TIME PID COMM FUNC -
05:36:16 15872 ls do_sys_open /etc/ld.so.cache
05:36:16 15872 ls do_sys_open /lib64/libselinux.so.1
@@ -171,8 +171,8 @@
USAGE message:
# trace -h
-usage: trace [-h] [-p PID] [-t TID] [-v] [-Z STRING_SIZE] [-S]
- [-M MAX_EVENTS] [-o] [-K] [-U] [-I header]
+usage: trace [-h] [-p PID] [-L TID] [-v] [-Z STRING_SIZE] [-S]
+ [-M MAX_EVENTS] [-t] [-T] [-K] [-U] [-I header]
probe [probe ...]
Attach to functions and print trace messages.
@@ -183,14 +183,15 @@
optional arguments:
-h, --help show this help message and exit
-p PID, --pid PID id of the process to trace (optional)
- -t TID, --tid TID id of the thread to trace (optional)
+ -L TID, --tid TID id of the thread to trace (optional)
-v, --verbose print resulting BPF program code before executing
-Z STRING_SIZE, --string-size STRING_SIZE
maximum size to read from strings
-S, --include-self do not filter trace's own pid from the trace
-M MAX_EVENTS, --max-events MAX_EVENTS
number of events to print before quitting
- -o, --offset use relative time from first traced message
+ -t, --timestamp print timestamp column (offset from trace start)
+ -T, --time print time column
-K, --kernel-stack output kernel stack trace
-U, --user-stack output user stack trace
-I header, --include header
diff --git a/tools/ucalls.py b/tools/ucalls.py
new file mode 100755
index 0000000..ed476cd
--- /dev/null
+++ b/tools/ucalls.py
@@ -0,0 +1,300 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# ucalls Summarize method calls in high-level languages and/or system calls.
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: ucalls [-l {java,python,ruby}] [-h] [-T TOP] [-L] [-S] [-v] [-m]
+# pid [interval]
+#
+# Copyright 2016 Sasha Goldshtein
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 19-Oct-2016 Sasha Goldshtein Created this.
+
+from __future__ import print_function
+import argparse
+from bcc import BPF, USDT
+from time import sleep
+
+examples = """examples:
+ ./ucalls -l java 185 # trace Java calls and print statistics on ^C
+ ./ucalls -l python 2020 1 # trace Python calls and print every second
+ ./ucalls -l java 185 -S # trace Java calls and syscalls
+ ./ucalls 6712 -S # trace only syscall counts
+ ./ucalls -l ruby 1344 -T 10 # trace top 10 Ruby method calls
+ ./ucalls -l ruby 1344 -L # trace Ruby calls including latency
+ ./ucalls -l ruby 1344 -LS # trace Ruby calls and syscalls with latency
+ ./ucalls -l python 2020 -mL # trace Python calls including latency in ms
+"""
+parser = argparse.ArgumentParser(
+ description="Summarize method calls in high-level languages.",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("pid", type=int, help="process id to attach to")
+parser.add_argument("interval", type=int, nargs='?',
+ help="print every specified number of seconds")
+parser.add_argument("-l", "--language", choices=["java", "python", "ruby"],
+ help="language to trace (if none, trace syscalls only)")
+parser.add_argument("-T", "--top", type=int,
+ help="number of most frequent/slow calls to print")
+parser.add_argument("-L", "--latency", action="store_true",
+ help="record method latency from enter to exit (except recursive calls)")
+parser.add_argument("-S", "--syscalls", action="store_true",
+ help="record syscall latency (adds overhead)")
+parser.add_argument("-v", "--verbose", action="store_true",
+ help="verbose mode: print the BPF program (for debugging purposes)")
+parser.add_argument("-m", "--milliseconds", action="store_true",
+ help="report times in milliseconds (default is microseconds)")
+args = parser.parse_args()
+
+# We assume that the entry and return probes have the same arguments. This is
+# the case for Java, Python, and Ruby. If there's a language where it's not the
+# case, we will need to build a custom correlator from entry to exit.
+if args.language == "java":
+ # TODO for JVM entries, we actually have the real length of the class
+ # and method strings in arg3 and arg5 respectively, so we can insert
+ # the null terminator in its proper position.
+ entry_probe = "method__entry"
+ return_probe = "method__return"
+ read_class = "bpf_usdt_readarg(2, ctx, &clazz);"
+ read_method = "bpf_usdt_readarg(4, ctx, &method);"
+elif args.language == "python":
+ entry_probe = "function__entry"
+ return_probe = "function__return"
+ read_class = "bpf_usdt_readarg(1, ctx, &clazz);" # filename really
+ read_method = "bpf_usdt_readarg(2, ctx, &method);"
+elif args.language == "ruby":
+ # TODO Also probe cmethod__entry and cmethod__return with same arguments
+ entry_probe = "method__entry"
+ return_probe = "method__return"
+ read_class = "bpf_usdt_readarg(1, ctx, &clazz);"
+ read_method = "bpf_usdt_readarg(2, ctx, &method);"
+elif not args.language:
+ if not args.syscalls:
+ print("Nothing to do; use -S to trace syscalls.")
+ exit(1)
+ entry_probe, return_probe, read_class, read_method = ("", "", "", "")
+
+program = """
+#include <linux/ptrace.h>
+
+#define MAX_STRING_LENGTH 80
+DEFINE_NOLANG
+DEFINE_LATENCY
+DEFINE_SYSCALLS
+
+struct method_t {
+ char clazz[MAX_STRING_LENGTH];
+ char method[MAX_STRING_LENGTH];
+};
+struct entry_t {
+ u64 pid;
+ struct method_t method;
+};
+struct info_t {
+ u64 num_calls;
+ u64 total_ns;
+};
+struct syscall_entry_t {
+ u64 timestamp;
+ u64 ip;
+};
+
+#ifndef LATENCY
+ BPF_HASH(counts, struct method_t, u64); // number of calls
+ #ifdef SYSCALLS
+ BPF_HASH(syscounts, u64, u64); // number of calls per IP
+ #endif // SYSCALLS
+#else
+ BPF_HASH(times, struct method_t, struct info_t);
+ BPF_HASH(entry, struct entry_t, u64); // timestamp at entry
+ #ifdef SYSCALLS
+ BPF_HASH(systimes, u64, struct info_t); // latency per IP
+ BPF_HASH(sysentry, u64, struct syscall_entry_t); // ts + IP at entry
+ #endif // SYSCALLS
+#endif
+
+#ifndef NOLANG
+// USDT entry-probe handler. The script substitutes the two placeholder lines
+// below with bpf_usdt_readarg() calls that load the class-name and
+// method-name pointers into clazz and method.
+// Without LATENCY: bumps the per-method counter in `counts`.
+// With LATENCY: stores the entry timestamp in `entry`, keyed by pid + method,
+// so that trace_return can compute the elapsed time.
+int trace_entry(struct pt_regs *ctx) {
+    u64 clazz = 0, method = 0, val = 0;
+    u64 *valp;
+    struct entry_t data = {0};
+#ifdef LATENCY
+    u64 timestamp = bpf_ktime_get_ns();
+    data.pid = bpf_get_current_pid_tgid();
+#endif
+    READ_CLASS
+    READ_METHOD
+    // Copy sizeof(field) bytes from each pointer into the key.
+    bpf_probe_read(&data.method.clazz, sizeof(data.method.clazz),
+                   (void *)clazz);
+    bpf_probe_read(&data.method.method, sizeof(data.method.method),
+                   (void *)method);
+#ifndef LATENCY
+    valp = counts.lookup_or_init(&data.method, &val);
+    ++(*valp);
+#endif
+#ifdef LATENCY
+    // Pass the address of the local timestamp as the map value.
+    entry.update(&data, &timestamp);
+#endif
+    return 0;
+}
+
+#ifdef LATENCY
+// USDT return-probe handler (only compiled with LATENCY). Rebuilds the same
+// pid + method key that trace_entry stored, looks up the entry timestamp and
+// adds one call plus the elapsed nanoseconds to `times` for that method.
+int trace_return(struct pt_regs *ctx) {
+ u64 *entry_timestamp, clazz = 0, method = 0;
+ struct info_t *info, zero = {};
+ struct entry_t data = {};
+ data.pid = bpf_get_current_pid_tgid();
+ READ_CLASS
+ READ_METHOD
+ bpf_probe_read(&data.method.clazz, sizeof(data.method.clazz),
+ (void *)clazz);
+ bpf_probe_read(&data.method.method, sizeof(data.method.method),
+ (void *)method);
+ entry_timestamp = entry.lookup(&data);
+ if (!entry_timestamp) {
+ return 0; // missed the entry event
+ }
+ // `times` is keyed by method only, so all threads share one record.
+ info = times.lookup_or_init(&data.method, &zero);
+ info->num_calls += 1;
+ info->total_ns += bpf_ktime_get_ns() - *entry_timestamp;
+ // Drop the pending entry so the key can be reused by the next call.
+ entry.delete(&data);
+ return 0;
+}
+#endif // LATENCY
+#endif // NOLANG
+
+#ifdef SYSCALLS
+// kprobe handler attached by the script to the syscall entry functions.
+// Without LATENCY: bumps the counter in `syscounts` for the probed ip.
+// With LATENCY: records timestamp + ip in `sysentry`, keyed by pid_tgid.
+int syscall_entry(struct pt_regs *ctx) {
+ u64 pid = bpf_get_current_pid_tgid();
+ u64 *valp, ip = ctx->ip, val = 0;
+ // The placeholder below becomes an early return when the upper 32 bits
+ // of pid differ from the process id given on the command line.
+ PID_FILTER
+#ifdef LATENCY
+ struct syscall_entry_t data = {};
+ data.timestamp = bpf_ktime_get_ns();
+ data.ip = ip;
+#endif
+#ifndef LATENCY
+ valp = syscounts.lookup_or_init(&ip, &val);
+ ++(*valp);
+#endif
+#ifdef LATENCY
+ sysentry.update(&pid, &data);
+#endif
+ return 0;
+}
+
+#ifdef LATENCY
+// kretprobe handler for the same syscall functions (only compiled with
+// LATENCY). Finds the record stored by syscall_entry for this pid_tgid and
+// adds one call plus the elapsed nanoseconds to `systimes` for that ip.
+int syscall_return(struct pt_regs *ctx) {
+ struct syscall_entry_t *e;
+ struct info_t *info, zero = {};
+ u64 pid = bpf_get_current_pid_tgid(), ip;
+ // The placeholder below becomes an early return when the upper 32 bits
+ // of pid differ from the process id given on the command line.
+ PID_FILTER
+ e = sysentry.lookup(&pid);
+ if (!e) {
+ return 0; // missed the entry event
+ }
+ // Use the ip saved at entry as the key into `systimes`.
+ ip = e->ip;
+ info = systimes.lookup_or_init(&ip, &zero);
+ info->num_calls += 1;
+ info->total_ns += bpf_ktime_get_ns() - e->timestamp;
+ sysentry.delete(&pid);
+ return 0;
+}
+#endif // LATENCY
+#endif // SYSCALLS
+""".replace("READ_CLASS", read_class) \
+ .replace("READ_METHOD", read_method) \
+ .replace("PID_FILTER", "if ((pid >> 32) != %d) { return 0; }" % args.pid) \
+ .replace("DEFINE_NOLANG", "#define NOLANG" if not args.language else "") \
+ .replace("DEFINE_LATENCY", "#define LATENCY" if args.latency else "") \
+ .replace("DEFINE_SYSCALLS", "#define SYSCALLS" if args.syscalls else "")
+
+if args.language:
+ usdt = USDT(pid=args.pid)
+ usdt.enable_probe(entry_probe, "trace_entry")
+ if args.latency:
+ usdt.enable_probe(return_probe, "trace_return")
+else:
+ usdt = None
+
+if args.verbose:
+ if usdt:
+ print(usdt.get_text())
+ print(program)
+
+bpf = BPF(text=program, usdt_contexts=[usdt] if usdt else [])
+if args.syscalls:
+ syscall_regex = "^[Ss]y[Ss]_.*"
+ bpf.attach_kprobe(event_re=syscall_regex, fn_name="syscall_entry")
+ if args.latency:
+ bpf.attach_kretprobe(event_re=syscall_regex, fn_name="syscall_return")
+ print("Attached %d kernel probes for syscall tracing." %
+ bpf.num_open_kprobes())
+
+def get_data():
+ # Will be empty when no language was specified for tracing
+ if args.latency:
+ data = map(lambda (k, v): (k.clazz + "." + k.method,
+ (v.num_calls, v.total_ns)),
+ bpf["times"].items())
+ else:
+ data = map(lambda (k, v): (k.clazz + "." + k.method, (v.value, 0)),
+ bpf["counts"].items())
+
+ if args.syscalls:
+ if args.latency:
+ syscalls = map(lambda (k, v): (bpf.ksym(k.value),
+ (v.num_calls, v.total_ns)),
+ bpf["systimes"].items())
+ data.extend(syscalls)
+ else:
+ syscalls = map(lambda (k, v): (bpf.ksym(k.value), (v.value, 0)),
+ bpf["syscounts"].items())
+ data.extend(syscalls)
+
+ return sorted(data, key=lambda (k, v): v[1 if args.latency else 0])
+
+def clear_data():
+ if args.latency:
+ bpf["times"].clear()
+ else:
+ bpf["counts"].clear()
+
+ if args.syscalls:
+ if args.latency:
+ bpf["systimes"].clear()
+ else:
+ bpf["syscounts"].clear()
+
+exit_signaled = False
+print("Tracing calls in process %d (language: %s)... Ctrl-C to quit." %
+ (args.pid, args.language or "none"))
+while True:
+ try:
+ sleep(args.interval or 99999999)
+ except KeyboardInterrupt:
+ exit_signaled = True
+ print()
+ data = get_data() # [(function, (num calls, latency in ns))]
+ if args.latency:
+ time_col = "TIME (ms)" if args.milliseconds else "TIME (us)"
+ print("%-50s %8s %8s" % ("METHOD", "# CALLS", time_col))
+ else:
+ print("%-50s %8s" % ("METHOD", "# CALLS"))
+ if args.top:
+ data = data[-args.top:]
+ for key, value in data:
+ if args.latency:
+ time = value[1]/1000000.0 if args.milliseconds else \
+ value[1]/1000.0
+ print("%-50s %8d %6.2f" % (key, value[0], time))
+ else:
+ print("%-50s %8d" % (key, value[0]))
+ if args.interval and not exit_signaled:
+ clear_data()
+ else:
+ if args.syscalls:
+ print("Detaching kernel probes, please wait...")
+ exit()
diff --git a/tools/ucalls_example.txt b/tools/ucalls_example.txt
new file mode 100644
index 0000000..7410f88
--- /dev/null
+++ b/tools/ucalls_example.txt
@@ -0,0 +1,92 @@
+Demonstrations of ucalls.
+
+
+ucalls summarizes method calls in various high-level languages, including Java,
+Python, Ruby, and Linux system calls. It displays statistics on the most
+frequently called methods, as well as the latency (duration) of these methods.
+
+Through the syscalls support, ucalls can provide basic information on a
+process' interaction with the system including syscall counts and latencies.
+This can then be used for further exploration with other BCC tools like trace,
+argdist, biotop, fileslower, and others.
+
+For example, to trace method call latency in a Java application:
+
+# ucalls -L -l java $(pidof java)
+Tracing calls in process 26877 (language: java)... Ctrl-C to quit.
+
+METHOD # CALLS TIME (us)
+java/io/BufferedInputStream.getBufIfOpen 1 7.00
+slowy/App.isSimplePrime 8970 8858.35
+slowy/App.isDivisible 3228196 3076985.12
+slowy/App.isPrime 8969 4841017.64
+^C
+
+
+To trace only syscalls in a particular process and print the top 10 most
+frequently-invoked ones:
+
+# ucalls -ST 10 3018
+Attached 375 kernel probes for syscall tracing.
+Tracing calls in process 3018 (language: none)... Ctrl-C to quit.
+
+METHOD # CALLS
+sys_rt_sigaction 4
+SyS_rt_sigprocmask 4
+sys_mprotect 5
+sys_read 22
+SyS_write 39
+SyS_epoll_wait 42
+sys_futex 177
+SyS_mmap 180
+sys_mmap_pgoff 181
+sys_munmap 817
+^C
+Detaching kernel probes, please wait...
+
+
+To print only the top 5 methods. The -m switch selects milliseconds (the
+default is microseconds), but no time column appears unless -L is also given:
+
+# ucalls -l python -mT 5 $(pidof python)
+Tracing calls in process 26914 (language: python)... Ctrl-C to quit.
+
+METHOD # CALLS
+<stdin>.<module> 1
+<stdin>.fibo 14190928
+^C
+
+
+USAGE message:
+
+# ./ucalls.py -h
+usage: ucalls.py [-h] [-l {java,python,ruby}] [-T TOP] [-L] [-S] [-v] [-m]
+ pid [interval]
+
+Summarize method calls in high-level languages.
+
+positional arguments:
+ pid process id to attach to
+ interval print every specified number of seconds
+
+optional arguments:
+ -h, --help show this help message and exit
+ -l {java,python,ruby}, --language {java,python,ruby}
+ language to trace (if none, trace syscalls only)
+ -T TOP, --top TOP number of most frequent/slow calls to print
+ -L, --latency record method latency from enter to exit (except
+ recursive calls)
+ -S, --syscalls record syscall latency (adds overhead)
+ -v, --verbose verbose mode: print the BPF program (for debugging
+ purposes)
+ -m, --milliseconds report times in milliseconds (default is microseconds)
+
+examples:
+ ./ucalls -l java 185 # trace Java calls and print statistics on ^C
+ ./ucalls -l python 2020 1 # trace Python calls and print every second
+ ./ucalls -l java 185 -S # trace Java calls and syscalls
+ ./ucalls 6712 -S # trace only syscall counts
+ ./ucalls -l ruby 1344 -T 10 # trace top 10 Ruby method calls
+ ./ucalls -l ruby 1344 -L # trace Ruby calls including latency
+ ./ucalls -l ruby 1344 -LS # trace Ruby calls and syscalls with latency
+ ./ucalls -l python 2020 -mL # trace Python calls including latency in ms
diff --git a/tools/uflow.py b/tools/uflow.py
new file mode 100755
index 0000000..6bf8b53
--- /dev/null
+++ b/tools/uflow.py
@@ -0,0 +1,174 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# uflow Trace method execution flow in high-level languages.
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: uflow [-C CLASS] [-M METHOD] [-v] {java,python,ruby} pid
+#
+# Copyright 2016 Sasha Goldshtein
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 27-Oct-2016 Sasha Goldshtein Created this.
+
+from __future__ import print_function
+import argparse
+from bcc import BPF, USDT
+import ctypes as ct
+import time
+
+examples = """examples:
+ ./uflow java 185 # trace Java method calls in process 185
+ ./uflow ruby 1344 # trace Ruby method calls in process 1344
+ ./uflow -M indexOf java 185 # trace only 'indexOf'-prefixed methods
+ ./uflow -C '<stdin>' python 180 # trace only REPL-defined methods
+"""
+parser = argparse.ArgumentParser(
+ description="Trace method execution flow in high-level languages.",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("language", choices=["java", "python", "ruby"],
+ help="language to trace")
+parser.add_argument("pid", type=int, help="process id to attach to")
+parser.add_argument("-M", "--method",
+ help="trace only calls to methods starting with this prefix")
+parser.add_argument("-C", "--class", dest="clazz",
+ help="trace only calls to classes starting with this prefix")
+parser.add_argument("-v", "--verbose", action="store_true",
+ help="verbose mode: print the BPF program (for debugging purposes)")
+args = parser.parse_args()
+
+usdt = USDT(pid=args.pid)
+
+program = """
+struct call_t {
+ u64 depth; // first bit is direction (0 entry, 1 return)
+ u64 pid; // (tgid << 32) + pid from bpf_get_current...
+ u64 timestamp; // ns
+ char clazz[80];
+ char method[80];
+};
+
+BPF_PERF_OUTPUT(calls);
+BPF_HASH(entry, u64, u64);
+"""
+
+prefix_template = """
+static inline bool prefix_%s(char *actual) {
+ char expected[] = "%s";
+ for (int i = 0; i < sizeof(expected) - 1; ++i) {
+ if (expected[i] != actual[i]) {
+ return false;
+ }
+ }
+ return true;
+}
+"""
+
+if args.clazz:
+ program += prefix_template % ("class", args.clazz)
+if args.method:
+ program += prefix_template % ("method", args.method)
+
+trace_template = """
+int NAME(struct pt_regs *ctx) {
+ u64 *depth, zero = 0, clazz = 0, method = 0 ;
+ struct call_t data = {};
+
+ READ_CLASS
+ READ_METHOD
+ bpf_probe_read(&data.clazz, sizeof(data.clazz), (void *)clazz);
+ bpf_probe_read(&data.method, sizeof(data.method), (void *)method);
+
+ FILTER_CLASS
+ FILTER_METHOD
+
+ data.pid = bpf_get_current_pid_tgid();
+ data.timestamp = bpf_ktime_get_ns();
+ depth = entry.lookup_or_init(&data.pid, &zero);
+ data.depth = DEPTH;
+ UPDATE
+
+ calls.perf_submit(ctx, &data, sizeof(data));
+ return 0;
+}
+"""
+
+def enable_probe(probe_name, func_name, read_class, read_method, is_return):
+ global program, trace_template, usdt
+ depth = "*depth + 1" if not is_return else "*depth | (1ULL << 63)"
+ update = "++(*depth);" if not is_return else "if (*depth) --(*depth);"
+ filter_class = "if (!prefix_class(data.clazz)) { return 0; }" \
+ if args.clazz else ""
+ filter_method = "if (!prefix_method(data.method)) { return 0; }" \
+ if args.method else ""
+ program += trace_template.replace("NAME", func_name) \
+ .replace("READ_CLASS", read_class) \
+ .replace("READ_METHOD", read_method) \
+ .replace("FILTER_CLASS", filter_class) \
+ .replace("FILTER_METHOD", filter_method) \
+ .replace("DEPTH", depth) \
+ .replace("UPDATE", update)
+ usdt.enable_probe(probe_name, func_name)
+
+# Enable the entry/return probe pairs for the selected language.
+
+if args.language == "java":
+ enable_probe("method__entry", "java_entry",
+ "bpf_usdt_readarg(2, ctx, &clazz);",
+ "bpf_usdt_readarg(4, ctx, &method);", is_return=False)
+ enable_probe("method__return", "java_return",
+ "bpf_usdt_readarg(2, ctx, &clazz);",
+ "bpf_usdt_readarg(4, ctx, &method);", is_return=True)
+elif args.language == "python":
+ enable_probe("function__entry", "python_entry",
+ "bpf_usdt_readarg(1, ctx, &clazz);", # filename really
+ "bpf_usdt_readarg(2, ctx, &method);", is_return=False)
+ enable_probe("function__return", "python_return",
+ "bpf_usdt_readarg(1, ctx, &clazz);", # filename really
+ "bpf_usdt_readarg(2, ctx, &method);", is_return=True)
+elif args.language == "ruby":
+ enable_probe("method__entry", "ruby_entry",
+ "bpf_usdt_readarg(1, ctx, &clazz);",
+ "bpf_usdt_readarg(2, ctx, &method);", is_return=False)
+ enable_probe("method__return", "ruby_return",
+ "bpf_usdt_readarg(1, ctx, &clazz);",
+ "bpf_usdt_readarg(2, ctx, &method);", is_return=True)
+ enable_probe("cmethod__entry", "ruby_centry",
+ "bpf_usdt_readarg(1, ctx, &clazz);",
+ "bpf_usdt_readarg(2, ctx, &method);", is_return=False)
+ enable_probe("cmethod__return", "ruby_creturn",
+ "bpf_usdt_readarg(1, ctx, &clazz);",
+ "bpf_usdt_readarg(2, ctx, &method);", is_return=True)
+
+if args.verbose:
+ print(usdt.get_text())
+ print(program)
+
+bpf = BPF(text=program, usdt_contexts=[usdt])
+print("Tracing method calls in %s process %d... Ctrl-C to quit." %
+ (args.language, args.pid))
+print("%-3s %-6s %-6s %-8s %s" % ("CPU", "PID", "TID", "TIME(us)", "METHOD"))
+
+class CallEvent(ct.Structure):
+ _fields_ = [
+ ("depth", ct.c_ulonglong),
+ ("pid", ct.c_ulonglong),
+ ("timestamp", ct.c_ulonglong),
+ ("clazz", ct.c_char * 80),
+ ("method", ct.c_char * 80)
+ ]
+
+start_ts = time.time()
+
+def print_event(cpu, data, size):
+ event = ct.cast(data, ct.POINTER(CallEvent)).contents
+ depth = event.depth & (~(1 << 63))
+ direction = "<- " if event.depth & (1 << 63) else "-> "
+ print("%-3d %-6d %-6d %-8.3f %-40s" % (cpu, event.pid >> 32,
+ event.pid & 0xFFFFFFFF, time.time() - start_ts,
+ (" " * (depth - 1)) + direction + event.clazz + "." + event.method))
+
+bpf["calls"].open_perf_buffer(print_event)
+while 1:
+ bpf.kprobe_poll()
diff --git a/tools/uflow_example.txt b/tools/uflow_example.txt
new file mode 100644
index 0000000..34dd533
--- /dev/null
+++ b/tools/uflow_example.txt
@@ -0,0 +1,112 @@
+Demonstrations of uflow.
+
+
+uflow traces method entry and exit events and prints a visual flow graph that
+shows how methods are entered and exited, similar to a tracing debugger with
+breakpoints. This can be useful for understanding program flow in high-level
+languages such as Java, Python, and Ruby, which provide USDT probes for method
+invocations.
+
+
+For example, trace all Ruby method calls in a specific process:
+
+# ./uflow ruby 27245
+Tracing method calls in ruby process 27245... Ctrl-C to quit.
+CPU PID TID TIME(us) METHOD
+3 27245 27245 4.536 <- IO.gets
+3 27245 27245 4.536 <- IRB::StdioInputMethod.gets
+3 27245 27245 4.536 -> IRB::Context.verbose?
+3 27245 27245 4.536 -> NilClass.nil?
+3 27245 27245 4.536 <- NilClass.nil?
+3 27245 27245 4.536 -> IO.tty?
+3 27245 27245 4.536 <- IO.tty?
+3 27245 27245 4.536 -> Kernel.kind_of?
+3 27245 27245 4.536 <- Kernel.kind_of?
+3 27245 27245 4.536 <- IRB::Context.verbose?
+3 27245 27245 4.536 <- IRB::Irb.signal_status
+3 27245 27245 4.536 -> String.chars
+3 27245 27245 4.536 <- String.chars
+^C
+
+In the preceding output, indentation indicates the depth of the flow graph,
+and the <- and -> arrows indicate the direction of the event (exit or entry).
+
+Often, the amount of output can be overwhelming. You can filter specific
+classes or methods. For example, trace only methods from the Thread class:
+
+# ./uflow -C java/lang/Thread java $(pidof java)
+Tracing method calls in java process 27722... Ctrl-C to quit.
+CPU PID TID TIME(us) METHOD
+3 27722 27731 3.144 -> java/lang/Thread.<init>
+3 27722 27731 3.144 -> java/lang/Thread.init
+3 27722 27731 3.144 -> java/lang/Thread.init
+3 27722 27731 3.144 -> java/lang/Thread.currentThread
+3 27722 27731 3.144 <- java/lang/Thread.currentThread
+3 27722 27731 3.144 -> java/lang/Thread.getThreadGroup
+3 27722 27731 3.144 <- java/lang/Thread.getThreadGroup
+3 27722 27731 3.144 -> java/lang/ThreadGroup.checkAccess
+3 27722 27731 3.144 <- java/lang/ThreadGroup.checkAccess
+3 27722 27731 3.144 -> java/lang/ThreadGroup.addUnstarted
+3 27722 27731 3.144 <- java/lang/ThreadGroup.addUnstarted
+3 27722 27731 3.145 -> java/lang/Thread.isDaemon
+3 27722 27731 3.145 <- java/lang/Thread.isDaemon
+3 27722 27731 3.145 -> java/lang/Thread.getPriority
+3 27722 27731 3.145 <- java/lang/Thread.getPriority
+3 27722 27731 3.145 -> java/lang/Thread.getContextClassLoader
+3 27722 27731 3.145 <- java/lang/Thread.getContextClassLoader
+3 27722 27731 3.145 -> java/lang/Thread.setPriority
+3 27722 27731 3.145 -> java/lang/Thread.checkAccess
+3 27722 27731 3.145 <- java/lang/Thread.checkAccess
+3 27722 27731 3.145 -> java/lang/Thread.getThreadGroup
+3 27722 27731 3.145 <- java/lang/Thread.getThreadGroup
+3 27722 27731 3.145 -> java/lang/ThreadGroup.getMaxPriority
+3 27722 27731 3.145 <- java/lang/ThreadGroup.getMaxPriority
+3 27722 27731 3.145 -> java/lang/Thread.setPriority0
+3 27722 27731 3.145 <- java/lang/Thread.setPriority0
+3 27722 27731 3.145 <- java/lang/Thread.setPriority
+3 27722 27731 3.145 -> java/lang/Thread.nextThreadID
+3 27722 27731 3.145 <- java/lang/Thread.nextThreadID
+3 27722 27731 3.145 <- java/lang/Thread.init
+3 27722 27731 3.145 <- java/lang/Thread.init
+3 27722 27731 3.145 <- java/lang/Thread.<init>
+3 27722 27731 3.145 -> java/lang/Thread.start
+3 27722 27731 3.145 -> java/lang/ThreadGroup.add
+3 27722 27731 3.145 <- java/lang/ThreadGroup.add
+3 27722 27731 3.145 -> java/lang/Thread.start0
+3 27722 27731 3.145 <- java/lang/Thread.start0
+3 27722 27731 3.146 <- java/lang/Thread.start
+2 27722 27742 3.146 -> java/lang/Thread.run
+^C
+
+The reason that the CPU number is printed in the first column is that events
+from different threads can be reordered when running on different CPUs, and
+produce nonsensical output. By looking for changes in the CPU column, you can
+easily see if the events you're following make sense and belong to the same
+thread running on the same CPU.
+
+
+USAGE message:
+
+# ./uflow -h
+usage: uflow.py [-h] [-M METHOD] [-C CLAZZ] [-v] {java,python,ruby} pid
+
+Trace method execution flow in high-level languages.
+
+positional arguments:
+ {java,python,ruby} language to trace
+ pid process id to attach to
+
+optional arguments:
+ -h, --help show this help message and exit
+ -M METHOD, --method METHOD
+ trace only calls to methods starting with this prefix
+ -C CLAZZ, --class CLAZZ
+ trace only calls to classes starting with this prefix
+ -v, --verbose verbose mode: print the BPF program (for debugging
+ purposes)
+
+examples:
+ ./uflow java 185 # trace Java method calls in process 185
+ ./uflow ruby 1344 # trace Ruby method calls in process 1344
+ ./uflow -M indexOf java 185 # trace only 'indexOf'-prefixed methods
+ ./uflow -C '<stdin>' python 180 # trace only REPL-defined methods
diff --git a/tools/ugc.py b/tools/ugc.py
new file mode 100755
index 0000000..8638a25
--- /dev/null
+++ b/tools/ugc.py
@@ -0,0 +1,216 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# ugc Summarize garbage collection events in high-level languages.
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: ugc [-v] [-m] {java,python,ruby,node} pid
+#
+# Copyright 2016 Sasha Goldshtein
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 19-Oct-2016 Sasha Goldshtein Created this.
+
+from __future__ import print_function
+import argparse
+from bcc import BPF, USDT
+import ctypes as ct
+import time
+
+examples = """examples:
+ ./ugc java 185 # trace Java GCs in process 185
+ ./ugc ruby 1344 -m # trace Ruby GCs reporting in ms
+"""
+parser = argparse.ArgumentParser(
+ description="Summarize garbage collection events in high-level languages.",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("language", choices=["java", "python", "ruby", "node"],
+ help="language to trace")
+parser.add_argument("pid", type=int, help="process id to attach to")
+parser.add_argument("-v", "--verbose", action="store_true",
+ help="verbose mode: print the BPF program (for debugging purposes)")
+parser.add_argument("-m", "--milliseconds", action="store_true",
+ help="report times in milliseconds (default is microseconds)")
+args = parser.parse_args()
+
+usdt = USDT(pid=args.pid)
+
+program = """
+struct gc_event_t {
+ u64 probe_index;
+ u64 elapsed_ns;
+ u64 field1;
+ u64 field2;
+ u64 field3;
+ u64 field4;
+ char string1[32];
+ char string2[32];
+};
+struct entry_t {
+ u64 start_ns;
+ u64 field1;
+ u64 field2;
+};
+
+BPF_PERF_OUTPUT(gcs);
+BPF_HASH(entry, u64, struct entry_t);
+"""
+
+class Probe(object):
+ def __init__(self, begin, end, begin_save, end_save, formatter):
+ self.begin = begin
+ self.end = end
+ self.begin_save = begin_save
+ self.end_save = end_save
+ self.formatter = formatter
+
+ def generate(self):
+ return """
+int trace_%s(struct pt_regs *ctx) {
+ u64 pid = bpf_get_current_pid_tgid();
+ struct entry_t e = {};
+ e.start_ns = bpf_ktime_get_ns();
+ %s
+ entry.update(&pid, &e);
+ return 0;
+}
+int trace_%s(struct pt_regs *ctx) {
+ u64 elapsed;
+ struct entry_t *e;
+ struct gc_event_t event = { .probe_index = %d }; // slot in probes[]
+ u64 pid = bpf_get_current_pid_tgid();
+ e = entry.lookup(&pid);
+ if (!e) {
+ return 0; // missed the entry event on this thread
+ }
+ elapsed = bpf_ktime_get_ns() - e->start_ns;
+ event.elapsed_ns = elapsed;
+ %s
+ gcs.perf_submit(ctx, &event, sizeof(event));
+ return 0;
+}
+ """ % (self.begin, self.begin_save, self.end,
+ probes.index(self), self.end_save)
+
+ def attach(self):
+ usdt.enable_probe(self.begin, "trace_%s" % self.begin)
+ usdt.enable_probe(self.end, "trace_%s" % self.end)
+
+ def format(self, data):
+ return self.formatter(data)
+
+probes = []
+
+#
+# Java
+#
+if args.language == "java":
+ # Oddly, the gc__begin/gc__end probes don't really have any useful
+ # information, while the mem__pool* ones do. There's also a bunch of
+ # probes described in the hotspot_gc*.stp file which aren't there
+ # when looking at a live Java process.
+ begin_save = """
+ bpf_usdt_readarg(6, ctx, &e.field1); // used bytes
+ bpf_usdt_readarg(8, ctx, &e.field2); // max bytes
+ """
+ end_save = """
+ event.field1 = e->field1; // used bytes at start
+ event.field2 = e->field2; // max bytes at start
+ bpf_usdt_readarg(6, ctx, &event.field3); // used bytes at end
+ bpf_usdt_readarg(8, ctx, &event.field4); // max bytes at end
+ u64 manager = 0, pool = 0;
+ bpf_usdt_readarg(1, ctx, &manager); // ptr to manager name
+ bpf_usdt_readarg(3, ctx, &pool); // ptr to pool name
+ bpf_probe_read(&event.string1, sizeof(event.string1), (void *)manager);
+ bpf_probe_read(&event.string2, sizeof(event.string2), (void *)pool);
+ """
+ formatter = lambda e: "%s %s used=%d->%d max=%d->%d" % \
+ (e.string1, e.string2, e.field1, e.field3, e.field2, e.field4)
+ probes.append(Probe("mem__pool__gc__begin", "mem__pool__gc__end",
+ begin_save, end_save, formatter))
+ probes.append(Probe("gc__begin", "gc__end",
+ "", "", lambda _: "no additional info available"))
+#
+# Python
+#
+elif args.language == "python":
+ begin_save = """
+ int gen = 0;
+ bpf_usdt_readarg(1, ctx, &gen);
+ e.field1 = gen;
+ """
+ end_save = """
+ long objs = 0;
+ bpf_usdt_readarg(1, ctx, &objs);
+ event.field1 = e->field1;
+ event.field2 = objs;
+ """
+ formatter = lambda event: "gen %d GC collected %d objects" % \
+ (event.field1, event.field2)
+ probes.append(Probe("gc__start", "gc__done",
+ begin_save, end_save, formatter))
+#
+# Ruby
+#
+elif args.language == "ruby":
+ # Ruby GC probes do not have any additional information available.
+ probes.append(Probe("gc__mark__begin", "gc__mark__end",
+ "", "", lambda _: "GC mark stage"))
+ probes.append(Probe("gc__sweep__begin", "gc__sweep__end",
+ "", "", lambda _: "GC sweep stage"))
+#
+# Node
+#
+elif args.language == "node":
+ end_save = """
+ u32 gc_type = 0;
+ bpf_usdt_readarg(1, ctx, &gc_type);
+ event.field1 = gc_type;
+ """
+ descs = {"GC scavenge": 1, "GC mark-sweep-compact": 2,
+ "GC incremental mark": 4, "GC weak callbacks": 8}
+ probes.append(Probe("gc__start", "gc__done", "", end_save,
+ lambda e: str.join(", ",
+ [desc for desc, val in descs.items()
+ if e.field1 & val != 0])))
+
+for probe in probes:
+ program += probe.generate()
+ probe.attach()
+
+if args.verbose:
+ print(usdt.get_text())
+ print(program)
+
+bpf = BPF(text=program, usdt_contexts=[usdt])
+print("Tracing garbage collections in %s process %d... Ctrl-C to quit." %
+ (args.language, args.pid))
+time_col = "TIME (ms)" if args.milliseconds else "TIME (us)"
+print("%-8s %-40s %-8s" % ("START", "DESCRIPTION", time_col))
+
+class GCEvent(ct.Structure):
+ _fields_ = [
+ ("probe_index", ct.c_ulonglong),
+ ("elapsed_ns", ct.c_ulonglong),
+ ("field1", ct.c_ulonglong),
+ ("field2", ct.c_ulonglong),
+ ("field3", ct.c_ulonglong),
+ ("field4", ct.c_ulonglong),
+ ("string1", ct.c_char * 32),
+ ("string2", ct.c_char * 32)
+ ]
+
+start_ts = time.time()
+
+def print_event(cpu, data, size):
+ event = ct.cast(data, ct.POINTER(GCEvent)).contents
+ # Divide by a float: Python 2 "/" on two integers truncates the duration.
+ elapsed = event.elapsed_ns / (1e6 if args.milliseconds else 1e3)
+ print("%-8.3f %-40s %-8.2f" % (time.time() - start_ts,
+ probes[event.probe_index].format(event),
+ elapsed))
+
+bpf["gcs"].open_perf_buffer(print_event)
+while 1:
+ bpf.kprobe_poll()
diff --git a/tools/ugc_example.txt b/tools/ugc_example.txt
new file mode 100644
index 0000000..27f1e51
--- /dev/null
+++ b/tools/ugc_example.txt
@@ -0,0 +1,66 @@
+Demonstrations of ugc.
+
+
+ugc traces garbage collection events in high-level languages, including Java,
+Python, Ruby, and Node. Each GC event is printed with some additional
+information provided by that language's runtime, if available. The duration of
+the GC event is also provided.
+
+For example, to trace all garbage collection events in a specific Node process:
+
+# ./ugc node $(pidof node)
+Tracing garbage collections in node process 3018... Ctrl-C to quit.
+START DESCRIPTION TIME (us)
+3.864 GC mark-sweep-compact 3189.00
+4.937 GC scavenge 1254.00
+4.940 GC scavenge 1657.00
+4.943 GC scavenge 1171.00
+4.949 GC scavenge 2216.00
+4.954 GC scavenge 2515.00
+4.960 GC scavenge 2243.00
+4.966 GC scavenge 2410.00
+4.976 GC scavenge 3003.00
+4.986 GC scavenge 4174.00
+4.994 GC scavenge 1508.00
+5.003 GC scavenge 1966.00
+5.010 GC scavenge 1636.00
+5.022 GC scavenge 3564.00
+5.035 GC scavenge 3275.00
+5.045 GC incremental mark 157.00
+5.049 GC mark-sweep-compact 3248.00
+5.060 GC scavenge 4785.00
+5.081 GC scavenge 6616.00
+5.094 GC scavenge 8570.00
+5.144 GC scavenge 456.00
+7.188 GC scavenge 2345.00
+7.227 GC scavenge 12054.00
+7.253 GC scavenge 15626.00
+7.304 GC scavenge 15329.00
+7.384 GC scavenge 7168.00
+7.411 GC scavenge 3794.00
+7.414 GC incremental mark 123.00
+7.430 GC mark-sweep-compact 7110.00
+^C
+
+
+USAGE message:
+
+# ./ugc -h
+usage: ugc.py [-h] [-v] [-m] {java,python,ruby,node} pid
+
+Summarize garbage collection events in high-level languages.
+
+positional arguments:
+ {java,python,ruby,node}
+ language to trace
+ pid process id to attach to
+
+optional arguments:
+ -h, --help show this help message and exit
+ -v, --verbose verbose mode: print the BPF program (for debugging
+ purposes)
+ -m, --milliseconds report times in milliseconds (default is microseconds)
+
+examples:
+ ./ugc java 185 # trace Java GCs in process 185
+ ./ugc ruby 1344 -m # trace Ruby GCs reporting in ms
diff --git a/tools/uobjnew.py b/tools/uobjnew.py
new file mode 100755
index 0000000..993bca8
--- /dev/null
+++ b/tools/uobjnew.py
@@ -0,0 +1,168 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# uobjnew Summarize object allocations in high-level languages.
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: uobjnew [-C TOP_COUNT] [-S TOP_SIZE] [-v] {java,ruby,c} pid [interval]
+#
+# Copyright 2016 Sasha Goldshtein
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 25-Oct-2016 Sasha Goldshtein Created this.
+
+from __future__ import print_function
+import argparse
+from bcc import BPF, USDT
+from time import sleep
+
+examples = """examples:
+ ./uobjnew java 145 # summarize Java allocations in process 145
+ ./uobjnew c 2020 1 # grab malloc() sizes and print every second
+ ./uobjnew ruby 6712 -C 10 # top 10 Ruby types by number of allocations
+ ./uobjnew ruby 6712 -S 10 # top 10 Ruby types by total size
+"""
+parser = argparse.ArgumentParser(
+ description="Summarize object allocations in high-level languages.",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("language", choices=["java", "ruby", "c"],
+ help="language to trace")
+parser.add_argument("pid", type=int, help="process id to attach to")
+parser.add_argument("interval", type=int, nargs='?',
+ help="print every specified number of seconds")
+parser.add_argument("-C", "--top-count", type=int,
+ help="number of most frequently allocated types to print")
+parser.add_argument("-S", "--top-size", type=int,
+ help="number of largest types by allocated bytes to print")
+parser.add_argument("-v", "--verbose", action="store_true",
+ help="verbose mode: print the BPF program (for debugging purposes)")
+args = parser.parse_args()
+
+program = """
+#include <linux/ptrace.h>
+
+struct key_t {
+#if MALLOC_TRACING
+ u64 size;
+#else
+ char name[50];
+#endif
+};
+
+struct val_t {
+ u64 total_size;
+ u64 num_allocs;
+};
+
+BPF_HASH(allocs, struct key_t, struct val_t);
+""".replace("MALLOC_TRACING", "1" if args.language == "c" else "0")
+
+usdt = USDT(pid=args.pid)
+
+#
+# Java
+#
+if args.language == "java":
+ program += """
+int alloc_entry(struct pt_regs *ctx) {
+ struct key_t key = {};
+ struct val_t *valp, zero = {};
+ u64 classptr = 0, size = 0;
+ bpf_usdt_readarg(2, ctx, &classptr);
+ bpf_usdt_readarg(4, ctx, &size);
+ bpf_probe_read(&key.name, sizeof(key.name), (void *)classptr);
+ valp = allocs.lookup_or_init(&key, &zero);
+ valp->total_size += size;
+ valp->num_allocs += 1;
+ return 0;
+}
+ """
+ usdt.enable_probe("object__alloc", "alloc_entry")
+#
+# Ruby
+#
+elif args.language == "ruby":
+ create_template = """
+int THETHING_alloc_entry(struct pt_regs *ctx) {
+ struct key_t key = { .name = "THETHING" };
+ struct val_t *valp, zero = {};
+ u64 size = 0;
+ bpf_usdt_readarg(1, ctx, &size);
+ valp = allocs.lookup_or_init(&key, &zero);
+ valp->total_size += size;
+ valp->num_allocs += 1;
+ return 0;
+}
+ """
+ program += """
+int object_alloc_entry(struct pt_regs *ctx) {
+ struct key_t key = {};
+ struct val_t *valp, zero = {};
+ u64 classptr = 0;
+ bpf_usdt_readarg(1, ctx, &classptr);
+ bpf_probe_read(&key.name, sizeof(key.name), (void *)classptr);
+ valp = allocs.lookup_or_init(&key, &zero);
+ valp->num_allocs += 1; // We don't know the size, unfortunately
+ return 0;
+}
+ """
+ usdt.enable_probe("object__create", "object_alloc_entry")
+ for thing in ["string", "hash", "array"]:
+ program += create_template.replace("THETHING", thing)
+ usdt.enable_probe("%s__create" % thing, "%s_alloc_entry" % thing)
+#
+# C
+#
+elif args.language == "c":
+ program += """
+int alloc_entry(struct pt_regs *ctx, size_t size) {
+ struct key_t key = {};
+ struct val_t *valp, zero = {};
+ key.size = size;
+ valp = allocs.lookup_or_init(&key, &zero);
+ valp->total_size += size;
+ valp->num_allocs += 1;
+ return 0;
+}
+ """
+
+if args.verbose:
+ print(usdt.get_text())
+ print(program)
+
+bpf = BPF(text=program, usdt_contexts=[usdt])
+if args.language == "c":
+ bpf.attach_uprobe(name="c", sym="malloc", fn_name="alloc_entry",
+ pid=args.pid)
+
+exit_signaled = False
+print("Tracing allocations in process %d (language: %s)... Ctrl-C to quit." %
+ (args.pid, args.language or "none"))
+while True:
+ try:
+ sleep(args.interval or 99999999)
+ except KeyboardInterrupt:
+ exit_signaled = True
+ print()
+ data = bpf["allocs"]
+ if args.top_count:
+ data = sorted(data.items(), key=lambda kv: kv[1].num_allocs)
+ data = data[-args.top_count:]
+ elif args.top_size:
+ data = sorted(data.items(), key=lambda kv: kv[1].total_size)
+ data = data[-args.top_size:]
+ else:
+ data = sorted(data.items(), key=lambda kv: kv[1].total_size)
+ print("%-30s %8s %12s" % ("TYPE", "# ALLOCS", "# BYTES"))
+ for key, value in data:
+ if args.language == "c":
+ obj_type = "block size %d" % key.size
+ else:
+ obj_type = key.name
+ print("%-30s %8d %12d" %
+ (obj_type, value.num_allocs, value.total_size))
+ if args.interval and not exit_signaled:
+ bpf["allocs"].clear()
+ else:
+ exit()
diff --git a/tools/uobjnew_example.txt b/tools/uobjnew_example.txt
new file mode 100644
index 0000000..61d2afb
--- /dev/null
+++ b/tools/uobjnew_example.txt
@@ -0,0 +1,74 @@
+Demonstrations of uobjnew.
+
+
+uobjnew summarizes new object allocation events and prints out statistics on
+which object type has been allocated frequently, and how many bytes of that
+type have been allocated. This helps diagnose common allocation paths, which
+can in turn cause heavy garbage collection.
+
+For example, trace Ruby object allocations when running some simple commands
+in irb (the Ruby REPL):
+
+# ./uobjnew ruby 27245
+Tracing allocations in process 27245 (language: ruby)... Ctrl-C to quit.
+
+TYPE # ALLOCS # BYTES
+NameError 1 0
+RubyToken::TkSPACE 1 0
+RubyToken::TkSTRING 1 0
+String 7 0
+RubyToken::TkNL 2 0
+RubyToken::TkIDENTIFIER 2 0
+array 55 129
+string 344 1348
+^C
+
+
+Plain C/C++ allocations (through "malloc") are also supported. We can't report
+the type being allocated, but we can report the object sizes at least. Also,
+print only the top 10 rows by number of bytes allocated:
+
+# ./uobjnew -S 10 c 27245
+Tracing allocations in process 27245 (language: c)... Ctrl-C to quit.
+
+TYPE # ALLOCS # BYTES
+block size 64 22 1408
+block size 992 2 1984
+block size 32 68 2176
+block size 48 48 2304
+block size 944 4 3776
+block size 1104 4 4416
+block size 160 32 5120
+block size 535 15 8025
+block size 128 112 14336
+block size 80 569 45520
+^C
+
+
+USAGE message:
+
+# ./uobjnew -h
+usage: uobjnew.py [-h] [-C TOP_COUNT] [-S TOP_SIZE] [-v]
+ {java,ruby,c} pid [interval]
+
+Summarize object allocations in high-level languages.
+
+positional arguments:
+ {java,ruby,c} language to trace
+ pid process id to attach to
+ interval print every specified number of seconds
+
+optional arguments:
+ -h, --help show this help message and exit
+ -C TOP_COUNT, --top-count TOP_COUNT
+ number of most frequently allocated types to print
+ -S TOP_SIZE, --top-size TOP_SIZE
+ number of largest types by allocated bytes to print
+ -v, --verbose verbose mode: print the BPF program (for debugging
+ purposes)
+
+examples:
+ ./uobjnew java 145 # summarize Java allocations in process 145
+ ./uobjnew c 2020 1 # grab malloc() sizes and print every second
+ ./uobjnew ruby 6712 -C 10 # top 10 Ruby types by number of allocations
+ ./uobjnew ruby 6712 -S 10 # top 10 Ruby types by total size
diff --git a/tools/ustat.py b/tools/ustat.py
new file mode 100755
index 0000000..cc410df
--- /dev/null
+++ b/tools/ustat.py
@@ -0,0 +1,279 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# ustat Activity stats from high-level languages, including exceptions,
+# method calls, class loads, garbage collections, and more.
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: ustat [-l {java,python,ruby,node}] [-C]
+# [-S {cload,excp,gc,method,objnew,thread}] [-r MAXROWS] [-d]
+# [interval [count]]
+#
+# This uses in-kernel eBPF maps to store per process summaries for efficiency.
+# Newly-created processes might only be traced at the next interval, if the
+# relevant USDT probe requires enabling through a semaphore.
+#
+# Copyright 2016 Sasha Goldshtein
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 26-Oct-2016 Sasha Goldshtein Created this.
+
+from __future__ import print_function
+import argparse
+from bcc import BPF, USDT
+import os
+from subprocess import call
+from time import sleep, strftime
+
+class Category(object):
+ THREAD = "THREAD"
+ METHOD = "METHOD"
+ OBJNEW = "OBJNEW"
+ CLOAD = "CLOAD"
+ EXCP = "EXCP"
+ GC = "GC"
+
+class Probe(object):
+ def __init__(self, language, procnames, events):
+ """
+ Initialize a new probe object with a specific language, set of process
+ names to monitor for that language, and a dictionary of events and
+ categories. The dictionary is a mapping of USDT probe names (such as
+ 'gc__start') to event categories supported by this tool -- from the
+ Category class.
+ """
+ self.language = language
+ self.procnames = procnames
+ self.events = events
+
+ def _find_targets(self):
+ """Find pids where the comm is one of the specified list"""
+ self.targets = {}
+ all_pids = [int(pid) for pid in os.listdir('/proc') if pid.isdigit()]
+ for pid in all_pids:
+ try:
+ comm = open('/proc/%d/comm' % pid).read().strip()
+ if comm in self.procnames:
+ cmdline = open('/proc/%d/cmdline' % pid).read()
+ self.targets[pid] = cmdline.replace('\0', ' ')
+ except IOError:
+ continue # process may already have terminated
+
+ def _enable_probes(self):
+ self.usdts = []
+ for pid in self.targets:
+ usdt = USDT(pid=pid)
+ for event in self.events:
+ try:
+ usdt.enable_probe(event, "%s_%s" % (self.language, event))
+ except Exception:
+ # This process might not have a recent version of the USDT
+ # probes enabled, or might have been compiled without USDT
+ # probes at all. The process could even have been shut down
+ # and the pid been recycled. We have to gracefully handle
+ # the possibility that we can't attach probes to it at all.
+ pass
+ self.usdts.append(usdt)
+
+ def _generate_tables(self):
+ text = """
+BPF_HASH(%s_%s_counts, u32, u64); // pid to event count
+ """
+ return str.join('', [text % (self.language, event)
+ for event in self.events])
+
+ def _generate_functions(self):
+ text = """
+int %s_%s(void *ctx) {
+ u64 *valp, zero = 0;
+ u32 tgid = bpf_get_current_pid_tgid() >> 32;
+ valp = %s_%s_counts.lookup_or_init(&tgid, &zero);
+ ++(*valp);
+ return 0;
+}
+ """
+ lang = self.language
+ return str.join('', [text % (lang, event, lang, event)
+ for event in self.events])
+
+ def get_program(self):
+ self._find_targets()
+ self._enable_probes()
+ return self._generate_tables() + self._generate_functions()
+
+ def get_usdts(self):
+ return self.usdts
+
+ def get_counts(self, bpf):
+ """Return per-pid counts, summed over events sharing a category"""
+ event_dict = dict([(category, 0) for category in self.events.values()])
+ result = dict([(pid, event_dict.copy()) for pid in self.targets])
+ for event, category in self.events.items():
+ counts = bpf["%s_%s_counts" % (self.language, event)]
+ for pid, count in counts.items():
+ result[pid.value][category] += count.value
+ counts.clear()
+ return result
+
+ def cleanup(self):
+ self.usdts = None
+
+class Tool(object):
+    """Command-line driver for ustat.
+
+    Parses the arguments, builds one Probe per supported language and then,
+    once per interval, attaches the probes, sleeps, prints a per-process
+    table of event rates and detaches again.
+    """
+
+    def _parse_args(self):
+        """Parse the command line into self.args.
+
+        Exits through argparse's error path when the interval is not
+        positive, because the interval is slept on and every printed rate
+        is divided by it.
+        """
+        examples = """examples:
+ ./ustat # stats for all languages, 1 second refresh
+ ./ustat -C # don't clear the screen
+ ./ustat -l java # Java processes only
+ ./ustat 5 # 5 second summaries
+ ./ustat 5 10 # 5 second summaries, 10 times only
+ """
+        parser = argparse.ArgumentParser(
+            description="Activity stats from high-level languages.",
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            epilog=examples)
+        parser.add_argument("-l", "--language",
+            choices=["java", "python", "ruby", "node"],
+            help="language to trace (default: all languages)")
+        parser.add_argument("-C", "--noclear", action="store_true",
+            help="don't clear the screen")
+        parser.add_argument("-S", "--sort",
+            choices=[cat.lower() for cat in dir(Category) if cat.isupper()],
+            help="sort by this field (descending order)")
+        parser.add_argument("-r", "--maxrows", default=20, type=int,
+            help="maximum rows to print, default 20")
+        parser.add_argument("-d", "--debug", action="store_true",
+            help="Print the resulting BPF program (for debugging purposes)")
+        parser.add_argument("interval", nargs="?", default=1, type=int,
+            help="output interval, in seconds")
+        parser.add_argument("count", nargs="?", default=99999999, type=int,
+            help="number of outputs")
+        self.args = parser.parse_args()
+        if self.args.interval <= 0:
+            parser.error("interval must be a positive number of seconds")
+
+    def _create_probes(self):
+        """Build self.probes: one Probe per language, or only the -l one.
+
+        Each Probe receives the language name, the process names to look
+        for and a map from USDT probe name to the Category it counts as.
+        """
+        probes_by_lang = {
+            "node": Probe("node", ["node"], {
+                "gc__start": Category.GC
+            }),
+            "python": Probe("python", ["python"], {
+                "function__entry": Category.METHOD,
+                "gc__start": Category.GC
+            }),
+            "ruby": Probe("ruby", ["ruby", "irb"], {
+                "method__entry": Category.METHOD,
+                "cmethod__entry": Category.METHOD,
+                "gc__mark__begin": Category.GC,
+                "gc__sweep__begin": Category.GC,
+                "object__create": Category.OBJNEW,
+                "hash__create": Category.OBJNEW,
+                "string__create": Category.OBJNEW,
+                "array__create": Category.OBJNEW,
+                "require__entry": Category.CLOAD,
+                "load__entry": Category.CLOAD,
+                "raise": Category.EXCP
+            }),
+            "java": Probe("java", ["java"], {
+                "gc__begin": Category.GC,
+                "mem__pool__gc__begin": Category.GC,
+                "thread__start": Category.THREAD,
+                "class__loaded": Category.CLOAD,
+                "object__alloc": Category.OBJNEW,
+                "method__entry": Category.METHOD,
+                "ExceptionOccurred__entry": Category.EXCP
+            })
+        }
+
+        if self.args.language:
+            self.probes = [probes_by_lang[self.args.language]]
+        else:
+            # Materialise the values so self.probes is a plain list on
+            # Python 3 as well, like the single-language branch.
+            self.probes = list(probes_by_lang.values())
+
+    def _attach_probes(self):
+        """Generate the BPF program for all probes, load it and attach it."""
+        program = str.join('\n', [p.get_program() for p in self.probes])
+        if self.args.debug:
+            print(program)
+            for probe in self.probes:
+                print("Attached to %s processes:" % probe.language,
+                        str.join(', ', map(str, probe.targets)))
+        self.bpf = BPF(text=program)
+        usdts = [usdt for probe in self.probes for usdt in probe.get_usdts()]
+        # Filter out duplicates when we have multiple processes with the same
+        # uprobe. We are attaching to these probes manually instead of using
+        # the USDT support from the bcc module, because the USDT class attaches
+        # to each uprobe with a specific pid. When there is more than one
+        # process from some language, we end up attaching more than once to the
+        # same uprobe (albeit with different pids), which is not allowed.
+        # Instead, we use a global attach (with pid=-1).
+        uprobes = set([(path, func, addr) for usdt in usdts
+                       for (path, func, addr, _)
+                       in usdt.enumerate_active_probes()])
+        for (path, func, addr) in uprobes:
+            self.bpf.attach_uprobe(name=path, fn_name=func, addr=addr, pid=-1)
+
+    def _detach_probes(self):
+        """Release the USDT contexts and the BPF object of this iteration."""
+        for probe in self.probes:
+            probe.cleanup()     # Cleans up USDT contexts
+        self.bpf.cleanup()      # Cleans up all attached probes
+        self.bpf = None
+
+    def _loop_iter(self):
+        """Run one sampling round: attach, sleep, print the table, detach.
+
+        A Ctrl-C during the sleep sets self.exiting; the data gathered so
+        far is still printed before run() terminates the tool.
+        """
+        self._attach_probes()
+        try:
+            sleep(self.args.interval)
+        except KeyboardInterrupt:
+            self.exiting = True
+
+        if not self.args.noclear:
+            call("clear")
+        else:
+            print()
+        with open("/proc/loadavg") as loadavg:
+            print("%-8s loadavg: %s" % (strftime("%H:%M:%S"), loadavg.read()))
+        print("%-6s %-20s %-10s %-6s %-10s %-8s %-6s %-6s" % (
+            "PID", "CMDLINE", "METHOD/s", "GC/s", "OBJNEW/s",
+            "CLOAD/s", "EXC/s", "THR/s"))
+
+        line = 0
+        counts = {}
+        targets = {}
+        for probe in self.probes:
+            counts.update(probe.get_counts(self.bpf))
+            targets.update(probe.targets)
+        # The sort keys index into the (pid, stats) pair instead of using
+        # tuple parameters in the lambda, which Python 3 no longer accepts.
+        if self.args.sort:
+            sort_field = self.args.sort.upper()
+            counts = sorted(counts.items(),
+                            key=lambda item: -item[1].get(sort_field, 0))
+        else:
+            counts = sorted(counts.items(), key=lambda item: item[0])
+        for pid, stats in counts:
+            print("%-6d %-20s %-10d %-6d %-10d %-8d %-6d %-6d" % (
+                pid, targets[pid][:20],
+                stats.get(Category.METHOD, 0) / self.args.interval,
+                stats.get(Category.GC, 0) / self.args.interval,
+                stats.get(Category.OBJNEW, 0) / self.args.interval,
+                stats.get(Category.CLOAD, 0) / self.args.interval,
+                stats.get(Category.EXCP, 0) / self.args.interval,
+                stats.get(Category.THREAD, 0) / self.args.interval
+                ))
+            line += 1
+            if line >= self.args.maxrows:
+                break
+        self._detach_probes()
+
+    def run(self):
+        """Entry point: parse arguments, then sample until done or Ctrl-C."""
+        self._parse_args()
+        self._create_probes()
+        print('Tracing... Output every %d secs. Hit Ctrl-C to end' %
+              self.args.interval)
+        countdown = self.args.count
+        self.exiting = False
+        while True:
+            self._loop_iter()
+            countdown -= 1
+            if self.exiting or countdown == 0:
+                print("Detaching...")
+                exit()
+
+# Script entry point. Tool only catches KeyboardInterrupt around its sampling
+# sleep; a Ctrl-C arriving anywhere else ends the program quietly here.
+if __name__ == "__main__":
+ try:
+ Tool().run()
+ except KeyboardInterrupt:
+ pass
diff --git a/tools/ustat_example.txt b/tools/ustat_example.txt
new file mode 100644
index 0000000..7da01e6
--- /dev/null
+++ b/tools/ustat_example.txt
@@ -0,0 +1,78 @@
+Demonstrations of ustat.
+
+
+ustat is a "top"-like tool for monitoring events in high-level languages. It
+prints statistics about garbage collections, method calls, object allocations,
+and various other events for every process that it recognizes with a Java,
+Python, Ruby, or Node runtime.
+
+For example:
+
+# ./ustat.py
+Tracing... Output every 10 secs. Hit Ctrl-C to end
+12:17:17 loadavg: 0.33 0.08 0.02 5/211 26284
+
+PID CMDLINE METHOD/s GC/s OBJNEW/s CLOAD/s EXC/s THR/s
+3018 node/node 0 3 0 0 0 0
+^C
+Detaching...
+
+
+If desired, you can instruct ustat to print a certain number of summaries and
+exit, which can be useful to get a quick picture of what's happening on the
+system over a short time interval. Here, we ask ustat to print 5-second
+summaries 12 times (for a total time of 1 minute):
+
+# ./ustat.py -C 5 12
+Tracing... Output every 5 secs. Hit Ctrl-C to end
+12:18:26 loadavg: 0.27 0.11 0.04 2/336 26455
+
+PID CMDLINE METHOD/s GC/s OBJNEW/s CLOAD/s EXC/s THR/s
+3018 node/node 0 1 0 0 0 0
+
+12:18:31 loadavg: 0.33 0.12 0.04 2/336 26456
+
+PID CMDLINE METHOD/s GC/s OBJNEW/s CLOAD/s EXC/s THR/s
+3018 node/node 0 0 0 0 0 0
+26439 java -XX:+ExtendedDT 2776045 0 0 0 0 0
+
+12:18:37 loadavg: 0.38 0.14 0.05 2/336 26457
+
+PID CMDLINE METHOD/s GC/s OBJNEW/s CLOAD/s EXC/s THR/s
+3018 node/node 0 0 0 0 0 0
+26439 java -XX:+ExtendedDT 2804378 0 0 0 0 0
+
+(...more output omitted for brevity)
+
+
+USAGE message:
+
+# ./ustat.py -h
+usage: ustat.py [-h] [-l {java,python,ruby,node}] [-C]
+ [-S {cload,excp,gc,method,objnew,thread}] [-r MAXROWS] [-d]
+ [interval] [count]
+
+Activity stats from high-level languages.
+
+positional arguments:
+ interval output interval, in seconds
+ count number of outputs
+
+optional arguments:
+ -h, --help show this help message and exit
+ -l {java,python,ruby,node}, --language {java,python,ruby,node}
+ language to trace (default: all languages)
+ -C, --noclear don't clear the screen
+ -S {cload,excp,gc,method,objnew,thread}, --sort {cload,excp,gc,method,objnew,thread}
+ sort by this field (descending order)
+ -r MAXROWS, --maxrows MAXROWS
+ maximum rows to print, default 20
+ -d, --debug Print the resulting BPF program (for debugging
+ purposes)
+
+examples:
+ ./ustat # stats for all languages, 1 second refresh
+ ./ustat -C # don't clear the screen
+ ./ustat -l java # Java processes only
+ ./ustat 5 # 5 second summaries
+ ./ustat 5 10 # 5 second summaries, 10 times only
diff --git a/tools/uthreads.py b/tools/uthreads.py
new file mode 100755
index 0000000..4f089d4
--- /dev/null
+++ b/tools/uthreads.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# uthreads Trace thread creation/destruction events in high-level languages.
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: uthreads [-l {java}] [-v] pid
+#
+# Copyright 2016 Sasha Goldshtein
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 25-Oct-2016 Sasha Goldshtein Created this.
+
+from __future__ import print_function
+import argparse
+from bcc import BPF, USDT
+import ctypes as ct
+import time
+
+# Command-line interface: an optional language, a mandatory target pid and a
+# verbose switch. The USDT context is created for the target pid right away.
+examples = """examples:
+ ./uthreads -l java 185 # trace Java threads in process 185
+ ./uthreads 12245 # trace only pthreads in process 12245
+"""
+description = ("Trace thread creation/destruction events in "
+               "high-level languages.")
+parser = argparse.ArgumentParser(
+    description=description,
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+parser.add_argument(
+    "-l", "--language",
+    choices=["java"],
+    help="language to trace (none for pthreads only)")
+parser.add_argument(
+    "pid",
+    type=int,
+    help="process id to attach to")
+parser.add_argument(
+    "-v", "--verbose",
+    action="store_true",
+    help="verbose mode: print the BPF program (for debugging purposes)")
+args = parser.parse_args()
+
+usdt = USDT(pid=args.pid)
+
+# BPF C text used in every mode. It declares the event record sent to user
+# space (mirrored by the ThreadEvent ctypes structure further down), the
+# "threads" perf output, and trace_pthread, which stores USDT argument 2 (the
+# start routine address) in runtime_id and the low 32 bits of
+# bpf_get_current_pid_tgid() in native_id before submitting the event.
+program = """
+struct thread_event_t {
+ u64 runtime_id;
+ u64 native_id;
+ char type[8];
+ char name[80];
+};
+
+BPF_PERF_OUTPUT(threads);
+
+int trace_pthread(struct pt_regs *ctx) {
+ struct thread_event_t te = {};
+ u64 start_routine = 0;
+ char type[] = "pthread";
+ te.native_id = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
+ bpf_usdt_readarg(2, ctx, &start_routine);
+ te.runtime_id = start_routine; // This is really a function pointer
+ __builtin_memcpy(&te.type, type, sizeof(te.type));
+ threads.perf_submit(ctx, &te, sizeof(te));
+ return 0;
+}
+"""
+# Route the "pthread_start" USDT probe of the target process to trace_pthread.
+usdt.enable_probe("pthread_start", "trace_pthread")
+
+if args.language == "java":
+    # Handler template, instantiated once for thread start and once for thread
+    # stop. The two placeholders are the C function name and the event type.
+    # The type label is copied with sizeof(type), the size of the local
+    # array, so the copy never reads past a label shorter than te.type;
+    # te is zero-initialised, so the remaining bytes stay NUL. The labels
+    # used below ("start", "stop") fit in te.type including their NUL.
+    template = """
+int %s(struct pt_regs *ctx) {
+ char type[] = "%s";
+ struct thread_event_t te = {};
+ u64 nameptr = 0, id = 0, native_id = 0;
+ bpf_usdt_readarg(1, ctx, &nameptr);
+ bpf_usdt_readarg(3, ctx, &id);
+ bpf_usdt_readarg(4, ctx, &native_id);
+ bpf_probe_read(&te.name, sizeof(te.name), (void *)nameptr);
+ te.runtime_id = id;
+ te.native_id = native_id;
+ __builtin_memcpy(&te.type, type, sizeof(type));
+ threads.perf_submit(ctx, &te, sizeof(te));
+ return 0;
+}
+ """
+    program += template % ("trace_start", "start")
+    program += template % ("trace_stop", "stop")
+    usdt.enable_probe("thread__start", "trace_start")
+    usdt.enable_probe("thread__stop", "trace_stop")
+
+# In verbose mode, dump the USDT-generated text followed by our own program.
+if args.verbose:
+    for text in (usdt.get_text(), program):
+        print(text)
+
+# Load the program together with the USDT context, then print the banner and
+# the column headings for the event lines.
+bpf = BPF(text=program, usdt_contexts=[usdt])
+language = args.language or "none"
+print("Tracing thread events in process %d (language: %s)... Ctrl-C to quit." %
+      (args.pid, language))
+header = ("TIME", "ID", "TYPE", "DESCRIPTION")
+print("%-8s %-16s %-8s %-30s" % header)
+
+# ctypes mirror of struct thread_event_t from the BPF program text: the same
+# four fields in the same order (two 64-bit ids, an 8-byte type label and an
+# 80-byte name), so that perf buffer data can be cast to it in print_event.
+class ThreadEvent(ct.Structure):
+ _fields_ = [
+ ("runtime_id", ct.c_ulonglong),
+ ("native_id", ct.c_ulonglong),
+ ("type", ct.c_char * 8),
+ ("name", ct.c_char * 80),
+ ]
+
+start_ts = time.time()
+
+def _to_text(value):
+    """Return value unchanged unless it is bytes that is not str (Python 3).
+
+    On Python 2, bytes is str, so values pass through untouched there.
+    """
+    if isinstance(value, bytes) and not isinstance(value, str):
+        return value.decode("utf-8", "replace")
+    return value
+
+def print_event(cpu, data, size):
+    """Perf buffer callback: print one thread event line.
+
+    data is cast to ThreadEvent. For "pthread" events the description is
+    the symbol resolved for the start routine address and the ID is the
+    native thread id; for other events the description is the recorded
+    name and the ID shows both the runtime and the native id.
+    """
+    event = ct.cast(data, ct.POINTER(ThreadEvent)).contents
+    # ctypes char arrays read back as bytes on Python 3. b"pthread" matches
+    # that, and on Python 2 it is the same object type as "pthread".
+    if event.type == b"pthread":
+        name = bpf.sym(event.runtime_id, args.pid)
+        tid = event.native_id
+    else:
+        name = event.name
+        tid = "R=%s/N=%s" % (event.runtime_id, event.native_id)
+    print("%-8.3f %-16s %-8s %-30s" % (
+        time.time() - start_ts, tid, _to_text(event.type), _to_text(name)))
+
+# Deliver every event from the "threads" perf buffer to print_event until the
+# user presses Ctrl-C, then leave without a traceback (the banner promises
+# "Ctrl-C to quit").
+bpf["threads"].open_perf_buffer(print_event)
+while 1:
+    try:
+        bpf.kprobe_poll()
+    except KeyboardInterrupt:
+        exit()
diff --git a/tools/uthreads_example.txt b/tools/uthreads_example.txt
new file mode 100644
index 0000000..664b341
--- /dev/null
+++ b/tools/uthreads_example.txt
@@ -0,0 +1,58 @@
+Demonstrations of uthreads.
+
+
+uthreads traces thread creation events in Java processes or for raw pthreads
+(for Java, thread stop events are traced as well), and prints details about
+each thread. For Java threads, the thread name is printed; for pthreads, the
+thread's start function is printed, if there is symbol information to resolve
+it.
+
+For example, trace all Java thread creation events:
+
+# ./uthreads -l java 27420
+Tracing thread events in process 27420 (language: java)... Ctrl-C to quit.
+TIME ID TYPE DESCRIPTION
+18.596 R=9/N=0 start SIGINT handler
+18.596 R=4/N=0 stop Signal Dispatcher
+^C
+
+The ID column in the preceding output shows the thread's runtime ID and native
+ID, when available. The accuracy of this information depends on the Java
+runtime.
+
+
+Next, trace only pthread creation events in some native application:
+
+# ./uthreads 27450
+Tracing thread events in process 27450 (language: none)... Ctrl-C to quit.
+TIME ID TYPE DESCRIPTION
+0.924 27462 pthread primes_thread
+0.927 27463 pthread primes_thread
+0.928 27464 pthread primes_thread
+0.928 27465 pthread primes_thread
+^C
+
+The thread's start function name ("primes_thread" in this example) is resolved
+from debuginfo. If symbol information is not present, the thread's start
+address is printed instead.
+
+
+USAGE message:
+
+# ./uthreads -h
+usage: uthreads.py [-h] [-l {java}] [-v] pid
+
+Trace thread creation/destruction events in high-level languages.
+
+positional arguments:
+ pid process id to attach to
+
+optional arguments:
+ -h, --help show this help message and exit
+ -l {java}, --language {java}
+ language to trace (none for pthreads only)
+ -v, --verbose verbose mode: print the BPF program (for debugging
+ purposes)
+
+examples:
+ ./uthreads -l java 185 # trace Java threads in process 185
+ ./uthreads 12245 # trace only pthreads in process 12245