funclatency
diff --git a/tools/funclatency b/tools/funclatency
new file mode 100755
index 0000000..842fea9
--- /dev/null
+++ b/tools/funclatency
@@ -0,0 +1,150 @@
+#!/usr/bin/python
+#
+# funclatency Time kernel funcitons and print latency as a histogram.
+# For Linux, uses BCC, eBPF.
+#
+# USAGE: funclatency [-h] [-p PID] [-i INTERVAL] [-T] [-u] [-m] [-r] pattern
+#
+# Run "funclatency -h" for full usage.
+#
+# The pattern is a string with optional '*' wildcards, similar to file globbing.
+# If you'd prefer to use regular expressions, use the -r option. Matching
+# multiple functions is of limited use, since the output has one histogram for
+# everything. Future versions should split the output histogram by the function.
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 20-Sep-2015 Brendan Gregg Created this.
+
+from __future__ import print_function
+from bcc import BPF
+from time import sleep, strftime
+import argparse
+import signal
+
+# arguments
+examples = """examples:
+ ./funclatency do_sys_open # time the do_sys_open() kenel function
+ ./funclatency -u vfs_read # time vfs_read(), in microseconds
+ ./funclatency -m do_nanosleep # time do_nanosleep(), in milliseconds
+ ./funclatency -mTi 5 vfs_read # output every 5 seconds, with timestamps
+ ./funclatency -p 181 vfs_read # time process 181 only
+ ./funclatency 'vfs_fstat*' # time both vfs_fstat() and vfs_fstatat()
+"""
+parser = argparse.ArgumentParser(
+ description="Time kernel funcitons and print latency as a histogram",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=examples)
+parser.add_argument("-p", "--pid",
+ help="trace this PID only")
+parser.add_argument("-i", "--interval", default=99999999,
+ help="summary interval, seconds")
+parser.add_argument("-T", "--timestamp", action="store_true",
+ help="include timestamp on output")
+parser.add_argument("-u", "--microseconds", action="store_true",
+ help="microsecond histogram")
+parser.add_argument("-m", "--milliseconds", action="store_true",
+ help="millisecond histogram")
+parser.add_argument("-r", "--regexp", action="store_true",
+ help="use regular expressions. Default is \"*\" wildcards only.")
+parser.add_argument("pattern",
+ help="search expression for kernel functions")
+args = parser.parse_args()
+pattern = args.pattern
+if not args.regexp:
+ pattern = pattern.replace('*', '.*')
+ pattern = '^' + pattern + '$'
+debug = 0
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/blkdev.h>
+
+BPF_TABLE(\"array\", int, u64, dist, 64);
+BPF_HASH(start, u32);
+
+int trace_func_entry(struct pt_regs *ctx)
+{
+ u32 pid = bpf_get_current_pid_tgid();
+ u64 ts = bpf_ktime_get_ns();
+
+ FILTER
+ start.update(&pid, &ts);
+
+ return 0;
+}
+
+int trace_func_return(struct pt_regs *ctx)
+{
+ u64 *tsp, delta;
+ u32 pid = bpf_get_current_pid_tgid();
+
+ // calculate delta time
+ tsp = start.lookup(&pid);
+ if (tsp == 0) {
+ return 0; // missed start
+ }
+ start.delete(&pid);
+ delta = bpf_ktime_get_ns() - *tsp;
+ FACTOR
+
+ // store as histogram
+ int index = bpf_log2l(delta);
+ u64 *leaf = dist.lookup(&index);
+ if (leaf) (*leaf)++;
+
+ return 0;
+}
+"""
+if args.pid:
+ bpf_text = bpf_text.replace('FILTER',
+ 'if (pid != %s) { return 0; }' % args.pid)
+else:
+ bpf_text = bpf_text.replace('FILTER', '')
+if args.milliseconds:
+ bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
+ label = "msecs"
+elif args.microseconds:
+ bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;')
+ label = "usecs"
+else:
+ bpf_text = bpf_text.replace('FACTOR', '')
+ label = "nsecs"
+if debug:
+ print(bpf_text)
+
+# signal handler
+def signal_ignore(signal, frame):
+ print()
+
+# load BPF program
+b = BPF(text=bpf_text)
+b.attach_kprobe(event_re=pattern, fn_name="trace_func_entry")
+b.attach_kretprobe(event_re=pattern, fn_name="trace_func_return")
+
+# header
+print("Tracing %s... Hit Ctrl-C to end." % args.pattern)
+
+# output
+exiting = 0 if args.interval else 1
+dist = b.get_table("dist")
+while (1):
+ try:
+ sleep(int(args.interval))
+ except KeyboardInterrupt:
+ exiting=1
+ # as cleanup can take many seconds, trap Ctrl-C:
+ signal.signal(signal.SIGINT, signal_ignore)
+
+ print()
+ if args.timestamp:
+ print("%-8s\n" % strftime("%H:%M:%S"), end="")
+
+ dist.print_log2_hist(label)
+ dist.clear()
+
+ if exiting:
+ print("Detaching...")
+ exit()
diff --git a/tools/funclatency_example.txt b/tools/funclatency_example.txt
new file mode 100644
index 0000000..f618267
--- /dev/null
+++ b/tools/funclatency_example.txt
@@ -0,0 +1,225 @@
+Demonstrations of funclatency, the Linux eBPF/bcc version.
+
+
+Timing the do_sys_open() kernel function until Ctrl-C:
+
+# ./funclatency do_sys_open
+Tracing do_sys_open... Hit Ctrl-C to end.
+^C
+ nsecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 0 | |
+ 8 -> 15 : 0 | |
+ 16 -> 31 : 0 | |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 0 | |
+ 128 -> 255 : 0 | |
+ 256 -> 511 : 0 | |
+ 512 -> 1023 : 0 | |
+ 1024 -> 2047 : 0 | |
+ 2048 -> 4095 : 124 |**************** |
+ 4096 -> 8191 : 291 |**************************************|
+ 8192 -> 16383 : 36 |**** |
+ 16384 -> 32767 : 16 |** |
+ 32768 -> 65535 : 8 |* |
+ 65536 -> 131071 : 0 | |
+ 131072 -> 262143 : 0 | |
+ 262144 -> 524287 : 0 | |
+ 524288 -> 1048575 : 0 | |
+ 1048576 -> 2097151 : 0 | |
+ 2097152 -> 4194303 : 1 | |
+Detaching...
+
+The output shows a histogram of function latency (call time), measured from when
+the function began executing (was called) to when it finished (returned).
+
+This example output shows that most of the time, do_sys_open() took between
+2048 and 65536 nanoseconds (2 to 65 microseconds). The peak of this distribution
+shows 291 calls of between 4096 and 8191 nanoseconds. There was also one
+occurrance, an outlier, in the 2 to 4 millisecond range.
+
+How this works: the function entry and return are traced using the kernel kprobe
+and kretprobe tracer. Timestamps are collected, the delta time calculated, which
+is the bucketized and stored as an in-kernel histogram for efficiency. The
+histgram is visible in the output: it's the "count" column; everything else is
+decoration. Only the count column is copied to user-level on output. This is an
+efficient way to time kernel functions and examine their latency distribution.
+
+
+Now vfs_read() is traced, and a microseconds histogram printed:
+
+# ./funclatency -u vfs_read
+Tracing vfs_read... Hit Ctrl-C to end.
+^C
+ usecs : count distribution
+ 0 -> 1 : 1143 |**************************************|
+ 2 -> 3 : 420 |************* |
+ 4 -> 7 : 159 |***** |
+ 8 -> 15 : 295 |********* |
+ 16 -> 31 : 25 | |
+ 32 -> 63 : 5 | |
+ 64 -> 127 : 1 | |
+ 128 -> 255 : 0 | |
+ 256 -> 511 : 0 | |
+ 512 -> 1023 : 0 | |
+ 1024 -> 2047 : 1 | |
+ 2048 -> 4095 : 0 | |
+ 4096 -> 8191 : 5 | |
+ 8192 -> 16383 : 0 | |
+ 16384 -> 32767 : 0 | |
+ 32768 -> 65535 : 0 | |
+ 65536 -> 131071 : 7 | |
+ 131072 -> 262143 : 7 | |
+ 262144 -> 524287 : 3 | |
+ 524288 -> 1048575 : 7 | |
+Detaching...
+
+This shows a bimodal distribution. Many vfs_read() calls were faster than 15
+microseconds, however, there was also a small handful between 65 milliseconds
+and 1 second, seen at the bottom of the table. These are likely network reads
+from SSH, waiting on interactive keystrokes.
+
+
+Tracing do_nanosleep() in milliseconds:
+
+# ./funclatency -m do_nanosleep
+Tracing do_nanosleep... Hit Ctrl-C to end.
+^C
+ msecs : count distribution
+ 0 -> 1 : 0 | |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 0 | |
+ 8 -> 15 : 0 | |
+ 16 -> 31 : 0 | |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 0 | |
+ 128 -> 255 : 0 | |
+ 256 -> 511 : 0 | |
+ 512 -> 1023 : 328 |**************************************|
+ 1024 -> 2047 : 0 | |
+ 2048 -> 4095 : 0 | |
+ 4096 -> 8191 : 32 |*** |
+ 8192 -> 16383 : 0 | |
+ 16384 -> 32767 : 0 | |
+ 32768 -> 65535 : 2 | |
+Detaching...
+
+This looks like it has found threads that are sleeping every 1, 5, and 60
+seconds.
+
+
+An interval can be provided using -i, and timestamps added using -T. For
+example, tracing vfs_read() latency in milliseconds and printing output
+every 5 seconds:
+
+# ./funclatency -mTi 5 vfs_read
+Tracing vfs_read... Hit Ctrl-C to end.
+
+20:10:08
+ msecs : count distribution
+ 0 -> 1 : 1500 |*************************************+|
+ 2 -> 3 : 3 | |
+ 4 -> 7 : 1 | |
+ 8 -> 15 : 2 | |
+ 16 -> 31 : 0 | |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 4 | |
+ 128 -> 255 : 3 | |
+ 256 -> 511 : 1 | |
+ 512 -> 1023 : 7 | |
+
+20:10:13
+ msecs : count distribution
+ 0 -> 1 : 1251 |*************************************+|
+ 2 -> 3 : 3 | |
+ 4 -> 7 : 2 | |
+ 8 -> 15 : 0 | |
+ 16 -> 31 : 2 | |
+ 32 -> 63 : 3 | |
+ 64 -> 127 : 5 | |
+ 128 -> 255 : 5 | |
+ 256 -> 511 : 3 | |
+ 512 -> 1023 : 6 | |
+ 1024 -> 2047 : 2 | |
+
+20:10:18
+ msecs : count distribution
+ 0 -> 1 : 1265 |*************************************+|
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 5 | |
+ 8 -> 15 : 9 | |
+ 16 -> 31 : 7 | |
+ 32 -> 63 : 1 | |
+ 64 -> 127 : 2 | |
+ 128 -> 255 : 3 | |
+ 256 -> 511 : 5 | |
+ 512 -> 1023 : 5 | |
+ 1024 -> 2047 : 0 | |
+ 2048 -> 4095 : 1 | |
+^C
+20:10:20
+ msecs : count distribution
+ 0 -> 1 : 249 |*************************************+|
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 0 | |
+ 8 -> 15 : 1 | |
+ 16 -> 31 : 0 | |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 0 | |
+ 128 -> 255 : 0 | |
+ 256 -> 511 : 0 | |
+ 512 -> 1023 : 1 | |
+Detaching...
+
+
+A single process can be traced, which filters in-kernel for efficiency. Here,
+the vfs_read() function is timed as milliseconds for PID 17064, which is a
+bash shell:
+
+# ./funclatency -mp 17064 vfs_read
+Tracing vfs_read... Hit Ctrl-C to end.
+^C
+ msecs : count distribution
+ 0 -> 1 : 1 |** |
+ 2 -> 3 : 0 | |
+ 4 -> 7 : 0 | |
+ 8 -> 15 : 1 |** |
+ 16 -> 31 : 2 |***** |
+ 32 -> 63 : 0 | |
+ 64 -> 127 : 13 |**************************************|
+ 128 -> 255 : 10 |***************************** |
+ 256 -> 511 : 4 |*********** |
+Detaching...
+
+The distribution between 64 and 511 milliseconds shows keystroke latency.
+
+
+USAGE message:
+
+# ./funclatency -h
+usage: funclatency [-h] [-p PID] [-i INTERVAL] [-T] [-u] [-m] [-r] pattern
+
+Time kernel funcitons and print latency as a histogram
+
+positional arguments:
+ pattern search expression for kernel functions
+
+optional arguments:
+ -h, --help show this help message and exit
+ -p PID, --pid PID trace this PID only
+ -i INTERVAL, --interval INTERVAL
+ summary interval, seconds
+ -T, --timestamp include timestamp on output
+ -u, --microseconds microsecond histogram
+ -m, --milliseconds millisecond histogram
+ -r, --regexp use regular expressions. Default is "*" wildcards
+ only.
+
+examples:
+ ./funclatency do_sys_open # time the do_sys_open() kenel function
+ ./funclatency -u vfs_read # time vfs_read(), in microseconds
+ ./funclatency -m do_nanosleep # time do_nanosleep(), in milliseconds
+ ./funclatency -mTi 5 vfs_read # output every 5 seconds, with timestamps
+ ./funclatency -p 181 vfs_read # time process 181 only
+ ./funclatency 'vfs_fstat*' # time both vfs_fstat() and vfs_fstatat()