funclatency
diff --git a/tools/funclatency b/tools/funclatency
new file mode 100755
index 0000000..842fea9
--- /dev/null
+++ b/tools/funclatency
@@ -0,0 +1,150 @@
+#!/usr/bin/python
+#
+# funclatency	Time kernel funcitons and print latency as a histogram.
+#		For Linux, uses BCC, eBPF.
+#
+# USAGE: funclatency [-h] [-p PID] [-i INTERVAL] [-T] [-u] [-m] [-r] pattern
+#
+# Run "funclatency -h" for full usage.
+#
+# The pattern is a string with optional '*' wildcards, similar to file globbing.
+# If you'd prefer to use regular expressions, use the -r option. Matching
+# multiple functions is of limited use, since the output has one histogram for
+# everything. Future versions should split the output histogram by the function.
+#
+# Copyright (c) 2015 Brendan Gregg.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 20-Sep-2015	Brendan Gregg	Created this.
+
+from __future__ import print_function
+from bcc import BPF
+from time import sleep, strftime
+import argparse
+import signal
+
+# arguments
+examples = """examples:
+    ./funclatency do_sys_open       # time the do_sys_open() kenel function
+    ./funclatency -u vfs_read       # time vfs_read(), in microseconds
+    ./funclatency -m do_nanosleep   # time do_nanosleep(), in milliseconds
+    ./funclatency -mTi 5 vfs_read   # output every 5 seconds, with timestamps
+    ./funclatency -p 181 vfs_read   # time process 181 only
+    ./funclatency 'vfs_fstat*'      # time both vfs_fstat() and vfs_fstatat()
+"""
+parser = argparse.ArgumentParser(
+	description="Time kernel funcitons and print latency as a histogram",
+	formatter_class=argparse.RawDescriptionHelpFormatter,
+	epilog=examples)
+parser.add_argument("-p", "--pid",
+	help="trace this PID only")
+parser.add_argument("-i", "--interval", default=99999999,
+	help="summary interval, seconds")
+parser.add_argument("-T", "--timestamp", action="store_true",
+	help="include timestamp on output")
+parser.add_argument("-u", "--microseconds", action="store_true",
+	help="microsecond histogram")
+parser.add_argument("-m", "--milliseconds", action="store_true",
+	help="millisecond histogram")
+parser.add_argument("-r", "--regexp", action="store_true",
+	help="use regular expressions. Default is \"*\" wildcards only.")
+parser.add_argument("pattern",
+	help="search expression for kernel functions")
+args = parser.parse_args()
+pattern = args.pattern
+if not args.regexp:
+	pattern = pattern.replace('*', '.*')
+	pattern = '^' + pattern + '$'
+debug = 0
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/blkdev.h>
+
+BPF_TABLE(\"array\", int, u64, dist, 64);
+BPF_HASH(start, u32);
+
+int trace_func_entry(struct pt_regs *ctx)
+{
+	u32 pid = bpf_get_current_pid_tgid();
+	u64 ts = bpf_ktime_get_ns();
+
+	FILTER
+	start.update(&pid, &ts);
+
+	return 0;
+}
+
+int trace_func_return(struct pt_regs *ctx)
+{
+	u64 *tsp, delta;
+	u32 pid = bpf_get_current_pid_tgid();
+
+	// calculate delta time
+	tsp = start.lookup(&pid);
+	if (tsp == 0) {
+		return 0; 	// missed start
+	}
+	start.delete(&pid);
+	delta = bpf_ktime_get_ns() - *tsp;
+	FACTOR
+
+	// store as histogram
+	int index = bpf_log2l(delta);
+	u64 *leaf = dist.lookup(&index);
+	if (leaf) (*leaf)++;
+
+	return 0;
+}
+"""
+if args.pid:
+	bpf_text = bpf_text.replace('FILTER',
+	    'if (pid != %s) { return 0; }' % args.pid)
+else:
+	bpf_text = bpf_text.replace('FILTER', '')
+if args.milliseconds:
+	bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
+	label = "msecs"
+elif args.microseconds:
+	bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;')
+	label = "usecs"
+else:
+	bpf_text = bpf_text.replace('FACTOR', '')
+	label = "nsecs"
+if debug:
+	print(bpf_text)
+
+# signal handler
+def signal_ignore(signal, frame):
+	print()
+
+# load BPF program
+b = BPF(text=bpf_text)
+b.attach_kprobe(event_re=pattern, fn_name="trace_func_entry")
+b.attach_kretprobe(event_re=pattern, fn_name="trace_func_return")
+
+# header
+print("Tracing %s... Hit Ctrl-C to end." % args.pattern)
+
+# output
+exiting = 0 if args.interval else 1
+dist = b.get_table("dist")
+while (1):
+	try:
+		sleep(int(args.interval))
+	except KeyboardInterrupt:
+		exiting=1
+		# as cleanup can take many seconds, trap Ctrl-C:
+		signal.signal(signal.SIGINT, signal_ignore)
+
+	print()
+	if args.timestamp:
+		print("%-8s\n" % strftime("%H:%M:%S"), end="")
+
+	dist.print_log2_hist(label)
+	dist.clear()
+
+	if exiting:
+		print("Detaching...")
+		exit()