runqlat
diff --git a/tools/runqlat.py b/tools/runqlat.py
new file mode 100755
index 0000000..558981d
--- /dev/null
+++ b/tools/runqlat.py
@@ -0,0 +1,167 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# runqlat   Run queue (scheduler) latency as a histogram.
+#           For Linux, uses BCC, eBPF.
+#
+# USAGE: runqlat [-h] [-T] [-m] [-P] [-p PID] [interval] [count]
+#
+# This measures the time a task spends waiting on a run queue for a turn
+# on-CPU, and shows this time as a histogram. This time should be small, but a
+# task may need to wait its turn due to CPU load.
+#
+# This measures two types of run queue latency:
+# 1. The time from a task being enqueued on a run queue to its context switch
+#    and execution. This traces enqueue_task_*() -> finish_task_switch(),
+#    and instruments the run queue latency after a voluntary context switch.
+# 2. The time from when a task was involuntary context switched and still
+#    in the runnable state, to when it next executed. This is instrumented
+#    from finish_task_switch() alone.
+#
+# Copyright 2016 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 07-Feb-2016   Brendan Gregg   Created this.
+
+from __future__ import print_function
+from bcc import BPF
+from time import sleep, strftime
+import argparse
+
+# arguments
+examples = """examples:
+    ./runqlat            # summarize run queue latency as a histogram
+    ./runqlat 1 10       # print 1 second summaries, 10 times
+    ./runqlat -mT 1      # 1s summaries, milliseconds, and timestamps
+    ./runqlat -P         # show each PID separately
+    ./runqlat -p 185     # trace PID 185 only
+"""
+parser = argparse.ArgumentParser(
+    description="Summarize run queue (schedular) latency as a histogram",
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+parser.add_argument("-T", "--timestamp", action="store_true",
+    help="include timestamp on output")
+parser.add_argument("-m", "--milliseconds", action="store_true",
+    help="millisecond histogram")
+parser.add_argument("-P", "--pids", action="store_true",
+    help="print a histogram per process ID")
+parser.add_argument("-p", "--pid",
+    help="trace this PID only")
+parser.add_argument("interval", nargs="?", default=99999999,
+    help="output interval, in seconds")
+parser.add_argument("count", nargs="?", default=99999999,
+    help="number of outputs")
+args = parser.parse_args()
+countdown = int(args.count)
+debug = 0
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include "kernel/sched/sched.h"
+
+typedef struct pid_key {
+    u64 pid;    // work around
+    u64 slot;
+} pid_key_t;
+BPF_HASH(start, u32);
+STORAGE
+
+// record enqueue timestamp
+int trace_enqueue(struct pt_regs *ctx, struct rq *rq, struct task_struct *p,
+    int flags)
+{
+    u32 pid = p->pid;
+    if (FILTER)
+        return 0;
+    u64 ts = bpf_ktime_get_ns();
+    start.update(&pid, &ts);
+    return 0;
+}
+
+// calculate latency
+int trace_run(struct pt_regs *ctx, struct task_struct *prev)
+{
+    u32 pid;
+
+    // ivcsw: treat like an enqueue event and store timestamp
+    if (prev->state == TASK_RUNNING) {
+        pid = prev->pid;
+        if (!(FILTER)) {
+            u64 ts = bpf_ktime_get_ns();
+            start.update(&pid, &ts);
+        }
+    }
+
+    pid = bpf_get_current_pid_tgid();
+    if (FILTER)
+        return 0;
+    u64 *tsp, delta;
+
+    // fetch timestamp and calculate delta
+    tsp = start.lookup(&pid);
+    if (tsp == 0) {
+        return 0;   // missed enqueue
+    }
+    delta = bpf_ktime_get_ns() - *tsp;
+    FACTOR
+
+    // store as histogram
+    STORE
+
+    start.delete(&pid);
+    return 0;
+}
+"""
+
+# code substitutions
+if args.pid:
+    bpf_text = bpf_text.replace('FILTER', 'pid != %s' % args.pid)
+else:
+    bpf_text = bpf_text.replace('FILTER', '0')
+if args.milliseconds:
+    bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000000;')
+    label = "msecs"
+else:
+    bpf_text = bpf_text.replace('FACTOR', 'delta /= 1000;')
+    label = "usecs"
+if args.pids:
+    bpf_text = bpf_text.replace('STORAGE',
+        'BPF_HISTOGRAM(dist, pid_key_t);')
+    bpf_text = bpf_text.replace('STORE',
+        'pid_key_t key = {.pid = pid, .slot = bpf_log2l(delta)}; ' +
+        'dist.increment(key);')
+else:
+    bpf_text = bpf_text.replace('STORAGE', 'BPF_HISTOGRAM(dist);')
+    bpf_text = bpf_text.replace('STORE',
+        'dist.increment(bpf_log2l(delta));')
+if debug:
+    print(bpf_text)
+
+# load BPF program
+b = BPF(text=bpf_text)
+b.attach_kprobe(event_re="enqueue_task_*", fn_name="trace_enqueue")
+b.attach_kprobe(event="finish_task_switch", fn_name="trace_run")
+
+print("Tracing run queue latency... Hit Ctrl-C to end.")
+
+# output
+exiting = 0 if args.interval else 1
+dist = b.get_table("dist")
+while (1):
+    try:
+        sleep(int(args.interval))
+    except KeyboardInterrupt:
+        exiting = 1
+
+    print()
+    if args.timestamp:
+        print("%-8s\n" % strftime("%H:%M:%S"), end="")
+
+    dist.print_log2_hist(label, "pid", section_print_fn=int)
+    dist.clear()
+
+    countdown -= 1
+    if exiting or countdown == 0:
+        exit()
diff --git a/tools/runqlat_example.txt b/tools/runqlat_example.txt
new file mode 100644
index 0000000..9999dd3
--- /dev/null
+++ b/tools/runqlat_example.txt
@@ -0,0 +1,486 @@
+Demonstrations of runqlat, the Linux eBPF/bcc version.
+
+
+This program summarizes scheduler run queue latency as a histogram, showing
+how long tasks spent waiting their turn to run on-CPU.
+
+Here is a heavily loaded system:
+
+# ./runqlat 
+Tracing run queue latency... Hit Ctrl-C to end.
+^C
+     usecs               : count     distribution
+         0 -> 1          : 233      |***********                             |
+         2 -> 3          : 742      |************************************    |
+         4 -> 7          : 203      |**********                              |
+         8 -> 15         : 173      |********                                |
+        16 -> 31         : 24       |*                                       |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 30       |*                                       |
+       128 -> 255        : 6        |                                        |
+       256 -> 511        : 3        |                                        |
+       512 -> 1023       : 5        |                                        |
+      1024 -> 2047       : 27       |*                                       |
+      2048 -> 4095       : 30       |*                                       |
+      4096 -> 8191       : 20       |                                        |
+      8192 -> 16383      : 29       |*                                       |
+     16384 -> 32767      : 809      |****************************************|
+     32768 -> 65535      : 64       |***                                     |
+
+The distribution is bimodal, with one mode between 0 and 15 microseconds,
+and another between 16 and 65 milliseconds. These modes are visible as the
+spikes in the ASCII distribution (which is merely a visual representation
+of the "count" column). As an example of reading one line: 809 events fell
+into the 16384 to 32767 microsecond range (16 to 32 ms) while tracing.
+
+I would expect the two modes to be due the workload: 16 hot CPU-bound threads,
+and many other mostly idle threads doing occasional work. I suspect the mostly
+idle threads will run with a higher priority when they wake up, and are
+the reason for the low latency mode. The high latency mode will be the
+CPU-bound threads. More analysis with this and other tools can confirm.
+
+
+A -m option can be used to show milliseconds instead, as well as an interval
+and a count. For example, showing three x five second summary in milliseconds:
+
+# ./runqlat -m 5 3
+Tracing run queue latency... Hit Ctrl-C to end.
+
+     msecs               : count     distribution
+         0 -> 1          : 3818     |****************************************|
+         2 -> 3          : 39       |                                        |
+         4 -> 7          : 39       |                                        |
+         8 -> 15         : 62       |                                        |
+        16 -> 31         : 2214     |***********************                 |
+        32 -> 63         : 226      |**                                      |
+
+     msecs               : count     distribution
+         0 -> 1          : 3775     |****************************************|
+         2 -> 3          : 52       |                                        |
+         4 -> 7          : 37       |                                        |
+         8 -> 15         : 65       |                                        |
+        16 -> 31         : 2230     |***********************                 |
+        32 -> 63         : 212      |**                                      |
+
+     msecs               : count     distribution
+         0 -> 1          : 3816     |****************************************|
+         2 -> 3          : 49       |                                        |
+         4 -> 7          : 40       |                                        |
+         8 -> 15         : 53       |                                        |
+        16 -> 31         : 2228     |***********************                 |
+        32 -> 63         : 221      |**                                      |
+
+This shows a similar distribution across the three summaries.
+
+
+A -p option can be used to show one PID only, which is filtered in kernel for
+efficiency. For example, PID 4505, and one second summaries:
+
+# ./runqlat -mp 4505 1
+Tracing run queue latency... Hit Ctrl-C to end.
+
+     msecs               : count     distribution
+         0 -> 1          : 1        |*                                       |
+         2 -> 3          : 2        |***                                     |
+         4 -> 7          : 1        |*                                       |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 25       |****************************************|
+        32 -> 63         : 3        |****                                    |
+
+     msecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 2        |**                                      |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 1        |*                                       |
+        16 -> 31         : 30       |****************************************|
+        32 -> 63         : 1        |*                                       |
+
+     msecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 28       |****************************************|
+        32 -> 63         : 2        |**                                      |
+
+     msecs               : count     distribution
+         0 -> 1          : 1        |*                                       |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 27       |****************************************|
+        32 -> 63         : 4        |*****                                   |
+[...]
+
+For comparison, here is pidstat(1) for that process:
+
+# pidstat -p 4505 1
+Linux 4.4.0-virtual (bgregg-xxxxxxxx) 	02/08/2016 	_x86_64_	(8 CPU)
+
+08:56:11 AM   UID       PID    %usr %system  %guest    %CPU   CPU  Command
+08:56:12 AM     0      4505    9.00    3.00    0.00   12.00     0  bash
+08:56:13 AM     0      4505    7.00    5.00    0.00   12.00     0  bash
+08:56:14 AM     0      4505   10.00    2.00    0.00   12.00     0  bash
+08:56:15 AM     0      4505   11.00    2.00    0.00   13.00     0  bash
+08:56:16 AM     0      4505    9.00    3.00    0.00   12.00     0  bash
+[...]
+
+This is a synthetic workload that is CPU bound. It's only spending 12% on-CPU
+each second because of high CPU demand on this server: the remaining time
+is spent waiting on a run queue, as visualized by runqlat.
+
+
+Here is the same system, but when it is CPU idle:
+
+# ./runqlat 5 1
+Tracing run queue latency... Hit Ctrl-C to end.
+
+     usecs               : count     distribution
+         0 -> 1          : 2250     |********************************        |
+         2 -> 3          : 2340     |**********************************      |
+         4 -> 7          : 2746     |****************************************|
+         8 -> 15         : 418      |******                                  |
+        16 -> 31         : 93       |*                                       |
+        32 -> 63         : 28       |                                        |
+        64 -> 127        : 119      |*                                       |
+       128 -> 255        : 9        |                                        |
+       256 -> 511        : 4        |                                        |
+       512 -> 1023       : 20       |                                        |
+      1024 -> 2047       : 22       |                                        |
+      2048 -> 4095       : 5        |                                        |
+      4096 -> 8191       : 2        |                                        |
+
+Back to a microsecond scale, this time there is little run queue latency past 1
+millisecond, as would be expected.
+
+
+Now 16 threads are performing heavy disk I/O:
+
+# ./runqlat 5 1
+Tracing run queue latency... Hit Ctrl-C to end.
+
+     usecs               : count     distribution
+         0 -> 1          : 204      |                                        |
+         2 -> 3          : 944      |*                                       |
+         4 -> 7          : 16315    |*********************                   |
+         8 -> 15         : 29897    |****************************************|
+        16 -> 31         : 1044     |*                                       |
+        32 -> 63         : 23       |                                        |
+        64 -> 127        : 128      |                                        |
+       128 -> 255        : 24       |                                        |
+       256 -> 511        : 5        |                                        |
+       512 -> 1023       : 13       |                                        |
+      1024 -> 2047       : 15       |                                        |
+      2048 -> 4095       : 13       |                                        |
+      4096 -> 8191       : 10       |                                        |
+
+The distribution hasn't changed too much. While the disks are 100% busy, there
+is still plenty of CPU headroom, and threads still don't spend much time
+waiting their turn.
+
+
+A -P option will print a distribution for each PID:
+
+# ./runqlat -P
+Tracing run queue latency... Hit Ctrl-C to end.
+^C
+
+pid = 0
+     usecs               : count     distribution
+         0 -> 1          : 351      |********************************        |
+         2 -> 3          : 96       |********                                |
+         4 -> 7          : 437      |****************************************|
+         8 -> 15         : 12       |*                                       |
+        16 -> 31         : 10       |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 16       |*                                       |
+       128 -> 255        : 0        |                                        |
+       256 -> 511        : 0        |                                        |
+       512 -> 1023       : 0        |                                        |
+      1024 -> 2047       : 0        |                                        |
+      2048 -> 4095       : 0        |                                        |
+      4096 -> 8191       : 0        |                                        |
+      8192 -> 16383      : 1        |                                        |
+
+pid = 12929
+     usecs               : count     distribution
+         0 -> 1          : 1        |****************************************|
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 1        |****************************************|
+
+pid = 12930
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 1        |****************************************|
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 1        |****************************************|
+
+pid = 12931
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 1        |********************                    |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 0        |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 0        |                                        |
+       128 -> 255        : 0        |                                        |
+       256 -> 511        : 0        |                                        |
+       512 -> 1023       : 2        |****************************************|
+
+pid = 12932
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 0        |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 0        |                                        |
+       128 -> 255        : 1        |****************************************|
+       256 -> 511        : 0        |                                        |
+       512 -> 1023       : 1        |****************************************|
+
+pid = 7
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 426      |*************************************   |
+         4 -> 7          : 457      |****************************************|
+         8 -> 15         : 16       |*                                       |
+
+pid = 9
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 425      |****************************************|
+         8 -> 15         : 16       |*                                       |
+
+pid = 11
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 10       |****************************************|
+
+pid = 14
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 8        |****************************************|
+         4 -> 7          : 2        |**********                              |
+
+pid = 18
+     usecs               : count     distribution
+         0 -> 1          : 414      |****************************************|
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 20       |*                                       |
+         8 -> 15         : 8        |                                        |
+
+pid = 12928
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 1        |****************************************|
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 0        |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 1        |****************************************|
+
+pid = 1867
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 15       |****************************************|
+        16 -> 31         : 1        |**                                      |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 0        |                                        |
+       128 -> 255        : 4        |**********                              |
+
+pid = 1871
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 2        |****************************************|
+        16 -> 31         : 0        |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 0        |                                        |
+       128 -> 255        : 0        |                                        |
+       256 -> 511        : 0        |                                        |
+       512 -> 1023       : 1        |********************                    |
+
+pid = 1876
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 1        |****************************************|
+        16 -> 31         : 0        |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 0        |                                        |
+       128 -> 255        : 0        |                                        |
+       256 -> 511        : 1        |****************************************|
+
+pid = 1878
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 3        |****************************************|
+
+pid = 1880
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 3        |****************************************|
+
+pid = 9307
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 1        |****************************************|
+
+pid = 1886
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 1        |********************                    |
+         8 -> 15         : 2        |****************************************|
+
+pid = 1888
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 3        |****************************************|
+
+pid = 3297
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 1        |****************************************|
+
+pid = 1892
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 1        |********************                    |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 0        |                                        |
+       128 -> 255        : 0        |                                        |
+       256 -> 511        : 0        |                                        |
+       512 -> 1023       : 2        |****************************************|
+
+pid = 7024
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 4        |****************************************|
+
+pid = 16468
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 3        |****************************************|
+
+pid = 12922
+     usecs               : count     distribution
+         0 -> 1          : 1        |****************************************|
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 1        |****************************************|
+        16 -> 31         : 1        |****************************************|
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 1        |****************************************|
+
+pid = 12923
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 1        |********************                    |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 0        |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 2        |****************************************|
+       128 -> 255        : 0        |                                        |
+       256 -> 511        : 0        |                                        |
+       512 -> 1023       : 1        |********************                    |
+      1024 -> 2047       : 1        |********************                    |
+
+pid = 12924
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 2        |********************                    |
+         8 -> 15         : 4        |****************************************|
+        16 -> 31         : 1        |**********                              |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 0        |                                        |
+       128 -> 255        : 0        |                                        |
+       256 -> 511        : 0        |                                        |
+       512 -> 1023       : 0        |                                        |
+      1024 -> 2047       : 1        |**********                              |
+
+pid = 12925
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 0        |                                        |
+        16 -> 31         : 0        |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 1        |****************************************|
+
+pid = 12926
+     usecs               : count     distribution
+         0 -> 1          : 0        |                                        |
+         2 -> 3          : 1        |****************************************|
+         4 -> 7          : 0        |                                        |
+         8 -> 15         : 1        |****************************************|
+        16 -> 31         : 0        |                                        |
+        32 -> 63         : 0        |                                        |
+        64 -> 127        : 0        |                                        |
+       128 -> 255        : 0        |                                        |
+       256 -> 511        : 0        |                                        |
+       512 -> 1023       : 1        |****************************************|
+
+pid = 12927
+     usecs               : count     distribution
+         0 -> 1          : 1        |****************************************|
+         2 -> 3          : 0        |                                        |
+         4 -> 7          : 1        |****************************************|
+
+
+USAGE message:
+
+# ./runqlat -h
+usage: runqlat [-h] [-T] [-m] [-P] [-p PID] [interval] [count]
+
+Summarize run queue (schedular) latency as a histogram
+
+positional arguments:
+  interval            output interval, in seconds
+  count               number of outputs
+
+optional arguments:
+  -h, --help          show this help message and exit
+  -T, --timestamp     include timestamp on output
+  -m, --milliseconds  millisecond histogram
+  -P, --pids          print a histogram per process ID
+  -p PID, --pid PID   trace this PID only
+
+examples:
+    ./runqlat            # summarize run queue latency as a histogram
+    ./runqlat 1 10       # print 1 second summaries, 10 times
+    ./runqlat -mT 1      # 1s summaries, milliseconds, and timestamps
+    ./runqlat -P         # show each PID separately
+    ./runqlat -p 185     # trace PID 185 only