new tool: capable (#690)

* add new tool: capable

* refactor a little, remove extra bpf_get_current_pid_tgid()
diff --git a/README.md b/README.md
index 5c746a9..1e4f76a 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,7 @@
 - tools/[bitesize](tools/bitesize.py): Show per process I/O size histogram. [Examples](tools/bitesize_example.txt).
 - tools/[btrfsdist](tools/btrfsdist.py): Summarize btrfs operation latency distribution as a histogram. [Examples](tools/btrfsdist_example.txt).
 - tools/[btrfsslower](tools/btrfsslower.py): Trace slow btrfs operations. [Examples](tools/btrfsslower_example.txt).
+- tools/[capable](tools/capable.py): Trace security capability checks. [Examples](tools/capable_example.txt).
 - tools/[cachestat](tools/cachestat.py): Trace page cache hit/miss ratio. [Examples](tools/cachestat_example.txt).
 - tools/[cachetop](tools/cachetop.py): Trace page cache hit/miss ratio by processes. [Examples](tools/cachetop_example.txt).
 - tools/[cpudist](tools/cpudist.py): Summarize on- and off-CPU time per task as a histogram. [Examples](tools/cpudist_example.txt)
diff --git a/man/man8/capable.8 b/man/man8/capable.8
new file mode 100644
index 0000000..c847ff0
--- /dev/null
+++ b/man/man8/capable.8
@@ -0,0 +1,75 @@
+.TH capable 8  "2016-09-13" "USER COMMANDS"
+.SH NAME
+capable \- Trace security capability checks (cap_capable()).
+.SH SYNOPSIS
+.B capable [\-h] [\-v] [\-p PID]
+.SH DESCRIPTION
+This traces security capability checks in the kernel, and prints details for
+each call. This can be useful for general debugging, and also security
+enforcement: determining a whitelist of capabilities an application needs.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF, bcc.
+.SH OPTIONS
+.TP
+\-h
+USAGE message.
+.TP
+\-v
+Include non-audit capability checks. These are those deemed not interesting and
+not necessary to audit, such as CAP_SYS_ADMIN checks on memory allocation to
+affect the behavior of overcommit.
+.TP
+\-p PID
+Trace this process ID only.
+.SH EXAMPLES
+.TP
+Trace all capability checks system-wide:
+#
+.B capable
+.TP
+Trace capability checks for PID 181:
+#
+.B capable \-p 181
+.SH FIELDS
+.TP
+TIME
+Time of capability check: HH:MM:SS.
+.TP
+UID
+User ID.
+.TP
+PID
+Process ID.
+.TP
+COMM
+Process name.
+.TP
+CAP
+Capability number.
+.TP
+NAME
+Capability name. See capabilities(7) for descriptions.
+.TP
+AUDIT
+Whether this was an audit event. Use \-v to include non-audit events.
+.SH OVERHEAD
+This adds low-overhead instrumentation to capability checks, which are expected
+to be low frequency, however, that depends on the application. Test in a lab
+environment before use.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+capabilities(7)
diff --git a/tools/capable.py b/tools/capable.py
new file mode 100755
index 0000000..defeab9
--- /dev/null
+++ b/tools/capable.py
@@ -0,0 +1,156 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# capable   Trace security capability checks (cap_capable()).
+#           For Linux, uses BCC, eBPF. Embedded C.
+#
+# USAGE: capable [-h] [-v] [-p PID]
+#
+# ToDo: add -s for kernel stacks.
+#
+# Copyright 2016 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 13-Sep-2016   Brendan Gregg   Created this.
+
+from __future__ import print_function
+from bcc import BPF
+import argparse
+from time import strftime
+import ctypes as ct
+
+# arguments (-p is parsed as an int so only a number reaches the C filter text)
+examples = """examples:
+    ./capable             # trace capability checks
+    ./capable -v          # verbose: include non-audit checks
+    ./capable -p 181      # only trace PID 181
+"""
+parser = argparse.ArgumentParser(
+    description="Trace security capability checks",
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+parser.add_argument("-v", "--verbose", action="store_true",
+    help="include non-audit checks")
+parser.add_argument("-p", "--pid", type=int,
+    help="trace this PID only")
+args = parser.parse_args()
+debug = 0
+
+# capability number -> name, for the NAME column; regenerate on new kernels:
+# awk '/^#define.CAP_.*[0-9]$/ { print "    " $3 ": \"" $2 "\"," }' \
+#     include/uapi/linux/capability.h
+capabilities = {
+    0: "CAP_CHOWN",
+    1: "CAP_DAC_OVERRIDE",
+    2: "CAP_DAC_READ_SEARCH",
+    3: "CAP_FOWNER",
+    4: "CAP_FSETID",
+    5: "CAP_KILL",
+    6: "CAP_SETGID",
+    7: "CAP_SETUID",
+    8: "CAP_SETPCAP",
+    9: "CAP_LINUX_IMMUTABLE",
+    10: "CAP_NET_BIND_SERVICE",
+    11: "CAP_NET_BROADCAST",
+    12: "CAP_NET_ADMIN",
+    13: "CAP_NET_RAW",
+    14: "CAP_IPC_LOCK",
+    15: "CAP_IPC_OWNER",
+    16: "CAP_SYS_MODULE",
+    17: "CAP_SYS_RAWIO",
+    18: "CAP_SYS_CHROOT",
+    19: "CAP_SYS_PTRACE",
+    20: "CAP_SYS_PACCT",
+    21: "CAP_SYS_ADMIN",
+    22: "CAP_SYS_BOOT",
+    23: "CAP_SYS_NICE",
+    24: "CAP_SYS_RESOURCE",
+    25: "CAP_SYS_TIME",
+    26: "CAP_SYS_TTY_CONFIG",
+    27: "CAP_MKNOD",
+    28: "CAP_LEASE",
+    29: "CAP_AUDIT_WRITE",
+    30: "CAP_AUDIT_CONTROL",
+    31: "CAP_SETFCAP",
+    32: "CAP_MAC_OVERRIDE",
+    33: "CAP_MAC_ADMIN",
+    34: "CAP_SYSLOG",
+    35: "CAP_WAKE_ALARM",
+    36: "CAP_BLOCK_SUSPEND",
+    37: "CAP_AUDIT_READ",
+}
+
+# define BPF program: kprobe on cap_capable() that emits one event per check
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+
+struct data_t {
+   // switch to u32s when supported
+   u64 pid;
+   u64 uid;
+   int cap;
+   int audit;
+   char comm[TASK_COMM_LEN];
+};
+
+BPF_PERF_OUTPUT(events);
+
+int kprobe__cap_capable(struct pt_regs *ctx, const struct cred *cred,
+    struct user_namespace *targ_ns, int cap, int audit)
+{
+    u32 pid = bpf_get_current_pid_tgid() >> 32;  // upper half: process ID
+    FILTER1
+    FILTER2
+
+    u32 uid = bpf_get_current_uid_gid();
+    struct data_t data = {.pid = pid, .uid = uid, .cap = cap, .audit = audit};
+    bpf_get_current_comm(&data.comm, sizeof(data.comm));
+    events.perf_submit(ctx, &data, sizeof(data));
+
+    return 0;
+};
+"""
+# resolve the FILTER placeholders; an empty string disables that filter
+pid_filter = ('if (pid != %s) { return 0; }' % args.pid) if args.pid else ''
+audit_filter = '' if args.verbose else 'if (audit == 0) { return 0; }'
+bpf_text = bpf_text.replace('FILTER1', pid_filter)
+bpf_text = bpf_text.replace('FILTER2', audit_filter)
+
+# dump the final program text when the debug flag is set
+if debug:
+    print(bpf_text)
+
+# build the BPF object from the final program text
+b = BPF(text=bpf_text)
+
+TASK_COMM_LEN = 16    # must match TASK_COMM_LEN in linux/sched.h
+# ctypes mirror of struct data_t in bpf_text: same field order and types
+class Data(ct.Structure):
+    _fields_ = [
+        ("pid", ct.c_ulonglong),
+        ("uid", ct.c_ulonglong),
+        ("cap", ct.c_int),
+        ("audit", ct.c_int),
+        ("comm", ct.c_char * TASK_COMM_LEN)
+    ]
+
+# column headings; widths match the row format used in print_event
+print("%-9s %-6s %-6s %-16s %-4s %-20s %s" % (
+    "TIME", "UID", "PID", "COMM", "CAP", "NAME", "AUDIT"))
+
+# perf buffer callback: decode one record and print it as a table row
+def print_event(cpu, data, size):
+    # reinterpret the raw perf buffer bytes as a Data structure
+    rec = ct.cast(data, ct.POINTER(Data)).contents
+
+    # capability numbers missing from the table are shown as "?"
+    cap_name = capabilities.get(rec.cap, "?")
+    now = strftime("%H:%M:%S")
+    print("%-9s %-6d %-6d %-16s %-4d %-20s %d" % (now,
+        rec.uid, rec.pid, rec.comm, rec.cap, cap_name, rec.audit))
+
+# register the callback, then poll the perf buffer forever
+b["events"].open_perf_buffer(print_event)
+while True:
+    b.kprobe_poll()
diff --git a/tools/capable_example.txt b/tools/capable_example.txt
new file mode 100644
index 0000000..0a63765
--- /dev/null
+++ b/tools/capable_example.txt
@@ -0,0 +1,79 @@
+Demonstrations of capable, the Linux eBPF/bcc version.
+
+
+capable traces calls to the kernel cap_capable() function, which does security
+capability checks, and prints details for each call. For example:
+
+# ./capable.py 
+TIME      UID    PID    COMM             CAP  NAME                 AUDIT
+22:11:23  114    2676   snmpd            12   CAP_NET_ADMIN        1
+22:11:23  0      6990   run              24   CAP_SYS_RESOURCE     1
+22:11:23  0      7003   chmod            3    CAP_FOWNER           1
+22:11:23  0      7003   chmod            4    CAP_FSETID           1
+22:11:23  0      7005   chmod            4    CAP_FSETID           1
+22:11:23  0      7005   chmod            4    CAP_FSETID           1
+22:11:23  0      7006   chown            4    CAP_FSETID           1
+22:11:23  0      7006   chown            4    CAP_FSETID           1
+22:11:23  0      6990   setuidgid        6    CAP_SETGID           1
+22:11:23  0      6990   setuidgid        6    CAP_SETGID           1
+22:11:23  0      6990   setuidgid        7    CAP_SETUID           1
+22:11:24  0      7013   run              24   CAP_SYS_RESOURCE     1
+22:11:24  0      7026   chmod            3    CAP_FOWNER           1
+22:11:24  0      7026   chmod            4    CAP_FSETID           1
+22:11:24  0      7028   chmod            4    CAP_FSETID           1
+22:11:24  0      7028   chmod            4    CAP_FSETID           1
+22:11:24  0      7029   chown            4    CAP_FSETID           1
+22:11:24  0      7029   chown            4    CAP_FSETID           1
+22:11:24  0      7013   setuidgid        6    CAP_SETGID           1
+22:11:24  0      7013   setuidgid        6    CAP_SETGID           1
+22:11:24  0      7013   setuidgid        7    CAP_SETUID           1
+22:11:25  0      7036   run              24   CAP_SYS_RESOURCE     1
+22:11:25  0      7049   chmod            3    CAP_FOWNER           1
+22:11:25  0      7049   chmod            4    CAP_FSETID           1
+22:11:25  0      7051   chmod            4    CAP_FSETID           1
+22:11:25  0      7051   chmod            4    CAP_FSETID           1
+[...]
+
+This can be useful for general debugging, and also security enforcement:
+determining a whitelist of capabilities an application needs.
+
+The output above includes various capability checks: snmpd checking
+CAP_NET_ADMIN, run checking CAP_SYS_RESOURCE, then some short-lived processes
+checking CAP_FOWNER, CAP_FSETID, etc.
+
+To see what each of these capabilities does, check the capabilities(7) man
+page and the kernel source.
+
+
+Sometimes capable catches itself starting up:
+
+# ./capable.py 
+TIME      UID    PID    COMM             CAP  NAME                 AUDIT
+22:22:19  0      21949  capable.py       21   CAP_SYS_ADMIN        1
+22:22:19  0      21949  capable.py       21   CAP_SYS_ADMIN        1
+22:22:19  0      21949  capable.py       21   CAP_SYS_ADMIN        1
+22:22:19  0      21949  capable.py       21   CAP_SYS_ADMIN        1
+22:22:19  0      21949  capable.py       21   CAP_SYS_ADMIN        1
+22:22:19  0      21949  capable.py       21   CAP_SYS_ADMIN        1
+22:22:19  0      21952  run              24   CAP_SYS_RESOURCE     1
+[...]
+
+These are capability checks from BPF and perf_events syscalls.
+
+
+USAGE:
+
+# ./capable.py -h
+usage: capable.py [-h] [-v] [-p PID]
+
+Trace security capability checks
+
+optional arguments:
+  -h, --help         show this help message and exit
+  -v, --verbose      include non-audit checks
+  -p PID, --pid PID  trace this PID only
+
+examples:
+    ./capable             # trace capability checks
+    ./capable -v          # verbose: include non-audit checks
+    ./capable -p 181      # only trace PID 181
diff --git a/tools/killsnoop.py b/tools/killsnoop.py
index 2302316..90b6f7e 100755
--- a/tools/killsnoop.py
+++ b/tools/killsnoop.py
@@ -49,7 +49,7 @@
 
 struct data_t {
    u64 pid;
-   u64 tpid;
+   int tpid;
    int sig;
    int ret;
    char comm[TASK_COMM_LEN];
@@ -60,12 +60,11 @@
 
 int kprobe__sys_kill(struct pt_regs *ctx, int tpid, int sig)
 {
-    struct val_t val = {};
     u32 pid = bpf_get_current_pid_tgid();
-
     FILTER
+
+    struct val_t val = {.pid = pid};
     if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
-        val.pid = bpf_get_current_pid_tgid();
         val.tpid = tpid;
         val.sig = sig;
         infotmp.update(&pid, &val);
@@ -114,7 +113,7 @@
 class Data(ct.Structure):
     _fields_ = [
         ("pid", ct.c_ulonglong),
-        ("tpid", ct.c_ulonglong),
+        ("tpid", ct.c_int),
         ("sig", ct.c_int),
         ("ret", ct.c_int),
         ("comm", ct.c_char * TASK_COMM_LEN)
@@ -128,7 +127,8 @@
 def print_event(cpu, data, size):
     event = ct.cast(data, ct.POINTER(Data)).contents
 
-    if (args.failed and (event.ret >= 0)): return
+    if (args.failed and (event.ret >= 0)):
+        return
 
     print("%-9s %-6d %-16s %-4d %-6d %d" % (strftime("%H:%M:%S"),
         event.pid, event.comm, event.sig, event.tpid, event.ret))