Make perf ring buffer size configurable

As discussed in #966, this PR makes the size of the ring buffer used to send
data to userspace configurable. It changes the Python, Lua and C++ APIs to
expose this knob.

It also raises the default buffer size to 64 pages per CPU (an 8x increase)
for several tools which produce a lot of output, and makes the size
configurable in `trace` via a new `-b` flag.
diff --git a/tools/biosnoop.lua b/tools/biosnoop.lua
index ac08897..fac7f3b 100644
--- a/tools/biosnoop.lua
+++ b/tools/biosnoop.lua
@@ -175,9 +175,9 @@
       uint64_t sector;
       uint64_t len;
       uint64_t ts;
-      char disk_name[%d];
-      char name[%d];
+      char disk_name[$];
+      char name[$];
     }
-  ]] % {DISK_NAME_LEN, TASK_COMM_LEN})
+  ]], {DISK_NAME_LEN, TASK_COMM_LEN}, 64)
   bpf:kprobe_poll_loop()
 end
diff --git a/tools/biosnoop.py b/tools/biosnoop.py
index aa8a077..3d77e52 100755
--- a/tools/biosnoop.py
+++ b/tools/biosnoop.py
@@ -182,6 +182,6 @@
     start_ts = 1
 
 # loop with callback to print_event
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/btrfsslower.py b/tools/btrfsslower.py
index fcc155e..8b34900 100755
--- a/tools/btrfsslower.py
+++ b/tools/btrfsslower.py
@@ -343,6 +343,6 @@
         "BYTES", "OFF_KB", "LAT(ms)", "FILENAME"))
 
 # read events
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/cpuunclaimed.py b/tools/cpuunclaimed.py
index 9624b50..3998f9f 100755
--- a/tools/cpuunclaimed.py
+++ b/tools/cpuunclaimed.py
@@ -205,7 +205,7 @@
 trigger = int(0.8 * (1000000000 / frequency))
 
 # read events
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     # allow some buffering by calling sleep(), to reduce the context switch
     # rate and lower overhead.
diff --git a/tools/dbslower.py b/tools/dbslower.py
index 70e0503..6ddec41 100755
--- a/tools/dbslower.py
+++ b/tools/dbslower.py
@@ -131,7 +131,7 @@
       (', '.join(map(str, args.pids)), args.threshold))
 print("%-14s %-6s %8s %s" % ("TIME(s)", "PID", "MS", "QUERY"))
 
-bpf["events"].open_perf_buffer(print_event)
+bpf["events"].open_perf_buffer(print_event, page_cnt=64)
 while True:
     bpf.kprobe_poll()
 
diff --git a/tools/dcsnoop.py b/tools/dcsnoop.py
index d162a66..a72ba41 100755
--- a/tools/dcsnoop.py
+++ b/tools/dcsnoop.py
@@ -153,6 +153,6 @@
 # header
 print("%-11s %-6s %-16s %1s %s" % ("TIME(s)", "PID", "COMM", "T", "FILE"))
 
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/ext4slower.py b/tools/ext4slower.py
index 20865a5..4950325 100755
--- a/tools/ext4slower.py
+++ b/tools/ext4slower.py
@@ -337,6 +337,6 @@
         "BYTES", "OFF_KB", "LAT(ms)", "FILENAME"))
 
 # read events
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/fileslower.py b/tools/fileslower.py
index 2ae4756..ab29990 100755
--- a/tools/fileslower.py
+++ b/tools/fileslower.py
@@ -243,6 +243,6 @@
         time.time() - start_ts, event.comm, event.pid, mode_s[event.mode],
         event.sz, ms, name))
 
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/mysqld_qslower.py b/tools/mysqld_qslower.py
index 94906a8..3ed18ec 100755
--- a/tools/mysqld_qslower.py
+++ b/tools/mysqld_qslower.py
@@ -128,6 +128,6 @@
         event.pid, float(event.delta) / 1000000, event.query))
 
 # loop with callback to print_event
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/opensnoop.py b/tools/opensnoop.py
index 0c2b9b5..dae4ff4 100755
--- a/tools/opensnoop.py
+++ b/tools/opensnoop.py
@@ -178,6 +178,6 @@
            event.comm, fd_s, err, event.fname))
 
 # loop with callback to print_event
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/stacksnoop.lua b/tools/stacksnoop.lua
index 8f5f5b4..7dfaf3d 100755
--- a/tools/stacksnoop.lua
+++ b/tools/stacksnoop.lua
@@ -102,6 +102,6 @@
 
   bpf:get_table("events"):open_perf_buffer(print_event,
     "struct { uint64_t stack_id; uint32_t pid; char comm[$]; }",
-    TASK_COMM_LEN)
+    {TASK_COMM_LEN})
   bpf:kprobe_poll_loop()
 end
diff --git a/tools/statsnoop.py b/tools/statsnoop.py
index 2fc2164..d9164b6 100755
--- a/tools/statsnoop.py
+++ b/tools/statsnoop.py
@@ -159,6 +159,6 @@
         fd_s, err, event.fname))
 
 # loop with callback to print_event
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/tcplife.py b/tools/tcplife.py
index 1125f9c..69ba174 100755
--- a/tools/tcplife.py
+++ b/tools/tcplife.py
@@ -354,7 +354,7 @@
 start_ts = 0
 
 # read events
-b["ipv4_events"].open_perf_buffer(print_ipv4_event)
-b["ipv6_events"].open_perf_buffer(print_ipv6_event)
+b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
+b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/trace.py b/tools/trace.py
index 46bc97e..029194c 100755
--- a/tools/trace.py
+++ b/tools/trace.py
@@ -29,6 +29,7 @@
         use_localtime = True
         tgid = -1
         pid = -1
+        page_cnt = None
 
         @classmethod
         def configure(cls, args):
@@ -38,6 +39,7 @@
                 cls.first_ts = BPF.monotonic_time()
                 cls.tgid = args.tgid or -1
                 cls.pid = args.pid or -1
+                cls.page_cnt = args.buffer_pages
 
         def __init__(self, probe, string_size, kernel_stack, user_stack):
                 self.usdt = None
@@ -510,7 +512,8 @@
                         self._attach_u(bpf)
                 self.python_struct = self._generate_python_data_decl()
                 callback = partial(self.print_event, bpf)
-                bpf[self.events_name].open_perf_buffer(callback)
+                bpf[self.events_name].open_perf_buffer(callback,
+                        page_cnt=self.page_cnt)
 
         def _attach_k(self, bpf):
                 if self.probe_type == "r":
@@ -543,6 +546,7 @@
                                           pid=Probe.tgid)
 
 class Tool(object):
+        DEFAULT_PERF_BUFFER_PAGES = 64
         examples = """
 EXAMPLES:
 
@@ -577,6 +581,10 @@
                   "functions and print trace messages.",
                   formatter_class=argparse.RawDescriptionHelpFormatter,
                   epilog=Tool.examples)
+                parser.add_argument("-b", "--buffer-pages", type=int,
+                  default=Tool.DEFAULT_PERF_BUFFER_PAGES,
+                  help="number of pages to use for perf_events ring buffer "
+                       "(default: %(default)d)")
                 # we'll refer to the userspace concepts of "pid" and "tid" by
                 # their kernel names -- tgid and pid -- inside the script
                 parser.add_argument("-p", "--pid", type=int, metavar="PID",
diff --git a/tools/trace_example.txt b/tools/trace_example.txt
index 504030c..eb72e5e 100644
--- a/tools/trace_example.txt
+++ b/tools/trace_example.txt
@@ -201,11 +201,28 @@
 libraries and then accessing the /home/vagrant directory listing.
 
 
+Lastly, if a high-frequency event is traced you may overflow the perf ring
+buffer. This shows as "Lost N samples":
+
+# trace sys_open
+5087   5087   pgrep        sys_open
+5087   5087   pgrep        sys_open
+5087   5087   pgrep        sys_open
+5087   5087   pgrep        sys_open
+5087   5087   pgrep        sys_open
+Lost 764896 samples
+Lost 764896 samples
+Lost 764896 samples
+
+The perf ring buffer size can be changed with -b. The value is the per-CPU
+buffer size, measured in pages. It must be a power of two and defaults to
+64 pages.
+
+
 USAGE message:
 
-# trace -h
-usage: trace [-h] [-p PID] [-L TID] [-v] [-Z STRING_SIZE] [-S]
-             [-M MAX_EVENTS] [-t] [-T] [-K] [-U] [-I header]
+usage: trace [-h] [-b BUFFER_PAGES] [-p PID] [-L TID] [-v] [-Z STRING_SIZE]
+             [-S] [-M MAX_EVENTS] [-t] [-T] [-K] [-U] [-I header]
              probe [probe ...]
 
 Attach to functions and print trace messages.
@@ -215,6 +232,9 @@
 
 optional arguments:
   -h, --help            show this help message and exit
+  -b BUFFER_PAGES, --buffer-pages BUFFER_PAGES
+                        number of pages to use for perf_events ring buffer
+                        (default: 64)
   -p PID, --pid PID     id of the process to trace (optional)
   -L TID, --tid TID     id of the thread to trace (optional)
   -v, --verbose         print resulting BPF program code before executing
@@ -224,7 +244,7 @@
   -M MAX_EVENTS, --max-events MAX_EVENTS
                         number of events to print before quitting
   -t, --timestamp       print timestamp column (offset from trace start)
-  -T, --time		print time column
+  -T, --time            print time column
   -K, --kernel-stack    output kernel stack trace
   -U, --user-stack      output user stack trace
   -I header, --include header
@@ -247,9 +267,9 @@
         Trace malloc calls and print the size being allocated
 trace 'p:c:write (arg1 == 1) "writing %d bytes to STDOUT", arg3'
         Trace the write() call from libc to monitor writes to STDOUT
-trace 'r::__kmalloc (retval == 0) "kmalloc failed!"
+trace 'r::__kmalloc (retval == 0) "kmalloc failed!"'
         Trace returns from __kmalloc which returned a null pointer
-trace 'r:c:malloc (retval) "allocated = %x", retval
+trace 'r:c:malloc (retval) "allocated = %x", retval'
         Trace returns from malloc and print non-NULL allocated buffers
 trace 't:block:block_rq_complete "sectors=%d", args->nr_sector'
         Trace the block_rq_complete kernel tracepoint and print # of tx sectors
diff --git a/tools/xfsslower.py b/tools/xfsslower.py
index 25c5a20..3fbc96d 100755
--- a/tools/xfsslower.py
+++ b/tools/xfsslower.py
@@ -293,6 +293,6 @@
         "BYTES", "OFF_KB", "LAT(ms)", "FILENAME"))
 
 # read events
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()
diff --git a/tools/zfsslower.py b/tools/zfsslower.py
index e2be684..f5e8cbb 100755
--- a/tools/zfsslower.py
+++ b/tools/zfsslower.py
@@ -297,6 +297,6 @@
         "BYTES", "OFF_KB", "LAT(ms)", "FILENAME"))
 
 # read events
-b["events"].open_perf_buffer(print_event)
+b["events"].open_perf_buffer(print_event, page_cnt=64)
 while 1:
     b.kprobe_poll()