Added small list of automatic includes to make the -I switch less necessary
diff --git a/man/man8/argdist.8 b/man/man8/argdist.8
index d39d4ee..66176bb 100644
--- a/man/man8/argdist.8
+++ b/man/man8/argdist.8
@@ -45,7 +45,8 @@
 One or more header files that should be included in the BPF program. This 
 enables the use of structure definitions, enumerations, and constants that
 are available in these headers. You should provide the same path you would
-include in the BPF program, e.g. 'linux/blkdev.h' or 'linux/time.h'.
+include in the BPF program, e.g. 'linux/blkdev.h' or 'linux/time.h'. Note: in
+many cases, argdist will deduce the necessary header files automatically. 
 .SH SPECIFIER SYNTAX
 The general specifier syntax is as follows:
 
@@ -106,47 +107,47 @@
 .TP
 Print a histogram of allocation sizes passed to kmalloc:
 #
-.B argdist.py -H 'p::__kmalloc(u64 size):u64:size'
+.B argdist -H 'p::__kmalloc(u64 size):u64:size'
 .TP
 Print a count of how many times process 1005 called malloc with an allocation size of 16 bytes:
 #
-.B argdist.py -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
+.B argdist -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
 .TP
 Snoop on all strings returned by gets():
 #
-.B argdist.py -C 'r:c:gets():char*:$retval'
+.B argdist -C 'r:c:gets():char*:$retval'
 .TP
 Print a histogram of read sizes that were longer than 1ms:
 #
-.B argdist.py -H 'r::__vfs_read(void *file, void *buf, size_t count):size_t:$entry(count):$latency > 1000000'
+.B argdist -H 'r::__vfs_read(void *file, void *buf, size_t count):size_t:$entry(count):$latency > 1000000'
 .TP
 Print frequency counts of how many times writes were issued to a particular file descriptor number, in process 1005:
 #
-.B argdist.py -p 1005 -C 'p:c:write(int fd):int:fd'
+.B argdist -p 1005 -C 'p:c:write(int fd):int:fd'
 .TP
 Print a histogram of error codes returned by read() in process 1005:
 #
-.B argdist.py -p 1005 -H 'r:c:read()'
+.B argdist -p 1005 -H 'r:c:read()'
 .TP
 Print a histogram of buffer sizes passed to write() across all processes, where the file descriptor was 1 (STDOUT):
 #
-.B argdist.py -H 'p:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1'
+.B argdist -H 'p:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1'
 .TP
 Count fork() calls in libc across all processes, grouped by pid:
 #
-.B argdist.py -C 'p:c:fork():int:$PID;fork per process'
+.B argdist -C 'p:c:fork():int:$PID#fork per process'
 .TP
 Print histograms of sleep() and nanosleep() parameter values:
 #
-.B argdist.py -I 'linux/time.h' -H 'p:c:sleep(u32 seconds):u32:seconds' 'p:c:nanosleep(struct timespec *req):long:req->tv_nsec'
+.B argdist -H 'p:c:sleep(u32 seconds):u32:seconds' 'p:c:nanosleep(struct timespec *req):long:req->tv_nsec'
 .TP
 Spy on writes to STDOUT performed by process 2780, up to a string size of 120 characters:
 #
-.B argdist.py -p 2780 -z 120 -C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1'
+.B argdist -p 2780 -z 120 -C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1'
 .TP
 Group files being read from and the read sizes from __vfs_read:
 #
-.B argdist.py -I 'linux/fs.h' -C 'p::__vfs_read(struct file *file, void *buf, size_t count):char*,size_t:file->f_path.dentry->d_iname,count:file->f_path.dentry->d_iname[0]!=0'
+.B argdist -C 'p::__vfs_read(struct file *file, void *buf, size_t count):char*,size_t:file->f_path.dentry->d_iname,count:file->f_path.dentry->d_iname[0]!=0'
 .SH SOURCE
 This is from bcc.
 .IP
diff --git a/tools/argdist.py b/tools/argdist.py
index fd53cf3..e62f4fe 100755
--- a/tools/argdist.py
+++ b/tools/argdist.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 #
-# argdist.py   Trace a function and display a distribution of its
+# argdist      Trace a function and display a distribution of its
 #              parameter values as a histogram or frequency count.
 #
-# USAGE: argdist.py [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL]
+# USAGE: argdist [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL]
 #                   [-n COUNT] [-v] [-T TOP]
 #                   [-C specifier [specifier ...]]
 #                   [-H specifier [specifier ...]]
@@ -33,6 +33,23 @@
 """
         next_probe_index = 0
         aliases = { "$PID": "bpf_get_current_pid_tgid()" }
+        auto_includes = {
+                "linux/time.h"    : ["time"],
+                "linux/fs.h"      : ["fs", "file"],
+                "linux/blkdev.h"  : ["bio", "request"],
+                "linux/slab.h"    : ["alloc"]
+        }
+
+        @staticmethod
+        def generate_auto_includes(specifiers):
+                """Return one #include line per header whose keyword
+                appears in any raw specifier string (no duplicates)."""
+                specs = list(specifiers)    # may be a one-shot iterator
+                return "".join(
+                        "#include <%s>\n" % header
+                        for header, keywords
+                        in Specifier.auto_includes.items()
+                        if any(k in s for k in keywords for s in specs))
 
         def _substitute_aliases(self, expr):
                 if expr is None:
@@ -443,52 +460,51 @@
 
 EXAMPLES:
 
-argdist.py -H 'p::__kmalloc(u64 size):u64:size'
+argdist -H 'p::__kmalloc(u64 size):u64:size'
         Print a histogram of allocation sizes passed to kmalloc
 
-argdist.py -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
+argdist -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
         Print a frequency count of how many times process 1005 called malloc
         with an allocation size of 16 bytes
 
-argdist.py -C 'r:c:gets():char*:(char*)$retval#snooped strings'
+argdist -C 'r:c:gets():char*:(char*)$retval#snooped strings'
         Snoop on all strings returned by gets()
 
-argdist.py -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte'
+argdist -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte'
         Print a histogram of nanoseconds per byte from kmalloc allocations
 
-argdist.py -I 'linux/slab.h' \\
-        -C 'p::__kmalloc(size_t size, gfp_t flags):size_t:size:flags&GFP_ATOMIC'
+argdist -C 'p::__kmalloc(size_t size, gfp_t flags):size_t:size:flags&GFP_ATOMIC'
         Print frequency count of kmalloc allocation sizes that have GFP_ATOMIC
 
-argdist.py -p 1005 -C 'p:c:write(int fd):int:fd' -T 5
+argdist -p 1005 -C 'p:c:write(int fd):int:fd' -T 5
         Print frequency counts of how many times writes were issued to a
         particular file descriptor number, in process 1005, but only show
         the top 5 busiest fds
 
-argdist.py -p 1005 -H 'r:c:read()'
+argdist -p 1005 -H 'r:c:read()'
         Print a histogram of results (sizes) returned by read() in process 1005
 
-argdist.py -C 'r::__vfs_read():u32:$PID:$latency > 100000'
+argdist -C 'r::__vfs_read():u32:$PID:$latency > 100000'
         Print frequency of reads by process where the latency was >0.1ms
 
-argdist.py -H 'r::__vfs_read(void *file, void *buf, size_t count):size_t:$entry(count):$latency > 1000000' 
+argdist -H 'r::__vfs_read(void *file, void *buf, size_t count):size_t:$entry(count):$latency > 1000000' 
         Print a histogram of read sizes that were longer than 1ms
 
-argdist.py -H \\
+argdist -H \\
         'p:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1'
         Print a histogram of buffer sizes passed to write() across all
         processes, where the file descriptor was 1 (STDOUT)
 
-argdist.py -C 'p:c:fork()#fork calls'
+argdist -C 'p:c:fork()#fork calls'
         Count fork() calls in libc across all processes
         Can also use funccount.py, which is easier and more flexible 
 
-argdist.py -I 'linux/time.h' -H \\
+argdist  -H \\
         'p:c:sleep(u32 seconds):u32:seconds' \\
         'p:c:nanosleep(struct timespec *req):long:req->tv_nsec'
         Print histograms of sleep() and nanosleep() parameter values
 
-argdist.py -p 2780 -z 120 \\
+argdist -p 2780 -z 120 \\
         -C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1'
         Spy on writes to STDOUT performed by process 2780, up to a string size
         of 120 characters 
@@ -536,6 +552,7 @@
 """ % args.string_size
 for include in (args.include or []):
         bpf_source += "#include <%s>\n" % include
+bpf_source += Specifier.generate_auto_includes(map(lambda s: s.raw_spec, specifiers))
 for specifier in specifiers:
         bpf_source += specifier.generate_text()
 
diff --git a/tools/argdist_examples.txt b/tools/argdist_examples.txt
index 52406a6..55e5a61 100644
--- a/tools/argdist_examples.txt
+++ b/tools/argdist_examples.txt
@@ -10,7 +10,7 @@
 For example, suppose you want to find what allocation sizes are common in
 your application:
 
-# ./argdist.py -p 2420 -C 'p:c:malloc(size_t size):size_t:size'
+# ./argdist -p 2420 -C 'p:c:malloc(size_t size):size_t:size'
 [01:42:29]
 p:c:malloc(size_t size):size_t:size
         COUNT      EVENT
@@ -43,7 +43,7 @@
 Now, suppose you wanted a histogram of buffer sizes passed to the write()
 function across the system:
 
-# ./argdist.py -H 'p:c:write(int fd, void *buf, size_t len):size_t:len'
+# ./argdist -H 'p:c:write(int fd, void *buf, size_t len):size_t:len'
 [01:45:22]
 p:c:write(int fd, void *buf, size_t len):size_t:len
      len                 : count     distribution
@@ -81,7 +81,7 @@
 But these are writes across the board -- what if you wanted to focus on writes
 to STDOUT?
 
-# ./argdist.py -H 'p:c:write(int fd, void *buf, size_t len):size_t:len:fd==1'
+# ./argdist -H 'p:c:write(int fd, void *buf, size_t len):size_t:len:fd==1'
 [01:47:17]
 p:c:write(int fd, void *buf, size_t len):size_t:len:fd==1
      len                 : count     distribution
@@ -120,7 +120,7 @@
 wanted a histogram of kernel allocation (kmalloc) sizes across the system,
 printed twice with 3 second intervals:
 
-# ./argdist.py -i 3 -n 2 -H 'p::__kmalloc(size_t size):size_t:size'
+# ./argdist -i 3 -n 2 -H 'p::__kmalloc(size_t size):size_t:size'
 [01:50:00]
 p::__kmalloc(size_t size):size_t:size
      size                : count     distribution
@@ -143,7 +143,7 @@
 Occasionally, numeric information isn't enough and you want to capture strings.
 What are the strings printed by puts() across the system?
 
-# ./argdist.py -i 10 -n 1 -C 'p:c:puts(char *str):char*:str'
+# ./argdist -i 10 -n 1 -C 'p:c:puts(char *str):char*:str'
 [01:53:54]
 p:c:puts(char *str):char*:str
         COUNT      EVENT
@@ -156,7 +156,7 @@
 strings input by the user (note how "r" is used instead of "p" to attach a
 probe to the function's return):
 
-# ./argdist.py -i 10 -n 1 -C 'r:c:gets():char*:(char*)$retval:$retval!=0'
+# ./argdist -i 10 -n 1 -C 'r:c:gets():char*:(char*)$retval:$retval!=0'
 [02:12:23]
 r:c:gets():char*:$retval:$retval!=0
         COUNT      EVENT
@@ -166,7 +166,7 @@
 
 Similarly, we could get a histogram of the error codes returned by read():
 
-# ./argdist.py -i 10 -c 1 -H 'r:c:read()'
+# ./argdist -i 10 -n 1 -H 'r:c:read()'
 [02:15:36]
 r:c:read()
      retval              : count     distribution
@@ -188,7 +188,7 @@
 which processes are performing slow synchronous filesystem reads -- say,
 longer than 0.1ms (100,000ns):
 
-# ./argdist.py -C 'r::__vfs_read():u32:$PID:$latency > 100000'
+# ./argdist -C 'r::__vfs_read():u32:$PID:$latency > 100000'
 [01:08:48]
 r::__vfs_read():u32:$PID:$latency > 100000
         COUNT      EVENT
@@ -207,7 +207,7 @@
 might be curious how long it takes malloc() to allocate memory -- nanoseconds
 per byte allocated. Let's go:
 
-# ./argdist.py -H 'r:c:malloc(size_t size):u64:$latency/$entry(size);ns per byte' -n 1 -i 10
+# ./argdist -H 'r:c:malloc(size_t size):u64:$latency/$entry(size)#ns per byte' -n 1 -i 10
 [01:11:13]
      ns per byte         : count     distribution
          0 -> 1          : 0        |                                        |
@@ -232,7 +232,7 @@
 You could also group results by more than one field. For example, __kmalloc
 takes an additional flags parameter that describes how to allocate memory:
 
-# ./argdist.py -I 'linux/slab.h' -C 'p::__kmalloc(size_t size, gfp_t flags):gfp_t,size_t:flags,size'
+# ./argdist -C 'p::__kmalloc(size_t size, gfp_t flags):gfp_t,size_t:flags,size'
 [03:42:29]
 p::__kmalloc(size_t size, gfp_t flags):gfp_t,size_t:flags,size
         COUNT      EVENT
@@ -265,7 +265,7 @@
 Here's a final example that finds how many write() system calls are performed
 by each process on the system:
 
-# argdist.py -C 'p:c:write():int:$PID;write per process' -n 2
+# argdist -C 'p:c:write():int:$PID#write per process' -n 2
 [06:47:18]
 write by process
         COUNT      EVENT
@@ -282,8 +282,8 @@
 
 USAGE message:
 
-# argdist.py -h
-usage: argdist.py [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL] [-n COUNT] [-v]
+# argdist -h
+usage: argdist [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL] [-n COUNT] [-v]
                   [-T TOP] [-H [specifier [specifier ...]]]
                   [-C [specifier [specifier ...]]] [-I [header [header ...]]]
 
@@ -326,52 +326,51 @@
 
 EXAMPLES:
 
-argdist.py -H 'p::__kmalloc(u64 size):u64:size'
+argdist -H 'p::__kmalloc(u64 size):u64:size'
         Print a histogram of allocation sizes passed to kmalloc
 
-argdist.py -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
+argdist -p 1005 -C 'p:c:malloc(size_t size):size_t:size:size==16'
         Print a frequency count of how many times process 1005 called malloc
         with an allocation size of 16 bytes
 
-argdist.py -C 'r:c:gets():char*:$retval#snooped strings'
+argdist -C 'r:c:gets():char*:$retval#snooped strings'
         Snoop on all strings returned by gets()
 
-argdist.py -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte'
+argdist -H 'r::__kmalloc(size_t size):u64:$latency/$entry(size)#ns per byte'
         Print a histogram of nanoseconds per byte from kmalloc allocations
 
-argdist.py -I 'linux/slab.h' \
-        -C 'p::__kmalloc(size_t size, gfp_t flags):size_t:size:flags&GFP_ATOMIC'
+argdist -C 'p::__kmalloc(size_t size, gfp_t flags):size_t:size:flags&GFP_ATOMIC'
         Print frequency count of kmalloc allocation sizes that have GFP_ATOMIC
 
-argdist.py -p 1005 -C 'p:c:write(int fd):int:fd' -T 5
+argdist -p 1005 -C 'p:c:write(int fd):int:fd' -T 5
         Print frequency counts of how many times writes were issued to a
         particular file descriptor number, in process 1005, but only show
         the top 5 busiest fds
 
-argdist.py -p 1005 -H 'r:c:read()'
+argdist -p 1005 -H 'r:c:read()'
         Print a histogram of error codes returned by read() in process 1005
 
-argdist.py -C 'r::__vfs_read():u32:$PID:$latency > 100000'
+argdist -C 'r::__vfs_read():u32:$PID:$latency > 100000'
         Print frequency of reads by process where the latency was >0.1ms
 
-argdist.py -H 'r::__vfs_read(void *file, void *buf, size_t count):size_t:$entry(count):$latency > 1000000' 
+argdist -H 'r::__vfs_read(void *file, void *buf, size_t count):size_t:$entry(count):$latency > 1000000' 
         Print a histogram of read sizes that were longer than 1ms
 
-argdist.py -H \
+argdist -H \
         'p:c:write(int fd, const void *buf, size_t count):size_t:count:fd==1'
         Print a histogram of buffer sizes passed to write() across all
         processes, where the file descriptor was 1 (STDOUT)
 
-argdist.py -C 'p:c:fork()#fork calls'
+argdist -C 'p:c:fork()#fork calls'
         Count fork() calls in libc across all processes
         Can also use funccount.py, which is easier and more flexible 
 
-argdist.py -I 'linux/time.h' -H \
+argdist  -H \
         'p:c:sleep(u32 seconds):u32:seconds' \
         'p:c:nanosleep(struct timespec *req):long:req->tv_nsec'
         Print histograms of sleep() and nanosleep() parameter values
 
-argdist.py -p 2780 -z 120 \
+argdist -p 2780 -z 120 \
         -C 'p:c:write(int fd, char* buf, size_t len):char*:buf:fd==1'
         Spy on writes to STDOUT performed by process 2780, up to a string size
         of 120 characters