add -u, and change from 2 to 1 traced functions
diff --git a/tools/offcputime b/tools/offcputime
index f40e070..78cfc5f 100755
--- a/tools/offcputime
+++ b/tools/offcputime
@@ -27,12 +27,15 @@
     ./offcputime             # trace off-CPU stack time until Ctrl-C
     ./offcputime 5           # trace for 5 seconds only
     ./offcputime -f 5        # 5 seconds, and output in folded format
+    ./offcputime -u          # don't include kernel threads (user only)
     ./offcputime -p 185      # trace fo PID 185 only
 """
 parser = argparse.ArgumentParser(
     description="Summarize off-CPU time by kernel stack trace",
     formatter_class=argparse.RawDescriptionHelpFormatter,
     epilog=examples)
+parser.add_argument("-u", "--useronly", action="store_true",
+    help="user threads only (no kernel threads)")
 parser.add_argument("-p", "--pid",
     help="trace this PID only")
 parser.add_argument("-v", "--verbose", action="store_true",
@@ -46,12 +49,15 @@
 duration = int(args.duration)
 debug = 0
 maxdepth = 20    # and MAXDEPTH
+if args.pid and args.useronly:
+    print("ERROR: use either -p or -u.")
+    exit()
 
 # signal handler
 def signal_ignore(signal, frame):
     print()
 
-# load BPF program
+# define BPF program
 bpf_text = """
 #include <uapi/linux/ptrace.h>
 #include <linux/sched.h>
@@ -82,19 +88,19 @@
     return 0;
 }
 
-int offcpu(struct pt_regs *ctx) {
-    u32 pid = bpf_get_current_pid_tgid();
-    u64 ts = bpf_ktime_get_ns();
-    FILTER
-    start.update(&pid, &ts);
-    return 0;
-}
+int oncpu(struct pt_regs *ctx, struct task_struct *prev) {
+    u32 pid;
+    u64 ts, *tsp;
 
-int oncpu(struct pt_regs *ctx) {
-    u32 pid = bpf_get_current_pid_tgid();
-    u64 *tsp;
+    // record previous thread sleep time
+    if (FILTER) {
+        pid = prev->pid;
+        ts = bpf_ktime_get_ns();
+        start.update(&pid, &ts);
+    }
 
-    // calculate delta time
+    // calculate current thread's delta time
+    pid = bpf_get_current_pid_tgid();
     tsp = start.lookup(&pid);
     if (tsp == 0)
         return 0;        // missed start or filtered
@@ -141,14 +147,17 @@
 }
 """
 if args.pid:
-    bpf_text = bpf_text.replace('FILTER',
-        'if (pid != %s) { return 0; }' % (args.pid))
+    filter = 'prev->pid == %s' % args.pid    # FILTER is tested before pid is set
+elif args.useronly:
+    filter = '!(prev->flags & PF_KTHREAD)'
 else:
-    bpf_text = bpf_text.replace('FILTER', '')
+    filter = '1'
+bpf_text = bpf_text.replace('FILTER', filter)
 if debug:
     print(bpf_text)
+
+# initialize BPF
 b = BPF(text=bpf_text)
-b.attach_kprobe(event="schedule", fn_name="offcpu")
 b.attach_kprobe(event="finish_task_switch", fn_name="oncpu")
 matched = b.num_open_kprobes()
 if matched == 0: