Merge pull request #1797 from dpayne/feat/add_stack_frames_to_funcslower

Feat/add stack frames to funcslower
diff --git a/LINKS.md b/LINKS.md
index 49a29c4..9eb1abe 100644
--- a/LINKS.md
+++ b/LINKS.md
@@ -1,3 +1,4 @@
+- 2018-05-03: [Linux System Monitoring with eBPF](https://www.circonus.com/2018/05/linux-system-monitoring-with-ebpf)
 - 2018-02-22: [Some advanced BCC topics](https://lwn.net/Articles/747640)
 - 2018-01-23: [BPFd: Running BCC tools remotely across systems and architectures](https://lwn.net/Articles/744522)
 - 2017-12-22: [An introduction to the BPF Compiler Collection](https://lwn.net/Articles/742082)
diff --git a/README.md b/README.md
index 7fa188b..68c426a 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 # BPF Compiler Collection (BCC)
 
 BCC is a toolkit for creating efficient kernel tracing and manipulation
-programs, and includes several useful tools and examples. It makes use of 
+programs, and includes several useful tools and examples. It makes use of
 extended BPF (Berkeley Packet Filters), formally known as eBPF, a new feature
 that was first added to Linux 3.15. Much of what BCC uses requires Linux 4.1
 and above.
@@ -23,7 +23,7 @@
 summary is returned to user-level.
 
 ```Shell
-# ./bitehist.py 
+# ./bitehist.py
 Tracing... Hit Ctrl-C to end.
 ^C
      kbytes          : count     distribution
@@ -130,6 +130,7 @@
 - tools/[reset-trace](tools/reset-trace.sh): Reset the state of tracing. Maintenance tool only. [Examples](tools/reset-trace_example.txt).
 - tools/[runqlat](tools/runqlat.py): Run queue (scheduler) latency as a histogram. [Examples](tools/runqlat_example.txt).
 - tools/[runqlen](tools/runqlen.py): Run queue length as a histogram. [Examples](tools/runqlen_example.txt).
+- tools/[runqslower](tools/runqslower.py): Trace long process scheduling delays. [Examples](tools/runqslower_example.txt).
 - tools/[slabratetop](tools/slabratetop.py): Kernel SLAB/SLUB memory cache allocation rate top. [Examples](tools/slabratetop_example.txt).
 - tools/[softirqs](tools/softirqs.py):  Measure soft IRQ (soft interrupt) event time. [Examples](tools/softirqs_example.txt).
 - tools/[solisten](tools/solisten.py): Trace TCP socket listen. [Examples](tools/solisten_example.txt).
diff --git a/debian/bcc-tools.install b/debian/bcc-tools.install
index 115a22d..60d92a5 100644
--- a/debian/bcc-tools.install
+++ b/debian/bcc-tools.install
@@ -1,2 +1,3 @@
+usr/share/bcc/introspection/*
 usr/share/bcc/tools/*
 usr/share/bcc/man/*
diff --git a/docs/kernel-versions.md b/docs/kernel-versions.md
index fd95391..408eb12 100644
--- a/docs/kernel-versions.md
+++ b/docs/kernel-versions.md
@@ -23,7 +23,7 @@
 Sparc64 | 4.12 | [`7a12b5031c6b`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7a12b5031c6b947cc13918237ae652b536243b76)
 MIPS | 4.13 | [`f381bf6d82f0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=f381bf6d82f032b7410185b35d000ea370ac706b)
 ARM32 | 4.14 | [`39c13c204bb1`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=39c13c204bb1150d401e27d41a9d8b332be47c49)
-x86\_32 | ? | [Not upstream yet](https://lwn.net/Articles/752957/)
+x86\_32 | 4.18 |  [`03f5781be2c7`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=03f5781be2c7b7e728d724ac70ba10799cc710d7)
 
 ## Main features
 
@@ -68,8 +68,10 @@
 BPF attached to raw tracepoints | 4.17 | [`c4f6699dfcb8`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c4f6699dfcb8558d138fe838f741b2c10f416cf9)
 BPF attached to `bind()` system call | 4.17 | [`4fbac77d2d09`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=4fbac77d2d092b475dda9eea66da674369665427)
 BPF Type Format (BTF) | 4.18 | [`69b693f0aefa`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=69b693f0aefa0ed521e8bd02260523b5ae446ad7)
-AF_XDP | ? | [Not upstream yet](https://lwn.net/Articles/752959/)
-bpfilter | ? | [Not upstream yet](https://lwn.net/Articles/747504/)
+AF_XDP | 4.18 |  [`fbfc504a24f5`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=fbfc504a24f53f7ebe128ab55cb5dba634f4ece8)
+bpfilter | 4.18 |  [`d2ba09c17a06`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=d2ba09c17a0647f899d6c20a11bab9e6d3382f07)
+End.BPF action for seg6local LWT | 4.18 |  [`004d4b274e2a`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=004d4b274e2a1a895a0e5dc66158b90a7d463d44)
+BPF attached to LIRC devices | 4.18 |  [`f4364dcfc86d`](https://git.kernel.org/cgit/linux/kernel/git/bpf/bpf-next.git/commit/?id=f4364dcfc86df7c1ca47b256eaf6b6d0cdd0d936)
 
 ## Tables (_a.k.a._ Maps)
 
@@ -95,8 +97,10 @@
 Array of maps | 4.12 | [`56f668dfe00d`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=56f668dfe00dcf086734f1c42ea999398fad6572)
 Hash of maps | 4.12 | [`bcc6b1b7ebf8`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=bcc6b1b7ebf857a9fe56202e2be3361131588c15)
 Netdevice references | 4.14 | [`546ac1ffb70d`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=546ac1ffb70d25b56c1126940e5ec639c4dd7413)
-Socket references | 4.14 | [`174a79ff9515`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=174a79ff9515f400b9a6115643dafd62a635b7e6)
+Socket references (array) | 4.14 | [`174a79ff9515`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=174a79ff9515f400b9a6115643dafd62a635b7e6)
 CPU references | 4.15 | [`6710e1126934`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=6710e1126934d8b4372b4d2f9ae1646cd3f151bf)
+AF_XDP socket (XSK) references | 4.18 | [`fbfc504a24f5`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=fbfc504a24f53f7ebe128ab55cb5dba634f4ece8)
+Socket references (hashmap) | 4.18 | [`81110384441a`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=81110384441a59cff47430f20f049e69b98c17f4)
 
 ## XDP
 
@@ -144,6 +148,7 @@
 `BPF_FUNC_csum_diff()` | 4.6 | [`7d672345ed29`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7d672345ed295b1356a5d9f7111da1d1d7d65867)
 `BPF_FUNC_csum_update()` | 4.9 | [`36bbef52c7eb`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=36bbef52c7eb646ed6247055a2acd3851e317857)
 `BPF_FUNC_current_task_under_cgroup()` | 4.9 | [`60d20f9195b2`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=60d20f9195b260bdf0ac10c275ae9f6016f9c069)
+`BPF_FUNC_fib_lookup()` | 4.18 | [`87f5fc7e48dd`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=87f5fc7e48dd3175b30dd03b41564e1a8e136323)
 `BPF_FUNC_get_cgroup_classid()` | 4.3 | [`8d20aabe1c76`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=8d20aabe1c76cccac544d9fcc3ad7823d9e98a2d)
 `BPF_FUNC_get_current_comm()` | 4.2 | [`ffeedafbf023`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89)
 `BPF_FUNC_get_current_pid_tgid()` | 4.2 | [`ffeedafbf023`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89)
@@ -156,17 +161,23 @@
 `BPF_FUNC_get_smp_processor_id()` | 4.1 | [`c04167ce2ca0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c04167ce2ca0ecaeaafef006cb0d65cf01b68e42)
 `BPF_FUNC_get_socket_cookie()` | 4.12 | [`91b8270f2a4d`](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=91b8270f2a4d1d9b268de90451cdca63a70052d6)
 `BPF_FUNC_get_socket_uid()` | 4.12 | [`6acc5c291068`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=6acc5c2910689fc6ee181bf63085c5efff6a42bd)
+`BPF_FUNC_get_stack()` | 4.18 | [`de2ff05f48af`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=de2ff05f48afcde816ff4edb217417f62f624ab5)
 `BPF_FUNC_get_stackid()` | 4.6 | [`d5a3b1f69186`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d5a3b1f691865be576c2bffa708549b8cdccda19)
 `BPF_FUNC_getsockopt()` | 4.15 | [`cd86d1fd2102`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=cd86d1fd21025fdd6daf23d1288da405e7ad0ec6)
 `BPF_FUNC_ktime_get_ns()` | 4.1 | [`d9847d310ab4`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d9847d310ab4003725e6ed1822682e24bd406908)
 `BPF_FUNC_l3_csum_replace()` | 4.1 | [`91bc4822c3d6`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=91bc4822c3d61b9bb7ef66d3b77948a4f9177954)
 `BPF_FUNC_l4_csum_replace()` | 4.1 | [`91bc4822c3d6`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=91bc4822c3d61b9bb7ef66d3b77948a4f9177954)
+`BPF_FUNC_lwt_push_encap()` | 4.18 | [`fe94cc290f53`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=fe94cc290f535709d3c5ebd1e472dfd0aec7ee79)
+`BPF_FUNC_lwt_seg6_action()` | 4.18 | [`fe94cc290f53`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=fe94cc290f535709d3c5ebd1e472dfd0aec7ee79)
+`BPF_FUNC_lwt_seg6_adjust_srh()` | 4.18 | [`fe94cc290f53`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=fe94cc290f535709d3c5ebd1e472dfd0aec7ee79)
+`BPF_FUNC_lwt_seg6_store_bytes()` | 4.18 | [`fe94cc290f53`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=fe94cc290f535709d3c5ebd1e472dfd0aec7ee79)
 `BPF_FUNC_map_delete_elem()` | 3.19 | [`d0003ec01c66`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d0003ec01c667b731c139e23de3306a8b328ccf5)
 `BPF_FUNC_map_lookup_elem()` | 3.19 | [`d0003ec01c66`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d0003ec01c667b731c139e23de3306a8b328ccf5)
 `BPF_FUNC_map_update_elem()` | 3.19 | [`d0003ec01c66`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d0003ec01c667b731c139e23de3306a8b328ccf5)
 `BPF_FUNC_msg_apply_bytes()` | 4.17 | [`2a100317c9eb`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=2a100317c9ebc204a166f16294884fbf9da074ce)
 `BPF_FUNC_msg_cork_bytes()` | 4.17 | [`91843d540a13`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=91843d540a139eb8070bcff8aa10089164436deb)
 `BPF_FUNC_msg_pull_data()` | 4.17 | [`015632bb30da`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=015632bb30daaaee64e1bcac07570860e0bf3092)
+`BPF_FUNC_msg_redirect_hash()` | 4.18 | [`81110384441a`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=81110384441a59cff47430f20f049e69b98c17f4)
 `BPF_FUNC_msg_redirect_map()` | 4.17 | [`4f738adba30a`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=4f738adba30a7cfc006f605707e7aee847ffefa0)
 `BPF_FUNC_perf_event_output()` | 4.4 | [`a43eec304259`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=a43eec304259a6c637f4014a6d4767159b6a3aa3)
 `BPF_FUNC_perf_event_read()` | 4.3 | [`35578d798400`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=35578d7984003097af2b1e34502bc943d40c1804)
@@ -175,11 +186,14 @@
 `BPF_FUNC_probe_read()` | 4.1 | [`2541517c32be`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=2541517c32be2531e0da59dfd7efc1ce844644f5)
 `BPF_FUNC_probe_read_str()` | 4.11 | [`a5e8c07059d0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=a5e8c07059d0f0b31737408711d44794928ac218)
 `BPF_FUNC_probe_write_user()` | 4.8 | [`96ae52279594`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=96ae52279594470622ff0585621a13e96b700600)
+`BPF_FUNC_rc_keydown()` | 4.18 | [`f4364dcfc86d`](https://git.kernel.org/cgit/linux/kernel/git/bpf/bpf-next.git/commit/?id=f4364dcfc86df7c1ca47b256eaf6b6d0cdd0d936)
+`BPF_FUNC_rc_repeat()` | 4.18 | [`f4364dcfc86d`](https://git.kernel.org/cgit/linux/kernel/git/bpf/bpf-next.git/commit/?id=f4364dcfc86df7c1ca47b256eaf6b6d0cdd0d936)
 `BPF_FUNC_redirect()` | 4.4 | [`27b29f63058d`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=27b29f63058d26c6c1742f1993338280d5a41dc6)
 `BPF_FUNC_redirect_map()` | 4.14 | [`97f91a7cf04f`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=97f91a7cf04ff605845c20948b8a80e54cbd3376)
 `BPF_FUNC_set_hash()` | 4.13 | [`ded092cd73c2`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=ded092cd73c2c56a394b936f86897f29b2e131c0)
 `BPF_FUNC_set_hash_invalid()` | 4.9 | [`7a4b28c6cc9f`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7a4b28c6cc9ffac50f791b99cc7e46106436e5d8)
 `BPF_FUNC_setsockopt()` | 4.13 | [`8c4b4c7e9ff0`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=8c4b4c7e9ff0447995750d9329949fa082520269)
+`BPF_FUNC_sk_redirect_hash()` | 4.18 | [`81110384441a`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=81110384441a59cff47430f20f049e69b98c17f4)
 `BPF_FUNC_sk_redirect_map()` | 4.14 | [`174a79ff9515`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=174a79ff9515f400b9a6115643dafd62a635b7e6)
 `BPF_FUNC_skb_adjust_room()` | 4.13 | [`2be7e212d541`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=2be7e212d5419a400d051c84ca9fdd083e5aacac)
 `BPF_FUNC_skb_change_head()` | 4.10 | [`3a0af8fd61f9`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=3a0af8fd61f90920f6fa04e4f1e9a6a73c1b4fd2)
@@ -190,6 +204,7 @@
 `BPF_FUNC_skb_get_tunnel_opt()` | 4.6 | [`14ca0751c96f`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=14ca0751c96f8d3d0f52e8ed3b3236f8b34d3460)
 `BPF_FUNC_skb_get_xfrm_state()` | 4.18 | [`12bed760a78d`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=12bed760a78da6e12ac8252fec64d019a9eac523)
 `BPF_FUNC_skb_load_bytes()` | 4.5 | [`05c74e5e53f6`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=05c74e5e53f6cb07502c3e6a820f33e2777b6605)
+`BPF_FUNC_skb_load_bytes_relative()` | 4.18 | [`4e1ec56cdc59`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=4e1ec56cdc59746943b2acfab3c171b930187bbe)
 `BPF_FUNC_skb_pull_data()` | 4.9 | [`36bbef52c7eb`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=36bbef52c7eb646ed6247055a2acd3851e317857)
 `BPF_FUNC_skb_set_tunnel_key()` | 4.3 | [`d3aa45ce6b94`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d3aa45ce6b94c65b83971257317867db13e5f492)
 `BPF_FUNC_skb_set_tunnel_opt()` | 4.6 | [`14ca0751c96f`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=14ca0751c96f8d3d0f52e8ed3b3236f8b34d3460)
@@ -197,6 +212,7 @@
 `BPF_FUNC_skb_under_cgroup()` | 4.8 | [`4a482f34afcc`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=4a482f34afcc162d8456f449b137ec2a95be60d8)
 `BPF_FUNC_skb_vlan_pop()` | 4.3 | [`4e10df9a60d9`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=4e10df9a60d96ced321dd2af71da558c6b750078)
 `BPF_FUNC_skb_vlan_push()` | 4.3 | [`4e10df9a60d9`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=4e10df9a60d96ced321dd2af71da558c6b750078)
+`BPF_FUNC_sock_hash_update()` | 4.18 | [`81110384441a`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=81110384441a59cff47430f20f049e69b98c17f4)
 `BPF_FUNC_sock_map_update()` | 4.14 | [`174a79ff9515`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=174a79ff9515f400b9a6115643dafd62a635b7e6)
 `BPF_FUNC_tail_call()` | 4.2 | [`04fd61ab36ec`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=04fd61ab36ec065e194ab5e74ae34a5240d992bb)
 `BPF_FUNC_trace_printk()` | 4.1 | [`9c959c863f82`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=9c959c863f8217a2ff3d7c296e8223654d240569)
diff --git a/examples/cpp/FollyRequestContextSwitch.cc b/examples/cpp/FollyRequestContextSwitch.cc
index 0031821..06b027d 100644
--- a/examples/cpp/FollyRequestContextSwitch.cc
+++ b/examples/cpp/FollyRequestContextSwitch.cc
@@ -74,10 +74,16 @@
   }
   std::string binary_path(argv[1]);
 
-  bpf = new ebpf::BPF();
   std::vector<ebpf::USDT> u;
   u.emplace_back(binary_path, "folly", "request_context_switch_before",
                  "on_context_switch");
+  auto usdt_init_res = u[0].init();
+  if (usdt_init_res.code() != 0) {
+    std::cerr << usdt_init_res.msg() << std::endl;
+    return 1;
+  }
+
+  bpf = new ebpf::BPF();
   auto init_res = bpf->init(BPF_PROGRAM, {}, u);
   if (init_res.code() != 0) {
     std::cerr << init_res.msg() << std::endl;
@@ -88,6 +94,8 @@
   if (attach_res.code() != 0) {
     std::cerr << attach_res.msg() << std::endl;
     return 1;
+  } else {
+    std::cout << "Attached to USDT " << u[0];
   }
 
   auto open_res = bpf->open_perf_buffer("events", &handle_output);
diff --git a/examples/networking/tc_perf_event.py b/examples/networking/tc_perf_event.py
index a385916..40e7411 100755
--- a/examples/networking/tc_perf_event.py
+++ b/examples/networking/tc_perf_event.py
@@ -77,7 +77,7 @@
            parent="ffff:fff3", classid=1, direct_action=True)
 
     b["skb_events"].open_perf_buffer(print_skb_event)
-    print('Try: "ping -6 ff02::1%me"\n')
+    print('Try: "ping6 ff02::1%me"\n')
     print("%-3s %-32s %-12s %-10s" % ("CPU", "SRC IP", "DST IP", "Magic"))
     while True:
         b.perf_buffer_poll()
diff --git a/examples/networking/vlan_filter/README.md b/examples/networking/vlan_filter/README.md
new file mode 100644
index 0000000..9c17a54
--- /dev/null
+++ b/examples/networking/vlan_filter/README.md
@@ -0,0 +1,34 @@
+# VLAN Filter #
+This is an eBPF application that parses VXLAN packets and then extracts the encapsulated VLAN packets to monitor traffic from each VLAN. Extracted packet header fields can be stored in a file or sent to a remote server via the Apache Kafka messaging system.
+
+Also part of this example is a simulation of a multi-host environment. The simulation environment can be set up by using the test_setup.sh script. Then a sample script (traffic.sh) can be used to send traffic from one client (VLAN=100) on host1 talking to another client on host2, and from one client (VLAN=200) on host2 talking to another client on host1, while running the vlan_filter application in parallel by using the command 'python data-plane-tracing -i veth7'.
+
+![picture](scenario.jpg)
+
+### Usage Example ###
+* $ sudo python data-plane-tracing.py
+
+Timestamp | Host Name  | Host IP   | IP Version   | Source Host IP   | Dest Host IP   | Source Host Port   | Dest Host Port   | VNI   | Source VM MAC  | Dest VM MAC  | VLAN ID  | Source VM IP   | Dest VM IP   | Protocol   | Source VM Port   | Dest VM Port   | Packet Length   |
+---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
+ 2018-05-24 18:43:30.386228 | Box1 | x.x.x.x  | 4 | 10.1.1.11 | 10.1.1.12 | 54836 | 4789 | 10 | fa:16:3e:ec:22:99 | fa:16:3e:1c:6f:2d | 100 | 192.168.100.11 | 192.168.100.12 | 6 | 1285 | 20302 | 1200
+
+
+# Implementation overview #
+The example application implementation is split into two parts: the former exploits eBPF code, the latter performs some additional processing in user space (the Python wrapper).
+
+### First part: eBPF Filter ###
+This component filters VXLAN packets.
+The program is loaded as PROG_TYPE_SOCKET_FILTER and attached to a socket, bound to eth0.
+Packets matching VXLAN filter are forwarded to the user space, while other packets are dropped.
+
+### Python code in user space ###
+The Python script reads filtered raw packets from the socket, extracts all the useful header fields, and stores the extracted packet into a file by default, or sends it to a remote server via the Apache Kafka messaging system.
+
+# How to execute this example application #
+VLAN Filter application can be executed by using one of the below commands:
+* $ sudo python data-plane-tracing.py
+* $ sudo python data-plane-tracing -i eth2 -k vc.manage.overcloud:9092
+
+# How to install Required Dependencies
+* $ pip install kafka-python
+* $ pip install netifaces
diff --git a/examples/networking/vlan_filter/data-plane-tracing.c b/examples/networking/vlan_filter/data-plane-tracing.c
new file mode 100644
index 0000000..8b725a5
--- /dev/null
+++ b/examples/networking/vlan_filter/data-plane-tracing.c
@@ -0,0 +1,54 @@
+#include <uapi/linux/ptrace.h>
+#include <net/sock.h>
+#include <bcc/proto.h>
+
+#define IP_TCP 	6
+#define IP_UDP 17
+#define IP_ICMP 1
+/* 
+  In 802.3, both the source and destination addresses are 48-bit (6-byte) MAC addresses.
+  6 bytes (src) + 6 bytes (dst) + 2 bytes (type) = 14 bytes 
+*/
+#define ETH_HLEN 14
+
+/*eBPF program.
+  Filter TCP/UDP/ICMP packets, having payload not empty
+  if the program is loaded as PROG_TYPE_SOCKET_FILTER
+  and attached to a socket
+  return  0 -> DROP the packet
+  return -1 -> KEEP the packet and return it to user space (userspace can read it from the socket_fd )
+*/
+int vlan_filter(struct __sk_buff *skb) { 
+	u8 *cursor = 0;	
+
+	struct ethernet_t *ethernet = cursor_advance(cursor, sizeof(*ethernet));
+	
+	//filter IP packets (ethernet type = 0x0800) 0x0800 is IPv4 packet
+	switch(ethernet->type){
+		case 0x0800: goto IP;
+	    	default: goto DROP;
+	}
+
+	
+	IP: ;
+		struct ip_t *ip = cursor_advance(cursor, sizeof(*ip));  // IP header (datagram)
+	        switch (ip->nextp){
+			case 17: goto UDP;
+			default: goto DROP;
+		}
+
+	UDP: ;
+		struct udp_t *udp = cursor_advance(cursor, sizeof(*udp));
+		switch (udp->dport) {
+    			case 4789: goto KEEP;
+    			default: goto DROP;
+  		}
+
+	//keep the packet and send it to userspace returning -1
+	KEEP:
+		return -1;
+
+	//drop the packet returning 0
+	DROP:
+		return 0;
+}
\ No newline at end of file
diff --git a/examples/networking/vlan_filter/data-plane-tracing.py b/examples/networking/vlan_filter/data-plane-tracing.py
new file mode 100755
index 0000000..efaa7f1
--- /dev/null
+++ b/examples/networking/vlan_filter/data-plane-tracing.py
@@ -0,0 +1,193 @@
+#!/usr/bin/python
+from __future__ import print_function
+from bcc import BPF
+
+import sys
+import socket
+import os
+import argparse
+import time
+import netifaces as ni
+
+from sys import argv
+from kafka import KafkaProducer
+from kafka.errors import KafkaError
+from datetime import datetime
+
+#args
+def usage():
+    print("USAGE: %s [-i <if_name>]" % argv[0])
+    print("")
+    print("Try '%s -h' for more options." % argv[0])
+    exit()
+
+#help
+def help():
+    print("USAGE: %s [-i <if_name>][-k <kafka_server_name:kafka_port>]" % argv[0])
+    print("")
+    print("optional arguments:")
+    print("   -h                       print this help")
+    print("   -i if_name               select interface if_name. Default is eth0")
+    print("   -k kafka_server_name     select kafka server name. Default is save to file")
+    print("                            If -k option is not specified data will be saved to file.")
+    
+    print("")
+    print("examples:")
+    print("    data-plane-tracing                                      # bind socket to eth0")
+    print("    data-plane-tracing -i eno2 -k vc.manage.overcloud:9092  # bind socket to eno2 and send data to kafka server in iovisor-topic.")
+    exit()
+
+#arguments
+interface="eth0"
+kafkaserver=''
+        
+#check provided arguments
+if len(argv) == 2:
+    if str(argv[1]) == '-h':
+        help()
+    else:
+        usage()
+
+if len(argv) == 3:
+    if str(argv[1]) == '-i':
+        interface = argv[2]
+    elif str(argv[1]) == '-k':
+        kafkaserver = argv[2] 
+    else:
+        usage()
+    
+if len(argv) == 5:
+    if str(argv[1]) == '-i':
+        interface = argv[2]
+        kafkaserver = argv[4]
+    elif str(argv[1]) == '-k':
+        kafkaserver = argv[2] 
+        interface = argv[4]
+    else:
+        usage()
+
+if len(argv) > 5:
+    usage()
+
+print ("binding socket to '%s'" % interface)	
+ 
+#initialize BPF - load source code from data-plane-tracing.c
+bpf = BPF(src_file = "data-plane-tracing.c", debug = 0)
+
+#load eBPF program vlan_filter of type SOCKET_FILTER into the kernel eBPF vm
+#more info about eBPF program types http://man7.org/linux/man-pages/man2/bpf.2.html
+function_vlan_filter = bpf.load_func("vlan_filter", BPF.SOCKET_FILTER)
+
+#create raw socket, bind it to the selected interface (default eth0)
+#attach bpf program to socket created
+BPF.attach_raw_socket(function_vlan_filter, interface)
+
+#get file descriptor of the socket previously created inside BPF.attach_raw_socket
+socket_fd = function_vlan_filter.sock
+
+#create python socket object, from the file descriptor
+sock = socket.fromfd(socket_fd,socket.PF_PACKET,socket.SOCK_RAW,socket.IPPROTO_IP)
+
+#set it as blocking socket
+sock.setblocking(True)
+
+#get interface ip address. In case ip is not set then just add 127.0.0.1.
+ni.ifaddresses(interface)
+try:
+    ip = ni.ifaddresses(interface)[ni.AF_INET][0]['addr']
+except:
+    ip = '127.0.0.1'    
+
+print("| Timestamp | Host Name | Host IP | IP Version | Source Host IP | Dest Host IP | Source Host Port | Dest Host Port | VNI | Source VM MAC | Dest VM MAC | VLAN ID | Source VM IP | Dest VM IP | Protocol | Source VM Port | Dest VM Port | Packet Length |")
+
+while 1:
+    #retrieve raw packet from socket
+    packet_str = os.read(socket_fd, 2048)
+    
+    #convert packet into bytearray
+    packet_bytearray = bytearray(packet_str)
+    
+    #ethernet header length
+    ETH_HLEN = 14 
+    
+    #VXLAN header length
+    VXLAN_HLEN = 8
+    
+    #VLAN header length
+    VLAN_HLEN = 4
+    
+    #Inner TCP/UDP header length
+    TCP_HLEN = 20
+    UDP_HLEN = 8
+    
+    #calculate packet total length
+    total_length = packet_bytearray[ETH_HLEN + 2]               #load MSB
+    total_length = total_length << 8                            #shift MSB
+    total_length = total_length + packet_bytearray[ETH_HLEN+3]  #add LSB
+    
+    #calculate ip header length
+    ip_header_length = packet_bytearray[ETH_HLEN]               #load Byte
+    ip_header_length = ip_header_length & 0x0F                  #mask bits 0..3
+    ip_header_length = ip_header_length << 2                    #shift to obtain length
+    
+    #calculate payload offset
+    payload_offset = ETH_HLEN + ip_header_length + UDP_HLEN + VXLAN_HLEN
+    
+    #parsing ip version from ip packet header
+    ipversion = str(bin(packet_bytearray[14])[2:5])
+    
+    #parsing source ip address, destination ip address from ip packet header
+    src_host_ip = str(packet_bytearray[26]) + "." + str(packet_bytearray[27]) + "." + str(packet_bytearray[28]) + "." + str(packet_bytearray[29])
+    dest_host_ip = str(packet_bytearray[30]) + "." + str(packet_bytearray[31]) + "." + str(packet_bytearray[32]) + "." + str(packet_bytearray[33])
+    
+    #parsing source port and destination port
+    src_host_port = packet_bytearray[34] << 8 | packet_bytearray[35]
+    dest_host_port = packet_bytearray[36] << 8 | packet_bytearray[37]
+    
+    #parsing VNI from VXLAN header
+    VNI = str((packet_bytearray[46] << 16) + (packet_bytearray[47] << 8) + packet_bytearray[48])
+    
+    #parsing source mac address and destination mac address
+    mac_add = [packet_bytearray[50], packet_bytearray[51], packet_bytearray[52], packet_bytearray[53], packet_bytearray[54], packet_bytearray[55]]
+    src_vm_mac = ":".join(map(lambda b: format(b, "02x"), mac_add))
+    mac_add = [packet_bytearray[56], packet_bytearray[57], packet_bytearray[58], packet_bytearray[59], packet_bytearray[60], packet_bytearray[61]]
+    dest_vm_mac = ":".join(map(lambda b: format(b, "02x"), mac_add))
+    
+    #parsing VLANID from VLAN header
+    VLANID=""
+    VLANID = str(((packet_bytearray[64] & 0x0F) << 8) + packet_bytearray[65])
+
+    #parsing source vm ip address, destination vm ip address from encapsulated ip packet header
+    src_vm_ip = str(packet_bytearray[80]) + "." + str(packet_bytearray[81]) + "." + str(packet_bytearray[82]) + "." + str(packet_bytearray[83])
+    dest_vm_ip = str(packet_bytearray[84]) + "." + str(packet_bytearray[85]) + "." + str(packet_bytearray[86]) + "." + str(packet_bytearray[87]) 
+    
+    #parsing source port and destination port
+    if (packet_bytearray[77]==6 or packet_bytearray[77]==17):
+        src_vm_port = packet_bytearray[88] << 8 | packet_bytearray[89]
+        dest_vm_port = packet_bytearray[90] << 8 | packet_bytearray[91]
+    elif (packet_bytearray[77]==1):
+        src_vm_port = -1
+        dest_vm_port = -1
+        type = str(packet_bytearray[88])
+    else:
+        continue
+    
+    timestamp = str(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S.%f'))
+    
+    #send data to remote server via Kafka Messaging Bus
+    if kafkaserver:
+        MESSAGE = (timestamp, socket.gethostname(),ip, str(int(ipversion, 2)), str(src_host_ip), str(dest_host_ip), str(src_host_port), str(dest_host_port), str(int(VNI)), str(src_vm_mac), str(dest_vm_mac), str(int(VLANID)), src_vm_ip, dest_vm_ip, str(packet_bytearray[77]), str(src_vm_port), str(dest_vm_port), str(total_length))
+        print (MESSAGE)
+        MESSAGE = ','.join(MESSAGE)
+        MESSAGE = MESSAGE.encode() 
+        producer = KafkaProducer(bootstrap_servers=[kafkaserver])
+        producer.send('iovisor-topic', key=b'iovisor', value=MESSAGE)
+    
+    #save data to files
+    else:
+        MESSAGE = timestamp+","+socket.gethostname()+","+ip+","+str(int(ipversion, 2))+","+src_host_ip+","+dest_host_ip+","+str(src_host_port)+","+str(dest_host_port)+","+str(int(VNI))+","+str(src_vm_mac)+","+str(dest_vm_mac)+","+str(int(VLANID))+","+src_vm_ip+","+dest_vm_ip+","+str(packet_bytearray[77])+","+str(src_vm_port)+","+str(dest_vm_port)+","+str(total_length)
+        print (MESSAGE)
+        #save data to a file on hour basis 
+        filename = "./vlan-data-"+time.strftime("%Y-%m-%d-%H")+"-00"
+        with open(filename, "a") as f:
+            f.write("%s\n" % MESSAGE)
diff --git a/examples/networking/vlan_filter/scenario.jpg b/examples/networking/vlan_filter/scenario.jpg
new file mode 100644
index 0000000..ba3d7ab
--- /dev/null
+++ b/examples/networking/vlan_filter/scenario.jpg
Binary files differ
diff --git a/examples/networking/vlan_filter/test_setup.sh b/examples/networking/vlan_filter/test_setup.sh
new file mode 100755
index 0000000..967cf21
--- /dev/null
+++ b/examples/networking/vlan_filter/test_setup.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+
+# This script must be executed by root user
+if [ "$(id -u)" != "0" ]; then
+   echo "This script must be run as root" 1>&2
+   exit 1
+fi
+
+# add namespaces
+ip netns add netns11
+ip netns add netns12
+ip netns add netns21
+ip netns add netns22
+ip netns add netns3
+ip netns add netns4
+
+# set up veth devices in netns11 to netns21 with connection to netns3  
+ip link add veth11 type veth peer name veth13
+ip link add veth21 type veth peer name veth23
+ip link set veth11 netns netns11
+ip link set veth21 netns netns21
+ip link set veth13 netns netns3
+ip link set veth23 netns netns3
+
+# set up veth devices in netns12 and netns22 with connection to netns4 
+ip link add veth12 type veth peer name veth14
+ip link add veth22 type veth peer name veth24
+ip link set veth12 netns netns12
+ip link set veth22 netns netns22
+ip link set veth14 netns netns4
+ip link set veth24 netns netns4
+  
+# assign IP addresses and set the devices up 
+ip netns exec netns11 ifconfig veth11 192.168.100.11/24 up
+ip netns exec netns11 ip link set lo up
+ip netns exec netns12 ifconfig veth12 192.168.100.12/24 up
+ip netns exec netns12 ip link set lo up
+ip netns exec netns21 ifconfig veth21 192.168.200.21/24 up
+ip netns exec netns21 ip link set lo up
+ip netns exec netns22 ifconfig veth22 192.168.200.22/24 up
+ip netns exec netns22 ip link set lo up
+
+# set up bridge brx and its ports 
+ip netns exec netns3 brctl addbr brx  
+ip netns exec netns3 ip link set brx up
+ip netns exec netns3 ip link set veth13 up
+ip netns exec netns3 ip link set veth23 up
+ip netns exec netns3 brctl addif brx veth13
+ip netns exec netns3 brctl addif brx veth23
+
+# set up bridge bry and its ports 
+ip netns exec netns4 brctl addbr bry  
+ip netns exec netns4 ip link set bry up
+ip netns exec netns4 ip link set veth14 up
+ip netns exec netns4 ip link set veth24 up
+ip netns exec netns4 brctl addif bry veth14
+ip netns exec netns4 brctl addif bry veth24
+
+# create veth devices to connect the bridges
+ip link add vethx type veth peer name vethx11
+ip link add vethy type veth peer name vethy11
+ip link set vethx netns netns3
+ip link set vethx11 netns netns3
+ip link set vethy netns netns4
+ip link set vethy11 netns netns4
+
+ip netns exec netns3 brctl addif brx vethx
+ip netns exec netns3 ip link set vethx up
+ip netns exec netns3 bridge vlan add vid 100 tagged dev vethx
+ip netns exec netns3 bridge vlan add vid 200 tagged dev vethx
+ip netns exec netns3 bridge vlan del vid 1 dev vethx
+ip netns exec netns3 bridge vlan show
+
+ip netns exec netns4 brctl addif bry vethy
+ip netns exec netns4 ip link set vethy up
+ip netns exec netns4 bridge vlan add vid 100 tagged dev vethy
+ip netns exec netns4 bridge vlan add vid 200 tagged dev vethy
+ip netns exec netns4 bridge vlan del vid 1 dev vethy
+ip netns exec netns4 bridge vlan show
+
+ip netns exec netns3 ip link set dev brx type bridge vlan_filtering 1
+ip netns exec netns4 ip link set dev bry type bridge vlan_filtering 1
+ip netns exec netns3 bridge vlan del vid 1 dev brx self
+ip netns exec netns4 bridge vlan del vid 1 dev bry self
+ip netns exec netns3 bridge vlan show
+ip netns exec netns4 bridge vlan show
+
+ip netns exec netns3 bridge vlan add vid 100 pvid untagged dev veth13
+ip netns exec netns3 bridge vlan add vid 200 pvid untagged dev veth23
+ip netns exec netns4 bridge vlan add vid 100 pvid untagged dev veth14
+ip netns exec netns4 bridge vlan add vid 200 pvid untagged dev veth24
+
+ip netns exec netns3 bridge vlan del vid 1 dev veth13
+ip netns exec netns3 bridge vlan del vid 1 dev veth23
+ip netns exec netns4 bridge vlan del vid 1 dev veth14
+ip netns exec netns4 bridge vlan del vid 1 dev veth24
+
+# set up bridge brvx and its ports 
+ip netns exec netns3 brctl addbr brvx  
+ip netns exec netns3 ip link set brvx up
+ip netns exec netns3 ip link set vethx11 up
+ip netns exec netns3 brctl addif brvx vethx11
+
+# set up bridge brvy and its ports 
+ip netns exec netns4 brctl addbr brvy  
+ip netns exec netns4 ip link set brvy up
+ip netns exec netns4 ip link set vethy11 up
+ip netns exec netns4 brctl addif brvy vethy11
+
+# create veth devices to connect the vxlan bridges
+ip link add veth3 type veth peer name veth4
+ip link add veth5 type veth peer name veth6
+ip link set veth3 netns netns3
+ip link set veth5 netns netns4
+ip netns exec netns3 ip link set veth3 up
+ip netns exec netns4 ip link set veth5 up
+ip link set veth4 up
+ip link set veth6 up
+ip netns exec netns3 ifconfig veth3 10.1.1.11/24 up
+ip netns exec netns4 ifconfig veth5 10.1.1.12/24 up
+
+# add vxlan ports
+ip netns exec netns3 ip link add vxlan-10 type vxlan id 10 remote 10.1.1.12 dstport 4789 dev veth3
+ip netns exec netns4 ip link add vxlan-10 type vxlan id 10 remote 10.1.1.11 dstport 4789 dev veth5
+ip netns exec netns3 ip link set vxlan-10 up
+ip netns exec netns4 ip link set vxlan-10 up
+ip netns exec netns3 brctl addif brvx vxlan-10
+ip netns exec netns4 brctl addif brvy vxlan-10
+
+# create veth devices to connect the vxlan bridges
+ip link add veth7 type veth peer name veth8
+ip link set veth7 up
+ip link set veth8 up
+
+# set up bridge brjx and its ports 
+brctl addbr brjx  
+ip link set brjx up
+ip link set veth4 up
+brctl addif brjx veth4
+brctl addif brjx veth7
+
+# set up bridge brjy and its ports 
+brctl addbr brjy  
+ip link set brjy up
+ip link set veth6 up
+brctl addif brjy veth6
+brctl addif brjy veth8
diff --git a/examples/networking/vlan_filter/test_traffic.sh b/examples/networking/vlan_filter/test_traffic.sh
new file mode 100755
index 0000000..4be4515
--- /dev/null
+++ b/examples/networking/vlan_filter/test_traffic.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+ip netns exec netns11 ping 192.168.100.12 -c 10
+ip netns exec netns22 ping 192.168.200.21 -c 10
diff --git a/examples/tracing/bitehist.py b/examples/tracing/bitehist.py
index 1dee5dc..c8c7f7a 100755
--- a/examples/tracing/bitehist.py
+++ b/examples/tracing/bitehist.py
@@ -13,6 +13,7 @@
 #
 # 15-Aug-2015	Brendan Gregg	Created this.
 
+from __future__ import print_function
 from bcc import BPF
 from time import sleep
 
@@ -37,7 +38,7 @@
 try:
 	sleep(99999999)
 except KeyboardInterrupt:
-	print
+	print()
 
 # output
 b["dist"].print_log2_hist("kbytes")
diff --git a/examples/tracing/trace_fields.py b/examples/tracing/trace_fields.py
index 0baf03d..63a7b53 100755
--- a/examples/tracing/trace_fields.py
+++ b/examples/tracing/trace_fields.py
@@ -6,6 +6,7 @@
 # run in project examples directory with:
 # sudo ./trace_fields.py"
 
+from __future__ import print_function
 from bcc import BPF
 
 prog = """
@@ -16,5 +17,5 @@
 """
 b = BPF(text=prog)
 b.attach_kprobe(event=b.get_syscall_fnname("clone"), fn_name="hello")
-print "PID MESSAGE"
+print("PID MESSAGE")
 b.trace_print(fmt="{1} {5}")
diff --git a/examples/tracing/vfsreadlat.py b/examples/tracing/vfsreadlat.py
index bd16dd5..b2c4156 100755
--- a/examples/tracing/vfsreadlat.py
+++ b/examples/tracing/vfsreadlat.py
@@ -15,6 +15,7 @@
 #
 # 15-Aug-2015	Brendan Gregg	Created this.
 
+from __future__ import print_function
 from bcc import BPF
 from ctypes import c_ushort, c_int, c_ulonglong
 from time import sleep
@@ -58,7 +59,7 @@
 	except KeyboardInterrupt:
 		pass; do_exit = 1
 
-	print
+	print()
 	b["dist"].print_log2_hist("usecs")
 	b["dist"].clear()
 	if do_exit:
diff --git a/introspection/CMakeLists.txt b/introspection/CMakeLists.txt
index f6fb1db..836bc0a 100644
--- a/introspection/CMakeLists.txt
+++ b/introspection/CMakeLists.txt
@@ -7,6 +7,6 @@
 option(INSTALL_INTROSPECTION "Install BPF introspection tools" ON)
 
 add_executable(bps bps.c)
-target_link_libraries(bps bcc-static)
+target_link_libraries(bps bpf-static)
 
 install (TARGETS bps DESTINATION share/bcc/introspection)
diff --git a/man/man8/argdist.8 b/man/man8/argdist.8
index b0a539f..4116cd4 100644
--- a/man/man8/argdist.8
+++ b/man/man8/argdist.8
@@ -2,7 +2,7 @@
 .SH NAME
 argdist \- Trace a function and display a histogram or frequency count of its parameter values. Uses Linux eBPF/bcc.
 .SH SYNOPSIS
-.B argdist [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL] [-n COUNT] [-v] [-T TOP] [-H specifier] [-C specifier] [-I header]
+.B argdist [-h] [-p PID] [-z STRING_SIZE] [-i INTERVAL] [-d DURATION] [-n COUNT] [-v] [-T TOP] [-H specifier] [-C specifier] [-I header]
 .SH DESCRIPTION
 argdist attaches to function entry and exit points, collects specified parameter
 values, and stores them in a histogram or a frequency collection that counts
@@ -27,6 +27,9 @@
 \-i INTERVAL
 Print the collected data every INTERVAL seconds. The default is 1 second.
 .TP
+\-d DURATION
+Total duration of trace in seconds.
+.TP
 \-n NUMBER
 Print the collected data COUNT times and then exit.
 .TP
diff --git a/man/man8/funclatency.8 b/man/man8/funclatency.8
index 7b7771b..b82626c 100644
--- a/man/man8/funclatency.8
+++ b/man/man8/funclatency.8
@@ -2,7 +2,7 @@
 .SH NAME
 funclatency \- Time functions and print latency as a histogram.
 .SH SYNOPSIS
-.B funclatency [\-h] [\-p PID] [\-i INTERVAL] [\-T] [\-u] [\-m] [\-F] [\-r] [\-v] pattern
+.B funclatency [\-h] [\-p PID] [\-i INTERVAL] [\-d DURATION] [\-T] [\-u] [\-m] [\-F] [\-r] [\-v] pattern
 .SH DESCRIPTION
 This tool traces function calls and times their duration (latency), and
 shows the latency distribution as a histogram. The time is measured from when
@@ -37,6 +37,9 @@
 \-i INTERVAL
 Print output every interval seconds.
 .TP
+\-d DURATION
+Total duration of trace, in seconds.
+.TP
 \-T
 Include timestamps on output.
 .TP
@@ -72,6 +75,10 @@
 #
 .B funclatency \-m do_nanosleep
 .TP
+Time libc read(), and print output every 2 seconds, for a duration of 10 seconds:
+#
+.B funclatency \-i 2 -d 10 c:read
+.TP
 Time vfs_read(), and print output every 5 seconds, with timestamps:
 #
 .B funclatency \-mTi 5 vfs_read
diff --git a/man/man8/runqlat.8 b/man/man8/runqlat.8
index 2986ff5..d535ebb 100644
--- a/man/man8/runqlat.8
+++ b/man/man8/runqlat.8
@@ -13,7 +13,8 @@
 This tool measures two types of run queue latency:
 
 1. The time from a task being enqueued on a run queue to its context switch
-and execution. This traces enqueue_task_*() -> finish_task_switch(),
+and execution. This traces ttwu_do_wakeup(), wake_up_new_task() ->
+finish_task_switch() with either raw tracepoints (if supported) or kprobes
 and instruments the run queue latency after a voluntary context switch.
 
 2. The time from when a task was involuntary context switched and still
@@ -109,4 +110,4 @@
 .SH AUTHOR
 Brendan Gregg
 .SH SEE ALSO
-runqlen(8), pidstat(1)
+runqlen(8), runqslower(8), pidstat(1)
diff --git a/man/man8/runqlen.8 b/man/man8/runqlen.8
index 1cc2789..27a649d 100644
--- a/man/man8/runqlen.8
+++ b/man/man8/runqlen.8
@@ -83,4 +83,4 @@
 .SH AUTHOR
 Brendan Gregg
 .SH SEE ALSO
-runqlat(8), pidstat(1)
+runqlat(8), runqslower(8), pidstat(1)
diff --git a/man/man8/runqslower.8 b/man/man8/runqslower.8
new file mode 100644
index 0000000..0baee64
--- /dev/null
+++ b/man/man8/runqslower.8
@@ -0,0 +1,86 @@
+.TH runqslower 8  "2016-02-07" "USER COMMANDS"
+.SH NAME
+runqslower \- Trace long process scheduling delays.
+.SH SYNOPSIS
+.B runqslower [\-p PID] [min_us]
+.SH DESCRIPTION
+This measures the time a task spends waiting on a run queue (or equivalent
+scheduler data structure) for a turn on-CPU, and shows occurrences of time
+exceeding passed threshold. This time should be small, but a task may need
+to wait its turn due to CPU load. The higher the CPU load, the longer a task
+will generally need to wait its turn.
+
+This tool measures two types of run queue latency:
+
+1. The time from a task being enqueued on a run queue to its context switch
+and execution. This traces ttwu_do_wakeup(), wake_up_new_task() ->
+finish_task_switch() with either raw tracepoints (if supported) or kprobes
+and instruments the run queue latency after a voluntary context switch.
+
+2. The time from when a task was involuntary context switched and still
+in the runnable state, to when it next executed. This is instrumented
+from finish_task_switch() alone.
+
+The overhead of this tool may become significant for some workloads:
+see the OVERHEAD section.
+
+This works by tracing various kernel scheduler functions using dynamic tracing,
+and will need updating to match any changes to these functions.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-p PID
+Only show this PID (filtered in kernel for efficiency).
+.TP
+min_us
+Minimum scheduling delay in microseconds to output.
+.SH EXAMPLES
+.TP
+Show scheduling delays longer than 10ms:
+#
+.B runqslower
+.TP
+Show scheduling delays longer than 1ms for process with PID 123:
+#
+.B runqslower -p 123 1000
+.SH FIELDS
+.TP
+TIME
+Time of when scheduling event occurred.
+.TP
+COMM
+Process name.
+.TP
+PID
+Process ID.
+.TP
+LAT(us)
+Scheduling latency from time when task was ready to run to the time it was
+assigned to a CPU to run.
+.SH OVERHEAD
+This traces scheduler functions, which can become very frequent. While eBPF
+has very low overhead, and this tool uses in-kernel maps for efficiency, the
+frequency of scheduler events for some workloads may be high enough that the
+overhead of this tool becomes significant. Measure in a lab environment
+to quantify the overhead before use.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Ivan Babrou
+.SH SEE ALSO
+runqlen(8), runqlat(8), pidstat(1)
diff --git a/man/man8/syscount.8 b/man/man8/syscount.8
index 2a47240..d13793b 100644
--- a/man/man8/syscount.8
+++ b/man/man8/syscount.8
@@ -2,7 +2,7 @@
 .SH NAME
 syscount \- Summarize syscall counts and latencies.
 .SH SYNOPSIS
-.B syscount [-h] [-p PID] [-i INTERVAL] [-T TOP] [-x] [-e ERRNO] [-L] [-m] [-P] [-l]
+.B syscount [-h] [-p PID] [-i INTERVAL] [-d DURATION] [-T TOP] [-x] [-e ERRNO] [-L] [-m] [-P] [-l]
 .SH DESCRIPTION
 This tool traces syscall entry and exit tracepoints and summarizes either the
 number of syscalls of each type, or the number of syscalls per process. It can
@@ -23,6 +23,9 @@
 \-i INTERVAL
 Print the summary at the specified interval (in seconds).
 .TP
+\-d DURATION
+Total duration of trace (in seconds).
+.TP
 \-T TOP
 Print only this many entries. Default: 10.
 .TP
diff --git a/src/cc/CMakeLists.txt b/src/cc/CMakeLists.txt
index f518d15..8ddfd8f 100644
--- a/src/cc/CMakeLists.txt
+++ b/src/cc/CMakeLists.txt
@@ -52,7 +52,7 @@
 add_library(bcc-static STATIC
   ${bcc_common_sources} ${bcc_table_sources} ${bcc_util_sources})
 set_target_properties(bcc-static PROPERTIES OUTPUT_NAME bcc)
-add_library(bcc-lua-static STATIC
+set(bcc-lua-static
   ${bcc_common_sources} ${bcc_table_sources} ${bcc_sym_sources} ${bcc_util_sources})
 
 include(clang_libs)
@@ -64,9 +64,9 @@
 set(bcc_common_libs_for_a b_frontend clang_frontend bpf-static
   -Wl,--whole-archive ${clang_libs} ${llvm_libs} -Wl,--no-whole-archive
   ${LIBELF_LIBRARIES})
-set(bcc_common_libs_for_s b_frontend clang_frontend bpf-static
+set(bcc_common_libs_for_s ${bcc_common_libs_for_a})
+set(bcc_common_libs_for_lua b_frontend clang_frontend bpf-static
   ${clang_libs} ${llvm_libs} ${LIBELF_LIBRARIES})
-set(bcc_common_libs_for_lua ${bcc_common_libs_for_s})
 
 if(ENABLE_CPP_API)
   add_subdirectory(api)
@@ -87,7 +87,7 @@
 # Link against LLVM libraries
 target_link_libraries(bcc-shared ${bcc_common_libs_for_s})
 target_link_libraries(bcc-static ${bcc_common_libs_for_a} bcc-loader-static)
-target_link_libraries(bcc-lua-static ${bcc_common_libs_for_lua})
+set(bcc-lua-static ${bcc-lua-static} ${bcc_common_libs_for_lua})
 
 install(TARGETS bcc-shared LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
 install(FILES ${bcc_table_headers} DESTINATION include/bcc)
diff --git a/src/cc/api/BPF.cc b/src/cc/api/BPF.cc
index a821024..8bd1e48 100644
--- a/src/cc/api/BPF.cc
+++ b/src/cc/api/BPF.cc
@@ -32,6 +32,7 @@
 #include "common.h"
 #include "libbpf.h"
 #include "perf_reader.h"
+#include "syms.h"
 #include "table_storage.h"
 #include "usdt.h"
 
@@ -39,11 +40,6 @@
 
 namespace ebpf {
 
-static const char* syscall_prefix[] = {
-    "sys_",
-    "__x64_sys_",
-};
-
 std::string uint_to_hex(uint64_t value) {
   std::stringstream ss;
   ss << std::hex << value;
@@ -62,10 +58,6 @@
                       const std::vector<std::string>& cflags,
                       const std::vector<USDT>& usdt) {
   std::string all_bpf_program;
-  bcc_symbol_option symbol_option = {};
-  void* ksym_cache;
-  uint64_t addr;
-  int ret;
 
   for (auto u : usdt) {
     if (!u.initialized_)
@@ -83,16 +75,6 @@
   if (bpf_module_->load_string(all_bpf_program, flags, flags_len) != 0)
     return StatusTuple(-1, "Unable to initialize BPF program");
 
-  ksym_cache = bcc_symcache_new(-1, &symbol_option);
-  ret = bcc_symcache_resolve_name(ksym_cache, NULL, "sys_bpf", &addr);
-  if (ret == 0) {
-    syscall_prefix_idx_ = 0;
-  } else {
-    ret = bcc_symcache_resolve_name(ksym_cache, NULL, "__x64_sys_bpf", &addr);
-    syscall_prefix_idx_ = (ret == 0) ? 1 : 0;
-  }
-  bcc_free_symcache(ksym_cache, -1);
-
   return StatusTuple(0);
 };
 
@@ -179,6 +161,7 @@
 
 StatusTuple BPF::attach_kprobe(const std::string& kernel_func,
                                const std::string& probe_func,
+                               uint64_t kernel_func_offset,
                                bpf_probe_attach_type attach_type) {
   std::string probe_event = get_kprobe_event(kernel_func, attach_type);
   if (kprobes_.find(probe_event) != kprobes_.end())
@@ -188,7 +171,7 @@
   TRY2(load_func(probe_func, BPF_PROG_TYPE_KPROBE, probe_fd));
 
   int res_fd = bpf_attach_kprobe(probe_fd, attach_type, probe_event.c_str(),
-                                 kernel_func.c_str());
+                                 kernel_func.c_str(), kernel_func_offset);
 
   if (res_fd < 0) {
     TRY2(unload_func(probe_func));
@@ -342,7 +325,8 @@
 
 StatusTuple BPF::attach_perf_event_raw(void* perf_event_attr,
                                        const std::string& probe_func, pid_t pid,
-                                       int cpu, int group_fd) {
+                                       int cpu, int group_fd,
+                                       unsigned long extra_flags) {
   auto attr = static_cast<struct perf_event_attr*>(perf_event_attr);
   auto ev_pair = std::make_pair(attr->type, attr->config);
   if (perf_events_.find(ev_pair) != perf_events_.end())
@@ -360,7 +344,8 @@
   auto fds = new std::vector<std::pair<int, int>>();
   fds->reserve(cpus.size());
   for (int i : cpus) {
-    int fd = bpf_attach_perf_event_raw(probe_fd, attr, pid, i, group_fd);
+    int fd = bpf_attach_perf_event_raw(probe_fd, attr, pid, i, group_fd,
+                                       extra_flags);
     if (fd < 0) {
       for (const auto& it : *fds)
         close(it.second);
@@ -567,8 +552,19 @@
 }
 
 std::string BPF::get_syscall_fnname(const std::string& name) {
-  std::string fn_name = syscall_prefix[syscall_prefix_idx_] + name;
-  return std::move(fn_name);
+  if (syscall_prefix_ == nullptr) {
+    KSyms ksym;
+    uint64_t addr;
+
+    if (ksym.resolve_name(nullptr, "sys_bpf", &addr))
+      syscall_prefix_.reset(new std::string("sys_"));
+    else if (ksym.resolve_name(nullptr, "__x64_sys_bpf", &addr))
+      syscall_prefix_.reset(new std::string("__x64_sys_"));
+    else
+      syscall_prefix_.reset(new std::string());
+  }
+
+  return *syscall_prefix_ + name;
 }
 
 StatusTuple BPF::check_binary_symbol(const std::string& binary_path,
diff --git a/src/cc/api/BPF.h b/src/cc/api/BPF.h
index f101e9b..1688733 100644
--- a/src/cc/api/BPF.h
+++ b/src/cc/api/BPF.h
@@ -19,6 +19,7 @@
 #include <cctype>
 #include <cstdint>
 #include <memory>
+#include <ostream>
 #include <string>
 
 #include "BPFTable.h"
@@ -47,9 +48,7 @@
 
   explicit BPF(unsigned int flag = 0, TableStorage* ts = nullptr,
                bool rw_engine_enabled = true)
-      : flag_(flag),
-        syscall_prefix_idx_(0),
-        bpf_module_(new BPFModule(flag, ts, rw_engine_enabled)) {}
+      : flag_(flag), bpf_module_(new BPFModule(flag, ts, rw_engine_enabled)) {}
   StatusTuple init(const std::string& bpf_program,
                    const std::vector<std::string>& cflags = {},
                    const std::vector<USDT>& usdt = {});
@@ -59,6 +58,7 @@
 
   StatusTuple attach_kprobe(const std::string& kernel_func,
                             const std::string& probe_func,
+                            uint64_t kernel_func_offset = 0,
                             bpf_probe_attach_type = BPF_PROBE_ENTRY);
   StatusTuple detach_kprobe(
       const std::string& kernel_func,
@@ -89,7 +89,8 @@
   StatusTuple attach_perf_event_raw(void* perf_event_attr,
                                     const std::string& probe_func,
                                     pid_t pid = -1, int cpu = -1,
-                                    int group_fd = -1);
+                                    int group_fd = -1,
+                                    unsigned long extra_flags = 0);
   StatusTuple detach_perf_event(uint32_t ev_type, uint32_t ev_config);
   StatusTuple detach_perf_event_raw(void* perf_event_attr);
   std::string get_syscall_fnname(const std::string& name);
@@ -219,7 +220,7 @@
 
   int flag_;
 
-  int syscall_prefix_idx_;
+  std::unique_ptr<std::string> syscall_prefix_;
 
   std::unique_ptr<BPFModule> bpf_module_;
 
@@ -245,6 +246,8 @@
         name_(name),
         probe_func_(probe_func) {}
 
+  StatusTuple init();
+
   bool operator==(const USDT& other) const {
     return (provider_ == other.provider_) && (name_ == other.name_) &&
            (binary_path_ == other.binary_path_) &&
@@ -252,11 +255,16 @@
   }
 
   std::string print_name() const {
-    return provider_ + ":" + name_ + " from " + binary_path_;
+    return provider_ + ":" + name_ + " from " + binary_path_ + " for probe " +
+           probe_func_;
+  }
+
+  friend std::ostream& operator<<(std::ostream& out, const USDT& usdt) {
+    return out << usdt.provider_ << ":" << usdt.name_ << " from "
+               << usdt.binary_path_ << " for probe " << usdt.probe_func_;
   }
 
  private:
-  StatusTuple init();
   bool initialized_;
 
   std::string binary_path_;
diff --git a/src/cc/bpf_module.cc b/src/cc/bpf_module.cc
index 65f6b7e..9344340 100644
--- a/src/cc/bpf_module.cc
+++ b/src/cc/bpf_module.cc
@@ -615,7 +615,6 @@
   std::map<std::string, std::tuple<uint8_t *, uintptr_t>> tmp_sections,
       *sections_p;
 
-  mod->setDataLayout("e-m:e-p:64:64-i64:64-n32:64-S128");
   mod->setTargetTriple("bpf-pc-linux");
   sections_p = rw_engine_enabled_ ? &sections_ : &tmp_sections;
 
diff --git a/src/cc/compat/linux/bpf.h b/src/cc/compat/linux/bpf.h
index bbf2b7a..9e0211e 100644
--- a/src/cc/compat/linux/bpf.h
+++ b/src/cc/compat/linux/bpf.h
@@ -96,6 +96,8 @@
 	BPF_PROG_QUERY,
 	BPF_RAW_TRACEPOINT_OPEN,
 	BPF_BTF_LOAD,
+	BPF_BTF_GET_FD_BY_ID,
+	BPF_TASK_FD_QUERY,
 };
 
 enum bpf_map_type {
@@ -116,6 +118,8 @@
 	BPF_MAP_TYPE_DEVMAP,
 	BPF_MAP_TYPE_SOCKMAP,
 	BPF_MAP_TYPE_CPUMAP,
+	BPF_MAP_TYPE_XSKMAP,
+	BPF_MAP_TYPE_SOCKHASH,
 };
 
 enum bpf_prog_type {
@@ -138,6 +142,8 @@
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_RAW_TRACEPOINT,
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	BPF_PROG_TYPE_LWT_SEG6LOCAL,
+	BPF_PROG_TYPE_LIRC_MODE2,
 };
 
 enum bpf_attach_type {
@@ -155,6 +161,9 @@
 	BPF_CGROUP_INET6_CONNECT,
 	BPF_CGROUP_INET4_POST_BIND,
 	BPF_CGROUP_INET6_POST_BIND,
+	BPF_CGROUP_UDP4_SENDMSG,
+	BPF_CGROUP_UDP6_SENDMSG,
+	BPF_LIRC_MODE2,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -281,8 +290,8 @@
 		char	map_name[BPF_OBJ_NAME_LEN];
 		__u32	map_ifindex;	/* ifindex of netdev to create on */
 		__u32	btf_fd;		/* fd pointing to a BTF type data */
-		__u32	btf_key_id;	/* BTF type_id of the key */
-		__u32	btf_value_id;	/* BTF type_id of the value */
+		__u32	btf_key_type_id;	/* BTF type_id of the key */
+		__u32	btf_value_type_id;	/* BTF type_id of the value */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -343,6 +352,7 @@
 			__u32		start_id;
 			__u32		prog_id;
 			__u32		map_id;
+			__u32		btf_id;
 		};
 		__u32		next_id;
 		__u32		open_flags;
@@ -375,6 +385,22 @@
 		__u32		btf_log_size;
 		__u32		btf_log_level;
 	};
+
+	struct {
+		__u32		pid;		/* input: pid */
+		__u32		fd;		/* input: fd */
+		__u32		flags;		/* input: flags */
+		__u32		buf_len;	/* input/output: buf len */
+		__aligned_u64	buf;		/* input/output:
+						 *   tp_name for tracepoint
+						 *   symbol for kprobe
+						 *   filename for uprobe
+						 */
+		__u32		prog_id;	/* output: prog_id */
+		__u32		fd_type;	/* output: BPF_FD_TYPE_* */
+		__u64		probe_offset;	/* output: probe_offset */
+		__u64		probe_addr;	/* output: probe_addr */
+	} task_fd_query;
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
@@ -828,12 +854,12 @@
  *
  * 		Also, be aware that the newer helper
  * 		**bpf_perf_event_read_value**\ () is recommended over
- * 		**bpf_perf_event_read*\ () in general. The latter has some ABI
+ * 		**bpf_perf_event_read**\ () in general. The latter has some ABI
  * 		quirks where error and counter value are used as a return code
  * 		(which is wrong to do since ranges may overlap). This issue is
- * 		fixed with bpf_perf_event_read_value(), which at the same time
- * 		provides more features over the **bpf_perf_event_read**\ ()
- * 		interface. Please refer to the description of
+ * 		fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * 		time provides more features over the **bpf_perf_event_read**\
+ * 		() interface. Please refer to the description of
  * 		**bpf_perf_event_read_value**\ () for details.
  * 	Return
  * 		The value of the perf event counter read from the map, or a
@@ -986,7 +1012,6 @@
  * 		::
  *
  * 			# sysctl kernel.perf_event_max_stack=<new value>
- *
  * 	Return
  * 		The positive or null stack id on success, or a negative error
  * 		in case of failure.
@@ -1361,7 +1386,7 @@
  * 	Return
  * 		0
  *
- * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
  * 	Description
  * 		Emulate a call to **setsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1435,7 +1460,7 @@
  * 	Return
  * 		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  * 	Description
  * 		Add an entry to, or update a *map* referencing sockets. The
  * 		*skops* is used as a new value for the entry associated to
@@ -1533,7 +1558,7 @@
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
  * 	Description
  * 		For en eBPF program attached to a perf event, retrieve the
  * 		value of the event counter associated to *ctx* and store it in
@@ -1544,7 +1569,7 @@
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
  * 	Description
  * 		Emulate a call to **getsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1588,7 +1613,7 @@
  * 	Return
  * 		0
  *
- * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval)
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
  * 	Description
  * 		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
  * 		for the full TCP socket associated to *bpf_sock_ops* to
@@ -1721,7 +1746,7 @@
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
  * 	Description
  * 		Bind the socket associated to *ctx* to the address pointed by
  * 		*addr*, of length *addr_len*. This allows for making outgoing
@@ -1767,6 +1792,268 @@
  * 		**CONFIG_XFRM** configuration option.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * 	Description
+ * 		Return a user or a kernel stack in bpf program provided buffer.
+ * 		To achieve this, the helper needs *ctx*, which is a pointer
+ * 		to the context on which the tracing program is executed.
+ * 		To store the stacktrace, the bpf program provides *buf* with
+ * 		a nonnegative *size*.
+ *
+ * 		The last argument, *flags*, holds the number of stack frames to
+ * 		skip (from 0 to 255), masked with
+ * 		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * 		the following flags:
+ *
+ * 		**BPF_F_USER_STACK**
+ * 			Collect a user space stack instead of a kernel stack.
+ * 		**BPF_F_USER_BUILD_ID**
+ * 			Collect buildid+offset instead of ips for user stack,
+ * 			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * 		**bpf_get_stack**\ () can collect up to
+ * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * 		to a sufficiently large buffer size. Note that
+ * 		this limit can be controlled with the **sysctl** program, and
+ * 		that it should be manually increased in order to profile long
+ * 		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * 		::
+ *
+ * 			# sysctl kernel.perf_event_max_stack=<new value>
+ * 	Return
+ * 		A non-negative value equal to or less than *size* on success,
+ * 		or a negative error in case of failure.
+ *
+ * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * 	Description
+ * 		This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * 		it provides an easy way to load *len* bytes from *offset*
+ * 		from the packet associated to *skb*, into the buffer pointed
+ * 		by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * 		a fifth argument *start_header* exists in order to select a
+ * 		base offset to start from. *start_header* can be one of:
+ *
+ * 		**BPF_HDR_START_MAC**
+ * 			Base offset to load data from is *skb*'s mac header.
+ * 		**BPF_HDR_START_NET**
+ * 			Base offset to load data from is *skb*'s network header.
+ *
+ * 		In general, "direct packet access" is the preferred method to
+ * 		access packet data, however, this helper is in particular useful
+ * 		in socket filters where *skb*\ **->data** does not always point
+ * 		to the start of the mac header and where "direct packet access"
+ * 		is not available.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ *	Description
+ *		Do FIB lookup in kernel tables using parameters in *params*.
+ *		If lookup is successful and result shows packet is to be
+ *		forwarded, the neighbor tables are searched for the nexthop.
+ *		If successful (i.e., FIB lookup shows forwarding and nexthop
+ *		is resolved), the nexthop address is returned in ipv4_dst
+ *		or ipv6_dst based on family, smac is set to mac address of
+ *		egress device, dmac is set to nexthop mac address, rt_metric
+ *		is set to metric from route (IPv4/IPv6 only).
+ *
+ *             *plen* argument is the size of the passed in struct.
+ *             *flags* argument can be a combination of one or more of the
+ *             following values:
+ *
+ *		**BPF_FIB_LOOKUP_DIRECT**
+ *			Do a direct table lookup vs full lookup using FIB
+ *			rules.
+ *		**BPF_FIB_LOOKUP_OUTPUT**
+ *			Perform lookup from an egress perspective (default is
+ *			ingress).
+ *
+ *             *ctx* is either **struct xdp_md** for XDP programs or
+ *             **struct sk_buff** tc cls_act programs.
+ *     Return
+ *             Egress device index on success, 0 if packet needs to continue
+ *             up the stack for further processing or a negative error in case
+ *             of failure.
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		Add an entry to, or update a sockhash *map* referencing sockets.
+ *		The *skops* is used as a new value for the entry associated to
+ *		*key*. *flags* is one of:
+ *
+ *		**BPF_NOEXIST**
+ *			The entry for *key* must not exist in the map.
+ *		**BPF_EXIST**
+ *			The entry for *key* must already exist in the map.
+ *		**BPF_ANY**
+ *			No condition on the existence of the entry for *key*.
+ *
+ *		If the *map* has eBPF programs (parser and verdict), those will
+ *		be inherited by the socket being added. If the socket is
+ *		already attached to eBPF programs, this results in an error.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		This helper is used in programs implementing policies at the
+ *		socket level. If the message *msg* is allowed to pass (i.e. if
+ *		the verdict eBPF program returns **SK_PASS**), redirect it to
+ *		the socket referenced by *map* (of type
+ *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *		egress interfaces can be used for redirection. The
+ *		**BPF_F_INGRESS** value in *flags* is used to make the
+ *		distinction (ingress path is selected if the flag is present,
+ *		egress path otherwise). This is the only flag supported for now.
+ *	Return
+ *		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		This helper is used in programs implementing policies at the
+ *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ *		if the verdict eBPF program returns **SK_PASS**), redirect it
+ *		to the socket referenced by *map* (of type
+ *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *		egress interfaces can be used for redirection. The
+ *		**BPF_F_INGRESS** value in *flags* is used to make the
+ *		distinction (ingress path is selected if the flag is present,
+ *		egress otherwise). This is the only flag supported for now.
+ *	Return
+ *		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ *	Description
+ *		Encapsulate the packet associated to *skb* within a Layer 3
+ *		protocol header. This header is provided in the buffer at
+ *		address *hdr*, with *len* its size in bytes. *type* indicates
+ *		the protocol of the header and can be one of:
+ *
+ *		**BPF_LWT_ENCAP_SEG6**
+ *			IPv6 encapsulation with Segment Routing Header
+ *			(**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ *			the IPv6 header is computed by the kernel.
+ *		**BPF_LWT_ENCAP_SEG6_INLINE**
+ *			Only works if *skb* contains an IPv6 packet. Insert a
+ *			Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ *			the IPv6 header.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ *	Description
+ *		Store *len* bytes from address *from* into the packet
+ *		associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ *		inside the outermost IPv6 Segment Routing Header can be
+ *		modified through this helper.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ *	Description
+ *		Adjust the size allocated to TLVs in the outermost IPv6
+ *		Segment Routing Header contained in the packet associated to
+ *		*skb*, at position *offset* by *delta* bytes. Only offsets
+ *		after the segments are accepted. *delta* can be as well
+ *		positive (growing) as negative (shrinking).
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ *	Description
+ *		Apply an IPv6 Segment Routing action of type *action* to the
+ *		packet associated to *skb*. Each action takes a parameter
+ *		contained at address *param*, and of length *param_len* bytes.
+ *		*action* can be one of:
+ *
+ *		**SEG6_LOCAL_ACTION_END_X**
+ *			End.X action: Endpoint with Layer-3 cross-connect.
+ *			Type of *param*: **struct in6_addr**.
+ *		**SEG6_LOCAL_ACTION_END_T**
+ *			End.T action: Endpoint with specific IPv6 table lookup.
+ *			Type of *param*: **int**.
+ *		**SEG6_LOCAL_ACTION_END_B6**
+ *			End.B6 action: Endpoint bound to an SRv6 policy.
+ *			Type of param: **struct ipv6_sr_hdr**.
+ *		**SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ *			End.B6.Encap action: Endpoint bound to an SRv6
+ *			encapsulation policy.
+ *			Type of param: **struct ipv6_sr_hdr**.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ *	Description
+ *		This helper is used in programs implementing IR decoding, to
+ *		report a successfully decoded key press with *scancode*,
+ *		*toggle* value in the given *protocol*. The scancode will be
+ *		translated to a keycode using the rc keymap, and reported as
+ *		an input key down event. After a period a key up event is
+ *		generated. This period can be extended by calling either
+ *		**bpf_rc_keydown** () again with the same values, or calling
+ *		**bpf_rc_repeat** ().
+ *
+ *		Some protocols include a toggle bit, in case the button	was
+ *		released and pressed again between consecutive scancodes.
+ *
+ *		The *ctx* should point to the lirc sample as passed into
+ *		the program.
+ *
+ *		The *protocol* is the decoded protocol number (see
+ *		**enum rc_proto** for some predefined values).
+ *
+ *		This helper is only available if the kernel was compiled with
+ *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ *		"**y**".
+ *
+ *	Return
+ *		0
+ *
+ * int bpf_rc_repeat(void *ctx)
+ *	Description
+ *		This helper is used in programs implementing IR decoding, to
+ *		report a successfully decoded repeat key message. This delays
+ *		the generation of a key up event for previously generated
+ *		key down event.
+ *
+ *		Some IR protocols like NEC have a special IR message for
+ *		repeating last button, for when a button is held down.
+ *
+ *		The *ctx* should point to the lirc sample as passed into
+ *		the program.
+ *
+ *		This helper is only available if the kernel was compiled with
+ *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ *		"**y**".
+ *
+ *	Return
+ *		0
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -1835,7 +2122,19 @@
 	FN(msg_pull_data),		\
 	FN(bind),			\
 	FN(xdp_adjust_tail),		\
-	FN(skb_get_xfrm_state),
+	FN(skb_get_xfrm_state),		\
+	FN(get_stack),			\
+	FN(skb_load_bytes_relative),	\
+	FN(fib_lookup),			\
+	FN(sock_hash_update),		\
+	FN(msg_redirect_hash),		\
+	FN(sk_redirect_hash),		\
+	FN(lwt_push_encap),		\
+	FN(lwt_seg6_store_bytes),	\
+	FN(lwt_seg6_adjust_srh),	\
+	FN(lwt_seg6_action),		\
+	FN(rc_repeat),			\
+	FN(rc_keydown),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -1869,11 +2168,14 @@
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
 
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
 #define BPF_F_SKIP_FIELD_MASK		0xffULL
 #define BPF_F_USER_STACK		(1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
 #define BPF_F_FAST_STACK_CMP		(1ULL << 9)
 #define BPF_F_REUSE_STACKID		(1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID		(1ULL << 11)
 
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
@@ -1893,6 +2195,18 @@
 	BPF_ADJ_ROOM_NET,
 };
 
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+	BPF_HDR_START_MAC,
+	BPF_HDR_START_NET,
+};
+
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+	BPF_LWT_ENCAP_SEG6,
+	BPF_LWT_ENCAP_SEG6_INLINE
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -2026,6 +2340,14 @@
 struct sk_msg_md {
 	void *data;
 	void *data_end;
+
+	__u32 family;
+	__u32 remote_ip4;	/* Stored in network byte order */
+	__u32 local_ip4;	/* Stored in network byte order */
+	__u32 remote_ip6[4];	/* Stored in network byte order */
+	__u32 local_ip6[4];	/* Stored in network byte order */
+	__u32 remote_port;	/* Stored in network byte order */
+	__u32 local_port;	/* stored in host byte order */
 };
 
 #define BPF_TAG_SIZE	8
@@ -2047,6 +2369,10 @@
 	__u32 gpl_compatible:1;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 nr_jited_ksyms;
+	__u32 nr_jited_func_lens;
+	__aligned_u64 jited_ksyms;
+	__aligned_u64 jited_func_lens;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2060,6 +2386,15 @@
 	__u32 ifindex;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 btf_id;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+	__aligned_u64 btf;
+	__u32 btf_size;
+	__u32 id;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
@@ -2080,6 +2415,12 @@
 	__u32 family;		/* Allows 4-byte read, but no write */
 	__u32 type;		/* Allows 4-byte read, but no write */
 	__u32 protocol;		/* Allows 4-byte read, but no write */
+	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read and 4-byte write.
+				 * Stored in network byte order.
+				 */
+	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read and 4-byte write.
+				 * Stored in network byte order.
+				 */
 };
 
 /* User bpf_sock_ops struct to access socket values and specify request ops
@@ -2240,4 +2581,64 @@
 	__u64 args[0];
 };
 
+/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:  Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT  BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
+
+struct bpf_fib_lookup {
+	/* input:  network family for lookup (AF_INET, AF_INET6)
+	 * output: network family of egress nexthop
+	 */
+	__u8	family;
+
+	/* set if lookup is to consider L4 data - e.g., FIB rules */
+	__u8	l4_protocol;
+	__be16	sport;
+	__be16	dport;
+
+	/* total length of packet from network header - used for MTU check */
+	__u16	tot_len;
+	__u32	ifindex;  /* L3 device index for lookup */
+
+	union {
+		/* inputs to lookup */
+		__u8	tos;		/* AF_INET  */
+		__be32	flowlabel;	/* AF_INET6 */
+
+		/* output: metric of fib result (IPv4/IPv6 only) */
+		__u32	rt_metric;
+	};
+
+	union {
+		__be32		ipv4_src;
+		__u32		ipv6_src[4];  /* in6_addr; network order */
+	};
+
+	/* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+	 * network header. output: bpf_fib_lookup sets to gateway address
+	 * if FIB lookup returns gateway route
+	 */
+	union {
+		__be32		ipv4_dst;
+		__u32		ipv6_dst[4];  /* in6_addr; network order */
+	};
+
+	/* output */
+	__be16	h_vlan_proto;
+	__be16	h_vlan_TCI;
+	__u8	smac[6];     /* ETH_ALEN */
+	__u8	dmac[6];     /* ETH_ALEN */
+};
+
+enum bpf_task_fd_type {
+	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
+	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
+	BPF_FD_TYPE_KPROBE,		/* (symbol + offset) or addr */
+	BPF_FD_TYPE_KRETPROBE,		/* (symbol + offset) or addr */
+	BPF_FD_TYPE_UPROBE,		/* filename + offset */
+	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h
index aa7e24f..6efc601 100644
--- a/src/cc/compat/linux/virtual_bpf.h
+++ b/src/cc/compat/linux/virtual_bpf.h
@@ -97,6 +97,8 @@
 	BPF_PROG_QUERY,
 	BPF_RAW_TRACEPOINT_OPEN,
 	BPF_BTF_LOAD,
+	BPF_BTF_GET_FD_BY_ID,
+	BPF_TASK_FD_QUERY,
 };
 
 enum bpf_map_type {
@@ -117,6 +119,8 @@
 	BPF_MAP_TYPE_DEVMAP,
 	BPF_MAP_TYPE_SOCKMAP,
 	BPF_MAP_TYPE_CPUMAP,
+	BPF_MAP_TYPE_XSKMAP,
+	BPF_MAP_TYPE_SOCKHASH,
 };
 
 enum bpf_prog_type {
@@ -139,6 +143,8 @@
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_RAW_TRACEPOINT,
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	BPF_PROG_TYPE_LWT_SEG6LOCAL,
+	BPF_PROG_TYPE_LIRC_MODE2,
 };
 
 enum bpf_attach_type {
@@ -156,6 +162,9 @@
 	BPF_CGROUP_INET6_CONNECT,
 	BPF_CGROUP_INET4_POST_BIND,
 	BPF_CGROUP_INET6_POST_BIND,
+	BPF_CGROUP_UDP4_SENDMSG,
+	BPF_CGROUP_UDP6_SENDMSG,
+	BPF_LIRC_MODE2,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -282,8 +291,8 @@
 		char	map_name[BPF_OBJ_NAME_LEN];
 		__u32	map_ifindex;	/* ifindex of netdev to create on */
 		__u32	btf_fd;		/* fd pointing to a BTF type data */
-		__u32	btf_key_id;	/* BTF type_id of the key */
-		__u32	btf_value_id;	/* BTF type_id of the value */
+		__u32	btf_key_type_id;	/* BTF type_id of the key */
+		__u32	btf_value_type_id;	/* BTF type_id of the value */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -344,6 +353,7 @@
 			__u32		start_id;
 			__u32		prog_id;
 			__u32		map_id;
+			__u32		btf_id;
 		};
 		__u32		next_id;
 		__u32		open_flags;
@@ -376,6 +386,22 @@
 		__u32		btf_log_size;
 		__u32		btf_log_level;
 	};
+
+	struct {
+		__u32		pid;		/* input: pid */
+		__u32		fd;		/* input: fd */
+		__u32		flags;		/* input: flags */
+		__u32		buf_len;	/* input/output: buf len */
+		__aligned_u64	buf;		/* input/output:
+						 *   tp_name for tracepoint
+						 *   symbol for kprobe
+						 *   filename for uprobe
+						 */
+		__u32		prog_id;	/* output: prog_id */
+		__u32		fd_type;	/* output: BPF_FD_TYPE_* */
+		__u64		probe_offset;	/* output: probe_offset */
+		__u64		probe_addr;	/* output: probe_addr */
+	} task_fd_query;
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
@@ -829,12 +855,12 @@
  *
  * 		Also, be aware that the newer helper
  * 		**bpf_perf_event_read_value**\ () is recommended over
- * 		**bpf_perf_event_read*\ () in general. The latter has some ABI
+ * 		**bpf_perf_event_read**\ () in general. The latter has some ABI
  * 		quirks where error and counter value are used as a return code
  * 		(which is wrong to do since ranges may overlap). This issue is
- * 		fixed with bpf_perf_event_read_value(), which at the same time
- * 		provides more features over the **bpf_perf_event_read**\ ()
- * 		interface. Please refer to the description of
+ * 		fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * 		time provides more features over the **bpf_perf_event_read**\
+ * 		() interface. Please refer to the description of
  * 		**bpf_perf_event_read_value**\ () for details.
  * 	Return
  * 		The value of the perf event counter read from the map, or a
@@ -987,7 +1013,6 @@
  * 		::
  *
  * 			# sysctl kernel.perf_event_max_stack=<new value>
- *
  * 	Return
  * 		The positive or null stack id on success, or a negative error
  * 		in case of failure.
@@ -1362,7 +1387,7 @@
  * 	Return
  * 		0
  *
- * int bpf_setsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
  * 	Description
  * 		Emulate a call to **setsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1436,7 +1461,7 @@
  * 	Return
  * 		**SK_PASS** on success, or **SK_DROP** on error.
  *
- * int bpf_sock_map_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
  * 	Description
  * 		Add an entry to, or update a *map* referencing sockets. The
  * 		*skops* is used as a new value for the entry associated to
@@ -1534,7 +1559,7 @@
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_perf_prog_read_value(struct bpf_perf_event_data_kern *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
  * 	Description
  * 		For en eBPF program attached to a perf event, retrieve the
  * 		value of the event counter associated to *ctx* and store it in
@@ -1545,7 +1570,7 @@
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_getsockopt(struct bpf_sock_ops_kern *bpf_socket, int level, int optname, char *optval, int optlen)
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
  * 	Description
  * 		Emulate a call to **getsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1589,7 +1614,7 @@
  * 	Return
  * 		0
  *
- * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops_kern *bpf_sock, int argval)
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
  * 	Description
  * 		Attempt to set the value of the **bpf_sock_ops_cb_flags** field
  * 		for the full TCP socket associated to *bpf_sock_ops* to
@@ -1722,7 +1747,7 @@
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_bind(struct bpf_sock_addr_kern *ctx, struct sockaddr *addr, int addr_len)
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
  * 	Description
  * 		Bind the socket associated to *ctx* to the address pointed by
  * 		*addr*, of length *addr_len*. This allows for making outgoing
@@ -1768,6 +1793,268 @@
  * 		**CONFIG_XFRM** configuration option.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * 	Description
+ * 		Return a user or a kernel stack in bpf program provided buffer.
+ * 		To achieve this, the helper needs *ctx*, which is a pointer
+ * 		to the context on which the tracing program is executed.
+ * 		To store the stacktrace, the bpf program provides *buf* with
+ * 		a nonnegative *size*.
+ *
+ * 		The last argument, *flags*, holds the number of stack frames to
+ * 		skip (from 0 to 255), masked with
+ * 		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * 		the following flags:
+ *
+ * 		**BPF_F_USER_STACK**
+ * 			Collect a user space stack instead of a kernel stack.
+ * 		**BPF_F_USER_BUILD_ID**
+ * 			Collect buildid+offset instead of ips for user stack,
+ * 			only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * 		**bpf_get_stack**\ () can collect up to
+ * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * 		to a sufficiently large buffer size. Note that
+ * 		this limit can be controlled with the **sysctl** program, and
+ * 		that it should be manually increased in order to profile long
+ * 		user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * 		::
+ *
+ * 			# sysctl kernel.perf_event_max_stack=<new value>
+ * 	Return
+ * 		A non-negative value equal to or less than *size* on success,
+ * 		or a negative error in case of failure.
+ *
+ * int skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * 	Description
+ * 		This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * 		it provides an easy way to load *len* bytes from *offset*
+ * 		from the packet associated to *skb*, into the buffer pointed
+ * 		by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * 		a fifth argument *start_header* exists in order to select a
+ * 		base offset to start from. *start_header* can be one of:
+ *
+ * 		**BPF_HDR_START_MAC**
+ * 			Base offset to load data from is *skb*'s mac header.
+ * 		**BPF_HDR_START_NET**
+ * 			Base offset to load data from is *skb*'s network header.
+ *
+ * 		In general, "direct packet access" is the preferred method to
+ * 		access packet data, however, this helper is in particular useful
+ * 		in socket filters where *skb*\ **->data** does not always point
+ * 		to the start of the mac header and where "direct packet access"
+ * 		is not available.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ *	Description
+ *		Do FIB lookup in kernel tables using parameters in *params*.
+ *		If lookup is successful and result shows packet is to be
+ *		forwarded, the neighbor tables are searched for the nexthop.
+ *		If successful (i.e., FIB lookup shows forwarding and nexthop
+ *		is resolved), the nexthop address is returned in ipv4_dst
+ *		or ipv6_dst based on family, smac is set to mac address of
+ *		egress device, dmac is set to nexthop mac address, rt_metric
+ *		is set to metric from route (IPv4/IPv6 only).
+ *
+ *             *plen* argument is the size of the passed in struct.
+ *             *flags* argument can be a combination of one or more of the
+ *             following values:
+ *
+ *		**BPF_FIB_LOOKUP_DIRECT**
+ *			Do a direct table lookup vs full lookup using FIB
+ *			rules.
+ *		**BPF_FIB_LOOKUP_OUTPUT**
+ *			Perform lookup from an egress perspective (default is
+ *			ingress).
+ *
+ *             *ctx* is either **struct xdp_md** for XDP programs or
+ *             **struct sk_buff** tc cls_act programs.
+ *     Return
+ *             Egress device index on success, 0 if packet needs to continue
+ *             up the stack for further processing or a negative error in case
+ *             of failure.
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		Add an entry to, or update a sockhash *map* referencing sockets.
+ *		The *skops* is used as a new value for the entry associated to
+ *		*key*. *flags* is one of:
+ *
+ *		**BPF_NOEXIST**
+ *			The entry for *key* must not exist in the map.
+ *		**BPF_EXIST**
+ *			The entry for *key* must already exist in the map.
+ *		**BPF_ANY**
+ *			No condition on the existence of the entry for *key*.
+ *
+ *		If the *map* has eBPF programs (parser and verdict), those will
+ *		be inherited by the socket being added. If the socket is
+ *		already attached to eBPF programs, this results in an error.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		This helper is used in programs implementing policies at the
+ *		socket level. If the message *msg* is allowed to pass (i.e. if
+ *		the verdict eBPF program returns **SK_PASS**), redirect it to
+ *		the socket referenced by *map* (of type
+ *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *		egress interfaces can be used for redirection. The
+ *		**BPF_F_INGRESS** value in *flags* is used to make the
+ *		distinction (ingress path is selected if the flag is present,
+ *		egress path otherwise). This is the only flag supported for now.
+ *	Return
+ *		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ *	Description
+ *		This helper is used in programs implementing policies at the
+ *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ *		if the verdict eBPF program returns **SK_PASS**), redirect it
+ *		to the socket referenced by *map* (of type
+ *		**BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ *		egress interfaces can be used for redirection. The
+ *		**BPF_F_INGRESS** value in *flags* is used to make the
+ *		distinction (ingress path is selected if the flag is present,
+ *		egress otherwise). This is the only flag supported for now.
+ *	Return
+ *		**SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ *	Description
+ *		Encapsulate the packet associated to *skb* within a Layer 3
+ *		protocol header. This header is provided in the buffer at
+ *		address *hdr*, with *len* its size in bytes. *type* indicates
+ *		the protocol of the header and can be one of:
+ *
+ *		**BPF_LWT_ENCAP_SEG6**
+ *			IPv6 encapsulation with Segment Routing Header
+ *			(**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ *			the IPv6 header is computed by the kernel.
+ *		**BPF_LWT_ENCAP_SEG6_INLINE**
+ *			Only works if *skb* contains an IPv6 packet. Insert a
+ *			Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ *			the IPv6 header.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ *	Description
+ *		Store *len* bytes from address *from* into the packet
+ *		associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ *		inside the outermost IPv6 Segment Routing Header can be
+ *		modified through this helper.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ *	Description
+ *		Adjust the size allocated to TLVs in the outermost IPv6
+ *		Segment Routing Header contained in the packet associated to
+ *		*skb*, at position *offset* by *delta* bytes. Only offsets
+ *		after the segments are accepted. *delta* can be as well
+ *		positive (growing) as negative (shrinking).
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ *	Description
+ *		Apply an IPv6 Segment Routing action of type *action* to the
+ *		packet associated to *skb*. Each action takes a parameter
+ *		contained at address *param*, and of length *param_len* bytes.
+ *		*action* can be one of:
+ *
+ *		**SEG6_LOCAL_ACTION_END_X**
+ *			End.X action: Endpoint with Layer-3 cross-connect.
+ *			Type of *param*: **struct in6_addr**.
+ *		**SEG6_LOCAL_ACTION_END_T**
+ *			End.T action: Endpoint with specific IPv6 table lookup.
+ *			Type of *param*: **int**.
+ *		**SEG6_LOCAL_ACTION_END_B6**
+ *			End.B6 action: Endpoint bound to an SRv6 policy.
+ *			Type of param: **struct ipv6_sr_hdr**.
+ *		**SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ *			End.B6.Encap action: Endpoint bound to an SRv6
+ *			encapsulation policy.
+ *			Type of param: **struct ipv6_sr_hdr**.
+ *
+ * 		A call to this helper is susceptible to change the underlying
+ * 		packet buffer. Therefore, at load time, all checks on pointers
+ * 		previously done by the verifier are invalidated and must be
+ * 		performed again, if the helper is used in combination with
+ * 		direct packet access.
+ *	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ *	Description
+ *		This helper is used in programs implementing IR decoding, to
+ *		report a successfully decoded key press with *scancode*,
+ *		*toggle* value in the given *protocol*. The scancode will be
+ *		translated to a keycode using the rc keymap, and reported as
+ *		an input key down event. After a period a key up event is
+ *		generated. This period can be extended by calling either
+ *		**bpf_rc_keydown** () again with the same values, or calling
+ *		**bpf_rc_repeat** ().
+ *
+ *		Some protocols include a toggle bit, in case the button	was
+ *		released and pressed again between consecutive scancodes.
+ *
+ *		The *ctx* should point to the lirc sample as passed into
+ *		the program.
+ *
+ *		The *protocol* is the decoded protocol number (see
+ *		**enum rc_proto** for some predefined values).
+ *
+ *		This helper is only available if the kernel was compiled with
+ *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ *		"**y**".
+ *
+ *	Return
+ *		0
+ *
+ * int bpf_rc_repeat(void *ctx)
+ *	Description
+ *		This helper is used in programs implementing IR decoding, to
+ *		report a successfully decoded repeat key message. This delays
+ *		the generation of a key up event for previously generated
+ *		key down event.
+ *
+ *		Some IR protocols like NEC have a special IR message for
+ *		repeating last button, for when a button is held down.
+ *
+ *		The *ctx* should point to the lirc sample as passed into
+ *		the program.
+ *
+ *		This helper is only available if the kernel was compiled with
+ *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ *		"**y**".
+ *
+ *	Return
+ *		0
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -1836,7 +2123,19 @@
 	FN(msg_pull_data),		\
 	FN(bind),			\
 	FN(xdp_adjust_tail),		\
-	FN(skb_get_xfrm_state),
+	FN(skb_get_xfrm_state),		\
+	FN(get_stack),			\
+	FN(skb_load_bytes_relative),	\
+	FN(fib_lookup),			\
+	FN(sock_hash_update),		\
+	FN(msg_redirect_hash),		\
+	FN(sk_redirect_hash),		\
+	FN(lwt_push_encap),		\
+	FN(lwt_seg6_store_bytes),	\
+	FN(lwt_seg6_adjust_srh),	\
+	FN(lwt_seg6_action),		\
+	FN(rc_repeat),			\
+	FN(rc_keydown),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -1870,11 +2169,14 @@
 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
 #define BPF_F_TUNINFO_IPV6		(1ULL << 0)
 
-/* BPF_FUNC_get_stackid flags. */
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
 #define BPF_F_SKIP_FIELD_MASK		0xffULL
 #define BPF_F_USER_STACK		(1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
 #define BPF_F_FAST_STACK_CMP		(1ULL << 9)
 #define BPF_F_REUSE_STACKID		(1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID		(1ULL << 11)
 
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
@@ -1894,6 +2196,18 @@
 	BPF_ADJ_ROOM_NET,
 };
 
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+	BPF_HDR_START_MAC,
+	BPF_HDR_START_NET,
+};
+
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+	BPF_LWT_ENCAP_SEG6,
+	BPF_LWT_ENCAP_SEG6_INLINE
+};
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -2027,6 +2341,14 @@
 struct sk_msg_md {
 	void *data;
 	void *data_end;
+
+	__u32 family;
+	__u32 remote_ip4;	/* Stored in network byte order */
+	__u32 local_ip4;	/* Stored in network byte order */
+	__u32 remote_ip6[4];	/* Stored in network byte order */
+	__u32 local_ip6[4];	/* Stored in network byte order */
+	__u32 remote_port;	/* Stored in network byte order */
+	__u32 local_port;	/* stored in host byte order */
 };
 
 #define BPF_TAG_SIZE	8
@@ -2048,6 +2370,10 @@
 	__u32 gpl_compatible:1;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 nr_jited_ksyms;
+	__u32 nr_jited_func_lens;
+	__aligned_u64 jited_ksyms;
+	__aligned_u64 jited_func_lens;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2061,6 +2387,15 @@
 	__u32 ifindex;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 btf_id;
+	__u32 btf_key_type_id;
+	__u32 btf_value_type_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+	__aligned_u64 btf;
+	__u32 btf_size;
+	__u32 id;
 } __attribute__((aligned(8)));
 
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
@@ -2081,6 +2416,12 @@
 	__u32 family;		/* Allows 4-byte read, but no write */
 	__u32 type;		/* Allows 4-byte read, but no write */
 	__u32 protocol;		/* Allows 4-byte read, but no write */
+	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read and 4-byte write.
+				 * Stored in network byte order.
+				 */
+	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read and 4-byte write.
+				 * Stored in network byte order.
+				 */
 };
 
 /* User bpf_sock_ops struct to access socket values and specify request ops
@@ -2241,5 +2582,65 @@
 	__u64 args[0];
 };
 
+/* DIRECT:  Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT:  Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT  BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT  BIT(1)
+
+struct bpf_fib_lookup {
+	/* input:  network family for lookup (AF_INET, AF_INET6)
+	 * output: network family of egress nexthop
+	 */
+	__u8	family;
+
+	/* set if lookup is to consider L4 data - e.g., FIB rules */
+	__u8	l4_protocol;
+	__be16	sport;
+	__be16	dport;
+
+	/* total length of packet from network header - used for MTU check */
+	__u16	tot_len;
+	__u32	ifindex;  /* L3 device index for lookup */
+
+	union {
+		/* inputs to lookup */
+		__u8	tos;		/* AF_INET  */
+		__be32	flowlabel;	/* AF_INET6 */
+
+		/* output: metric of fib result (IPv4/IPv6 only) */
+		__u32	rt_metric;
+	};
+
+	union {
+		__be32		ipv4_src;
+		__u32		ipv6_src[4];  /* in6_addr; network order */
+	};
+
+	/* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+	 * network header. output: bpf_fib_lookup sets to gateway address
+	 * if FIB lookup returns gateway route
+	 */
+	union {
+		__be32		ipv4_dst;
+		__u32		ipv6_dst[4];  /* in6_addr; network order */
+	};
+
+	/* output */
+	__be16	h_vlan_proto;
+	__be16	h_vlan_TCI;
+	__u8	smac[6];     /* ETH_ALEN */
+	__u8	dmac[6];     /* ETH_ALEN */
+};
+
+enum bpf_task_fd_type {
+	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
+	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
+	BPF_FD_TYPE_KPROBE,		/* (symbol + offset) or addr */
+	BPF_FD_TYPE_KRETPROBE,		/* (symbol + offset) or addr */
+	BPF_FD_TYPE_UPROBE,		/* filename + offset */
+	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
 )********"
diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h
index 11044cf..411c6e8 100644
--- a/src/cc/export/helpers.h
+++ b/src/cc/export/helpers.h
@@ -356,6 +356,30 @@
   (void *) BPF_FUNC_xdp_adjust_tail;
 static int (*bpf_skb_get_xfrm_state)(void *ctx, u32 index, void *xfrm_state, u32 size, u64 flags) =
   (void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_get_stack)(void *ctx, void *buf, u32 size, u64 flags) =
+  (void *) BPF_FUNC_get_stack;
+static int (*bpf_skb_load_bytes_relative)(void *ctx, u32 offset, void *to, u32 len, u32 start_header) =
+  (void *) BPF_FUNC_skb_load_bytes_relative;
+static int (*bpf_fib_lookup)(void *ctx, void *params, int plen, u32 flags) =
+  (void *) BPF_FUNC_fib_lookup;
+static int (*bpf_sock_hash_update)(void *ctx, void *map, void *key, u64 flags) =
+  (void *) BPF_FUNC_sock_hash_update;
+static int (*bpf_msg_redirect_hash)(void *ctx, void *map, void *key, u64 flags) =
+  (void *) BPF_FUNC_msg_redirect_hash;
+static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, u64 flags) =
+  (void *) BPF_FUNC_sk_redirect_hash;
+static int (*bpf_lwt_push_encap)(void *skb, u32 type, void *hdr, u32 len) =
+  (void *) BPF_FUNC_lwt_push_encap;
+static int (*bpf_lwt_seg6_store_bytes)(void *ctx, u32 offset, const void *from, u32 len) =
+  (void *) BPF_FUNC_lwt_seg6_store_bytes;
+static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, u32 offset, s32 delta) =
+  (void *) BPF_FUNC_lwt_seg6_adjust_srh;
+static int (*bpf_lwt_seg6_action)(void *ctx, u32 action, void *param, u32 param_len) =
+  (void *) BPF_FUNC_lwt_seg6_action;
+static int (*bpf_rc_keydown)(void *ctx, u32 protocol, u64 scancode, u32 toggle) =
+  (void *) BPF_FUNC_rc_keydown;
+static int (*bpf_rc_repeat)(void *ctx) =
+  (void *) BPF_FUNC_rc_repeat;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/src/cc/frontends/clang/b_frontend_action.cc b/src/cc/frontends/clang/b_frontend_action.cc
index 9382f63..e793ac9 100644
--- a/src/cc/frontends/clang/b_frontend_action.cc
+++ b/src/cc/frontends/clang/b_frontend_action.cc
@@ -82,6 +82,8 @@
 using std::map;
 using std::move;
 using std::set;
+using std::tuple;
+using std::make_tuple;
 using std::string;
 using std::to_string;
 using std::unique_ptr;
@@ -90,44 +92,100 @@
 
 class ProbeChecker : public RecursiveASTVisitor<ProbeChecker> {
  public:
-  explicit ProbeChecker(Expr *arg, const set<Decl *> &ptregs)
-      : needs_probe_(false), is_transitive_(false), ptregs_(ptregs) {
+  explicit ProbeChecker(Expr *arg, const set<tuple<Decl *, int>> &ptregs,
+                        bool track_helpers, bool is_assign)
+      : needs_probe_(false), is_transitive_(false), ptregs_(ptregs),
+        track_helpers_(track_helpers), nb_derefs_(0), is_assign_(is_assign) {
     if (arg) {
       TraverseStmt(arg);
       if (arg->getType()->isPointerType())
         is_transitive_ = needs_probe_;
     }
   }
+  explicit ProbeChecker(Expr *arg, const set<tuple<Decl *, int>> &ptregs,
+                        bool is_transitive)
+      : ProbeChecker(arg, ptregs, is_transitive, false) {}
   bool VisitCallExpr(CallExpr *E) {
     needs_probe_ = false;
-    if (VarDecl *V = dyn_cast<VarDecl>(E->getCalleeDecl())) {
+    if (!track_helpers_)
+      return false;
+    if (VarDecl *V = dyn_cast<VarDecl>(E->getCalleeDecl()))
       needs_probe_ = V->getName() == "bpf_get_current_task";
-    }
     return false;
   }
-  bool VisitDeclRefExpr(DeclRefExpr *E) {
-    if (ptregs_.find(E->getDecl()) != ptregs_.end())
+  bool VisitMemberExpr(MemberExpr *M) {
+    tuple<Decl *, int> pt = make_tuple(M->getMemberDecl(), nb_derefs_);
+    if (ptregs_.find(pt) != ptregs_.end()) {
       needs_probe_ = true;
+      return false;
+    }
+    return true;
+  }
+  bool VisitUnaryOperator(UnaryOperator *E) {
+    if (E->getOpcode() == UO_Deref)
+      nb_derefs_++;
+    else if (E->getOpcode() == UO_AddrOf)
+      nb_derefs_--;
+    return true;
+  }
+  bool VisitDeclRefExpr(DeclRefExpr *E) {
+    if (is_assign_) {
+      // We're looking for an external pointer, regardless of the number of
+      // dereferences.
+      for(auto p : ptregs_) {
+        if (std::get<0>(p) == E->getDecl()) {
+          needs_probe_ = true;
+          nb_derefs_ += std::get<1>(p);
+          return false;
+        }
+      }
+    } else {
+      tuple<Decl *, int> pt = make_tuple(E->getDecl(), nb_derefs_);
+      if (ptregs_.find(pt) != ptregs_.end())
+        needs_probe_ = true;
+    }
     return true;
   }
   bool needs_probe() const { return needs_probe_; }
   bool is_transitive() const { return is_transitive_; }
+  int get_nb_derefs() const { return nb_derefs_; }
  private:
   bool needs_probe_;
   bool is_transitive_;
-  const set<Decl *> &ptregs_;
+  const set<tuple<Decl *, int>> &ptregs_;
+  bool track_helpers_;
+  // Nb of dereferences we go through before finding the external pointer.
+  // A negative number counts the number of addrof.
+  int nb_derefs_;
+  bool is_assign_;
 };
 
 // Visit a piece of the AST and mark it as needing probe reads
 class ProbeSetter : public RecursiveASTVisitor<ProbeSetter> {
  public:
-  explicit ProbeSetter(set<Decl *> *ptregs) : ptregs_(ptregs) {}
+  explicit ProbeSetter(set<tuple<Decl *, int>> *ptregs, int nb_addrof)
+      : ptregs_(ptregs), nb_derefs_(-nb_addrof) {}
   bool VisitDeclRefExpr(DeclRefExpr *E) {
-    ptregs_->insert(E->getDecl());
+    tuple<Decl *, int> pt = make_tuple(E->getDecl(), nb_derefs_);
+    ptregs_->insert(pt);
     return true;
   }
+  explicit ProbeSetter(set<tuple<Decl *, int>> *ptregs)
+      : ProbeSetter(ptregs, 0) {}
+  bool VisitUnaryOperator(UnaryOperator *E) {
+    if (E->getOpcode() == UO_Deref)
+      nb_derefs_++;
+    return true;
+  }
+  bool VisitMemberExpr(MemberExpr *M) {
+    tuple<Decl *, int> pt = make_tuple(M->getMemberDecl(), nb_derefs_);
+    ptregs_->insert(pt);
+    return false;
+  }
  private:
-  set<Decl *> *ptregs_;
+  set<tuple<Decl *, int>> *ptregs_;
+  // Nb of dereferences we go through before getting to the actual variable.
+  int nb_derefs_;
 };
 
 MapVisitor::MapVisitor(set<Decl *> &m) : m_(m) {}
@@ -141,9 +199,10 @@
           return true;
 
         if (memb_name == "update" || memb_name == "insert") {
-          if (ProbeChecker(Call->getArg(1), ptregs_).needs_probe()) {
+          ProbeChecker checker = ProbeChecker(Call->getArg(1), ptregs_, true,
+                                              true);
+          if (checker.needs_probe())
             m_.insert(Ref->getDecl());
-          }
         }
       }
     }
@@ -151,24 +210,90 @@
   return true;
 }
 
-ProbeVisitor::ProbeVisitor(ASTContext &C, Rewriter &rewriter, set<Decl *> &m) :
-  C(C), rewriter_(rewriter), m_(m) {}
+ProbeVisitor::ProbeVisitor(ASTContext &C, Rewriter &rewriter,
+                           set<Decl *> &m, bool track_helpers) :
+  C(C), rewriter_(rewriter), m_(m), track_helpers_(track_helpers) {}
 
-bool ProbeVisitor::VisitVarDecl(VarDecl *Decl) {
-  if (Expr *E = Decl->getInit()) {
-    if (ProbeChecker(E, ptregs_).is_transitive() || IsContextMemberExpr(E)) {
-      set_ptreg(Decl);
+bool ProbeVisitor::assignsExtPtr(Expr *E, int *nbAddrOf) {
+  if (IsContextMemberExpr(E)) {
+    *nbAddrOf = 0;
+    return true;
+  }
+
+  ProbeChecker checker = ProbeChecker(E, ptregs_, track_helpers_,
+                                      true);
+  if (checker.is_transitive()) {
+    // The negative of the number of dereferences is the number of addrof.  In
+    // an assignment, if we went through n addrof before getting the external
+    // pointer, then we'll need n dereferences on the left-hand side variable
+    // to get to the external pointer.
+    *nbAddrOf = -checker.get_nb_derefs();
+    return true;
+  }
+
+  if (E->getStmtClass() == Stmt::CallExprClass) {
+    CallExpr *Call = dyn_cast<CallExpr>(E);
+    if (MemberExpr *Memb = dyn_cast<MemberExpr>(Call->getCallee()->IgnoreImplicit())) {
+      StringRef memb_name = Memb->getMemberDecl()->getName();
+      if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Memb->getBase())) {
+        if (SectionAttr *A = Ref->getDecl()->getAttr<SectionAttr>()) {
+          if (!A->getName().startswith("maps"))
+            return false;
+
+          if (memb_name == "lookup" || memb_name == "lookup_or_init") {
+            if (m_.find(Ref->getDecl()) != m_.end()) {
+              // Retrieved an ext. pointer from a map, mark LHS as ext. pointer.
+              // Pointers from maps always need a single dereference to get the
+              // actual value.  The value may be an external pointer but cannot
+              // be a pointer to an external pointer as the verifier prohibits
+              // storing known pointers (to map values, context, the stack, or
+              // the packet) in maps.
+              *nbAddrOf = 1;
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+bool ProbeVisitor::VisitVarDecl(VarDecl *D) {
+  if (Expr *E = D->getInit()) {
+    int nbAddrOf;
+    if (assignsExtPtr(E, &nbAddrOf)) {
+      // The negative of the number of addrof is the number of dereferences.
+      tuple<Decl *, int> pt = make_tuple(D, -nbAddrOf);
+      set_ptreg(pt);
     }
   }
   return true;
 }
+
 bool ProbeVisitor::VisitCallExpr(CallExpr *Call) {
+  // Skip bpf_probe_read for the third argument if it is an AddrOf.
+  if (VarDecl *V = dyn_cast<VarDecl>(Call->getCalleeDecl())) {
+    if (V->getName() == "bpf_probe_read" && Call->getNumArgs() >= 3) {
+      const Expr *E = Call->getArg(2)->IgnoreParenCasts();
+      if (const UnaryOperator *UnaryExpr = dyn_cast<UnaryOperator>(E)) {
+        if (UnaryExpr->getOpcode() == UO_AddrOf)
+          return false;
+      }
+      return true;
+    }
+  }
+
   if (FunctionDecl *F = dyn_cast<FunctionDecl>(Call->getCalleeDecl())) {
     if (F->hasBody()) {
       unsigned i = 0;
       for (auto arg : Call->arguments()) {
-        if (ProbeChecker(arg, ptregs_).needs_probe())
-          ptregs_.insert(F->getParamDecl(i));
+        ProbeChecker checker = ProbeChecker(arg, ptregs_, track_helpers_,
+                                            true);
+        if (checker.needs_probe()) {
+          tuple<Decl *, int> pt = make_tuple(F->getParamDecl(i),
+                                             checker.get_nb_derefs());
+          ptregs_.insert(pt);
+        }
         ++i;
       }
       if (fn_visited_.find(F) == fn_visited_.end()) {
@@ -182,31 +307,11 @@
 bool ProbeVisitor::VisitBinaryOperator(BinaryOperator *E) {
   if (!E->isAssignmentOp())
     return true;
-  // copy probe attribute from RHS to LHS if present
-  if (ProbeChecker(E->getRHS(), ptregs_).is_transitive()) {
-    ProbeSetter setter(&ptregs_);
-    setter.TraverseStmt(E->getLHS());
-  } else if (E->getRHS()->getStmtClass() == Stmt::CallExprClass) {
-    CallExpr *Call = dyn_cast<CallExpr>(E->getRHS());
-    if (MemberExpr *Memb = dyn_cast<MemberExpr>(Call->getCallee()->IgnoreImplicit())) {
-      StringRef memb_name = Memb->getMemberDecl()->getName();
-      if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Memb->getBase())) {
-        if (SectionAttr *A = Ref->getDecl()->getAttr<SectionAttr>()) {
-          if (!A->getName().startswith("maps"))
-            return true;
 
-          if (memb_name == "lookup" || memb_name == "lookup_or_init") {
-            if (m_.find(Ref->getDecl()) != m_.end()) {
-            // Retrieved an external pointer from a map, mark LHS as external pointer.
-              ProbeSetter setter(&ptregs_);
-              setter.TraverseStmt(E->getLHS());
-            }
-          }
-        }
-      }
-    }
-  } else if (IsContextMemberExpr(E->getRHS())) {
-    ProbeSetter setter(&ptregs_);
+  // copy probe attribute from RHS to LHS if present
+  int nbAddrOf;
+  if (assignsExtPtr(E->getRHS(), &nbAddrOf)) {
+    ProbeSetter setter(&ptregs_, nbAddrOf);
     setter.TraverseStmt(E->getLHS());
   }
   return true;
@@ -216,10 +321,10 @@
     return true;
   if (memb_visited_.find(E) != memb_visited_.end())
     return true;
-  if (!ProbeChecker(E, ptregs_).needs_probe())
+  Expr *sub = E->getSubExpr();
+  if (!ProbeChecker(sub, ptregs_, track_helpers_).needs_probe())
     return true;
   memb_visited_.insert(E);
-  Expr *sub = E->getSubExpr();
   string rhs = rewriter_.getRewrittenText(expansionRange(sub->getSourceRange()));
   string text;
   text = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
@@ -231,11 +336,6 @@
 bool ProbeVisitor::VisitMemberExpr(MemberExpr *E) {
   if (memb_visited_.find(E) != memb_visited_.end()) return true;
 
-  // Checks to see if the expression references something that needs to be run
-  // through bpf_probe_read.
-  if (!ProbeChecker(E, ptregs_).needs_probe())
-    return true;
-
   Expr *base;
   SourceLocation rhs_start, member;
   bool found = false;
@@ -255,6 +355,12 @@
     error(base->getLocEnd(), "internal error: MemberLoc is invalid while preparing probe rewrite");
     return false;
   }
+
+  // Checks to see if the expression references something that needs to be run
+  // through bpf_probe_read.
+  if (!ProbeChecker(base, ptregs_, track_helpers_).needs_probe())
+    return true;
+
   string rhs = rewriter_.getRewrittenText(expansionRange(SourceRange(rhs_start, E->getLocEnd())));
   string base_type = base->getType()->getPointeeType().getAsString();
   string pre, post;
@@ -276,7 +382,6 @@
   bool found = false;
   MemberExpr *M;
   for (M = Memb; M; M = dyn_cast<MemberExpr>(M->getBase())) {
-    memb_visited_.insert(M);
     rhs_start = M->getLocEnd();
     base = M->getBase();
     member = M->getMemberLoc();
@@ -302,7 +407,11 @@
 
 SourceRange
 ProbeVisitor::expansionRange(SourceRange range) {
+#if LLVM_MAJOR_VERSION >= 7
+  return rewriter_.getSourceMgr().getExpansionRange(range).getAsRange();
+#else
   return rewriter_.getSourceMgr().getExpansionRange(range);
+#endif
 }
 
 template <unsigned N>
@@ -695,7 +804,11 @@
 
 SourceRange
 BTypeVisitor::expansionRange(SourceRange range) {
+#if LLVM_MAJOR_VERSION >= 7
+  return rewriter_.getSourceMgr().getExpansionRange(range).getAsRange();
+#else
   return rewriter_.getSourceMgr().getExpansionRange(range);
+#endif
 }
 
 template <unsigned N>
@@ -869,31 +982,18 @@
     : fe_(fe),
       map_visitor_(m),
       btype_visitor_(C, fe),
-      probe_visitor_(C, rewriter, m) {}
-
-bool BTypeConsumer::HandleTopLevelDecl(DeclGroupRef Group) {
-  for (auto D : Group) {
-    if (FunctionDecl *F = dyn_cast<FunctionDecl>(D)) {
-      if (fe_.is_rewritable_ext_func(F)) {
-        for (auto arg : F->parameters()) {
-          if (arg != F->getParamDecl(0) && !arg->getType()->isFundamentalType()) {
-            map_visitor_.set_ptreg(arg);
-          }
-        }
-        map_visitor_.TraverseDecl(D);
-      }
-    }
-  }
-  return true;
-}
+      probe_visitor1_(C, rewriter, m, true),
+      probe_visitor2_(C, rewriter, m, false) {}
 
 void BTypeConsumer::HandleTranslationUnit(ASTContext &Context) {
   DeclContext::decl_iterator it;
   DeclContext *DC = TranslationUnitDecl::castToDeclContext(Context.getTranslationUnitDecl());
 
   /**
-   * ProbeVisitor's traversal runs after an entire translation unit has been parsed.
-   * to make sure maps with external pointers have been identified.
+   * In a first traversal, ProbeVisitor tracks external pointers identified
+   * through each function's arguments and replaces their dereferences with
+   * calls to bpf_probe_read. It also passes all identified pointers to
+   * external addresses to MapVisitor.
    */
   for (it = DC->decls_begin(); it != DC->decls_end(); it++) {
     Decl *D = *it;
@@ -901,12 +1001,62 @@
       if (fe_.is_rewritable_ext_func(F)) {
         for (auto arg : F->parameters()) {
           if (arg == F->getParamDecl(0)) {
-            probe_visitor_.set_ctx(arg);
+            /**
+             * Limit tracing of pointers from context to tracing contexts.
+             * We're whitelisting instead of blacklisting to avoid issues with
+             * existing programs if new context types are added in the future.
+             */
+            string type = arg->getType().getAsString();
+            if (type == "struct pt_regs *" ||
+                type == "struct bpf_raw_tracepoint_args *" ||
+                type.substr(0, 19) == "struct tracepoint__")
+              probe_visitor1_.set_ctx(arg);
           } else if (!arg->getType()->isFundamentalType()) {
-            probe_visitor_.set_ptreg(arg);
+            tuple<Decl *, int> pt = make_tuple(arg, 0);
+            probe_visitor1_.set_ptreg(pt);
           }
         }
-        probe_visitor_.TraverseDecl(D);
+
+        probe_visitor1_.TraverseDecl(D);
+        for (auto ptreg : probe_visitor1_.get_ptregs()) {
+          map_visitor_.set_ptreg(ptreg);
+        }
+      }
+    }
+  }
+
+  /**
+   * MapVisitor uses external pointers identified by the first ProbeVisitor
+   * traversal to identify all maps with external pointers as values.
+   * MapVisitor runs only after ProbeVisitor finished its traversal of the
+   * whole translation unit to clearly separate the role of each ProbeVisitor's
+   * traversal: the first tracks external pointers from function arguments,
+   * whereas the second tracks external pointers from maps. Without this clear
+   * separation, ProbeVisitor might attempt to replace several times the same
+   * dereferences.
+   */
+  for (it = DC->decls_begin(); it != DC->decls_end(); it++) {
+    Decl *D = *it;
+    if (FunctionDecl *F = dyn_cast<FunctionDecl>(D)) {
+      if (fe_.is_rewritable_ext_func(F)) {
+        map_visitor_.TraverseDecl(D);
+      }
+    }
+  }
+
+  /**
+   * In a second traversal, ProbeVisitor tracks pointers passed through the
+   * maps identified by MapVisitor and replaces their dereferences with calls
+   * to bpf_probe_read.
+   * This last traversal runs after MapVisitor went through an entire
+   * translation unit, to ensure maps with external pointers have all been
+   * identified.
+   */
+  for (it = DC->decls_begin(); it != DC->decls_end(); it++) {
+    Decl *D = *it;
+    if (FunctionDecl *F = dyn_cast<FunctionDecl>(D)) {
+      if (fe_.is_rewritable_ext_func(F)) {
+        probe_visitor2_.TraverseDecl(D);
       }
     }
 
@@ -933,7 +1083,28 @@
           (file_name.empty() || file_name == main_path_));
 }
 
+void BFrontendAction::DoMiscWorkAround() {
+  // In 4.16 and later, CONFIG_CC_STACKPROTECTOR is moved out of Kconfig and into
+  // Makefile. It will be set depending on CONFIG_CC_STACKPROTECTOR_{AUTO|REGULAR|STRONG}.
+  // CONFIG_CC_STACKPROTECTOR is still used in various places, e.g., struct task_struct,
+  // to guard certain fields. The workaround here intends to define
+  // CONFIG_CC_STACKPROTECTOR properly based on other configs, so it relieves any bpf
+  // program (using task_struct, etc.) of patching the below code.
+  rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).InsertText(0,
+    "#if !defined(CONFIG_CC_STACKPROTECTOR)\n"
+    "#if defined(CONFIG_CC_STACKPROTECTOR_AUTO) \\\n"
+    "    || defined(CONFIG_CC_STACKPROTECTOR_REGULAR) \\\n"
+    "    || defined(CONFIG_CC_STACKPROTECTOR_STRONG)\n"
+    "#define CONFIG_CC_STACKPROTECTOR\n"
+    "#endif\n"
+    "#endif\n",
+    false);
+}
+
 void BFrontendAction::EndSourceFileAction() {
+  // Additional misc rewrites
+  DoMiscWorkAround();
+
   if (flags_ & DEBUG_PREPROCESSOR)
     rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).write(llvm::errs());
   if (flags_ & DEBUG_SOURCE) {
diff --git a/src/cc/frontends/clang/b_frontend_action.h b/src/cc/frontends/clang/b_frontend_action.h
index b091a87..7dc373c 100644
--- a/src/cc/frontends/clang/b_frontend_action.h
+++ b/src/cc/frontends/clang/b_frontend_action.h
@@ -47,10 +47,10 @@
  public:
   explicit MapVisitor(std::set<clang::Decl *> &m);
   bool VisitCallExpr(clang::CallExpr *Call);
-  void set_ptreg(clang::Decl *D) { ptregs_.insert(D); }
+  void set_ptreg(std::tuple<clang::Decl *, int> &pt) { ptregs_.insert(pt); }
  private:
   std::set<clang::Decl *> &m_;
-  std::set<clang::Decl *> ptregs_;
+  std::set<std::tuple<clang::Decl *, int>> ptregs_;
 };
 
 // Type visitor and rewriter for B programs.
@@ -88,15 +88,18 @@
 // Do a depth-first search to rewrite all pointers that need to be probed
 class ProbeVisitor : public clang::RecursiveASTVisitor<ProbeVisitor> {
  public:
-  explicit ProbeVisitor(clang::ASTContext &C, clang::Rewriter &rewriter, std::set<clang::Decl *> &m);
+  explicit ProbeVisitor(clang::ASTContext &C, clang::Rewriter &rewriter,
+                        std::set<clang::Decl *> &m, bool track_helpers);
   bool VisitVarDecl(clang::VarDecl *Decl);
   bool VisitCallExpr(clang::CallExpr *Call);
   bool VisitBinaryOperator(clang::BinaryOperator *E);
   bool VisitUnaryOperator(clang::UnaryOperator *E);
   bool VisitMemberExpr(clang::MemberExpr *E);
-  void set_ptreg(clang::Decl *D) { ptregs_.insert(D); }
+  void set_ptreg(std::tuple<clang::Decl *, int> &pt) { ptregs_.insert(pt); }
   void set_ctx(clang::Decl *D) { ctx_ = D; }
+  std::set<std::tuple<clang::Decl *, int>> get_ptregs() { return ptregs_; }
  private:
+  bool assignsExtPtr(clang::Expr *E, int *nbAddrOf);
   bool IsContextMemberExpr(clang::Expr *E);
   clang::SourceRange expansionRange(clang::SourceRange range);
   template <unsigned N>
@@ -106,22 +109,24 @@
   clang::Rewriter &rewriter_;
   std::set<clang::Decl *> fn_visited_;
   std::set<clang::Expr *> memb_visited_;
-  std::set<clang::Decl *> ptregs_;
+  std::set<std::tuple<clang::Decl *, int>> ptregs_;
   std::set<clang::Decl *> &m_;
   clang::Decl *ctx_;
+  bool track_helpers_;
 };
 
 // A helper class to the frontend action, walks the decls
 class BTypeConsumer : public clang::ASTConsumer {
  public:
-  explicit BTypeConsumer(clang::ASTContext &C, BFrontendAction &fe, clang::Rewriter &rewriter, std::set<clang::Decl *> &map);
-  bool HandleTopLevelDecl(clang::DeclGroupRef Group) override;
+  explicit BTypeConsumer(clang::ASTContext &C, BFrontendAction &fe,
+                         clang::Rewriter &rewriter, std::set<clang::Decl *> &m);
   void HandleTranslationUnit(clang::ASTContext &Context) override;
  private:
   BFrontendAction &fe_;
   MapVisitor map_visitor_;
   BTypeVisitor btype_visitor_;
-  ProbeVisitor probe_visitor_;
+  ProbeVisitor probe_visitor1_;
+  ProbeVisitor probe_visitor2_;
 };
 
 // Create a B program in 2 phases (everything else is normal C frontend):
@@ -146,6 +151,7 @@
   TableStorage &table_storage() const { return ts_; }
   std::string id() const { return id_; }
   bool is_rewritable_ext_func(clang::FunctionDecl *D);
+  void DoMiscWorkAround();
 
  private:
   llvm::raw_ostream &os_;
diff --git a/src/cc/frontends/clang/loader.cc b/src/cc/frontends/clang/loader.cc
index eb6a307..72c5843 100644
--- a/src/cc/frontends/clang/loader.cc
+++ b/src/cc/frontends/clang/loader.cc
@@ -147,7 +147,11 @@
   // Enable -O2 for clang. In clang 5.0, -O0 may result in function marking as
   // noinline and optnone (if not always inlining).
   // Note that first argument is ignored in clang compilation invocation.
+  // "-D __BPF_TRACING__" below is added to suppress a warning in 4.17+.
+  // It can be removed once clang supports asm-goto or the kernel removes
+  // the warning.
   vector<const char *> flags_cstr({"-O0", "-O2", "-emit-llvm", "-I", dstack.cwd(),
+                                   "-D", "__BPF_TRACING__",
                                    "-Wno-deprecated-declarations",
                                    "-Wno-gnu-variable-sized-type-not-at-end",
                                    "-Wno-pragma-once-outside-header",
diff --git a/src/cc/frontends/p4/compiler/topoSorting.py b/src/cc/frontends/p4/compiler/topoSorting.py
index 76364d0..21daba3 100644
--- a/src/cc/frontends/p4/compiler/topoSorting.py
+++ b/src/cc/frontends/p4/compiler/topoSorting.py
@@ -20,6 +20,7 @@
 
 # -*- coding: utf-8 -*-
 
+from __future__ import print_function
 
 class Node(object):
     def __init__(self, n):
@@ -55,7 +56,7 @@
                 sequence += [str(node)]
             if node._behavioral_topo_sorting_mark == 1:
                 if sequence is not None:
-                    print "cycle", sequence
+                    print("cycle", sequence)
                 return False
             if node._behavioral_topo_sorting_mark != 2:
                 node._behavioral_topo_sorting_mark = 1
diff --git a/src/cc/frontends/p4/test/testP4toEbpf.py b/src/cc/frontends/p4/test/testP4toEbpf.py
index 0e9a298..5406f59 100755
--- a/src/cc/frontends/p4/test/testP4toEbpf.py
+++ b/src/cc/frontends/p4/test/testP4toEbpf.py
@@ -6,6 +6,7 @@
 # Runs the compiler on all files in the 'testprograms' folder
 # Writes outputs in the 'testoutputs' folder
 
+from __future__ import print_function
 from bcc import BPF
 import os, sys
 sys.path.append("../compiler") # To get hold of p4toEbpf
@@ -37,9 +38,9 @@
     errors = 0
 
     if not is_root():
-        print "Loading EBPF programs requires root privilege."
-        print "Will only test compilation, not loading."
-        print "(Run with sudo to test program loading.)"
+        print("Loading EBPF programs requires root privilege.")
+        print("Will only test compilation, not loading.")
+        print("(Run with sudo to test program loading.)")
 
     for f in files:
         path = os.path.join(testpath, f)
@@ -57,7 +58,7 @@
         result = p4toEbpf.process(args)
         if result.kind != "OK":
             errors += 1
-            print path, result.error
+            print(path, result.error)
             set_error(result.kind, path, result.error)
         else:
             # Try to load the compiled function
@@ -72,11 +73,11 @@
 
         filesDone += 1
 
-    print "Compiled", filesDone, "files", errors, "errors"
+    print("Compiled", filesDone, "files", errors, "errors")
     for key in sorted(filesFailed):
-        print key, ":", len(filesFailed[key]), "programs"
+        print(key, ":", len(filesFailed[key]), "programs")
         for v in filesFailed[key]:
-            print "\t", v
+            print("\t", v)
     exit(len(filesFailed) != 0)
 
 
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index 722350e..84094d7 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -156,6 +156,18 @@
   {"bind", "4.17"},
   {"xdp_adjust_tail", "4.18"},
   {"skb_get_xfrm_state", "4.18"},
+  {"get_stack", "4.18"},
+  {"skb_load_bytes_relative", "4.18"},
+  {"fib_lookup", "4.18"},
+  {"sock_hash_update", "4.18"},
+  {"msg_redirect_hash", "4.18"},
+  {"sk_redirect_hash", "4.18"},
+  {"lwt_push_encap", "4.18"},
+  {"lwt_seg6_store_bytes", "4.18"},
+  {"lwt_seg6_adjust_srh", "4.18"},
+  {"lwt_seg6_action", "4.18"},
+  {"rc_repeat", "4.18"},
+  {"rc_keydown", "4.18"},
 };
 
 static uint64_t ptr_to_u64(void *ptr)
@@ -794,7 +806,7 @@
 }
 
 int bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type,
-                      const char *ev_name, const char *fn_name)
+                      const char *ev_name, const char *fn_name, uint64_t fn_offset)
 {
   int kfd, pfd = -1;
   char buf[256];
@@ -802,7 +814,7 @@
   static char *event_type = "kprobe";
 
   // Try create the kprobe Perf Event with perf_event_open API.
-  pfd = bpf_try_perf_event_open_with_probe(fn_name, 0, -1, event_type,
+  pfd = bpf_try_perf_event_open_with_probe(fn_name, fn_offset, -1, event_type,
                                            attach_type != BPF_PROBE_ENTRY);
   // If failed, most likely Kernel doesn't support the new perf_event_open API
   // yet. Try create the event using debugfs.
@@ -815,8 +827,15 @@
     }
 
     snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
-    snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
-             event_type, event_alias, fn_name);
+
+    if (fn_offset > 0 && attach_type == BPF_PROBE_ENTRY)
+      snprintf(buf, sizeof(buf), "p:%ss/%s %s+%"PRIu64,
+               event_type, event_alias, fn_name, fn_offset);
+    else
+      snprintf(buf, sizeof(buf), "%c:%ss/%s %s",
+               attach_type == BPF_PROBE_ENTRY ? 'p' : 'r',
+               event_type, event_alias, fn_name);
+
     if (write(kfd, buf, strlen(buf)) < 0) {
       if (errno == ENOENT)
          fprintf(stderr, "cannot attach kprobe, probe entry may not exist\n");
@@ -995,6 +1014,7 @@
       found_event = 1;
       break;
     }
+  free(cptr);
   fclose(fp);
   fp = NULL;
 
@@ -1305,9 +1325,9 @@
 }
 
 int bpf_attach_perf_event_raw(int progfd, void *perf_event_attr, pid_t pid,
-                              int cpu, int group_fd) {
+                              int cpu, int group_fd, unsigned long extra_flags) {
   int fd = syscall(__NR_perf_event_open, perf_event_attr, pid, cpu, group_fd,
-                   PERF_FLAG_FD_CLOEXEC);
+                   PERF_FLAG_FD_CLOEXEC | extra_flags);
   if (fd < 0) {
     perror("perf_event_open failed");
     return -1;
@@ -1351,7 +1371,7 @@
     attr.sample_period = sample_period;
   }
 
-  return bpf_attach_perf_event_raw(progfd, &attr, pid, cpu, group_fd);
+  return bpf_attach_perf_event_raw(progfd, &attr, pid, cpu, group_fd, 0);
 }
 
 int bpf_close_perf_event_fd(int fd) {
diff --git a/src/cc/libbpf.h b/src/cc/libbpf.h
index 589006f..72bc4a6 100644
--- a/src/cc/libbpf.h
+++ b/src/cc/libbpf.h
@@ -69,7 +69,7 @@
 typedef void (*perf_reader_lost_cb)(void *cb_cookie, uint64_t lost);
 
 int bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type,
-                      const char *ev_name, const char *fn_name);
+                      const char *ev_name, const char *fn_name, uint64_t fn_offset);
 int bpf_detach_kprobe(const char *ev_name);
 
 int bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type,
@@ -93,7 +93,7 @@
 // attach a prog expressed by progfd to run on a specific perf event. The perf
 // event will be created using the perf_event_attr pointer provided.
 int bpf_attach_perf_event_raw(int progfd, void *perf_event_attr, pid_t pid,
-                              int cpu, int group_fd);
+                              int cpu, int group_fd, unsigned long extra_flags);
 // attach a prog expressed by progfd to run on a specific perf event, with
 // certain sample period or sample frequency
 int bpf_attach_perf_event(int progfd, uint32_t ev_type, uint32_t ev_config,
diff --git a/src/cc/usdt/usdt.cc b/src/cc/usdt/usdt.cc
index 7f62ad9..c9f5bff 100644
--- a/src/cc/usdt/usdt.cc
+++ b/src/cc/usdt/usdt.cc
@@ -19,6 +19,7 @@
 #include <unordered_set>
 
 #include <fcntl.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -393,10 +394,19 @@
 void *bcc_usdt_new_frompid(int pid, const char *path) {
   USDT::Context *ctx;
 
-  if (!path)
+  if (!path) {
     ctx = new USDT::Context(pid);
-  else
+  } else {
+    struct stat buffer;
+    if (strlen(path) >= 1 && path[0] != '/') {
+      fprintf(stderr, "HINT: Binary path should be absolute.\n\n");
+      return nullptr;
+    } else if (stat(path, &buffer) == -1) {
+      fprintf(stderr, "HINT: Specified binary doesn't exist.\n\n");
+      return nullptr;
+    }
     ctx = new USDT::Context(pid, path);
+  }
   if (!ctx->loaded()) {
     delete ctx;
     return nullptr;
diff --git a/src/lua/CMakeLists.txt b/src/lua/CMakeLists.txt
index 49eaabd..7541d48 100644
--- a/src/lua/CMakeLists.txt
+++ b/src/lua/CMakeLists.txt
@@ -23,7 +23,7 @@
 	add_executable(bcc-lua src/main.c bcc.o)
 	set_target_properties(bcc-lua PROPERTIES LINKER_LANGUAGE C)
 	target_link_libraries(bcc-lua ${LUAJIT_LIBRARIES})
-	target_link_libraries(bcc-lua -Wl,--whole-archive bcc-lua-static -Wl,--no-whole-archive)
+	target_link_libraries(bcc-lua ${bcc-lua-static})
 	if (NOT COMPILER_NOPIE_FLAG EQUAL "")
 		target_link_libraries(bcc-lua ${COMPILER_NOPIE_FLAG})
 	endif()
diff --git a/src/python/bcc/__init__.py b/src/python/bcc/__init__.py
index 5dccd65..ca9dfc0 100644
--- a/src/python/bcc/__init__.py
+++ b/src/python/bcc/__init__.py
@@ -219,7 +219,7 @@
         if filename:
             if not os.path.isfile(filename):
                 argv0 = ArgString(sys.argv[0])
-                t = b"/".join([os.path.abspath(os.path.dirname(argv0)), filename])
+                t = b"/".join([os.path.abspath(os.path.dirname(argv0.__str__())), filename])
                 if os.path.isfile(t):
                     filename = t
                 else:
@@ -306,7 +306,7 @@
             self.module = lib.bpf_module_create_c_from_string(text,
                     self.debug, cflags_array, len(cflags_array))
             if not self.module:
-                raise Exception("Failed to compile BPF text:\n%s" % text)
+                raise Exception("Failed to compile BPF text")
         else:
             src_file = BPF._find_file(src_file)
             hdr_file = BPF._find_file(hdr_file)
@@ -445,12 +445,12 @@
         if map_fd < 0:
             raise KeyError
         if not keytype:
-            key_desc = lib.bpf_table_key_desc(self.module, name)
+            key_desc = lib.bpf_table_key_desc(self.module, name).decode("utf-8")
             if not key_desc:
                 raise Exception("Failed to load BPF Table %s key desc" % name)
             keytype = BPF._decode_table_type(json.loads(key_desc))
         if not leaftype:
-            leaf_desc = lib.bpf_table_leaf_desc(self.module, name)
+            leaf_desc = lib.bpf_table_leaf_desc(self.module, name).decode("utf-8")
             if not leaf_desc:
                 raise Exception("Failed to load BPF Table %s leaf desc" % name)
             leaftype = BPF._decode_table_type(json.loads(leaf_desc))
@@ -495,17 +495,18 @@
             blacklist = set([line.rstrip().split()[1] for line in blacklist_f])
         fns = []
 
-        found_stext = False
+        in_init_section = 0
         with open("/proc/kallsyms", "rb") as avail_file:
             for line in avail_file:
-                (_, t, fn) = line.rstrip().split()[:3]
-                if found_stext is False:
-                    if fn == b'_stext':
-                        found_stext = True
+                (t, fn) = line.rstrip().split()[1:3]
+                if in_init_section == 0:
+                    if fn == b'__init_begin':
+                        in_init_section = 1
+                        continue
+                elif in_init_section == 1:
+                    if fn == b'__init_end':
+                        in_init_section = 2
                     continue
-
-                if fn == b'_etext':
-                    break
                 if (t.lower() in [b't', b'w']) and re.match(event_re, fn) \
                     and fn not in blacklist:
                     fns.append(fn)
@@ -549,7 +550,7 @@
     def get_syscall_fnname(self, name):
         return self.get_syscall_prefix() + name
        
-    def attach_kprobe(self, event=b"", fn_name=b"", event_re=b""):
+    def attach_kprobe(self, event=b"", event_off=0, fn_name=b"", event_re=b""):
         event = _assert_is_bytes(event)
         fn_name = _assert_is_bytes(fn_name)
         event_re = _assert_is_bytes(event_re)
@@ -568,7 +569,7 @@
         self._check_probe_quota(1)
         fn = self.load_func(fn_name, BPF.KPROBE)
         ev_name = b"p_" + event.replace(b"+", b"_").replace(b".", b"_")
-        fd = lib.bpf_attach_kprobe(fn.fd, 0, ev_name, event)
+        fd = lib.bpf_attach_kprobe(fn.fd, 0, ev_name, event, event_off)
         if fd < 0:
             raise Exception("Failed to attach BPF to kprobe")
         self._add_kprobe_fd(ev_name, fd)
@@ -1057,7 +1058,7 @@
                 # however, the illegal address will be printed out.
                 # Hence, both cases are handled here.
                 line = line[ts_end + 1:]
-                sym_end = line.find(":")
+                sym_end = line.find(b":")
                 msg = line[sym_end + 2:]
                 return (task, int(pid), int(cpu), flags, float(ts), msg)
         except KeyboardInterrupt:
diff --git a/src/python/bcc/usdt.py b/src/python/bcc/usdt.py
index 9934e22..5fb1cda 100644
--- a/src/python/bcc/usdt.py
+++ b/src/python/bcc/usdt.py
@@ -175,7 +175,7 @@
 
     def get_probe_arg_ctype(self, probe_name, arg_index):
         return lib.bcc_usdt_get_probe_argctype(
-            self.context, probe_name, arg_index)
+            self.context, probe_name.encode('ascii'), arg_index).decode()
 
     def enumerate_probes(self):
         probes = []
diff --git a/tests/python/test_clang.py b/tests/python/test_clang.py
index b740256..35cabb2 100755
--- a/tests/python/test_clang.py
+++ b/tests/python/test_clang.py
@@ -76,6 +76,24 @@
         b = BPF(text=text, debug=0)
         fn = b.load_func("count_foo", BPF.KPROBE)
 
+    def test_probe_read3(self):
+        text = """
+#define KBUILD_MODNAME "foo"
+#include <net/tcp.h>
+int count_tcp(struct pt_regs *ctx, struct sk_buff *skb) {
+    // The below define is in net/tcp.h:
+    //    #define TCP_SKB_CB(__skb)	((struct tcp_skb_cb *)&((__skb)->cb[0]))
+    // Note that it has AddrOf in the macro, which will cause the current
+    // rewriter to fail on the below statement
+    // return TCP_SKB_CB(skb)->tcp_gso_size;
+    u16 val = 0;
+    bpf_probe_read(&val, sizeof(val), &(TCP_SKB_CB(skb)->tcp_gso_size));
+    return val;
+}
+"""
+        b = BPF(text=text)
+        fn = b.load_func("count_tcp", BPF.KPROBE)
+
     def test_probe_read_keys(self):
         text = """
 #include <uapi/linux/ptrace.h>
@@ -343,6 +361,32 @@
     return 0;
 }""")
 
+    def test_probe_simple_member_assign(self):
+        b = BPF(text="""
+#include <uapi/linux/ptrace.h>
+#include <linux/netdevice.h>
+struct leaf { void *ptr; };
+int test(struct pt_regs *ctx, struct sk_buff *skb) {
+    struct leaf l = {};
+    struct leaf *lp = &l;
+    lp->ptr = skb;
+    return 0;
+}""")
+        b.load_func("test", BPF.KPROBE)
+
+    def test_probe_member_expr(self):
+        b = BPF(text="""
+#include <uapi/linux/ptrace.h>
+#include <linux/netdevice.h>
+struct leaf { struct sk_buff *ptr; };
+int test(struct pt_regs *ctx, struct sk_buff *skb) {
+    struct leaf l = {};
+    struct leaf *lp = &l;
+    lp->ptr = skb;
+    return lp->ptr->priority;
+}""")
+        b.load_func("test", BPF.KPROBE)
+
     def test_unop_probe_read(self):
         text = """
 #include <linux/blkdev.h>
@@ -357,6 +401,52 @@
         b = BPF(text=text)
         fn = b.load_func("trace_entry", BPF.KPROBE)
 
+    def test_probe_read_nested_deref(self):
+        text = """
+#include <net/inet_sock.h>
+int test(struct pt_regs *ctx, struct sock *sk) {
+    struct sock *ptr1;
+    struct sock **ptr2 = &ptr1;
+    *ptr2 = sk;
+    return ((struct sock *)(*ptr2))->sk_daddr;
+}
+"""
+        b = BPF(text=text)
+        fn = b.load_func("test", BPF.KPROBE)
+
+    def test_probe_read_nested_deref2(self):
+        text = """
+#include <net/inet_sock.h>
+int test(struct pt_regs *ctx, struct sock *sk) {
+    struct sock *ptr1;
+    struct sock **ptr2 = &ptr1;
+    struct sock ***ptr3 = &ptr2;
+    *ptr2 = sk;
+    *ptr3 = ptr2;
+    return ((struct sock *)(**ptr3))->sk_daddr;
+}
+"""
+        b = BPF(text=text)
+        fn = b.load_func("test", BPF.KPROBE)
+
+    def test_probe_read_nested_deref_func(self):
+        text = """
+#include <net/inet_sock.h>
+static int subtest(struct sock ***skp) {
+    return ((struct sock *)(**skp))->sk_daddr;
+}
+int test(struct pt_regs *ctx, struct sock *sk) {
+    struct sock *ptr1;
+    struct sock **ptr2 = &ptr1;
+    struct sock ***ptr3 = &ptr2;
+    *ptr2 = sk;
+    *ptr3 = ptr2;
+    return subtest(ptr3);
+}
+"""
+        b = BPF(text=text)
+        fn = b.load_func("test", BPF.KPROBE)
+
     def test_paren_probe_read(self):
         text = """
 #include <net/inet_sock.h>
@@ -466,7 +556,7 @@
         t = b["act"]
         self.assertEqual(len(t), 32);
 
-    def test_ext_ptr_maps(self):
+    def test_ext_ptr_maps1(self):
         bpf_text = """
 #include <uapi/linux/ptrace.h>
 #include <net/sock.h>
@@ -496,6 +586,94 @@
         b.load_func("trace_entry", BPF.KPROBE)
         b.load_func("trace_exit", BPF.KPROBE)
 
+    def test_ext_ptr_maps2(self):
+        bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <net/sock.h>
+#include <bcc/proto.h>
+
+BPF_HASH(currsock, u32, struct sock *);
+
+int trace_entry(struct pt_regs *ctx, struct sock *sk,
+    struct sockaddr *uaddr, int addr_len) {
+    u32 pid = bpf_get_current_pid_tgid();
+    currsock.update(&pid, &sk);
+    return 0;
+};
+
+int trace_exit(struct pt_regs *ctx) {
+    u32 pid = bpf_get_current_pid_tgid();
+    struct sock **skpp = currsock.lookup(&pid);
+    if (skpp) {
+        struct sock *skp = *skpp;
+        return skp->__sk_common.skc_dport;
+    }
+    return 0;
+}
+        """
+        b = BPF(text=bpf_text)
+        b.load_func("trace_entry", BPF.KPROBE)
+        b.load_func("trace_exit", BPF.KPROBE)
+
+    def test_ext_ptr_maps_reverse(self):
+        bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <net/sock.h>
+#include <bcc/proto.h>
+
+BPF_HASH(currsock, u32, struct sock *);
+
+int trace_exit(struct pt_regs *ctx) {
+    u32 pid = bpf_get_current_pid_tgid();
+    struct sock **skpp;
+    skpp = currsock.lookup(&pid);
+    if (skpp) {
+        struct sock *skp = *skpp;
+        return skp->__sk_common.skc_dport;
+    }
+    return 0;
+}
+
+int trace_entry(struct pt_regs *ctx, struct sock *sk) {
+    u32 pid = bpf_get_current_pid_tgid();
+    currsock.update(&pid, &sk);
+    return 0;
+};
+        """
+        b = BPF(text=bpf_text)
+        b.load_func("trace_entry", BPF.KPROBE)
+        b.load_func("trace_exit", BPF.KPROBE)
+
+    def test_ext_ptr_maps_indirect(self):
+        bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <net/sock.h>
+#include <bcc/proto.h>
+
+BPF_HASH(currsock, u32, struct sock *);
+
+int trace_entry(struct pt_regs *ctx, struct sock *sk) {
+    u32 pid = bpf_get_current_pid_tgid();
+    struct sock **skp = &sk;
+    currsock.update(&pid, skp);
+    return 0;
+};
+
+int trace_exit(struct pt_regs *ctx) {
+    u32 pid = bpf_get_current_pid_tgid();
+    struct sock **skpp;
+    skpp = currsock.lookup(&pid);
+    if (skpp) {
+        struct sock *skp = *skpp;
+        return skp->__sk_common.skc_dport;
+    }
+    return 0;
+}
+        """
+        b = BPF(text=bpf_text)
+        b.load_func("trace_entry", BPF.KPROBE)
+        b.load_func("trace_exit", BPF.KPROBE)
+
     def test_bpf_dins_pkt_rewrite(self):
         text = """
 #include <bcc/proto.h>
@@ -725,6 +903,25 @@
         b = BPF(text=text)
         fn = b.load_func("test", BPF.KPROBE)
 
+    @skipUnless(kernel_version_ge(4,7), "requires kernel >= 4.7")
+    def test_probe_read_tc_ctx(self):
+        text = """
+#include <uapi/linux/pkt_cls.h>
+#include <linux/if_ether.h>
+int test(struct __sk_buff *ctx) {
+    void* data_end = (void*)(long)ctx->data_end;
+    void* data = (void*)(long)ctx->data;
+    if (data + sizeof(struct ethhdr) > data_end)
+        return TC_ACT_SHOT;
+    struct ethhdr *eh = (struct ethhdr *)data;
+    if (eh->h_proto == 0x1)
+        return TC_ACT_SHOT;
+    return TC_ACT_OK;
+}
+"""
+        b = BPF(text=text)
+        fn = b.load_func("test", BPF.SCHED_CLS)
+
 
 if __name__ == "__main__":
     main()
diff --git a/tests/python/test_tools_smoke.py b/tests/python/test_tools_smoke.py
index 78a2049..1a04785 100755
--- a/tests/python/test_tools_smoke.py
+++ b/tests/python/test_tools_smoke.py
@@ -308,7 +308,7 @@
 
     @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
     def test_tcplife(self):
-        self.run_with_int("tcpconnlat.py")
+        self.run_with_int("tcplife.py")
 
     @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
     def test_tcpretrans(self):
diff --git a/tools/argdist.py b/tools/argdist.py
index 9724073..dfb06b4 100755
--- a/tools/argdist.py
+++ b/tools/argdist.py
@@ -610,7 +610,9 @@
                   type=int,
                   help="maximum string size to read from char* arguments")
                 parser.add_argument("-i", "--interval", default=1, type=int,
-                  help="output interval, in seconds")
+                  help="output interval, in seconds (default 1 second)")
+                parser.add_argument("-d", "--duration", type=int,
+                  help="total duration of trace, in seconds")
                 parser.add_argument("-n", "--number", type=int, dest="count",
                   help="number of outputs")
                 parser.add_argument("-v", "--verbose", action="store_true",
@@ -684,9 +686,11 @@
 
         def _main_loop(self):
                 count_so_far = 0
+                seconds = 0
                 while True:
                         try:
                                 sleep(self.args.interval)
+                                seconds += self.args.interval
                         except KeyboardInterrupt:
                                 exit()
                         print("[%s]" % strftime("%H:%M:%S"))
@@ -696,6 +700,9 @@
                         if self.args.count is not None and \
                            count_so_far >= self.args.count:
                                 exit()
+                        if self.args.duration and \
+                           seconds >= self.args.duration:
+                                exit()
 
         def run(self):
                 try:
diff --git a/tools/argdist_example.txt b/tools/argdist_example.txt
index dec9e6f..7098e56 100644
--- a/tools/argdist_example.txt
+++ b/tools/argdist_example.txt
@@ -348,7 +348,9 @@
   -z STRING_SIZE, --string-size STRING_SIZE
                         maximum string size to read from char* arguments
   -i INTERVAL, --interval INTERVAL
-                        output interval, in seconds
+                        output interval, in seconds (default 1 second)
+  -d DURATION, --duration DURATION
+			total duration of trace, in seconds
   -n COUNT, --number COUNT
                         number of outputs
   -v, --verbose         print resulting BPF program code before executing
diff --git a/tools/btrfsslower.py b/tools/btrfsslower.py
index a84a511..644cb22 100755
--- a/tools/btrfsslower.py
+++ b/tools/btrfsslower.py
@@ -224,8 +224,8 @@
     // workaround (rewriter should handle file to d_name in one step):
     struct dentry *de = NULL;
     struct qstr qs = {};
-    bpf_probe_read(&de, sizeof(de), &valp->fp->f_path.dentry);
-    bpf_probe_read(&qs, sizeof(qs), (void *)&de->d_name);
+    de = valp->fp->f_path.dentry;
+    qs = de->d_name;
     if (qs.len == 0)
         return 0;
     bpf_probe_read(&data.file, sizeof(data.file), (void *)qs.name);
diff --git a/tools/execsnoop.py b/tools/execsnoop.py
index 7079408..9a66b39 100755
--- a/tools/execsnoop.py
+++ b/tools/execsnoop.py
@@ -213,8 +213,9 @@
                 print("%-8.3f" % (time.time() - start_ts), end="")
             ppid = get_ppid(event.pid)
             ppid = b"%d" % ppid if ppid > 0 else b"?"
+            argv_text = b' '.join(argv[event.pid]).replace(b'\n', b'\\n')
             printb(b"%-16s %-6d %-6s %3d %s" % (event.comm, event.pid,
-                   ppid, event.retval, b' '.join(argv[event.pid])))
+                   ppid, event.retval, argv_text))
         try:
             del(argv[event.pid])
         except Exception:
diff --git a/tools/ext4slower.py b/tools/ext4slower.py
index 01c74ef..756d826 100755
--- a/tools/ext4slower.py
+++ b/tools/ext4slower.py
@@ -219,8 +219,8 @@
     // workaround (rewriter should handle file to d_name in one step):
     struct dentry *de = NULL;
     struct qstr qs = {};
-    bpf_probe_read(&de, sizeof(de), &valp->fp->f_path.dentry);
-    bpf_probe_read(&qs, sizeof(qs), (void *)&de->d_name);
+    de = valp->fp->f_path.dentry;
+    qs = de->d_name;
     if (qs.len == 0)
         return 0;
     bpf_probe_read(&data.file, sizeof(data.file), (void *)qs.name);
diff --git a/tools/funclatency.py b/tools/funclatency.py
index fe06d1b..3f08a7e 100755
--- a/tools/funclatency.py
+++ b/tools/funclatency.py
@@ -35,6 +35,7 @@
     ./funclatency c:read            # time the read() C library function
     ./funclatency -u vfs_read       # time vfs_read(), in microseconds
     ./funclatency -m do_nanosleep   # time do_nanosleep(), in milliseconds
+    ./funclatency -i 2 -d 10 c:open # output every 2 seconds, for duration 10s
     ./funclatency -mTi 5 vfs_read   # output every 5 seconds, with timestamps
     ./funclatency -p 181 vfs_read   # time process 181 only
     ./funclatency 'vfs_fstat*'      # time both vfs_fstat() and vfs_fstatat()
@@ -47,8 +48,10 @@
     epilog=examples)
 parser.add_argument("-p", "--pid", type=int,
     help="trace this PID only")
-parser.add_argument("-i", "--interval", default=99999999,
-    help="summary interval, seconds")
+parser.add_argument("-i", "--interval", type=int,
+    help="summary interval, in seconds")
+parser.add_argument("-d", "--duration", type=int,
+    help="total duration of trace, in seconds")
 parser.add_argument("-T", "--timestamp", action="store_true",
     help="include timestamp on output")
 parser.add_argument("-u", "--microseconds", action="store_true",
@@ -66,6 +69,10 @@
 parser.add_argument("--ebpf", action="store_true",
     help=argparse.SUPPRESS)
 args = parser.parse_args()
+if args.duration and not args.interval:
+    args.interval = args.duration
+if not args.interval:
+    args.interval = 99999999
 
 def bail(error):
     print("Error: " + error)
@@ -226,14 +233,18 @@
         return "%s [%d]" % (BPF.sym(key[0], key[1]), key[1])
 
 exiting = 0 if args.interval else 1
+seconds = 0
 dist = b.get_table("dist")
 while (1):
     try:
-        sleep(int(args.interval))
+        sleep(args.interval)
+        seconds += args.interval
     except KeyboardInterrupt:
         exiting = 1
         # as cleanup can take many seconds, trap Ctrl-C:
         signal.signal(signal.SIGINT, signal_ignore)
+    if args.duration and seconds >= args.duration:
+        exiting = 1
 
     print()
     if args.timestamp:
diff --git a/tools/funclatency_example.txt b/tools/funclatency_example.txt
index ee63a5b..d8217a2 100644
--- a/tools/funclatency_example.txt
+++ b/tools/funclatency_example.txt
@@ -343,7 +343,9 @@
   -h, --help            show this help message and exit
   -p PID, --pid PID     trace this PID only
   -i INTERVAL, --interval INTERVAL
-                        summary interval, seconds
+                        summary interval, in seconds
+  -d DURATION, --duration DURATION
+			total duration of trace, in seconds
   -T, --timestamp       include timestamp on output
   -u, --microseconds    microsecond histogram
   -m, --milliseconds    millisecond histogram
@@ -357,6 +359,7 @@
     ./funclatency c:read            # time the read() C library function
     ./funclatency -u vfs_read       # time vfs_read(), in microseconds
     ./funclatency -m do_nanosleep   # time do_nanosleep(), in milliseconds
+    ./funclatency -i 2 -d 10 c:open # output every 2 seconds, for duration 10s
     ./funclatency -mTi 5 vfs_read   # output every 5 seconds, with timestamps
     ./funclatency -p 181 vfs_read   # time process 181 only
     ./funclatency 'vfs_fstat*'      # time both vfs_fstat() and vfs_fstatat()
diff --git a/tools/old/bashreadline.py b/tools/old/bashreadline.py
index 464dfda..571b662 100755
--- a/tools/old/bashreadline.py
+++ b/tools/old/bashreadline.py
@@ -22,7 +22,7 @@
         return 0;
 
     char str[80] = {};
-    bpf_probe_read(&str, sizeof(str), (void *)ctx->ax);
+    bpf_probe_read(&str, sizeof(str), (void *)PT_REGS_RC(ctx));
     bpf_trace_printk("%s\\n", &str);
 
     return 0;
diff --git a/tools/old/tcpaccept.py b/tools/old/tcpaccept.py
index 2fda8a7..8125eaa 100755
--- a/tools/old/tcpaccept.py
+++ b/tools/old/tcpaccept.py
@@ -47,7 +47,7 @@
 
 int kretprobe__inet_csk_accept(struct pt_regs *ctx)
 {
-    struct sock *newsk = (struct sock *)ctx->ax;
+    struct sock *newsk = (struct sock *)PT_REGS_RC(ctx);
     u32 pid = bpf_get_current_pid_tgid();
 
     if (newsk == NULL)
diff --git a/tools/old/tcpconnect.py b/tools/old/tcpconnect.py
index 2fb5307..579a85f 100755
--- a/tools/old/tcpconnect.py
+++ b/tools/old/tcpconnect.py
@@ -55,7 +55,7 @@
 
 static int trace_connect_return(struct pt_regs *ctx, short ipver)
 {
-    int ret = ctx->ax;
+    int ret = PT_REGS_RC(ctx);
     u32 pid = bpf_get_current_pid_tgid();
 
     struct sock **skpp;
@@ -75,12 +75,10 @@
     struct sock *skp = *skpp;
     u32 saddr = 0, daddr = 0;
     u16 dport = 0;
-    bpf_probe_read(&dport, sizeof(dport), &skp->__sk_common.skc_dport);
+    dport = skp->__sk_common.skc_dport;
     if (ipver == 4) {
-        bpf_probe_read(&saddr, sizeof(saddr),
-            &skp->__sk_common.skc_rcv_saddr);
-        bpf_probe_read(&daddr, sizeof(daddr),
-            &skp->__sk_common.skc_daddr);
+        saddr = skp->__sk_common.skc_rcv_saddr;
+        daddr = skp->__sk_common.skc_daddr;
 
         // output
         bpf_trace_printk("4 %x %x %d\\n", saddr, daddr, ntohs(dport));
diff --git a/tools/profile.py b/tools/profile.py
index bf532ce..3040a48 100755
--- a/tools/profile.py
+++ b/tools/profile.py
@@ -9,10 +9,6 @@
 # counting there. Only the unique stacks and counts are passed to user space
 # at the end of the profile, greatly reducing the kernel<->user transfer.
 #
-# This uses perf_event_open to setup a timer which is instrumented by BPF,
-# and for efficiency it does not initialize the perf ring buffer, so the
-# redundant perf samples are not collected.
-#
 # REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). Under tools/old is
 # a version of this tool that may work on Linux 4.6 - 4.8.
 #
diff --git a/tools/runqlat.py b/tools/runqlat.py
index ebda11d..085543c 100755
--- a/tools/runqlat.py
+++ b/tools/runqlat.py
@@ -12,7 +12,8 @@
 #
 # This measures two types of run queue latency:
 # 1. The time from a task being enqueued on a run queue to its context switch
-#    and execution. This traces enqueue_task_*() -> finish_task_switch(),
+#    and execution. This traces ttwu_do_wakeup(), wake_up_new_task() ->
+#    finish_task_switch() with either raw tracepoints (if supported) or kprobes
 #    and instruments the run queue latency after a voluntary context switch.
 # 2. The time from when a task was involuntary context switched and still
 #    in the runnable state, to when it next executed. This is instrumented
@@ -89,7 +90,7 @@
 // record enqueue timestamp
 static int trace_enqueue(u32 tgid, u32 pid)
 {
-    if (FILTER)
+    if (FILTER || pid == 0)
         return 0;
     u64 ts = bpf_ktime_get_ns();
     start.update(&pid, &ts);
@@ -118,7 +119,7 @@
     if (prev->state == TASK_RUNNING) {
         tgid = prev->tgid;
         pid = prev->pid;
-        if (!(FILTER)) {
+        if (!(FILTER || pid == 0)) {
             u64 ts = bpf_ktime_get_ns();
             start.update(&pid, &ts);
         }
@@ -126,7 +127,7 @@
 
     tgid = bpf_get_current_pid_tgid() >> 32;
     pid = bpf_get_current_pid_tgid();
-    if (FILTER)
+    if (FILTER || pid == 0)
         return 0;
     u64 *tsp, delta;
 
@@ -182,7 +183,7 @@
     if (state == TASK_RUNNING) {
         bpf_probe_read(&tgid, sizeof(prev->tgid), &prev->tgid);
         bpf_probe_read(&pid, sizeof(prev->pid), &prev->pid);
-        if (!(FILTER)) {
+        if (!(FILTER || pid == 0)) {
             u64 ts = bpf_ktime_get_ns();
             start.update(&pid, &ts);
         }
@@ -190,7 +191,7 @@
 
     bpf_probe_read(&tgid, sizeof(next->tgid), &next->tgid);
     bpf_probe_read(&pid, sizeof(next->pid), &next->pid);
-    if (FILTER)
+    if (FILTER || pid == 0)
         return 0;
     u64 *tsp, delta;
 
diff --git a/tools/runqlen.py b/tools/runqlen.py
index 4217f4d..b56a591 100755
--- a/tools/runqlen.py
+++ b/tools/runqlen.py
@@ -182,8 +182,6 @@
     if args.ebpf:
         exit()
 
-# load BPF program
-b = BPF(text=bpf_text)
 # initialize BPF & perf_events
 b = BPF(text=bpf_text)
 b.attach_perf_event(ev_type=PerfType.SOFTWARE,
diff --git a/tools/runqslower.py b/tools/runqslower.py
new file mode 100755
index 0000000..a28a823
--- /dev/null
+++ b/tools/runqslower.py
@@ -0,0 +1,261 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# runqslower    Trace long process scheduling delays.
+#               For Linux, uses BCC, eBPF.
+#
+# This script traces high scheduling delays between tasks being
+# ready to run and them running on CPU after that.
+#
+# USAGE: runqslower [-p PID] [min_us]
+#
+# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support).
+#
+# This measures the time a task spends waiting on a run queue for a turn
+# on-CPU, and shows this time as individual events. This time should be small,
+# but a task may need to wait its turn due to CPU load.
+#
+# This measures two types of run queue latency:
+# 1. The time from a task being enqueued on a run queue to its context switch
+#    and execution. This traces ttwu_do_wakeup(), wake_up_new_task() ->
+#    finish_task_switch() with either raw tracepoints (if supported) or kprobes
+#    and instruments the run queue latency after a voluntary context switch.
+# 2. The time from when a task was involuntary context switched and still
+#    in the runnable state, to when it next executed. This is instrumented
+#    from finish_task_switch() alone.
+#
+# Copyright 2016 Cloudflare, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 02-May-2018   Ivan Babrou   Created this.
+
+from __future__ import print_function
+from bcc import BPF
+import argparse
+from time import strftime
+import ctypes as ct
+
+# arguments
+examples = """examples:
+    ./runqslower         # trace run queue latency higher than 10000 us (default)
+    ./runqslower 1000    # trace run queue latency higher than 1000 us
+    ./runqslower -p 123  # trace pid 123 only
+"""
+parser = argparse.ArgumentParser(
+    description="Trace high run queue latency",
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="pid",
+    help="trace this PID only")
+parser.add_argument("min_us", nargs="?", default='10000',
+    help="minimum run queue latency to trace, in us (default 10000)")
+parser.add_argument("--ebpf", action="store_true",
+    help=argparse.SUPPRESS)
+args = parser.parse_args()
+min_us = int(args.min_us)
+debug = 0
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/nsproxy.h>
+#include <linux/pid_namespace.h>
+
+BPF_HASH(start, u32);
+
+struct rq;
+
+struct data_t {
+    u32 pid;
+    char task[TASK_COMM_LEN];
+    u64 delta_us;
+};
+
+BPF_PERF_OUTPUT(events);
+
+// record enqueue timestamp
+static int trace_enqueue(u32 tgid, u32 pid)
+{
+    if (FILTER_PID || pid == 0)
+        return 0;
+    u64 ts = bpf_ktime_get_ns();
+    start.update(&pid, &ts);
+    return 0;
+}
+"""
+
+bpf_text_kprobe = """
+int trace_wake_up_new_task(struct pt_regs *ctx, struct task_struct *p)
+{
+    return trace_enqueue(p->tgid, p->pid);
+}
+
+int trace_ttwu_do_wakeup(struct pt_regs *ctx, struct rq *rq, struct task_struct *p,
+    int wake_flags)
+{
+    return trace_enqueue(p->tgid, p->pid);
+}
+
+// calculate latency
+int trace_run(struct pt_regs *ctx, struct task_struct *prev)
+{
+    u32 pid, tgid;
+
+    // ivcsw: treat like an enqueue event and store timestamp
+    if (prev->state == TASK_RUNNING) {
+        tgid = prev->tgid;
+        pid = prev->pid;
+        if (!(FILTER_PID || pid == 0)) {
+            u64 ts = bpf_ktime_get_ns();
+            start.update(&pid, &ts);
+        }
+    }
+
+    tgid = bpf_get_current_pid_tgid() >> 32;
+    pid = bpf_get_current_pid_tgid();
+
+    u64 *tsp, delta_us;
+
+    // fetch timestamp and calculate delta
+    tsp = start.lookup(&pid);
+    if (tsp == 0) {
+        return 0;   // missed enqueue
+    }
+    delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
+
+    if (FILTER_US)
+        return 0;
+
+    struct data_t data = {};
+    data.pid = pid;
+    data.delta_us = delta_us;
+    bpf_get_current_comm(&data.task, sizeof(data.task));
+
+    // output
+    events.perf_submit(ctx, &data, sizeof(data));
+
+    start.delete(&pid);
+    return 0;
+}
+"""
+
+bpf_text_raw_tp = """
+RAW_TRACEPOINT_PROBE(sched_wakeup)
+{
+    // TP_PROTO(struct task_struct *p)
+    struct task_struct *p = (struct task_struct *)ctx->args[0];
+    u32 tgid, pid;
+
+    bpf_probe_read(&tgid, sizeof(tgid), &p->tgid);
+    bpf_probe_read(&pid, sizeof(pid), &p->pid);
+    return trace_enqueue(tgid, pid);
+}
+
+RAW_TRACEPOINT_PROBE(sched_wakeup_new)
+{
+    // TP_PROTO(struct task_struct *p)
+    struct task_struct *p = (struct task_struct *)ctx->args[0];
+    u32 tgid, pid;
+
+    bpf_probe_read(&tgid, sizeof(tgid), &p->tgid);
+    bpf_probe_read(&pid, sizeof(pid), &p->pid);
+    return trace_enqueue(tgid, pid);
+}
+
+RAW_TRACEPOINT_PROBE(sched_switch)
+{
+    // TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next)
+    struct task_struct *prev = (struct task_struct *)ctx->args[1];
+    struct task_struct *next= (struct task_struct *)ctx->args[2];
+    u32 pid, tgid;
+    long state;
+
+    // ivcsw: treat like an enqueue event and store timestamp
+    bpf_probe_read(&state, sizeof(long), &prev->state);
+    if (state == TASK_RUNNING) {
+        bpf_probe_read(&tgid, sizeof(prev->tgid), &prev->tgid);
+        bpf_probe_read(&pid, sizeof(prev->pid), &prev->pid);
+        if (!(FILTER_PID || pid == 0)) {
+            u64 ts = bpf_ktime_get_ns();
+            start.update(&pid, &ts);
+        }
+    }
+
+    bpf_probe_read(&tgid, sizeof(next->tgid), &next->tgid);
+    bpf_probe_read(&pid, sizeof(next->pid), &next->pid);
+
+    u64 *tsp, delta_us;
+
+    // fetch timestamp and calculate delta
+    tsp = start.lookup(&pid);
+    if (tsp == 0) {
+        return 0;   // missed enqueue
+    }
+    delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
+
+    if (FILTER_US)
+        return 0;
+
+    struct data_t data = {};
+    data.pid = pid;
+    data.delta_us = delta_us;
+    bpf_get_current_comm(&data.task, sizeof(data.task));
+
+    // output
+    events.perf_submit(ctx, &data, sizeof(data));
+
+    start.delete(&pid);
+    return 0;
+}
+"""
+
+is_support_raw_tp = BPF.support_raw_tracepoint()
+if is_support_raw_tp:
+    bpf_text += bpf_text_raw_tp
+else:
+    bpf_text += bpf_text_kprobe
+
+# code substitutions
+if min_us == 0:
+    bpf_text = bpf_text.replace('FILTER_US', '0')
+else:
+    bpf_text = bpf_text.replace('FILTER_US', 'delta_us <= %s' % str(min_us))
+if args.pid:
+    bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % args.pid)
+else:
+    bpf_text = bpf_text.replace('FILTER_PID', '0')
+if debug or args.ebpf:
+    print(bpf_text)
+    if args.ebpf:
+        exit()
+
+# kernel->user event data: struct data_t
+DNAME_INLINE_LEN = 32   # linux/dcache.h
+TASK_COMM_LEN = 16      # linux/sched.h
+class Data(ct.Structure):
+    _fields_ = [
+        ("pid", ct.c_uint),
+        ("task", ct.c_char * TASK_COMM_LEN),
+        ("delta_us", ct.c_ulonglong),
+    ]
+
+# process event
+def print_event(cpu, data, size):
+    event = ct.cast(data, ct.POINTER(Data)).contents
+    print("%-8s %-16s %-6s %14s" % (strftime("%H:%M:%S"), event.task, event.pid, event.delta_us))
+
+# load BPF program
+b = BPF(text=bpf_text)
+if not is_support_raw_tp:
+    b.attach_kprobe(event="ttwu_do_wakeup", fn_name="trace_ttwu_do_wakeup")
+    b.attach_kprobe(event="wake_up_new_task", fn_name="trace_wake_up_new_task")
+    b.attach_kprobe(event="finish_task_switch", fn_name="trace_run")
+
+print("Tracing run queue latency higher than %d us" % min_us)
+print("%-8s %-16s %-6s %14s" % ("TIME", "COMM", "PID", "LAT(us)"))
+
+# read events
+b["events"].open_perf_buffer(print_event, page_cnt=64)
+while 1:
+    b.perf_buffer_poll()
diff --git a/tools/runqslower_example.txt b/tools/runqslower_example.txt
new file mode 100644
index 0000000..64b604e
--- /dev/null
+++ b/tools/runqslower_example.txt
@@ -0,0 +1,49 @@
+Demonstrations of runqslower, the Linux eBPF/bcc version.
+
+
+runqslower shows high latency scheduling times between tasks being
+ready to run and them running on CPU after that. For example:
+
+# runqslower
+Tracing run queue latency higher than 10000 us
+TIME     COMM             PID           LAT(us)
+04:16:32 cc1              12924           12739
+04:16:32 sh               13640           12118
+04:16:32 make             13639           12730
+04:16:32 bash             13655           12047
+04:16:32 bash             13657           12744
+04:16:32 bash             13656           12880
+04:16:32 sh               13660           10846
+04:16:32 gcc              13663           12681
+04:16:32 make             13668           10814
+04:16:32 make             13670           12988
+04:16:32 gcc              13677           11770
+04:16:32 gcc              13678           23519
+04:16:32 as               12999           20541
+[...]
+
+This shows various processes waiting for available CPU during a Linux kernel
+build. By default the output contains delays for more than 10ms.
+
+These delays can be analyzed in depth with "perf sched" tool, see:
+
+* http://www.brendangregg.com/blog/2017-03-16/perf-sched.html
+
+USAGE message:
+
+# ./runqslower -h
+usage: runqslower.py [-h] [-p PID] [min_us]
+
+Trace high run queue latency
+
+positional arguments:
+  min_us             minimum run queue latency to trace, in us (default 10000)
+
+optional arguments:
+  -h, --help         show this help message and exit
+  -p PID, --pid PID  trace this PID only
+
+examples:
+    ./runqslower         # trace run queue latency higher than 10000 us (default)
+    ./runqslower 1000    # trace run queue latency higher than 1000 us
+    ./runqslower -p 123  # trace pid 123 only
diff --git a/tools/syscount.py b/tools/syscount.py
index e219e91..a23abde 100755
--- a/tools/syscount.py
+++ b/tools/syscount.py
@@ -17,6 +17,7 @@
 import itertools
 import subprocess
 import sys
+import signal
 import platform
 
 if sys.version_info.major < 3:
@@ -370,6 +371,9 @@
     else:
         raise Exception("ausyscall: command not found")
 
+# signal handler
+def signal_ignore(signal, frame):
+    print()
 
 def handle_errno(errstr):
     try:
@@ -388,6 +392,8 @@
 parser.add_argument("-p", "--pid", type=int, help="trace only this pid")
 parser.add_argument("-i", "--interval", type=int,
     help="print summary at this interval (seconds)")
+parser.add_argument("-d", "--duration", type=int,
+    help="total duration of trace, in seconds")
 parser.add_argument("-T", "--top", type=int, default=10,
     help="print only the top syscalls by count or latency")
 parser.add_argument("-x", "--failures", action="store_true",
@@ -405,6 +411,10 @@
 parser.add_argument("--ebpf", action="store_true",
     help=argparse.SUPPRESS)
 args = parser.parse_args()
+if args.duration and not args.interval:
+    args.interval = args.duration
+if not args.interval:
+    args.interval = 99999999
 
 if args.list:
     for grp in izip_longest(*(iter(sorted(syscalls.values())),) * 4):
@@ -545,11 +555,20 @@
 
 print("Tracing %ssyscalls, printing top %d... Ctrl+C to quit." %
       ("failed " if args.failures else "", args.top))
+exiting = 0 if args.interval else 1
+seconds = 0
 while True:
     try:
-        sleep(args.interval or 999999999)
-        print_stats()
+        sleep(args.interval)
+        seconds += args.interval
     except KeyboardInterrupt:
-        if not args.interval:
-            print_stats()
-        break
+        exiting = 1
+        signal.signal(signal.SIGINT, signal_ignore)
+    if args.duration and seconds >= args.duration:
+        exiting = 1
+
+    print_stats()
+
+    if exiting:
+        print("Detaching...")
+        exit()
diff --git a/tools/syscount_example.txt b/tools/syscount_example.txt
index be6d3e6..aad51c4 100644
--- a/tools/syscount_example.txt
+++ b/tools/syscount_example.txt
@@ -151,6 +151,8 @@
   -p PID, --pid PID     trace only this pid
   -i INTERVAL, --interval INTERVAL
                         print summary at this interval (seconds)
+  -d DURATION, --duration DURATION
+			total duration of trace, in seconds
   -T TOP, --top TOP     print only the top syscalls by count or latency
   -x, --failures        trace only failed syscalls (return < 0)
   -e ERRNO, --errno ERRNO
diff --git a/tools/tcplife.py b/tools/tcplife.py
index 560bb6f..bda0788 100755
--- a/tools/tcplife.py
+++ b/tools/tcplife.py
@@ -128,6 +128,7 @@
 
     // dport is either used in a filter here, or later
     u16 dport = sk->__sk_common.skc_dport;
+    dport = ntohs(dport);
     FILTER_DPORT
 
     /*
@@ -311,8 +312,8 @@
     // get throughput stats. see tcp_get_info().
     u64 rx_b = 0, tx_b = 0, sport = 0;
     struct tcp_sock *tp = (struct tcp_sock *)sk;
-    bpf_probe_read(&rx_b, sizeof(rx_b), &tp->bytes_received);
-    bpf_probe_read(&tx_b, sizeof(tx_b), &tp->bytes_acked);
+    rx_b = tp->bytes_received;
+    tx_b = tp->bytes_acked;
 
     if (args->family == AF_INET) {
         struct ipv4_data_t data4 = {.span_us = delta_us,
@@ -366,7 +367,7 @@
         'if (pid != %s) { return 0; }' % args.pid)
 if args.remoteport:
     dports = [int(dport) for dport in args.remoteport.split(',')]
-    dports_if = ' && '.join(['dport != %d' % ntohs(dport) for dport in dports])
+    dports_if = ' && '.join(['dport != %d' % dport for dport in dports])
     bpf_text = bpf_text.replace('FILTER_DPORT',
         'if (%s) { birth.delete(&sk); return 0; }' % dports_if)
 if args.localport:
diff --git a/tools/tcptop.py b/tools/tcptop.py
index 58bfeab..7d2babb 100755
--- a/tools/tcptop.py
+++ b/tools/tcptop.py
@@ -118,14 +118,10 @@
     } else if (family == AF_INET6) {
         struct ipv6_key_t ipv6_key = {.pid = pid};
 
-        bpf_probe_read(&ipv6_key.saddr0, sizeof(ipv6_key.saddr0),
-            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0]);
-        bpf_probe_read(&ipv6_key.saddr1, sizeof(ipv6_key.saddr1),
-            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2]);
-        bpf_probe_read(&ipv6_key.daddr0, sizeof(ipv6_key.daddr0),
-            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0]);
-        bpf_probe_read(&ipv6_key.daddr1, sizeof(ipv6_key.daddr1),
-            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2]);
+        ipv6_key.saddr0 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0];
+        ipv6_key.saddr1 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2];
+        ipv6_key.daddr0 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0];
+        ipv6_key.daddr1 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2];
         ipv6_key.lport = sk->__sk_common.skc_num;
         dport = sk->__sk_common.skc_dport;
         ipv6_key.dport = ntohs(dport);
@@ -165,14 +161,10 @@
 
     } else if (family == AF_INET6) {
         struct ipv6_key_t ipv6_key = {.pid = pid};
-        bpf_probe_read(&ipv6_key.saddr0, sizeof(ipv6_key.saddr0),
-            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0]);
-        bpf_probe_read(&ipv6_key.saddr1, sizeof(ipv6_key.saddr1),
-            &sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2]);
-        bpf_probe_read(&ipv6_key.daddr0, sizeof(ipv6_key.daddr0),
-            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0]);
-        bpf_probe_read(&ipv6_key.daddr1, sizeof(ipv6_key.daddr1),
-            &sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2]);
+        ipv6_key.saddr0 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[0];
+        ipv6_key.saddr1 = *(u64 *)&sk->__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32[2];
+        ipv6_key.daddr0 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[0];
+        ipv6_key.daddr1 = *(u64 *)&sk->__sk_common.skc_v6_daddr.in6_u.u6_addr32[2];
         ipv6_key.lport = sk->__sk_common.skc_num;
         dport = sk->__sk_common.skc_dport;
         ipv6_key.dport = ntohs(dport);
diff --git a/tools/xfsslower.py b/tools/xfsslower.py
index 5055dd3..da70c57 100755
--- a/tools/xfsslower.py
+++ b/tools/xfsslower.py
@@ -187,10 +187,7 @@
     bpf_get_current_comm(&data.task, sizeof(data.task));
 
     // workaround (rewriter should handle file to d_name in one step):
-    struct dentry *de = NULL;
-    struct qstr qs = {};
-    bpf_probe_read(&de, sizeof(de), &valp->fp->f_path.dentry);
-    bpf_probe_read(&qs, sizeof(qs), (void *)&de->d_name);
+    struct qstr qs = valp->fp->f_path.dentry->d_name;
     if (qs.len == 0)
         return 0;
     bpf_probe_read(&data.file, sizeof(data.file), (void *)qs.name);
diff --git a/tools/zfsdist.py b/tools/zfsdist.py
index 5d5645a..6b29b99 100755
--- a/tools/zfsdist.py
+++ b/tools/zfsdist.py
@@ -137,10 +137,10 @@
 b = BPF(text=bpf_text)
 
 # common file functions
-if BPF.get_kprobe_functions('zpl_iter'):
+if BPF.get_kprobe_functions(b'zpl_iter'):
     b.attach_kprobe(event="zpl_iter_read", fn_name="trace_entry")
     b.attach_kprobe(event="zpl_iter_write", fn_name="trace_entry")
-elif BPF.get_kprobe_functions('zpl_aio'):
+elif BPF.get_kprobe_functions(b'zpl_aio'):
     b.attach_kprobe(event="zpl_aio_read", fn_name="trace_entry")
     b.attach_kprobe(event="zpl_aio_write", fn_name="trace_entry")
 else:
@@ -148,10 +148,10 @@
     b.attach_kprobe(event="zpl_write", fn_name="trace_entry")
 b.attach_kprobe(event="zpl_open", fn_name="trace_entry")
 b.attach_kprobe(event="zpl_fsync", fn_name="trace_entry")
-if BPF.get_kprobe_functions('zpl_iter'):
+if BPF.get_kprobe_functions(b'zpl_iter'):
     b.attach_kretprobe(event="zpl_iter_read", fn_name="trace_read_return")
     b.attach_kretprobe(event="zpl_iter_write", fn_name="trace_write_return")
-elif BPF.get_kprobe_functions('zpl_aio'):
+elif BPF.get_kprobe_functions(b'zpl_aio'):
     b.attach_kretprobe(event="zpl_aio_read", fn_name="trace_read_return")
     b.attach_kretprobe(event="zpl_aio_write", fn_name="trace_write_return")
 else:
diff --git a/tools/zfsslower.py b/tools/zfsslower.py
index 53c566f..5e1c328 100755
--- a/tools/zfsslower.py
+++ b/tools/zfsslower.py
@@ -190,11 +190,7 @@
     data.offset = valp->offset;
     bpf_get_current_comm(&data.task, sizeof(data.task));
 
-    // workaround (rewriter should handle file to d_name in one step):
-    struct dentry *de = NULL;
-    struct qstr qs = {};
-    bpf_probe_read(&de, sizeof(de), &valp->fp->f_path.dentry);
-    bpf_probe_read(&qs, sizeof(qs), (void *)&de->d_name);
+    struct qstr qs = valp->fp->f_path.dentry->d_name;
     if (qs.len == 0)
         return 0;
     bpf_probe_read(&data.file, sizeof(data.file), (void *)qs.name);