Merge pull request #1633 from hMcLauchlan/inject-tool

Add new targeted error injection tool
diff --git a/README.md b/README.md
index f503168..7fa188b 100644
--- a/README.md
+++ b/README.md
@@ -142,6 +142,7 @@
 - tools/[tcpconnlat](tools/tcpconnlat.py): Trace TCP active connection latency (connect()). [Examples](tools/tcpconnlat_example.txt).
 - tools/[tcplife](tools/tcplife.py): Trace TCP sessions and summarize lifespan. [Examples](tools/tcplife_example.txt).
 - tools/[tcpretrans](tools/tcpretrans.py): Trace TCP retransmits and TLPs. [Examples](tools/tcpretrans_example.txt).
+- tools/[tcpstates](tools/tcpstates.py): Trace TCP session state changes with durations. [Examples](tools/tcpstates_example.txt).
 - tools/[tcpsubnet](tools/tcpsubnet.py): Summarize and aggregate TCP send by subnet. [Examples](tools/tcpsubnet_example.txt).
 - tools/[tcptop](tools/tcptop.py): Summarize TCP send/recv throughput by host. Top for TCP. [Examples](tools/tcptop_example.txt).
 - tools/[tcptracer](tools/tcptracer.py): Trace TCP established connections (connect(), accept(), close()). [Examples](tools/tcptracer_example.txt).
diff --git a/debian/control b/debian/control
index e02fdd3..db11fba 100644
--- a/debian/control
+++ b/debian/control
@@ -8,7 +8,8 @@
     libelf-dev, bison, flex, libfl-dev, libedit-dev, zlib1g-dev, git,
     clang-format | clang-format-3.7 | clang-format-3.8, python (>= 2.7),
     python-netaddr, python-pyroute2, luajit, libluajit-5.1-dev, arping,
-    inetutils-ping | iputils-ping, iperf, netperf, ethtool, devscripts
+    inetutils-ping | iputils-ping, iperf, netperf, ethtool, devscripts,
+    python3
 Homepage: https://github.com/iovisor/bcc
 
 Package: libbcc
@@ -28,6 +29,11 @@
 Depends: libbcc, python, binutils
 Description: Python wrappers for BPF Compiler Collection (BCC)
 
+Package: python3-bcc
+Architecture: all
+Depends: libbcc, python3, binutils
+Description: Python3 wrappers for BPF Compiler Collection (BCC)
+
 Package: bcc-tools
 Architecture: all
 Depends: python-bcc
diff --git a/debian/python-bcc.install b/debian/python-bcc.install
index 607c065..b2cc136 100644
--- a/debian/python-bcc.install
+++ b/debian/python-bcc.install
@@ -1 +1 @@
-usr/lib/python*
+usr/lib/python2*
diff --git a/debian/python3-bcc.install b/debian/python3-bcc.install
new file mode 100644
index 0000000..4606faa
--- /dev/null
+++ b/debian/python3-bcc.install
@@ -0,0 +1 @@
+usr/lib/python3*
diff --git a/debian/rules b/debian/rules
index 80f4642..49460be 100755
--- a/debian/rules
+++ b/debian/rules
@@ -9,7 +9,7 @@
 UPSTREAM_VERSION := $(shell dpkg-parsechangelog | sed -rne "s,^Version: ([0-9.]+)(~|-)(.*),\1,p")
 
 %:
-	dh $@ --buildsystem=cmake --parallel
+	dh $@ --buildsystem=cmake --parallel --with python2,python3
 
 # tests cannot be run in parallel
 override_dh_auto_test:
@@ -17,4 +17,4 @@
 
 # FIXME: LLVM_DEFINITIONS is broken somehow in LLVM cmake upstream
 override_dh_auto_configure:
-	dh_auto_configure -- -DREVISION_LAST=$(UPSTREAM_VERSION) -DREVISION=$(UPSTREAM_VERSION) -DLLVM_DEFINITIONS="-D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"
+	dh_auto_configure -- -DREVISION_LAST=$(UPSTREAM_VERSION) -DREVISION=$(UPSTREAM_VERSION) -DLLVM_DEFINITIONS="-D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS" -DPYTHON_CMD="python2;python3"
diff --git a/docs/kernel-versions.md b/docs/kernel-versions.md
index e1f2391..701ac78 100644
--- a/docs/kernel-versions.md
+++ b/docs/kernel-versions.md
@@ -130,6 +130,10 @@
 `BPF_FUNC_map_delete_elem()` | 3.19 | [`d0003ec01c66`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d0003ec01c667b731c139e23de3306a8b328ccf5)
 `BPF_FUNC_map_lookup_elem()` | 3.19 | [`d0003ec01c66`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d0003ec01c667b731c139e23de3306a8b328ccf5)
 `BPF_FUNC_map_update_elem()` | 3.19 | [`d0003ec01c66`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=d0003ec01c667b731c139e23de3306a8b328ccf5)
+`BPF_FUNC_msg_apply_bytes()` | 4.16 | [`2a100317c9eb`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=2a100317c9ebc204a166f16294884fbf9da074ce)
+`BPF_FUNC_msg_cork_bytes()` | 4.16 | [`91843d540a13`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=91843d540a139eb8070bcff8aa10089164436deb)
+`BPF_FUNC_msg_pull_data()` | 4.16 | [`015632bb30da`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=015632bb30daaaee64e1bcac07570860e0bf3092)
+`BPF_FUNC_msg_redirect_map()` | 4.16 | [`4f738adba30a`](https://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/commit/?id=4f738adba30a7cfc006f605707e7aee847ffefa0)
 `BPF_FUNC_perf_event_output()` | 4.4 | [`a43eec304259`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=a43eec304259a6c637f4014a6d4767159b6a3aa3)
 `BPF_FUNC_perf_event_read()` | 4.3 | [`35578d798400`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=35578d7984003097af2b1e34502bc943d40c1804)
 `BPF_FUNC_perf_event_read_value()` | 4.15 | [`908432ca84fc`](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=908432ca84fc229e906ba164219e9ad0fe56f755)
diff --git a/examples/cpp/CMakeLists.txt b/examples/cpp/CMakeLists.txt
index 7d6ccee..9d5e5d4 100644
--- a/examples/cpp/CMakeLists.txt
+++ b/examples/cpp/CMakeLists.txt
@@ -27,6 +27,9 @@
 add_executable(FollyRequestContextSwitch FollyRequestContextSwitch.cc)
 target_link_libraries(FollyRequestContextSwitch bcc-static)
 
+add_executable(UseExternalMap UseExternalMap.cc)
+target_link_libraries(UseExternalMap bcc-static)
+
 if(INSTALL_CPP_EXAMPLES)
   install (TARGETS HelloWorld DESTINATION share/bcc/examples/cpp)
   install (TARGETS CPUDistribution DESTINATION share/bcc/examples/cpp)
@@ -35,4 +38,5 @@
   install (TARGETS RandomRead DESTINATION share/bcc/examples/cpp)
   install (TARGETS LLCStat DESTINATION share/bcc/examples/cpp)
   install (TARGETS FollyRequestContextSwitch DESTINATION share/bcc/examples/cpp)
+  install (TARGETS UseExternalMap DESTINATION share/bcc/examples/cpp)
 endif(INSTALL_CPP_EXAMPLES)
diff --git a/examples/cpp/UseExternalMap.cc b/examples/cpp/UseExternalMap.cc
new file mode 100644
index 0000000..3d4d759
--- /dev/null
+++ b/examples/cpp/UseExternalMap.cc
@@ -0,0 +1,134 @@
+/*
+ * UseExternalMap shows how to access an external map through the
+ * C++ interface. The external map could be a pinned map.
+ * This example simulates the pinned map with a locally
+ * created map, made by calling libbpf's bpf_create_map.
+ *
+ * Copyright (c) Facebook, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ */
+
+#include <stdint.h>
+#include <iostream>
+
+#include "BPF.h"
+
+// Key type handed to get_hash_table<>() for "counts"; fields mirror the struct of the same name inside BPF_PROGRAM.
+struct sched_switch_info {
+  int prev_pid;
+  int next_pid;
+  char prev_comm[16];
+  char next_comm[16];
+};
+
+#define CHECK(condition, msg)        \
+  ({                                 \
+    if (condition) {                 \
+      std::cerr << msg << std::endl; \
+      return 1;                      \
+    }                                \
+  })  // CHECK(condition, msg): a TRUE condition means failure -- msg goes to std::cerr and the enclosing function returns 1.
+
+const std::string BPF_PROGRAM = R"(
+#include <linux/sched.h>
+
+struct sched_switch_info {
+  int prev_pid;
+  int next_pid;
+  char prev_comm[16];
+  char next_comm[16];
+};
+
+BPF_TABLE("extern", u32, u32, control, 1);
+BPF_HASH(counts, struct sched_switch_info, u32);
+int on_sched_switch(struct tracepoint__sched__sched_switch *args) {
+  struct sched_switch_info key = {};
+  u32 zero = 0, *val;
+
+  /* only do something when control is on */
+  val = control.lookup(&zero);
+  if (!val || *val == 0)
+    return 0;
+
+  /* record sched_switch info in counts table */
+  key.prev_pid = args->prev_pid;
+  key.next_pid = args->next_pid;
+  __builtin_memcpy(&key.prev_comm, args->prev_comm, 16);
+  __builtin_memcpy(&key.next_comm, args->next_comm, 16);
+  val = counts.lookup_or_init(&key, &zero);
+  (*val)++;
+
+  return 0;
+}
+)";  // BPF C source given to bpf.init() in main(); it counts sched_switch events per key in "counts" only while control[0] is non-zero.
+
+static void print_counts(ebpf::BPF *bpfp, std::string msg) {  // prints msg, a column header, then one row per entry of the "counts" table
+  auto counts_table_hdl =
+      bpfp->get_hash_table<struct sched_switch_info, uint32_t>("counts");
+  printf("%s\n", msg.c_str());
+  printf("%-8s  %-16s      %-8s  %-16s   %-4s\n", "PREV_PID", "PREV_COMM",
+         "CURR_PID", "CURR_COMM", "CNT");
+  for (auto it : counts_table_hdl.get_table_offline()) {  // NOTE(review): `auto it` copies each entry; first is the key, second the count
+    printf("%-8d (%-16s) ==> %-8d (%-16s): %-4d\n", it.first.prev_pid,
+           it.first.prev_comm, it.first.next_pid, it.first.next_comm,
+           it.second);  // NOTE(review): it.second is uint32_t (see template args above) but is formatted with %d
+  }
+}
+
+int main() {  // returns 0 on success; every CHECK below returns 1 after printing its message
+  int ctrl_map_fd;
+  uint32_t val;
+
+  // Create the "control" map directly with bpf_create_map, so bcc does not know about it yet.
+  ctrl_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, "control", sizeof(uint32_t),
+                               sizeof(uint32_t), 1, 0);
+  CHECK(ctrl_map_fd < 0, "bpf_create_map failure");
+
+  // Register that map's fd in a TableStorage under the path {"control"}.
+  std::unique_ptr<ebpf::TableStorage> local_ts =
+      ebpf::createSharedTableStorage();
+  ebpf::Path global_path({"control"});
+  ebpf::TableDesc table_desc("control", ebpf::FileDesc(ctrl_map_fd),
+                             BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
+                             sizeof(uint32_t), 1, 0);
+  local_ts->Insert(global_path, std::move(table_desc));
+
+  // Build the BPF object on top of the pre-populated storage, then load BPF_PROGRAM.
+  ebpf::BPF bpf(0, &*local_ts);
+  auto res = bpf.init(BPF_PROGRAM);
+  CHECK(res.code(), res.msg());
+
+  // Attach on_sched_switch to the sched:sched_switch tracepoint.
+  res = bpf.attach_tracepoint("sched:sched_switch", "on_sched_switch");
+  CHECK(res.code(), res.msg());
+
+  // Let some scheduling events happen while control is still off.
+  sleep(1);
+
+  auto control_table_hdl = bpf.get_array_table<uint32_t>("control");
+  res = control_table_hdl.get_value(0, val);
+  CHECK(res.code() || val != 0, res.msg());  // NOTE(review): if only `val != 0` trips, the printed res.msg() is presumably empty
+
+  // The program returns early while control[0] == 0, so no rows are expected here.
+  print_counts(&bpf, "events with control off:");
+
+  printf("\n");
+
+  // Switch control on so the BPF program starts counting events.
+  val = 1;
+  res = control_table_hdl.update_value(0, val);
+  CHECK(res.code(), res.msg());
+
+  // Reset val locally and read the map back to verify that the update took effect.
+  val = 0;
+  res = control_table_hdl.get_value(0, val);
+  CHECK(res.code() || val != 1, res.msg());  // NOTE(review): same possibly-empty message when only the value check fails
+
+  // Let some scheduling events happen with control on.
+  sleep(1);
+
+  // Rows are expected now that counting is enabled.
+  print_counts(&bpf, "events with control on:");
+
+  return 0;
+}
diff --git a/examples/networking/tc_perf_event.py b/examples/networking/tc_perf_event.py
index 0b9b3f5..a385916 100755
--- a/examples/networking/tc_perf_event.py
+++ b/examples/networking/tc_perf_event.py
@@ -54,8 +54,8 @@
 
     # Only print for echo request
     if icmp_type == 128:
-        src_ip = bytes(skb_event.raw[22:38])
-        dst_ip = bytes(skb_event.raw[38:54])
+        src_ip = bytes(bytearray(skb_event.raw[22:38]))
+        dst_ip = bytes(bytearray(skb_event.raw[38:54]))
         print("%-3s %-32s %-12s 0x%08x" %
               (cpu, socket.inet_ntop(socket.AF_INET6, src_ip),
                socket.inet_ntop(socket.AF_INET6, dst_ip),
diff --git a/man/man8/tcplife.8 b/man/man8/tcplife.8
index 2baa315..f6b8991 100644
--- a/man/man8/tcplife.8
+++ b/man/man8/tcplife.8
@@ -10,10 +10,10 @@
 characterisation and flow accounting: identifying what connections are
 happening, with the bytes transferred.
 
-This tool works using the tcp:tcp_set_state tracepoint if it exists, added
-to Linux 4.15, and switches to using kernel dynamic tracing for older kernels.
-Only TCP state changes are traced, so it is expected that the overhead of
-this tool is much lower than typical send/receive tracing.
+This tool works using the sock:inet_sock_set_state tracepoint if it exists,
+added to Linux 4.16, and switches to using kernel dynamic tracing for older
+kernels. Only TCP state changes are traced, so it is expected that the
+overhead of this tool is much lower than typical send/receive tracing.
 
 Since this uses BPF, only the root user can use this tool.
 .SH REQUIREMENTS
diff --git a/man/man8/tcpstates.8 b/man/man8/tcpstates.8
new file mode 100644
index 0000000..b31fd64
--- /dev/null
+++ b/man/man8/tcpstates.8
@@ -0,0 +1,128 @@
+.TH tcpstates 8  "2018-03-20" "USER COMMANDS"
+.SH NAME
+tcpstates \- Trace TCP session state changes with durations. Uses Linux eBPF/bcc.
+.SH SYNOPSIS
+.B tcpstates [\-h] [\-T] [\-t] [\-w] [\-s] [\-D PORTS] [\-L PORTS]
+.SH DESCRIPTION
+This tool traces TCP session state changes, and prints details including the
+duration spent in each state. This can help explain the latency of TCP
+connections: whether the time is spent in the ESTABLISHED state (data
+transfer), or an initialization state (SYN_SENT), etc.
+
+This tool works using the sock:inet_sock_set_state tracepoint, which was
+added to Linux 4.16. Linux 4.16 also included extra state transitions so that
+all TCP transitions could be observed by this tracepoint.
+
+Only TCP state changes are traced, so it is expected that the
+overhead of this tool is much lower than typical send/receive tracing.
+
+Since this uses BPF, only the root user can use this tool.
+.SH REQUIREMENTS
+CONFIG_BPF and bcc, and the sock:inet_sock_set_state tracepoint.
+.SH OPTIONS
+.TP
+\-h
+Print usage message.
+.TP
+\-s
+Comma separated values output (parseable).
+.TP
+\-t
+Include a timestamp column (seconds).
+.TP
+\-T
+Include a time column (HH:MM:SS).
+.TP
+\-w
+Wide column output (fits IPv6 addresses).
+.TP
+\-L PORTS
+Comma-separated list of local ports to trace (filtered in-kernel).
+.TP
+\-D PORTS
+Comma-separated list of destination ports to trace (filtered in-kernel).
+.SH EXAMPLES
+.TP
+Trace all TCP sessions, and show all state changes:
+#
+.B tcpstates
+.TP
+Include a timestamp column, and wide column output:
+#
+.B tcpstates \-tw
+.TP
+Trace connections to local ports 80 and 81 only:
+#
+.B tcpstates \-L 80,81
+.TP
+Trace connections to remote port 80 only:
+#
+.B tcpstates \-D 80
+.SH FIELDS
+.TP
+TIME
+Time of the change, in HH:MM:SS format.
+.TP
+TIME(s)
+Time of the change, in seconds.
+.TP
+C-PID
+The current on-CPU process ID. This may show the process that owns the TCP
+session if the state change executes in synchronous process context, else it
+is likely to show the kernel (asynchronous state change).
+.TP
+C-COMM
+The current on-CPU process name. This may show the process that owns the TCP
+session if the state change executes in synchronous process context, else it
+is likely to show the kernel (asynchronous state change).
+.TP
+IP
+IP address family (4 or 6).
+.TP
+LADDR
+Local IP address.
+.TP
+DADDR
+Remote IP address.
+.TP
+LPORT
+Local port.
+.TP
+DPORT
+Destination port.
+.TP
+OLDSTATE
+Previous TCP state.
+.TP
+NEWSTATE
+New TCP state.
+.TP
+MS
+Duration of this state, in milliseconds.
+.SH OVERHEAD
+This traces the sock:inet_sock_set_state tracepoint, which should fire much
+less often than send/receive events, and therefore have lower overhead. The
+overhead of the tool is relative to the rate of new TCP sessions: if this is
+high, over 10,000 per second, then there may be noticeable overhead just to
+print out 10k lines of formatted output per second.
+
+You can find out the rate of new TCP sessions using "sar \-n TCP 1", and
+adding the active/s and passive/s columns.
+
+As always, test and understand this tool's overhead for your types of
+workloads before production use.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a companion _examples.txt file containing
+example usage, output, and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development.
+.SH AUTHOR
+Brendan Gregg
+.SH SEE ALSO
+tcpaccept(8), tcpconnect(8), tcptop(8), tcplife(8)
diff --git a/scripts/check-helpers.sh b/scripts/check-helpers.sh
index f50bcb8..fe79971 100755
--- a/scripts/check-helpers.sh
+++ b/scripts/check-helpers.sh
@@ -18,6 +18,14 @@
 	((ret++))
 fi
 
+virtual=$(grep -oP "(?<=^\sFN\()\w+" src/cc/compat/linux/virtual_bpf.h | tail -n +2 | sort -u)
+dif=$(diff <(echo "$compat") <(echo "$virtual"))
+if [ $? -ne 0 ]; then
+	echo "The lists of helpers in src/cc/compat/linux/bpf.h and src/cc/compat/linux/virtual_bpf.h differ:"
+	echo "$dif"
+	((ret++))
+fi
+
 export=$(grep -oP "(?<=BPF_FUNC_)\w+" src/cc/export/helpers.h | sort -u)
 dif=$(diff <(echo "$compat") <(echo "$export"))
 if [ $? -ne 0 ]; then
diff --git a/src/cc/api/BPFTable.cc b/src/cc/api/BPFTable.cc
index 8f709cf..090b6e2 100644
--- a/src/cc/api/BPFTable.cc
+++ b/src/cc/api/BPFTable.cc
@@ -157,6 +157,13 @@
   };
 }
 
+BPFStackTable::BPFStackTable(BPFStackTable&& that) :  // move constructor: takes over the source table's symbol caches
+  BPFTableBase<int, stacktrace_t>(that.desc),  // base is constructed from the source's table descriptor
+  symbol_option_(std::move(that.symbol_option_)),
+  pid_sym_(std::move(that.pid_sym_)) {
+    that.pid_sym_.clear();  // ~BPFStackTable() frees every pid_sym_ entry, so leave the source with none to free
+}
+
 BPFStackTable::~BPFStackTable() {
   for (auto it : pid_sym_)
     bcc_free_symcache(it.second, it.first);
diff --git a/src/cc/api/BPFTable.h b/src/cc/api/BPFTable.h
index dee7537..3a161a0 100644
--- a/src/cc/api/BPFTable.h
+++ b/src/cc/api/BPFTable.h
@@ -289,6 +289,7 @@
   BPFStackTable(const TableDesc& desc,
                 bool use_debug_file,
                 bool check_debug_file_crc);
+  BPFStackTable(BPFStackTable&& that);
   ~BPFStackTable();
 
   void clear_table_non_atomic();
diff --git a/src/cc/compat/linux/bpf.h b/src/cc/compat/linux/bpf.h
index 3f3ff80..83d7ea0 100644
--- a/src/cc/compat/linux/bpf.h
+++ b/src/cc/compat/linux/bpf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  *
  * This program is free software; you can redistribute it and/or
@@ -32,12 +33,12 @@
 
 /* jmp encodings */
 #define BPF_JNE		0x50	/* jump != */
-#define BPF_JLT		0xa0    /* LT is unsigned, '<' */
-#define BPF_JLE		0xb0    /* LE is unsigned, '<=' */
+#define BPF_JLT		0xa0	/* LT is unsigned, '<' */
+#define BPF_JLE		0xb0	/* LE is unsigned, '<=' */
 #define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
 #define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
-#define BPF_JSLT	0xc0    /* SLT is signed, '<' */
-#define BPF_JSLE	0xd0    /* SLE is signed, '<=' */
+#define BPF_JSLT	0xc0	/* SLT is signed, '<' */
+#define BPF_JSLE	0xd0	/* SLE is signed, '<=' */
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
@@ -132,6 +133,7 @@
 	BPF_PROG_TYPE_SOCK_OPS,
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_CGROUP_DEVICE,
+	BPF_PROG_TYPE_SK_MSG,
 };
 
 enum bpf_attach_type {
@@ -142,6 +144,7 @@
 	BPF_SK_SKB_STREAM_PARSER,
 	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_DEVICE,
+	BPF_SK_MSG_VERDICT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -222,13 +225,35 @@
 #define BPF_F_NUMA_NODE		(1U << 2)
 
 /* flags for BPF_PROG_QUERY */
-#define BPF_F_QUERY_EFFECTIVE  (1U << 0)
+#define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
 #define BPF_OBJ_NAME_LEN 16U
 
 /* Flags for accessing BPF object */
-#define BPF_F_RDONLY           (1U << 3)
-#define BPF_F_WRONLY           (1U << 4)
+#define BPF_F_RDONLY		(1U << 3)
+#define BPF_F_WRONLY		(1U << 4)
+
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID	(1U << 5)
+
+enum bpf_stack_build_id_status {
+	/* user space need an empty entry to identify end of a trace */
+	BPF_STACK_BUILD_ID_EMPTY = 0,
+	/* with valid build_id and offset */
+	BPF_STACK_BUILD_ID_VALID = 1,
+	/* couldn't get build_id, fallback to ip */
+	BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+	__s32		status;
+	unsigned char	build_id[BPF_BUILD_ID_SIZE];
+	union {
+		__u64	offset;
+		__u64	ip;
+	};
+};
 
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
@@ -268,7 +293,7 @@
 		__u32		kern_version;	/* checked when prog_type=kprobe */
 		__u32		prog_flags;
 		char		prog_name[BPF_OBJ_NAME_LEN];
-		__u32		prog_ifindex;    /* ifindex of netdev to prep for */
+		__u32		prog_ifindex;	/* ifindex of netdev to prep for */
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -311,14 +336,14 @@
 		__aligned_u64	info;
 	} info;
 
-        struct { /* anonymous struct used by BPF_PROG_QUERY command */
-		__u32           target_fd;      /* container object to query */
-		__u32           attach_type;
-		__u32           query_flags;
-		__u32           attach_flags;
-		__aligned_u64   prog_ids;
-		__u32           prog_cnt;
-        } query;
+	struct { /* anonymous struct used by BPF_PROG_QUERY command */
+		__u32		target_fd;	/* container object to query */
+		__u32		attach_type;
+		__u32		query_flags;
+		__u32		attach_flags;
+		__aligned_u64	prog_ids;
+		__u32		prog_cnt;
+	} query;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -332,7 +357,7 @@
  * int bpf_map_delete_elem(&map, &key)
  *     Return: 0 on success or negative error
  *
- * int bpf_probe_read(void *dst, int size, const void *src)
+ * int bpf_probe_read(void *dst, int size, void *src)
  *     Return: 0 on success or negative error
  *
  * u64 bpf_ktime_get_ns(void)
@@ -433,14 +458,13 @@
  *     redirect to another netdev
  *     @ifindex: ifindex of the net device
  *     @flags:
- *       cls_bpf:
+ *	  cls_bpf:
  *          bit 0 - if set, redirect to ingress instead of egress
  *          other bits - reserved
- *       xdp_bpf:
- *         all bits - reserved
+ *	  xdp_bpf:
+ *	    all bits - reserved
  *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- *            xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
- *
+ *	       xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
  * int bpf_redirect_map(map, key, flags)
  *     redirect to endpoint in map
  *     @map: pointer to dev map
@@ -667,10 +691,10 @@
  *     Return: SK_PASS
  *
  * int bpf_sock_map_update(skops, map, key, flags)
- *     @skops: pointer to bpf_sock_ops
- *     @map: pointer to sockmap to update
- *     @key: key to insert/update sock in map
- *     @flags: same flags as map update elem
+ *	@skops: pointer to bpf_sock_ops
+ *	@map: pointer to sockmap to update
+ *	@key: key to insert/update sock in map
+ *	@flags: same flags as map update elem
  *
  * int bpf_xdp_adjust_meta(xdp_md, delta)
  *     Adjust the xdp_md.data_meta by delta
@@ -694,8 +718,17 @@
  *     Return : 0 on success or negative error code
  *
  * int bpf_override_return(pt_regs, rc)
- *     @pt_regs: pointer to struct pt_regs
- *     @rc: the return value to set
+ *	@pt_regs: pointer to struct pt_regs
+ *	@rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ *     Redirect msg to a sock in map using key as a lookup key for the
+ *     sock in map.
+ *     @map: pointer to sockmap
+ *     @key: key to lookup sock in map
+ *     @flags: reserved for future use
+ *     Return: SK_PASS
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -757,7 +790,11 @@
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
 	FN(override_return),		\
-	FN(sock_ops_cb_flags_set),
+	FN(sock_ops_cb_flags_set),	\
+	FN(msg_redirect_map),		\
+	FN(msg_apply_bytes),		\
+	FN(msg_cork_bytes),		\
+	FN(msg_pull_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -800,8 +837,9 @@
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
+#define BPF_F_SEQ_NUMBER		(1ULL << 3)
 
-/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read and
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
  */
 #define BPF_F_INDEX_MASK		0xffffffffULL
@@ -839,12 +877,12 @@
 
 	/* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
 	__u32 family;
-	__u32 remote_ip4;       /* Stored in network byte order */
-	__u32 local_ip4;        /* Stored in network byte order */
-	__u32 remote_ip6[4];    /* Stored in network byte order */
-	__u32 local_ip6[4];     /* Stored in network byte order */
-	__u32 remote_port;      /* Stored in network byte order */
-	__u32 local_port;       /* stored in host byte order */
+	__u32 remote_ip4;	/* Stored in network byte order */
+	__u32 local_ip4;	/* Stored in network byte order */
+	__u32 remote_ip6[4];	/* Stored in network byte order */
+	__u32 local_ip6[4];	/* Stored in network byte order */
+	__u32 remote_port;	/* Stored in network byte order */
+	__u32 local_port;	/* stored in host byte order */
 	/* ... here. */
 
 	__u32 data_meta;
@@ -919,6 +957,14 @@
 	SK_PASS,
 };
 
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+	void *data;
+	void *data_end;
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
@@ -929,11 +975,11 @@
 	__u32 xlated_prog_len;
 	__aligned_u64 jited_prog_insns;
 	__aligned_u64 xlated_prog_insns;
-	__u64 load_time;        /* ns since boottime */
+	__u64 load_time;	/* ns since boottime */
 	__u32 created_by_uid;
 	__u32 nr_map_ids;
 	__aligned_u64 map_ids;
-	char  name[BPF_OBJ_NAME_LEN];
+	char name[BPF_OBJ_NAME_LEN];
 	__u32 ifindex;
 	__u64 netns_dev;
 	__u64 netns_ino;
@@ -962,8 +1008,8 @@
 	__u32 op;
 	union {
 		__u32 args[4];		/* Optionally passed to bpf program */
-		__u32 reply;		/* Returned by bpf program */
-		__u32 replylong[4];	/* Optionally returned by bpf prog */
+		__u32 reply;		/* Returned by bpf program	    */
+		__u32 replylong[4];	/* Optionally returned by bpf prog  */
 	};
 	__u32 family;
 	__u32 remote_ip4;	/* Stored in network byte order */
@@ -978,7 +1024,7 @@
 				 */
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
-	__u32 bpf_sock_ops_cb_flags;	/* flags defined in uapi/linux/tcp.h */
+	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
 	__u32 state;
 	__u32 rtt_min;
 	__u32 snd_ssthresh;
@@ -1007,9 +1053,9 @@
 #define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
 #define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
 #define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS	0x7	/* Mask of all currently
-						 * supported cb flags
-						 */
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0x7		/* Mask of all currently
+							 * supported cb flags
+							 */
 
 /* List of known BPF sock_ops operators.
  * New entries can only be added at the end
@@ -1077,13 +1123,12 @@
 	BPF_TCP_CLOSE_WAIT,
 	BPF_TCP_LAST_ACK,
 	BPF_TCP_LISTEN,
-	BPF_TCP_CLOSING,        /* Now a valid state */
+	BPF_TCP_CLOSING,	/* Now a valid state */
 	BPF_TCP_NEW_SYN_RECV,
 
-	BPF_TCP_MAX_STATES      /* Leave at the end! */
+	BPF_TCP_MAX_STATES	/* Leave at the end! */
 };
 
-
 #define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
 #define TCP_BPF_SNDCWND_CLAMP	1002	/* Set sndcwnd_clamp */
 
@@ -1093,12 +1138,12 @@
 	__u64 running;
 };
 
-#define BPF_DEVCG_ACC_MKNOD    (1ULL << 0)
-#define BPF_DEVCG_ACC_READ     (1ULL << 1)
-#define BPF_DEVCG_ACC_WRITE    (1ULL << 2)
+#define BPF_DEVCG_ACC_MKNOD	(1ULL << 0)
+#define BPF_DEVCG_ACC_READ	(1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE	(1ULL << 2)
 
-#define BPF_DEVCG_DEV_BLOCK    (1ULL << 0)
-#define BPF_DEVCG_DEV_CHAR     (1ULL << 1)
+#define BPF_DEVCG_DEV_BLOCK	(1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR	(1ULL << 1)
 
 struct bpf_cgroup_dev_ctx {
 	/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
diff --git a/src/cc/compat/linux/virtual_bpf.h b/src/cc/compat/linux/virtual_bpf.h
index 469a421..14925d3 100644
--- a/src/cc/compat/linux/virtual_bpf.h
+++ b/src/cc/compat/linux/virtual_bpf.h
@@ -1,4 +1,5 @@
 R"********(
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  *
  * This program is free software; you can redistribute it and/or
@@ -33,12 +34,12 @@
 
 /* jmp encodings */
 #define BPF_JNE		0x50	/* jump != */
-#define BPF_JLT		0xa0    /* LT is unsigned, '<' */
-#define BPF_JLE		0xb0    /* LE is unsigned, '<=' */
+#define BPF_JLT		0xa0	/* LT is unsigned, '<' */
+#define BPF_JLE		0xb0	/* LE is unsigned, '<=' */
 #define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
 #define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
-#define BPF_JSLT	0xc0    /* SLT is signed, '<' */
-#define BPF_JSLE	0xd0    /* SLE is signed, '<=' */
+#define BPF_JSLT	0xc0	/* SLT is signed, '<' */
+#define BPF_JSLE	0xd0	/* SLE is signed, '<=' */
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
@@ -133,6 +134,7 @@
 	BPF_PROG_TYPE_SOCK_OPS,
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_CGROUP_DEVICE,
+	BPF_PROG_TYPE_SK_MSG,
 };
 
 enum bpf_attach_type {
@@ -143,6 +145,7 @@
 	BPF_SK_SKB_STREAM_PARSER,
 	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_DEVICE,
+	BPF_SK_MSG_VERDICT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -210,6 +213,7 @@
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */
 
+/* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC	(1U << 0)
 /* Instead of having one common LRU list in the
  * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
@@ -222,13 +226,35 @@
 #define BPF_F_NUMA_NODE		(1U << 2)
 
 /* flags for BPF_PROG_QUERY */
-#define BPF_F_QUERY_EFFECTIVE  (1U << 0)
+#define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
 #define BPF_OBJ_NAME_LEN 16U
 
 /* Flags for accessing BPF object */
-#define BPF_F_RDONLY           (1U << 3)
-#define BPF_F_WRONLY           (1U << 4)
+#define BPF_F_RDONLY		(1U << 3)
+#define BPF_F_WRONLY		(1U << 4)
+
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID	(1U << 5)
+
+enum bpf_stack_build_id_status {
+	/* user space need an empty entry to identify end of a trace */
+	BPF_STACK_BUILD_ID_EMPTY = 0,
+	/* with valid build_id and offset */
+	BPF_STACK_BUILD_ID_VALID = 1,
+	/* couldn't get build_id, fallback to ip */
+	BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+	__s32		status;
+	unsigned char	build_id[BPF_BUILD_ID_SIZE];
+	union {
+		__u64	offset;
+		__u64	ip;
+	};
+};
 
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
@@ -268,7 +294,7 @@
 		__u32		kern_version;	/* checked when prog_type=kprobe */
 		__u32		prog_flags;
 		char		prog_name[BPF_OBJ_NAME_LEN];
-		__u32		prog_ifindex;    /* ifindex of netdev to prep for */
+		__u32		prog_ifindex;	/* ifindex of netdev to prep for */
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -311,14 +337,14 @@
 		__aligned_u64	info;
 	} info;
 
-        struct { /* anonymous struct used by BPF_PROG_QUERY command */
-		__u32           target_fd;      /* container object to query */
-		__u32           attach_type;
-		__u32           query_flags;
-		__u32           attach_flags;
-		__aligned_u64   prog_ids;
-		__u32           prog_cnt;
-        } query;
+	struct { /* anonymous struct used by BPF_PROG_QUERY command */
+		__u32		target_fd;	/* container object to query */
+		__u32		attach_type;
+		__u32		query_flags;
+		__u32		attach_flags;
+		__aligned_u64	prog_ids;
+		__u32		prog_cnt;
+	} query;
 } __attribute__((aligned(8)));
 
 /* BPF helper function descriptions:
@@ -332,7 +358,7 @@
  * int bpf_map_delete_elem(&map, &key)
  *     Return: 0 on success or negative error
  *
- * int bpf_probe_read(void *dst, int size, const void *src)
+ * int bpf_probe_read(void *dst, int size, void *src)
  *     Return: 0 on success or negative error
  *
  * u64 bpf_ktime_get_ns(void)
@@ -433,13 +459,13 @@
  *     redirect to another netdev
  *     @ifindex: ifindex of the net device
  *     @flags:
- *       cls_bpf:
+ *	  cls_bpf:
  *          bit 0 - if set, redirect to ingress instead of egress
  *          other bits - reserved
- *       xdp_bpf:
- *         all bits - reserved
+ *	  xdp_bpf:
+ *	    all bits - reserved
  *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
- *            xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
+ *	       xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
  * int bpf_redirect_map(map, key, flags)
  *     redirect to endpoint in map
  *     @map: pointer to dev map
@@ -628,7 +654,7 @@
  *     @level: SOL_SOCKET or IPPROTO_TCP
  *     @optname: option name
  *     @optval: pointer to option value
- *     @optlen: length of optval in byes
+ *     @optlen: length of optval in bytes
  *     Return: 0 or negative error
  *
  * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
@@ -666,10 +692,10 @@
  *     Return: SK_PASS
  *
  * int bpf_sock_map_update(skops, map, key, flags)
- *     @skops: pointer to bpf_sock_ops
- *     @map: pointer to sockmap to update
- *     @key: key to insert/update sock in map
- *     @flags: same flags as map update elem
+ *	@skops: pointer to bpf_sock_ops
+ *	@map: pointer to sockmap to update
+ *	@key: key to insert/update sock in map
+ *	@flags: same flags as map update elem
  *
  * int bpf_xdp_adjust_meta(xdp_md, delta)
  *     Adjust the xdp_md.data_meta by delta
@@ -693,8 +719,17 @@
  *     Return : 0 on success or negative error code
  *
  * int bpf_override_return(pt_regs, rc)
- *     @pt_regs: pointer to struct pt_regs
- *     @rc: the return value to set
+ *	@pt_regs: pointer to struct pt_regs
+ *	@rc: the return value to set
+ *
+ * int bpf_msg_redirect_map(map, key, flags)
+ *     Redirect msg to a sock in map using key as a lookup key for the
+ *     sock in map.
+ *     @map: pointer to sockmap
+ *     @key: key to lookup sock in map
+ *     @flags: reserved for future use
+ *     Return: SK_PASS
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -756,7 +791,11 @@
 	FN(perf_prog_read_value),	\
 	FN(getsockopt),			\
 	FN(override_return),		\
-	FN(sock_ops_cb_flags_set),
+	FN(sock_ops_cb_flags_set),	\
+	FN(msg_redirect_map),		\
+	FN(msg_apply_bytes),		\
+	FN(msg_cork_bytes),		\
+	FN(msg_pull_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -799,8 +838,9 @@
 /* BPF_FUNC_skb_set_tunnel_key flags. */
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
+#define BPF_F_SEQ_NUMBER		(1ULL << 3)
 
-/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read and
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
  */
 #define BPF_F_INDEX_MASK		0xffffffffULL
@@ -838,12 +878,12 @@
 
 	/* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
 	__u32 family;
-	__u32 remote_ip4;       /* Stored in network byte order */
-	__u32 local_ip4;        /* Stored in network byte order */
-	__u32 remote_ip6[4];    /* Stored in network byte order */
-	__u32 local_ip6[4];     /* Stored in network byte order */
-	__u32 remote_port;      /* Stored in network byte order */
-	__u32 local_port;       /* stored in host byte order */
+	__u32 remote_ip4;	/* Stored in network byte order */
+	__u32 local_ip4;	/* Stored in network byte order */
+	__u32 remote_ip6[4];	/* Stored in network byte order */
+	__u32 local_ip6[4];	/* Stored in network byte order */
+	__u32 remote_port;	/* Stored in network byte order */
+	__u32 local_port;	/* stored in host byte order */
 	/* ... here. */
 
 	__u32 data_meta;
@@ -918,6 +958,14 @@
 	SK_PASS,
 };
 
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+	void *data;
+	void *data_end;
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
@@ -928,11 +976,11 @@
 	__u32 xlated_prog_len;
 	__aligned_u64 jited_prog_insns;
 	__aligned_u64 xlated_prog_insns;
-	__u64 load_time;        /* ns since boottime */
+	__u64 load_time;	/* ns since boottime */
 	__u32 created_by_uid;
 	__u32 nr_map_ids;
 	__aligned_u64 map_ids;
-	char  name[BPF_OBJ_NAME_LEN];
+	char name[BPF_OBJ_NAME_LEN];
 	__u32 ifindex;
 	__u64 netns_dev;
 	__u64 netns_ino;
@@ -961,8 +1009,8 @@
 	__u32 op;
 	union {
 		__u32 args[4];		/* Optionally passed to bpf program */
-		__u32 reply;		/* Returned by bpf program */
-		__u32 replylong[4];	/* Optionally returned by bpf prog */
+		__u32 reply;		/* Returned by bpf program	    */
+		__u32 replylong[4];	/* Optionally returned by bpf prog  */
 	};
 	__u32 family;
 	__u32 remote_ip4;	/* Stored in network byte order */
@@ -977,7 +1025,7 @@
 				 */
 	__u32 snd_cwnd;
 	__u32 srtt_us;		/* Averaged RTT << 3 in usecs */
-	__u32 bpf_sock_ops_cb_flags;    /* flags defined in uapi/linux/tcp.h */
+	__u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
 	__u32 state;
 	__u32 rtt_min;
 	__u32 snd_ssthresh;
@@ -1006,9 +1054,9 @@
 #define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
 #define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
 #define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS	0x7	/* Mask of all currently
-						 * supported cb flags
-						 */
+#define BPF_SOCK_OPS_ALL_CB_FLAGS       0x7		/* Mask of all currently
+							 * supported cb flags
+							 */
 
 /* List of known BPF sock_ops operators.
  * New entries can only be added at the end
@@ -1091,12 +1139,12 @@
 	__u64 running;
 };
 
-#define BPF_DEVCG_ACC_MKNOD    (1ULL << 0)
-#define BPF_DEVCG_ACC_READ     (1ULL << 1)
-#define BPF_DEVCG_ACC_WRITE    (1ULL << 2)
+#define BPF_DEVCG_ACC_MKNOD	(1ULL << 0)
+#define BPF_DEVCG_ACC_READ	(1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE	(1ULL << 2)
 
-#define BPF_DEVCG_DEV_BLOCK    (1ULL << 0)
-#define BPF_DEVCG_DEV_CHAR     (1ULL << 1)
+#define BPF_DEVCG_DEV_BLOCK	(1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR	(1ULL << 1)
 
 struct bpf_cgroup_dev_ctx {
 	/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
@@ -1105,6 +1153,5 @@
 	__u32 minor;
 };
 
-
 #endif /* _UAPI__LINUX_BPF_H__ */
 )********"
diff --git a/src/cc/export/helpers.h b/src/cc/export/helpers.h
index cb72552..64bb895 100644
--- a/src/cc/export/helpers.h
+++ b/src/cc/export/helpers.h
@@ -329,7 +329,15 @@
 static int (*bpf_override_return)(void *pt_regs, unsigned long rc) =
   (void *) BPF_FUNC_override_return;
 static int (*bpf_sock_ops_cb_flags_set)(void *skops, int flags) =
-  (void *)BPF_FUNC_sock_ops_cb_flags_set;
+  (void *) BPF_FUNC_sock_ops_cb_flags_set;
+static int (*bpf_msg_redirect_map)(void *msg, void *map, u32 key, u64 flags) =
+  (void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_apply_bytes)(void *msg, u32 bytes) =
+  (void *) BPF_FUNC_msg_apply_bytes;
+static int (*bpf_msg_cork_bytes)(void *msg, u32 bytes) =
+  (void *) BPF_FUNC_msg_cork_bytes;
+static int (*bpf_msg_pull_data)(void *msg, u32 start, u32 end, u64 flags) =
+  (void *) BPF_FUNC_msg_pull_data;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index 730e4f6..a8eb7ee 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -149,6 +149,10 @@
   {"getsockopt", "4.15"},
   {"override_return", "4.16"},
   {"sock_ops_cb_flags_set", "4.16"},
+  {"msg_redirect_map", "4.17"},
+  {"msg_apply_bytes", "4.17"},
+  {"msg_cork_bytes", "4.17"},
+  {"msg_pull_data", "4.17"},
 };
 
 static uint64_t ptr_to_u64(void *ptr)
diff --git a/src/python/CMakeLists.txt b/src/python/CMakeLists.txt
index 151d1d6..28e46a7 100644
--- a/src/python/CMakeLists.txt
+++ b/src/python/CMakeLists.txt
@@ -11,19 +11,24 @@
   set(PYTHON_CMD "python")
 endif()
 
-set(PIP_INSTALLABLE "${CMAKE_CURRENT_BINARY_DIR}/dist/bcc-${REVISION}.tar.gz")
 configure_file(setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py @ONLY)
-# build the pip installable
-add_custom_command(OUTPUT ${PIP_INSTALLABLE}
-  COMMAND ${PYTHON_CMD} setup.py sdist
-  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bcc/__init__.py ${CMAKE_CURRENT_BINARY_DIR}/setup.py
-  )
-add_custom_target(bcc_py ALL DEPENDS ${PIP_INSTALLABLE})
-
 if(EXISTS "/etc/debian_version")
   set(PYTHON_FLAGS "${PYTHON_FLAGS} --install-layout deb")
 endif()
-install(CODE "execute_process(COMMAND ${PYTHON_CMD} setup.py install -f ${PYTHON_FLAGS}
-  --prefix=${CMAKE_INSTALL_PREFIX} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})"
-  COMPONENT python)
+
+foreach(PY_CMD ${PYTHON_CMD})
+  string(REPLACE "/" "-" PY_CMD_ESCAPED ${PY_CMD})
+
+  set(PIP_INSTALLABLE "${CMAKE_CURRENT_BINARY_DIR}/dist-${PY_CMD_ESCAPED}/bcc-${REVISION}.tar.gz")
+  # build the pip installable (sdist) into a per-interpreter dist-* directory
+  add_custom_command(OUTPUT ${PIP_INSTALLABLE}
+    COMMAND ${PY_CMD} setup.py sdist --dist-dir dist-${PY_CMD_ESCAPED}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bcc/__init__.py ${CMAKE_CURRENT_BINARY_DIR}/setup.py
+    )
+  add_custom_target(bcc_py_${PY_CMD_ESCAPED} ALL DEPENDS ${PIP_INSTALLABLE})
+
+  install(CODE "execute_process(COMMAND ${PY_CMD} setup.py install -f ${PYTHON_FLAGS}
+    --prefix=${CMAKE_INSTALL_PREFIX} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})"
+    COMPONENT python)
+endforeach()
diff --git a/tools/tcplife.py b/tools/tcplife.py
index 5a58830..560bb6f 100755
--- a/tools/tcplife.py
+++ b/tools/tcplife.py
@@ -6,8 +6,9 @@
 #
 # USAGE: tcplife [-h] [-C] [-S] [-p PID] [interval [count]]
 #
-# This uses the tcp:tcp_set_state tracepoint if it exists (added to
-# Linux 4.15), else it uses kernel dynamic tracing of tcp_set_state().
+# This uses the sock:inet_sock_set_state tracepoint if it exists (added to
+# Linux 4.16, and replacing the earlier tcp:tcp_set_state), else it uses
+# kernel dynamic tracing of tcp_set_state().
 #
 # While throughput counters are emitted, they are fetched in a low-overhead
 # manner: reading members of the tcp_info struct on TCP close. ie, we do not
@@ -110,9 +111,9 @@
 
 #
 # XXX: The following is temporary code for older kernels, Linux 4.14 and
-# older. It uses kprobes to instrument tcp_set_state(). On Linux 4.15 and
-# later, the tcp:tcp_set_state tracepoint should be used instead, as is
-# done by the code that follows this. In the distant future (2021?), this
+# older. It uses kprobes to instrument tcp_set_state(). On Linux 4.16 and
+# later, the sock:inet_sock_set_state tracepoint should be used instead, as
+# is done by the code that follows this. In the distant future (2021?), this
 # kprobe code can be removed. This is why there is so much code
 # duplication: to make removal easier.
 #
@@ -235,10 +236,13 @@
 """
 
 bpf_text_tracepoint = """
-TRACEPOINT_PROBE(tcp, tcp_set_state)
+TRACEPOINT_PROBE(sock, inet_sock_set_state)
 {
+    if (args->protocol != IPPROTO_TCP)
+        return 0;
+
     u32 pid = bpf_get_current_pid_tgid() >> 32;
-    // sk is mostly used as a UUID, once for skc_family, and two tcp stats:
+    // sk is mostly used as a UUID, and for two tcp stats:
     struct sock *sk = (struct sock *)args->skaddr;
 
     // lport is either used in a filter here, or later
@@ -310,10 +314,7 @@
     bpf_probe_read(&rx_b, sizeof(rx_b), &tp->bytes_received);
     bpf_probe_read(&tx_b, sizeof(tx_b), &tp->bytes_acked);
 
-    u16 family = 0;
-    bpf_probe_read(&family, sizeof(family), &sk->__sk_common.skc_family);
-
-    if (family == AF_INET) {
+    if (args->family == AF_INET) {
         struct ipv4_data_t data4 = {.span_us = delta_us,
             .rx_b = rx_b, .tx_b = tx_b};
         data4.ts_us = bpf_ktime_get_ns() / 1000;
@@ -354,7 +355,7 @@
 }
 """
 
-if (BPF.tracepoint_exists("tcp", "tcp_set_state")):
+if (BPF.tracepoint_exists("sock", "inet_sock_set_state")):
     bpf_text += bpf_text_tracepoint
 else:
     bpf_text += bpf_text_kprobe
diff --git a/tools/tcpstates.py b/tools/tcpstates.py
new file mode 100755
index 0000000..e0e05bf
--- /dev/null
+++ b/tools/tcpstates.py
@@ -0,0 +1,332 @@
+#!/usr/bin/python
+# @lint-avoid-python-3-compatibility-imports
+#
+# tcpstates   Trace the TCP session state changes with durations.
+#             For Linux, uses BCC, BPF. Embedded C.
+#
+# USAGE: tcpstates [-h] [-T] [-t] [-w] [-s] [-L LOCALPORT] [-D REMOTEPORT]
+#
+# This uses the sock:inet_sock_set_state tracepoint, added to Linux 4.16.
+# Linux 4.16 also adds more state transitions so that they can be traced.
+#
+# Copyright 2018 Netflix, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License")
+#
+# 20-Mar-2018   Brendan Gregg   Created this.
+
+from __future__ import print_function
+from bcc import BPF
+import argparse
+from socket import inet_ntop, AF_INET, AF_INET6
+from struct import pack
+import ctypes as ct
+from time import strftime
+
+# arguments
+examples = """examples:
+    ./tcpstates           # trace all TCP state changes
+    ./tcpstates -t        # include timestamp column
+    ./tcpstates -T        # include time column (HH:MM:SS)
+    ./tcpstates -w        # wider columns (fit IPv6)
+    ./tcpstates -stT      # csv output, with times & timestamps
+    ./tcpstates -L 80     # only trace local port 80
+    ./tcpstates -L 80,81  # only trace local ports 80 and 81
+    ./tcpstates -D 80     # only trace remote port 80
+"""
+parser = argparse.ArgumentParser(
+    description="Trace TCP session state changes and durations",
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    epilog=examples)
+parser.add_argument("-T", "--time", action="store_true",
+    help="include time column on output (HH:MM:SS)")
+parser.add_argument("-t", "--timestamp", action="store_true",
+    help="include timestamp on output (seconds)")
+parser.add_argument("-w", "--wide", action="store_true",
+    help="wide column output (fits IPv6 addresses)")
+parser.add_argument("-s", "--csv", action="store_true",
+    help="comma separated values output")
+parser.add_argument("-L", "--localport",
+    help="comma-separated list of local ports to trace.")
+parser.add_argument("-D", "--remoteport",
+    help="comma-separated list of remote ports to trace.")
+parser.add_argument("--ebpf", action="store_true",
+    help=argparse.SUPPRESS)
+args = parser.parse_args()
+debug = 0
+
+# define BPF program
+bpf_text = """
+#include <uapi/linux/ptrace.h>
+#define KBUILD_MODNAME "foo"
+#include <linux/tcp.h>
+#include <net/sock.h>
+#include <bcc/proto.h>
+
+BPF_HASH(last, struct sock *, u64);
+
+// separate data structs for ipv4 and ipv6
+struct ipv4_data_t {
+    u64 ts_us;
+    u64 skaddr;
+    u64 saddr;
+    u64 daddr;
+    u64 span_us;
+    u32 pid;
+    u32 ports;      // (lport << 16) | dport
+    u32 oldstate;
+    u32 newstate;
+    char task[TASK_COMM_LEN];
+};
+BPF_PERF_OUTPUT(ipv4_events);
+
+struct ipv6_data_t {
+    u64 ts_us;
+    u64 skaddr;
+    unsigned __int128 saddr;
+    unsigned __int128 daddr;
+    u64 span_us;
+    u32 pid;
+    u32 ports;      // (lport << 16) | dport
+    u32 oldstate;
+    u32 newstate;
+    char task[TASK_COMM_LEN];
+};
+BPF_PERF_OUTPUT(ipv6_events);
+
+struct id_t {
+    u32 pid;
+    char task[TASK_COMM_LEN];
+};
+
+TRACEPOINT_PROBE(sock, inet_sock_set_state)
+{
+    if (args->protocol != IPPROTO_TCP)
+        return 0;
+
+    u32 pid = bpf_get_current_pid_tgid() >> 32;
+    // sk is used as a UUID
+    struct sock *sk = (struct sock *)args->skaddr;
+
+    // lport is either used in a filter here, or later
+    u16 lport = args->sport;
+    FILTER_LPORT
+
+    // dport is either used in a filter here, or later
+    u16 dport = args->dport;
+    FILTER_DPORT
+
+    // calculate delta
+    u64 *tsp, delta_us;
+    tsp = last.lookup(&sk);
+    if (tsp == 0)
+        delta_us = 0;
+    else
+        delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
+
+    if (args->family == AF_INET) {
+        struct ipv4_data_t data4 = {
+            .span_us = delta_us,
+            .oldstate = args->oldstate, .newstate = args->newstate};
+        data4.skaddr = (u64)args->skaddr;
+        data4.ts_us = bpf_ktime_get_ns() / 1000;
+        bpf_probe_read(&data4.saddr, sizeof(u32), args->saddr);
+        bpf_probe_read(&data4.daddr, sizeof(u32), args->daddr);
+        // pack both u16 ports into the u32 field: lport high, dport low
+        data4.ports = dport + ((u32)lport << 16);
+        data4.pid = pid;
+
+        bpf_get_current_comm(&data4.task, sizeof(data4.task));
+        ipv4_events.perf_submit(args, &data4, sizeof(data4));
+
+    } else /* 6 */ {
+        struct ipv6_data_t data6 = {
+            .span_us = delta_us,
+            .oldstate = args->oldstate, .newstate = args->newstate};
+        data6.skaddr = (u64)args->skaddr;
+        data6.ts_us = bpf_ktime_get_ns() / 1000;
+        bpf_probe_read(&data6.saddr, sizeof(data6.saddr), args->saddr_v6);
+        bpf_probe_read(&data6.daddr, sizeof(data6.daddr), args->daddr_v6);
+        // pack both u16 ports into the u32 field: lport high, dport low
+        data6.ports = dport + ((u32)lport << 16);
+        data6.pid = pid;
+        bpf_get_current_comm(&data6.task, sizeof(data6.task));
+        ipv6_events.perf_submit(args, &data6, sizeof(data6));
+    }
+
+    u64 ts = bpf_ktime_get_ns();
+    last.update(&sk, &ts);
+
+    return 0;
+}
+"""
+
+if (not BPF.tracepoint_exists("sock", "inet_sock_set_state")):
+    print("ERROR: tracepoint sock:inet_sock_set_state missing "
+        "(added in Linux 4.16). Exiting")
+    exit()
+
+# code substitutions
+if args.remoteport:
+    dports = [int(dport) for dport in args.remoteport.split(',')]
+    dports_if = ' && '.join(['dport != %d' % dport for dport in dports])
+    bpf_text = bpf_text.replace('FILTER_DPORT',
+        'if (%s) { last.delete(&sk); return 0; }' % dports_if)
+if args.localport:
+    lports = [int(lport) for lport in args.localport.split(',')]
+    lports_if = ' && '.join(['lport != %d' % lport for lport in lports])
+    bpf_text = bpf_text.replace('FILTER_LPORT',
+        'if (%s) { last.delete(&sk); return 0; }' % lports_if)
+bpf_text = bpf_text.replace('FILTER_DPORT', '')
+bpf_text = bpf_text.replace('FILTER_LPORT', '')
+
+if debug or args.ebpf:
+    print(bpf_text)
+    if args.ebpf:
+        exit()
+
+# event data
+TASK_COMM_LEN = 16      # linux/sched.h
+
+class Data_ipv4(ct.Structure):
+    _fields_ = [
+        ("ts_us", ct.c_ulonglong),
+        ("skaddr", ct.c_ulonglong),
+        ("saddr", ct.c_ulonglong),
+        ("daddr", ct.c_ulonglong),
+        ("span_us", ct.c_ulonglong),
+        ("pid", ct.c_uint),
+        ("ports", ct.c_uint),       # (lport << 16) | dport
+        ("oldstate", ct.c_uint),
+        ("newstate", ct.c_uint),
+        ("task", ct.c_char * TASK_COMM_LEN)
+    ]
+
+class Data_ipv6(ct.Structure):
+    _fields_ = [
+        ("ts_us", ct.c_ulonglong),
+        ("skaddr", ct.c_ulonglong),
+        ("saddr", (ct.c_ulonglong * 2)),
+        ("daddr", (ct.c_ulonglong * 2)),
+        ("span_us", ct.c_ulonglong),
+        ("pid", ct.c_uint),
+        ("ports", ct.c_uint),       # (lport << 16) | dport
+        ("oldstate", ct.c_uint),
+        ("newstate", ct.c_uint),
+        ("task", ct.c_char * TASK_COMM_LEN)
+    ]
+
+#
+# Setup output formats
+#
+# Don't change the default output (next 2 lines): this fits in 80 chars. I
+# know it doesn't have NS or UIDs etc. I know. If you really, really, really
+# need to add columns, columns that solve real actual problems, I'd start by
+# adding an extended mode (-x) to include those columns.
+#
+header_string = "%-16s %-5s %-10.10s %s%-15s %-5s %-15s %-5s %-11s -> %-11s %s"
+format_string = ("%-16x %-5d %-10.10s %s%-15s %-5d %-15s %-5d %-11s " +
+    "-> %-11s %.3f")
+if args.wide:
+    header_string = ("%-16s %-5s %-16.16s %-2s %-26s %-5s %-26s %-5s %-11s " +
+        "-> %-11s %s")
+    format_string = ("%-16x %-5d %-16.16s %-2s %-26s %-5s %-26s %-5d %-11s " +
+        "-> %-11s %.3f")
+if args.csv:
+    header_string = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s"
+    format_string = "%x,%d,%s,%s,%s,%s,%s,%d,%s,%s,%.3f"
+
+def tcpstate2str(state):
+    # from include/net/tcp_states.h:
+    tcpstate = {
+        1: "ESTABLISHED",
+        2: "SYN_SENT",
+        3: "SYN_RECV",
+        4: "FIN_WAIT1",
+        5: "FIN_WAIT2",
+        6: "TIME_WAIT",
+        7: "CLOSE",
+        8: "CLOSE_WAIT",
+        9: "LAST_ACK",
+        10: "LISTEN",
+        11: "CLOSING",
+        12: "NEW_SYN_RECV",
+    }
+
+    if state in tcpstate:
+        return tcpstate[state]
+    else:
+        return str(state)
+
+# process event
+def print_ipv4_event(cpu, data, size):
+    event = ct.cast(data, ct.POINTER(Data_ipv4)).contents
+    global start_ts
+    if args.time:
+        if args.csv:
+            print("%s," % strftime("%H:%M:%S"), end="")
+        else:
+            print("%-8s " % strftime("%H:%M:%S"), end="")
+    if args.timestamp:
+        if start_ts == 0:
+            start_ts = event.ts_us
+        delta_s = (float(event.ts_us) - start_ts) / 1000000
+        if args.csv:
+            print("%.6f," % delta_s, end="")
+        else:
+            print("%-9.6f " % delta_s, end="")
+    print(format_string % (event.skaddr, event.pid, event.task.decode(),
+        "4" if args.wide or args.csv else "",
+        inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 16,
+        inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffff,
+        tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
+        float(event.span_us) / 1000))
+
+def print_ipv6_event(cpu, data, size):
+    event = ct.cast(data, ct.POINTER(Data_ipv6)).contents
+    global start_ts
+    if args.time:
+        if args.csv:
+            print("%s," % strftime("%H:%M:%S"), end="")
+        else:
+            print("%-8s " % strftime("%H:%M:%S"), end="")
+    if args.timestamp:
+        if start_ts == 0:
+            start_ts = event.ts_us
+        delta_s = (float(event.ts_us) - start_ts) / 1000000
+        if args.csv:
+            print("%.6f," % delta_s, end="")
+        else:
+            print("%-9.6f " % delta_s, end="")
+    print(format_string % (event.skaddr, event.pid, event.task.decode(),
+        "6" if args.wide or args.csv else "",
+        inet_ntop(AF_INET6, event.saddr), event.ports >> 16,
+        inet_ntop(AF_INET6, event.daddr), event.ports & 0xffff,
+        tcpstate2str(event.oldstate), tcpstate2str(event.newstate),
+        float(event.span_us) / 1000))
+
+# initialize BPF
+b = BPF(text=bpf_text)
+
+# header
+if args.time:
+    if args.csv:
+        print("%s," % ("TIME"), end="")
+    else:
+        print("%-8s " % ("TIME"), end="")
+if args.timestamp:
+    if args.csv:
+        print("%s," % ("TIME(s)"), end="")
+    else:
+        print("%-9s " % ("TIME(s)"), end="")
+print(header_string % ("SKADDR", "C-PID", "C-COMM",
+    "IP" if args.wide or args.csv else "",
+    "LADDR", "LPORT", "RADDR", "RPORT",
+    "OLDSTATE", "NEWSTATE", "MS"))
+
+start_ts = 0
+
+# read events
+b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
+b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
+while 1:
+    b.perf_buffer_poll()
diff --git a/tools/tcpstates_example.txt b/tools/tcpstates_example.txt
new file mode 100644
index 0000000..aca857a
--- /dev/null
+++ b/tools/tcpstates_example.txt
@@ -0,0 +1,52 @@
+Demonstrations of tcpstates, the Linux BPF/bcc version.
+
+
+tcpstates prints TCP state change information, including the duration in each
+state as milliseconds. For example, a single TCP session:
+
+# tcpstates
+SKADDR           C-PID C-COMM     LADDR           LPORT RADDR           RPORT OLDSTATE    -> NEWSTATE    MS
+ffff9fd7e8192000 22384 curl       100.66.100.185  0     52.33.159.26    80    CLOSE       -> SYN_SENT    0.000
+ffff9fd7e8192000 0     swapper/5  100.66.100.185  63446 52.33.159.26    80    SYN_SENT    -> ESTABLISHED 1.373
+ffff9fd7e8192000 22384 curl       100.66.100.185  63446 52.33.159.26    80    ESTABLISHED -> FIN_WAIT1   176.042
+ffff9fd7e8192000 0     swapper/5  100.66.100.185  63446 52.33.159.26    80    FIN_WAIT1   -> FIN_WAIT2   0.536
+ffff9fd7e8192000 0     swapper/5  100.66.100.185  63446 52.33.159.26    80    FIN_WAIT2   -> CLOSE       0.006
+^C
+
+This showed that the most time was spent in the ESTABLISHED state (which then
+transitioned to FIN_WAIT1), which was 176.042 milliseconds.
+
+The first column is the socket address, as the output may include lines from
+different sessions interleaved. The next two columns show the current on-CPU
+process ID and command name: these may show the process that owns the TCP
+session, depending on whether the state change executes synchronously in
+process context. If that's not the case, they may show kernel details.
+
+
+USAGE:
+
+# tcpstates -h
+usage: tcpstates [-h] [-T] [-t] [-w] [-s] [-L LOCALPORT] [-D REMOTEPORT]
+
+Trace TCP session state changes and durations
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -T, --time            include time column on output (HH:MM:SS)
+  -t, --timestamp       include timestamp on output (seconds)
+  -w, --wide            wide column output (fits IPv6 addresses)
+  -s, --csv             comma separated values output
+  -L LOCALPORT, --localport LOCALPORT
+                        comma-separated list of local ports to trace.
+  -D REMOTEPORT, --remoteport REMOTEPORT
+                        comma-separated list of remote ports to trace.
+
+examples:
+    ./tcpstates           # trace all TCP state changes
+    ./tcpstates -t        # include timestamp column
+    ./tcpstates -T        # include time column (HH:MM:SS)
+    ./tcpstates -w        # wider columns (fit IPv6)
+    ./tcpstates -stT      # csv output, with times & timestamps
+    ./tcpstates -L 80     # only trace local port 80
+    ./tcpstates -L 80,81  # only trace local ports 80 and 81
+    ./tcpstates -D 80     # only trace remote port 80