Use a C program as the eBPF kernel program

With proper Clang and LLVM support, the eBPF program can be compiled and
generated from a simplified C program. The original C programs are
bpf_ingress.c and bpf_egress.c, and the corresponding .o files are the
output containing the BPF bytecode that needs to be loaded into the
kernel at runtime.

Bug: 30950746
Test: run cts -m CtsNetTestCases -t android.net.cts.TrafficStatsTest
Change-Id: Iedff82bf759c979bbe8e698570eabba436b56d80
diff --git a/bpfloader/Android.bp b/bpfloader/Android.bp
index 800a797..125f94b 100644
--- a/bpfloader/Android.bp
+++ b/bpfloader/Android.bp
@@ -37,8 +37,21 @@
     ],
     srcs: [
         "BpfLoader.cpp",
-        "BpfProgSets.cpp",
     ],
 
+    required: [
+        "cgroup_bpf_ingress_prog",
+        "cgroup_bpf_egress_prog",
+    ],
+}
+
+cc_prebuilt_binary {
+    name: "cgroup_bpf_ingress_prog",
+    srcs: [ "bpf_ingress.o" ],
+}
+
+cc_prebuilt_binary {
+    name: "cgroup_bpf_egress_prog",
+    srcs: [ "bpf_egress.o" ],
 }
 
diff --git a/bpfloader/BpfLoader.cpp b/bpfloader/BpfLoader.cpp
index f663154..726e632 100644
--- a/bpfloader/BpfLoader.cpp
+++ b/bpfloader/BpfLoader.cpp
@@ -14,26 +14,193 @@
  * limitations under the License.
  */
 
+#include <arpa/inet.h>
+#include <elf.h>
 #include <error.h>
+#include <fcntl.h>
+#include <inttypes.h>
 #include <linux/bpf.h>
 #include <linux/unistd.h>
 #include <net/if.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 
 #include <android-base/stringprintf.h>
 #include <android-base/unique_fd.h>
 
 #include <netdutils/Misc.h>
-#include "BpfProgSets.h"
+#include <netdutils/Slice.h>
 #include "bpf/BpfUtils.h"
 
+#include "bpf_shared.h"
+
 using android::base::unique_fd;
+using android::netdutils::Slice;
+
+#define INGRESS_PROG "/system/bin/cgroup_bpf_ingress_prog"
+#define EGRESS_PROG "/system/bin/cgroup_bpf_egress_prog"
+#define MAP_LD_CMD_HEAD 0x18
+
+#define FAIL(str)      \
+    do {               \
+        perror((str)); \
+        return -1;     \
+    } while (0)
+
+// The BPF instruction bytes that we need to replace. x is a placeholder (e.g., COOKIE_TAG_MAP).
+#define MAP_SEARCH_PATTERN(x)             \
+    {                                     \
+        0x18, 0x01, 0x00, 0x00,           \
+        (x)[0], (x)[1], (x)[2], (x)[3],   \
+        0x00, 0x00, 0x00, 0x00,           \
+        (x)[4], (x)[5], (x)[6], (x)[7]    \
+    }
+
+// The bytes we'll replace them with. x is the actual fd number for the map at runtime.
+// The second byte is changed from 0x01 to 0x11 since 0x11 is the special command used
+// for bpf map fd loading. The original 0x01 is only a normal load command.
+#define MAP_REPLACE_PATTERN(x)            \
+    {                                     \
+        0x18, 0x11, 0x00, 0x00,           \
+        (x)[0], (x)[1], (x)[2], (x)[3],   \
+        0x00, 0x00, 0x00, 0x00,           \
+        (x)[4], (x)[5], (x)[6], (x)[7]    \
+    }
+
+#define MAP_CMD_SIZE 16
+#define LOG_BUF_SIZE 65536
 
 namespace android {
 namespace bpf {
 
+void makeFdReplacePattern(uint64_t code, uint64_t mapFd, char* pattern, char* cmd) {
+    char mapCode[sizeof(uint64_t)];
+    char mapCmd[sizeof(uint64_t)];
+    // The byte order is little endian for arm devices.
+    for (uint32_t i = 0; i < sizeof(uint64_t); i++) {
+        mapCode[i] = (code >> (i * 8)) & 0xFF;
+        mapCmd[i] = (mapFd >> (i * 8)) & 0xFF;
+    }
+
+    char tmpPattern[] = MAP_SEARCH_PATTERN(mapCode);
+    memcpy(pattern, tmpPattern, MAP_CMD_SIZE);
+    char tmpCmd[] = MAP_REPLACE_PATTERN(mapCmd);
+    memcpy(cmd, tmpCmd, MAP_CMD_SIZE);
+}
+
+int loadProg(const char* path, int cookieTagMap, int uidStatsMap, int tagStatsMap,
+             int uidCounterSetMap) {
+    int fd = open(path, O_RDONLY);
+    if (fd == -1) {
+        fprintf(stderr, "Failed to open %s program: %s", path, strerror(errno));
+        return -1;
+    }
+
+    struct stat stat;
+    if (fstat(fd, &stat)) FAIL("Fail to get file size");
+
+    off_t fileLen = stat.st_size;
+    char* baseAddr = (char*)mmap(NULL, fileLen, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (baseAddr == MAP_FAILED) FAIL("Failed to map the program into memory");
+
+    if ((uint32_t)fileLen < sizeof(Elf64_Ehdr)) FAIL("file size too small for Elf64_Ehdr");
+
+    Elf64_Ehdr* elf = (Elf64_Ehdr*)baseAddr;
+
+    // Find section names string table. This is the section whose index is e_shstrndx.
+    if (elf->e_shstrndx == SHN_UNDEF ||
+        elf->e_shoff + (elf->e_shstrndx + 1) * sizeof(Elf64_Shdr) > (uint32_t)fileLen) {
+        FAIL("cannot locate namesSection\n");
+    }
+
+    Elf64_Shdr* sections = (Elf64_Shdr*)(baseAddr + elf->e_shoff);
+
+    Elf64_Shdr* namesSection = sections + elf->e_shstrndx;
+
+    if (namesSection->sh_offset + namesSection->sh_size > (uint32_t)fileLen)
+        FAIL("namesSection out of bound\n");
+
+    const char* strTab = baseAddr + namesSection->sh_offset;
+    void* progSection = nullptr;
+    uint64_t progSize = 0;
+    for (int i = 0; i < elf->e_shnum; i++) {
+        Elf64_Shdr* section = sections + i;
+        if (((char*)section - baseAddr) + sizeof(Elf64_Shdr) > (uint32_t)fileLen) {
+            FAIL("next section is out of bound\n");
+        }
+
+        if (!strcmp(strTab + section->sh_name, BPF_PROG_SEC_NAME)) {
+            progSection = baseAddr + section->sh_offset;
+            progSize = (uint64_t)section->sh_size;
+            break;
+        }
+    }
+
+    if (!progSection) FAIL("program section not found");
+    if ((char*)progSection - baseAddr + progSize > (uint32_t)fileLen)
+        FAIL("programSection out of bound\n");
+
+    char* prog = new char[progSize]();
+    memcpy(prog, progSection, progSize);
+
+    char cookieTagMapFdPattern[MAP_CMD_SIZE];
+    char cookieTagMapFdLoadByte[MAP_CMD_SIZE];
+    makeFdReplacePattern(COOKIE_TAG_MAP, cookieTagMap, cookieTagMapFdPattern,
+                         cookieTagMapFdLoadByte);
+
+    char uidCounterSetMapFdPattern[MAP_CMD_SIZE];
+    char uidCounterSetMapFdLoadByte[MAP_CMD_SIZE];
+    makeFdReplacePattern(UID_COUNTERSET_MAP, uidCounterSetMap, uidCounterSetMapFdPattern,
+                         uidCounterSetMapFdLoadByte);
+
+    char tagStatsMapFdPattern[MAP_CMD_SIZE];
+    char tagStatsMapFdLoadByte[MAP_CMD_SIZE];
+    makeFdReplacePattern(TAG_STATS_MAP, tagStatsMap, tagStatsMapFdPattern, tagStatsMapFdLoadByte);
+
+    char uidStatsMapFdPattern[MAP_CMD_SIZE];
+    char uidStatsMapFdLoadByte[MAP_CMD_SIZE];
+    makeFdReplacePattern(UID_STATS_MAP, uidStatsMap, uidStatsMapFdPattern, uidStatsMapFdLoadByte);
+
+    char* mapHead = prog;
+    while ((uint64_t)(mapHead - prog + MAP_CMD_SIZE) <= progSize) {
+        // Scan the program, examining all possible places that might be the start of a map load
+        // operation (i.e., all bytes of value MAP_LD_CMD_HEAD).
+        //
+        // In each of these places, check whether it is the start of one of the patterns we want to
+        // replace, and if so, replace it.
+        mapHead = (char*)memchr(mapHead, MAP_LD_CMD_HEAD, progSize - (mapHead - prog));
+        if (!mapHead) break;
+        if ((uint64_t)(mapHead - prog + MAP_CMD_SIZE) <= progSize) {
+            if (!memcmp(mapHead, cookieTagMapFdPattern, MAP_CMD_SIZE)) {
+                memcpy(mapHead, cookieTagMapFdLoadByte, MAP_CMD_SIZE);
+                mapHead += MAP_CMD_SIZE - 1;  // net +MAP_CMD_SIZE after the ++ below
+            } else if (!memcmp(mapHead, uidCounterSetMapFdPattern, MAP_CMD_SIZE)) {
+                memcpy(mapHead, uidCounterSetMapFdLoadByte, MAP_CMD_SIZE);
+                mapHead += MAP_CMD_SIZE - 1;  // net +MAP_CMD_SIZE after the ++ below
+            } else if (!memcmp(mapHead, tagStatsMapFdPattern, MAP_CMD_SIZE)) {
+                memcpy(mapHead, tagStatsMapFdLoadByte, MAP_CMD_SIZE);
+                mapHead += MAP_CMD_SIZE - 1;  // net +MAP_CMD_SIZE after the ++ below
+            } else if (!memcmp(mapHead, uidStatsMapFdPattern, MAP_CMD_SIZE)) {
+                memcpy(mapHead, uidStatsMapFdLoadByte, MAP_CMD_SIZE);
+                mapHead += MAP_CMD_SIZE - 1;  // net +MAP_CMD_SIZE after the ++ below
+            }
+        }
+        mapHead++;
+    }
+    Slice insns = Slice(prog, progSize);
+    char bpf_log_buf[LOG_BUF_SIZE];
+    Slice bpfLog = Slice(bpf_log_buf, sizeof(bpf_log_buf));
+    return bpfProgLoad(BPF_PROG_TYPE_CGROUP_SKB, insns, "Apache 2.0", 0, bpfLog);
+}
+
 int loadAndAttachProgram(bpf_attach_type type, const char* path, const char* name,
                          const unique_fd& cookieTagMap, const unique_fd& uidCounterSetMap,
                          const unique_fd& uidStatsMap, const unique_fd& tagStatsMap) {
@@ -45,11 +212,11 @@
 
     unique_fd fd;
     if (type == BPF_CGROUP_INET_EGRESS) {
-        fd.reset(loadEgressProg(cookieTagMap.get(), uidStatsMap.get(), tagStatsMap.get(),
-                                uidCounterSetMap.get()));
+        fd.reset(loadProg(INGRESS_PROG, cookieTagMap.get(), uidStatsMap.get(), tagStatsMap.get(),
+                          uidCounterSetMap.get()));
     } else {
-        fd.reset(loadIngressProg(cookieTagMap.get(), uidStatsMap.get(), tagStatsMap.get(),
-                                 uidCounterSetMap.get()));
+        fd.reset(loadProg(EGRESS_PROG, cookieTagMap.get(), uidStatsMap.get(), tagStatsMap.get(),
+                          uidCounterSetMap.get()));
     }
 
     if (fd < 0) {
diff --git a/bpfloader/BpfProgSets.cpp b/bpfloader/BpfProgSets.cpp
deleted file mode 100644
index 195d7bd..0000000
--- a/bpfloader/BpfProgSets.cpp
+++ /dev/null
@@ -1,604 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <arpa/inet.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/socket.h>
-
-#include "BpfProgSets.h"
-#include "bpf/BpfUtils.h"
-#include "netdutils/Slice.h"
-
-using android::netdutils::Slice;
-
-namespace android {
-namespace bpf {
-
-int loadIngressProg(int cookieTagMap, int uidStatsMap, int tagStatsMap, int uidCounterSetMap) {
-    struct bpf_insn ingressProg[] = {
-        /*
-         * Save sk_buff for future usage. value stored in R6 to R10 will
-         * not be reset after a bpf helper function call.
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_6, BPF_REG_1, 0, 0),
-        /*
-         * pc1: BPF_FUNC_get_socket_cookie takes one parameter,
-         * R1: sk_buff
-         */
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_socket_cookie),
-        /* pc2-4: save &socketCookie to r7 for future usage*/
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_DW), BPF_REG_10, BPF_REG_0, -8, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_7, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_7, 0, 0, -8),
-        /*
-         * pc5-8: set up the registers for BPF_FUNC_map_lookup_elem,
-         * it takes two parameters (R1: map_fd,  R2: &socket_cookie)
-         */
-        LOAD_MAP_FD(BPF_REG_1, (uint32_t)cookieTagMap),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        /*
-         * pc9. if r0 != 0x0, go to pc+14, since we have the cookie
-         * stored already
-         * Otherwise do pc10-22 to setup a new data entry.
-         */
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 1, 0),  // 10
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 81, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_8, BPF_REG_0, 0, 0), LOAD_MAP_FD(BPF_REG_7, tagStatsMap),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_2, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_2, -132, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_2, 0, 0, -132),
-        LOAD_MAP_FD(BPF_REG_1, uidCounterSetMap),  // 20
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 2, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_W), BPF_REG_10, 0,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, counterSet)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 2, 0),
-        BPF_INS_BLK(MEM_LD(BPF_B), BPF_REG_1, BPF_REG_0, 0, 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_1,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, counterSet)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_2, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, ifindex)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_3, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, uid)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_4, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, tag)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_2,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, ifaceIndex)), 0),  // 30
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_3,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_4,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, tag)), 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_9, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_9, 0, 0, -32),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 23, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxTcpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxTcpPackets)), 0),  // 40
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxUdpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxUdpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txTcpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txTcpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txUdpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txUdpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxOtherPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxOtherBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txOtherBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txOtherPackets)),
-                    0),  // 50
-        /*
-         * add new map entry using BPF_FUNC_map_update_elem, it takes
-         * 4 parameters (R1: map_fd, R2: &socket_cookie, R3: &stats,
-         * R4: flags)
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -128),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 1, 0),  // 60
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 56, 0),
-        /*
-         * pc24-30 update the packet info to a exist data entry, it can
-         * be done by directly write to pointers instead of using
-         * BPF_FUNC_map_update_elem helper function
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_9, BPF_REG_0, 0, 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_7, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, len)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_1, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, protocol)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_1, 0, 7, htons(ETH_P_IP)),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_2, 0, 0, IPV4_TRANSPORT_PROTOCOL_OFFSET),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -133),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 1),  // 70
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 7, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_1, 0, 15, htons(ETH_P_IPV6)),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_2, 0, 0, IPV6_TRANSPORT_PROTOCOL_OFFSET),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -133),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 1),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_5, 0, 0, 1),  // 80
-        BPF_INS_BLK(MEM_LD(BPF_B), BPF_REG_0, BPF_REG_10, -133, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 3, IPPROTO_TCP),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_5,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxTcpPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxTcpBytes)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 6, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 3, IPPROTO_UDP),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_5,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxUdpPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxUdpBytes)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 2, 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_5,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxOtherPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxOtherBytes)), 0),  // 90
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 25, 0),
-
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_socket_uid),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_0,
-                    -16 + static_cast<__s16>(offsetof(struct UidTag, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_W), BPF_REG_10, 0,
-                    -16 + static_cast<__s16>(offsetof(struct UidTag, tag)), 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_8, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_8, 0, 0, -16),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_2, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_2, -132, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_10, 0, 0),  // 100
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_2, 0, 0, -132),
-        LOAD_MAP_FD(BPF_REG_1, uidCounterSetMap),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 2, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_W), BPF_REG_10, 0,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, counterSet)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 2, 0),
-        BPF_INS_BLK(MEM_LD(BPF_B), BPF_REG_1, BPF_REG_0, 0, 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_1,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, counterSet)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_2, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, ifindex)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_3, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, uid)), 0),  // 110
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_4, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, tag)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_2,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, ifaceIndex)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_3,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_4,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, tag)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 1, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_W), BPF_REG_10, 0,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, tag)), 0),
-        LOAD_MAP_FD(BPF_REG_7, uidStatsMap), BPF_INS_BLK(REG_MOV64, BPF_REG_9, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_9, 0, 0, -32),  // 120
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 24, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxTcpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxTcpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxUdpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxUdpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txTcpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txTcpBytes)), 0),  // 130
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txUdpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txUdpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxOtherPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxOtherBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txOtherBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txOtherPackets)), 0),
-        /*
-         * add new map entry using BPF_FUNC_map_update_elem, it takes
-         * 4 parameters (R1: map_fd, R2: &socket_cookie, R3: &stats,
-         * R4: flags)
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -128),  // 140
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 2, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_0, 0, 0, 1), BPF_INS_BLK(PROG_EXIT, 0, 0, 0, 0),
-        /*
-         * pc24-30 update the packet info to a exist data entry, it can
-         * be done by directly write to pointers instead of using
-         * BPF_FUNC_map_update_elem helper function
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_9, BPF_REG_0, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_7, 0, 0, 1),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_8, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, len)), 0),  // 150
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_1, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, protocol)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_1, 0, 7, htons(ETH_P_IP)),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_2, 0, 0, IPV4_TRANSPORT_PROTOCOL_OFFSET),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -133),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 1),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 7, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_1, 0, 15, htons(ETH_P_IPV6)),  // 160
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_2, 0, 0, IPV6_TRANSPORT_PROTOCOL_OFFSET),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -133),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 1),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
-        BPF_INS_BLK(MEM_LD(BPF_B), BPF_REG_0, BPF_REG_10, -133, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 3, IPPROTO_TCP),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxTcpPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxTcpBytes)), 0),  // 170
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 6, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 3, IPPROTO_UDP),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxUdpPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxUdpBytes)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 2, 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxOtherPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct StatsValue, rxOtherBytes)), 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_0, 0, 0, 1), BPF_INS_BLK(PROG_EXIT, 0, 0, 0, 0),  // 179
-    };
-    Slice ingressInsn = Slice(ingressProg, sizeof(ingressProg));
-    char bpf_log_buf[LOG_BUF_SIZE];
-    Slice bpfLog = Slice(bpf_log_buf, sizeof(bpf_log_buf));
-
-    return bpfProgLoad(BPF_PROG_TYPE_CGROUP_SKB, ingressInsn, "Apache", 0, bpfLog);
-}
-
-int loadEgressProg(int cookieTagMap, int uidStatsMap, int tagStatsMap, int uidCounterSetMap) {
-    struct bpf_insn egressProg[] = {
-        /*
-         * Save sk_buff for future usage. value stored in R6 to R10 will
-         * not be reset after a bpf helper function call.
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_6, BPF_REG_1, 0, 0),
-        /*
-         * pc1: BPF_FUNC_get_socket_cookie takes one parameter,
-         * R1: sk_buff
-         */
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_socket_cookie),
-        /* pc2-4: save &socketCookie to r7 for future usage*/
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_DW), BPF_REG_10, BPF_REG_0, -8, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_7, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_7, 0, 0, -8),
-        /*
-         * pc5-8: set up the registers for BPF_FUNC_map_lookup_elem,
-         * it takes two parameters (R1: map_fd,  R2: &socket_cookie)
-         */
-        LOAD_MAP_FD(BPF_REG_1, (uint32_t)cookieTagMap),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        /*
-         * pc9. if r0 != 0x0, go to pc+14, since we have the cookie
-         * stored already
-         * Otherwise do pc10-22 to setup a new data entry.
-         */
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 1, 0),  // 10
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 81, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_8, BPF_REG_0, 0, 0), LOAD_MAP_FD(BPF_REG_7, tagStatsMap),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_2, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_2, -132, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_2, 0, 0, -132),
-        LOAD_MAP_FD(BPF_REG_1, uidCounterSetMap),  // 20
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 2, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_W), BPF_REG_10, 0,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, counterSet)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 2, 0),
-        BPF_INS_BLK(MEM_LD(BPF_B), BPF_REG_1, BPF_REG_0, 0, 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_1,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, counterSet)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_2, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, ifindex)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_3, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, uid)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_4, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, tag)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_2,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, ifaceIndex)), 0),  // 30
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_3,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_4,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, tag)), 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_9, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_9, 0, 0, -32),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 23, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxTcpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxTcpPackets)), 0),  // 40
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxUdpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxUdpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txTcpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txTcpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txUdpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txUdpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxOtherPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxOtherBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txOtherBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txOtherPackets)),
-                    0),  // 50
-        /*
-         * add new map entry using BPF_FUNC_map_update_elem, it takes
-         * 4 parameters (R1: map_fd, R2: &socket_cookie, R3: &stats,
-         * R4: flags)
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -128),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 1, 0),  // 60
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 56, 0),
-        /*
-         * pc24-30 update the packet info to a exist data entry, it can
-         * be done by directly write to pointers instead of using
-         * BPF_FUNC_map_update_elem helper function
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_9, BPF_REG_0, 0, 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_7, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, len)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_1, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, protocol)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_1, 0, 7, htons(ETH_P_IP)),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_2, 0, 0, IPV4_TRANSPORT_PROTOCOL_OFFSET),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -133),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 1),  // 70
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 7, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_1, 0, 15, htons(ETH_P_IPV6)),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_2, 0, 0, IPV6_TRANSPORT_PROTOCOL_OFFSET),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -133),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 1),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_5, 0, 0, 1),  // 80
-        BPF_INS_BLK(MEM_LD(BPF_B), BPF_REG_0, BPF_REG_10, -133, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 3, IPPROTO_TCP),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_5,
-                    static_cast<__s16>(offsetof(struct StatsValue, txTcpPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, txTcpBytes)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 6, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 3, IPPROTO_UDP),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_5,
-                    static_cast<__s16>(offsetof(struct StatsValue, txUdpPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, txUdpBytes)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 2, 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_5,
-                    static_cast<__s16>(offsetof(struct StatsValue, txOtherPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, txOtherBytes)), 0),  // 90
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 25, 0),
-
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_socket_uid),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_0,
-                    -16 + static_cast<__s16>(offsetof(struct UidTag, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_W), BPF_REG_10, 0,
-                    -16 + static_cast<__s16>(offsetof(struct UidTag, tag)), 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_8, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_8, 0, 0, -16),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_2, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_2, -132, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_10, 0, 0),  // 100
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_2, 0, 0, -132),
-        LOAD_MAP_FD(BPF_REG_1, uidCounterSetMap),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 2, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_W), BPF_REG_10, 0,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, counterSet)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 2, 0),
-        BPF_INS_BLK(MEM_LD(BPF_B), BPF_REG_1, BPF_REG_0, 0, 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_1,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, counterSet)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_2, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, ifindex)), 0),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_3, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, uid)), 0),  // 110
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_4, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct UidTag, tag)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_2,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, ifaceIndex)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_3,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, uid)), 0),
-        BPF_INS_BLK(MEM_SET_BY_REG(BPF_W), BPF_REG_10, BPF_REG_4,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, tag)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 1, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_W), BPF_REG_10, 0,
-                    -32 + static_cast<__s16>(offsetof(struct StatsKey, tag)), 0),
-        LOAD_MAP_FD(BPF_REG_7, uidStatsMap), BPF_INS_BLK(REG_MOV64, BPF_REG_9, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_9, 0, 0, -32),  // 120
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 24, 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxTcpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxTcpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxUdpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxUdpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txTcpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txTcpBytes)), 0),  // 130
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txUdpPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txUdpBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxOtherPackets)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, rxOtherBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txOtherBytes)), 0),
-        BPF_INS_BLK(MEM_SET_BY_VAL(BPF_DW), BPF_REG_10, 0,
-                    -128 + static_cast<__s16>(offsetof(struct StatsValue, txOtherPackets)), 0),
-        /*
-         * add new map entry using BPF_FUNC_map_update_elem, it takes
-         * 4 parameters (R1: map_fd, R2: &socket_cookie, R3: &stats,
-         * R4: flags)
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -128),  // 140
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_7, 0, 0),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_2, BPF_REG_9, 0, 0),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 2, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_0, 0, 0, 1), BPF_INS_BLK(PROG_EXIT, 0, 0, 0, 0),
-        /*
-         * pc24-30 update the packet info to a exist data entry, it can
-         * be done by directly write to pointers instead of using
-         * BPF_FUNC_map_update_elem helper function
-         */
-        BPF_INS_BLK(REG_MOV64, BPF_REG_9, BPF_REG_0, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_7, 0, 0, 1),
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_8, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, len)), 0),  // 150
-        BPF_INS_BLK(MEM_LD(BPF_W), BPF_REG_1, BPF_REG_6,
-                    static_cast<__s16>(offsetof(struct __sk_buff, protocol)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_1, 0, 7, htons(ETH_P_IP)),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_2, 0, 0, IPV4_TRANSPORT_PROTOCOL_OFFSET),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -133),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 1),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 7, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_1, 0, 15, htons(ETH_P_IPV6)),  // 160
-        BPF_INS_BLK(REG_MOV64, BPF_REG_1, BPF_REG_6, 0, 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_2, 0, 0, IPV6_TRANSPORT_PROTOCOL_OFFSET),
-        BPF_INS_BLK(REG_MOV64, BPF_REG_3, BPF_REG_10, 0, 0),
-        BPF_INS_BLK(VAL_ALU64(BPF_ADD), BPF_REG_3, 0, 0, -133),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_4, 0, 0, 1),
-        BPF_INS_BLK(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
-        BPF_INS_BLK(MEM_LD(BPF_B), BPF_REG_0, BPF_REG_10, -133, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 3, IPPROTO_TCP),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, txTcpPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct StatsValue, txTcpBytes)), 0),  // 170
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 6, 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JNE), BPF_REG_0, 0, 3, IPPROTO_UDP),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, txUdpPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct StatsValue, txUdpBytes)), 0),
-        BPF_INS_BLK(VAL_ALU_JMP(BPF_JA), 0, 0, 2, 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_7,
-                    static_cast<__s16>(offsetof(struct StatsValue, txOtherPackets)), 0),
-        BPF_INS_BLK(REG_ATOMIC_ADD(BPF_DW), BPF_REG_9, BPF_REG_8,
-                    static_cast<__s16>(offsetof(struct StatsValue, txOtherBytes)), 0),
-        BPF_INS_BLK(VAL_MOV64, BPF_REG_0, 0, 0, 1), BPF_INS_BLK(PROG_EXIT, 0, 0, 0, 0),  // 179
-    };
-
-    Slice egressInsn = Slice(egressProg, sizeof(egressProg));
-    char bpf_log_buf[LOG_BUF_SIZE];
-    Slice bpfLog = Slice(bpf_log_buf, sizeof(bpf_log_buf));
-
-    return bpfProgLoad(BPF_PROG_TYPE_CGROUP_SKB, egressInsn, "Apache", 0, bpfLog);
-}
-
-}  // namespace bpf
-}  // namespace android
diff --git a/bpfloader/BpfProgSets.h b/bpfloader/BpfProgSets.h
deleted file mode 100644
index 702fa28..0000000
--- a/bpfloader/BpfProgSets.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-namespace android {
-namespace bpf {
-
-#define LOG_BUF_SIZE 65536
-
-constexpr const int IPV6_TRANSPORT_PROTOCOL_OFFSET = 6;
-constexpr const int IPV4_TRANSPORT_PROTOCOL_OFFSET = 9;
-
-int loadIngressProg(int cookieTagMap, int uidStatsMap, int tagStatsMap, int uidCounterSetMap);
-int loadEgressProg(int cookieTagMap, int uidStatsMap, int tagStatsMap, int uidCounterSetMap);
-
-}  // namespace bpf
-}  // namespace android
diff --git a/bpfloader/bpf_egress.c b/bpfloader/bpf_egress.c
new file mode 100644
index 0000000..5f577d0
--- /dev/null
+++ b/bpfloader/bpf_egress.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include "bpf_kern.h"
+#include "bpf_shared.h"
+
+ELF_SEC(BPF_PROG_SEC_NAME)
+/*
+ * Per-packet egress accounting, attached as a cgroup skb program.
+ * Resolves the owning uid/tag of the socket, then bumps tx counters in
+ * the tag-stats map (when the socket is tagged) and the per-uid stats
+ * map. Always returns BPF_PASS: this program only accounts, never drops.
+ */
+int bpf_cgroup_egress(struct __sk_buff* skb) {
+    /* Map the socket cookie back to its uid/tag; fall back to the
+     * socket uid with tag 0 when the socket was never tagged. */
+    uint64_t cookie = get_socket_cookie(skb);
+    struct uid_tag* utag = find_map_entry(COOKIE_TAG_MAP, &cookie);
+    uint32_t uid, tag;
+    if (utag) {
+        uid = utag->uid;
+        tag = utag->tag;
+    } else {
+        uid = get_socket_uid(skb);
+        tag = 0;
+    }
+
+    struct stats_key key = {.uid = uid, .tag = tag, .counterSet = 0, .ifaceIndex = skb->ifindex};
+
+    /* counterSet defaults to 0 unless userspace configured one for this uid. */
+    uint32_t* counterSet = find_map_entry(UID_COUNTERSET_MAP, &uid);
+    if (counterSet) key.counterSet = *counterSet;
+
+    if (tag) {
+        struct stats_value* tagValue = find_map_entry(TAG_STATS_MAP, &key);
+        if (!tagValue) {
+            /* Create a zeroed entry if missing; BPF_NOEXIST keeps us from
+             * clobbering an entry another cpu inserted concurrently, and
+             * the re-lookup picks up whichever insert won. */
+            struct stats_value newValue = {};
+            write_to_map_entry(TAG_STATS_MAP, &key, &newValue, BPF_NOEXIST);
+            tagValue = find_map_entry(TAG_STATS_MAP, &key);
+        }
+        if (tagValue) {
+            __sync_fetch_and_add(&tagValue->txPackets, 1);
+            __sync_fetch_and_add(&tagValue->txBytes, skb->len);
+        }
+    }
+
+    /* Per-uid stats are keyed with tag 0 regardless of the socket's tag. */
+    key.tag = 0;
+    struct stats_value* value = find_map_entry(UID_STATS_MAP, &key);
+    if (!value) {
+        struct stats_value newValue = {};
+        write_to_map_entry(UID_STATS_MAP, &key, &newValue, BPF_NOEXIST);
+        value = find_map_entry(UID_STATS_MAP, &key);
+    }
+    if (value) {
+        __sync_fetch_and_add(&value->txPackets, 1);
+        __sync_fetch_and_add(&value->txBytes, skb->len);
+    }
+    return BPF_PASS;
+}
diff --git a/bpfloader/bpf_egress.o b/bpfloader/bpf_egress.o
new file mode 100644
index 0000000..b8a09ee
--- /dev/null
+++ b/bpfloader/bpf_egress.o
Binary files differ
diff --git a/bpfloader/bpf_ingress.c b/bpfloader/bpf_ingress.c
new file mode 100644
index 0000000..0a5c5b8
--- /dev/null
+++ b/bpfloader/bpf_ingress.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include "bpf_kern.h"
+#include "bpf_shared.h"
+
+ELF_SEC(BPF_PROG_SEC_NAME)
+/*
+ * Per-packet ingress accounting, attached as a cgroup skb program.
+ * Resolves the owning uid/tag of the socket, then bumps rx counters in
+ * the tag-stats map (when the socket is tagged) and the per-uid stats
+ * map. Always returns BPF_PASS: this program only accounts, never drops.
+ */
+int bpf_cgroup_ingress(struct __sk_buff* skb) {
+    /* Map the socket cookie back to its uid/tag; fall back to the
+     * socket uid with tag 0 when the socket was never tagged. */
+    uint64_t cookie = get_socket_cookie(skb);
+    struct uid_tag* utag = find_map_entry(COOKIE_TAG_MAP, &cookie);
+    uint32_t uid, tag;
+    if (utag) {
+        uid = utag->uid;
+        tag = utag->tag;
+    } else {
+        uid = get_socket_uid(skb);
+        tag = 0;
+    }
+
+    struct stats_key key = {.uid = uid, .tag = tag, .counterSet = 0, .ifaceIndex = skb->ifindex};
+
+    /* counterSet defaults to 0 unless userspace configured one for this uid. */
+    uint32_t* counterSet = find_map_entry(UID_COUNTERSET_MAP, &uid);
+    if (counterSet) key.counterSet = *counterSet;
+
+    if (tag) {
+        struct stats_value* tagValue = find_map_entry(TAG_STATS_MAP, &key);
+        if (!tagValue) {
+            /* Create a zeroed entry if missing; BPF_NOEXIST keeps us from
+             * clobbering an entry another cpu inserted concurrently, and
+             * the re-lookup picks up whichever insert won. */
+            struct stats_value newValue = {};
+            write_to_map_entry(TAG_STATS_MAP, &key, &newValue, BPF_NOEXIST);
+            tagValue = find_map_entry(TAG_STATS_MAP, &key);
+        }
+        if (tagValue) {
+            __sync_fetch_and_add(&tagValue->rxPackets, 1);
+            __sync_fetch_and_add(&tagValue->rxBytes, skb->len);
+        }
+    }
+
+    /* Per-uid stats are keyed with tag 0 regardless of the socket's tag. */
+    key.tag = 0;
+    struct stats_value* value = find_map_entry(UID_STATS_MAP, &key);
+    if (!value) {
+        struct stats_value newValue = {};
+        write_to_map_entry(UID_STATS_MAP, &key, &newValue, BPF_NOEXIST);
+        value = find_map_entry(UID_STATS_MAP, &key);
+    }
+    if (value) {
+        __sync_fetch_and_add(&value->rxPackets, 1);
+        __sync_fetch_and_add(&value->rxBytes, skb->len);
+    }
+    return BPF_PASS;
+}
diff --git a/bpfloader/bpf_ingress.o b/bpfloader/bpf_ingress.o
new file mode 100644
index 0000000..4d15cf1
--- /dev/null
+++ b/bpfloader/bpf_ingress.o
Binary files differ
diff --git a/bpfloader/bpf_kern.h b/bpfloader/bpf_kern.h
new file mode 100644
index 0000000..ee818f8
--- /dev/null
+++ b/bpfloader/bpf_kern.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+
+/* Place the annotated symbol in the named ELF section; "used" prevents
+ * the compiler from discarding it even if nothing in-file references it. */
+#define ELF_SEC(NAME) __attribute__((section(NAME), used))
+
+/* Value stored in COOKIE_TAG_MAP: the uid and tag that own a socket,
+ * keyed by the socket cookie. */
+struct uid_tag {
+    uint32_t uid;
+    uint32_t tag;
+};
+
+/* Key of the uid/tag stats maps. For the per-uid map, tag is always 0. */
+struct stats_key {
+    uint32_t uid;
+    uint32_t tag;
+    uint32_t counterSet;
+    uint32_t ifaceIndex;
+};
+
+/* Value of the stats maps; counters are bumped with atomic adds
+ * (__sync_fetch_and_add) from the BPF programs. */
+struct stats_value {
+    uint64_t rxPackets;
+    uint64_t rxBytes;
+    uint64_t txPackets;
+    uint64_t txBytes;
+};
+
+/* helper functions called from eBPF programs written in C */
+static void* (*find_map_entry)(uint64_t map, void* key) = (void*)BPF_FUNC_map_lookup_elem;
+static int (*write_to_map_entry)(uint64_t map, void* key, void* value,
+                                 uint64_t flags) = (void*)BPF_FUNC_map_update_elem;
+static int (*delete_map_entry)(uint64_t map, void* key) = (void*)BPF_FUNC_map_delete_elem;
+static uint64_t (*get_socket_cookie)(struct __sk_buff* skb) = (void*)BPF_FUNC_get_socket_cookie;
+static uint32_t (*get_socket_uid)(struct __sk_buff* skb) = (void*)BPF_FUNC_get_socket_uid;
+static int (*bpf_skb_load_bytes)(struct __sk_buff* skb, int off, void* to,
+                                 int len) = (void*)BPF_FUNC_skb_load_bytes;
+
+/* Return values for cgroup skb programs: BPF_PASS lets the packet
+ * through, BPF_DROP discards it. */
+#define BPF_PASS 1
+#define BPF_DROP 0
diff --git a/bpfloader/bpf_shared.h b/bpfloader/bpf_shared.h
new file mode 100644
index 0000000..c314c2b
--- /dev/null
+++ b/bpfloader/bpf_shared.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// const values shared by both kernel program and userspace bpfloader
+
+// ELF section the BPF programs are emitted into; the loader looks this
+// section up when extracting bytecode from the .o files.
+#define BPF_PROG_SEC_NAME "kern_prog"
+
+// NOTE(review): these 64-bit constants appear to be sentinel map
+// identifiers baked into the compiled bytecode; presumably the loader
+// rewrites them with the real map fds before BPF_PROG_LOAD — confirm
+// against BpfLoader.cpp.
+#define COOKIE_TAG_MAP 0xbfceaaffffffffff
+#define UID_COUNTERSET_MAP 0xbfdceeafffffffff
+#define UID_STATS_MAP 0xbfdaafffffffffff
+#define TAG_STATS_MAP 0xbfaaafffffffffff