ClatUtils - implement tcFilterAddDevBpf()

Test: atest netd_unit_test
Bug: 65674744
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Change-Id: Ibb4de088b97039d4212c1db183f3142803e50387
diff --git a/server/ClatUtils.cpp b/server/ClatUtils.cpp
index f282ab0..edfce57 100644
--- a/server/ClatUtils.cpp
+++ b/server/ClatUtils.cpp
@@ -16,9 +16,11 @@
 
 #include "ClatUtils.h"
 
+#include <arpa/inet.h>
 #include <errno.h>
 #include <linux/if.h>
 #include <linux/netlink.h>
+#include <linux/pkt_cls.h>
 #include <linux/pkt_sched.h>
 #include <linux/rtnetlink.h>
 #include <sys/ioctl.h>
@@ -204,5 +206,152 @@
     return doTcQdiscClsact(fd, ifIndex, RTM_DELQDISC, 0);
 }
 
+// Attaches the bpf program 'bpfFd' as a direct-action 'bpf' classifier to the clsact ingress
+// hook of interface 'ifIndex', by sending an RTM_NEWTFILTER request on netlink socket 'fd'.
+// 'ethernet' selects which of the two pinned program names is stored in TCA_BPF_NAME.
+// Returns -errno if send() fails, -EMSGSIZE on a short send, else processNetlinkResponse(fd).
+// Equivalent to:
+// tc filter add dev ... ingress prio 1 protocol ipv6 bpf object-pinned /sys/fs/bpf/... direct-action
+int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
+    // The priority doesn't matter until we actually start attaching multiple
+    // things to the same interface's ingress point.
+    const int prio = 1;
+
+    // Name of the filter kind we're attaching, ie. the 'bpf' packet classifier
+    // (enabled by kernel config option CONFIG_NET_CLS_BPF).  It is a macro so
+    // that it is a compile time constant and the struct field that holds it
+    // further down the function is sized correctly already during the build.
+#define BPF "bpf"
+    const char bpf[] = BPF;
+    // sizeof() includes the terminating NUL
+#define ASCIIZ_LEN_BPF sizeof(bpf)
+
+    // Replicates the program name suffix the 'tc' Linux cli uses when it attaches programs.
+#define FSOBJ_SUFFIX ":[*fsobj]"
+
+    // Expands (from header files) to prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
+    // and is the name of the pinned ebpf program for ARPHRD_RAWIP interfaces
+    // (also compatible with anything that has 0 size L2 header).
+#define NAME_RAWIP CLAT_PROG_RAWIP_NAME FSOBJ_SUFFIX
+    const char name_rawip[] = NAME_RAWIP;
+
+    // Expands (from header files) to prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
+    // and is the name of the pinned ebpf program for ARPHRD_ETHER interfaces
+    // (also compatible with anything that has standard ethernet header).
+#define NAME_ETHER CLAT_PROG_ETHER_NAME FSOBJ_SUFFIX
+    const char name_ether[] = NAME_ETHER;
+
+    // The actual name is picked at run time via the 'ethernet' boolean, but the
+    // struct needs compile time sized space, hence this macro takes the larger
+    // of the two possibilities.  In practice both are actually the same size.
+#define ASCIIZ_MAXLEN_NAME \
+    ((sizeof(name_rawip) > sizeof(name_ether)) ? sizeof(name_rawip) : sizeof(name_ether))
+
+    // This is not a compile time constant and is used in strncpy below
+#define NAME (ethernet ? NAME_ETHER : NAME_RAWIP)
+
+    struct {
+        nlmsghdr n;
+        tcmsg t;
+        struct {
+            nlattr attr;
+            char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
+        } kind;
+        struct {
+            nlattr attr;
+            struct {
+                nlattr attr;
+                __u32 u32;
+            } fd;
+            struct {
+                nlattr attr;
+                char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
+            } name;
+            struct {
+                nlattr attr;
+                __u32 u32;
+            } flags;
+        } options;
+    } req = {
+            .n =
+                    {
+                            .nlmsg_len = sizeof(req),
+                            .nlmsg_type = RTM_NEWTFILTER,
+                            .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
+                    },
+            .t =
+                    {
+                            .tcm_family = AF_UNSPEC,
+                            .tcm_ifindex = ifIndex,
+                            .tcm_handle = TC_H_UNSPEC,
+                            .tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS),
+                            .tcm_info = (prio << 16) | htons(ETH_P_IPV6),
+                    },
+            .kind =
+                    {
+                            .attr =
+                                    {
+                                            .nla_len = sizeof(req.kind),
+                                            .nla_type = TCA_KIND,
+                                    },
+                            .str = BPF,
+                    },
+            .options =
+                    {
+                            .attr =
+                                    {
+                                            .nla_len = sizeof(req.options),
+                                            .nla_type = TCA_OPTIONS,
+                                    },
+                            .fd =
+                                    {
+                                            .attr =
+                                                    {
+                                                            .nla_len = sizeof(req.options.fd),
+                                                            .nla_type = TCA_BPF_FD,
+                                                    },
+                                            .u32 = static_cast<__u32>(bpfFd),
+                                    },
+                            .name =
+                                    {
+                                            .attr =
+                                                    {
+                                                            .nla_len = sizeof(req.options.name),
+                                                            .nla_type = TCA_BPF_NAME,
+                                                    },
+                                            // Visible via 'tc filter show', but
+                                            // is overwritten by strncpy below
+                                            .str = "placeholder",
+                                    },
+                            .flags =
+                                    {
+                                            .attr =
+                                                    {
+                                                            .nla_len = sizeof(req.options.flags),
+                                                            .nla_type = TCA_BPF_FLAGS,
+                                                    },
+                                            .u32 = TCA_BPF_FLAG_ACT_DIRECT,
+                                    },
+                    },
+    };
+
+    // name.str is sized for the longer name (NUL included), so this never truncates.
+    strncpy(req.options.name.str, NAME, sizeof(req.options.name.str));
+
+#undef NAME
+#undef ASCIIZ_MAXLEN_NAME
+#undef NAME_ETHER
+#undef NAME_RAWIP
+#undef FSOBJ_SUFFIX
+#undef ASCIIZ_LEN_BPF
+#undef BPF
+
+    const int rv = send(fd, &req, sizeof(req), 0);
+    if (rv == -1) return -errno;
+    if (rv != sizeof(req)) return -EMSGSIZE;
+
+    return processNetlinkResponse(fd);
+}
+
 }  // namespace net
 }  // namespace android
diff --git a/server/ClatUtils.h b/server/ClatUtils.h
index 6c3681f..adf8400 100644
--- a/server/ClatUtils.h
+++ b/server/ClatUtils.h
@@ -36,6 +36,8 @@
 int tcQdiscReplaceDevClsact(int fd, int ifIndex);
 int tcQdiscDelDevClsact(int fd, int ifIndex);
 
+int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet);
+
 }  // namespace net
 }  // namespace android
 
diff --git a/server/ClatUtilsTest.cpp b/server/ClatUtilsTest.cpp
index 8c21528..e5c3b3f 100644
--- a/server/ClatUtilsTest.cpp
+++ b/server/ClatUtilsTest.cpp
@@ -21,6 +21,8 @@
 #include "ClatUtils.h"
 
 #include <linux/if_arp.h>
+#include <stdlib.h>
+#include <sys/wait.h>
 
 #include "bpf/BpfUtils.h"
 #include "netdbpf/bpf_shared.h"
@@ -112,5 +114,63 @@
     close(fd);
 }
 
+// The SKIP_IF_BPF_NOT_SUPPORTED macro is effectively a check for a 4.9+ kernel
+// combined with a device that launched on P.  Ie. it's a test for 4.9-P or better.
+// NET_CLS_BPF is only enabled starting with 4.9-Q, so we need a separate test for it:
+// returns the system() status of grepping /proc/config.gz for CONFIG_NET_CLS_BPF=y or =m.
+int doKernelSupportsNetClsBpf(void) {
+    return system("zcat /proc/config.gz | egrep -q '^CONFIG_NET_CLS_BPF=[my]$'");
+}
+
+// Make sure the above function actually executes correctly rather than failing
+// due to a missing binary or some other execution failure...
+TEST_F(ClatUtilsTest, KernelSupportsNetClsBpf) {
+    // Make sure the file is present, readable and decompressible.
+    ASSERT_EQ(W_EXITCODE(0, 0), system("zcat /proc/config.gz > /dev/null"));
+
+    int v = doKernelSupportsNetClsBpf();
+
+    // The status should always be exit code 0 (match) or exit code 1 (no match),
+    // anything else is some sort of exec/environment/etc failure.
+    if (v != W_EXITCODE(1, 0)) ASSERT_EQ(v, W_EXITCODE(0, 0));
+}
+
+// True iff CONFIG_NET_CLS_BPF is enabled (=y or =m) in /proc/config.gz
+bool kernelSupportsNetClsBpf(void) {
+    return doKernelSupportsNetClsBpf() == W_EXITCODE(0, 0);
+}
+
+// Attaches clsact plus the clat bpf filter selected by 'ethernet' to loopback, then detaches.
+void checkAttachBpfFilterClsactLo(const bool ethernet) {
+    // This test requires kernel 4.9-Q or better
+    SKIP_IF_BPF_NOT_SUPPORTED;
+    if (!kernelSupportsNetClsBpf()) return;
+
+    int bpf_fd = getClatProgFd(ethernet);
+    ASSERT_LE(3, bpf_fd);
+
+    int fd = openNetlinkSocket();
+    EXPECT_LE(3, fd);
+    if (fd >= 0) {
+        // This attaches and detaches a clsact plus ebpf program to loopback
+        // interface, but it should not affect traffic by virtue of us not actually
+        // populating the ebpf control map.  It also only takes fractions of a second.
+        EXPECT_EQ(0, tcQdiscAddDevClsact(fd, LOOPBACK_IFINDEX));
+        EXPECT_EQ(0, tcFilterAddDevBpf(fd, LOOPBACK_IFINDEX, bpf_fd, ethernet));
+        EXPECT_EQ(0, tcQdiscDelDevClsact(fd, LOOPBACK_IFINDEX));
+        close(fd);
+    }
+
+    close(bpf_fd);
+}
+
+TEST_F(ClatUtilsTest, CheckAttachBpfFilterRawIpClsactLo) {
+    checkAttachBpfFilterClsactLo(false);  // ethernet = false
+}
+
+TEST_F(ClatUtilsTest, CheckAttachBpfFilterEthernetClsactLo) {
+    checkAttachBpfFilterClsactLo(true);  // ethernet = true
+}
+
 }  // namespace net
 }  // namespace android