ClatUtils - implement tcFilterAddDevBpf()
Test: atest netd_unit_test
Bug: 65674744
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Change-Id: Ibb4de088b97039d4212c1db183f3142803e50387
diff --git a/server/ClatUtils.cpp b/server/ClatUtils.cpp
index f282ab0..edfce57 100644
--- a/server/ClatUtils.cpp
+++ b/server/ClatUtils.cpp
@@ -16,9 +16,11 @@
#include "ClatUtils.h"
+#include <arpa/inet.h>
#include <errno.h>
#include <linux/if.h>
#include <linux/netlink.h>
+#include <linux/pkt_cls.h>
#include <linux/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <sys/ioctl.h>
@@ -204,5 +206,152 @@
return doTcQdiscClsact(fd, ifIndex, RTM_DELQDISC, 0);
}
+// tc filter add dev ... egress prio 1 protocol ipv6 bpf object-pinned /sys/fs/bpf/... direct-action
+int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
+ // The priority doesn't matter until we actually start attaching multiple
+ // things to the same interface's egress point.
+ const int prio = 1;
+
+ // This is the name of the filter we're attaching (ie. this is the 'bpf'
+ // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
+ //
+ // We go through some hoops in order to make this compile time constants
+ // so that we can define the struct further down the function with the
+ // field for this sized correctly already during the build.
+#define BPF "bpf"
+ const char bpf[] = BPF;
+ // sizeof() includes the terminating NULL
+#define ASCIIZ_LEN_BPF sizeof(bpf)
+
+ // This is to replicate program name suffix used by 'tc' Linux cli
+ // when it attaches programs.
+#define FSOBJ_SUFFIX ":[*fsobj]"
+
+ // This macro expands (from header files) to:
+ // prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
+ // and is the name of the pinned ebpf program for ARPHRD_RAWIP interfaces.
+ // (also compatible with anything that has 0 size L2 header)
+#define NAME_RAWIP CLAT_PROG_RAWIP_NAME FSOBJ_SUFFIX
+ const char name_rawip[] = NAME_RAWIP;
+
+ // This macro expands (from header files) to:
+ // prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
+ // and is the name of the pinned ebpf program for ARPHRD_ETHER interfaces.
+ // (also compatible with anything that has standard ethernet header)
+#define NAME_ETHER CLAT_PROG_ETHER_NAME FSOBJ_SUFFIX
+ const char name_ether[] = NAME_ETHER;
+
+ // The actual name we'll use is determined at run time via 'ethernet'
+ // boolean. We need to compile time allocate enough space in the struct
+ // hence this macro magic to make sure we have enough space for either
+ // possibility. In practice both are actually the same size.
+#define ASCIIZ_MAXLEN_NAME \
+ ((sizeof(name_rawip) > sizeof(name_ether)) ? sizeof(name_rawip) : sizeof(name_ether))
+
+ // This is not a compile time constant and is used in strcpy below
+#define NAME (ethernet ? NAME_ETHER : NAME_RAWIP)
+
+ struct {
+ nlmsghdr n;
+ tcmsg t;
+ struct {
+ nlattr attr;
+ char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
+ } kind;
+ struct {
+ nlattr attr;
+ struct {
+ nlattr attr;
+ __u32 u32;
+ } fd;
+ struct {
+ nlattr attr;
+ char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
+ } name;
+ struct {
+ nlattr attr;
+ __u32 u32;
+ } flags;
+ } options;
+ } req = {
+ .n =
+ {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = RTM_NEWTFILTER,
+ .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
+ },
+ .t =
+ {
+ .tcm_family = AF_UNSPEC,
+ .tcm_ifindex = ifIndex,
+ .tcm_handle = TC_H_UNSPEC,
+ .tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS),
+ .tcm_info = (prio << 16) | htons(ETH_P_IPV6),
+ },
+ .kind =
+ {
+ .attr =
+ {
+ .nla_len = sizeof(req.kind),
+ .nla_type = TCA_KIND,
+ },
+ .str = BPF,
+ },
+ .options =
+ {
+ .attr =
+ {
+ .nla_len = sizeof(req.options),
+ .nla_type = TCA_OPTIONS,
+ },
+ .fd =
+ {
+ .attr =
+ {
+ .nla_len = sizeof(req.options.fd),
+ .nla_type = TCA_BPF_FD,
+ },
+ .u32 = static_cast<__u32>(bpfFd),
+ },
+ .name =
+ {
+ .attr =
+ {
+ .nla_len = sizeof(req.options.name),
+ .nla_type = TCA_BPF_NAME,
+ },
+ // Visible via 'tc filter show', but
+ // is overwritten by strcpy below
+ .str = "placeholder",
+ },
+ .flags =
+ {
+ .attr =
+ {
+ .nla_len = sizeof(req.options.flags),
+ .nla_type = TCA_BPF_FLAGS,
+ },
+ .u32 = TCA_BPF_FLAG_ACT_DIRECT,
+ },
+ },
+ };
+
+ strncpy(req.options.name.str, NAME, sizeof(req.options.name.str));
+
+#undef NAME
+#undef ASCIIZ_MAXLEN_NAME
+#undef NAME_ETHER
+#undef NAME_RAWIP
+#undef NAME
+#undef ASCIIZ_LEN_BPF
+#undef BPF
+
+ const int rv = send(fd, &req, sizeof(req), 0);
+ if (rv == -1) return -errno;
+ if (rv != sizeof(req)) return -EMSGSIZE;
+
+ return processNetlinkResponse(fd);
+}
+
} // namespace net
} // namespace android
diff --git a/server/ClatUtils.h b/server/ClatUtils.h
index 6c3681f..adf8400 100644
--- a/server/ClatUtils.h
+++ b/server/ClatUtils.h
@@ -36,6 +36,8 @@
int tcQdiscReplaceDevClsact(int fd, int ifIndex);
int tcQdiscDelDevClsact(int fd, int ifIndex);
+int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet);
+
} // namespace net
} // namespace android
diff --git a/server/ClatUtilsTest.cpp b/server/ClatUtilsTest.cpp
index 8c21528..e5c3b3f 100644
--- a/server/ClatUtilsTest.cpp
+++ b/server/ClatUtilsTest.cpp
@@ -21,6 +21,8 @@
#include "ClatUtils.h"
#include <linux/if_arp.h>
+#include <stdlib.h>
+#include <sys/wait.h>
#include "bpf/BpfUtils.h"
#include "netdbpf/bpf_shared.h"
@@ -112,5 +114,63 @@
close(fd);
}
+// The SKIP_IF_BPF_NOT_SUPPORTED macro is effectively a check for 4.9+ kernel
+// combined with a launched on P device. Ie. it's a test for 4.9-P or better.
+// NET_CLS_BPF is only enabled starting with 4.9-Q and as such we need
+// a separate way to test for this...
+int doKernelSupportsNetClsBpf(void) {
+ return system("zcat /proc/config.gz | egrep -q '^CONFIG_NET_CLS_BPF=[my]$'");
+}
+
+// Make sure the above function actually executes correctly rather than failing
+// due to missing binary or execution failure...
+TEST_F(ClatUtilsTest, KernelSupportsNetClsBpf) {
+ // Make sure the file is present and readable and decompressable.
+ ASSERT_EQ(W_EXITCODE(0, 0), system("zcat /proc/config.gz > /dev/null"));
+
+ int v = doKernelSupportsNetClsBpf();
+
+ // It should always either return 0 (match) or 1 (no match),
+ // anything else is some sort of exec/environment/etc failure.
+ if (v != W_EXITCODE(1, 0)) ASSERT_EQ(v, W_EXITCODE(0, 0));
+}
+
+// True iff CONFIG_NET_CLS_BPF is enabled in /proc/config.gz
+bool kernelSupportsNetClsBpf(void) {
+ return doKernelSupportsNetClsBpf() == W_EXITCODE(0, 0);
+}
+
+void checkAttachBpfFilterClsactLo(const bool ethernet) {
+ // This test requires kernel 4.9-Q or better
+ SKIP_IF_BPF_NOT_SUPPORTED;
+ if (!kernelSupportsNetClsBpf()) return;
+
+ int bpf_fd = getClatProgFd(false);
+ ASSERT_LE(3, bpf_fd);
+
+ int fd = openNetlinkSocket();
+ EXPECT_LE(3, fd);
+ if (fd >= 0) {
+ // This attaches and detaches a clsact plus ebpf program to loopback
+ // interface, but it should not affect traffic by virtue of us not
+ // actually populating the ebpf control map.
+ // Furthermore: it only takes fractions of a second.
+ EXPECT_EQ(0, tcQdiscAddDevClsact(fd, LOOPBACK_IFINDEX));
+ EXPECT_EQ(0, tcFilterAddDevBpf(fd, LOOPBACK_IFINDEX, bpf_fd, ethernet));
+ EXPECT_EQ(0, tcQdiscDelDevClsact(fd, LOOPBACK_IFINDEX));
+ close(fd);
+ }
+
+ close(bpf_fd);
+}
+
+TEST_F(ClatUtilsTest, CheckAttachBpfFilterRawIpClsactLo) {
+ checkAttachBpfFilterClsactLo(false);
+}
+
+TEST_F(ClatUtilsTest, CheckAttachBpfFilterEthernetClsactLo) {
+ checkAttachBpfFilterClsactLo(true);
+}
+
} // namespace net
} // namespace android