ClatdController - populate ebpf ingress clat map on start and stop
and also attach/detach tc clsact egress qdisc and filter.
Test: atest netd_unit_test netd_integration_test
Bug: 65674744
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Change-Id: I865403ae320b95cec59659b34a69a4f304e1f082
diff --git a/server/ClatdController.cpp b/server/ClatdController.cpp
index 6f2f43b..3ef2fe1 100644
--- a/server/ClatdController.cpp
+++ b/server/ClatdController.cpp
@@ -14,13 +14,12 @@
* limitations under the License.
*/
-#include "ClatdController.h"
-
#include <map>
#include <string>
#include <arpa/inet.h>
#include <errno.h>
+#include <linux/if_arp.h>
#include <net/if.h>
#include <netinet/in.h>
#include <spawn.h>
@@ -31,6 +30,9 @@
#define LOG_TAG "ClatdController"
#include <log/log.h>
+#include "ClatdController.h"
+
+#include "android-base/properties.h"
#include "android-base/unique_fd.h"
#include "bpf/BpfMap.h"
#include "netdbpf/bpf_shared.h"
@@ -69,6 +71,66 @@
ClatdController::~ClatdController() {
}
+// Decides whether clat eBPF may be used and records the answer in mClatEbpfMode, then
+// opens the netlink socket and the clat ingress map. Finally, for every entry found in
+// the map, deletes the clsact qdisc on that entry's interface and clears the map.
+// If either open fails, mClatEbpfMode is set to ClatEbpfDisabled and Init() returns early.
+void ClatdController::Init(void) {
+ // TODO: should refactor into separate function for testability
+ if (bpf::getBpfSupportLevel() == bpf::BpfLevel::NONE) {
+ ALOGI("Pre-4.9 kernel or pre-P api shipping level - disabling clat ebpf.");
+ mClatEbpfMode = ClatEbpfDisabled;
+ return;
+ }
+
+ // We know the device initially shipped with at least P...,
+ // but did it ship with at least Q?
+
+ uint64_t api_level = base::GetUintProperty<uint64_t>("ro.product.first_api_level", 0);
+ if (api_level == 0) {
+ // The property read back as 0 (the default passed above): log it and fall back to
+ // ro.build.version.sdk instead.
+ ALOGE("Cannot determine initial API level of the device.");
+ api_level = base::GetUintProperty<uint64_t>("ro.build.version.sdk", 0);
+ }
+
+ // Note: MINIMUM_API_REQUIRED is for eBPF as a whole and is thus P
+ if (api_level > bpf::MINIMUM_API_REQUIRED) {
+ ALOGI("4.9+ kernel and device shipped with Q+ - clat ebpf should work.");
+ mClatEbpfMode = ClatEbpfEnabled;
+ } else {
+ // We cannot guarantee that 4.9-P kernels will include NET_CLS_BPF support.
+ ALOGI("4.9+ kernel and device shipped with P - clat ebpf might work.");
+ mClatEbpfMode = ClatEbpfMaybe;
+ }
+
+ // From here on, any failure overrides the mode chosen above with ClatEbpfDisabled.
+ int rv = openNetlinkSocket();
+ if (rv < 0) {
+ ALOGE("openNetlinkSocket() failure: %s", strerror(-rv));
+ mClatEbpfMode = ClatEbpfDisabled;
+ return;
+ }
+ mNetlinkFd.reset(rv);
+
+ rv = getClatIngressMapFd();
+ if (rv < 0) {
+ ALOGE("getClatIngressMapFd() failure: %s", strerror(-rv));
+ mClatEbpfMode = ClatEbpfDisabled;
+ // Give up the netlink socket stored just above; with the mode disabled,
+ // maybeStartBpf()/maybeStopBpf() return before touching it.
+ mNetlinkFd.reset(-1);
+ return;
+ }
+ mClatIngressMap.reset(rv);
+
+ // Plain int copy of the netlink fd for the lambda below to capture.
+ int netlinkFd = mNetlinkFd.get();
+
+ // TODO: perhaps this initial cleanup should be in its own function?
+ // Callback run for each map entry: remove the clsact qdisc on the entry's interface.
+ const auto del = [&netlinkFd](const ClatIngressKey& key,
+ const BpfMap<ClatIngressKey, ClatIngressValue>&) {
+ ALOGW("Removing stale clat config on interface %d.", key.iif);
+ int rv = tcQdiscDelDevClsact(netlinkFd, key.iif);
+ if (rv < 0) ALOGE("tcQdiscDelDevClsact() failure: %s", strerror(-rv));
+ return netdutils::status::ok; // keep on going regardless
+ };
+ // Failures of the sweep and of the clear are only logged; Init() still runs to the end.
+ auto ret = mClatIngressMap.iterate(del);
+ if (!isOk(ret)) ALOGE("mClatIngressMap.iterate() failure: %s", strerror(ret.code()));
+ ret = mClatIngressMap.clear();
+ if (!isOk(ret)) ALOGE("mClatIngressMap.clear() failure: %s", strerror(ret.code()));
+}
+
bool ClatdController::isIpv4AddressFree(in_addr_t addr) {
int s = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
if (s == -1) {
@@ -170,6 +232,113 @@
return 0;
}
+// Sets up clat eBPF ingress handling for |tracker|: picks the ingress program matching the
+// interface's hardware address type (ethernet vs raw ip), writes the
+// (ifIndex, pfx96, v6) -> (ifIndex, v4) entry into mClatIngressMap, adds a clsact qdisc on
+// the interface and attaches the program to it via tcFilterAddDevBpf().
+// Nothing is returned: every failure is logged, and whatever was already set up (map entry,
+// and the qdisc if it was added) is removed again before returning.
+// Does nothing when mClatEbpfMode is ClatEbpfDisabled.
+void ClatdController::maybeStartBpf(const ClatdTracker& tracker) {
+ if (mClatEbpfMode == ClatEbpfDisabled) return;
+
+ int rv = hardwareAddressType(tracker.iface);
+ if (rv < 0) {
+ ALOGE("hardwareAddressType(%s[%d]) failure: %s", tracker.iface, tracker.ifIndex,
+ strerror(-rv));
+ return;
+ }
+
+ // Only ethernet and raw-ip interfaces are handled; any other type is logged and skipped.
+ bool isEthernet;
+ switch (rv) {
+ case ARPHRD_ETHER:
+ isEthernet = true;
+ break;
+ case ARPHRD_RAWIP: // in Linux 4.14+ rmnet support was upstreamed and this is 519
+ case 530: // this is ARPHRD_RAWIP on some Android 4.9 kernels with rmnet
+ isEthernet = false;
+ break;
+ default:
+ ALOGE("hardwareAddressType(%s[%d]) returned unknown type %d.", tracker.iface,
+ tracker.ifIndex, rv);
+ return;
+ }
+
+ rv = getClatIngressProgFd(isEthernet);
+ if (rv < 0) {
+ ALOGE("getClatIngressProgFd(%d) failure: %s", isEthernet, strerror(-rv));
+ return;
+ }
+ unique_fd progFd(rv);
+
+ ClatIngressKey key = {
+ .iif = tracker.ifIndex,
+ .pfx96 = tracker.pfx96,
+ .local6 = tracker.v6,
+ };
+ ClatIngressValue value = {
+ // Redirect the mangled packets to the same interface so we can see them in tcpdump.
+ // TODO: move the tun interface creation to netd, and use that ifindex instead.
+ // TODO: move all the clat code to eBPF and remove the tun interface entirely.
+ .oif = tracker.ifIndex,
+ .local4 = tracker.v4,
+ };
+
+ auto ret = mClatIngressMap.writeValue(key, value, BPF_ANY);
+ if (!isOk(ret)) {
+ ALOGE("mClatIngressMap.writeValue failure: %s", strerror(ret.code()));
+ return;
+ }
+
+ // We do tc setup *after* populating map, so scanning through map
+ // can always be used to tell us what needs cleanup.
+
+ rv = tcQdiscAddDevClsact(mNetlinkFd, tracker.ifIndex);
+ if (rv) {
+ ALOGE("tcQdiscAddDevClsact(%d[%s]) failure: %s", tracker.ifIndex, tracker.iface,
+ strerror(-rv));
+ // Roll back the map entry written above.
+ ret = mClatIngressMap.deleteValue(key);
+ if (!isOk(ret)) ALOGE("mClatIngressMap.deleteValue failure: %s", strerror(ret.code()));
+ return;
+ }
+
+ rv = tcFilterAddDevBpf(mNetlinkFd, tracker.ifIndex, progFd, isEthernet);
+ if (rv) {
+ // ENOENT in ClatEbpfMaybe mode is logged at info level only - presumably this is the
+ // "kernel might lack NET_CLS_BPF" case noted in Init(). Anything else is an error.
+ if ((rv == -ENOENT) && (mClatEbpfMode == ClatEbpfMaybe)) {
+ ALOGI("tcFilterAddDevBpf(%d[%s], %d): %s", tracker.ifIndex, tracker.iface, isEthernet,
+ strerror(-rv));
+ } else {
+ ALOGE("tcFilterAddDevBpf(%d[%s], %d) failure: %s", tracker.ifIndex, tracker.iface,
+ isEthernet, strerror(-rv));
+ }
+ // Roll back in reverse order of setup: qdisc first, then the map entry.
+ rv = tcQdiscDelDevClsact(mNetlinkFd, tracker.ifIndex);
+ if (rv)
+ ALOGE("tcQdiscDelDevClsact(%d[%s]) failure: %s", tracker.ifIndex, tracker.iface,
+ strerror(-rv));
+ ret = mClatIngressMap.deleteValue(key);
+ if (!isOk(ret)) ALOGE("mClatIngressMap.deleteValue failure: %s", strerror(ret.code()));
+ return;
+ }
+
+ // success
+}
+
+// Tears down the clat eBPF state for |tracker|: deletes the clsact qdisc on
+// tracker.ifIndex, then removes the (ifIndex, pfx96, v6) entry from mClatIngressMap.
+// Failures are logged and otherwise ignored; both steps are always attempted.
+// Does nothing when mClatEbpfMode is ClatEbpfDisabled.
+void ClatdController::maybeStopBpf(const ClatdTracker& tracker) {
+ if (mClatEbpfMode == ClatEbpfDisabled) return;
+
+ // No need to remove filter, since we remove qdisc it is attached to,
+ // which automatically removes everything attached to the qdisc.
+ int rv = tcQdiscDelDevClsact(mNetlinkFd, tracker.ifIndex);
+ if (rv < 0)
+ ALOGE("tcQdiscDelDevClsact(%d[%s]) failure: %s", tracker.ifIndex, tracker.iface,
+ strerror(-rv));
+
+ // We cleanup map last, so scanning through map can be used to
+ // determine what still needs cleanup.
+
+ // Same key fields that maybeStartBpf() uses when writing the entry.
+ ClatIngressKey key = {
+ .iif = tracker.ifIndex,
+ .pfx96 = tracker.pfx96,
+ .local6 = tracker.v6,
+ };
+
+ auto ret = mClatIngressMap.deleteValue(key);
+ if (!isOk(ret)) ALOGE("mClatIngressMap.deleteValue failure: %s", strerror(ret.code()));
+}
+
// Finds the tracker of the clatd running on interface |interface|, or nullptr if clatd has not been
// started on |interface|.
ClatdController::ClatdTracker* ClatdController::getClatdTracker(const std::string& interface) {
@@ -268,6 +437,8 @@
return -res;
}
+ maybeStartBpf(tracker);
+
mClatdTrackers[interface] = tracker;
ALOGD("clatd started on %s", interface.c_str());
@@ -285,6 +456,8 @@
ALOGD("Stopping clatd pid=%d on %s", tracker->pid, interface.c_str());
+ maybeStopBpf(*tracker);
+
kill(tracker->pid, SIGTERM);
waitpid(tracker->pid, nullptr, 0);
mClatdTrackers.erase(interface);