Support killing sockets using SOCK_DESTROY.
This gives netd the ability to close sockets on a particular
source IP address using SOCK_DESTROY. It does not yet enable
this behaviour.
The microbenchmark is able to close 500 IPv6 sockets in ~30ms on
my angler. Specifically:
- Scanning 500 socketpairs: ~5ms
- Scanning 500 socketpairs and killing one half of each: ~30ms
- Scanning 500 socketpairs and killing both halves of each: ~40ms
This is about ~2.5x-3.5x slower than SIOCKILLADDR:
- For 500 sockets, it's 9.5ms vs. 22.9ms.
- For 4000 sockets, it's ~40ms vs ~135ms.
A large part of that is due to sending RST packets, which
SIOCKILLADDR does not do. If the kernel is modified so that
SOCK_DESTROY does not send RSTs, the time taken to kill 4000
sockets goes down to ~70ms
Batching the destroy operations does not help much. It saves
5-10%, but it complicates error handling.
Bug: 26976388
Change-Id: I2e1ac30af5dbcdb98dbb7c6e4d4d67c55b9fd00f
diff --git a/server/Android.mk b/server/Android.mk
index d0cd49e..b713b4f 100644
--- a/server/Android.mk
+++ b/server/Android.mk
@@ -68,6 +68,7 @@
PppController.cpp \
ResolverController.cpp \
RouteController.cpp \
+ SockDiag.cpp \
SoftapController.cpp \
StrictController.cpp \
TetherController.cpp \
diff --git a/server/SockDiag.cpp b/server/SockDiag.cpp
new file mode 100644
index 0000000..2f1437c
--- /dev/null
+++ b/server/SockDiag.cpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>
+#include <netdb.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+
+#include <linux/netlink.h>
+#include <linux/sock_diag.h>
+#include <linux/inet_diag.h>
+
+#define LOG_TAG "Netd"
+
+#include <cutils/log.h>
+
+#include "NetdConstants.h"
+#include "SockDiag.h"
+
+#ifndef SOCK_DESTROY
+#define SOCK_DESTROY 21
+#endif
+
+namespace {
+
+struct AddrinfoDeleter {
+ void operator()(addrinfo *a) { if (a) freeaddrinfo(a); }
+};
+
+typedef std::unique_ptr<addrinfo, AddrinfoDeleter> ScopedAddrinfo;
+
+int checkError(int fd) {
+ struct {
+ nlmsghdr h;
+ nlmsgerr err;
+ } __attribute__((__packed__)) ack;
+ ssize_t bytesread = recv(fd, &ack, sizeof(ack), MSG_DONTWAIT | MSG_PEEK);
+ if (bytesread == -1) {
+ // Read failed (error), or nothing to read (good).
+ return (errno == EAGAIN) ? 0 : -errno;
+ } else if (bytesread == (ssize_t) sizeof(ack) && ack.h.nlmsg_type == NLMSG_ERROR) {
+ // We got an error. Consume it.
+ recv(fd, &ack, sizeof(ack), 0);
+ return ack.err.error;
+ } else {
+ // The kernel replied with something. Leave it to the caller.
+ return 0;
+ }
+}
+
+} // namespace
+
+bool SockDiag::open() {
+ if (hasSocks()) {
+ return false;
+ }
+
+ mSock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_INET_DIAG);
+ mWriteSock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_INET_DIAG);
+ if (!hasSocks()) {
+ closeSocks();
+ return false;
+ }
+
+ sockaddr_nl nl = { .nl_family = AF_NETLINK };
+ if ((connect(mSock, reinterpret_cast<sockaddr *>(&nl), sizeof(nl)) == -1) ||
+ (connect(mWriteSock, reinterpret_cast<sockaddr *>(&nl), sizeof(nl)) == -1)) {
+ closeSocks();
+ return false;
+ }
+
+ return true;
+}
+
+int SockDiag::sendDumpRequest(uint8_t proto, uint8_t family, const char *addrstr) {
+ addrinfo hints = { .ai_flags = AI_NUMERICHOST };
+ addrinfo *res;
+ in6_addr mapped = { .s6_addr32 = { 0, 0, htonl(0xffff), 0 } };
+ int ret;
+
+ // TODO: refactor the netlink parsing code out of system/core, bring it into netd, and stop
+ // doing string conversions when they're not necessary.
+ if ((ret = getaddrinfo(addrstr, nullptr, &hints, &res)) != 0) {
+ return -EINVAL;
+ }
+
+ // So we don't have to call freeaddrinfo on every failure path.
+ ScopedAddrinfo resP(res);
+
+ void *addr;
+ uint8_t addrlen;
+ if (res->ai_family == AF_INET && family == AF_INET) {
+ in_addr& ina = reinterpret_cast<sockaddr_in*>(res->ai_addr)->sin_addr;
+ addr = &ina;
+ addrlen = sizeof(ina);
+ } else if (res->ai_family == AF_INET && family == AF_INET6) {
+ in_addr& ina = reinterpret_cast<sockaddr_in*>(res->ai_addr)->sin_addr;
+ mapped.s6_addr32[3] = ina.s_addr;
+ addr = &mapped;
+ addrlen = sizeof(mapped);
+ } else if (res->ai_family == AF_INET6 && family == AF_INET6) {
+ in6_addr& in6a = reinterpret_cast<sockaddr_in6*>(res->ai_addr)->sin6_addr;
+ addr = &in6a;
+ addrlen = sizeof(in6a);
+ } else {
+ return -EAFNOSUPPORT;
+ }
+
+ uint8_t prefixlen = addrlen * 8;
+ uint8_t yesjump = sizeof(inet_diag_bc_op) + sizeof(inet_diag_hostcond) + addrlen;
+ uint8_t nojump = yesjump + 4;
+ uint32_t states = ~(1 << TCP_TIME_WAIT);
+
+ struct {
+ nlmsghdr nlh;
+ inet_diag_req_v2 req;
+ nlattr nla;
+ inet_diag_bc_op op;
+ inet_diag_hostcond cond;
+ } __attribute__((__packed__)) request = {
+ .nlh = {
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+ },
+ .req = {
+ .sdiag_family = family,
+ .sdiag_protocol = proto,
+ .idiag_states = states,
+ },
+ .nla = {
+ .nla_type = INET_DIAG_REQ_BYTECODE,
+ },
+ .op = {
+ INET_DIAG_BC_S_COND,
+ yesjump,
+ nojump,
+ },
+ .cond = {
+ family,
+ prefixlen,
+ -1,
+ {}
+ },
+ };
+
+ request.nlh.nlmsg_len = sizeof(request) + addrlen;
+ request.nla.nla_len = sizeof(request.nla) + sizeof(request.op) + sizeof(request.cond) + addrlen;
+
+ struct iovec iov[] = {
+ { &request, sizeof(request) },
+ { addr, addrlen },
+ };
+
+ if (writev(mSock, iov, ARRAY_SIZE(iov)) != (int) request.nlh.nlmsg_len) {
+ return -errno;
+ }
+
+ return checkError(mSock);
+}
+
+int SockDiag::readDiagMsg(uint8_t proto, SockDiag::DumpCallback callback) {
+ char buf[kBufferSize];
+
+ ssize_t bytesread;
+ do {
+ bytesread = read(mSock, buf, sizeof(buf));
+
+ if (bytesread < 0) {
+ return -errno;
+ }
+
+ uint32_t len = bytesread;
+ for (nlmsghdr *nlh = reinterpret_cast<nlmsghdr *>(buf);
+ NLMSG_OK(nlh, len);
+ nlh = NLMSG_NEXT(nlh, len)) {
+ switch (nlh->nlmsg_type) {
+ case NLMSG_DONE:
+ callback(proto, NULL);
+ return 0;
+ case NLMSG_ERROR: {
+ nlmsgerr *err = reinterpret_cast<nlmsgerr *>(NLMSG_DATA(nlh));
+ return err->error;
+ }
+ default:
+ inet_diag_msg *msg = reinterpret_cast<inet_diag_msg *>(NLMSG_DATA(nlh));
+ callback(proto, msg);
+ }
+ }
+ } while (bytesread > 0);
+
+ return 0;
+}
+
+int SockDiag::sockDestroy(uint8_t proto, const inet_diag_msg *msg) {
+ DestroyRequest request = {
+ .nlh = {
+ .nlmsg_type = SOCK_DESTROY,
+ .nlmsg_flags = NLM_F_REQUEST,
+ },
+ .req = {
+ .sdiag_family = msg->idiag_family,
+ .sdiag_protocol = proto,
+ .idiag_states = (uint32_t) (1 << msg->idiag_state),
+ .id = msg->id,
+ },
+ };
+ request.nlh.nlmsg_len = sizeof(request);
+
+ if (write(mWriteSock, &request, sizeof(request)) < (ssize_t) sizeof(request)) {
+ return -errno;
+ }
+
+ return checkError(mWriteSock);
+}
diff --git a/server/SockDiag.h b/server/SockDiag.h
new file mode 100644
index 0000000..3b6ca8b
--- /dev/null
+++ b/server/SockDiag.h
@@ -0,0 +1,33 @@
+#include <functional>
+
+#include <linux/netlink.h>
+#include <linux/sock_diag.h>
+#include <linux/inet_diag.h>
+
+struct inet_diag_msg;
+
+class SockDiag {
+
+ public:
+ static const int kBufferSize = 4096;
+ typedef std::function<int(uint8_t proto, const inet_diag_msg *)> DumpCallback;
+
+ struct DestroyRequest {
+ nlmsghdr nlh;
+ inet_diag_req_v2 req;
+ } __attribute__((__packed__));
+
+ SockDiag() : mSock(-1), mWriteSock(-1) {}
+ bool open();
+ virtual ~SockDiag() { closeSocks(); }
+
+ int sendDumpRequest(uint8_t proto, uint8_t family, const char *addrstr);
+ int readDiagMsg(uint8_t proto, DumpCallback callback);
+ int sockDestroy(uint8_t proto, const inet_diag_msg *);
+
+ private:
+ int mSock;
+ int mWriteSock;
+ bool hasSocks() { return mSock != -1 && mWriteSock != -1; }
+ void closeSocks() { close(mSock); close(mWriteSock); mSock = mWriteSock = -1; }
+};
diff --git a/tests/Android.mk b/tests/Android.mk
new file mode 100644
index 0000000..d9659f7
--- /dev/null
+++ b/tests/Android.mk
@@ -0,0 +1,13 @@
+LOCAL_PATH := $(call my-dir)
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := sock_diag_test
+LOCAL_CFLAGS := -Wall -Werror -Wunused-parameter
+LOCAL_C_INCLUDES := system/netd/server
+LOCAL_SRC_FILES := sock_diag_test.cpp ../server/SockDiag.cpp
+LOCAL_MODULE_TAGS := tests
+LOCAL_SHARED_LIBRARIES := liblog
+
+include $(BUILD_NATIVE_TEST)
+
diff --git a/tests/sock_diag_test.cpp b/tests/sock_diag_test.cpp
new file mode 100644
index 0000000..67af978
--- /dev/null
+++ b/tests/sock_diag_test.cpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * sock_diag_test.cpp - unit tests for SockDiag.cpp
+ */
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <linux/inet_diag.h>
+
+#include <gtest/gtest.h>
+
+#include "NetdConstants.h"
+#include "SockDiag.h"
+
+
+#define NUM_SOCKETS 500
+
+
+class SockDiagTest : public ::testing::Test {
+};
+
+uint16_t bindAndListen(int s) {
+ for (int i = 0; i < 10; i++) {
+ uint16_t port = 1024 + arc4random_uniform(0xffff - 1024);
+ sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_port = htons(port) };
+ if (bind(s, (sockaddr *) &sin6, sizeof(sin6)) == 0) {
+ listen(s, 1);
+ return port;
+ }
+ }
+ close(s);
+ return 0;
+}
+
+const char *tcpStateName(uint8_t state) {
+ static const char *states[] = {
+ "???",
+ "TCP_ESTABLISHED",
+ "TCP_SYN_SENT",
+ "TCP_SYN_RECV",
+ "TCP_FIN_WAIT1",
+ "TCP_FIN_WAIT2",
+ "TCP_TIME_WAIT",
+ "TCP_CLOSE",
+ "TCP_CLOSE_WAIT",
+ "TCP_LAST_ACK",
+ "TCP_LISTEN",
+ "TCP_CLOSING",
+ "TCP_NEW_SYN_RECV",
+ };
+ return states[(state < ARRAY_SIZE(states)) ? state : 0];
+}
+
+TEST_F(SockDiagTest, TestDump) {
+ int v4socket = socket(AF_INET, SOCK_STREAM, 0);
+ int v6socket = socket(AF_INET6, SOCK_STREAM, 0);
+ int listensocket = socket(AF_INET6, SOCK_STREAM, 0);
+ ASSERT_NE(-1, v4socket) << "Failed to open IPv4 socket";
+ ASSERT_NE(-1, v6socket) << "Failed to open IPv6 socket";
+ ASSERT_NE(-1, listensocket) << "Failed to open listen socket";
+
+ uint16_t port = bindAndListen(listensocket);
+ ASSERT_NE(0, port) << "Can't bind to server port";
+
+ // Connect to loopback.
+ sockaddr_in server4 = { .sin_family = AF_INET, .sin_port = htons(port) };
+ sockaddr_in6 server6 = { .sin6_family = AF_INET6, .sin6_port = htons(port) };
+ ASSERT_EQ(0, connect(v4socket, (sockaddr *) &server4, sizeof(server4)))
+ << "IPv4 connect failed: " << strerror(errno);
+ ASSERT_EQ(0, connect(v6socket, (sockaddr *) &server6, sizeof(server6)))
+ << "IPv6 connect failed: " << strerror(errno);
+
+ sockaddr_in6 client46, client6;
+ socklen_t clientlen = std::max(sizeof(client46), sizeof(client6));
+ int accepted4 = accept(listensocket, (sockaddr *) &client46, &clientlen);
+ int accepted6 = accept(listensocket, (sockaddr *) &client6, &clientlen);
+ ASSERT_NE(-1, accepted4);
+ ASSERT_NE(-1, accepted6);
+
+ int v4SocketsSeen = 0;
+ bool seenclient46 = false;
+ bool seenNull = false;
+ char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
+
+ fprintf(stderr, "Ports:\n server=%d. client46=%d, client6=%d\n",
+ port, ntohs(client46.sin6_port), ntohs(client6.sin6_port));
+
+ auto checkIPv4Dump = [&] (uint8_t /* proto */, const inet_diag_msg *msg) {
+ if (msg == nullptr) {
+ EXPECT_FALSE(seenNull);
+ seenNull = true;
+ return 0;
+ }
+ EXPECT_EQ(htonl(INADDR_LOOPBACK), msg->id.idiag_src[0]);
+ v4SocketsSeen++;
+ seenclient46 |= (msg->id.idiag_sport == client46.sin6_port);
+ inet_ntop(AF_INET, msg->id.idiag_src, src, sizeof(src));
+ inet_ntop(AF_INET, msg->id.idiag_src, dst, sizeof(dst));
+ fprintf(stderr, " v4 %s:%d -> %s:%d %s\n",
+ src, htons(msg->id.idiag_sport),
+ dst, htons(msg->id.idiag_dport),
+ tcpStateName(msg->idiag_state));
+ return 0;
+ };
+
+ int v6SocketsSeen = 0;
+ bool seenClient6 = false, seenServer46 = false, seenServer6 = false;
+
+ auto checkIPv6Dump = [&] (uint8_t /* proto */, const inet_diag_msg *msg) {
+ if (msg == nullptr) {
+ EXPECT_FALSE(seenNull);
+ seenNull = true;
+ return 0;
+ }
+ struct in6_addr *saddr = (struct in6_addr *) msg->id.idiag_src;
+ EXPECT_TRUE(
+ IN6_IS_ADDR_LOOPBACK(saddr) ||
+ (IN6_IS_ADDR_V4MAPPED(saddr) && saddr->s6_addr32[3] == htonl(INADDR_LOOPBACK)));
+ v6SocketsSeen++;
+ seenClient6 |= (msg->id.idiag_sport == client6.sin6_port);
+ seenServer46 |= (msg->id.idiag_sport == htons(port));
+ seenServer6 |= (msg->id.idiag_sport == htons(port));
+ inet_ntop(AF_INET6, msg->id.idiag_src, src, sizeof(src));
+ inet_ntop(AF_INET6, msg->id.idiag_src, dst, sizeof(dst));
+ fprintf(stderr, " v6 [%s]:%d -> [%s]:%d %s\n",
+ src, htons(msg->id.idiag_sport),
+ dst, htons(msg->id.idiag_dport),
+ tcpStateName(msg->idiag_state));
+ return 0;
+ };
+
+ SockDiag sd;
+ ASSERT_TRUE(sd.open()) << "Failed to open SOCK_DIAG socket";
+
+ seenNull = false;
+ int ret = sd.sendDumpRequest(IPPROTO_TCP, AF_INET, "127.0.0.1");
+ ASSERT_EQ(0, ret) << "Failed to send IPv4 dump request: " << strerror(-ret);
+ fprintf(stderr, "Sent IPv4 dump\n");
+ sd.readDiagMsg(IPPROTO_TCP, checkIPv4Dump);
+ EXPECT_GE(v4SocketsSeen, 1);
+ EXPECT_TRUE(seenclient46);
+ EXPECT_FALSE(seenServer46);
+
+ seenNull = false;
+ ret = sd.sendDumpRequest(IPPROTO_TCP, AF_INET6, "127.0.0.1");
+ ASSERT_EQ(0, ret) << "Failed to send mapped dump request: " << strerror(-ret);
+ fprintf(stderr, "Sent mapped dump\n");
+ sd.readDiagMsg(IPPROTO_TCP, checkIPv6Dump);
+ EXPECT_TRUE(seenServer46);
+
+ seenNull = false;
+ ret = sd.sendDumpRequest(IPPROTO_TCP, AF_INET6, "::1");
+ ASSERT_EQ(0, ret) << "Failed to send IPv6 dump request: " << strerror(-ret);
+ fprintf(stderr, "Sent IPv6 dump\n");
+
+ sd.readDiagMsg(IPPROTO_TCP, checkIPv6Dump);
+ EXPECT_GE(v6SocketsSeen, 1);
+ EXPECT_TRUE(seenClient6);
+ EXPECT_TRUE(seenServer6);
+
+ close(v4socket);
+ close(v6socket);
+ close(listensocket);
+ close(accepted4);
+ close(accepted6);
+}
+
+TEST_F(SockDiagTest, TestMicroBenchmark) {
+ fprintf(stderr, "Benchmarking closing %d sockets\n", NUM_SOCKETS);
+
+ int listensocket = socket(AF_INET6, SOCK_STREAM, 0);
+ ASSERT_NE(-1, listensocket) << "Failed to open listen socket";
+
+ uint16_t port = bindAndListen(listensocket);
+ ASSERT_NE(0, port) << "Can't bind to server port";
+ sockaddr_in6 server = { .sin6_family = AF_INET6, .sin6_port = htons(port) };
+
+ using ms = std::chrono::duration<float, std::ratio<1, 1000>>;
+
+ int clientsockets[NUM_SOCKETS], serversockets[NUM_SOCKETS];
+ uint16_t clientports[NUM_SOCKETS];
+ sockaddr_in6 client;
+ socklen_t clientlen;
+
+ auto start = std::chrono::steady_clock::now();
+ for (int i = 0; i < NUM_SOCKETS; i++) {
+ int s = socket(AF_INET6, SOCK_STREAM, 0);
+ clientlen = sizeof(client);
+ ASSERT_EQ(0, connect(s, (sockaddr *) &server, sizeof(server)))
+ << "Connecting socket " << i << " failed " << strerror(errno);
+ serversockets[i] = accept(listensocket, (sockaddr *) &client, &clientlen);
+ ASSERT_NE(-1, serversockets[i])
+ << "Accepting socket " << i << " failed " << strerror(errno);
+ clientports[i] = client.sin6_port;
+ clientsockets[i] = s;
+ }
+ fprintf(stderr, " Connecting: %6.1f ms\n",
+ std::chrono::duration_cast<ms>(std::chrono::steady_clock::now() - start).count());
+
+ SockDiag sd;
+ ASSERT_TRUE(sd.open()) << "Failed to open SOCK_DIAG socket";
+
+ int ret = sd.sendDumpRequest(IPPROTO_TCP, AF_INET6, "::1");
+ ASSERT_EQ(0, ret) << "Failed to send IPv6 dump request: " << strerror(-ret);
+
+ auto closeMySockets = [&] (uint8_t proto, const inet_diag_msg *msg) {
+ if (msg && msg->id.idiag_dport == htons(port)) {
+ return sd.sockDestroy(proto, msg);
+ }
+ return 0;
+ };
+
+ start = std::chrono::steady_clock::now();
+ sd.readDiagMsg(IPPROTO_TCP, closeMySockets);
+ fprintf(stderr, " Destroying: %6.1f ms\n",
+ std::chrono::duration_cast<ms>(std::chrono::steady_clock::now() - start).count());
+
+ int err;
+ start = std::chrono::steady_clock::now();
+ for (int i = 0; i < NUM_SOCKETS; i++) {
+ ret = send(clientsockets[i], "foo", sizeof("foo"), 0);
+ err = errno;
+ EXPECT_EQ(-1, ret) << "Client socket " << i << " not closed";
+ if (ret == -1) {
+ EXPECT_EQ(ECONNABORTED, errno)
+ << "Client socket: unexpected error: " << strerror(errno);
+ }
+
+ // Check that the server sockets have been closed too (because closing the client sockets
+ // sends RSTs).
+ ret = send(serversockets[i], "foo", sizeof("foo"), 0);
+ err = errno;
+ EXPECT_EQ(-1, ret) << "Server socket " << i << " not closed";
+ if (ret == -1) {
+ EXPECT_EQ(ECONNRESET, errno)
+ << "Server socket: unexpected error: " << strerror(errno);
+ }
+ }
+ fprintf(stderr, " Verifying: %6.1f ms\n",
+ std::chrono::duration_cast<ms>(std::chrono::steady_clock::now() - start).count());
+
+
+
+ start = std::chrono::steady_clock::now();
+ for (int i = 0; i < NUM_SOCKETS; i++) {
+ close(clientsockets[i]);
+ close(serversockets[i]);
+ }
+ fprintf(stderr, " Closing: %6.1f ms\n",
+ std::chrono::duration_cast<ms>(std::chrono::steady_clock::now() - start).count());
+
+ close(listensocket);
+}