Use native netlink code instead of /sbin/ip to manipulate routes

Shelling out to /sbin/ip is slow, and more importantly it does
not preserve the error messages returned by the kernel when
adding or deleting a route fails.  Instead, use netlink directly.

This change does not yet pass the errors back to CommandListener;
that is done in the next change in the series.

Change-Id: I5ad3c8583580857be6386a620ff5c4f3872d685b
diff --git a/server/NetdConstants.cpp b/server/NetdConstants.cpp
index ea31410..4823c91 100644
--- a/server/NetdConstants.cpp
+++ b/server/NetdConstants.cpp
@@ -15,6 +15,9 @@
  */
 
 #include <fcntl.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <stdlib.h>
 #include <string.h>
 #include <sys/wait.h>
 #include <ctype.h>
@@ -170,3 +173,100 @@
 
     return true;
 }
+
+// Parses an IP prefix of the form "address/prefixlen", for example "192.0.2.0/24" or
+// "2001:db8::/32".
+//
+// On success, stores the address family (AF_INET or AF_INET6) in |*family|, the raw address
+// bytes in |address| and the prefix length in |*prefixlen|, and returns the number of bytes
+// written to |address| (4 for IPv4, 16 for IPv6). |size| is the capacity of |address| in bytes.
+//
+// On failure, leaves the output parameters untouched and returns a negative errno value:
+//   -EFAULT        if any pointer argument is null.
+//   -EINVAL        if the '/' or prefix length is missing, the prefix length is not a decimal
+//                  number or is too large for the address family, or the address is not a
+//                  numeric IP address.
+//   -EAFNOSUPPORT  if the address is neither IPv4 nor IPv6.
+//   -ENOSPC        if |address| is too small to hold the raw address.
+int parsePrefix(const char *prefix, uint8_t *family, void *address, int size, uint8_t *prefixlen) {
+    if (!prefix || !family || !address || !prefixlen) {
+        return -EFAULT;
+    }
+
+    // Find the '/' separating address from prefix length. Check the result before doing any
+    // arithmetic on it.
+    const char *slash = strchr(prefix, '/');
+    if (!slash) {
+        return -EINVAL;
+    }
+
+    // Convert the prefix length to a number. strtoul() skips leading whitespace and accepts a
+    // sign, so require a leading digit. The result is kept in an unsigned long so that an
+    // out-of-range value cannot be truncated into the valid range.
+    const char *prefixlenString = slash + 1;
+    if (!isdigit(static_cast<unsigned char>(*prefixlenString))) {
+        return -EINVAL;
+    }
+    char *endptr;
+    unsigned long templen = strtoul(prefixlenString, &endptr, 10);
+    if (*endptr || templen > 255) {
+        return -EINVAL;
+    }
+
+    // Copy the address part of the prefix to a local buffer. We have to copy because
+    // getaddrinfo operates on null-terminated address strings, but prefix is const and has
+    // '/' after the address.
+    std::string addressString(prefix, slash - prefix);
+
+    // Parse the address.
+    addrinfo *res = NULL;
+    addrinfo hints = {
+        .ai_flags = AI_NUMERICHOST,
+    };
+    int ret = getaddrinfo(addressString.c_str(), NULL, &hints, &res);
+    if (ret || !res) {
+        return -EINVAL;  // getaddrinfo return values are not errno values.
+    }
+
+    // Locate the raw address bytes and the largest prefix length the family allows.
+    const void *rawAddress = NULL;
+    int rawLength = 0;
+    unsigned long maxPrefixlen = 0;
+    switch (res[0].ai_family) {
+        case AF_INET: {
+            sockaddr_in *sin = reinterpret_cast<sockaddr_in *>(res[0].ai_addr);
+            rawAddress = &sin->sin_addr;
+            rawLength = 4;
+            maxPrefixlen = 32;
+            break;
+        }
+        case AF_INET6: {
+            sockaddr_in6 *sin6 = reinterpret_cast<sockaddr_in6 *>(res[0].ai_addr);
+            rawAddress = &sin6->sin6_addr;
+            rawLength = 16;
+            maxPrefixlen = 128;
+            break;
+        }
+        default: {
+            break;
+        }
+    }
+
+    // Validate and fill in the outputs. Every path from here on falls through to the single
+    // freeaddrinfo() call below so that the result list is never leaked.
+    if (!rawAddress) {
+        ret = -EAFNOSUPPORT;
+    } else if (templen > maxPrefixlen) {
+        ret = -EINVAL;
+    } else if (rawLength > size) {
+        ret = -ENOSPC;
+    } else {
+        *family = res[0].ai_family;
+        *prefixlen = static_cast<uint8_t>(templen);
+        memcpy(address, rawAddress, rawLength);
+        ret = rawLength;
+    }
+
+    freeaddrinfo(res);
+    return ret;
+}
diff --git a/server/NetdConstants.h b/server/NetdConstants.h
index 9b85d16..a05b6d6 100644
--- a/server/NetdConstants.h
+++ b/server/NetdConstants.h
@@ -38,6 +38,10 @@
 int writeFile(const char *path, const char *value, int size);
 int readFile(const char *path, char *buf, int *sizep);
 bool isIfaceName(const char *name);
+// Parses |prefix| ("address/prefixlen", IPv4 or IPv6). On success, fills in |*family|, |address|
+// (at most |size| bytes) and |*prefixlen| and returns the number of address bytes written;
+// on failure, returns a negative errno value.
+int parsePrefix(const char *prefix, uint8_t *family, void *address, int size, uint8_t *prefixlen);
 
 #define ARRAY_SIZE(a) (sizeof(a) / sizeof(*(a)))
 
diff --git a/server/RouteController.cpp b/server/RouteController.cpp
index b44a31a..3c16fa3 100644
--- a/server/RouteController.cpp
+++ b/server/RouteController.cpp
@@ -19,10 +19,21 @@
 #include "Fwmark.h"
 #include "NetdConstants.h"
 
+#include <arpa/inet.h>
+#include <errno.h>
+#include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 #include <logwrap/logwrap.h>
 #include <map>
+#include <netinet/in.h>
 #include <net/if.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+// Avoids "non-constant-expression cannot be narrowed from type 'unsigned int' to 'unsigned short'"
+// warnings when using RTA_LENGTH(x) inside static initializers (even when x is already uint16_t).
+#define U16_RTA_LENGTH(x) static_cast<uint16_t>(RTA_LENGTH((x)))
 
 namespace {
 
@@ -109,30 +120,106 @@
     return true;
 }
 
-bool runIpRouteCommand(const char* action, uint32_t table, const char* interface,
-                       const char* destination, const char* nexthop) {
-    char tableString[UINT32_STRLEN];
-    snprintf(tableString, sizeof(tableString), "%u", table);
+// Adds or deletes an IPv4 or IPv6 route by sending a single rtnetlink request to the kernel.
+// |action| is the netlink message type (RTM_NEWROUTE to add, RTM_DELROUTE to delete) and |table|
+// the routing table to modify. |destination| is required and has the form "address/prefixlen".
+// |interface| (output interface) and |nexthop| (gateway, parsed as the same address family as
+// |destination|) are optional and are left out of the request when null.
+// Returns 0 on success or negative errno on failure.
+int modifyIpRoute(uint16_t action, uint32_t table, const char* interface, const char* destination,
+                  const char* nexthop) {
+    // At least the destination must be non-null.
+    if (!destination) {
+        return -EFAULT;
+    }
 
-    int argc = 0;
-    const char* argv[16];
+    // Parse the prefix.
+    uint8_t rawAddress[sizeof(in6_addr)];
+    uint8_t family, prefixLength;
+    int rawLength = parsePrefix(destination, &family, rawAddress, sizeof(rawAddress),
+                                &prefixLength);
+    if (rawLength < 0) {
+        return rawLength;
+    }
 
-    argv[argc++] = IP_PATH;
-    argv[argc++] = "route";
-    argv[argc++] = action;
-    argv[argc++] = "table";
-    argv[argc++] = tableString;
-    if (destination) {
-        argv[argc++] = destination;
-        argv[argc++] = "dev";
-        argv[argc++] = interface;
-        if (nexthop) {
-            argv[argc++] = "via";
-            argv[argc++] = nexthop;
+    if (static_cast<size_t>(rawLength) > sizeof(rawAddress)) {
+        return -ENOBUFS;  // Cannot happen; parsePrefix only supports IPv4 and IPv6.
+    }
+
+    // If an interface was specified, find the ifindex.
+    uint32_t ifindex = 0;
+    if (interface) {
+        ifindex = if_nametoindex(interface);
+        if (!ifindex) {
+            return -ENODEV;
         }
     }
 
-    return !android_fork_execvp(argc, const_cast<char**>(argv), NULL, false, false);
+    // If a nexthop was specified, parse it as the same family as the prefix.
+    uint8_t rawNexthop[sizeof(in6_addr)];
+    if (nexthop && inet_pton(family, nexthop, rawNexthop) <= 0) {
+        return -EINVAL;
+    }
+
+    // Assemble the netlink request as iovecs; unused optional attributes get zero length.
+    nlmsghdr nlmsg = {
+        .nlmsg_type = action,
+        .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+    };
+    rtmsg rtmsg = {
+        .rtm_protocol = RTPROT_STATIC,
+        .rtm_type = RTN_UNICAST,
+        .rtm_family = family,
+        .rtm_dst_len = prefixLength,
+    };
+    rtattr rta_table = { U16_RTA_LENGTH(sizeof(table)), RTA_TABLE };
+    rtattr rta_oif = { U16_RTA_LENGTH(sizeof(ifindex)), RTA_OIF };
+    rtattr rta_dst = { U16_RTA_LENGTH(rawLength), RTA_DST };
+    rtattr rta_gateway = { U16_RTA_LENGTH(rawLength), RTA_GATEWAY };
+    if (action == RTM_NEWROUTE) {
+        nlmsg.nlmsg_flags |= (NLM_F_CREATE | NLM_F_EXCL);
+    }
+
+    iovec iov[] = {
+        { &nlmsg,        sizeof(nlmsg) },
+        { &rtmsg,        sizeof(rtmsg) },
+        { &rta_table,    sizeof(rta_table) },
+        { &table,        sizeof(table) },
+        { &rta_dst,      sizeof(rta_dst) },
+        { rawAddress,    static_cast<size_t>(rawLength) },
+        { &rta_oif,      interface ? sizeof(rta_oif) : 0 },
+        { &ifindex,      interface ? sizeof(ifindex) : 0 },
+        { &rta_gateway,  nexthop ? sizeof(rta_gateway) : 0 },
+        { rawNexthop,    nexthop ? static_cast<size_t>(rawLength) : 0 },
+    };
+
+    for (size_t i = 0; i < ARRAY_SIZE(iov); ++i) {
+        nlmsg.nlmsg_len += iov[i].iov_len;
+    }
+
+    int ret;
+    struct {
+        nlmsghdr msg;
+        nlmsgerr err;
+    } response;
+
+    sockaddr_nl kernel = {AF_NETLINK, 0, 0, 0};
+    int sock = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+    if (sock != -1 &&
+            connect(sock, reinterpret_cast<sockaddr *>(&kernel), sizeof(kernel)) != -1 &&
+            writev(sock, iov, ARRAY_SIZE(iov)) != -1 &&
+            (ret = recv(sock, &response, sizeof(response), 0)) != -1) {
+        // Netlink errors are negative errno; a reply of any other size is treated as malformed.
+        ret = (ret == static_cast<int>(sizeof(response))) ? response.err.error : -EBADMSG;
+    } else {
+        ret = -errno;
+    }
+
+    if (sock != -1) {
+        close(sock);
+    }
+
+    return ret;
 }
 
 bool modifyPerNetworkRules(unsigned netId, const char* interface, Permission permission, bool add,
@@ -223,7 +310,7 @@
 }
 
 bool modifyRoute(const char* interface, const char* destination, const char* nexthop,
-                 const char* action, RouteController::TableType tableType, unsigned /* uid */) {
+                 int action, RouteController::TableType tableType, unsigned /* uid */) {
     uint32_t table = 0;
     switch (tableType) {
         case RouteController::INTERFACE: {
@@ -245,7 +332,7 @@
         return false;
     }
 
-    if (!runIpRouteCommand(action, table, interface, destination, nexthop)) {
+    if (modifyIpRoute(action, table, interface, destination, nexthop)) {
         return false;
     }
 
@@ -258,7 +345,7 @@
     // them based on the return status of the 'ip' command. Fix this situation by ignoring errors
     // only when action == ADD && error == EEXIST.
     if (!nexthop && !strchr(destination, ':')) {
-        runIpRouteCommand(action, RT_TABLE_MAIN, interface, destination, NULL);
+        modifyIpRoute(action, RT_TABLE_MAIN, interface, destination, NULL);
     }
 
     return true;
@@ -365,10 +452,14 @@
 
+// Adds a route by calling modifyRoute() with the RTM_NEWROUTE netlink message type.
+// Returns modifyRoute()'s result.
 bool RouteController::addRoute(const char* interface, const char* destination,
                                const char* nexthop, TableType tableType, unsigned uid) {
-    return modifyRoute(interface, destination, nexthop, ADD, tableType, uid);
+    return modifyRoute(interface, destination, nexthop, RTM_NEWROUTE, tableType, uid);
 }
 
+// Removes a route by calling modifyRoute() with the RTM_DELROUTE netlink message type.
+// Returns modifyRoute()'s result.
 bool RouteController::removeRoute(const char* interface, const char* destination,
                                   const char* nexthop, TableType tableType, unsigned uid) {
-    return modifyRoute(interface, destination, nexthop, DEL, tableType, uid);
+    return modifyRoute(interface, destination, nexthop, RTM_DELROUTE, tableType, uid);
 }