blob: a3916b6f8aa1204da123df621a2add3c770962d1 [file] [log] [blame]
/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
Maciej Żenczykowskieec72082020-02-04 23:29:41 -080017#include "OffloadUtils.h"
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080018
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -070019#include <arpa/inet.h>
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080020#include <linux/if.h>
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080021#include <linux/netlink.h>
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -070022#include <linux/pkt_cls.h>
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -080023#include <linux/pkt_sched.h>
24#include <linux/rtnetlink.h>
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080025#include <sys/ioctl.h>
26#include <sys/socket.h>
27#include <sys/types.h>
28#include <unistd.h>
29
Maciej Żenczykowskieec72082020-02-04 23:29:41 -080030#define LOG_TAG "OffloadUtils"
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080031#include <log/log.h>
32
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080033#include "NetlinkCommands.h"
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080034#include "android-base/unique_fd.h"
35
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080036namespace android {
37namespace net {
38
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080039int hardwareAddressType(const std::string& interface) {
40 base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
41
42 if (ufd < 0) {
43 const int err = errno;
44 ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
45 return -err;
46 };
47
48 struct ifreq ifr = {};
49 // We use strncpy() instead of strlcpy() since kernel has to be able
50 // to handle non-zero terminated junk passed in by userspace anyway,
51 // and this way too long interface names (more than IFNAMSIZ-1 = 15
52 // characters plus terminating NULL) will not get truncated to 15
53 // characters and zero-terminated and thus potentially erroneously
54 // match a truncated interface if one were to exist.
55 strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
56
57 if (ioctl(ufd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) return -errno;
58
59 return ifr.ifr_hwaddr.sa_family;
60}
61
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080062// TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
63int openNetlinkSocket(void) {
64 base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
65 if (fd == -1) {
66 const int err = errno;
67 ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
68 return -err;
69 }
70
71 int rv;
72
73 const int on = 1;
74 rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
75 if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
76
77 // this is needed to get sane strace netlink parsing, it allocates the pid
78 rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
79 if (rv) {
80 const int err = errno;
81 ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
82 return -err;
83 }
84
85 // we do not want to receive messages from anyone besides the kernel
86 rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
87 if (rv) {
88 const int err = errno;
89 ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
90 return -err;
91 }
92
93 return fd.release();
94}
95
Maciej Żenczykowski992a51d2019-02-11 18:06:56 -080096// TODO: merge with //system/netd/server/SockDiag.cpp:checkError(fd)
97int processNetlinkResponse(int fd) {
98 struct {
99 nlmsghdr h;
100 nlmsgerr e;
101 char buf[256];
102 } resp = {};
103
104 const int rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
105
106 if (rv == -1) {
107 const int err = errno;
108 ALOGE("recv() failed");
109 return -err;
110 }
111
112 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
113 ALOGE("recv() returned short packet: %d", rv);
114 return -EMSGSIZE;
115 }
116
117 if (resp.h.nlmsg_len != (unsigned)rv) {
118 ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
119 return -EBADMSG;
120 }
121
122 if (resp.h.nlmsg_type != NLMSG_ERROR) {
123 ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
124 return -EBADMSG;
125 }
126
127 return resp.e.error; // returns 0 on success
128}
129
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -0800130// ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
131// REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
132// DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800133static int doTcQdiscClsact(int fd, int ifIndex, __u16 nlMsgType, __u16 nlMsgFlags) {
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -0800134 // This is the name of the qdisc we are attaching.
135 // Some hoop jumping to make this compile time constant with known size,
136 // so that the structure declaration is well defined at compile time.
137#define CLSACT "clsact"
138 static const char clsact[] = CLSACT;
139 // sizeof() includes the terminating NULL
140#define ASCIIZ_LEN_CLSACT sizeof(clsact)
141
142 const struct {
143 nlmsghdr n;
144 tcmsg t;
145 struct {
146 nlattr attr;
147 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
148 } kind;
149 } req = {
150 .n =
151 {
152 .nlmsg_len = sizeof(req),
153 .nlmsg_type = nlMsgType,
154 .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
155 },
156 .t =
157 {
158 .tcm_family = AF_UNSPEC,
159 .tcm_ifindex = ifIndex,
160 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
161 .tcm_parent = TC_H_CLSACT,
162 },
163 .kind =
164 {
165 .attr =
166 {
167 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
168 .nla_type = TCA_KIND,
169 },
170 .str = CLSACT,
171 },
172 };
173#undef ASCIIZ_LEN_CLSACT
174#undef CLSACT
175
176 const int rv = send(fd, &req, sizeof(req), 0);
177 if (rv == -1) return -errno;
178 if (rv != sizeof(req)) return -EMSGSIZE;
179
180 return processNetlinkResponse(fd);
181}
182
183int tcQdiscAddDevClsact(int fd, int ifIndex) {
184 return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_EXCL | NLM_F_CREATE);
185}
186
187int tcQdiscReplaceDevClsact(int fd, int ifIndex) {
188 return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_REPLACE);
189}
190
191int tcQdiscDelDevClsact(int fd, int ifIndex) {
192 return doTcQdiscClsact(fd, ifIndex, RTM_DELQDISC, 0);
193}
194
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800195// tc filter add dev .. in/egress prio 1 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
196// direct-action
197static int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet, bool ingress,
198 bool ipv6) {
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700199 // The priority doesn't matter until we actually start attaching multiple
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800200 // things to the same interface's in/egress point.
201 const __u32 prio = 1;
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700202
203 // This is the name of the filter we're attaching (ie. this is the 'bpf'
204 // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
205 //
206 // We go through some hoops in order to make this compile time constants
207 // so that we can define the struct further down the function with the
208 // field for this sized correctly already during the build.
209#define BPF "bpf"
210 const char bpf[] = BPF;
211 // sizeof() includes the terminating NULL
212#define ASCIIZ_LEN_BPF sizeof(bpf)
213
214 // This is to replicate program name suffix used by 'tc' Linux cli
215 // when it attaches programs.
216#define FSOBJ_SUFFIX ":[*fsobj]"
217
218 // This macro expands (from header files) to:
219 // prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800220 // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700221 // (also compatible with anything that has 0 size L2 header)
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800222#define NAME_RX_RAWIP CLAT_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
223 const char name_rx_rawip[] = NAME_RX_RAWIP;
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700224
225 // This macro expands (from header files) to:
226 // prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800227 // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700228 // (also compatible with anything that has standard ethernet header)
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800229#define NAME_RX_ETHER CLAT_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
230 const char name_rx_ether[] = NAME_RX_ETHER;
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700231
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800232 // This macro expands (from header files) to:
233 // prog_clatd_schedcls_egress_clat_rawip:[*fsobj]
234 // and is the name of the pinned egress ebpf program for ARPHRD_RAWIP interfaces.
235 // (also compatible with anything that has 0 size L2 header)
236#define NAME_TX_RAWIP CLAT_EGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
237 const char name_tx_rawip[] = NAME_TX_RAWIP;
238
239 // This macro expands (from header files) to:
240 // prog_clatd_schedcls_egress_clat_ether:[*fsobj]
241 // and is the name of the pinned egress ebpf program for ARPHRD_ETHER interfaces.
242 // (also compatible with anything that has standard ethernet header)
243#define NAME_TX_ETHER CLAT_EGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
244 const char name_tx_ether[] = NAME_TX_ETHER;
245
246 // The actual name we'll use is determined at run time via 'ethernet' and 'ingress'
247 // booleans. We need to compile time allocate enough space in the struct
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700248 // hence this macro magic to make sure we have enough space for either
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800249 // possibility. In practice some of these are actually the same size.
250#define ASCIIZ_MAXLEN_NAME_RX \
251 ((sizeof(name_rx_rawip) > sizeof(name_rx_ether)) ? sizeof(name_rx_rawip) \
252 : sizeof(name_rx_ether))
253#define ASCIIZ_MAXLEN_NAME_TX \
254 ((sizeof(name_tx_rawip) > sizeof(name_tx_ether)) ? sizeof(name_tx_rawip) \
255 : sizeof(name_tx_ether))
256#define ASCIIZ_MAXLEN_NAME \
257 ((ASCIIZ_MAXLEN_NAME_RX > ASCIIZ_MAXLEN_NAME_TX) ? ASCIIZ_MAXLEN_NAME_RX \
258 : ASCIIZ_MAXLEN_NAME_TX)
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700259
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800260 // These are not compile time constants: NAME is used in strncpy below
261#define NAME_RX (ethernet ? NAME_RX_ETHER : NAME_RX_RAWIP)
262#define NAME_TX (ethernet ? NAME_TX_ETHER : NAME_TX_RAWIP)
263#define NAME (ingress ? NAME_RX : NAME_TX)
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700264
265 struct {
266 nlmsghdr n;
267 tcmsg t;
268 struct {
269 nlattr attr;
270 char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
271 } kind;
272 struct {
273 nlattr attr;
274 struct {
275 nlattr attr;
276 __u32 u32;
277 } fd;
278 struct {
279 nlattr attr;
280 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
281 } name;
282 struct {
283 nlattr attr;
284 __u32 u32;
285 } flags;
286 } options;
287 } req = {
288 .n =
289 {
290 .nlmsg_len = sizeof(req),
291 .nlmsg_type = RTM_NEWTFILTER,
292 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
293 },
294 .t =
295 {
296 .tcm_family = AF_UNSPEC,
297 .tcm_ifindex = ifIndex,
298 .tcm_handle = TC_H_UNSPEC,
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800299 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
300 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
301 .tcm_info = (prio << 16) |
302 (__u32)(ipv6 ? htons(ETH_P_IPV6) : htons(ETH_P_IP)),
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700303 },
304 .kind =
305 {
306 .attr =
307 {
308 .nla_len = sizeof(req.kind),
309 .nla_type = TCA_KIND,
310 },
311 .str = BPF,
312 },
313 .options =
314 {
315 .attr =
316 {
317 .nla_len = sizeof(req.options),
318 .nla_type = TCA_OPTIONS,
319 },
320 .fd =
321 {
322 .attr =
323 {
324 .nla_len = sizeof(req.options.fd),
325 .nla_type = TCA_BPF_FD,
326 },
327 .u32 = static_cast<__u32>(bpfFd),
328 },
329 .name =
330 {
331 .attr =
332 {
333 .nla_len = sizeof(req.options.name),
334 .nla_type = TCA_BPF_NAME,
335 },
336 // Visible via 'tc filter show', but
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800337 // is overwritten by strncpy below
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700338 .str = "placeholder",
339 },
340 .flags =
341 {
342 .attr =
343 {
344 .nla_len = sizeof(req.options.flags),
345 .nla_type = TCA_BPF_FLAGS,
346 },
347 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
348 },
349 },
350 };
351
352 strncpy(req.options.name.str, NAME, sizeof(req.options.name.str));
353
354#undef NAME
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800355#undef NAME_TX
356#undef NAME_RX
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700357#undef ASCIIZ_MAXLEN_NAME
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800358#undef ASCIIZ_MAXLEN_NAME_TX
359#undef ASCIIZ_MAXLEN_NAME_RX
360#undef NAME_TX_ETHER
361#undef NAME_TX_RAWIP
362#undef NAME_RX_ETHER
363#undef NAME_RX_RAWIP
364#undef FSOBJ_SUFFIX
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700365#undef ASCIIZ_LEN_BPF
366#undef BPF
367
368 const int rv = send(fd, &req, sizeof(req), 0);
369 if (rv == -1) return -errno;
370 if (rv != sizeof(req)) return -EMSGSIZE;
371
372 return processNetlinkResponse(fd);
373}
374
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800375// tc filter add dev .. ingress prio 1 protocol ipv6 bpf object-pinned /sys/fs/bpf/... direct-action
376int tcFilterAddDevIngressBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
377 return tcFilterAddDevBpf(fd, ifIndex, bpfFd, ethernet, /*ingress*/ true, /*ipv6*/ true);
378}
379
380// tc filter add dev .. egress prio 1 protocol ip bpf object-pinned /sys/fs/bpf/... direct-action
381int tcFilterAddDevEgressBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
382 return tcFilterAddDevBpf(fd, ifIndex, bpfFd, ethernet, /*ingress*/ false, /*ipv6*/ false);
383}
384
Maciej Żenczykowskif1247382020-02-05 00:44:14 -0800385// tc filter del dev .. in/egress prio .. protocol ..
386int tcFilterDelDev(int fd, int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
387 struct {
388 nlmsghdr n;
389 tcmsg t;
390 } req = {
391 .n =
392 {
393 .nlmsg_len = sizeof(req),
394 .nlmsg_type = RTM_DELTFILTER,
395 .nlmsg_flags = NETLINK_REQUEST_FLAGS,
396 },
397 .t =
398 {
399 .tcm_family = AF_UNSPEC,
400 .tcm_ifindex = ifIndex,
401 .tcm_handle = TC_H_UNSPEC,
402 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
403 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
404 .tcm_info = static_cast<__u32>((prio << 16) | htons(proto)),
405 },
406 };
407
408 const int rv = send(fd, &req, sizeof(req), 0);
409 if (rv == -1) return -errno;
410 if (rv != sizeof(req)) return -EMSGSIZE;
411
412 return processNetlinkResponse(fd);
413}
414
Maciej Żenczykowskib70da762019-01-28 15:20:48 -0800415} // namespace net
416} // namespace android