blob: ea4341ce15111fe6b0066da25625a92e58334af0 [file] [log] [blame]
Maciej Żenczykowskib70da762019-01-28 15:20:48 -08001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Maciej Żenczykowskieec72082020-02-04 23:29:41 -080017#include "OffloadUtils.h"
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080018
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -070019#include <arpa/inet.h>
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080020#include <linux/if.h>
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080021#include <linux/netlink.h>
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -070022#include <linux/pkt_cls.h>
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -080023#include <linux/pkt_sched.h>
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080024#include <sys/ioctl.h>
25#include <sys/socket.h>
26#include <sys/types.h>
27#include <unistd.h>
28
Maciej Żenczykowskieec72082020-02-04 23:29:41 -080029#define LOG_TAG "OffloadUtils"
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080030#include <log/log.h>
31
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080032#include "NetlinkCommands.h"
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080033#include "android-base/unique_fd.h"
34
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080035namespace android {
36namespace net {
37
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080038int hardwareAddressType(const std::string& interface) {
39 base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
40
41 if (ufd < 0) {
42 const int err = errno;
43 ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
44 return -err;
45 };
46
47 struct ifreq ifr = {};
48 // We use strncpy() instead of strlcpy() since kernel has to be able
49 // to handle non-zero terminated junk passed in by userspace anyway,
50 // and this way too long interface names (more than IFNAMSIZ-1 = 15
51 // characters plus terminating NULL) will not get truncated to 15
52 // characters and zero-terminated and thus potentially erroneously
53 // match a truncated interface if one were to exist.
54 strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
55
56 if (ioctl(ufd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) return -errno;
57
58 return ifr.ifr_hwaddr.sa_family;
59}
60
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080061// TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
62int openNetlinkSocket(void) {
63 base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
64 if (fd == -1) {
65 const int err = errno;
66 ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
67 return -err;
68 }
69
70 int rv;
71
72 const int on = 1;
73 rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
74 if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
75
76 // this is needed to get sane strace netlink parsing, it allocates the pid
77 rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
78 if (rv) {
79 const int err = errno;
80 ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
81 return -err;
82 }
83
84 // we do not want to receive messages from anyone besides the kernel
85 rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
86 if (rv) {
87 const int err = errno;
88 ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
89 return -err;
90 }
91
92 return fd.release();
93}
94
Maciej Żenczykowski992a51d2019-02-11 18:06:56 -080095// TODO: merge with //system/netd/server/SockDiag.cpp:checkError(fd)
96int processNetlinkResponse(int fd) {
97 struct {
98 nlmsghdr h;
99 nlmsgerr e;
100 char buf[256];
101 } resp = {};
102
103 const int rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
104
105 if (rv == -1) {
106 const int err = errno;
107 ALOGE("recv() failed");
108 return -err;
109 }
110
111 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
112 ALOGE("recv() returned short packet: %d", rv);
113 return -EMSGSIZE;
114 }
115
116 if (resp.h.nlmsg_len != (unsigned)rv) {
117 ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
118 return -EBADMSG;
119 }
120
121 if (resp.h.nlmsg_type != NLMSG_ERROR) {
122 ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
123 return -EBADMSG;
124 }
125
126 return resp.e.error; // returns 0 on success
127}
128
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -0800129// ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
130// REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
131// DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
Maciej Żenczykowskib3e69d62020-02-05 02:44:16 -0800132int doTcQdiscClsact(int fd, int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -0800133 // This is the name of the qdisc we are attaching.
134 // Some hoop jumping to make this compile time constant with known size,
135 // so that the structure declaration is well defined at compile time.
136#define CLSACT "clsact"
137 static const char clsact[] = CLSACT;
138 // sizeof() includes the terminating NULL
139#define ASCIIZ_LEN_CLSACT sizeof(clsact)
140
141 const struct {
142 nlmsghdr n;
143 tcmsg t;
144 struct {
145 nlattr attr;
146 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
147 } kind;
148 } req = {
149 .n =
150 {
151 .nlmsg_len = sizeof(req),
152 .nlmsg_type = nlMsgType,
153 .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
154 },
155 .t =
156 {
157 .tcm_family = AF_UNSPEC,
158 .tcm_ifindex = ifIndex,
159 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
160 .tcm_parent = TC_H_CLSACT,
161 },
162 .kind =
163 {
164 .attr =
165 {
166 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
167 .nla_type = TCA_KIND,
168 },
169 .str = CLSACT,
170 },
171 };
172#undef ASCIIZ_LEN_CLSACT
173#undef CLSACT
174
175 const int rv = send(fd, &req, sizeof(req), 0);
176 if (rv == -1) return -errno;
177 if (rv != sizeof(req)) return -EMSGSIZE;
178
179 return processNetlinkResponse(fd);
180}
181
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800182// tc filter add dev .. in/egress prio 1 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
183// direct-action
184static int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet, bool ingress,
185 bool ipv6) {
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700186 // The priority doesn't matter until we actually start attaching multiple
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800187 // things to the same interface's in/egress point.
188 const __u32 prio = 1;
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700189
190 // This is the name of the filter we're attaching (ie. this is the 'bpf'
191 // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
192 //
193 // We go through some hoops in order to make this compile time constants
194 // so that we can define the struct further down the function with the
195 // field for this sized correctly already during the build.
196#define BPF "bpf"
197 const char bpf[] = BPF;
198 // sizeof() includes the terminating NULL
199#define ASCIIZ_LEN_BPF sizeof(bpf)
200
201 // This is to replicate program name suffix used by 'tc' Linux cli
202 // when it attaches programs.
203#define FSOBJ_SUFFIX ":[*fsobj]"
204
205 // This macro expands (from header files) to:
206 // prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800207 // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700208 // (also compatible with anything that has 0 size L2 header)
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800209#define NAME_RX_RAWIP CLAT_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
210 const char name_rx_rawip[] = NAME_RX_RAWIP;
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700211
212 // This macro expands (from header files) to:
213 // prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800214 // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700215 // (also compatible with anything that has standard ethernet header)
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800216#define NAME_RX_ETHER CLAT_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
217 const char name_rx_ether[] = NAME_RX_ETHER;
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700218
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800219 // This macro expands (from header files) to:
220 // prog_clatd_schedcls_egress_clat_rawip:[*fsobj]
221 // and is the name of the pinned egress ebpf program for ARPHRD_RAWIP interfaces.
222 // (also compatible with anything that has 0 size L2 header)
223#define NAME_TX_RAWIP CLAT_EGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
224 const char name_tx_rawip[] = NAME_TX_RAWIP;
225
226 // This macro expands (from header files) to:
227 // prog_clatd_schedcls_egress_clat_ether:[*fsobj]
228 // and is the name of the pinned egress ebpf program for ARPHRD_ETHER interfaces.
229 // (also compatible with anything that has standard ethernet header)
230#define NAME_TX_ETHER CLAT_EGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
231 const char name_tx_ether[] = NAME_TX_ETHER;
232
233 // The actual name we'll use is determined at run time via 'ethernet' and 'ingress'
234 // booleans. We need to compile time allocate enough space in the struct
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700235 // hence this macro magic to make sure we have enough space for either
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800236 // possibility. In practice some of these are actually the same size.
237#define ASCIIZ_MAXLEN_NAME_RX \
238 ((sizeof(name_rx_rawip) > sizeof(name_rx_ether)) ? sizeof(name_rx_rawip) \
239 : sizeof(name_rx_ether))
240#define ASCIIZ_MAXLEN_NAME_TX \
241 ((sizeof(name_tx_rawip) > sizeof(name_tx_ether)) ? sizeof(name_tx_rawip) \
242 : sizeof(name_tx_ether))
243#define ASCIIZ_MAXLEN_NAME \
244 ((ASCIIZ_MAXLEN_NAME_RX > ASCIIZ_MAXLEN_NAME_TX) ? ASCIIZ_MAXLEN_NAME_RX \
245 : ASCIIZ_MAXLEN_NAME_TX)
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700246
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800247 // These are not compile time constants: NAME is used in strncpy below
248#define NAME_RX (ethernet ? NAME_RX_ETHER : NAME_RX_RAWIP)
249#define NAME_TX (ethernet ? NAME_TX_ETHER : NAME_TX_RAWIP)
250#define NAME (ingress ? NAME_RX : NAME_TX)
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700251
252 struct {
253 nlmsghdr n;
254 tcmsg t;
255 struct {
256 nlattr attr;
257 char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
258 } kind;
259 struct {
260 nlattr attr;
261 struct {
262 nlattr attr;
263 __u32 u32;
264 } fd;
265 struct {
266 nlattr attr;
267 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
268 } name;
269 struct {
270 nlattr attr;
271 __u32 u32;
272 } flags;
273 } options;
274 } req = {
275 .n =
276 {
277 .nlmsg_len = sizeof(req),
278 .nlmsg_type = RTM_NEWTFILTER,
279 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
280 },
281 .t =
282 {
283 .tcm_family = AF_UNSPEC,
284 .tcm_ifindex = ifIndex,
285 .tcm_handle = TC_H_UNSPEC,
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800286 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
287 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
288 .tcm_info = (prio << 16) |
289 (__u32)(ipv6 ? htons(ETH_P_IPV6) : htons(ETH_P_IP)),
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700290 },
291 .kind =
292 {
293 .attr =
294 {
295 .nla_len = sizeof(req.kind),
296 .nla_type = TCA_KIND,
297 },
298 .str = BPF,
299 },
300 .options =
301 {
302 .attr =
303 {
304 .nla_len = sizeof(req.options),
305 .nla_type = TCA_OPTIONS,
306 },
307 .fd =
308 {
309 .attr =
310 {
311 .nla_len = sizeof(req.options.fd),
312 .nla_type = TCA_BPF_FD,
313 },
314 .u32 = static_cast<__u32>(bpfFd),
315 },
316 .name =
317 {
318 .attr =
319 {
320 .nla_len = sizeof(req.options.name),
321 .nla_type = TCA_BPF_NAME,
322 },
323 // Visible via 'tc filter show', but
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800324 // is overwritten by strncpy below
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700325 .str = "placeholder",
326 },
327 .flags =
328 {
329 .attr =
330 {
331 .nla_len = sizeof(req.options.flags),
332 .nla_type = TCA_BPF_FLAGS,
333 },
334 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
335 },
336 },
337 };
338
339 strncpy(req.options.name.str, NAME, sizeof(req.options.name.str));
340
341#undef NAME
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800342#undef NAME_TX
343#undef NAME_RX
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700344#undef ASCIIZ_MAXLEN_NAME
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800345#undef ASCIIZ_MAXLEN_NAME_TX
346#undef ASCIIZ_MAXLEN_NAME_RX
347#undef NAME_TX_ETHER
348#undef NAME_TX_RAWIP
349#undef NAME_RX_ETHER
350#undef NAME_RX_RAWIP
351#undef FSOBJ_SUFFIX
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700352#undef ASCIIZ_LEN_BPF
353#undef BPF
354
355 const int rv = send(fd, &req, sizeof(req), 0);
356 if (rv == -1) return -errno;
357 if (rv != sizeof(req)) return -EMSGSIZE;
358
359 return processNetlinkResponse(fd);
360}
361
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800362// tc filter add dev .. ingress prio 1 protocol ipv6 bpf object-pinned /sys/fs/bpf/... direct-action
363int tcFilterAddDevIngressBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
364 return tcFilterAddDevBpf(fd, ifIndex, bpfFd, ethernet, /*ingress*/ true, /*ipv6*/ true);
365}
366
367// tc filter add dev .. egress prio 1 protocol ip bpf object-pinned /sys/fs/bpf/... direct-action
368int tcFilterAddDevEgressBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
369 return tcFilterAddDevBpf(fd, ifIndex, bpfFd, ethernet, /*ingress*/ false, /*ipv6*/ false);
370}
371
Maciej Żenczykowskif1247382020-02-05 00:44:14 -0800372// tc filter del dev .. in/egress prio .. protocol ..
373int tcFilterDelDev(int fd, int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
374 struct {
375 nlmsghdr n;
376 tcmsg t;
377 } req = {
378 .n =
379 {
380 .nlmsg_len = sizeof(req),
381 .nlmsg_type = RTM_DELTFILTER,
382 .nlmsg_flags = NETLINK_REQUEST_FLAGS,
383 },
384 .t =
385 {
386 .tcm_family = AF_UNSPEC,
387 .tcm_ifindex = ifIndex,
388 .tcm_handle = TC_H_UNSPEC,
389 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
390 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
391 .tcm_info = static_cast<__u32>((prio << 16) | htons(proto)),
392 },
393 };
394
395 const int rv = send(fd, &req, sizeof(req), 0);
396 if (rv == -1) return -errno;
397 if (rv != sizeof(req)) return -EMSGSIZE;
398
399 return processNetlinkResponse(fd);
400}
401
Maciej Żenczykowskib70da762019-01-28 15:20:48 -0800402} // namespace net
403} // namespace android