blob: 715d840eff122031244a6dc70e149751741ef5ab [file] [log] [blame]
Maciej Żenczykowskib70da762019-01-28 15:20:48 -08001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Maciej Żenczykowskieec72082020-02-04 23:29:41 -080017#include "OffloadUtils.h"
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080018
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -070019#include <arpa/inet.h>
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080020#include <linux/if.h>
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080021#include <linux/netlink.h>
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -070022#include <linux/pkt_cls.h>
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -080023#include <linux/pkt_sched.h>
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080024#include <sys/ioctl.h>
25#include <sys/socket.h>
26#include <sys/types.h>
27#include <unistd.h>
28
Maciej Żenczykowskieec72082020-02-04 23:29:41 -080029#define LOG_TAG "OffloadUtils"
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080030#include <log/log.h>
31
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080032#include "NetlinkCommands.h"
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080033#include "android-base/unique_fd.h"
34
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080035namespace android {
36namespace net {
37
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080038int hardwareAddressType(const std::string& interface) {
39 base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
40
41 if (ufd < 0) {
42 const int err = errno;
43 ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
44 return -err;
45 };
46
47 struct ifreq ifr = {};
48 // We use strncpy() instead of strlcpy() since kernel has to be able
49 // to handle non-zero terminated junk passed in by userspace anyway,
50 // and this way too long interface names (more than IFNAMSIZ-1 = 15
51 // characters plus terminating NULL) will not get truncated to 15
52 // characters and zero-terminated and thus potentially erroneously
53 // match a truncated interface if one were to exist.
54 strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
55
56 if (ioctl(ufd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) return -errno;
57
58 return ifr.ifr_hwaddr.sa_family;
59}
60
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080061// TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
62int openNetlinkSocket(void) {
63 base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
64 if (fd == -1) {
65 const int err = errno;
66 ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
67 return -err;
68 }
69
70 int rv;
71
72 const int on = 1;
73 rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
74 if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
75
76 // this is needed to get sane strace netlink parsing, it allocates the pid
77 rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
78 if (rv) {
79 const int err = errno;
80 ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
81 return -err;
82 }
83
84 // we do not want to receive messages from anyone besides the kernel
85 rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
86 if (rv) {
87 const int err = errno;
88 ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
89 return -err;
90 }
91
92 return fd.release();
93}
94
Maciej Żenczykowski992a51d2019-02-11 18:06:56 -080095// TODO: merge with //system/netd/server/SockDiag.cpp:checkError(fd)
Maciej Żenczykowskia8b69392020-02-12 03:30:52 -080096static int processNetlinkResponse(int fd) {
Maciej Żenczykowski992a51d2019-02-11 18:06:56 -080097 struct {
98 nlmsghdr h;
99 nlmsgerr e;
100 char buf[256];
101 } resp = {};
102
103 const int rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
104
105 if (rv == -1) {
106 const int err = errno;
107 ALOGE("recv() failed");
108 return -err;
109 }
110
111 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
112 ALOGE("recv() returned short packet: %d", rv);
113 return -EMSGSIZE;
114 }
115
116 if (resp.h.nlmsg_len != (unsigned)rv) {
117 ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
118 return -EBADMSG;
119 }
120
121 if (resp.h.nlmsg_type != NLMSG_ERROR) {
122 ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
123 return -EBADMSG;
124 }
125
126 return resp.e.error; // returns 0 on success
127}
128
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -0800129// ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
130// REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
131// DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
Maciej Żenczykowskib3e69d62020-02-05 02:44:16 -0800132int doTcQdiscClsact(int fd, int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -0800133 // This is the name of the qdisc we are attaching.
134 // Some hoop jumping to make this compile time constant with known size,
135 // so that the structure declaration is well defined at compile time.
136#define CLSACT "clsact"
137 static const char clsact[] = CLSACT;
138 // sizeof() includes the terminating NULL
139#define ASCIIZ_LEN_CLSACT sizeof(clsact)
140
141 const struct {
142 nlmsghdr n;
143 tcmsg t;
144 struct {
145 nlattr attr;
146 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
147 } kind;
148 } req = {
149 .n =
150 {
151 .nlmsg_len = sizeof(req),
152 .nlmsg_type = nlMsgType,
153 .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
154 },
155 .t =
156 {
157 .tcm_family = AF_UNSPEC,
158 .tcm_ifindex = ifIndex,
159 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
160 .tcm_parent = TC_H_CLSACT,
161 },
162 .kind =
163 {
164 .attr =
165 {
166 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
167 .nla_type = TCA_KIND,
168 },
169 .str = CLSACT,
170 },
171 };
172#undef ASCIIZ_LEN_CLSACT
173#undef CLSACT
174
175 const int rv = send(fd, &req, sizeof(req), 0);
176 if (rv == -1) return -errno;
177 if (rv != sizeof(req)) return -EMSGSIZE;
178
179 return processNetlinkResponse(fd);
180}
181
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800182// tc filter add dev .. in/egress prio 1 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
183// direct-action
Maciej Żenczykowski74f8c9d2020-02-05 02:59:20 -0800184int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet, bool ingress, bool ipv6) {
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700185 // The priority doesn't matter until we actually start attaching multiple
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800186 // things to the same interface's in/egress point.
187 const __u32 prio = 1;
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700188
189 // This is the name of the filter we're attaching (ie. this is the 'bpf'
190 // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
191 //
192 // We go through some hoops in order to make this compile time constants
193 // so that we can define the struct further down the function with the
194 // field for this sized correctly already during the build.
195#define BPF "bpf"
196 const char bpf[] = BPF;
197 // sizeof() includes the terminating NULL
198#define ASCIIZ_LEN_BPF sizeof(bpf)
199
200 // This is to replicate program name suffix used by 'tc' Linux cli
201 // when it attaches programs.
202#define FSOBJ_SUFFIX ":[*fsobj]"
203
204 // This macro expands (from header files) to:
205 // prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800206 // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700207 // (also compatible with anything that has 0 size L2 header)
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800208#define NAME_RX_RAWIP CLAT_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
209 const char name_rx_rawip[] = NAME_RX_RAWIP;
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700210
211 // This macro expands (from header files) to:
212 // prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800213 // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700214 // (also compatible with anything that has standard ethernet header)
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800215#define NAME_RX_ETHER CLAT_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
216 const char name_rx_ether[] = NAME_RX_ETHER;
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700217
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800218 // This macro expands (from header files) to:
219 // prog_clatd_schedcls_egress_clat_rawip:[*fsobj]
220 // and is the name of the pinned egress ebpf program for ARPHRD_RAWIP interfaces.
221 // (also compatible with anything that has 0 size L2 header)
222#define NAME_TX_RAWIP CLAT_EGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
223 const char name_tx_rawip[] = NAME_TX_RAWIP;
224
225 // This macro expands (from header files) to:
226 // prog_clatd_schedcls_egress_clat_ether:[*fsobj]
227 // and is the name of the pinned egress ebpf program for ARPHRD_ETHER interfaces.
228 // (also compatible with anything that has standard ethernet header)
229#define NAME_TX_ETHER CLAT_EGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
230 const char name_tx_ether[] = NAME_TX_ETHER;
231
232 // The actual name we'll use is determined at run time via 'ethernet' and 'ingress'
233 // booleans. We need to compile time allocate enough space in the struct
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700234 // hence this macro magic to make sure we have enough space for either
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800235 // possibility. In practice some of these are actually the same size.
236#define ASCIIZ_MAXLEN_NAME_RX \
237 ((sizeof(name_rx_rawip) > sizeof(name_rx_ether)) ? sizeof(name_rx_rawip) \
238 : sizeof(name_rx_ether))
239#define ASCIIZ_MAXLEN_NAME_TX \
240 ((sizeof(name_tx_rawip) > sizeof(name_tx_ether)) ? sizeof(name_tx_rawip) \
241 : sizeof(name_tx_ether))
242#define ASCIIZ_MAXLEN_NAME \
243 ((ASCIIZ_MAXLEN_NAME_RX > ASCIIZ_MAXLEN_NAME_TX) ? ASCIIZ_MAXLEN_NAME_RX \
244 : ASCIIZ_MAXLEN_NAME_TX)
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700245
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800246 // These are not compile time constants: NAME is used in strncpy below
247#define NAME_RX (ethernet ? NAME_RX_ETHER : NAME_RX_RAWIP)
248#define NAME_TX (ethernet ? NAME_TX_ETHER : NAME_TX_RAWIP)
249#define NAME (ingress ? NAME_RX : NAME_TX)
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700250
251 struct {
252 nlmsghdr n;
253 tcmsg t;
254 struct {
255 nlattr attr;
256 char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
257 } kind;
258 struct {
259 nlattr attr;
260 struct {
261 nlattr attr;
262 __u32 u32;
263 } fd;
264 struct {
265 nlattr attr;
266 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
267 } name;
268 struct {
269 nlattr attr;
270 __u32 u32;
271 } flags;
272 } options;
273 } req = {
274 .n =
275 {
276 .nlmsg_len = sizeof(req),
277 .nlmsg_type = RTM_NEWTFILTER,
278 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
279 },
280 .t =
281 {
282 .tcm_family = AF_UNSPEC,
283 .tcm_ifindex = ifIndex,
284 .tcm_handle = TC_H_UNSPEC,
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800285 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
286 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
287 .tcm_info = (prio << 16) |
288 (__u32)(ipv6 ? htons(ETH_P_IPV6) : htons(ETH_P_IP)),
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700289 },
290 .kind =
291 {
292 .attr =
293 {
294 .nla_len = sizeof(req.kind),
295 .nla_type = TCA_KIND,
296 },
297 .str = BPF,
298 },
299 .options =
300 {
301 .attr =
302 {
303 .nla_len = sizeof(req.options),
304 .nla_type = TCA_OPTIONS,
305 },
306 .fd =
307 {
308 .attr =
309 {
310 .nla_len = sizeof(req.options.fd),
311 .nla_type = TCA_BPF_FD,
312 },
313 .u32 = static_cast<__u32>(bpfFd),
314 },
315 .name =
316 {
317 .attr =
318 {
319 .nla_len = sizeof(req.options.name),
320 .nla_type = TCA_BPF_NAME,
321 },
322 // Visible via 'tc filter show', but
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800323 // is overwritten by strncpy below
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700324 .str = "placeholder",
325 },
326 .flags =
327 {
328 .attr =
329 {
330 .nla_len = sizeof(req.options.flags),
331 .nla_type = TCA_BPF_FLAGS,
332 },
333 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
334 },
335 },
336 };
337
338 strncpy(req.options.name.str, NAME, sizeof(req.options.name.str));
339
340#undef NAME
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800341#undef NAME_TX
342#undef NAME_RX
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700343#undef ASCIIZ_MAXLEN_NAME
Maciej Żenczykowskia06943c2019-12-15 11:57:42 -0800344#undef ASCIIZ_MAXLEN_NAME_TX
345#undef ASCIIZ_MAXLEN_NAME_RX
346#undef NAME_TX_ETHER
347#undef NAME_TX_RAWIP
348#undef NAME_RX_ETHER
349#undef NAME_RX_RAWIP
350#undef FSOBJ_SUFFIX
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700351#undef ASCIIZ_LEN_BPF
352#undef BPF
353
354 const int rv = send(fd, &req, sizeof(req), 0);
355 if (rv == -1) return -errno;
356 if (rv != sizeof(req)) return -EMSGSIZE;
357
358 return processNetlinkResponse(fd);
359}
360
Maciej Żenczykowskif1247382020-02-05 00:44:14 -0800361// tc filter del dev .. in/egress prio .. protocol ..
362int tcFilterDelDev(int fd, int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
363 struct {
364 nlmsghdr n;
365 tcmsg t;
366 } req = {
367 .n =
368 {
369 .nlmsg_len = sizeof(req),
370 .nlmsg_type = RTM_DELTFILTER,
371 .nlmsg_flags = NETLINK_REQUEST_FLAGS,
372 },
373 .t =
374 {
375 .tcm_family = AF_UNSPEC,
376 .tcm_ifindex = ifIndex,
377 .tcm_handle = TC_H_UNSPEC,
378 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
379 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
380 .tcm_info = static_cast<__u32>((prio << 16) | htons(proto)),
381 },
382 };
383
384 const int rv = send(fd, &req, sizeof(req), 0);
385 if (rv == -1) return -errno;
386 if (rv != sizeof(req)) return -EMSGSIZE;
387
388 return processNetlinkResponse(fd);
389}
390
Maciej Żenczykowskib70da762019-01-28 15:20:48 -0800391} // namespace net
392} // namespace android