blob: c75fcdd6a91d35f1cd1e82acd5165f6c851cf92b [file] [log] [blame]
Maciej Żenczykowskib70da762019-01-28 15:20:48 -08001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "ClatUtils.h"
18
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -070019#include <arpa/inet.h>
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080020#include <errno.h>
21#include <linux/if.h>
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080022#include <linux/netlink.h>
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -070023#include <linux/pkt_cls.h>
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -080024#include <linux/pkt_sched.h>
25#include <linux/rtnetlink.h>
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080026#include <sys/ioctl.h>
27#include <sys/socket.h>
28#include <sys/types.h>
29#include <unistd.h>
30
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080031#define LOG_TAG "ClatUtils"
32#include <log/log.h>
33
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080034#include "NetlinkCommands.h"
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080035#include "android-base/unique_fd.h"
Maciej Żenczykowski88d28ff2019-03-25 11:54:32 -070036#include "bpf/BpfUtils.h"
37#include "netdbpf/bpf_shared.h"
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080038
Maciej Żenczykowskib70da762019-01-28 15:20:48 -080039namespace android {
40namespace net {
41
Maciej Żenczykowski0a7dce82019-01-28 15:31:55 -080042int hardwareAddressType(const std::string& interface) {
43 base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
44
45 if (ufd < 0) {
46 const int err = errno;
47 ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
48 return -err;
49 };
50
51 struct ifreq ifr = {};
52 // We use strncpy() instead of strlcpy() since kernel has to be able
53 // to handle non-zero terminated junk passed in by userspace anyway,
54 // and this way too long interface names (more than IFNAMSIZ-1 = 15
55 // characters plus terminating NULL) will not get truncated to 15
56 // characters and zero-terminated and thus potentially erroneously
57 // match a truncated interface if one were to exist.
58 strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
59
60 if (ioctl(ufd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) return -errno;
61
62 return ifr.ifr_hwaddr.sa_family;
63}
64
Maciej Żenczykowski4e36f132019-12-15 13:20:15 -080065int getClatEgressMapFd(void) {
66 const int fd = bpf::bpfFdGet(CLAT_EGRESS_MAP_PATH, 0);
67 return (fd == -1) ? -errno : fd;
68}
69
Maciej Żenczykowski4fe857e2019-03-29 23:29:17 -070070int getClatIngressMapFd(void) {
71 const int fd = bpf::bpfFdGet(CLAT_INGRESS_MAP_PATH, 0);
Maciej Żenczykowski88d28ff2019-03-25 11:54:32 -070072 return (fd == -1) ? -errno : fd;
73}
74
Maciej Żenczykowski4fe857e2019-03-29 23:29:17 -070075int getClatIngressProgFd(bool with_ethernet_header) {
76 const int fd = bpf::bpfFdGet(
77 with_ethernet_header ? CLAT_INGRESS_PROG_ETHER_PATH : CLAT_INGRESS_PROG_RAWIP_PATH, 0);
Maciej Żenczykowski949d84a2019-01-28 17:22:30 -080078 return (fd == -1) ? -errno : fd;
79}
80
Maciej Żenczykowski7330b022019-01-28 17:30:24 -080081// TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
82int openNetlinkSocket(void) {
83 base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
84 if (fd == -1) {
85 const int err = errno;
86 ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
87 return -err;
88 }
89
90 int rv;
91
92 const int on = 1;
93 rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
94 if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
95
96 // this is needed to get sane strace netlink parsing, it allocates the pid
97 rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
98 if (rv) {
99 const int err = errno;
100 ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
101 return -err;
102 }
103
104 // we do not want to receive messages from anyone besides the kernel
105 rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
106 if (rv) {
107 const int err = errno;
108 ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
109 return -err;
110 }
111
112 return fd.release();
113}
114
Maciej Żenczykowski992a51d2019-02-11 18:06:56 -0800115// TODO: merge with //system/netd/server/SockDiag.cpp:checkError(fd)
116int processNetlinkResponse(int fd) {
117 struct {
118 nlmsghdr h;
119 nlmsgerr e;
120 char buf[256];
121 } resp = {};
122
123 const int rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
124
125 if (rv == -1) {
126 const int err = errno;
127 ALOGE("recv() failed");
128 return -err;
129 }
130
131 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
132 ALOGE("recv() returned short packet: %d", rv);
133 return -EMSGSIZE;
134 }
135
136 if (resp.h.nlmsg_len != (unsigned)rv) {
137 ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
138 return -EBADMSG;
139 }
140
141 if (resp.h.nlmsg_type != NLMSG_ERROR) {
142 ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
143 return -EBADMSG;
144 }
145
146 return resp.e.error; // returns 0 on success
147}
148
Maciej Żenczykowskiff3308d2019-02-12 19:10:55 -0800149// ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
150// REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
151// DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
152int doTcQdiscClsact(int fd, int ifIndex, __u16 nlMsgType, __u16 nlMsgFlags) {
153 // This is the name of the qdisc we are attaching.
154 // Some hoop jumping to make this compile time constant with known size,
155 // so that the structure declaration is well defined at compile time.
156#define CLSACT "clsact"
157 static const char clsact[] = CLSACT;
158 // sizeof() includes the terminating NULL
159#define ASCIIZ_LEN_CLSACT sizeof(clsact)
160
161 const struct {
162 nlmsghdr n;
163 tcmsg t;
164 struct {
165 nlattr attr;
166 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
167 } kind;
168 } req = {
169 .n =
170 {
171 .nlmsg_len = sizeof(req),
172 .nlmsg_type = nlMsgType,
173 .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
174 },
175 .t =
176 {
177 .tcm_family = AF_UNSPEC,
178 .tcm_ifindex = ifIndex,
179 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
180 .tcm_parent = TC_H_CLSACT,
181 },
182 .kind =
183 {
184 .attr =
185 {
186 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
187 .nla_type = TCA_KIND,
188 },
189 .str = CLSACT,
190 },
191 };
192#undef ASCIIZ_LEN_CLSACT
193#undef CLSACT
194
195 const int rv = send(fd, &req, sizeof(req), 0);
196 if (rv == -1) return -errno;
197 if (rv != sizeof(req)) return -EMSGSIZE;
198
199 return processNetlinkResponse(fd);
200}
201
202int tcQdiscAddDevClsact(int fd, int ifIndex) {
203 return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_EXCL | NLM_F_CREATE);
204}
205
206int tcQdiscReplaceDevClsact(int fd, int ifIndex) {
207 return doTcQdiscClsact(fd, ifIndex, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_REPLACE);
208}
209
210int tcQdiscDelDevClsact(int fd, int ifIndex) {
211 return doTcQdiscClsact(fd, ifIndex, RTM_DELQDISC, 0);
212}
213
Maciej Żenczykowskif7a9f882019-04-02 22:09:04 -0700214// tc filter add dev .. ingress prio 1 protocol ipv6 bpf object-pinned /sys/fs/bpf/... direct-action
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700215int tcFilterAddDevBpf(int fd, int ifIndex, int bpfFd, bool ethernet) {
216 // The priority doesn't matter until we actually start attaching multiple
Maciej Żenczykowskif7a9f882019-04-02 22:09:04 -0700217 // things to the same interface's ingress point.
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700218 const int prio = 1;
219
220 // This is the name of the filter we're attaching (ie. this is the 'bpf'
221 // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
222 //
223 // We go through some hoops in order to make this compile time constants
224 // so that we can define the struct further down the function with the
225 // field for this sized correctly already during the build.
226#define BPF "bpf"
227 const char bpf[] = BPF;
228 // sizeof() includes the terminating NULL
229#define ASCIIZ_LEN_BPF sizeof(bpf)
230
231 // This is to replicate program name suffix used by 'tc' Linux cli
232 // when it attaches programs.
233#define FSOBJ_SUFFIX ":[*fsobj]"
234
235 // This macro expands (from header files) to:
236 // prog_clatd_schedcls_ingress_clat_rawip:[*fsobj]
237 // and is the name of the pinned ebpf program for ARPHRD_RAWIP interfaces.
238 // (also compatible with anything that has 0 size L2 header)
Maciej Żenczykowski4fe857e2019-03-29 23:29:17 -0700239#define NAME_RAWIP CLAT_INGRESS_PROG_RAWIP_NAME FSOBJ_SUFFIX
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700240 const char name_rawip[] = NAME_RAWIP;
241
242 // This macro expands (from header files) to:
243 // prog_clatd_schedcls_ingress_clat_ether:[*fsobj]
244 // and is the name of the pinned ebpf program for ARPHRD_ETHER interfaces.
245 // (also compatible with anything that has standard ethernet header)
Maciej Żenczykowski4fe857e2019-03-29 23:29:17 -0700246#define NAME_ETHER CLAT_INGRESS_PROG_ETHER_NAME FSOBJ_SUFFIX
Maciej Żenczykowski2f8ff892019-03-25 13:57:20 -0700247 const char name_ether[] = NAME_ETHER;
248
249 // The actual name we'll use is determined at run time via 'ethernet'
250 // boolean. We need to compile time allocate enough space in the struct
251 // hence this macro magic to make sure we have enough space for either
252 // possibility. In practice both are actually the same size.
253#define ASCIIZ_MAXLEN_NAME \
254 ((sizeof(name_rawip) > sizeof(name_ether)) ? sizeof(name_rawip) : sizeof(name_ether))
255
256 // This is not a compile time constant and is used in strcpy below
257#define NAME (ethernet ? NAME_ETHER : NAME_RAWIP)
258
259 struct {
260 nlmsghdr n;
261 tcmsg t;
262 struct {
263 nlattr attr;
264 char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
265 } kind;
266 struct {
267 nlattr attr;
268 struct {
269 nlattr attr;
270 __u32 u32;
271 } fd;
272 struct {
273 nlattr attr;
274 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
275 } name;
276 struct {
277 nlattr attr;
278 __u32 u32;
279 } flags;
280 } options;
281 } req = {
282 .n =
283 {
284 .nlmsg_len = sizeof(req),
285 .nlmsg_type = RTM_NEWTFILTER,
286 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
287 },
288 .t =
289 {
290 .tcm_family = AF_UNSPEC,
291 .tcm_ifindex = ifIndex,
292 .tcm_handle = TC_H_UNSPEC,
293 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS),
294 .tcm_info = (prio << 16) | htons(ETH_P_IPV6),
295 },
296 .kind =
297 {
298 .attr =
299 {
300 .nla_len = sizeof(req.kind),
301 .nla_type = TCA_KIND,
302 },
303 .str = BPF,
304 },
305 .options =
306 {
307 .attr =
308 {
309 .nla_len = sizeof(req.options),
310 .nla_type = TCA_OPTIONS,
311 },
312 .fd =
313 {
314 .attr =
315 {
316 .nla_len = sizeof(req.options.fd),
317 .nla_type = TCA_BPF_FD,
318 },
319 .u32 = static_cast<__u32>(bpfFd),
320 },
321 .name =
322 {
323 .attr =
324 {
325 .nla_len = sizeof(req.options.name),
326 .nla_type = TCA_BPF_NAME,
327 },
328 // Visible via 'tc filter show', but
329 // is overwritten by strcpy below
330 .str = "placeholder",
331 },
332 .flags =
333 {
334 .attr =
335 {
336 .nla_len = sizeof(req.options.flags),
337 .nla_type = TCA_BPF_FLAGS,
338 },
339 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
340 },
341 },
342 };
343
344 strncpy(req.options.name.str, NAME, sizeof(req.options.name.str));
345
346#undef NAME
347#undef ASCIIZ_MAXLEN_NAME
348#undef NAME_ETHER
349#undef NAME_RAWIP
350#undef NAME
351#undef ASCIIZ_LEN_BPF
352#undef BPF
353
354 const int rv = send(fd, &req, sizeof(req), 0);
355 if (rv == -1) return -errno;
356 if (rv != sizeof(req)) return -EMSGSIZE;
357
358 return processNetlinkResponse(fd);
359}
360
Maciej Żenczykowskib70da762019-01-28 15:20:48 -0800361} // namespace net
362} // namespace android