blob: 0752dbefd0f6ee5df4fe26b9487b9fdb22543f59 [file] [log] [blame]
Brenden Blanco246b9422015-06-05 11:15:27 -07001/*
2 * Copyright (c) 2015 PLUMgrid, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Nan Xiao667988c2017-08-28 11:44:19 +080016#ifndef _GNU_SOURCE
Colin Ian Kinga12db192017-07-06 13:58:17 +010017#define _GNU_SOURCE
Nan Xiao667988c2017-08-28 11:44:19 +080018#endif
Brenden Blanco246b9422015-06-05 11:15:27 -070019
Brenden Blancocd5cb412015-04-26 09:41:58 -070020#include <arpa/inet.h>
Brenden Blancoa94bd932015-04-26 00:56:42 -070021#include <errno.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070022#include <fcntl.h>
Teng Qin01b07ba2017-11-20 13:28:03 -080023#include <inttypes.h>
Brenden Blanco3069caa2016-08-01 18:12:11 -070024#include <limits.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070025#include <linux/bpf.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070026#include <linux/bpf_common.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070027#include <linux/if_packet.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070028#include <linux/perf_event.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070029#include <linux/pkt_cls.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070030#include <linux/rtnetlink.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070031#include <linux/sched.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070032#include <linux/unistd.h>
33#include <linux/version.h>
Brenden Blancoa94bd932015-04-26 00:56:42 -070034#include <net/ethernet.h>
35#include <net/if.h>
Brenden Blancofa073452017-05-30 17:35:53 -070036#include <sched.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070037#include <stdbool.h>
Brenden Blancobb7200c2015-06-04 18:01:42 -070038#include <stdio.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070039#include <stdlib.h>
40#include <string.h>
41#include <sys/ioctl.h>
Brenden Blanco4b4bd272015-11-30 10:54:47 -080042#include <sys/resource.h>
Derek35c25012017-01-22 20:58:23 -080043#include <sys/stat.h>
44#include <sys/types.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070045#include <unistd.h>
Alexei Starovoitovb1df37c2017-09-06 19:47:47 -070046#include <linux/if_alg.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070047
Brenden Blancoa94bd932015-04-26 00:56:42 -070048#include "libbpf.h"
Brenden Blanco8207d102015-09-25 13:58:30 -070049#include "perf_reader.h"
Brenden Blancoa94bd932015-04-26 00:56:42 -070050
Brenden Blancof275d3d2015-07-06 23:41:23 -070051// TODO: remove these defines when linux-libc-dev exports them properly
52
53#ifndef __NR_bpf
Naveen N. Rao0006ad12016-04-29 16:42:58 +053054#if defined(__powerpc64__)
55#define __NR_bpf 361
Zvonko Kosic98121a32017-03-07 07:30:25 +010056#elif defined(__s390x__)
57#define __NR_bpf 351
Zhiyi Sun8e434b72016-12-06 16:21:37 +080058#elif defined(__aarch64__)
59#define __NR_bpf 280
Naveen N. Rao0006ad12016-04-29 16:42:58 +053060#else
Brenden Blancof275d3d2015-07-06 23:41:23 -070061#define __NR_bpf 321
62#endif
Naveen N. Rao0006ad12016-04-29 16:42:58 +053063#endif
Brenden Blancof275d3d2015-07-06 23:41:23 -070064
65#ifndef SO_ATTACH_BPF
66#define SO_ATTACH_BPF 50
67#endif
68
69#ifndef PERF_EVENT_IOC_SET_BPF
70#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
71#endif
72
73#ifndef PERF_FLAG_FD_CLOEXEC
74#define PERF_FLAG_FD_CLOEXEC (1UL << 3)
75#endif
76
Martin KaFai Laudf368162017-10-19 12:46:48 -070077#define min(x, y) ((x) < (y) ? (x) : (y))
78
Mark Drayton5f5687e2017-02-20 18:13:03 +000079static int probe_perf_reader_page_cnt = 8;
80
// Widen a user-space pointer to the u64 field layout used by union bpf_attr.
static uint64_t ptr_to_u64(void *ptr)
{
  return (uint64_t)(uintptr_t)ptr;
}
85
Martin KaFai Laudf368162017-10-19 12:46:48 -070086int bpf_create_map(enum bpf_map_type map_type, const char *name,
87 int key_size, int value_size,
88 int max_entries, int map_flags)
Brenden Blancoa94bd932015-04-26 00:56:42 -070089{
Martin KaFai Laudf368162017-10-19 12:46:48 -070090 size_t name_len = name ? strlen(name) : 0;
Brenden Blancofdc027c2015-09-03 11:49:54 -070091 union bpf_attr attr;
92 memset(&attr, 0, sizeof(attr));
93 attr.map_type = map_type;
94 attr.key_size = key_size;
95 attr.value_size = value_size;
96 attr.max_entries = max_entries;
Huapeng Zhoude11d072016-12-06 18:10:38 -080097 attr.map_flags = map_flags;
Martin KaFai Laudf368162017-10-19 12:46:48 -070098 memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
Brenden Blancoa94bd932015-04-26 00:56:42 -070099
Brenden Blanco4b4bd272015-11-30 10:54:47 -0800100 int ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
Martin KaFai Laudf368162017-10-19 12:46:48 -0700101
102 if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
103 memset(attr.map_name, 0, BPF_OBJ_NAME_LEN);
104 ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
105 }
106
Brenden Blanco4b4bd272015-11-30 10:54:47 -0800107 if (ret < 0 && errno == EPERM) {
108 // see note below about the rationale for this retry
109
110 struct rlimit rl = {};
111 if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
112 rl.rlim_max = RLIM_INFINITY;
113 rl.rlim_cur = rl.rlim_max;
114 if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
115 ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
116 }
117 }
118 return ret;
Brenden Blancoa94bd932015-04-26 00:56:42 -0700119}
120
121int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
122{
Brenden Blancofdc027c2015-09-03 11:49:54 -0700123 union bpf_attr attr;
124 memset(&attr, 0, sizeof(attr));
125 attr.map_fd = fd;
126 attr.key = ptr_to_u64(key);
127 attr.value = ptr_to_u64(value);
128 attr.flags = flags;
Brenden Blancoa94bd932015-04-26 00:56:42 -0700129
130 return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
131}
132
133int bpf_lookup_elem(int fd, void *key, void *value)
134{
Brenden Blancofdc027c2015-09-03 11:49:54 -0700135 union bpf_attr attr;
136 memset(&attr, 0, sizeof(attr));
137 attr.map_fd = fd;
138 attr.key = ptr_to_u64(key);
139 attr.value = ptr_to_u64(value);
Brenden Blancoa94bd932015-04-26 00:56:42 -0700140
141 return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
142}
143
144int bpf_delete_elem(int fd, void *key)
145{
Brenden Blancofdc027c2015-09-03 11:49:54 -0700146 union bpf_attr attr;
147 memset(&attr, 0, sizeof(attr));
148 attr.map_fd = fd;
149 attr.key = ptr_to_u64(key);
Brenden Blancoa94bd932015-04-26 00:56:42 -0700150
151 return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
152}
153
Teng Qindb7fab52017-05-16 01:10:15 -0700154int bpf_get_first_key(int fd, void *key, size_t key_size)
155{
156 union bpf_attr attr;
157 int i, res;
158
159 memset(&attr, 0, sizeof(attr));
160 attr.map_fd = fd;
161 attr.key = 0;
162 attr.next_key = ptr_to_u64(key);
163
164 // 4.12 and above kernel supports passing NULL to BPF_MAP_GET_NEXT_KEY
165 // to get first key of the map. For older kernels, the call will fail.
166 res = syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
167 if (res < 0 && errno == EFAULT) {
168 // Fall back to try to find a non-existing key.
169 static unsigned char try_values[3] = {0, 0xff, 0x55};
170 attr.key = ptr_to_u64(key);
171 for (i = 0; i < 3; i++) {
172 memset(key, try_values[i], key_size);
173 // We want to check the existence of the key but we don't know the size
174 // of map's value. So we pass an invalid pointer for value, expect
175 // the call to fail and check if the error is ENOENT indicating the
176 // key doesn't exist. If we use NULL for the invalid pointer, it might
Edward Bettsfdf9b082017-10-10 21:13:28 +0100177 // trigger a page fault in kernel and affect performance. Hence we use
Teng Qindb7fab52017-05-16 01:10:15 -0700178 // ~0 which will fail and return fast.
179 // This should fail since we pass an invalid pointer for value.
Teng Qin9190ef52017-05-20 22:46:00 -0700180 if (bpf_lookup_elem(fd, key, (void *)~0) >= 0)
Teng Qindb7fab52017-05-16 01:10:15 -0700181 return -1;
182 // This means the key doesn't exist.
183 if (errno == ENOENT)
184 return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
185 }
186 return -1;
187 } else {
188 return res;
189 }
190}
191
Brenden Blancoa94bd932015-04-26 00:56:42 -0700192int bpf_get_next_key(int fd, void *key, void *next_key)
193{
Brenden Blancofdc027c2015-09-03 11:49:54 -0700194 union bpf_attr attr;
195 memset(&attr, 0, sizeof(attr));
196 attr.map_fd = fd;
197 attr.key = ptr_to_u64(key);
198 attr.next_key = ptr_to_u64(next_key);
Brenden Blancoa94bd932015-04-26 00:56:42 -0700199
200 return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
201}
202
// Print the verifier log (if any) to stderr and, when the load failed,
// append human-readable hints for well-known verifier error strings.
static void bpf_print_hints(int ret, char *log)
{
  if (ret < 0)
    fprintf(stderr, "bpf: Failed to load program: %s\n", strerror(errno));
  if (log == NULL)
    return;
  fprintf(stderr, "%s\n", log);
  if (ret >= 0)
    return;

  // The following error strings will need maintenance to match LLVM.
  static const struct {
    const char *needle;
    const char *hint;
  } hints[] = {
    // stack busting
    { "invalid stack off=-",
      "HINT: Looks like you exceeded the BPF stack limit. "
      "This can happen if you allocate too much local variable storage. "
      "For example, if you allocated a 1 Kbyte struct (maybe for "
      "BPF_PERF_OUTPUT), busting a max stack of 512 bytes.\n\n" },
    // didn't check NULL on map lookup
    { "invalid mem access 'map_value_or_null'",
      "HINT: The 'map_value_or_null' error can happen if "
      "you dereference a pointer value from a map lookup without first "
      "checking if that pointer is NULL.\n\n" },
    // lacking a bpf_probe_read
    { "invalid mem access 'inv'",
      "HINT: The invalid mem access 'inv' error can happen "
      "if you try to dereference memory without first using "
      "bpf_probe_read() to copy it to the BPF stack. Sometimes the "
      "bpf_probe_read is automatic by the bcc rewriter, other times "
      "you'll need to be explicit.\n\n" },
  };

  size_t i;
  for (i = 0; i < sizeof(hints) / sizeof(hints[0]); i++) {
    if (strstr(log, hints[i].needle) != NULL)
      fprintf(stderr, "%s", hints[i].hint);
  }
}
Brenden Blancoa94bd932015-04-26 00:56:42 -0700241#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))
242
Martin KaFai Lau3c24ad92017-10-19 23:47:39 -0700243int bpf_obj_get_info(int prog_map_fd, void *info, uint32_t *info_len)
Alexei Starovoitovb1df37c2017-09-06 19:47:47 -0700244{
245 union bpf_attr attr;
246 int err;
247
248 memset(&attr, 0, sizeof(attr));
249 attr.info.bpf_fd = prog_map_fd;
250 attr.info.info_len = *info_len;
251 attr.info.info = ptr_to_u64(info);
252
253 err = syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
254 if (!err)
255 *info_len = attr.info.info_len;
256
257 return err;
258}
259
// Compute the kernel-style tag of a BPF program by hashing its instruction
// stream with SHA1 through the kernel crypto API (AF_ALG).  The imm fields
// of BPF_PSEUDO_MAP_FD ld_imm64 pairs are zeroed in a local copy first so
// the tag does not vary with the map fd values of this particular load.
// On success stores the byte-swapped first 8 bytes of the digest in *ptag
// and returns 0; returns a negative value on any failure.
int bpf_prog_compute_tag(const struct bpf_insn *insns, int prog_len,
                         unsigned long long *ptag)
{
  struct sockaddr_alg alg = {
    .salg_family = AF_ALG,
    .salg_type = "hash",
    .salg_name = "sha1",
  };
  int insn_cnt = prog_len / 8;  // sizeof(struct bpf_insn) == 8
  int ret = -1, shafd2 = -1, i;
  struct bpf_insn *prog = NULL;

  int shafd = socket(AF_ALG, SOCK_SEQPACKET, 0);
  if (shafd < 0) {
    fprintf(stderr, "sha1 socket not available %s\n", strerror(errno));
    return -1;
  }
  if (bind(shafd, (struct sockaddr *)&alg, sizeof(alg)) < 0) {
    fprintf(stderr, "sha1 bind fail %s\n", strerror(errno));
    goto out;
  }
  shafd2 = accept(shafd, NULL, 0);
  if (shafd2 < 0) {
    fprintf(stderr, "sha1 accept fail %s\n", strerror(errno));
    goto out;
  }

  // Heap copy instead of the original VLA: prog_len is caller-controlled
  // and a large program could overflow the stack.
  prog = malloc(prog_len);
  if (!prog) {
    fprintf(stderr, "sha1 malloc fail %s\n", strerror(errno));
    goto out;
  }
  bool map_ld_seen = false;
  for (i = 0; i < insn_cnt; i++) {
    prog[i] = insns[i];
    if (insns[i].code == (BPF_LD | BPF_DW | BPF_IMM) &&
        insns[i].src_reg == BPF_PSEUDO_MAP_FD &&
        !map_ld_seen) {
      // First half of a 16-byte ld_imm64 map-fd load: blank the fd.
      prog[i].imm = 0;
      map_ld_seen = true;
    } else if (insns[i].code == 0 && map_ld_seen) {
      // Second half of the ld_imm64 pair: blank its imm too.
      prog[i].imm = 0;
      map_ld_seen = false;
    } else {
      map_ld_seen = false;
    }
  }

  if (write(shafd2, prog, prog_len) != prog_len) {
    fprintf(stderr, "sha1 write fail %s\n", strerror(errno));
    goto out;
  }

  union {
    unsigned char sha[20];
    unsigned long long tag;
  } u = {};
  if (read(shafd2, u.sha, 20) != 20) {
    fprintf(stderr, "sha1 read fail %s\n", strerror(errno));
    goto out;
  }
  *ptag = __builtin_bswap64(u.tag);
  ret = 0;

out:
  // Original leaked shafd/shafd2 on the success path; release everything
  // on every path via a single cleanup exit.
  free(prog);
  if (shafd2 >= 0)
    close(shafd2);
  close(shafd);
  return ret;
}
324
// Read the tag of an already-loaded BPF program from
// /proc/self/fdinfo/<fd>.  The prog_tag line is expected sixth, after
// pos, flags, mnt_id, prog_type and prog_jited.
// Returns 0 on success (tag stored in *ptag), -1 if the fdinfo file cannot
// be opened, -2 if its contents cannot be parsed.
int bpf_prog_get_tag(int fd, unsigned long long *ptag)
{
  char fmt[64];
  unsigned i;

  snprintf(fmt, sizeof(fmt), "/proc/self/fdinfo/%d", fd);
  FILE *f = fopen(fmt, "r");
  if (!f) {
/*  fprintf(stderr, "failed to open fdinfo %s\n", strerror(errno));*/
    return -1;
  }
  // Skip to the prog_tag line, checking each read: the original ignored
  // fgets failures, so a short file would be parsed from a stale buffer.
  for (i = 0; i < 6; i++) {  // pos, flags, mnt_id, prog_type, prog_jited, prog_tag
    if (!fgets(fmt, sizeof(fmt), f)) {
      fclose(f);
      return -2;
    }
  }
  fclose(f);
  char *p = strchr(fmt, ':');
  if (!p) {
/*  fprintf(stderr, "broken fdinfo %s\n", fmt);*/
    return -2;
  }
  unsigned long long tag = 0;
  if (sscanf(p + 1, "%llx", &tag) != 1)
    return -2;
  *ptag = tag;
  return 0;
}
351
// Load a BPF program via the BPF_PROG_LOAD command and return its fd.
//
// name:      optional program name, truncated to BPF_OBJ_NAME_LEN - 1 and
//            cleared + retried if the running kernel rejects names.
// insns/prog_len: instruction buffer and its byte length; rejected up front
//            if it exceeds BPF_MAXINSNS instructions.
// log_level: requested verifier log verbosity.  If the load fails while
//            logging is off, logging is enabled and the load retried so an
//            error message can be captured and printed.
// log_buf/log_buf_size: optional caller-owned buffer for the verifier log.
//            When absent, a temporary buffer is allocated and doubled on
//            ENOSPC until the full log fits.
//            NOTE(review): assumes log_buf is non-NULL whenever
//            log_buf_size > 0 — confirm with callers.
// EPERM is retried once after raising RLIMIT_MEMLOCK to unlimited, since
// insufficient locked memory is indistinguishable from lack of privilege.
// Returns the program fd (>= 0) or a negative value with errno set.
int bpf_prog_load(enum bpf_prog_type prog_type, const char *name,
                  const struct bpf_insn *insns, int prog_len,
                  const char *license, unsigned kern_version,
                  int log_level, char *log_buf, unsigned log_buf_size)
{
  size_t name_len = name ? strlen(name) : 0;
  union bpf_attr attr;
  char *tmp_log_buf = NULL;
  unsigned tmp_log_buf_size = 0;
  int ret = 0;

  memset(&attr, 0, sizeof(attr));

  attr.prog_type = prog_type;
  attr.kern_version = kern_version;
  attr.license = ptr_to_u64((void *)license);

  attr.insns = ptr_to_u64((void *)insns);
  attr.insn_cnt = prog_len / sizeof(struct bpf_insn);
  if (attr.insn_cnt > BPF_MAXINSNS) {
    errno = EINVAL;
    fprintf(stderr,
            "bpf: %s. Program too large (%u insns), at most %d insns\n\n",
            strerror(errno), attr.insn_cnt, BPF_MAXINSNS);
    return -1;
  }

  attr.log_level = log_level;
  if (attr.log_level > 0) {
    if (log_buf_size > 0) {
      // Use user-provided log buffer if available.
      log_buf[0] = 0;
      attr.log_buf = ptr_to_u64(log_buf);
      attr.log_size = log_buf_size;
    } else {
      // Create and use temporary log buffer if user didn't provide one.
      tmp_log_buf_size = LOG_BUF_SIZE;
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        // Allocation failed: proceed without logging rather than aborting.
        attr.log_level = 0;
      } else {
        tmp_log_buf[0] = 0;
        attr.log_buf = ptr_to_u64(tmp_log_buf);
        attr.log_size = tmp_log_buf_size;
      }
    }
  }

  memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));

  ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  // BPF object name is not supported on older Kernels.
  // If we failed due to this, clear the name and try again.
  if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
    memset(attr.prog_name, 0, BPF_OBJ_NAME_LEN);
    ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  }

  if (ret < 0 && errno == EPERM) {
    // When EPERM is returned, two reasons are possible:
    //  1. user has no permissions for bpf()
    //  2. user has insufficent rlimit for locked memory
    // Unfortunately, there is no api to inspect the current usage of locked
    // mem for the user, so an accurate calculation of how much memory to lock
    // for this new program is difficult to calculate. As a hack, bump the limit
    // to unlimited. If program load fails again, return the error.
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
    }
  }

  // The load has failed. Handle log message.
  if (ret < 0) {
    // User has provided a log buffer.
    if (log_buf_size) {
      // If logging is not already enabled, enable it and do the syscall again.
      if (attr.log_level == 0) {
        attr.log_level = 1;
        attr.log_buf = ptr_to_u64(log_buf);
        attr.log_size = log_buf_size;
        ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
      }
      // Print the log message and return.
      bpf_print_hints(ret, log_buf);
      if (errno == ENOSPC)
        fprintf(stderr, "bpf: log_buf size may be insufficient\n");
      goto return_result;
    }

    // User did not provide log buffer. We will try to increase size of
    // our temporary log buffer to get full error message.
    if (tmp_log_buf)
      free(tmp_log_buf);
    tmp_log_buf_size = LOG_BUF_SIZE;
    if (attr.log_level == 0)
      attr.log_level = 1;
    for (;;) {
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        goto return_result;
      }
      tmp_log_buf[0] = 0;
      attr.log_buf = ptr_to_u64(tmp_log_buf);
      attr.log_size = tmp_log_buf_size;

      ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
      if (ret < 0 && errno == ENOSPC) {
        // Temporary buffer size is not enough. Double it and try again.
        free(tmp_log_buf);
        tmp_log_buf = NULL;
        tmp_log_buf_size <<= 1;
      } else {
        break;
      }
    }
  }

  // Check if we should print the log message if log_level is not 0,
  // either specified by user or set due to error.
  if (attr.log_level > 0) {
    // Don't print if user enabled logging and provided log buffer,
    // but there is no error.
    if (log_buf && ret < 0)
      bpf_print_hints(ret, log_buf);
    else if (tmp_log_buf)
      bpf_print_hints(ret, tmp_log_buf);
  }

return_result:
  if (tmp_log_buf)
    free(tmp_log_buf);
  return ret;
}
493
// Open a raw, nonblocking, close-on-exec AF_PACKET socket that sees all
// protocols, bound to interface `name` unless name is NULL or empty.
// Returns the socket fd, or -1 on failure.
int bpf_open_raw_sock(const char *name)
{
  struct sockaddr_ll sll;

  int sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
  if (sock < 0) {
    fprintf(stderr, "cannot create raw socket\n");
    return -1;
  }

  /* Do not bind on empty interface names */
  if (!name || *name == '\0')
    return sock;

  memset(&sll, 0, sizeof(sll));
  sll.sll_family = AF_PACKET;
  sll.sll_protocol = htons(ETH_P_ALL);
  sll.sll_ifindex = if_nametoindex(name);
  if (sll.sll_ifindex == 0) {
    fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
    close(sock);
    return -1;
  }

  if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
    fprintf(stderr, "bind to %s: %s\n", name, strerror(errno));
    close(sock);
    return -1;
  }

  return sock;
}
526
// Attach a loaded BPF program (by fd) to a socket via SO_ATTACH_BPF.
// Returns setsockopt's result: 0 on success, -1 with errno on failure.
int bpf_attach_socket(int sock, int prog) {
  return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
}
530
// Attach BPF program `progfd` to the trace event whose tracefs directory is
// `event_path`: read the event id from "<event_path>/id", open a
// tracepoint-type perf event for it (pid/cpu/group_fd as in
// perf_event_open(2)), hand the perf fd to `reader`, mmap the ring buffer,
// then wire up the program and enable the event.
// Returns 0 on success, -1 on failure.
// NOTE(review): the perf fd is stored in the reader via
// perf_reader_set_fd(); presumably perf_reader_free() closes it — confirm
// in perf_reader.c. On failures after perf_event_open, pfd is not closed
// here.
static int bpf_attach_tracing_event(int progfd, const char *event_path,
    struct perf_reader *reader, int pid, int cpu, int group_fd) {
  int efd, pfd;
  ssize_t bytes;
  char buf[256];
  struct perf_event_attr attr = {};

  // The "id" file contains the numeric tracepoint id used as attr.config.
  snprintf(buf, sizeof(buf), "%s/id", event_path);
  efd = open(buf, O_RDONLY, 0);
  if (efd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    return -1;
  }

  bytes = read(efd, buf, sizeof(buf));
  if (bytes <= 0 || bytes >= sizeof(buf)) {
    fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
    close(efd);
    return -1;
  }
  close(efd);
  buf[bytes] = '\0';
  attr.config = strtol(buf, NULL, 0);
  attr.type = PERF_TYPE_TRACEPOINT;
  attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
  attr.sample_period = 1;    // deliver every event
  attr.wakeup_events = 1;    // wake the reader on each event
  pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
  if (pfd < 0) {
    fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
    return -1;
  }
  perf_reader_set_fd(reader, pfd);

  if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
    return -1;

  // Associate the BPF program with the perf event, then enable it.
  if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF)");
    return -1;
  }
  if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    return -1;
  }

  return 0;
}
579
// Create a kprobe (attach_type == BPF_PROBE_ENTRY, 'p') or kretprobe
// ('r') on kernel function `fn_name` through tracefs, and attach BPF
// program `progfd` to the resulting trace event.  The event is registered
// as "<ev_name>_bcc_<pid>" so concurrent processes don't collide.
// pid/cpu/group_fd are forwarded to perf_event_open; cb/cb_cookie form the
// sample callback installed in the returned reader.
// Returns an opaque struct perf_reader * on success (caller frees with
// perf_reader_free), or NULL on failure.
void * bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type, const char *ev_name,
                         const char *fn_name,
                         pid_t pid, int cpu, int group_fd,
                         perf_reader_cb cb, void *cb_cookie)
{
  int kfd;
  char buf[256];
  char event_alias[128];
  struct perf_reader *reader = NULL;
  static char *event_type = "kprobe";

  reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
  if (!reader)
    goto error;

  // Register the probe by appending to .../tracing/kprobe_events.
  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
  kfd = open(buf, O_WRONLY | O_APPEND, 0);
  if (kfd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    goto error;
  }

  snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
  // Format: "p:kprobes/<alias> <fn>" for entry, "r:..." for return probes.
  snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
           event_type, event_alias, fn_name);
  if (write(kfd, buf, strlen(buf)) < 0) {
    if (errno == EINVAL)
      fprintf(stderr, "check dmesg output for possible cause\n");
    close(kfd);
    goto error;
  }
  close(kfd);

  // Attach the program to the newly created trace event directory.
  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
    goto error;

  return reader;

error:
  perf_reader_free(reader);
  return NULL;

}
624
// Switch this process into the mount namespace of `pid` so that a uprobe
// binary path is resolved in the target's filesystem view.
// Returns an fd referring to the caller's original mount namespace (pass it
// to exit_mount_ns() to switch back), or -1 on error — including the case
// where the target already shares our namespace, in which case no switch
// is needed and all fds are closed.
static int enter_mount_ns(int pid) {
  struct stat self_stat, target_stat;
  int self_fd = -1, target_fd = -1;
  char buf[64];

  if (pid < 0)
    return -1;

  // Guard against truncation of the /proc path.
  if ((size_t)snprintf(buf, sizeof(buf), "/proc/%d/ns/mnt", pid) >= sizeof(buf))
    return -1;

  self_fd = open("/proc/self/ns/mnt", O_RDONLY);
  if (self_fd < 0) {
    perror("open(/proc/self/ns/mnt)");
    return -1;
  }

  target_fd = open(buf, O_RDONLY);
  if (target_fd < 0) {
    perror("open(/proc/<pid>/ns/mnt)");
    goto error;
  }

  if (fstat(self_fd, &self_stat)) {
    perror("fstat(self_fd)");
    goto error;
  }

  if (fstat(target_fd, &target_stat)) {
    perror("fstat(target_fd)");
    goto error;
  }

  // both target and current ns are same, avoid setns and close all fds
  if (self_stat.st_ino == target_stat.st_ino)
    goto error;

  if (setns(target_fd, CLONE_NEWNS)) {
    perror("setns(target)");
    goto error;
  }

  close(target_fd);
  return self_fd;

error:
  if (self_fd >= 0)
    close(self_fd);
  if (target_fd >= 0)
    close(target_fd);
  return -1;
}
677
// Return to the mount namespace saved by enter_mount_ns() and release the
// namespace fd.  A negative fd is a no-op, so this is safe to call on
// enter_mount_ns() failure paths.
static void exit_mount_ns(int fd) {
  if (fd < 0)
    return;

  if (setns(fd, CLONE_NEWNS))
    perror("setns");
  // The saved fd exists only to get back here; close it so each uprobe
  // attachment doesn't leak one descriptor (the original never closed it).
  close(fd);
}
685
Derek7174d932017-01-30 21:03:02 -0800686void * bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type, const char *ev_name,
Dereke4da6c22017-01-28 16:11:28 -0800687 const char *binary_path, uint64_t offset,
688 pid_t pid, int cpu, int group_fd,
Mauricio Vasquez Bd1324ac2017-05-17 20:26:47 -0500689 perf_reader_cb cb, void *cb_cookie)
Dereke4da6c22017-01-28 16:11:28 -0800690{
Derek7174d932017-01-30 21:03:02 -0800691 char buf[PATH_MAX];
Teng Qin0760b752017-09-03 19:07:59 -0700692 char event_alias[PATH_MAX];
Dereke4da6c22017-01-28 16:11:28 -0800693 struct perf_reader *reader = NULL;
694 static char *event_type = "uprobe";
Teng Qin0760b752017-09-03 19:07:59 -0700695 int res, kfd = -1, ns_fd = -1;
Dereke4da6c22017-01-28 16:11:28 -0800696
Teng Qin4b764de2017-04-03 22:10:46 -0700697 reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
Dereke4da6c22017-01-28 16:11:28 -0800698 if (!reader)
699 goto error;
700
701 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
702 kfd = open(buf, O_WRONLY | O_APPEND, 0);
703 if (kfd < 0) {
704 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
705 goto error;
706 }
707
Teng Qin0760b752017-09-03 19:07:59 -0700708 res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
709 if (res < 0 || res >= sizeof(event_alias)) {
710 fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
711 goto error;
712 }
713 res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
714 event_type, event_alias, binary_path, offset);
715 if (res < 0 || res >= sizeof(buf)) {
716 fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
Derek7174d932017-01-30 21:03:02 -0800717 goto error;
718 }
kmjohansen4b87af02017-03-30 00:58:31 -0700719
Brenden Blancofa073452017-05-30 17:35:53 -0700720 ns_fd = enter_mount_ns(pid);
Dereke4da6c22017-01-28 16:11:28 -0800721 if (write(kfd, buf, strlen(buf)) < 0) {
722 if (errno == EINVAL)
723 fprintf(stderr, "check dmesg output for possible cause\n");
Dereke4da6c22017-01-28 16:11:28 -0800724 goto error;
725 }
726 close(kfd);
Brenden Blancofa073452017-05-30 17:35:53 -0700727 exit_mount_ns(ns_fd);
728 ns_fd = -1;
Dereke4da6c22017-01-28 16:11:28 -0800729
Teng Qin0760b752017-09-03 19:07:59 -0700730 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
Brenden Blanco75982492015-11-06 10:43:05 -0800731 if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
732 goto error;
Brenden Blancocd5cb412015-04-26 09:41:58 -0700733
Brenden Blanco8207d102015-09-25 13:58:30 -0700734 return reader;
Brenden Blanco75982492015-11-06 10:43:05 -0800735
736error:
Teng Qin0760b752017-09-03 19:07:59 -0700737 if (kfd >= 0)
738 close(kfd);
Brenden Blancofa073452017-05-30 17:35:53 -0700739 exit_mount_ns(ns_fd);
Martin KaFai Laua8e66962016-05-19 23:04:41 -0700740 perf_reader_free(reader);
Brenden Blanco75982492015-11-06 10:43:05 -0800741 return NULL;
Brenden Blancocd5cb412015-04-26 09:41:58 -0700742}
743
Dereke4da6c22017-01-28 16:11:28 -0800744static int bpf_detach_probe(const char *ev_name, const char *event_type)
745{
Teng Qin0760b752017-09-03 19:07:59 -0700746 int kfd, res;
747 char buf[PATH_MAX];
Brenden Blanco68e2d142016-01-28 10:24:56 -0800748 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
749 kfd = open(buf, O_WRONLY | O_APPEND, 0);
Brenden Blanco839dd272015-06-11 12:35:55 -0700750 if (kfd < 0) {
Brenden Blanco68e2d142016-01-28 10:24:56 -0800751 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
Teng Qin0760b752017-09-03 19:07:59 -0700752 goto error;
Brenden Blanco839dd272015-06-11 12:35:55 -0700753 }
754
Teng Qin0760b752017-09-03 19:07:59 -0700755 res = snprintf(buf, sizeof(buf), "-:%ss/%s_bcc_%d", event_type, ev_name, getpid());
756 if (res < 0 || res >= sizeof(buf)) {
757 fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
758 goto error;
759 }
Dereke4da6c22017-01-28 16:11:28 -0800760 if (write(kfd, buf, strlen(buf)) < 0) {
Brenden Blanco68e2d142016-01-28 10:24:56 -0800761 fprintf(stderr, "write(%s): %s\n", buf, strerror(errno));
Teng Qin0760b752017-09-03 19:07:59 -0700762 goto error;
Brenden Blanco839dd272015-06-11 12:35:55 -0700763 }
Brenden Blanco839dd272015-06-11 12:35:55 -0700764
Teng Qin0760b752017-09-03 19:07:59 -0700765 close(kfd);
Brenden Blanco75982492015-11-06 10:43:05 -0800766 return 0;
Teng Qin0760b752017-09-03 19:07:59 -0700767
768error:
769 if (kfd >= 0)
770 close(kfd);
771 return -1;
Brenden Blanco839dd272015-06-11 12:35:55 -0700772}
773
Dereke4da6c22017-01-28 16:11:28 -0800774int bpf_detach_kprobe(const char *ev_name)
775{
Teng Qinf4543fc2017-09-03 17:11:59 -0700776 return bpf_detach_probe(ev_name, "kprobe");
Brenden Blanco68e2d142016-01-28 10:24:56 -0800777}
778
Dereke4da6c22017-01-28 16:11:28 -0800779int bpf_detach_uprobe(const char *ev_name)
780{
781 return bpf_detach_probe(ev_name, "uprobe");
Brenden Blanco68e2d142016-01-28 10:24:56 -0800782}
783
Dereke4da6c22017-01-28 16:11:28 -0800784
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -0700785void * bpf_attach_tracepoint(int progfd, const char *tp_category,
786 const char *tp_name, int pid, int cpu,
787 int group_fd, perf_reader_cb cb, void *cb_cookie) {
788 char buf[256];
789 struct perf_reader *reader = NULL;
790
Teng Qin4b764de2017-04-03 22:10:46 -0700791 reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -0700792 if (!reader)
793 goto error;
794
795 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/%s",
796 tp_category, tp_name);
797 if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
798 goto error;
799
800 return reader;
801
802error:
803 perf_reader_free(reader);
804 return NULL;
805}
806
/* Counterpart to bpf_attach_tracepoint; currently a deliberate no-op. */
int bpf_detach_tracepoint(const char *tp_category, const char *tp_name) {
  // Right now, there is nothing to do, but it's a good idea to encourage
  // callers to detach anything they attach.
  return 0;
}
812
Teng Qin4b764de2017-04-03 22:10:46 -0700813void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb,
814 perf_reader_lost_cb lost_cb, void *cb_cookie,
815 int pid, int cpu, int page_cnt) {
Brenden Blanco75982492015-11-06 10:43:05 -0800816 int pfd;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800817 struct perf_event_attr attr = {};
Brenden Blanco75982492015-11-06 10:43:05 -0800818 struct perf_reader *reader = NULL;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800819
Teng Qin4b764de2017-04-03 22:10:46 -0700820 reader = perf_reader_new(NULL, raw_cb, lost_cb, cb_cookie, page_cnt);
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800821 if (!reader)
Brenden Blanco75982492015-11-06 10:43:05 -0800822 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800823
Brenden Blanco0dd24412016-02-17 00:26:14 -0800824 attr.config = 10;//PERF_COUNT_SW_BPF_OUTPUT;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800825 attr.type = PERF_TYPE_SOFTWARE;
826 attr.sample_type = PERF_SAMPLE_RAW;
Brenden Blanco75982492015-11-06 10:43:05 -0800827 attr.sample_period = 1;
828 attr.wakeup_events = 1;
829 pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800830 if (pfd < 0) {
Brenden Blanco0dd24412016-02-17 00:26:14 -0800831 fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
832 fprintf(stderr, " (check your kernel for PERF_COUNT_SW_BPF_OUTPUT support, 4.4 or newer)\n");
Brenden Blanco75982492015-11-06 10:43:05 -0800833 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800834 }
835 perf_reader_set_fd(reader, pfd);
836
837 if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
Brenden Blanco75982492015-11-06 10:43:05 -0800838 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800839
840 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
841 perror("ioctl(PERF_EVENT_IOC_ENABLE)");
Brenden Blanco75982492015-11-06 10:43:05 -0800842 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800843 }
844
845 return reader;
Brenden Blanco75982492015-11-06 10:43:05 -0800846
847error:
848 if (reader)
849 perf_reader_free(reader);
850
851 return NULL;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800852}
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200853
Brenden Blancofa073452017-05-30 17:35:53 -0700854static int invalid_perf_config(uint32_t type, uint64_t config) {
Teng Qin98752212017-05-19 19:05:24 -0700855 switch (type) {
Teng Qin01b07ba2017-11-20 13:28:03 -0800856 case PERF_TYPE_HARDWARE:
857 if (config >= PERF_COUNT_HW_MAX) {
858 fprintf(stderr, "HARDWARE perf event config out of range\n");
859 goto is_invalid;
860 }
861 return 0;
862 case PERF_TYPE_SOFTWARE:
863 if (config >= PERF_COUNT_SW_MAX) {
864 fprintf(stderr, "SOFTWARE perf event config out of range\n");
865 goto is_invalid;
866 } else if (config == 10 /* PERF_COUNT_SW_BPF_OUTPUT */) {
867 fprintf(stderr, "Unable to open or attach perf event for BPF_OUTPUT\n");
868 goto is_invalid;
869 }
870 return 0;
871 case PERF_TYPE_HW_CACHE:
872 if (((config >> 16) >= PERF_COUNT_HW_CACHE_RESULT_MAX) ||
873 (((config >> 8) & 0xff) >= PERF_COUNT_HW_CACHE_OP_MAX) ||
874 ((config & 0xff) >= PERF_COUNT_HW_CACHE_MAX)) {
875 fprintf(stderr, "HW_CACHE perf event config out of range\n");
876 goto is_invalid;
877 }
878 return 0;
879 case PERF_TYPE_TRACEPOINT:
880 case PERF_TYPE_BREAKPOINT:
881 fprintf(stderr,
882 "Unable to open or attach TRACEPOINT or BREAKPOINT events\n");
883 goto is_invalid;
884 default:
885 return 0;
Teng Qin98752212017-05-19 19:05:24 -0700886 }
Teng Qin01b07ba2017-11-20 13:28:03 -0800887is_invalid:
888 fprintf(stderr, "Invalid perf event type %" PRIu32 " config %" PRIu64 "\n",
889 type, config);
890 return 1;
Teng Qin98752212017-05-19 19:05:24 -0700891}
892
Brenden Blanco3069caa2016-08-01 18:12:11 -0700893int bpf_open_perf_event(uint32_t type, uint64_t config, int pid, int cpu) {
894 int fd;
895 struct perf_event_attr attr = {};
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200896
Teng Qin98752212017-05-19 19:05:24 -0700897 if (invalid_perf_config(type, config)) {
Teng Qin98752212017-05-19 19:05:24 -0700898 return -1;
899 }
900
Brenden Blanco3069caa2016-08-01 18:12:11 -0700901 attr.sample_period = LONG_MAX;
902 attr.type = type;
903 attr.config = config;
904
905 fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
906 if (fd < 0) {
907 fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
908 return -1;
909 }
910
911 if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
912 perror("ioctl(PERF_EVENT_IOC_ENABLE)");
913 close(fd);
914 return -1;
915 }
916
917 return fd;
918}
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200919
Andy Gospodarek9f3cab72017-05-17 16:18:45 -0400920int bpf_attach_xdp(const char *dev_name, int progfd, uint32_t flags) {
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200921 struct sockaddr_nl sa;
922 int sock, seq = 0, len, ret = -1;
923 char buf[4096];
924 struct nlattr *nla, *nla_xdp;
925 struct {
926 struct nlmsghdr nh;
927 struct ifinfomsg ifinfo;
928 char attrbuf[64];
929 } req;
930 struct nlmsghdr *nh;
931 struct nlmsgerr *err;
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +0900932 socklen_t addrlen;
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200933
934 memset(&sa, 0, sizeof(sa));
935 sa.nl_family = AF_NETLINK;
936
937 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
938 if (sock < 0) {
939 fprintf(stderr, "bpf: opening a netlink socket: %s\n", strerror(errno));
940 return -1;
941 }
942
943 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
944 fprintf(stderr, "bpf: bind to netlink: %s\n", strerror(errno));
945 goto cleanup;
946 }
947
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +0900948 addrlen = sizeof(sa);
949 if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
950 fprintf(stderr, "bpf: get sock name of netlink: %s\n", strerror(errno));
951 goto cleanup;
952 }
953
954 if (addrlen != sizeof(sa)) {
955 fprintf(stderr, "bpf: wrong netlink address length: %d\n", addrlen);
956 goto cleanup;
957 }
958
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200959 memset(&req, 0, sizeof(req));
960 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
961 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
962 req.nh.nlmsg_type = RTM_SETLINK;
963 req.nh.nlmsg_pid = 0;
964 req.nh.nlmsg_seq = ++seq;
965 req.ifinfo.ifi_family = AF_UNSPEC;
966 req.ifinfo.ifi_index = if_nametoindex(dev_name);
967 if (req.ifinfo.ifi_index == 0) {
968 fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
969 goto cleanup;
970 }
971
972 nla = (struct nlattr *)(((char *)&req)
973 + NLMSG_ALIGN(req.nh.nlmsg_len));
974 nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
975
976 nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
Andy Gospodarek9f3cab72017-05-17 16:18:45 -0400977 nla->nla_len = NLA_HDRLEN;
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200978
979 // we specify the FD passed over by the user
980 nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
Arthur Gautierfbd91e22017-04-28 21:39:58 +0000981 nla_xdp->nla_len = NLA_HDRLEN + sizeof(progfd);
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200982 memcpy((char *)nla_xdp + NLA_HDRLEN, &progfd, sizeof(progfd));
Andy Gospodarek9f3cab72017-05-17 16:18:45 -0400983 nla->nla_len += nla_xdp->nla_len;
984
985 // parse flags as passed by the user
986 if (flags) {
987 nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
Gary Lindb8353b2017-08-18 18:10:43 +0800988 nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
Andy Gospodarek9f3cab72017-05-17 16:18:45 -0400989 nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
990 memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
991 nla->nla_len += nla_xdp->nla_len;
992 }
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200993
994 req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
995
996 if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
997 fprintf(stderr, "bpf: send to netlink: %s\n", strerror(errno));
998 goto cleanup;
999 }
1000
1001 len = recv(sock, buf, sizeof(buf), 0);
1002 if (len < 0) {
1003 fprintf(stderr, "bpf: recv from netlink: %s\n", strerror(errno));
1004 goto cleanup;
1005 }
1006
1007 for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
1008 nh = NLMSG_NEXT(nh, len)) {
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001009 if (nh->nlmsg_pid != sa.nl_pid) {
Toshiaki Makita890c76a2017-07-31 20:20:55 +09001010 fprintf(stderr, "bpf: Wrong pid %u, expected %u\n",
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001011 nh->nlmsg_pid, sa.nl_pid);
Jan RĂ¼the0724d72016-07-28 22:32:46 +02001012 errno = EBADMSG;
1013 goto cleanup;
1014 }
1015 if (nh->nlmsg_seq != seq) {
1016 fprintf(stderr, "bpf: Wrong seq %d, expected %d\n",
1017 nh->nlmsg_seq, seq);
1018 errno = EBADMSG;
1019 goto cleanup;
1020 }
1021 switch (nh->nlmsg_type) {
1022 case NLMSG_ERROR:
1023 err = (struct nlmsgerr *)NLMSG_DATA(nh);
1024 if (!err->error)
1025 continue;
1026 fprintf(stderr, "bpf: nlmsg error %s\n", strerror(-err->error));
1027 errno = -err->error;
1028 goto cleanup;
1029 case NLMSG_DONE:
1030 break;
1031 }
1032 }
1033
1034 ret = 0;
1035
1036cleanup:
1037 close(sock);
1038 return ret;
1039}
Teng Qin206b0202016-10-18 16:06:57 -07001040
/*
 * Open a perf event (ev_type/ev_config) sampling either every
 * sample_period events or at sample_freq Hz, attach the BPF program progfd
 * to it, and enable it.  Exactly one of sample_period/sample_freq must be
 * non-zero.  Returns the event fd on success, -1 on failure; the caller
 * owns the fd.
 */
int bpf_attach_perf_event(int progfd, uint32_t ev_type, uint32_t ev_config,
                          uint64_t sample_period, uint64_t sample_freq,
                          pid_t pid, int cpu, int group_fd) {
  struct perf_event_attr attr = {};
  int fd;

  if (invalid_perf_config(ev_type, ev_config))
    return -1;

  /* Both set or both clear is an error. */
  if ((sample_period > 0) == (sample_freq > 0)) {
    fprintf(
      stderr, "Exactly one of sample_period / sample_freq should be set\n"
    );
    return -1;
  }

  attr.type = ev_type;
  attr.config = ev_config;
  /* When profiling a specific task, let children inherit the counter. */
  if (pid > 0)
    attr.inherit = 1;
  if (sample_freq > 0) {
    attr.freq = 1;
    attr.sample_freq = sample_freq;
  } else {
    attr.sample_period = sample_period;
  }

  fd = syscall(
    __NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC
  );
  if (fd < 0) {
    perror("perf_event_open failed");
    return -1;
  }
  if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, progfd) != 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF) failed");
    close(fd);
    return -1;
  }
  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) != 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE) failed");
    close(fd);
    return -1;
  }

  return fd;
}
1086
Teng Qind6827332017-05-23 16:35:11 -07001087int bpf_close_perf_event_fd(int fd) {
1088 int res, error = 0;
1089 if (fd >= 0) {
1090 res = ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
1091 if (res != 0) {
1092 perror("ioctl(PERF_EVENT_IOC_DISABLE) failed");
1093 error = res;
1094 }
1095 res = close(fd);
1096 if (res != 0) {
1097 perror("close perf event FD failed");
1098 error = (res && !error) ? res : error;
1099 }
1100 }
1101 return error;
Teng Qin206b0202016-10-18 16:06:57 -07001102}
Huapeng Zhou37dcac02016-12-20 13:42:01 -08001103
/*
 * Pin the BPF object referred to by fd at pathname using the BPF_OBJ_PIN
 * command of the bpf(2) syscall.  Returns the syscall result: 0 on
 * success, -1 with errno set on failure.  attr is fully zeroed first so no
 * stale bytes reach the kernel.
 */
int bpf_obj_pin(int fd, const char *pathname)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.pathname = ptr_to_u64((void *)pathname);
  attr.bpf_fd = fd;

  return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
}
1114
/*
 * Open the BPF object pinned at pathname using the BPF_OBJ_GET command of
 * the bpf(2) syscall.  Returns the syscall result: a new fd on success,
 * -1 with errno set on failure.
 */
int bpf_obj_get(const char *pathname)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.pathname = ptr_to_u64((void *)pathname);

  return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}
Martin KaFai Lau3c24ad92017-10-19 23:47:39 -07001124
/*
 * Iterate loaded BPF program ids: store the first id greater than start_id
 * into *next_id via the BPF_PROG_GET_NEXT_ID command.  Returns 0 on
 * success; on failure returns the syscall result (-1) with errno set and
 * leaves *next_id untouched.
 */
int bpf_prog_get_next_id(uint32_t start_id, uint32_t *next_id)
{
  union bpf_attr attr;
  int ret;

  memset(&attr, 0, sizeof(attr));
  attr.start_id = start_id;

  ret = syscall(__NR_bpf, BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
  if (ret == 0)
    *next_id = attr.next_id;

  return ret;
}
1139
/*
 * Obtain an fd for the loaded BPF program with the given id via the
 * BPF_PROG_GET_FD_BY_ID command.  Returns the syscall result: a new fd on
 * success, -1 with errno set on failure.
 */
int bpf_prog_get_fd_by_id(uint32_t id)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.prog_id = id;

  return syscall(__NR_bpf, BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
}
1149
/*
 * Obtain an fd for the BPF map with the given id via the
 * BPF_MAP_GET_FD_BY_ID command.  Returns the syscall result: a new fd on
 * success, -1 with errno set on failure.
 */
int bpf_map_get_fd_by_id(uint32_t id)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.map_id = id;

  return syscall(__NR_bpf, BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
}