blob: ef6daf33073a9fa6f5b956214f58e9bc67db434f [file] [log] [blame]
Brenden Blanco246b9422015-06-05 11:15:27 -07001/*
2 * Copyright (c) 2015 PLUMgrid, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Nan Xiao667988c2017-08-28 11:44:19 +080016#ifndef _GNU_SOURCE
Colin Ian Kinga12db192017-07-06 13:58:17 +010017#define _GNU_SOURCE
Nan Xiao667988c2017-08-28 11:44:19 +080018#endif
Brenden Blanco246b9422015-06-05 11:15:27 -070019
Brenden Blancocd5cb412015-04-26 09:41:58 -070020#include <arpa/inet.h>
Brenden Blancoa94bd932015-04-26 00:56:42 -070021#include <errno.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070022#include <fcntl.h>
Teng Qin01b07ba2017-11-20 13:28:03 -080023#include <inttypes.h>
Brenden Blanco3069caa2016-08-01 18:12:11 -070024#include <limits.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070025#include <linux/bpf.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070026#include <linux/bpf_common.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070027#include <linux/if_packet.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070028#include <linux/perf_event.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070029#include <linux/pkt_cls.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070030#include <linux/rtnetlink.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070031#include <linux/sched.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070032#include <linux/unistd.h>
33#include <linux/version.h>
Brenden Blancoa94bd932015-04-26 00:56:42 -070034#include <net/ethernet.h>
35#include <net/if.h>
Brenden Blancofa073452017-05-30 17:35:53 -070036#include <sched.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070037#include <stdbool.h>
Brenden Blancobb7200c2015-06-04 18:01:42 -070038#include <stdio.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070039#include <stdlib.h>
40#include <string.h>
41#include <sys/ioctl.h>
Brenden Blanco4b4bd272015-11-30 10:54:47 -080042#include <sys/resource.h>
Derek35c25012017-01-22 20:58:23 -080043#include <sys/stat.h>
44#include <sys/types.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070045#include <unistd.h>
Alexei Starovoitovb1df37c2017-09-06 19:47:47 -070046#include <linux/if_alg.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070047
Brenden Blancoa94bd932015-04-26 00:56:42 -070048#include "libbpf.h"
Brenden Blanco8207d102015-09-25 13:58:30 -070049#include "perf_reader.h"
Brenden Blancoa94bd932015-04-26 00:56:42 -070050
Brenden Blancof275d3d2015-07-06 23:41:23 -070051// TODO: remove these defines when linux-libc-dev exports them properly
52
53#ifndef __NR_bpf
Naveen N. Rao0006ad12016-04-29 16:42:58 +053054#if defined(__powerpc64__)
55#define __NR_bpf 361
Zvonko Kosic98121a32017-03-07 07:30:25 +010056#elif defined(__s390x__)
57#define __NR_bpf 351
Zhiyi Sun8e434b72016-12-06 16:21:37 +080058#elif defined(__aarch64__)
59#define __NR_bpf 280
Naveen N. Rao0006ad12016-04-29 16:42:58 +053060#else
Brenden Blancof275d3d2015-07-06 23:41:23 -070061#define __NR_bpf 321
62#endif
Naveen N. Rao0006ad12016-04-29 16:42:58 +053063#endif
Brenden Blancof275d3d2015-07-06 23:41:23 -070064
65#ifndef SO_ATTACH_BPF
66#define SO_ATTACH_BPF 50
67#endif
68
69#ifndef PERF_EVENT_IOC_SET_BPF
70#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
71#endif
72
73#ifndef PERF_FLAG_FD_CLOEXEC
74#define PERF_FLAG_FD_CLOEXEC (1UL << 3)
75#endif
76
Martin KaFai Laudf368162017-10-19 12:46:48 -070077#define min(x, y) ((x) < (y) ? (x) : (y))
78
Mark Drayton5f5687e2017-02-20 18:13:03 +000079static int probe_perf_reader_page_cnt = 8;
80
// Convert a pointer to the 64-bit integer representation used by the
// bpf(2) attribute union (which stores pointers as __u64).
static uint64_t ptr_to_u64(void *ptr)
{
  uintptr_t addr = (uintptr_t)ptr;
  return (uint64_t)addr;
}
85
/*
 * Create a BPF map via the bpf(2) BPF_MAP_CREATE command.
 * name may be NULL (anonymous map); it is truncated to BPF_OBJ_NAME_LEN - 1.
 * Returns the new map fd (>= 0) on success, negative with errno set on error.
 * Transparently retries without the name on kernels that predate map names,
 * and with an unlimited RLIMIT_MEMLOCK when the first attempt hits EPERM.
 */
int bpf_create_map(enum bpf_map_type map_type, const char *name,
                   int key_size, int value_size,
                   int max_entries, int map_flags)
{
  size_t name_len = name ? strlen(name) : 0;
  union bpf_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.map_type = map_type;
  attr.key_size = key_size;
  attr.value_size = value_size;
  attr.max_entries = max_entries;
  attr.map_flags = map_flags;
  // Guard the copy: memcpy with a NULL source is undefined behavior even
  // for a zero length, so skip it entirely for anonymous maps.
  if (name_len)
    memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));

  int ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));

  // Map names are rejected by older kernels (E2BIG/EINVAL); drop the name
  // and try once more.
  if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
    memset(attr.map_name, 0, BPF_OBJ_NAME_LEN);
    ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
  }

  if (ret < 0 && errno == EPERM) {
    // EPERM may mean insufficient RLIMIT_MEMLOCK rather than a missing
    // capability; bump the limit to unlimited and retry once.
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
    }
  }
  return ret;
}
120
// Wrapper for bpf(2) BPF_MAP_UPDATE_ELEM: set map[key] = value, where
// flags is BPF_ANY / BPF_NOEXIST / BPF_EXIST.
// Returns 0 on success, -1 with errno set on failure.
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.flags = flags;
  attr.value = ptr_to_u64(value);
  attr.key = ptr_to_u64(key);
  attr.map_fd = fd;

  return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}
132
// Wrapper for bpf(2) BPF_MAP_LOOKUP_ELEM: copy map[key] into value.
// Returns 0 on success, -1 with errno set (ENOENT if the key is absent).
int bpf_lookup_elem(int fd, void *key, void *value)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.value = ptr_to_u64(value);
  attr.key = ptr_to_u64(key);
  attr.map_fd = fd;

  return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}
143
// Wrapper for bpf(2) BPF_MAP_DELETE_ELEM: remove key from the map.
// Returns 0 on success, -1 with errno set on failure.
int bpf_delete_elem(int fd, void *key)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.key = ptr_to_u64(key);
  attr.map_fd = fd;

  return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
}
153
/*
 * Fetch the first key of map `fd` into `key` (`key_size` bytes).
 * Returns the BPF_MAP_GET_NEXT_KEY syscall result: 0 on success, negative
 * on failure (e.g. empty map, or no usable probe key found on old kernels).
 */
int bpf_get_first_key(int fd, void *key, size_t key_size)
{
  union bpf_attr attr;
  int i, res;

  memset(&attr, 0, sizeof(attr));
  attr.map_fd = fd;
  attr.key = 0;
  attr.next_key = ptr_to_u64(key);

  // 4.12 and above kernel supports passing NULL to BPF_MAP_GET_NEXT_KEY
  // to get first key of the map. For older kernels, the call will fail.
  res = syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
  if (res < 0 && errno == EFAULT) {
    // Fall back to try to find a non-existing key: GET_NEXT_KEY on a key
    // that is absent from the map returns the map's first key.
    static unsigned char try_values[3] = {0, 0xff, 0x55};
    attr.key = ptr_to_u64(key);
    for (i = 0; i < 3; i++) {
      memset(key, try_values[i], key_size);
      // We want to check the existence of the key but we don't know the size
      // of map's value. So we pass an invalid pointer for value, expect
      // the call to fail and check if the error is ENOENT indicating the
      // key doesn't exist. If we use NULL for the invalid pointer, it might
      // trigger a page fault in kernel and affect performance. Hence we use
      // ~0 which will fail and return fast.
      // This should fail since we pass an invalid pointer for value.
      if (bpf_lookup_elem(fd, key, (void *)~0) >= 0)
        return -1;
      // ENOENT means the probe key doesn't exist: safe to use for GET_NEXT_KEY.
      if (errno == ENOENT)
        return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
    }
    // All three probe values happened to be present in the map; give up.
    return -1;
  } else {
    return res;
  }
}
191
// Wrapper for bpf(2) BPF_MAP_GET_NEXT_KEY: store the key following `key`
// into `next_key`. Returns 0 on success, -1 with errno set (ENOENT when
// `key` was the last one).
int bpf_get_next_key(int fd, void *key, void *next_key)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.next_key = ptr_to_u64(next_key);
  attr.key = ptr_to_u64(key);
  attr.map_fd = fd;

  return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
}
202
// Print diagnostics after a program-load attempt to stderr: the load error
// (when ret < 0), the verifier log (when provided), and heuristic hints for
// common verifier failures recognized in the log text.
static void bpf_print_hints(int ret, char *log)
{
  size_t i;
  // Table of verifier-log substrings and the hint emitted for each.
  // The following error strings will need maintenance to match LLVM.
  static const struct {
    const char *needle;
    const char *hint;
  } hint_table[] = {
    // stack busting
    { "invalid stack off=-",
      "HINT: Looks like you exceeded the BPF stack limit. "
      "This can happen if you allocate too much local variable storage. "
      "For example, if you allocated a 1 Kbyte struct (maybe for "
      "BPF_PERF_OUTPUT), busting a max stack of 512 bytes.\n\n" },
    // didn't check NULL on map lookup
    { "invalid mem access 'map_value_or_null'",
      "HINT: The 'map_value_or_null' error can happen if "
      "you dereference a pointer value from a map lookup without first "
      "checking if that pointer is NULL.\n\n" },
    // lacking a bpf_probe_read
    { "invalid mem access 'inv'",
      "HINT: The invalid mem access 'inv' error can happen "
      "if you try to dereference memory without first using "
      "bpf_probe_read() to copy it to the BPF stack. Sometimes the "
      "bpf_probe_read is automatic by the bcc rewriter, other times "
      "you'll need to be explicit.\n\n" },
  };

  if (ret < 0)
    fprintf(stderr, "bpf: Failed to load program: %s\n", strerror(errno));
  if (log == NULL)
    return;
  fprintf(stderr, "%s\n", log);

  // Hints are only useful when the load actually failed.
  if (ret >= 0)
    return;

  for (i = 0; i < sizeof(hint_table) / sizeof(hint_table[0]); i++) {
    if (strstr(log, hint_table[i].needle) != NULL)
      fprintf(stderr, "%s", hint_table[i].hint);
  }
}
Brenden Blancoa94bd932015-04-26 00:56:42 -0700241#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))
242
// Wrapper for bpf(2) BPF_OBJ_GET_INFO_BY_FD: fill `info` with kernel info
// about the program or map behind prog_map_fd. On success *info_len is
// updated to the number of bytes the kernel actually wrote.
// Returns 0 on success, negative with errno set on failure.
int bpf_obj_get_info(int prog_map_fd, void *info, uint32_t *info_len)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.info.info = ptr_to_u64(info);
  attr.info.info_len = *info_len;
  attr.info.bpf_fd = prog_map_fd;

  int rc = syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
  if (rc == 0)
    *info_len = attr.info.info_len;

  return rc;
}
259
/*
 * Compute the kernel-style program tag (first 8 bytes of the SHA-1 digest,
 * byte-swapped) for a BPF instruction array, using the kernel AF_ALG crypto
 * socket API. Map-fd immediates in ld_imm64 pseudo instructions are zeroed
 * before hashing so the result matches the tag the kernel reports for the
 * loaded program. Returns 0 on success (tag stored in *ptag), -1 or a
 * negative errno-style value on failure.
 */
int bpf_prog_compute_tag(const struct bpf_insn *insns, int prog_len,
                         unsigned long long *ptag)
{
  struct sockaddr_alg alg = {
    .salg_family = AF_ALG,
    .salg_type = "hash",
    .salg_name = "sha1",
  };
  int shafd = socket(AF_ALG, SOCK_SEQPACKET, 0);
  if (shafd < 0) {
    fprintf(stderr, "sha1 socket not available %s\n", strerror(errno));
    return -1;
  }
  int ret = bind(shafd, (struct sockaddr *)&alg, sizeof(alg));
  if (ret < 0) {
    fprintf(stderr, "sha1 bind fail %s\n", strerror(errno));
    close(shafd);
    return ret;
  }
  int shafd2 = accept(shafd, NULL, 0);
  if (shafd2 < 0) {
    fprintf(stderr, "sha1 accept fail %s\n", strerror(errno));
    close(shafd);
    return -1;
  }
  // Copy the instructions, blanking the imm of a BPF_LD|BPF_DW|BPF_IMM
  // instruction whose src_reg is BPF_PSEUDO_MAP_FD, plus the imm of its
  // second (code == 0) half, mirroring what the kernel hashes.
  struct bpf_insn prog[prog_len / 8];
  bool map_ld_seen = false;
  int i;
  for (i = 0; i < prog_len / 8; i++) {
    prog[i] = insns[i];
    if (insns[i].code == (BPF_LD | BPF_DW | BPF_IMM) &&
        insns[i].src_reg == BPF_PSEUDO_MAP_FD &&
        !map_ld_seen) {
      prog[i].imm = 0;
      map_ld_seen = true;
    } else if (insns[i].code == 0 && map_ld_seen) {
      prog[i].imm = 0;
      map_ld_seen = false;
    } else {
      map_ld_seen = false;
    }
  }
  ret = write(shafd2, prog, prog_len);
  if (ret != prog_len) {
    fprintf(stderr, "sha1 write fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }

  union {
    unsigned char sha[20];
    unsigned long long tag;
  } u = {};
  ret = read(shafd2, u.sha, 20);
  if (ret != 20) {
    fprintf(stderr, "sha1 read fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }
  // Fix: close both sockets on the success path too (they leaked before).
  close(shafd2);
  close(shafd);
  *ptag = __builtin_bswap64(u.tag);
  return 0;
}
324
/*
 * Read the program tag for a loaded BPF program fd from
 * /proc/self/fdinfo/<fd>. The prog_tag line is expected to be the 6th line
 * (after pos, flags, mnt_id, prog_type, prog_jited).
 * Returns 0 on success (tag stored in *ptag), -1 if fdinfo cannot be
 * opened, -2 if the expected prog_tag line is missing or malformed.
 */
int bpf_prog_get_tag(int fd, unsigned long long *ptag)
{
  char fmt[64];
  snprintf(fmt, sizeof(fmt), "/proc/self/fdinfo/%d", fd);
  FILE * f = fopen(fmt, "r");
  if (!f) {
/*  fprintf(stderr, "failed to open fdinfo %s\n", strerror(errno));*/
    return -1;
  }
  // Skip to the 6th line: pos, flags, mnt_id, prog_type, prog_jited,
  // prog_tag. Fix: check each fgets — if the fd is not a BPF program its
  // fdinfo is shorter, and the old unchecked reads left a stale earlier
  // line in fmt, which then parsed as a bogus "tag" with rc 0.
  int line;
  for (line = 0; line < 6; line++) {
    if (!fgets(fmt, sizeof(fmt), f)) {
      fclose(f);
      return -2;
    }
  }
  fclose(f);
  char *p = strchr(fmt, ':');
  if (!p) {
/*  fprintf(stderr, "broken fdinfo %s\n", fmt);*/
    return -2;
  }
  unsigned long long tag = 0;
  sscanf(p + 1, "%llx", &tag);
  *ptag = tag;
  return 0;
}
351
/*
 * Load a BPF program via the bpf(2) BPF_PROG_LOAD command.
 * Returns the new program fd (>= 0) on success, or a negative value with
 * errno set on failure.
 *
 * Log handling: when log_level > 0 the verifier log is captured into the
 * caller's log_buf (if log_buf_size > 0) or into an internal temporary
 * buffer. On load failure, logging is enabled if it wasn't and the load is
 * retried so a log can be printed; the temporary buffer is doubled until
 * the full verifier log fits (ENOSPC loop).
 *
 * The load is also retried transparently when the kernel rejects the
 * program name (pre-4.15 kernels: E2BIG/EINVAL) and when EPERM suggests an
 * insufficient RLIMIT_MEMLOCK.
 */
int bpf_prog_load(enum bpf_prog_type prog_type, const char *name,
                  const struct bpf_insn *insns, int prog_len,
                  const char *license, unsigned kern_version,
                  int log_level, char *log_buf, unsigned log_buf_size)
{
  size_t name_len = name ? strlen(name) : 0;
  union bpf_attr attr;
  char *tmp_log_buf = NULL;
  unsigned tmp_log_buf_size = 0;
  int ret = 0;

  memset(&attr, 0, sizeof(attr));

  attr.prog_type = prog_type;
  attr.kern_version = kern_version;
  attr.license = ptr_to_u64((void *)license);

  attr.insns = ptr_to_u64((void *)insns);
  attr.insn_cnt = prog_len / sizeof(struct bpf_insn);
  if (attr.insn_cnt > BPF_MAXINSNS) {
    errno = EINVAL;
    fprintf(stderr,
            "bpf: %s. Program too large (%u insns), at most %d insns\n\n",
            strerror(errno), attr.insn_cnt, BPF_MAXINSNS);
    return -1;
  }

  attr.log_level = log_level;
  if (attr.log_level > 0) {
    if (log_buf_size > 0) {
      // Use the user-provided log buffer if available.
      log_buf[0] = 0;
      attr.log_buf = ptr_to_u64(log_buf);
      attr.log_size = log_buf_size;
    } else {
      // Create and use a temporary log buffer if the user didn't provide one.
      tmp_log_buf_size = LOG_BUF_SIZE;
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        // Can't log; proceed with logging disabled.
        attr.log_level = 0;
      } else {
        tmp_log_buf[0] = 0;
        attr.log_buf = ptr_to_u64(tmp_log_buf);
        attr.log_size = tmp_log_buf_size;
      }
    }
  }

  memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));

  ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  // BPF object name is not supported on older Kernels.
  // If we failed due to this, clear the name and try again.
  if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
    memset(attr.prog_name, 0, BPF_OBJ_NAME_LEN);
    ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  }

  if (ret < 0 && errno == EPERM) {
    // When EPERM is returned, two reasons are possible:
    //  1. user has no permissions for bpf()
    //  2. user has insufficient rlimit for locked memory
    // Unfortunately, there is no api to inspect the current usage of locked
    // mem for the user, so an accurate calculation of how much memory to lock
    // for this new program is difficult to calculate. As a hack, bump the limit
    // to unlimited. If program load fails again, return the error.
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
    }
  }

  // The load has failed. Handle log message.
  if (ret < 0) {
    // User has provided a log buffer.
    if (log_buf_size) {
      // If logging is not already enabled, enable it and do the syscall again.
      if (attr.log_level == 0) {
        attr.log_level = 1;
        attr.log_buf = ptr_to_u64(log_buf);
        attr.log_size = log_buf_size;
        ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
      }
      // Print the log message and return.
      bpf_print_hints(ret, log_buf);
      if (errno == ENOSPC)
        fprintf(stderr, "bpf: log_buf size may be insufficient\n");
      goto return_result;
    }

    // User did not provide log buffer. We will try to increase size of
    // our temporary log buffer to get full error message.
    if (tmp_log_buf)
      free(tmp_log_buf);
    tmp_log_buf_size = LOG_BUF_SIZE;
    if (attr.log_level == 0)
      attr.log_level = 1;
    for (;;) {
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        goto return_result;
      }
      tmp_log_buf[0] = 0;
      attr.log_buf = ptr_to_u64(tmp_log_buf);
      attr.log_size = tmp_log_buf_size;

      ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
      if (ret < 0 && errno == ENOSPC) {
        // Temporary buffer size is not enough. Double it and try again.
        free(tmp_log_buf);
        tmp_log_buf = NULL;
        tmp_log_buf_size <<= 1;
      } else {
        break;
      }
    }
  }

  // If log_level is not 0, either specified by user or set due to error,
  // print the log message.
  if (attr.log_level > 0) {
    if (log_buf)
      bpf_print_hints(ret, log_buf);
    else if (tmp_log_buf)
      bpf_print_hints(ret, tmp_log_buf);
  }

return_result:
  if (tmp_log_buf)
    free(tmp_log_buf);
  return ret;
}
491
// Open a non-blocking, close-on-exec AF_PACKET raw socket capturing all
// protocols, and bind it to the given interface. With a NULL or empty
// interface name the socket is returned unbound.
// Returns the socket fd on success, -1 on failure.
int bpf_open_raw_sock(const char *name)
{
  int sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC,
                    htons(ETH_P_ALL));
  if (sock < 0) {
    fprintf(stderr, "cannot create raw socket\n");
    return -1;
  }

  /* Do not bind on empty interface names */
  if (name && *name != '\0') {
    struct sockaddr_ll sll;

    memset(&sll, 0, sizeof(sll));
    sll.sll_family = AF_PACKET;
    sll.sll_ifindex = if_nametoindex(name);
    if (sll.sll_ifindex == 0) {
      fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
      close(sock);
      return -1;
    }
    sll.sll_protocol = htons(ETH_P_ALL);
    if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
      fprintf(stderr, "bind to %s: %s\n", name, strerror(errno));
      close(sock);
      return -1;
    }
  }

  return sock;
}
524
// Attach a loaded socket-filter BPF program (fd `prog`) to `sock` via
// setsockopt(SO_ATTACH_BPF). Returns 0 on success, -1 with errno set.
int bpf_attach_socket(int sock, int prog) {
  return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
}
528
// Attach progfd to the tracing event whose tracefs directory is event_path:
// read the event's numeric id, open a perf event for it (raw samples +
// callchain), hand the perf fd to the reader and mmap its ring buffer, then
// attach the BPF program and enable the event.
// Returns 0 on success, -1 on failure. On failure the perf fd (if opened)
// is owned by the reader and freed with it by the caller.
static int bpf_attach_tracing_event(int progfd, const char *event_path,
    struct perf_reader *reader, int pid, int cpu, int group_fd) {
  int efd, pfd;
  ssize_t bytes;
  char buf[256];
  struct perf_event_attr attr = {};

  snprintf(buf, sizeof(buf), "%s/id", event_path);
  efd = open(buf, O_RDONLY, 0);
  if (efd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    return -1;
  }

  bytes = read(efd, buf, sizeof(buf));
  // bytes == sizeof(buf) would leave no room for the terminator below.
  if (bytes <= 0 || bytes >= sizeof(buf)) {
    fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
    close(efd);
    return -1;
  }
  close(efd);
  buf[bytes] = '\0';
  attr.config = strtol(buf, NULL, 0);
  attr.type = PERF_TYPE_TRACEPOINT;
  attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
  attr.sample_period = 1;
  attr.wakeup_events = 1;
  // FD_CLOEXEC so the perf fd does not leak into exec'ed children.
  pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
  if (pfd < 0) {
    fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
    return -1;
  }
  perf_reader_set_fd(reader, pfd);

  if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
    return -1;

  if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF)");
    return -1;
  }
  if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    return -1;
  }

  return 0;
}
577
// Create a kprobe (attach_type BPF_PROBE_ENTRY) or kretprobe (otherwise)
// named "<ev_name>_bcc_<pid>" on kernel function fn_name via the tracefs
// kprobe_events interface, then attach progfd to it through a perf event.
// Returns an opaque struct perf_reader* on success, NULL on failure.
void * bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type, const char *ev_name,
                         const char *fn_name,
                         pid_t pid, int cpu, int group_fd,
                         perf_reader_cb cb, void *cb_cookie)
{
  int kfd;
  char buf[256];
  char event_alias[128];
  struct perf_reader *reader = NULL;
  static char *event_type = "kprobe";

  reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
  if (!reader)
    goto error;

  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
  kfd = open(buf, O_WRONLY | O_APPEND, 0);
  if (kfd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    goto error;
  }

  // Include our pid in the alias so concurrent bcc processes don't collide.
  snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
  // kprobe_events syntax: 'p' = entry probe, 'r' = return probe.
  snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
           event_type, event_alias, fn_name);
  if (write(kfd, buf, strlen(buf)) < 0) {
    if (errno == EINVAL)
      fprintf(stderr, "check dmesg output for possible cause\n");
    close(kfd);
    goto error;
  }
  close(kfd);

  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
    goto error;

  return reader;

error:
  // perf_reader_free(NULL) is assumed safe here — NOTE(review): confirm.
  perf_reader_free(reader);
  return NULL;

}
622
Brenden Blancofa073452017-05-30 17:35:53 -0700623static int enter_mount_ns(int pid) {
624 struct stat self_stat, target_stat;
625 int self_fd = -1, target_fd = -1;
626 char buf[64];
627
628 if (pid < 0)
629 return -1;
630
631 if ((size_t)snprintf(buf, sizeof(buf), "/proc/%d/ns/mnt", pid) >= sizeof(buf))
632 return -1;
633
634 self_fd = open("/proc/self/ns/mnt", O_RDONLY);
635 if (self_fd < 0) {
636 perror("open(/proc/self/ns/mnt)");
637 return -1;
638 }
639
640 target_fd = open(buf, O_RDONLY);
641 if (target_fd < 0) {
642 perror("open(/proc/<pid>/ns/mnt)");
643 goto error;
644 }
645
646 if (fstat(self_fd, &self_stat)) {
647 perror("fstat(self_fd)");
648 goto error;
649 }
650
651 if (fstat(target_fd, &target_stat)) {
652 perror("fstat(target_fd)");
653 goto error;
654 }
655
656 // both target and current ns are same, avoid setns and close all fds
657 if (self_stat.st_ino == target_stat.st_ino)
658 goto error;
659
660 if (setns(target_fd, CLONE_NEWNS)) {
661 perror("setns(target)");
662 goto error;
663 }
664
665 close(target_fd);
666 return self_fd;
667
668error:
669 if (self_fd >= 0)
670 close(self_fd);
671 if (target_fd >= 0)
672 close(target_fd);
673 return -1;
674}
675
676static void exit_mount_ns(int fd) {
677 if (fd < 0)
678 return;
679
680 if (setns(fd, CLONE_NEWNS))
681 perror("setns");
682}
683
// Create a uprobe (attach_type BPF_PROBE_ENTRY) or uretprobe (otherwise)
// named "<ev_name>_bcc_<pid>" at binary_path+offset via the tracefs
// uprobe_events interface, then attach progfd to it through a perf event.
// The event is written from inside the target pid's mount namespace so the
// binary path resolves correctly for containerized processes.
// Returns an opaque struct perf_reader* on success, NULL on failure.
void * bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type, const char *ev_name,
                         const char *binary_path, uint64_t offset,
                         pid_t pid, int cpu, int group_fd,
                         perf_reader_cb cb, void *cb_cookie)
{
  char buf[PATH_MAX];
  char event_alias[PATH_MAX];
  struct perf_reader *reader = NULL;
  static char *event_type = "uprobe";
  int res, kfd = -1, ns_fd = -1;

  reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
  if (!reader)
    goto error;

  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
  kfd = open(buf, O_WRONLY | O_APPEND, 0);
  if (kfd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    goto error;
  }

  // Include our pid in the alias so concurrent bcc processes don't collide.
  res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
  if (res < 0 || res >= sizeof(event_alias)) {
    fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
    goto error;
  }
  // uprobe_events syntax: 'p' = entry probe, 'r' = return probe.
  res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
                 event_type, event_alias, binary_path, offset);
  if (res < 0 || res >= sizeof(buf)) {
    fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
    goto error;
  }

  // Resolve binary_path inside the target's mount namespace while writing
  // the probe definition, then switch back.
  ns_fd = enter_mount_ns(pid);
  if (write(kfd, buf, strlen(buf)) < 0) {
    if (errno == EINVAL)
      fprintf(stderr, "check dmesg output for possible cause\n");
    goto error;
  }
  close(kfd);
  exit_mount_ns(ns_fd);
  ns_fd = -1;

  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
    goto error;

  return reader;

error:
  if (kfd >= 0)
    close(kfd);
  exit_mount_ns(ns_fd);
  perf_reader_free(reader);
  return NULL;
}
741
Dereke4da6c22017-01-28 16:11:28 -0800742static int bpf_detach_probe(const char *ev_name, const char *event_type)
743{
Teng Qin0760b752017-09-03 19:07:59 -0700744 int kfd, res;
745 char buf[PATH_MAX];
Brenden Blanco68e2d142016-01-28 10:24:56 -0800746 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
747 kfd = open(buf, O_WRONLY | O_APPEND, 0);
Brenden Blanco839dd272015-06-11 12:35:55 -0700748 if (kfd < 0) {
Brenden Blanco68e2d142016-01-28 10:24:56 -0800749 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
Teng Qin0760b752017-09-03 19:07:59 -0700750 goto error;
Brenden Blanco839dd272015-06-11 12:35:55 -0700751 }
752
Teng Qin0760b752017-09-03 19:07:59 -0700753 res = snprintf(buf, sizeof(buf), "-:%ss/%s_bcc_%d", event_type, ev_name, getpid());
754 if (res < 0 || res >= sizeof(buf)) {
755 fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
756 goto error;
757 }
Dereke4da6c22017-01-28 16:11:28 -0800758 if (write(kfd, buf, strlen(buf)) < 0) {
Brenden Blanco68e2d142016-01-28 10:24:56 -0800759 fprintf(stderr, "write(%s): %s\n", buf, strerror(errno));
Teng Qin0760b752017-09-03 19:07:59 -0700760 goto error;
Brenden Blanco839dd272015-06-11 12:35:55 -0700761 }
Brenden Blanco839dd272015-06-11 12:35:55 -0700762
Teng Qin0760b752017-09-03 19:07:59 -0700763 close(kfd);
Brenden Blanco75982492015-11-06 10:43:05 -0800764 return 0;
Teng Qin0760b752017-09-03 19:07:59 -0700765
766error:
767 if (kfd >= 0)
768 close(kfd);
769 return -1;
Brenden Blanco839dd272015-06-11 12:35:55 -0700770}
771
Dereke4da6c22017-01-28 16:11:28 -0800772int bpf_detach_kprobe(const char *ev_name)
773{
Teng Qinf4543fc2017-09-03 17:11:59 -0700774 return bpf_detach_probe(ev_name, "kprobe");
Brenden Blanco68e2d142016-01-28 10:24:56 -0800775}
776
Dereke4da6c22017-01-28 16:11:28 -0800777int bpf_detach_uprobe(const char *ev_name)
778{
779 return bpf_detach_probe(ev_name, "uprobe");
Brenden Blanco68e2d142016-01-28 10:24:56 -0800780}
781
Dereke4da6c22017-01-28 16:11:28 -0800782
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -0700783void * bpf_attach_tracepoint(int progfd, const char *tp_category,
784 const char *tp_name, int pid, int cpu,
785 int group_fd, perf_reader_cb cb, void *cb_cookie) {
786 char buf[256];
787 struct perf_reader *reader = NULL;
788
Teng Qin4b764de2017-04-03 22:10:46 -0700789 reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -0700790 if (!reader)
791 goto error;
792
793 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/%s",
794 tp_category, tp_name);
795 if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
796 goto error;
797
798 return reader;
799
800error:
801 perf_reader_free(reader);
802 return NULL;
803}
804
int bpf_detach_tracepoint(const char *tp_category, const char *tp_name) {
  (void)tp_category;
  (void)tp_name;
  // Nothing to undo for tracepoints today; this exists so callers pair every
  // attach with a detach, letting future cleanup slot in transparently.
  return 0;
}
810
Teng Qin4b764de2017-04-03 22:10:46 -0700811void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb,
812 perf_reader_lost_cb lost_cb, void *cb_cookie,
813 int pid, int cpu, int page_cnt) {
Brenden Blanco75982492015-11-06 10:43:05 -0800814 int pfd;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800815 struct perf_event_attr attr = {};
Brenden Blanco75982492015-11-06 10:43:05 -0800816 struct perf_reader *reader = NULL;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800817
Teng Qin4b764de2017-04-03 22:10:46 -0700818 reader = perf_reader_new(NULL, raw_cb, lost_cb, cb_cookie, page_cnt);
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800819 if (!reader)
Brenden Blanco75982492015-11-06 10:43:05 -0800820 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800821
Brenden Blanco0dd24412016-02-17 00:26:14 -0800822 attr.config = 10;//PERF_COUNT_SW_BPF_OUTPUT;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800823 attr.type = PERF_TYPE_SOFTWARE;
824 attr.sample_type = PERF_SAMPLE_RAW;
Brenden Blanco75982492015-11-06 10:43:05 -0800825 attr.sample_period = 1;
826 attr.wakeup_events = 1;
827 pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800828 if (pfd < 0) {
Brenden Blanco0dd24412016-02-17 00:26:14 -0800829 fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
830 fprintf(stderr, " (check your kernel for PERF_COUNT_SW_BPF_OUTPUT support, 4.4 or newer)\n");
Brenden Blanco75982492015-11-06 10:43:05 -0800831 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800832 }
833 perf_reader_set_fd(reader, pfd);
834
835 if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
Brenden Blanco75982492015-11-06 10:43:05 -0800836 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800837
838 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
839 perror("ioctl(PERF_EVENT_IOC_ENABLE)");
Brenden Blanco75982492015-11-06 10:43:05 -0800840 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800841 }
842
843 return reader;
Brenden Blanco75982492015-11-06 10:43:05 -0800844
845error:
846 if (reader)
847 perf_reader_free(reader);
848
849 return NULL;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800850}
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200851
Brenden Blancofa073452017-05-30 17:35:53 -0700852static int invalid_perf_config(uint32_t type, uint64_t config) {
Teng Qin98752212017-05-19 19:05:24 -0700853 switch (type) {
Teng Qin01b07ba2017-11-20 13:28:03 -0800854 case PERF_TYPE_HARDWARE:
855 if (config >= PERF_COUNT_HW_MAX) {
856 fprintf(stderr, "HARDWARE perf event config out of range\n");
857 goto is_invalid;
858 }
859 return 0;
860 case PERF_TYPE_SOFTWARE:
861 if (config >= PERF_COUNT_SW_MAX) {
862 fprintf(stderr, "SOFTWARE perf event config out of range\n");
863 goto is_invalid;
864 } else if (config == 10 /* PERF_COUNT_SW_BPF_OUTPUT */) {
865 fprintf(stderr, "Unable to open or attach perf event for BPF_OUTPUT\n");
866 goto is_invalid;
867 }
868 return 0;
869 case PERF_TYPE_HW_CACHE:
870 if (((config >> 16) >= PERF_COUNT_HW_CACHE_RESULT_MAX) ||
871 (((config >> 8) & 0xff) >= PERF_COUNT_HW_CACHE_OP_MAX) ||
872 ((config & 0xff) >= PERF_COUNT_HW_CACHE_MAX)) {
873 fprintf(stderr, "HW_CACHE perf event config out of range\n");
874 goto is_invalid;
875 }
876 return 0;
877 case PERF_TYPE_TRACEPOINT:
878 case PERF_TYPE_BREAKPOINT:
879 fprintf(stderr,
880 "Unable to open or attach TRACEPOINT or BREAKPOINT events\n");
881 goto is_invalid;
882 default:
883 return 0;
Teng Qin98752212017-05-19 19:05:24 -0700884 }
Teng Qin01b07ba2017-11-20 13:28:03 -0800885is_invalid:
886 fprintf(stderr, "Invalid perf event type %" PRIu32 " config %" PRIu64 "\n",
887 type, config);
888 return 1;
Teng Qin98752212017-05-19 19:05:24 -0700889}
890
Brenden Blanco3069caa2016-08-01 18:12:11 -0700891int bpf_open_perf_event(uint32_t type, uint64_t config, int pid, int cpu) {
892 int fd;
893 struct perf_event_attr attr = {};
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200894
Teng Qin98752212017-05-19 19:05:24 -0700895 if (invalid_perf_config(type, config)) {
Teng Qin98752212017-05-19 19:05:24 -0700896 return -1;
897 }
898
Brenden Blanco3069caa2016-08-01 18:12:11 -0700899 attr.sample_period = LONG_MAX;
900 attr.type = type;
901 attr.config = config;
902
903 fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
904 if (fd < 0) {
905 fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
906 return -1;
907 }
908
909 if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
910 perror("ioctl(PERF_EVENT_IOC_ENABLE)");
911 close(fd);
912 return -1;
913 }
914
915 return fd;
916}
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200917
Andy Gospodarek9f3cab72017-05-17 16:18:45 -0400918int bpf_attach_xdp(const char *dev_name, int progfd, uint32_t flags) {
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200919 struct sockaddr_nl sa;
920 int sock, seq = 0, len, ret = -1;
921 char buf[4096];
922 struct nlattr *nla, *nla_xdp;
923 struct {
924 struct nlmsghdr nh;
925 struct ifinfomsg ifinfo;
926 char attrbuf[64];
927 } req;
928 struct nlmsghdr *nh;
929 struct nlmsgerr *err;
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +0900930 socklen_t addrlen;
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200931
932 memset(&sa, 0, sizeof(sa));
933 sa.nl_family = AF_NETLINK;
934
935 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
936 if (sock < 0) {
937 fprintf(stderr, "bpf: opening a netlink socket: %s\n", strerror(errno));
938 return -1;
939 }
940
941 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
942 fprintf(stderr, "bpf: bind to netlink: %s\n", strerror(errno));
943 goto cleanup;
944 }
945
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +0900946 addrlen = sizeof(sa);
947 if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
948 fprintf(stderr, "bpf: get sock name of netlink: %s\n", strerror(errno));
949 goto cleanup;
950 }
951
952 if (addrlen != sizeof(sa)) {
953 fprintf(stderr, "bpf: wrong netlink address length: %d\n", addrlen);
954 goto cleanup;
955 }
956
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200957 memset(&req, 0, sizeof(req));
958 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
959 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
960 req.nh.nlmsg_type = RTM_SETLINK;
961 req.nh.nlmsg_pid = 0;
962 req.nh.nlmsg_seq = ++seq;
963 req.ifinfo.ifi_family = AF_UNSPEC;
964 req.ifinfo.ifi_index = if_nametoindex(dev_name);
965 if (req.ifinfo.ifi_index == 0) {
966 fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
967 goto cleanup;
968 }
969
970 nla = (struct nlattr *)(((char *)&req)
971 + NLMSG_ALIGN(req.nh.nlmsg_len));
972 nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
973
974 nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
Andy Gospodarek9f3cab72017-05-17 16:18:45 -0400975 nla->nla_len = NLA_HDRLEN;
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200976
977 // we specify the FD passed over by the user
978 nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
Arthur Gautierfbd91e22017-04-28 21:39:58 +0000979 nla_xdp->nla_len = NLA_HDRLEN + sizeof(progfd);
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200980 memcpy((char *)nla_xdp + NLA_HDRLEN, &progfd, sizeof(progfd));
Andy Gospodarek9f3cab72017-05-17 16:18:45 -0400981 nla->nla_len += nla_xdp->nla_len;
982
983 // parse flags as passed by the user
984 if (flags) {
985 nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
Gary Lindb8353b2017-08-18 18:10:43 +0800986 nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
Andy Gospodarek9f3cab72017-05-17 16:18:45 -0400987 nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
988 memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
989 nla->nla_len += nla_xdp->nla_len;
990 }
Jan RĂ¼the0724d72016-07-28 22:32:46 +0200991
992 req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
993
994 if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
995 fprintf(stderr, "bpf: send to netlink: %s\n", strerror(errno));
996 goto cleanup;
997 }
998
999 len = recv(sock, buf, sizeof(buf), 0);
1000 if (len < 0) {
1001 fprintf(stderr, "bpf: recv from netlink: %s\n", strerror(errno));
1002 goto cleanup;
1003 }
1004
1005 for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
1006 nh = NLMSG_NEXT(nh, len)) {
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001007 if (nh->nlmsg_pid != sa.nl_pid) {
Toshiaki Makita890c76a2017-07-31 20:20:55 +09001008 fprintf(stderr, "bpf: Wrong pid %u, expected %u\n",
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001009 nh->nlmsg_pid, sa.nl_pid);
Jan RĂ¼the0724d72016-07-28 22:32:46 +02001010 errno = EBADMSG;
1011 goto cleanup;
1012 }
1013 if (nh->nlmsg_seq != seq) {
1014 fprintf(stderr, "bpf: Wrong seq %d, expected %d\n",
1015 nh->nlmsg_seq, seq);
1016 errno = EBADMSG;
1017 goto cleanup;
1018 }
1019 switch (nh->nlmsg_type) {
1020 case NLMSG_ERROR:
1021 err = (struct nlmsgerr *)NLMSG_DATA(nh);
1022 if (!err->error)
1023 continue;
1024 fprintf(stderr, "bpf: nlmsg error %s\n", strerror(-err->error));
1025 errno = -err->error;
1026 goto cleanup;
1027 case NLMSG_DONE:
1028 break;
1029 }
1030 }
1031
1032 ret = 0;
1033
1034cleanup:
1035 close(sock);
1036 return ret;
1037}
Teng Qin206b0202016-10-18 16:06:57 -07001038
/*
 * Open a sampling perf event (by period OR frequency, exactly one), attach
 * the BPF program progfd to it, and enable it.
 * Returns the perf event fd on success, -1 on failure (diagnostic on stderr).
 */
int bpf_attach_perf_event(int progfd, uint32_t ev_type, uint32_t ev_config,
                          uint64_t sample_period, uint64_t sample_freq,
                          pid_t pid, int cpu, int group_fd) {
  struct perf_event_attr attr = {};
  int pfd;

  if (invalid_perf_config(ev_type, ev_config)) {
    return -1;
  }
  // Equivalent to !(period ^ freq): both set or both clear is an error.
  if ((sample_period > 0) == (sample_freq > 0)) {
    fprintf(
      stderr, "Exactly one of sample_period / sample_freq should be set\n"
    );
    return -1;
  }

  attr.type = ev_type;
  attr.config = ev_config;
  attr.inherit = 1;  // child tasks inherit the event
  if (sample_freq > 0) {
    attr.freq = 1;
    attr.sample_freq = sample_freq;
  } else {
    attr.sample_period = sample_period;
  }

  pfd = syscall(
    __NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC
  );
  if (pfd < 0) {
    perror("perf_event_open failed");
    return -1;
  }
  if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, progfd) != 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF) failed");
    goto err;
  }
  if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) != 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE) failed");
    goto err;
  }

  return pfd;

err:
  close(pfd);
  return -1;
}
1083
Teng Qind6827332017-05-23 16:35:11 -07001084int bpf_close_perf_event_fd(int fd) {
1085 int res, error = 0;
1086 if (fd >= 0) {
1087 res = ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
1088 if (res != 0) {
1089 perror("ioctl(PERF_EVENT_IOC_DISABLE) failed");
1090 error = res;
1091 }
1092 res = close(fd);
1093 if (res != 0) {
1094 perror("close perf event FD failed");
1095 error = (res && !error) ? res : error;
1096 }
1097 }
1098 return error;
Teng Qin206b0202016-10-18 16:06:57 -07001099}
Huapeng Zhou37dcac02016-12-20 13:42:01 -08001100
/*
 * Pin the BPF object referenced by fd at pathname (on a bpffs mount).
 * Thin wrapper over the bpf(2) BPF_OBJ_PIN command; returns the syscall's
 * result (0 on success, -1 with errno on failure).
 */
int bpf_obj_pin(int fd, const char *pathname)
{
  union bpf_attr attr;

  // memset (not "= {}") guarantees padding is zeroed, as bpf(2) requires.
  memset(&attr, 0, sizeof(attr));
  attr.bpf_fd = fd;
  attr.pathname = ptr_to_u64((void *)pathname);

  return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
}
1111
/*
 * Retrieve a new fd for the BPF object pinned at pathname.
 * Thin wrapper over the bpf(2) BPF_OBJ_GET command; returns the fd on
 * success, -1 with errno on failure.
 */
int bpf_obj_get(const char *pathname)
{
  union bpf_attr attr;

  // memset (not "= {}") guarantees padding is zeroed, as bpf(2) requires.
  memset(&attr, 0, sizeof(attr));
  attr.pathname = ptr_to_u64((void *)pathname);

  return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}
Martin KaFai Lau3c24ad92017-10-19 23:47:39 -07001121
/*
 * Fetch the id of the first BPF program with id > start_id into *next_id.
 * Returns 0 on success; -1 with errno (ENOENT when no more programs) on
 * failure, in which case *next_id is left untouched.
 */
int bpf_prog_get_next_id(uint32_t start_id, uint32_t *next_id)
{
  union bpf_attr attr;
  int rc;

  memset(&attr, 0, sizeof(attr));
  attr.start_id = start_id;

  rc = syscall(__NR_bpf, BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
  if (rc == 0)
    *next_id = attr.next_id;

  return rc;
}
1136
/*
 * Open an fd to the loaded BPF program with the given id.
 * Returns the fd on success, -1 with errno on failure (requires
 * CAP_SYS_ADMIN).
 */
int bpf_prog_get_fd_by_id(uint32_t id)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.prog_id = id;

  return syscall(__NR_bpf, BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
}
1146
/*
 * Open an fd to the BPF map with the given id.
 * Returns the fd on success, -1 with errno on failure (requires
 * CAP_SYS_ADMIN).
 */
int bpf_map_get_fd_by_id(uint32_t id)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.map_id = id;

  return syscall(__NR_bpf, BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
}