blob: acfbc5e37440c8c16c0acb532980eaf82fa26b71 [file] [log] [blame]
Brenden Blanco246b9422015-06-05 11:15:27 -07001/*
2 * Copyright (c) 2015 PLUMgrid, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Nan Xiao667988c2017-08-28 11:44:19 +080016#ifndef _GNU_SOURCE
Colin Ian Kinga12db192017-07-06 13:58:17 +010017#define _GNU_SOURCE
Nan Xiao667988c2017-08-28 11:44:19 +080018#endif
Brenden Blanco246b9422015-06-05 11:15:27 -070019
Brenden Blancocd5cb412015-04-26 09:41:58 -070020#include <arpa/inet.h>
Brenden Blancoa94bd932015-04-26 00:56:42 -070021#include <errno.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070022#include <fcntl.h>
Teng Qin01b07ba2017-11-20 13:28:03 -080023#include <inttypes.h>
Brenden Blanco3069caa2016-08-01 18:12:11 -070024#include <limits.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070025#include <linux/bpf.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070026#include <linux/bpf_common.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070027#include <linux/if_packet.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070028#include <linux/perf_event.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070029#include <linux/pkt_cls.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070030#include <linux/rtnetlink.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070031#include <linux/sched.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070032#include <linux/unistd.h>
33#include <linux/version.h>
Brenden Blancoa94bd932015-04-26 00:56:42 -070034#include <net/ethernet.h>
35#include <net/if.h>
Brenden Blancofa073452017-05-30 17:35:53 -070036#include <sched.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070037#include <stdbool.h>
Brenden Blancobb7200c2015-06-04 18:01:42 -070038#include <stdio.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070039#include <stdlib.h>
40#include <string.h>
41#include <sys/ioctl.h>
Brenden Blanco4b4bd272015-11-30 10:54:47 -080042#include <sys/resource.h>
Derek35c25012017-01-22 20:58:23 -080043#include <sys/stat.h>
44#include <sys/types.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070045#include <unistd.h>
Alexei Starovoitovb1df37c2017-09-06 19:47:47 -070046#include <linux/if_alg.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070047
Brenden Blancoa94bd932015-04-26 00:56:42 -070048#include "libbpf.h"
Brenden Blanco8207d102015-09-25 13:58:30 -070049#include "perf_reader.h"
Brenden Blancoa94bd932015-04-26 00:56:42 -070050
nikolay.samofatovc5308e92017-12-28 19:01:31 +030051// TODO: Remove this when CentOS 6 support is not needed anymore
52#include "setns.h"
53
Brenden Blancof275d3d2015-07-06 23:41:23 -070054// TODO: remove these defines when linux-libc-dev exports them properly
55
56#ifndef __NR_bpf
Naveen N. Rao0006ad12016-04-29 16:42:58 +053057#if defined(__powerpc64__)
58#define __NR_bpf 361
Zvonko Kosic98121a32017-03-07 07:30:25 +010059#elif defined(__s390x__)
60#define __NR_bpf 351
Zhiyi Sun8e434b72016-12-06 16:21:37 +080061#elif defined(__aarch64__)
62#define __NR_bpf 280
Naveen N. Rao0006ad12016-04-29 16:42:58 +053063#else
Brenden Blancof275d3d2015-07-06 23:41:23 -070064#define __NR_bpf 321
65#endif
Naveen N. Rao0006ad12016-04-29 16:42:58 +053066#endif
Brenden Blancof275d3d2015-07-06 23:41:23 -070067
68#ifndef SO_ATTACH_BPF
69#define SO_ATTACH_BPF 50
70#endif
71
72#ifndef PERF_EVENT_IOC_SET_BPF
73#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
74#endif
75
76#ifndef PERF_FLAG_FD_CLOEXEC
77#define PERF_FLAG_FD_CLOEXEC (1UL << 3)
78#endif
79
nikolay.samofatovc5308e92017-12-28 19:01:31 +030080// TODO: Remove this when CentOS 6 support is not needed anymore
81#ifndef AF_ALG
82#define AF_ALG 38
83#endif
84
Martin KaFai Laudf368162017-10-19 12:46:48 -070085#define min(x, y) ((x) < (y) ? (x) : (y))
86
Paul Chaignon37308292018-01-27 23:06:39 +010087struct bpf_helper {
88 char *name;
89 char *required_version;
90};
91
// Table of BPF helper functions, indexed by (kernel helper ID - 1); used
// by bpf_print_hints() to report which kernel version first shipped a
// helper that the running kernel rejected. Entry order must match the
// helper enumeration order in the kernel's include/uapi/linux/bpf.h —
// append new helpers at the end, never reorder.
static struct bpf_helper helpers[] = {
  {"map_lookup_elem", "3.19"},
  {"map_update_elem", "3.19"},
  {"map_delete_elem", "3.19"},
  {"probe_read", "4.1"},
  {"ktime_get_ns", "4.1"},
  {"trace_printk", "4.1"},
  {"get_prandom_u32", "4.1"},
  {"get_smp_processor_id", "4.1"},
  {"skb_store_bytes", "4.1"},
  {"l3_csum_replace", "4.1"},
  {"l4_csum_replace", "4.1"},
  {"tail_call", "4.2"},
  {"clone_redirect", "4.2"},
  {"get_current_pid_tgid", "4.2"},
  {"get_current_uid_gid", "4.2"},
  {"get_current_comm", "4.2"},
  {"get_cgroup_classid", "4.3"},
  {"skb_vlan_push", "4.3"},
  {"skb_vlan_pop", "4.3"},
  {"skb_get_tunnel_key", "4.3"},
  {"skb_set_tunnel_key", "4.3"},
  {"perf_event_read", "4.3"},
  {"redirect", "4.4"},
  {"get_route_realm", "4.4"},
  {"perf_event_output", "4.4"},
  {"skb_load_bytes", "4.5"},
  {"get_stackid", "4.6"},
  {"csum_diff", "4.6"},
  {"skb_get_tunnel_opt", "4.6"},
  {"skb_set_tunnel_opt", "4.6"},
  {"skb_change_proto", "4.8"},
  {"skb_change_type", "4.8"},
  {"skb_under_cgroup", "4.8"},
  {"get_hash_recalc", "4.8"},
  {"get_current_task", "4.8"},
  {"probe_write_user", "4.8"},
  {"current_task_under_cgroup", "4.9"},
  {"skb_change_tail", "4.9"},
  {"skb_pull_data", "4.9"},
  {"csum_update", "4.9"},
  {"set_hash_invalid", "4.9"},
  {"get_numa_node_id", "4.10"},
  {"skb_change_head", "4.10"},
  {"xdp_adjust_head", "4.10"},
  {"probe_read_str", "4.11"},
  {"get_socket_cookie", "4.12"},
  {"get_socket_uid", "4.12"},
  {"set_hash", "4.13"},
  {"setsockopt", "4.13"},
  {"skb_adjust_room", "4.13"},
  {"redirect_map", "4.14"},
  {"sk_redirect_map", "4.14"},
  {"sock_map_update", "4.14"},
  {"xdp_adjust_meta", "4.15"},
  {"perf_event_read_value", "4.15"},
  {"perf_prog_read_value", "4.15"},
  {"getsockopt", "4.15"},
  {"override_return", "4.16"},
  {"sock_ops_cb_flags_set", "4.16"},
  {"msg_redirect_map", "4.17"},
  {"msg_apply_bytes", "4.17"},
  {"msg_cork_bytes", "4.17"},
  {"msg_pull_data", "4.17"},
  {"bind", "4.17"},
  {"xdp_adjust_tail", "4.18"},
  {"skb_get_xfrm_state", "4.18"},
  {"get_stack", "4.18"},
  {"skb_load_bytes_relative", "4.18"},
  {"fib_lookup", "4.18"},
  {"sock_hash_update", "4.18"},
  {"msg_redirect_hash", "4.18"},
  {"sk_redirect_hash", "4.18"},
  {"lwt_push_encap", "4.18"},
  {"lwt_seg6_store_bytes", "4.18"},
  {"lwt_seg6_adjust_srh", "4.18"},
  {"lwt_seg6_action", "4.18"},
  {"rc_repeat", "4.18"},
  {"rc_keydown", "4.18"},
};
172
Brenden Blancofa073452017-05-30 17:35:53 -0700173static uint64_t ptr_to_u64(void *ptr)
Brenden Blancoa94bd932015-04-26 00:56:42 -0700174{
Brenden Blancofa073452017-05-30 17:35:53 -0700175 return (uint64_t) (unsigned long) ptr;
Brenden Blancoa94bd932015-04-26 00:56:42 -0700176}
177
Martin KaFai Laudf368162017-10-19 12:46:48 -0700178int bpf_create_map(enum bpf_map_type map_type, const char *name,
179 int key_size, int value_size,
180 int max_entries, int map_flags)
Brenden Blancoa94bd932015-04-26 00:56:42 -0700181{
Martin KaFai Laudf368162017-10-19 12:46:48 -0700182 size_t name_len = name ? strlen(name) : 0;
Brenden Blancofdc027c2015-09-03 11:49:54 -0700183 union bpf_attr attr;
184 memset(&attr, 0, sizeof(attr));
185 attr.map_type = map_type;
186 attr.key_size = key_size;
187 attr.value_size = value_size;
188 attr.max_entries = max_entries;
Huapeng Zhoude11d072016-12-06 18:10:38 -0800189 attr.map_flags = map_flags;
Martin KaFai Laudf368162017-10-19 12:46:48 -0700190 memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
Brenden Blancoa94bd932015-04-26 00:56:42 -0700191
Brenden Blanco4b4bd272015-11-30 10:54:47 -0800192 int ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
Martin KaFai Laudf368162017-10-19 12:46:48 -0700193
194 if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
195 memset(attr.map_name, 0, BPF_OBJ_NAME_LEN);
196 ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
197 }
198
Brenden Blanco4b4bd272015-11-30 10:54:47 -0800199 if (ret < 0 && errno == EPERM) {
200 // see note below about the rationale for this retry
201
202 struct rlimit rl = {};
203 if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
204 rl.rlim_max = RLIM_INFINITY;
205 rl.rlim_cur = rl.rlim_max;
206 if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
207 ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
208 }
209 }
210 return ret;
Brenden Blancoa94bd932015-04-26 00:56:42 -0700211}
212
// Insert or update the entry for `key` in the map referred to by `fd`,
// honoring `flags` (e.g. BPF_ANY / BPF_NOEXIST / BPF_EXIST). Thin wrapper
// over the BPF_MAP_UPDATE_ELEM bpf(2) command; returns its result.
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
{
  union bpf_attr update_attr;

  // Zero the whole attr first: the kernel rejects attrs with stray bits.
  memset(&update_attr, 0, sizeof(update_attr));
  update_attr.map_fd = fd;
  update_attr.flags = flags;
  update_attr.key = ptr_to_u64(key);
  update_attr.value = ptr_to_u64(value);

  return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &update_attr, sizeof(update_attr));
}
224
// Copy the value stored for `key` in map `fd` into `value`. Thin wrapper
// over the BPF_MAP_LOOKUP_ELEM bpf(2) command; returns its result
// (0 on success, negative with errno set on failure).
int bpf_lookup_elem(int fd, void *key, void *value)
{
  union bpf_attr lookup_attr;

  // Zero everything so unused fields and padding are clear for the kernel.
  memset(&lookup_attr, 0, sizeof(lookup_attr));
  lookup_attr.value = ptr_to_u64(value);
  lookup_attr.key = ptr_to_u64(key);
  lookup_attr.map_fd = fd;

  return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &lookup_attr, sizeof(lookup_attr));
}
235
// Remove the entry for `key` from the map referred to by `fd`. Thin
// wrapper over the BPF_MAP_DELETE_ELEM bpf(2) command; returns its result.
int bpf_delete_elem(int fd, void *key)
{
  union bpf_attr delete_attr;

  // Zero everything so unused fields and padding are clear for the kernel.
  memset(&delete_attr, 0, sizeof(delete_attr));
  delete_attr.key = ptr_to_u64(key);
  delete_attr.map_fd = fd;

  return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &delete_attr, sizeof(delete_attr));
}
245
Teng Qindb7fab52017-05-16 01:10:15 -0700246int bpf_get_first_key(int fd, void *key, size_t key_size)
247{
248 union bpf_attr attr;
249 int i, res;
250
251 memset(&attr, 0, sizeof(attr));
252 attr.map_fd = fd;
253 attr.key = 0;
254 attr.next_key = ptr_to_u64(key);
255
256 // 4.12 and above kernel supports passing NULL to BPF_MAP_GET_NEXT_KEY
257 // to get first key of the map. For older kernels, the call will fail.
258 res = syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
259 if (res < 0 && errno == EFAULT) {
260 // Fall back to try to find a non-existing key.
261 static unsigned char try_values[3] = {0, 0xff, 0x55};
262 attr.key = ptr_to_u64(key);
263 for (i = 0; i < 3; i++) {
264 memset(key, try_values[i], key_size);
265 // We want to check the existence of the key but we don't know the size
266 // of map's value. So we pass an invalid pointer for value, expect
267 // the call to fail and check if the error is ENOENT indicating the
268 // key doesn't exist. If we use NULL for the invalid pointer, it might
Edward Bettsfdf9b082017-10-10 21:13:28 +0100269 // trigger a page fault in kernel and affect performance. Hence we use
Teng Qindb7fab52017-05-16 01:10:15 -0700270 // ~0 which will fail and return fast.
271 // This should fail since we pass an invalid pointer for value.
Teng Qin9190ef52017-05-20 22:46:00 -0700272 if (bpf_lookup_elem(fd, key, (void *)~0) >= 0)
Teng Qindb7fab52017-05-16 01:10:15 -0700273 return -1;
274 // This means the key doesn't exist.
275 if (errno == ENOENT)
276 return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
277 }
278 return -1;
279 } else {
280 return res;
281 }
282}
283
Brenden Blancoa94bd932015-04-26 00:56:42 -0700284int bpf_get_next_key(int fd, void *key, void *next_key)
285{
Brenden Blancofdc027c2015-09-03 11:49:54 -0700286 union bpf_attr attr;
287 memset(&attr, 0, sizeof(attr));
288 attr.map_fd = fd;
289 attr.key = ptr_to_u64(key);
290 attr.next_key = ptr_to_u64(next_key);
Brenden Blancoa94bd932015-04-26 00:56:42 -0700291
292 return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
293}
294
Teng Qin797669f2017-11-03 16:04:32 -0700295static void bpf_print_hints(int ret, char *log)
Brendan Gregg34826372017-01-13 14:02:02 -0800296{
Teng Qin797669f2017-11-03 16:04:32 -0700297 if (ret < 0)
298 fprintf(stderr, "bpf: Failed to load program: %s\n", strerror(errno));
Brendan Gregg34826372017-01-13 14:02:02 -0800299 if (log == NULL)
300 return;
Teng Qin797669f2017-11-03 16:04:32 -0700301 else
302 fprintf(stderr, "%s\n", log);
303
304 if (ret >= 0)
305 return;
Brendan Gregg34826372017-01-13 14:02:02 -0800306
307 // The following error strings will need maintenance to match LLVM.
308
309 // stack busting
310 if (strstr(log, "invalid stack off=-") != NULL) {
311 fprintf(stderr, "HINT: Looks like you exceeded the BPF stack limit. "
312 "This can happen if you allocate too much local variable storage. "
313 "For example, if you allocated a 1 Kbyte struct (maybe for "
314 "BPF_PERF_OUTPUT), busting a max stack of 512 bytes.\n\n");
315 }
316
317 // didn't check NULL on map lookup
318 if (strstr(log, "invalid mem access 'map_value_or_null'") != NULL) {
319 fprintf(stderr, "HINT: The 'map_value_or_null' error can happen if "
320 "you dereference a pointer value from a map lookup without first "
321 "checking if that pointer is NULL.\n\n");
322 }
323
324 // lacking a bpf_probe_read
325 if (strstr(log, "invalid mem access 'inv'") != NULL) {
326 fprintf(stderr, "HINT: The invalid mem access 'inv' error can happen "
327 "if you try to dereference memory without first using "
328 "bpf_probe_read() to copy it to the BPF stack. Sometimes the "
329 "bpf_probe_read is automatic by the bcc rewriter, other times "
330 "you'll need to be explicit.\n\n");
331 }
Paul Chaignon37308292018-01-27 23:06:39 +0100332
333 // helper function not found in kernel
334 char *helper_str = strstr(log, "invalid func ");
335 if (helper_str != NULL) {
336 helper_str += strlen("invalid func ");
337 char *str = strchr(helper_str, '#');
338 if (str != NULL) {
339 helper_str = str + 1;
340 }
341 int helper_id = atoi(helper_str);
342 if (helper_id && helper_id < sizeof(helpers) / sizeof(struct bpf_helper)) {
343 struct bpf_helper helper = helpers[helper_id - 1];
344 fprintf(stderr, "HINT: bpf_%s missing (added in Linux %s).\n\n",
345 helper.name, helper.required_version);
346 }
347 }
Brendan Gregg34826372017-01-13 14:02:02 -0800348}
Brenden Blancoa94bd932015-04-26 00:56:42 -0700349#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))
350
Martin KaFai Lau3c24ad92017-10-19 23:47:39 -0700351int bpf_obj_get_info(int prog_map_fd, void *info, uint32_t *info_len)
Alexei Starovoitovb1df37c2017-09-06 19:47:47 -0700352{
353 union bpf_attr attr;
354 int err;
355
356 memset(&attr, 0, sizeof(attr));
357 attr.info.bpf_fd = prog_map_fd;
358 attr.info.info_len = *info_len;
359 attr.info.info = ptr_to_u64(info);
360
361 err = syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
362 if (!err)
363 *info_len = attr.info.info_len;
364
365 return err;
366}
367
// Compute the SHA1-based tag of a BPF program the way the kernel does,
// using an AF_ALG ("hash"/"sha1") kernel crypto socket so no userspace
// SHA1 implementation is needed. Map-fd immediates inside BPF_LD_IMM64
// instructions are zeroed before hashing so the tag is stable across
// loads. On success writes the byte-swapped first 8 digest bytes to
// *ptag and returns 0; returns -1 (or the failed bind() result) on error.
int bpf_prog_compute_tag(const struct bpf_insn *insns, int prog_len,
                         unsigned long long *ptag)
{
  struct sockaddr_alg alg = {
    .salg_family = AF_ALG,
    .salg_type = "hash",
    .salg_name = "sha1",
  };
  int shafd = socket(AF_ALG, SOCK_SEQPACKET, 0);
  if (shafd < 0) {
    fprintf(stderr, "sha1 socket not available %s\n", strerror(errno));
    return -1;
  }
  int ret = bind(shafd, (struct sockaddr *)&alg, sizeof(alg));
  if (ret < 0) {
    fprintf(stderr, "sha1 bind fail %s\n", strerror(errno));
    close(shafd);
    return ret;
  }
  // The accepted socket is the actual hash transform instance.
  int shafd2 = accept(shafd, NULL, 0);
  if (shafd2 < 0) {
    fprintf(stderr, "sha1 accept fail %s\n", strerror(errno));
    close(shafd);
    return -1;
  }
  // Copy the program, blanking the imm field of each 16-byte
  // BPF_LD_IMM64 map-fd load and of the following zero-opcode half.
  // NOTE(review): assumes prog_len is a multiple of 8 (sizeof(struct
  // bpf_insn)); this VLA can be large for big programs — confirm callers
  // bound prog_len.
  struct bpf_insn prog[prog_len / 8];
  bool map_ld_seen = false;
  int i;
  for (i = 0; i < prog_len / 8; i++) {
    prog[i] = insns[i];
    if (insns[i].code == (BPF_LD | BPF_DW | BPF_IMM) &&
        insns[i].src_reg == BPF_PSEUDO_MAP_FD &&
        !map_ld_seen) {
      prog[i].imm = 0;
      map_ld_seen = true;
    } else if (insns[i].code == 0 && map_ld_seen) {
      // Second half of the 16-byte load immediately after the map-fd half.
      prog[i].imm = 0;
      map_ld_seen = false;
    } else {
      map_ld_seen = false;
    }
  }
  // Feed the sanitized program to the kernel hash socket in one write.
  ret = write(shafd2, prog, prog_len);
  if (ret != prog_len) {
    fprintf(stderr, "sha1 write fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }

  // Read back the 20-byte SHA1 digest; the tag is its first 8 bytes.
  union {
    unsigned char sha[20];
    unsigned long long tag;
  } u = {};
  ret = read(shafd2, u.sha, 20);
  if (ret != 20) {
    fprintf(stderr, "sha1 read fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }
  *ptag = __builtin_bswap64(u.tag);
  close(shafd2);
  close(shafd);
  return 0;
}
434
// Read the program tag of the BPF program behind `fd` from
// /proc/self/fdinfo/<fd>. The tag is on the sixth line ("prog_tag:").
// Returns 0 on success (tag stored in *ptag), -1 if fdinfo cannot be
// opened or is shorter than six lines, -2 if the line is malformed.
int bpf_prog_get_tag(int fd, unsigned long long *ptag)
{
  char fmt[64];
  int i;
  snprintf(fmt, sizeof(fmt), "/proc/self/fdinfo/%d", fd);
  FILE *f = fopen(fmt, "r");
  if (!f) {
/*  fprintf(stderr, "failed to open fdinfo %s\n", strerror(errno));*/
    return -1;
  }
  // Skip to the sixth line: pos, flags, mnt_id, prog_type, prog_jited,
  // then prog_tag. FIX: the original ignored fgets() failures, so on a
  // short file (e.g. pre-4.10 kernels without prog_tag) it would parse a
  // stale earlier line such as "mnt_id:..." as the tag.
  for (i = 0; i < 6; i++) {
    if (fgets(fmt, sizeof(fmt), f) == NULL) {
      fclose(f);
      return -1;
    }
  }
  fclose(f);
  char *p = strchr(fmt, ':');
  if (!p) {
/*  fprintf(stderr, "broken fdinfo %s\n", fmt);*/
    return -2;
  }
  unsigned long long tag = 0;
  sscanf(p + 1, "%llx", &tag);
  *ptag = tag;
  return 0;
}
461
Martin KaFai Laudf368162017-10-19 12:46:48 -0700462int bpf_prog_load(enum bpf_prog_type prog_type, const char *name,
Brenden Blancocd5cb412015-04-26 09:41:58 -0700463 const struct bpf_insn *insns, int prog_len,
Brenden Blanco759029f2015-07-29 15:47:51 -0700464 const char *license, unsigned kern_version,
Teng Qin797669f2017-11-03 16:04:32 -0700465 int log_level, char *log_buf, unsigned log_buf_size)
Brenden Blancoa94bd932015-04-26 00:56:42 -0700466{
Martin KaFai Laudf368162017-10-19 12:46:48 -0700467 size_t name_len = name ? strlen(name) : 0;
Brenden Blancofdc027c2015-09-03 11:49:54 -0700468 union bpf_attr attr;
Teng Qin797669f2017-11-03 16:04:32 -0700469 char *tmp_log_buf = NULL;
470 unsigned tmp_log_buf_size = 0;
yonghong-songd83210d2018-06-11 11:41:34 -0700471 int ret = 0, name_offset = 0;
davidefdl2dece102016-09-12 12:00:37 -0700472
Brenden Blancofdc027c2015-09-03 11:49:54 -0700473 memset(&attr, 0, sizeof(attr));
Teng Qin797669f2017-11-03 16:04:32 -0700474
Brenden Blancofdc027c2015-09-03 11:49:54 -0700475 attr.prog_type = prog_type;
Brenden Blanco7009b552015-05-26 11:48:17 -0700476 attr.kern_version = kern_version;
Teng Qin797669f2017-11-03 16:04:32 -0700477 attr.license = ptr_to_u64((void *)license);
Brenden Blancoa94bd932015-04-26 00:56:42 -0700478
Teng Qin797669f2017-11-03 16:04:32 -0700479 attr.insns = ptr_to_u64((void *)insns);
480 attr.insn_cnt = prog_len / sizeof(struct bpf_insn);
davidefdl2dece102016-09-12 12:00:37 -0700481 if (attr.insn_cnt > BPF_MAXINSNS) {
davidefdl2dece102016-09-12 12:00:37 -0700482 errno = EINVAL;
483 fprintf(stderr,
Martin KaFai Laudf368162017-10-19 12:46:48 -0700484 "bpf: %s. Program too large (%u insns), at most %d insns\n\n",
davidefdl2dece102016-09-12 12:00:37 -0700485 strerror(errno), attr.insn_cnt, BPF_MAXINSNS);
Teng Qin797669f2017-11-03 16:04:32 -0700486 return -1;
davidefdl2dece102016-09-12 12:00:37 -0700487 }
488
Teng Qin797669f2017-11-03 16:04:32 -0700489 attr.log_level = log_level;
490 if (attr.log_level > 0) {
491 if (log_buf_size > 0) {
492 // Use user-provided log buffer if availiable.
493 log_buf[0] = 0;
494 attr.log_buf = ptr_to_u64(log_buf);
495 attr.log_size = log_buf_size;
496 } else {
497 // Create and use temporary log buffer if user didn't provide one.
498 tmp_log_buf_size = LOG_BUF_SIZE;
499 tmp_log_buf = malloc(tmp_log_buf_size);
500 if (!tmp_log_buf) {
501 fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
502 strerror(errno));
503 attr.log_level = 0;
504 } else {
505 tmp_log_buf[0] = 0;
506 attr.log_buf = ptr_to_u64(tmp_log_buf);
507 attr.log_size = tmp_log_buf_size;
508 }
509 }
510 }
Huapeng Zhoude11d072016-12-06 18:10:38 -0800511
yonghong-songd83210d2018-06-11 11:41:34 -0700512 if (strncmp(name, "kprobe__", 8) == 0)
513 name_offset = 8;
514 else if (strncmp(name, "tracepoint__", 12) == 0)
515 name_offset = 12;
516 else if (strncmp(name, "raw_tracepoint__", 16) == 0)
517 name_offset = 16;
518 memcpy(attr.prog_name, name + name_offset,
519 min(name_len - name_offset, BPF_OBJ_NAME_LEN - 1));
Teng Qin797669f2017-11-03 16:04:32 -0700520
521 ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
522 // BPF object name is not supported on older Kernels.
523 // If we failed due to this, clear the name and try again.
Martin KaFai Laudf368162017-10-19 12:46:48 -0700524 if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
525 memset(attr.prog_name, 0, BPF_OBJ_NAME_LEN);
526 ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
527 }
528
Brenden Blanco4b4bd272015-11-30 10:54:47 -0800529 if (ret < 0 && errno == EPERM) {
530 // When EPERM is returned, two reasons are possible:
531 // 1. user has no permissions for bpf()
532 // 2. user has insufficent rlimit for locked memory
533 // Unfortunately, there is no api to inspect the current usage of locked
534 // mem for the user, so an accurate calculation of how much memory to lock
535 // for this new program is difficult to calculate. As a hack, bump the limit
536 // to unlimited. If program load fails again, return the error.
Brenden Blanco4b4bd272015-11-30 10:54:47 -0800537 struct rlimit rl = {};
538 if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
539 rl.rlim_max = RLIM_INFINITY;
540 rl.rlim_cur = rl.rlim_max;
541 if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
542 ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
543 }
544 }
545
Teng Qin797669f2017-11-03 16:04:32 -0700546 // The load has failed. Handle log message.
547 if (ret < 0) {
548 // User has provided a log buffer.
549 if (log_buf_size) {
550 // If logging is not already enabled, enable it and do the syscall again.
551 if (attr.log_level == 0) {
552 attr.log_level = 1;
553 attr.log_buf = ptr_to_u64(log_buf);
554 attr.log_size = log_buf_size;
555 ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
556 }
557 // Print the log message and return.
558 bpf_print_hints(ret, log_buf);
559 if (errno == ENOSPC)
560 fprintf(stderr, "bpf: log_buf size may be insufficient\n");
561 goto return_result;
davidefdl2dece102016-09-12 12:00:37 -0700562 }
563
Teng Qin797669f2017-11-03 16:04:32 -0700564 // User did not provide log buffer. We will try to increase size of
565 // our temporary log buffer to get full error message.
566 if (tmp_log_buf)
567 free(tmp_log_buf);
568 tmp_log_buf_size = LOG_BUF_SIZE;
Yonghong Songe86b0172017-11-14 10:20:07 -0800569 if (attr.log_level == 0)
570 attr.log_level = 1;
Teng Qin797669f2017-11-03 16:04:32 -0700571 for (;;) {
572 tmp_log_buf = malloc(tmp_log_buf_size);
573 if (!tmp_log_buf) {
574 fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
575 strerror(errno));
576 goto return_result;
577 }
578 tmp_log_buf[0] = 0;
579 attr.log_buf = ptr_to_u64(tmp_log_buf);
580 attr.log_size = tmp_log_buf_size;
davidefdl2dece102016-09-12 12:00:37 -0700581
Teng Qin797669f2017-11-03 16:04:32 -0700582 ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
583 if (ret < 0 && errno == ENOSPC) {
584 // Temporary buffer size is not enough. Double it and try again.
585 free(tmp_log_buf);
586 tmp_log_buf = NULL;
587 tmp_log_buf_size <<= 1;
588 } else {
589 break;
590 }
591 }
Brenden Blancocd5cb412015-04-26 09:41:58 -0700592 }
Teng Qin797669f2017-11-03 16:04:32 -0700593
Teng Qinc6692572017-12-05 17:06:57 -0800594 // Check if we should print the log message if log_level is not 0,
595 // either specified by user or set due to error.
Teng Qin797669f2017-11-03 16:04:32 -0700596 if (attr.log_level > 0) {
Teng Qinc6692572017-12-05 17:06:57 -0800597 // Don't print if user enabled logging and provided log buffer,
598 // but there is no error.
599 if (log_buf && ret < 0)
Teng Qin797669f2017-11-03 16:04:32 -0700600 bpf_print_hints(ret, log_buf);
601 else if (tmp_log_buf)
602 bpf_print_hints(ret, tmp_log_buf);
603 }
604
605return_result:
606 if (tmp_log_buf)
607 free(tmp_log_buf);
Brenden Blancocd5cb412015-04-26 09:41:58 -0700608 return ret;
Brenden Blancoa94bd932015-04-26 00:56:42 -0700609}
610
// Open a non-blocking, close-on-exec AF_PACKET raw socket receiving all
// protocols (ETH_P_ALL). If `name` is non-NULL and non-empty, bind the
// socket to that network interface. Returns the socket fd, or -1 on error.
int bpf_open_raw_sock(const char *name)
{
  struct sockaddr_ll addr;

  int fd = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
  if (fd < 0) {
    fprintf(stderr, "cannot create raw socket\n");
    return -1;
  }

  /* Do not bind on empty interface names */
  if (!name || *name == '\0')
    return fd;

  memset(&addr, 0, sizeof(addr));
  addr.sll_family = AF_PACKET;
  addr.sll_protocol = htons(ETH_P_ALL);
  addr.sll_ifindex = if_nametoindex(name);
  if (addr.sll_ifindex == 0) {
    fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
    close(fd);
    return -1;
  }

  if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
    fprintf(stderr, "bind to %s: %s\n", name, strerror(errno));
    close(fd);
    return -1;
  }

  return fd;
}
643
// Attach a loaded eBPF program (fd `prog`) as a socket filter on `sock`
// via SO_ATTACH_BPF. Returns setsockopt's result: 0 on success, -1 with
// errno set on failure.
int bpf_attach_socket(int sock, int prog) {
  return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
}
647
Song Liuf180ea12018-01-08 12:46:07 -0800648#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
649static int bpf_find_probe_type(const char *event_type)
650{
651 int fd;
652 int ret;
653 char buf[PATH_MAX];
654
655 ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
656 if (ret < 0 || ret >= sizeof(buf))
657 return -1;
658
659 fd = open(buf, O_RDONLY);
660 if (fd < 0)
661 return -1;
662 ret = read(fd, buf, sizeof(buf));
663 close(fd);
664 if (ret < 0 || ret >= sizeof(buf))
665 return -1;
666 errno = 0;
667 ret = (int)strtol(buf, NULL, 10);
668 return errno ? -1 : ret;
669}
670
671#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
672static int bpf_get_retprobe_bit(const char *event_type)
673{
674 int fd;
675 int ret;
676 char buf[PATH_MAX];
677
678 ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
679 if (ret < 0 || ret >= sizeof(buf))
680 return -1;
681
682 fd = open(buf, O_RDONLY);
683 if (fd < 0)
684 return -1;
685 ret = read(fd, buf, sizeof(buf));
686 close(fd);
687 if (ret < 0 || ret >= sizeof(buf))
688 return -1;
689 if (strlen(buf) < strlen("config:"))
690 return -1;
691 errno = 0;
692 ret = (int)strtol(buf + strlen("config:"), NULL, 10);
693 return errno ? -1 : ret;
694}
695
696/*
697 * new kernel API allows creating [k,u]probe with perf_event_open, which
698 * makes it easier to clean up the [k,u]probe. This function tries to
699 * create pfd with the new API.
700 */
701static int bpf_try_perf_event_open_with_probe(const char *name, uint64_t offs,
702 int pid, char *event_type, int is_return)
703{
704 struct perf_event_attr attr = {};
705 int type = bpf_find_probe_type(event_type);
706 int is_return_bit = bpf_get_retprobe_bit(event_type);
707 int cpu = 0;
708
709 if (type < 0 || is_return_bit < 0)
710 return -1;
Song Liuf180ea12018-01-08 12:46:07 -0800711 attr.sample_period = 1;
712 attr.wakeup_events = 1;
713 if (is_return)
714 attr.config |= 1 << is_return_bit;
715
716 /*
717 * struct perf_event_attr in latest perf_event.h has the following
718 * extension to config1 and config2. To keep bcc compatibe with
719 * older perf_event.h, we use config1 and config2 here instead of
720 * kprobe_func, uprobe_path, kprobe_addr, and probe_offset.
721 *
722 * union {
723 * __u64 bp_addr;
724 * __u64 kprobe_func;
725 * __u64 uprobe_path;
726 * __u64 config1;
727 * };
728 * union {
729 * __u64 bp_len;
730 * __u64 kprobe_addr;
731 * __u64 probe_offset;
732 * __u64 config2;
733 * };
734 */
735 attr.config2 = offs; /* config2 here is kprobe_addr or probe_offset */
736 attr.size = sizeof(attr);
737 attr.type = type;
738 /* config1 here is kprobe_func or uprobe_path */
739 attr.config1 = ptr_to_u64((void *)name);
740 // PID filter is only possible for uprobe events.
741 if (pid < 0)
742 pid = -1;
743 // perf_event_open API doesn't allow both pid and cpu to be -1.
744 // So only set it to -1 when PID is not -1.
745 // Tracing events do not do CPU filtering in any cases.
746 if (pid != -1)
747 cpu = -1;
748 return syscall(__NR_perf_event_open, &attr, pid, cpu, -1 /* group_fd */,
749 PERF_FLAG_FD_CLOEXEC);
750}
751
Teng Qinfafbf3c2018-02-27 00:26:12 -0800752// When a valid Perf Event FD provided through pfd, it will be used to enable
753// and attach BPF program to the event, and event_path will be ignored.
754// Otherwise, event_path is expected to contain the path to the event in debugfs
755// and it will be used to open the Perf Event FD.
756// In either case, if the attach partially failed (such as issue with the
757// ioctl operations), the **caller** need to clean up the Perf Event FD, either
758// provided by the caller or opened here.
759static int bpf_attach_tracing_event(int progfd, const char *event_path, int pid,
760 int *pfd)
Song Liu0e9ed202018-01-08 14:25:07 -0800761{
762 int efd, cpu = 0;
Brenden Blancod0daf6a2015-11-05 23:31:22 -0800763 ssize_t bytes;
Teng Qinbe578b72018-01-09 16:37:06 -0800764 char buf[PATH_MAX];
Brenden Blancocd5cb412015-04-26 09:41:58 -0700765 struct perf_event_attr attr = {};
Teng Qinfafbf3c2018-02-27 00:26:12 -0800766 // Caller did not provided a valid Perf Event FD. Create one with the debugfs
767 // event path provided.
768 if (*pfd < 0) {
Song Liu0e9ed202018-01-08 14:25:07 -0800769 snprintf(buf, sizeof(buf), "%s/id", event_path);
770 efd = open(buf, O_RDONLY, 0);
771 if (efd < 0) {
772 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
773 return -1;
774 }
775
776 bytes = read(efd, buf, sizeof(buf));
777 if (bytes <= 0 || bytes >= sizeof(buf)) {
778 fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
779 close(efd);
780 return -1;
781 }
782 close(efd);
783 buf[bytes] = '\0';
784 attr.config = strtol(buf, NULL, 0);
785 attr.type = PERF_TYPE_TRACEPOINT;
Song Liu0e9ed202018-01-08 14:25:07 -0800786 attr.sample_period = 1;
787 attr.wakeup_events = 1;
788 // PID filter is only possible for uprobe events.
789 if (pid < 0)
790 pid = -1;
791 // perf_event_open API doesn't allow both pid and cpu to be -1.
792 // So only set it to -1 when PID is not -1.
793 // Tracing events do not do CPU filtering in any cases.
794 if (pid != -1)
795 cpu = -1;
Teng Qinfafbf3c2018-02-27 00:26:12 -0800796 *pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
797 if (*pfd < 0) {
Song Liu0e9ed202018-01-08 14:25:07 -0800798 fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
799 return -1;
800 }
Brenden Blancocd5cb412015-04-26 09:41:58 -0700801 }
Brenden Blanco8207d102015-09-25 13:58:30 -0700802
Teng Qinfafbf3c2018-02-27 00:26:12 -0800803 if (ioctl(*pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) {
Brenden Blancocd5cb412015-04-26 09:41:58 -0700804 perror("ioctl(PERF_EVENT_IOC_SET_BPF)");
Martin KaFai Laua8e66962016-05-19 23:04:41 -0700805 return -1;
Brenden Blancocd5cb412015-04-26 09:41:58 -0700806 }
Teng Qinfafbf3c2018-02-27 00:26:12 -0800807 if (ioctl(*pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
Brenden Blancocd5cb412015-04-26 09:41:58 -0700808 perror("ioctl(PERF_EVENT_IOC_ENABLE)");
Martin KaFai Laua8e66962016-05-19 23:04:41 -0700809 return -1;
Brenden Blancocd5cb412015-04-26 09:41:58 -0700810 }
811
Brenden Blanco75982492015-11-06 10:43:05 -0800812 return 0;
Brenden Blancocd5cb412015-04-26 09:41:58 -0700813}
814
Teng Qinfafbf3c2018-02-27 00:26:12 -0800815int bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type,
Sandipan Das651d6902018-05-29 23:30:22 +0530816 const char *ev_name, const char *fn_name, uint64_t fn_offset)
Dereke4da6c22017-01-28 16:11:28 -0800817{
Teng Qinfafbf3c2018-02-27 00:26:12 -0800818 int kfd, pfd = -1;
Derek7174d932017-01-30 21:03:02 -0800819 char buf[256];
Teng Qinf4543fc2017-09-03 17:11:59 -0700820 char event_alias[128];
Dereke4da6c22017-01-28 16:11:28 -0800821 static char *event_type = "kprobe";
Brenden Blanco8207d102015-09-25 13:58:30 -0700822
Teng Qinfafbf3c2018-02-27 00:26:12 -0800823 // Try create the kprobe Perf Event with perf_event_open API.
Sandipan Das651d6902018-05-29 23:30:22 +0530824 pfd = bpf_try_perf_event_open_with_probe(fn_name, fn_offset, -1, event_type,
Song Liu0e9ed202018-01-08 14:25:07 -0800825 attach_type != BPF_PROBE_ENTRY);
Teng Qinfafbf3c2018-02-27 00:26:12 -0800826 // If failed, most likely Kernel doesn't support the new perf_event_open API
827 // yet. Try create the event using debugfs.
Song Liu0e9ed202018-01-08 14:25:07 -0800828 if (pfd < 0) {
829 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
830 kfd = open(buf, O_WRONLY | O_APPEND, 0);
831 if (kfd < 0) {
832 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
833 goto error;
834 }
835
836 snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
Sandipan Das651d6902018-05-29 23:30:22 +0530837
838 if (fn_offset > 0 && attach_type == BPF_PROBE_ENTRY)
839 snprintf(buf, sizeof(buf), "p:%ss/%s %s+%"PRIu64,
840 event_type, event_alias, fn_name, fn_offset);
841 else
842 snprintf(buf, sizeof(buf), "%c:%ss/%s %s",
843 attach_type == BPF_PROBE_ENTRY ? 'p' : 'r',
844 event_type, event_alias, fn_name);
845
Song Liu0e9ed202018-01-08 14:25:07 -0800846 if (write(kfd, buf, strlen(buf)) < 0) {
Yonghong Song858a3d72018-04-24 10:18:20 -0700847 if (errno == ENOENT)
848 fprintf(stderr, "cannot attach kprobe, probe entry may not exist\n");
849 else
850 fprintf(stderr, "cannot attach kprobe, %s\n", strerror(errno));
Song Liu0e9ed202018-01-08 14:25:07 -0800851 close(kfd);
852 goto error;
853 }
Martin KaFai Laua8e66962016-05-19 23:04:41 -0700854 close(kfd);
Song Liu0e9ed202018-01-08 14:25:07 -0800855 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
Brenden Blancocd5cb412015-04-26 09:41:58 -0700856 }
Teng Qinfafbf3c2018-02-27 00:26:12 -0800857 // If perf_event_open succeeded, bpf_attach_tracing_event will use the created
858 // Perf Event FD directly and buf would be empty and unused.
859 // Otherwise it will read the event ID from the path in buf, create the
860 // Perf Event event using that ID, and updated value of pfd.
861 if (bpf_attach_tracing_event(progfd, buf, -1 /* PID */, &pfd) == 0)
862 return pfd;
Dereke4da6c22017-01-28 16:11:28 -0800863
864error:
Teng Qinfafbf3c2018-02-27 00:26:12 -0800865 bpf_close_perf_event_fd(pfd);
866 return -1;
Dereke4da6c22017-01-28 16:11:28 -0800867}
868
Brenden Blancofa073452017-05-30 17:35:53 -0700869static int enter_mount_ns(int pid) {
870 struct stat self_stat, target_stat;
871 int self_fd = -1, target_fd = -1;
872 char buf[64];
873
874 if (pid < 0)
875 return -1;
876
877 if ((size_t)snprintf(buf, sizeof(buf), "/proc/%d/ns/mnt", pid) >= sizeof(buf))
878 return -1;
879
880 self_fd = open("/proc/self/ns/mnt", O_RDONLY);
881 if (self_fd < 0) {
882 perror("open(/proc/self/ns/mnt)");
883 return -1;
884 }
885
886 target_fd = open(buf, O_RDONLY);
887 if (target_fd < 0) {
888 perror("open(/proc/<pid>/ns/mnt)");
889 goto error;
890 }
891
892 if (fstat(self_fd, &self_stat)) {
893 perror("fstat(self_fd)");
894 goto error;
895 }
896
897 if (fstat(target_fd, &target_stat)) {
898 perror("fstat(target_fd)");
899 goto error;
900 }
901
902 // both target and current ns are same, avoid setns and close all fds
903 if (self_stat.st_ino == target_stat.st_ino)
904 goto error;
905
906 if (setns(target_fd, CLONE_NEWNS)) {
907 perror("setns(target)");
908 goto error;
909 }
910
911 close(target_fd);
912 return self_fd;
913
914error:
915 if (self_fd >= 0)
916 close(self_fd);
917 if (target_fd >= 0)
918 close(target_fd);
919 return -1;
920}
921
// Restore the mount namespace previously saved by enter_mount_ns() and
// release its fd. A negative fd means there is nothing to restore.
static void exit_mount_ns(int fd) {
  if (fd >= 0) {
    if (setns(fd, CLONE_NEWNS) != 0)
      perror("setns");
    close(fd);
  }
}
930
Teng Qinfafbf3c2018-02-27 00:26:12 -0800931int bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type,
932 const char *ev_name, const char *binary_path,
933 uint64_t offset, pid_t pid)
Dereke4da6c22017-01-28 16:11:28 -0800934{
Derek7174d932017-01-30 21:03:02 -0800935 char buf[PATH_MAX];
Teng Qin0760b752017-09-03 19:07:59 -0700936 char event_alias[PATH_MAX];
Dereke4da6c22017-01-28 16:11:28 -0800937 static char *event_type = "uprobe";
Teng Qinfafbf3c2018-02-27 00:26:12 -0800938 int res, kfd = -1, pfd = -1, ns_fd = -1;
939 // Try create the uprobe Perf Event with perf_event_open API.
Song Liu0e9ed202018-01-08 14:25:07 -0800940 pfd = bpf_try_perf_event_open_with_probe(binary_path, offset, pid, event_type,
941 attach_type != BPF_PROBE_ENTRY);
Teng Qinfafbf3c2018-02-27 00:26:12 -0800942 // If failed, most likely Kernel doesn't support the new perf_event_open API
943 // yet. Try create the event using debugfs.
Song Liu0e9ed202018-01-08 14:25:07 -0800944 if (pfd < 0) {
945 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
946 kfd = open(buf, O_WRONLY | O_APPEND, 0);
947 if (kfd < 0) {
948 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
949 goto error;
950 }
Dereke4da6c22017-01-28 16:11:28 -0800951
Song Liu0e9ed202018-01-08 14:25:07 -0800952 res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
953 if (res < 0 || res >= sizeof(event_alias)) {
954 fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
955 goto error;
956 }
957 res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
958 event_type, event_alias, binary_path, offset);
959 if (res < 0 || res >= sizeof(buf)) {
960 fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
961 goto error;
962 }
kmjohansen4b87af02017-03-30 00:58:31 -0700963
Song Liu0e9ed202018-01-08 14:25:07 -0800964 ns_fd = enter_mount_ns(pid);
965 if (write(kfd, buf, strlen(buf)) < 0) {
966 if (errno == EINVAL)
967 fprintf(stderr, "check dmesg output for possible cause\n");
968 goto error;
969 }
970 close(kfd);
Teng Qinfafbf3c2018-02-27 00:26:12 -0800971 kfd = -1;
Song Liu0e9ed202018-01-08 14:25:07 -0800972 exit_mount_ns(ns_fd);
973 ns_fd = -1;
Dereke4da6c22017-01-28 16:11:28 -0800974
Song Liu0e9ed202018-01-08 14:25:07 -0800975 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
976 }
Teng Qinfafbf3c2018-02-27 00:26:12 -0800977 // If perf_event_open succeeded, bpf_attach_tracing_event will use the created
978 // Perf Event FD directly and buf would be empty and unused.
979 // Otherwise it will read the event ID from the path in buf, create the
980 // Perf Event event using that ID, and updated value of pfd.
981 if (bpf_attach_tracing_event(progfd, buf, pid, &pfd) == 0)
982 return pfd;
Brenden Blanco75982492015-11-06 10:43:05 -0800983
984error:
Teng Qin0760b752017-09-03 19:07:59 -0700985 if (kfd >= 0)
986 close(kfd);
Brenden Blancofa073452017-05-30 17:35:53 -0700987 exit_mount_ns(ns_fd);
Teng Qinfafbf3c2018-02-27 00:26:12 -0800988 bpf_close_perf_event_fd(pfd);
989 return -1;
Brenden Blancocd5cb412015-04-26 09:41:58 -0700990}
991
Dereke4da6c22017-01-28 16:11:28 -0800992static int bpf_detach_probe(const char *ev_name, const char *event_type)
993{
Song Liu0e9ed202018-01-08 14:25:07 -0800994 int kfd = -1, res;
Teng Qin0760b752017-09-03 19:07:59 -0700995 char buf[PATH_MAX];
Song Liu0e9ed202018-01-08 14:25:07 -0800996 int found_event = 0;
997 size_t bufsize = 0;
998 char *cptr = NULL;
999 FILE *fp;
1000
1001 /*
1002 * For [k,u]probe created with perf_event_open (on newer kernel), it is
1003 * not necessary to clean it up in [k,u]probe_events. We first look up
1004 * the %s_bcc_%d line in [k,u]probe_events. If the event is not found,
1005 * it is safe to skip the cleaning up process (write -:... to the file).
1006 */
1007 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
1008 fp = fopen(buf, "r");
1009 if (!fp) {
1010 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
1011 goto error;
1012 }
1013
1014 res = snprintf(buf, sizeof(buf), "%ss/%s_bcc_%d", event_type, ev_name, getpid());
1015 if (res < 0 || res >= sizeof(buf)) {
1016 fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
1017 goto error;
1018 }
1019
1020 while (getline(&cptr, &bufsize, fp) != -1)
1021 if (strstr(cptr, buf) != NULL) {
1022 found_event = 1;
1023 break;
1024 }
Yonghong Song3c5686d2018-06-05 23:42:20 -07001025 free(cptr);
Song Liu0e9ed202018-01-08 14:25:07 -08001026 fclose(fp);
1027 fp = NULL;
1028
1029 if (!found_event)
1030 return 0;
1031
Brenden Blanco68e2d142016-01-28 10:24:56 -08001032 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
1033 kfd = open(buf, O_WRONLY | O_APPEND, 0);
Brenden Blanco839dd272015-06-11 12:35:55 -07001034 if (kfd < 0) {
Brenden Blanco68e2d142016-01-28 10:24:56 -08001035 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
Teng Qin0760b752017-09-03 19:07:59 -07001036 goto error;
Brenden Blanco839dd272015-06-11 12:35:55 -07001037 }
1038
Teng Qin0760b752017-09-03 19:07:59 -07001039 res = snprintf(buf, sizeof(buf), "-:%ss/%s_bcc_%d", event_type, ev_name, getpid());
1040 if (res < 0 || res >= sizeof(buf)) {
1041 fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
1042 goto error;
1043 }
Dereke4da6c22017-01-28 16:11:28 -08001044 if (write(kfd, buf, strlen(buf)) < 0) {
Brenden Blanco68e2d142016-01-28 10:24:56 -08001045 fprintf(stderr, "write(%s): %s\n", buf, strerror(errno));
Teng Qin0760b752017-09-03 19:07:59 -07001046 goto error;
Brenden Blanco839dd272015-06-11 12:35:55 -07001047 }
Brenden Blanco839dd272015-06-11 12:35:55 -07001048
Teng Qin0760b752017-09-03 19:07:59 -07001049 close(kfd);
Brenden Blanco75982492015-11-06 10:43:05 -08001050 return 0;
Teng Qin0760b752017-09-03 19:07:59 -07001051
1052error:
1053 if (kfd >= 0)
1054 close(kfd);
Song Liu0e9ed202018-01-08 14:25:07 -08001055 if (fp)
1056 fclose(fp);
Teng Qin0760b752017-09-03 19:07:59 -07001057 return -1;
Brenden Blanco839dd272015-06-11 12:35:55 -07001058}
1059
Dereke4da6c22017-01-28 16:11:28 -08001060int bpf_detach_kprobe(const char *ev_name)
1061{
Teng Qinf4543fc2017-09-03 17:11:59 -07001062 return bpf_detach_probe(ev_name, "kprobe");
Brenden Blanco68e2d142016-01-28 10:24:56 -08001063}
1064
Dereke4da6c22017-01-28 16:11:28 -08001065int bpf_detach_uprobe(const char *ev_name)
1066{
1067 return bpf_detach_probe(ev_name, "uprobe");
Brenden Blanco68e2d142016-01-28 10:24:56 -08001068}
1069
Dereke4da6c22017-01-28 16:11:28 -08001070
Teng Qinfafbf3c2018-02-27 00:26:12 -08001071int bpf_attach_tracepoint(int progfd, const char *tp_category,
1072 const char *tp_name)
1073{
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -07001074 char buf[256];
Teng Qinfafbf3c2018-02-27 00:26:12 -08001075 int pfd = -1;
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -07001076
1077 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/%s",
1078 tp_category, tp_name);
Teng Qinfafbf3c2018-02-27 00:26:12 -08001079 if (bpf_attach_tracing_event(progfd, buf, -1 /* PID */, &pfd) == 0)
1080 return pfd;
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -07001081
Teng Qinfafbf3c2018-02-27 00:26:12 -08001082 bpf_close_perf_event_fd(pfd);
1083 return -1;
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -07001084}
1085
int bpf_detach_tracepoint(const char *tp_category, const char *tp_name) {
  // No teardown is needed for tracepoints today; the function exists so
  // callers pair every attach with a detach.
  (void)tp_category;
  (void)tp_name;
  return 0;
}
1091
Yonghong Song0d722372018-04-27 04:56:08 -07001092int bpf_attach_raw_tracepoint(int progfd, char *tp_name)
1093{
1094 union bpf_attr attr;
1095 int ret;
1096
1097 bzero(&attr, sizeof(attr));
1098 attr.raw_tracepoint.name = ptr_to_u64(tp_name);
1099 attr.raw_tracepoint.prog_fd = progfd;
1100
1101 ret = syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
1102 if (ret < 0)
1103 fprintf(stderr, "bpf_attach_raw_tracepoint (%s): %s\n", tp_name, strerror(errno));
1104 return ret;
1105}
1106
Teng Qin4b764de2017-04-03 22:10:46 -07001107void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb,
1108 perf_reader_lost_cb lost_cb, void *cb_cookie,
1109 int pid, int cpu, int page_cnt) {
Brenden Blanco75982492015-11-06 10:43:05 -08001110 int pfd;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001111 struct perf_event_attr attr = {};
Brenden Blanco75982492015-11-06 10:43:05 -08001112 struct perf_reader *reader = NULL;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001113
Teng Qind0e48ed2018-02-27 01:05:20 -08001114 reader = perf_reader_new(raw_cb, lost_cb, cb_cookie, page_cnt);
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001115 if (!reader)
Brenden Blanco75982492015-11-06 10:43:05 -08001116 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001117
Brenden Blanco0dd24412016-02-17 00:26:14 -08001118 attr.config = 10;//PERF_COUNT_SW_BPF_OUTPUT;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001119 attr.type = PERF_TYPE_SOFTWARE;
1120 attr.sample_type = PERF_SAMPLE_RAW;
Brenden Blanco75982492015-11-06 10:43:05 -08001121 attr.sample_period = 1;
1122 attr.wakeup_events = 1;
1123 pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001124 if (pfd < 0) {
Brenden Blanco0dd24412016-02-17 00:26:14 -08001125 fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
1126 fprintf(stderr, " (check your kernel for PERF_COUNT_SW_BPF_OUTPUT support, 4.4 or newer)\n");
Brenden Blanco75982492015-11-06 10:43:05 -08001127 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001128 }
1129 perf_reader_set_fd(reader, pfd);
1130
Teng Qind0e48ed2018-02-27 01:05:20 -08001131 if (perf_reader_mmap(reader) < 0)
Brenden Blanco75982492015-11-06 10:43:05 -08001132 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001133
1134 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
1135 perror("ioctl(PERF_EVENT_IOC_ENABLE)");
Brenden Blanco75982492015-11-06 10:43:05 -08001136 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001137 }
1138
1139 return reader;
Brenden Blanco75982492015-11-06 10:43:05 -08001140
1141error:
1142 if (reader)
1143 perf_reader_free(reader);
1144
1145 return NULL;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001146}
Jan Rüthe0724d72016-07-28 22:32:46 +02001147
Brenden Blancofa073452017-05-30 17:35:53 -07001148static int invalid_perf_config(uint32_t type, uint64_t config) {
Teng Qin98752212017-05-19 19:05:24 -07001149 switch (type) {
Teng Qin01b07ba2017-11-20 13:28:03 -08001150 case PERF_TYPE_HARDWARE:
1151 if (config >= PERF_COUNT_HW_MAX) {
1152 fprintf(stderr, "HARDWARE perf event config out of range\n");
1153 goto is_invalid;
1154 }
1155 return 0;
1156 case PERF_TYPE_SOFTWARE:
1157 if (config >= PERF_COUNT_SW_MAX) {
1158 fprintf(stderr, "SOFTWARE perf event config out of range\n");
1159 goto is_invalid;
1160 } else if (config == 10 /* PERF_COUNT_SW_BPF_OUTPUT */) {
1161 fprintf(stderr, "Unable to open or attach perf event for BPF_OUTPUT\n");
1162 goto is_invalid;
1163 }
1164 return 0;
1165 case PERF_TYPE_HW_CACHE:
1166 if (((config >> 16) >= PERF_COUNT_HW_CACHE_RESULT_MAX) ||
1167 (((config >> 8) & 0xff) >= PERF_COUNT_HW_CACHE_OP_MAX) ||
1168 ((config & 0xff) >= PERF_COUNT_HW_CACHE_MAX)) {
1169 fprintf(stderr, "HW_CACHE perf event config out of range\n");
1170 goto is_invalid;
1171 }
1172 return 0;
1173 case PERF_TYPE_TRACEPOINT:
1174 case PERF_TYPE_BREAKPOINT:
1175 fprintf(stderr,
1176 "Unable to open or attach TRACEPOINT or BREAKPOINT events\n");
1177 goto is_invalid;
1178 default:
1179 return 0;
Teng Qin98752212017-05-19 19:05:24 -07001180 }
Teng Qin01b07ba2017-11-20 13:28:03 -08001181is_invalid:
1182 fprintf(stderr, "Invalid perf event type %" PRIu32 " config %" PRIu64 "\n",
1183 type, config);
1184 return 1;
Teng Qin98752212017-05-19 19:05:24 -07001185}
1186
Brenden Blanco3069caa2016-08-01 18:12:11 -07001187int bpf_open_perf_event(uint32_t type, uint64_t config, int pid, int cpu) {
1188 int fd;
1189 struct perf_event_attr attr = {};
Jan Rüthe0724d72016-07-28 22:32:46 +02001190
Teng Qin98752212017-05-19 19:05:24 -07001191 if (invalid_perf_config(type, config)) {
Teng Qin98752212017-05-19 19:05:24 -07001192 return -1;
1193 }
1194
Brenden Blanco3069caa2016-08-01 18:12:11 -07001195 attr.sample_period = LONG_MAX;
1196 attr.type = type;
1197 attr.config = config;
1198
1199 fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
1200 if (fd < 0) {
1201 fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
1202 return -1;
1203 }
1204
1205 if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
1206 perror("ioctl(PERF_EVENT_IOC_ENABLE)");
1207 close(fd);
1208 return -1;
1209 }
1210
1211 return fd;
1212}
Jan Rüthe0724d72016-07-28 22:32:46 +02001213
Andy Gospodarek9f3cab72017-05-17 16:18:45 -04001214int bpf_attach_xdp(const char *dev_name, int progfd, uint32_t flags) {
Jan Rüthe0724d72016-07-28 22:32:46 +02001215 struct sockaddr_nl sa;
1216 int sock, seq = 0, len, ret = -1;
1217 char buf[4096];
1218 struct nlattr *nla, *nla_xdp;
1219 struct {
1220 struct nlmsghdr nh;
1221 struct ifinfomsg ifinfo;
1222 char attrbuf[64];
1223 } req;
1224 struct nlmsghdr *nh;
1225 struct nlmsgerr *err;
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001226 socklen_t addrlen;
Jan Rüthe0724d72016-07-28 22:32:46 +02001227
1228 memset(&sa, 0, sizeof(sa));
1229 sa.nl_family = AF_NETLINK;
1230
1231 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1232 if (sock < 0) {
1233 fprintf(stderr, "bpf: opening a netlink socket: %s\n", strerror(errno));
1234 return -1;
1235 }
1236
1237 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
1238 fprintf(stderr, "bpf: bind to netlink: %s\n", strerror(errno));
1239 goto cleanup;
1240 }
1241
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001242 addrlen = sizeof(sa);
1243 if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
1244 fprintf(stderr, "bpf: get sock name of netlink: %s\n", strerror(errno));
1245 goto cleanup;
1246 }
1247
1248 if (addrlen != sizeof(sa)) {
1249 fprintf(stderr, "bpf: wrong netlink address length: %d\n", addrlen);
1250 goto cleanup;
1251 }
1252
Jan Rüthe0724d72016-07-28 22:32:46 +02001253 memset(&req, 0, sizeof(req));
1254 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1255 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1256 req.nh.nlmsg_type = RTM_SETLINK;
1257 req.nh.nlmsg_pid = 0;
1258 req.nh.nlmsg_seq = ++seq;
1259 req.ifinfo.ifi_family = AF_UNSPEC;
1260 req.ifinfo.ifi_index = if_nametoindex(dev_name);
1261 if (req.ifinfo.ifi_index == 0) {
1262 fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
1263 goto cleanup;
1264 }
1265
1266 nla = (struct nlattr *)(((char *)&req)
1267 + NLMSG_ALIGN(req.nh.nlmsg_len));
1268 nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
1269
1270 nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
Andy Gospodarek9f3cab72017-05-17 16:18:45 -04001271 nla->nla_len = NLA_HDRLEN;
Jan Rüthe0724d72016-07-28 22:32:46 +02001272
1273 // we specify the FD passed over by the user
1274 nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
Arthur Gautierfbd91e22017-04-28 21:39:58 +00001275 nla_xdp->nla_len = NLA_HDRLEN + sizeof(progfd);
Jan Rüthe0724d72016-07-28 22:32:46 +02001276 memcpy((char *)nla_xdp + NLA_HDRLEN, &progfd, sizeof(progfd));
Andy Gospodarek9f3cab72017-05-17 16:18:45 -04001277 nla->nla_len += nla_xdp->nla_len;
1278
1279 // parse flags as passed by the user
1280 if (flags) {
1281 nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
Gary Lindb8353b2017-08-18 18:10:43 +08001282 nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
Andy Gospodarek9f3cab72017-05-17 16:18:45 -04001283 nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
1284 memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
1285 nla->nla_len += nla_xdp->nla_len;
1286 }
Jan Rüthe0724d72016-07-28 22:32:46 +02001287
1288 req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
1289
1290 if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
1291 fprintf(stderr, "bpf: send to netlink: %s\n", strerror(errno));
1292 goto cleanup;
1293 }
1294
1295 len = recv(sock, buf, sizeof(buf), 0);
1296 if (len < 0) {
1297 fprintf(stderr, "bpf: recv from netlink: %s\n", strerror(errno));
1298 goto cleanup;
1299 }
1300
1301 for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
1302 nh = NLMSG_NEXT(nh, len)) {
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001303 if (nh->nlmsg_pid != sa.nl_pid) {
Toshiaki Makita890c76a2017-07-31 20:20:55 +09001304 fprintf(stderr, "bpf: Wrong pid %u, expected %u\n",
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001305 nh->nlmsg_pid, sa.nl_pid);
Jan Rüthe0724d72016-07-28 22:32:46 +02001306 errno = EBADMSG;
1307 goto cleanup;
1308 }
1309 if (nh->nlmsg_seq != seq) {
1310 fprintf(stderr, "bpf: Wrong seq %d, expected %d\n",
1311 nh->nlmsg_seq, seq);
1312 errno = EBADMSG;
1313 goto cleanup;
1314 }
1315 switch (nh->nlmsg_type) {
1316 case NLMSG_ERROR:
1317 err = (struct nlmsgerr *)NLMSG_DATA(nh);
1318 if (!err->error)
1319 continue;
1320 fprintf(stderr, "bpf: nlmsg error %s\n", strerror(-err->error));
1321 errno = -err->error;
1322 goto cleanup;
1323 case NLMSG_DONE:
1324 break;
1325 }
1326 }
1327
1328 ret = 0;
1329
1330cleanup:
1331 close(sock);
1332 return ret;
1333}
Teng Qin206b0202016-10-18 16:06:57 -07001334
Teng Qin5b7c6782018-03-09 16:57:11 -08001335int bpf_attach_perf_event_raw(int progfd, void *perf_event_attr, pid_t pid,
Teng Qinbf2513d2018-05-16 13:09:09 -07001336 int cpu, int group_fd, unsigned long extra_flags) {
Teng Qin5b7c6782018-03-09 16:57:11 -08001337 int fd = syscall(__NR_perf_event_open, perf_event_attr, pid, cpu, group_fd,
Teng Qinbf2513d2018-05-16 13:09:09 -07001338 PERF_FLAG_FD_CLOEXEC | extra_flags);
Teng Qin5b7c6782018-03-09 16:57:11 -08001339 if (fd < 0) {
1340 perror("perf_event_open failed");
1341 return -1;
1342 }
1343 if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, progfd) != 0) {
1344 perror("ioctl(PERF_EVENT_IOC_SET_BPF) failed");
1345 close(fd);
1346 return -1;
1347 }
1348 if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) != 0) {
1349 perror("ioctl(PERF_EVENT_IOC_ENABLE) failed");
1350 close(fd);
1351 return -1;
1352 }
1353
1354 return fd;
1355}
1356
Teng Qin206b0202016-10-18 16:06:57 -07001357int bpf_attach_perf_event(int progfd, uint32_t ev_type, uint32_t ev_config,
1358 uint64_t sample_period, uint64_t sample_freq,
1359 pid_t pid, int cpu, int group_fd) {
Teng Qin98752212017-05-19 19:05:24 -07001360 if (invalid_perf_config(ev_type, ev_config)) {
Teng Qin206b0202016-10-18 16:06:57 -07001361 return -1;
1362 }
1363 if (!((sample_period > 0) ^ (sample_freq > 0))) {
1364 fprintf(
1365 stderr, "Exactly one of sample_period / sample_freq should be set\n"
1366 );
1367 return -1;
1368 }
1369
1370 struct perf_event_attr attr = {};
1371 attr.type = ev_type;
1372 attr.config = ev_config;
Teng Qin99978d22017-12-15 00:22:13 -08001373 if (pid > 0)
1374 attr.inherit = 1;
Teng Qin206b0202016-10-18 16:06:57 -07001375 if (sample_freq > 0) {
1376 attr.freq = 1;
1377 attr.sample_freq = sample_freq;
1378 } else {
1379 attr.sample_period = sample_period;
1380 }
1381
Teng Qinbf2513d2018-05-16 13:09:09 -07001382 return bpf_attach_perf_event_raw(progfd, &attr, pid, cpu, group_fd, 0);
Teng Qin206b0202016-10-18 16:06:57 -07001383}
1384
Teng Qind6827332017-05-23 16:35:11 -07001385int bpf_close_perf_event_fd(int fd) {
1386 int res, error = 0;
1387 if (fd >= 0) {
1388 res = ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
1389 if (res != 0) {
1390 perror("ioctl(PERF_EVENT_IOC_DISABLE) failed");
1391 error = res;
1392 }
1393 res = close(fd);
1394 if (res != 0) {
1395 perror("close perf event FD failed");
1396 error = (res && !error) ? res : error;
1397 }
1398 }
1399 return error;
Teng Qin206b0202016-10-18 16:06:57 -07001400}
Huapeng Zhou37dcac02016-12-20 13:42:01 -08001401
// Pin BPF object `fd` at `pathname` in the bpffs (BPF_OBJ_PIN).
// Returns the syscall result: 0 on success, -1 with errno set on failure.
int bpf_obj_pin(int fd, const char *pathname)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.bpf_fd = fd;
  attr.pathname = ptr_to_u64((void *)pathname);

  return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
}
1412
// Retrieve a new fd for the BPF object pinned at `pathname` (BPF_OBJ_GET).
// Returns the fd on success, -1 with errno set on failure.
int bpf_obj_get(const char *pathname)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.pathname = ptr_to_u64((void *)pathname);

  return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}
Martin KaFai Lau3c24ad92017-10-19 23:47:39 -07001422
// Fetch the ID of the loaded BPF program following `start_id`
// (BPF_PROG_GET_NEXT_ID). On success stores it in *next_id and returns 0;
// on failure returns the syscall result (-1 with errno set, e.g. ENOENT at
// the end of the list) and leaves *next_id untouched.
int bpf_prog_get_next_id(uint32_t start_id, uint32_t *next_id)
{
  union bpf_attr attr;
  int rc;

  memset(&attr, 0, sizeof(attr));
  attr.start_id = start_id;

  rc = syscall(__NR_bpf, BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
  if (rc == 0)
    *next_id = attr.next_id;
  return rc;
}
1437
// Open an fd for the loaded BPF program with the given ID
// (BPF_PROG_GET_FD_BY_ID). Returns the fd, or -1 with errno set.
int bpf_prog_get_fd_by_id(uint32_t id)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.prog_id = id;
  return syscall(__NR_bpf, BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
}
1447
// Open an fd for the BPF map with the given ID (BPF_MAP_GET_FD_BY_ID).
// Returns the fd, or -1 with errno set.
int bpf_map_get_fd_by_id(uint32_t id)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.map_id = id;
  return syscall(__NR_bpf, BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
}