blob: c23030e9cb10ccb9a267c685ddd6590a3cb0bbc0 [file] [log] [blame]
Brenden Blanco246b9422015-06-05 11:15:27 -07001/*
2 * Copyright (c) 2015 PLUMgrid, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Nan Xiao667988c2017-08-28 11:44:19 +080016#ifndef _GNU_SOURCE
Colin Ian Kinga12db192017-07-06 13:58:17 +010017#define _GNU_SOURCE
Nan Xiao667988c2017-08-28 11:44:19 +080018#endif
Brenden Blanco246b9422015-06-05 11:15:27 -070019
Brenden Blancocd5cb412015-04-26 09:41:58 -070020#include <arpa/inet.h>
Brenden Blancoa94bd932015-04-26 00:56:42 -070021#include <errno.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070022#include <fcntl.h>
Teng Qin01b07ba2017-11-20 13:28:03 -080023#include <inttypes.h>
Brenden Blanco3069caa2016-08-01 18:12:11 -070024#include <limits.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070025#include <linux/bpf.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070026#include <linux/bpf_common.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070027#include <linux/if_packet.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070028#include <linux/perf_event.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070029#include <linux/pkt_cls.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070030#include <linux/rtnetlink.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070031#include <linux/sched.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070032#include <linux/unistd.h>
33#include <linux/version.h>
Brenden Blancoa94bd932015-04-26 00:56:42 -070034#include <net/ethernet.h>
35#include <net/if.h>
Brenden Blancofa073452017-05-30 17:35:53 -070036#include <sched.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070037#include <stdbool.h>
Brenden Blancobb7200c2015-06-04 18:01:42 -070038#include <stdio.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070039#include <stdlib.h>
40#include <string.h>
41#include <sys/ioctl.h>
Brenden Blanco4b4bd272015-11-30 10:54:47 -080042#include <sys/resource.h>
Derek35c25012017-01-22 20:58:23 -080043#include <sys/stat.h>
44#include <sys/types.h>
Brenden Blancoa934c902017-05-31 08:44:22 -070045#include <unistd.h>
Alexei Starovoitovb1df37c2017-09-06 19:47:47 -070046#include <linux/if_alg.h>
Brenden Blancocd5cb412015-04-26 09:41:58 -070047
Brenden Blancoa94bd932015-04-26 00:56:42 -070048#include "libbpf.h"
Brenden Blanco8207d102015-09-25 13:58:30 -070049#include "perf_reader.h"
Brenden Blancoa94bd932015-04-26 00:56:42 -070050
nikolay.samofatovc5308e92017-12-28 19:01:31 +030051// TODO: Remove this when CentOS 6 support is not needed anymore
52#include "setns.h"
53
Brenden Blancof275d3d2015-07-06 23:41:23 -070054// TODO: remove these defines when linux-libc-dev exports them properly
55
56#ifndef __NR_bpf
Naveen N. Rao0006ad12016-04-29 16:42:58 +053057#if defined(__powerpc64__)
58#define __NR_bpf 361
Zvonko Kosic98121a32017-03-07 07:30:25 +010059#elif defined(__s390x__)
60#define __NR_bpf 351
Zhiyi Sun8e434b72016-12-06 16:21:37 +080061#elif defined(__aarch64__)
62#define __NR_bpf 280
Naveen N. Rao0006ad12016-04-29 16:42:58 +053063#else
Brenden Blancof275d3d2015-07-06 23:41:23 -070064#define __NR_bpf 321
65#endif
Naveen N. Rao0006ad12016-04-29 16:42:58 +053066#endif
Brenden Blancof275d3d2015-07-06 23:41:23 -070067
68#ifndef SO_ATTACH_BPF
69#define SO_ATTACH_BPF 50
70#endif
71
72#ifndef PERF_EVENT_IOC_SET_BPF
73#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
74#endif
75
76#ifndef PERF_FLAG_FD_CLOEXEC
77#define PERF_FLAG_FD_CLOEXEC (1UL << 3)
78#endif
79
nikolay.samofatovc5308e92017-12-28 19:01:31 +030080// TODO: Remove this when CentOS 6 support is not needed anymore
81#ifndef AF_ALG
82#define AF_ALG 38
83#endif
84
Martin KaFai Laudf368162017-10-19 12:46:48 -070085#define min(x, y) ((x) < (y) ? (x) : (y))
86
// Name of a BPF helper function and the kernel version that introduced it.
struct bpf_helper {
  char *name;
  char *required_version;
};

// Table of BPF helpers ordered by helper id: entry i describes helper id
// i + 1 (bpf_print_hints() indexes helpers[helper_id - 1]).  Used to tell
// the user which minimum kernel version a rejected helper requires.
// NOTE(review): must be kept in sync with the helper enumeration in the
// kernel's include/uapi/linux/bpf.h — verify when adding entries.
static struct bpf_helper helpers[] = {
  {"map_lookup_elem", "3.19"},
  {"map_update_elem", "3.19"},
  {"map_delete_elem", "3.19"},
  {"probe_read", "4.1"},
  {"ktime_get_ns", "4.1"},
  {"trace_printk", "4.1"},
  {"get_prandom_u32", "4.1"},
  {"get_smp_processor_id", "4.1"},
  {"skb_store_bytes", "4.1"},
  {"l3_csum_replace", "4.1"},
  {"l4_csum_replace", "4.1"},
  {"tail_call", "4.2"},
  {"clone_redirect", "4.2"},
  {"get_current_pid_tgid", "4.2"},
  {"get_current_uid_gid", "4.2"},
  {"get_current_comm", "4.2"},
  {"get_cgroup_classid", "4.3"},
  {"skb_vlan_push", "4.3"},
  {"skb_vlan_pop", "4.3"},
  {"skb_get_tunnel_key", "4.3"},
  {"skb_set_tunnel_key", "4.3"},
  {"perf_event_read", "4.3"},
  {"redirect", "4.4"},
  {"get_route_realm", "4.4"},
  {"perf_event_output", "4.4"},
  {"skb_load_bytes", "4.5"},
  {"get_stackid", "4.6"},
  {"csum_diff", "4.6"},
  {"skb_get_tunnel_opt", "4.6"},
  {"skb_set_tunnel_opt", "4.6"},
  {"skb_change_proto", "4.8"},
  {"skb_change_type", "4.8"},
  {"skb_under_cgroup", "4.8"},
  {"get_hash_recalc", "4.8"},
  {"get_current_task", "4.8"},
  {"probe_write_user", "4.8"},
  {"current_task_under_cgroup", "4.9"},
  {"skb_change_tail", "4.9"},
  {"skb_pull_data", "4.9"},
  {"csum_update", "4.9"},
  {"set_hash_invalid", "4.9"},
  {"get_numa_node_id", "4.10"},
  {"skb_change_head", "4.10"},
  {"xdp_adjust_head", "4.10"},
  {"probe_read_str", "4.11"},
  {"get_socket_cookie", "4.12"},
  {"get_socket_uid", "4.12"},
  {"set_hash", "4.13"},
  {"setsockopt", "4.13"},
  {"skb_adjust_room", "4.13"},
  {"redirect_map", "4.14"},
  {"sk_redirect_map", "4.14"},
  {"sock_map_update", "4.14"},
  {"xdp_adjust_meta", "4.15"},
  {"perf_event_read_value", "4.15"},
  {"perf_prog_read_value", "4.15"},
  {"getsockopt", "4.15"},
  {"override_return", "4.16"},
  {"sock_ops_cb_flags_set", "4.16"},
  {"msg_redirect_map", "4.17"},
  {"msg_apply_bytes", "4.17"},
  {"msg_cork_bytes", "4.17"},
  {"msg_pull_data", "4.17"},
  {"bind", "4.17"},
  {"xdp_adjust_tail", "4.18"},
  {"skb_get_xfrm_state", "4.18"},
  {"get_stack", "4.18"},
  {"skb_load_bytes_relative", "4.18"},
  {"fib_lookup", "4.18"},
  {"sock_hash_update", "4.18"},
  {"msg_redirect_hash", "4.18"},
  {"sk_redirect_hash", "4.18"},
  {"lwt_push_encap", "4.18"},
  {"lwt_seg6_store_bytes", "4.18"},
  {"lwt_seg6_adjust_srh", "4.18"},
  {"lwt_seg6_action", "4.18"},
  {"rc_repeat", "4.18"},
  {"rc_keydown", "4.18"},
};
172
// Widen a pointer to the 64-bit integer representation that the kernel's
// union bpf_attr uses for userspace addresses.
static uint64_t ptr_to_u64(void *ptr)
{
  uintptr_t addr = (uintptr_t)ptr;
  return (uint64_t)addr;
}
177
// Create a BPF map via the BPF_MAP_CREATE command.
// name may be NULL or empty; when given, it is truncated to the kernel's
// BPF_OBJ_NAME_LEN limit.  Returns the new map fd, or a negative value with
// errno set on failure.
int bpf_create_map(enum bpf_map_type map_type, const char *name,
                   int key_size, int value_size,
                   int max_entries, int map_flags)
{
  size_t name_len = name ? strlen(name) : 0;
  union bpf_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.map_type = map_type;
  attr.key_size = key_size;
  attr.value_size = value_size;
  attr.max_entries = max_entries;
  attr.map_flags = map_flags;
  // Only copy when a name was supplied: memcpy(dst, NULL, 0) is undefined
  // behavior even for a zero length.
  if (name_len)
    memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));

  int ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));

  // Older kernels don't support map names and reject the attribute;
  // clear the name and retry once.
  if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
    memset(attr.map_name, 0, BPF_OBJ_NAME_LEN);
    ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
  }

  if (ret < 0 && errno == EPERM) {
    // EPERM may mean insufficient RLIMIT_MEMLOCK for the map's locked
    // memory rather than missing privilege; bump the limit to unlimited
    // and retry once (same rationale as in bpf_prog_load).
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
    }
  }
  return ret;
}
212
// Insert or update the entry for `key` in map `fd` with `value`, subject to
// the BPF_ANY/BPF_NOEXIST/BPF_EXIST semantics carried in `flags`.
// Returns the raw bpf(2) syscall result (0 on success, -1 with errno set).
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
{
  union bpf_attr arg;

  memset(&arg, 0, sizeof(arg));
  arg.flags = flags;
  arg.map_fd = fd;
  arg.value = ptr_to_u64(value);
  arg.key = ptr_to_u64(key);

  return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &arg, sizeof(arg));
}
224
// Fetch the entry stored under `key` in map `fd`; on success the kernel
// copies the map value into `value`.  Returns the raw bpf(2) result
// (0 on success, -1 with errno set, e.g. ENOENT when the key is absent).
int bpf_lookup_elem(int fd, void *key, void *value)
{
  union bpf_attr arg;

  memset(&arg, 0, sizeof(arg));
  arg.value = ptr_to_u64(value);
  arg.key = ptr_to_u64(key);
  arg.map_fd = fd;

  return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &arg, sizeof(arg));
}
235
// Remove the entry stored under `key` from map `fd`.
// Returns the raw bpf(2) result (0 on success, -1 with errno set).
int bpf_delete_elem(int fd, void *key)
{
  union bpf_attr arg;

  memset(&arg, 0, sizeof(arg));
  arg.key = ptr_to_u64(key);
  arg.map_fd = fd;

  return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &arg, sizeof(arg));
}
245
// Retrieve the first key of map `fd` into `key` (a buffer of `key_size`
// bytes).  Returns the raw bpf(2) result of BPF_MAP_GET_NEXT_KEY, or -1 if
// no usable starting key could be found on older kernels.
int bpf_get_first_key(int fd, void *key, size_t key_size)
{
  union bpf_attr attr;
  int i, res;

  memset(&attr, 0, sizeof(attr));
  attr.map_fd = fd;
  attr.key = 0;
  attr.next_key = ptr_to_u64(key);

  // 4.12 and above kernel supports passing NULL to BPF_MAP_GET_NEXT_KEY
  // to get first key of the map. For older kernels, the call will fail.
  res = syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
  if (res < 0 && errno == EFAULT) {
    // Fall back to try to find a non-existing key: GET_NEXT_KEY on a key
    // that is not in the map returns the first key.
    static unsigned char try_values[3] = {0, 0xff, 0x55};
    attr.key = ptr_to_u64(key);
    for (i = 0; i < 3; i++) {
      memset(key, try_values[i], key_size);
      // We want to check the existence of the key but we don't know the size
      // of map's value. So we pass an invalid pointer for value, expect
      // the call to fail and check if the error is ENOENT indicating the
      // key doesn't exist. If we use NULL for the invalid pointer, it might
      // trigger a page fault in kernel and affect performance. Hence we use
      // ~0 which will fail and return fast.
      // This should fail since we pass an invalid pointer for value.
      if (bpf_lookup_elem(fd, key, (void *)~0) >= 0)
        return -1;
      // This means the key doesn't exist.
      if (errno == ENOENT)
        return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
    }
    return -1;
  } else {
    return res;
  }
}
283
// Write the key that follows `key` in map `fd`'s iteration order into
// `next_key`.  Returns the raw bpf(2) result (0 on success, -1 with errno
// set, e.g. ENOENT when `key` was the last one).
int bpf_get_next_key(int fd, void *key, void *next_key)
{
  union bpf_attr arg;

  memset(&arg, 0, sizeof(arg));
  arg.next_key = ptr_to_u64(next_key);
  arg.key = ptr_to_u64(key);
  arg.map_fd = fd;

  return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &arg, sizeof(arg));
}
294
Teng Qin797669f2017-11-03 16:04:32 -0700295static void bpf_print_hints(int ret, char *log)
Brendan Gregg34826372017-01-13 14:02:02 -0800296{
Teng Qin797669f2017-11-03 16:04:32 -0700297 if (ret < 0)
298 fprintf(stderr, "bpf: Failed to load program: %s\n", strerror(errno));
Brendan Gregg34826372017-01-13 14:02:02 -0800299 if (log == NULL)
300 return;
Teng Qin797669f2017-11-03 16:04:32 -0700301 else
302 fprintf(stderr, "%s\n", log);
303
304 if (ret >= 0)
305 return;
Brendan Gregg34826372017-01-13 14:02:02 -0800306
307 // The following error strings will need maintenance to match LLVM.
308
309 // stack busting
310 if (strstr(log, "invalid stack off=-") != NULL) {
311 fprintf(stderr, "HINT: Looks like you exceeded the BPF stack limit. "
312 "This can happen if you allocate too much local variable storage. "
313 "For example, if you allocated a 1 Kbyte struct (maybe for "
314 "BPF_PERF_OUTPUT), busting a max stack of 512 bytes.\n\n");
315 }
316
317 // didn't check NULL on map lookup
318 if (strstr(log, "invalid mem access 'map_value_or_null'") != NULL) {
319 fprintf(stderr, "HINT: The 'map_value_or_null' error can happen if "
320 "you dereference a pointer value from a map lookup without first "
321 "checking if that pointer is NULL.\n\n");
322 }
323
324 // lacking a bpf_probe_read
325 if (strstr(log, "invalid mem access 'inv'") != NULL) {
326 fprintf(stderr, "HINT: The invalid mem access 'inv' error can happen "
327 "if you try to dereference memory without first using "
328 "bpf_probe_read() to copy it to the BPF stack. Sometimes the "
329 "bpf_probe_read is automatic by the bcc rewriter, other times "
330 "you'll need to be explicit.\n\n");
331 }
Paul Chaignon37308292018-01-27 23:06:39 +0100332
333 // helper function not found in kernel
334 char *helper_str = strstr(log, "invalid func ");
335 if (helper_str != NULL) {
336 helper_str += strlen("invalid func ");
337 char *str = strchr(helper_str, '#');
338 if (str != NULL) {
339 helper_str = str + 1;
340 }
341 int helper_id = atoi(helper_str);
342 if (helper_id && helper_id < sizeof(helpers) / sizeof(struct bpf_helper)) {
343 struct bpf_helper helper = helpers[helper_id - 1];
344 fprintf(stderr, "HINT: bpf_%s missing (added in Linux %s).\n\n",
345 helper.name, helper.required_version);
346 }
347 }
Brendan Gregg34826372017-01-13 14:02:02 -0800348}
Brenden Blancoa94bd932015-04-26 00:56:42 -0700349#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))
350
// Query kernel metadata for a BPF program or map fd via
// BPF_OBJ_GET_INFO_BY_FD.  On entry *info_len is the size of the buffer at
// `info`; on success the kernel fills `info` and *info_len is updated to the
// number of bytes actually written.  Returns 0 on success, negative with
// errno set otherwise.
int bpf_obj_get_info(int prog_map_fd, void *info, uint32_t *info_len)
{
  union bpf_attr attr;
  int rc;

  memset(&attr, 0, sizeof(attr));
  attr.info.info = ptr_to_u64(info);
  attr.info.info_len = *info_len;
  attr.info.bpf_fd = prog_map_fd;

  rc = syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
  if (rc == 0)
    *info_len = attr.info.info_len;

  return rc;
}
367
// Compute the kernel-style tag of a BPF program: the SHA1 of its instruction
// stream (with map-fd immediates zeroed out, since fds differ between runs),
// byte-swapped into the first 8 bytes.  Uses the kernel crypto API via an
// AF_ALG socket.  Returns 0 and stores the tag in *ptag on success,
// negative on failure.
int bpf_prog_compute_tag(const struct bpf_insn *insns, int prog_len,
                         unsigned long long *ptag)
{
  struct sockaddr_alg alg = {
    .salg_family = AF_ALG,
    .salg_type = "hash",
    .salg_name = "sha1",
  };
  int shafd = socket(AF_ALG, SOCK_SEQPACKET, 0);
  if (shafd < 0) {
    fprintf(stderr, "sha1 socket not available %s\n", strerror(errno));
    return -1;
  }
  int ret = bind(shafd, (struct sockaddr *)&alg, sizeof(alg));
  if (ret < 0) {
    fprintf(stderr, "sha1 bind fail %s\n", strerror(errno));
    close(shafd);
    return ret;
  }
  // accept() yields the operation socket on which data is hashed.
  int shafd2 = accept(shafd, NULL, 0);
  if (shafd2 < 0) {
    fprintf(stderr, "sha1 accept fail %s\n", strerror(errno));
    close(shafd);
    return -1;
  }
  // Copy the program, zeroing the imm field of each 16-byte (two-slot)
  // BPF_LD_IMM64 map-fd load so the tag is stable across runs.
  // NOTE(review): assumes prog_len is a multiple of 8 (one bpf_insn);
  // the divisor 8 mirrors sizeof(struct bpf_insn).
  struct bpf_insn prog[prog_len / 8];
  bool map_ld_seen = false;
  int i;
  for (i = 0; i < prog_len / 8; i++) {
    prog[i] = insns[i];
    if (insns[i].code == (BPF_LD | BPF_DW | BPF_IMM) &&
        insns[i].src_reg == BPF_PSEUDO_MAP_FD &&
        !map_ld_seen) {
      prog[i].imm = 0;
      map_ld_seen = true;
    } else if (insns[i].code == 0 && map_ld_seen) {
      // Second half of the 64-bit immediate load: zero it too.
      prog[i].imm = 0;
      map_ld_seen = false;
    } else {
      map_ld_seen = false;
    }
  }
  ret = write(shafd2, prog, prog_len);
  if (ret != prog_len) {
    fprintf(stderr, "sha1 write fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }

  union {
    unsigned char sha[20];
    unsigned long long tag;
  } u = {};
  ret = read(shafd2, u.sha, 20);
  if (ret != 20) {
    fprintf(stderr, "sha1 read fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }
  // The tag is the first 8 bytes of the digest, presented big-endian.
  *ptag = __builtin_bswap64(u.tag);
  close(shafd2);
  close(shafd);
  return 0;
}
434
// Read the tag of a loaded BPF program from /proc/self/fdinfo/<fd>.
// Returns 0 with *ptag set on success, -1 if fdinfo can't be opened, -2 if
// the expected prog_tag line is missing or malformed.
// NOTE(review): relies on prog_tag being the 6th line of fdinfo for a BPF
// program fd (after pos, flags, mnt_id, prog_type, prog_jited) — confirm
// against the running kernel's fdinfo layout.
int bpf_prog_get_tag(int fd, unsigned long long *ptag)
{
  char fmt[64];
  unsigned i;
  snprintf(fmt, sizeof(fmt), "/proc/self/fdinfo/%d", fd);
  FILE *f = fopen(fmt, "r");
  if (!f) {
/*  fprintf(stderr, "failed to open fdinfo %s\n", strerror(errno));*/
    return -1;
  }
  // Skip pos, flags, mnt_id, prog_type, prog_jited; the 6th line is
  // prog_tag.  Previously the fgets() results were ignored, so a short
  // fdinfo (e.g. a non-BPF fd) left a stale line in the buffer and could
  // make the function "succeed" with a bogus tag parsed from it.
  for (i = 0; i < 6; i++) {
    if (!fgets(fmt, sizeof(fmt), f)) {
      fclose(f);
      return -2;
    }
  }
  fclose(f);
  char *p = strchr(fmt, ':');
  if (!p) {
/*  fprintf(stderr, "broken fdinfo %s\n", fmt);*/
    return -2;
  }
  unsigned long long tag = 0;
  sscanf(p + 1, "%llx", &tag);
  *ptag = tag;
  return 0;
}
461
// Load a BPF program via BPF_PROG_LOAD, with several compatibility retries.
// prog_type: kernel program type; name: program name (a leading "kprobe__",
//   "tracepoint__" or "raw_tracepoint__" prefix is stripped before handing
//   the name to the kernel, which truncates it to BPF_OBJ_NAME_LEN - 1);
// insns/prog_len: instruction buffer and its size in bytes;
// license/kern_version: passed through to the kernel;
// log_level: verifier log verbosity (0 disables logging up front);
// log_buf/log_buf_size: optional caller-owned buffer for the verifier log.
// Returns the program fd on success, negative with errno set on failure.
// NOTE(review): `name` is dereferenced by strncmp() below without a NULL
// check, although name_len guards NULL elsewhere — confirm callers never
// pass NULL.
int bpf_prog_load(enum bpf_prog_type prog_type, const char *name,
                  const struct bpf_insn *insns, int prog_len,
                  const char *license, unsigned kern_version,
                  int log_level, char *log_buf, unsigned log_buf_size)
{
  size_t name_len = name ? strlen(name) : 0;
  union bpf_attr attr;
  char *tmp_log_buf = NULL;
  unsigned tmp_log_buf_size = 0;
  int ret = 0, name_offset = 0;

  memset(&attr, 0, sizeof(attr));

  attr.prog_type = prog_type;
  attr.kern_version = kern_version;
  attr.license = ptr_to_u64((void *)license);

  attr.insns = ptr_to_u64((void *)insns);
  attr.insn_cnt = prog_len / sizeof(struct bpf_insn);
  if (attr.insn_cnt > BPF_MAXINSNS) {
    errno = EINVAL;
    fprintf(stderr,
            "bpf: %s. Program too large (%u insns), at most %d insns\n\n",
            strerror(errno), attr.insn_cnt, BPF_MAXINSNS);
    return -1;
  }

  attr.log_level = log_level;
  if (attr.log_level > 0) {
    if (log_buf_size > 0) {
      // Use user-provided log buffer if available.
      log_buf[0] = 0;
      attr.log_buf = ptr_to_u64(log_buf);
      attr.log_size = log_buf_size;
    } else {
      // Create and use temporary log buffer if user didn't provide one.
      tmp_log_buf_size = LOG_BUF_SIZE;
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        attr.log_level = 0;
      } else {
        tmp_log_buf[0] = 0;
        attr.log_buf = ptr_to_u64(tmp_log_buf);
        attr.log_size = tmp_log_buf_size;
      }
    }
  }

  // Strip bcc's section-style prefix so the kernel sees the bare name.
  if (strncmp(name, "kprobe__", 8) == 0)
    name_offset = 8;
  else if (strncmp(name, "tracepoint__", 12) == 0)
    name_offset = 12;
  else if (strncmp(name, "raw_tracepoint__", 16) == 0)
    name_offset = 16;
  memcpy(attr.prog_name, name + name_offset,
         min(name_len - name_offset, BPF_OBJ_NAME_LEN - 1));

  ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  // BPF object name is not supported on older Kernels.
  // If we failed due to this, clear the name and try again.
  if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
    memset(attr.prog_name, 0, BPF_OBJ_NAME_LEN);
    ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
  }

  if (ret < 0 && errno == EPERM) {
    // When EPERM is returned, two reasons are possible:
    // 1. user has no permissions for bpf()
    // 2. user has insufficient rlimit for locked memory
    // Unfortunately, there is no api to inspect the current usage of locked
    // mem for the user, so an accurate calculation of how much memory to lock
    // for this new program is difficult to calculate. As a hack, bump the limit
    // to unlimited. If program load fails again, return the error.
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
    }
  }

  // The load has failed. Handle log message.
  if (ret < 0) {
    // User has provided a log buffer.
    if (log_buf_size) {
      // If logging is not already enabled, enable it and do the syscall again.
      if (attr.log_level == 0) {
        attr.log_level = 1;
        attr.log_buf = ptr_to_u64(log_buf);
        attr.log_size = log_buf_size;
        ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
      }
      // Print the log message and return.
      bpf_print_hints(ret, log_buf);
      if (errno == ENOSPC)
        fprintf(stderr, "bpf: log_buf size may be insufficient\n");
      goto return_result;
    }

    // User did not provide log buffer. We will try to increase size of
    // our temporary log buffer to get full error message.
    if (tmp_log_buf)
      free(tmp_log_buf);
    tmp_log_buf_size = LOG_BUF_SIZE;
    if (attr.log_level == 0)
      attr.log_level = 1;
    for (;;) {
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        goto return_result;
      }
      tmp_log_buf[0] = 0;
      attr.log_buf = ptr_to_u64(tmp_log_buf);
      attr.log_size = tmp_log_buf_size;

      ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
      if (ret < 0 && errno == ENOSPC) {
        // Temporary buffer size is not enough. Double it and try again.
        free(tmp_log_buf);
        tmp_log_buf = NULL;
        tmp_log_buf_size <<= 1;
      } else {
        break;
      }
    }
  }

  // Check if we should print the log message if log_level is not 0,
  // either specified by user or set due to error.
  if (attr.log_level > 0) {
    // Don't print if user enabled logging and provided log buffer,
    // but there is no error.
    if (log_buf && ret < 0)
      bpf_print_hints(ret, log_buf);
    else if (tmp_log_buf)
      bpf_print_hints(ret, tmp_log_buf);
  }

return_result:
  if (tmp_log_buf)
    free(tmp_log_buf);
  return ret;
}
610
// Open a non-blocking, close-on-exec AF_PACKET raw socket capturing all
// protocols, optionally bound to the interface `name`.  An empty or NULL
// name returns the unbound socket.  Returns the socket fd, or -1 on error.
int bpf_open_raw_sock(const char *name)
{
  struct sockaddr_ll addr;
  int sock;

  sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC,
                htons(ETH_P_ALL));
  if (sock < 0) {
    fprintf(stderr, "cannot create raw socket\n");
    return -1;
  }

  /* Do not bind on empty interface names */
  if (!name || *name == '\0')
    return sock;

  memset(&addr, 0, sizeof(addr));
  addr.sll_family = AF_PACKET;
  addr.sll_protocol = htons(ETH_P_ALL);
  addr.sll_ifindex = if_nametoindex(name);
  if (addr.sll_ifindex == 0) {
    fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
    close(sock);
    return -1;
  }

  if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
    fprintf(stderr, "bind to %s: %s\n", name, strerror(errno));
    close(sock);
    return -1;
  }

  return sock;
}
643
644int bpf_attach_socket(int sock, int prog) {
Brenden Blancoaf956732015-06-09 13:58:42 -0700645 return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
Brenden Blancoa94bd932015-04-26 00:56:42 -0700646}
647
Song Liuf180ea12018-01-08 12:46:07 -0800648#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
649static int bpf_find_probe_type(const char *event_type)
650{
651 int fd;
652 int ret;
653 char buf[PATH_MAX];
654
655 ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
656 if (ret < 0 || ret >= sizeof(buf))
657 return -1;
658
659 fd = open(buf, O_RDONLY);
660 if (fd < 0)
661 return -1;
662 ret = read(fd, buf, sizeof(buf));
663 close(fd);
664 if (ret < 0 || ret >= sizeof(buf))
665 return -1;
666 errno = 0;
667 ret = (int)strtol(buf, NULL, 10);
668 return errno ? -1 : ret;
669}
670
671#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
672static int bpf_get_retprobe_bit(const char *event_type)
673{
674 int fd;
675 int ret;
676 char buf[PATH_MAX];
677
678 ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
679 if (ret < 0 || ret >= sizeof(buf))
680 return -1;
681
682 fd = open(buf, O_RDONLY);
683 if (fd < 0)
684 return -1;
685 ret = read(fd, buf, sizeof(buf));
686 close(fd);
687 if (ret < 0 || ret >= sizeof(buf))
688 return -1;
689 if (strlen(buf) < strlen("config:"))
690 return -1;
691 errno = 0;
692 ret = (int)strtol(buf + strlen("config:"), NULL, 10);
693 return errno ? -1 : ret;
694}
695
/*
 * new kernel API allows creating [k,u]probe with perf_event_open, which
 * makes it easier to clean up the [k,u]probe. This function tries to
 * create pfd with the new API.
 *
 * name: kernel function name (kprobe) or binary path (uprobe);
 * offs: offset within the function/binary; pid: target pid for uprobes
 * (negative means no filter); event_type: "kprobe" or "uprobe";
 * is_return: nonzero for a return probe.
 * Returns the perf event fd, or -1 if the PMU is unavailable or the
 * syscall fails.
 */
static int bpf_try_perf_event_open_with_probe(const char *name, uint64_t offs,
    int pid, char *event_type, int is_return)
{
  struct perf_event_attr attr = {};
  int type = bpf_find_probe_type(event_type);
  int is_return_bit = bpf_get_retprobe_bit(event_type);
  int cpu = 0;

  // Both sysfs lookups must succeed, otherwise the kernel lacks the
  // dynamic-probe PMU and the caller should fall back to debugfs.
  if (type < 0 || is_return_bit < 0)
    return -1;
  attr.sample_period = 1;
  attr.wakeup_events = 1;
  if (is_return)
    attr.config |= 1 << is_return_bit;

  /*
   * struct perf_event_attr in latest perf_event.h has the following
   * extension to config1 and config2. To keep bcc compatible with
   * older perf_event.h, we use config1 and config2 here instead of
   * kprobe_func, uprobe_path, kprobe_addr, and probe_offset.
   *
   * union {
   *   __u64 bp_addr;
   *   __u64 kprobe_func;
   *   __u64 uprobe_path;
   *   __u64 config1;
   * };
   * union {
   *   __u64 bp_len;
   *   __u64 kprobe_addr;
   *   __u64 probe_offset;
   *   __u64 config2;
   * };
   */
  attr.config2 = offs;  /* config2 here is kprobe_addr or probe_offset */
  attr.size = sizeof(attr);
  attr.type = type;
  /* config1 here is kprobe_func or uprobe_path */
  attr.config1 = ptr_to_u64((void *)name);
  // PID filter is only possible for uprobe events.
  if (pid < 0)
    pid = -1;
  // perf_event_open API doesn't allow both pid and cpu to be -1.
  // So only set it to -1 when PID is not -1.
  // Tracing events do not do CPU filtering in any cases.
  if (pid != -1)
    cpu = -1;
  return syscall(__NR_perf_event_open, &attr, pid, cpu, -1 /* group_fd */,
                 PERF_FLAG_FD_CLOEXEC);
}
751
// When a valid Perf Event FD provided through pfd, it will be used to enable
// and attach BPF program to the event, and event_path will be ignored.
// Otherwise, event_path is expected to contain the path to the event in debugfs
// and it will be used to open the Perf Event FD.
// In either case, if the attach partially failed (such as issue with the
// ioctl operations), the **caller** need to clean up the Perf Event FD, either
// provided by the caller or opened here.
// Returns 0 on success, -1 on failure (with the error printed to stderr).
static int bpf_attach_tracing_event(int progfd, const char *event_path, int pid,
                                    int *pfd)
{
  int efd, cpu = 0;
  ssize_t bytes;
  char buf[PATH_MAX];
  struct perf_event_attr attr = {};
  // Caller did not provide a valid Perf Event FD. Create one with the debugfs
  // event path provided.
  if (*pfd < 0) {
    // The tracepoint id lives in <event_path>/id; it becomes attr.config.
    snprintf(buf, sizeof(buf), "%s/id", event_path);
    efd = open(buf, O_RDONLY, 0);
    if (efd < 0) {
      fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
      return -1;
    }

    bytes = read(efd, buf, sizeof(buf));
    if (bytes <= 0 || bytes >= sizeof(buf)) {
      fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
      close(efd);
      return -1;
    }
    close(efd);
    buf[bytes] = '\0';
    attr.config = strtol(buf, NULL, 0);
    attr.type = PERF_TYPE_TRACEPOINT;
    attr.sample_period = 1;
    attr.wakeup_events = 1;
    // PID filter is only possible for uprobe events.
    if (pid < 0)
      pid = -1;
    // perf_event_open API doesn't allow both pid and cpu to be -1.
    // So only set it to -1 when PID is not -1.
    // Tracing events do not do CPU filtering in any cases.
    if (pid != -1)
      cpu = -1;
    *pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
    if (*pfd < 0) {
      fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
      return -1;
    }
  }

  // Point the perf event at the BPF program, then enable it.
  if (ioctl(*pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF)");
    return -1;
  }
  if (ioctl(*pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    return -1;
  }

  return 0;
}
814
Teng Qinfafbf3c2018-02-27 00:26:12 -0800815int bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type,
Sandipan Das651d6902018-05-29 23:30:22 +0530816 const char *ev_name, const char *fn_name, uint64_t fn_offset)
Dereke4da6c22017-01-28 16:11:28 -0800817{
Teng Qinfafbf3c2018-02-27 00:26:12 -0800818 int kfd, pfd = -1;
Derek7174d932017-01-30 21:03:02 -0800819 char buf[256];
Teng Qinf4543fc2017-09-03 17:11:59 -0700820 char event_alias[128];
Dereke4da6c22017-01-28 16:11:28 -0800821 static char *event_type = "kprobe";
Brenden Blanco8207d102015-09-25 13:58:30 -0700822
Teng Qinfafbf3c2018-02-27 00:26:12 -0800823 // Try create the kprobe Perf Event with perf_event_open API.
Sandipan Das651d6902018-05-29 23:30:22 +0530824 pfd = bpf_try_perf_event_open_with_probe(fn_name, fn_offset, -1, event_type,
Song Liu0e9ed202018-01-08 14:25:07 -0800825 attach_type != BPF_PROBE_ENTRY);
Teng Qinfafbf3c2018-02-27 00:26:12 -0800826 // If failed, most likely Kernel doesn't support the new perf_event_open API
827 // yet. Try create the event using debugfs.
Song Liu0e9ed202018-01-08 14:25:07 -0800828 if (pfd < 0) {
829 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
830 kfd = open(buf, O_WRONLY | O_APPEND, 0);
831 if (kfd < 0) {
832 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
833 goto error;
834 }
835
836 snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
Sandipan Das651d6902018-05-29 23:30:22 +0530837
838 if (fn_offset > 0 && attach_type == BPF_PROBE_ENTRY)
839 snprintf(buf, sizeof(buf), "p:%ss/%s %s+%"PRIu64,
840 event_type, event_alias, fn_name, fn_offset);
841 else
842 snprintf(buf, sizeof(buf), "%c:%ss/%s %s",
843 attach_type == BPF_PROBE_ENTRY ? 'p' : 'r',
844 event_type, event_alias, fn_name);
845
Song Liu0e9ed202018-01-08 14:25:07 -0800846 if (write(kfd, buf, strlen(buf)) < 0) {
Yonghong Song858a3d72018-04-24 10:18:20 -0700847 if (errno == ENOENT)
848 fprintf(stderr, "cannot attach kprobe, probe entry may not exist\n");
849 else
850 fprintf(stderr, "cannot attach kprobe, %s\n", strerror(errno));
Song Liu0e9ed202018-01-08 14:25:07 -0800851 close(kfd);
852 goto error;
853 }
Martin KaFai Laua8e66962016-05-19 23:04:41 -0700854 close(kfd);
Song Liu0e9ed202018-01-08 14:25:07 -0800855 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
Brenden Blancocd5cb412015-04-26 09:41:58 -0700856 }
Teng Qinfafbf3c2018-02-27 00:26:12 -0800857 // If perf_event_open succeeded, bpf_attach_tracing_event will use the created
858 // Perf Event FD directly and buf would be empty and unused.
859 // Otherwise it will read the event ID from the path in buf, create the
860 // Perf Event event using that ID, and updated value of pfd.
861 if (bpf_attach_tracing_event(progfd, buf, -1 /* PID */, &pfd) == 0)
862 return pfd;
Dereke4da6c22017-01-28 16:11:28 -0800863
864error:
Teng Qinfafbf3c2018-02-27 00:26:12 -0800865 bpf_close_perf_event_fd(pfd);
866 return -1;
Dereke4da6c22017-01-28 16:11:28 -0800867}
868
Brenden Blancofa073452017-05-30 17:35:53 -0700869static int enter_mount_ns(int pid) {
870 struct stat self_stat, target_stat;
871 int self_fd = -1, target_fd = -1;
872 char buf[64];
873
874 if (pid < 0)
875 return -1;
876
877 if ((size_t)snprintf(buf, sizeof(buf), "/proc/%d/ns/mnt", pid) >= sizeof(buf))
878 return -1;
879
880 self_fd = open("/proc/self/ns/mnt", O_RDONLY);
881 if (self_fd < 0) {
882 perror("open(/proc/self/ns/mnt)");
883 return -1;
884 }
885
886 target_fd = open(buf, O_RDONLY);
887 if (target_fd < 0) {
888 perror("open(/proc/<pid>/ns/mnt)");
889 goto error;
890 }
891
892 if (fstat(self_fd, &self_stat)) {
893 perror("fstat(self_fd)");
894 goto error;
895 }
896
897 if (fstat(target_fd, &target_stat)) {
898 perror("fstat(target_fd)");
899 goto error;
900 }
901
902 // both target and current ns are same, avoid setns and close all fds
903 if (self_stat.st_ino == target_stat.st_ino)
904 goto error;
905
906 if (setns(target_fd, CLONE_NEWNS)) {
907 perror("setns(target)");
908 goto error;
909 }
910
911 close(target_fd);
912 return self_fd;
913
914error:
915 if (self_fd >= 0)
916 close(self_fd);
917 if (target_fd >= 0)
918 close(target_fd);
919 return -1;
920}
921
922static void exit_mount_ns(int fd) {
923 if (fd < 0)
924 return;
925
926 if (setns(fd, CLONE_NEWNS))
927 perror("setns");
928}
929
Teng Qinfafbf3c2018-02-27 00:26:12 -0800930int bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type,
931 const char *ev_name, const char *binary_path,
932 uint64_t offset, pid_t pid)
Dereke4da6c22017-01-28 16:11:28 -0800933{
Derek7174d932017-01-30 21:03:02 -0800934 char buf[PATH_MAX];
Teng Qin0760b752017-09-03 19:07:59 -0700935 char event_alias[PATH_MAX];
Dereke4da6c22017-01-28 16:11:28 -0800936 static char *event_type = "uprobe";
Teng Qinfafbf3c2018-02-27 00:26:12 -0800937 int res, kfd = -1, pfd = -1, ns_fd = -1;
938 // Try create the uprobe Perf Event with perf_event_open API.
Song Liu0e9ed202018-01-08 14:25:07 -0800939 pfd = bpf_try_perf_event_open_with_probe(binary_path, offset, pid, event_type,
940 attach_type != BPF_PROBE_ENTRY);
Teng Qinfafbf3c2018-02-27 00:26:12 -0800941 // If failed, most likely Kernel doesn't support the new perf_event_open API
942 // yet. Try create the event using debugfs.
Song Liu0e9ed202018-01-08 14:25:07 -0800943 if (pfd < 0) {
944 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
945 kfd = open(buf, O_WRONLY | O_APPEND, 0);
946 if (kfd < 0) {
947 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
948 goto error;
949 }
Dereke4da6c22017-01-28 16:11:28 -0800950
Song Liu0e9ed202018-01-08 14:25:07 -0800951 res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
952 if (res < 0 || res >= sizeof(event_alias)) {
953 fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
954 goto error;
955 }
956 res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
957 event_type, event_alias, binary_path, offset);
958 if (res < 0 || res >= sizeof(buf)) {
959 fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
960 goto error;
961 }
kmjohansen4b87af02017-03-30 00:58:31 -0700962
Song Liu0e9ed202018-01-08 14:25:07 -0800963 ns_fd = enter_mount_ns(pid);
964 if (write(kfd, buf, strlen(buf)) < 0) {
965 if (errno == EINVAL)
966 fprintf(stderr, "check dmesg output for possible cause\n");
967 goto error;
968 }
969 close(kfd);
Teng Qinfafbf3c2018-02-27 00:26:12 -0800970 kfd = -1;
Song Liu0e9ed202018-01-08 14:25:07 -0800971 exit_mount_ns(ns_fd);
972 ns_fd = -1;
Dereke4da6c22017-01-28 16:11:28 -0800973
Song Liu0e9ed202018-01-08 14:25:07 -0800974 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
975 }
Teng Qinfafbf3c2018-02-27 00:26:12 -0800976 // If perf_event_open succeeded, bpf_attach_tracing_event will use the created
977 // Perf Event FD directly and buf would be empty and unused.
978 // Otherwise it will read the event ID from the path in buf, create the
979 // Perf Event event using that ID, and updated value of pfd.
980 if (bpf_attach_tracing_event(progfd, buf, pid, &pfd) == 0)
981 return pfd;
Brenden Blanco75982492015-11-06 10:43:05 -0800982
983error:
Teng Qin0760b752017-09-03 19:07:59 -0700984 if (kfd >= 0)
985 close(kfd);
Brenden Blancofa073452017-05-30 17:35:53 -0700986 exit_mount_ns(ns_fd);
Teng Qinfafbf3c2018-02-27 00:26:12 -0800987 bpf_close_perf_event_fd(pfd);
988 return -1;
Brenden Blancocd5cb412015-04-26 09:41:58 -0700989}
990
Dereke4da6c22017-01-28 16:11:28 -0800991static int bpf_detach_probe(const char *ev_name, const char *event_type)
992{
Song Liu0e9ed202018-01-08 14:25:07 -0800993 int kfd = -1, res;
Teng Qin0760b752017-09-03 19:07:59 -0700994 char buf[PATH_MAX];
Song Liu0e9ed202018-01-08 14:25:07 -0800995 int found_event = 0;
996 size_t bufsize = 0;
997 char *cptr = NULL;
998 FILE *fp;
999
1000 /*
1001 * For [k,u]probe created with perf_event_open (on newer kernel), it is
1002 * not necessary to clean it up in [k,u]probe_events. We first look up
1003 * the %s_bcc_%d line in [k,u]probe_events. If the event is not found,
1004 * it is safe to skip the cleaning up process (write -:... to the file).
1005 */
1006 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
1007 fp = fopen(buf, "r");
1008 if (!fp) {
1009 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
1010 goto error;
1011 }
1012
1013 res = snprintf(buf, sizeof(buf), "%ss/%s_bcc_%d", event_type, ev_name, getpid());
1014 if (res < 0 || res >= sizeof(buf)) {
1015 fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
1016 goto error;
1017 }
1018
1019 while (getline(&cptr, &bufsize, fp) != -1)
1020 if (strstr(cptr, buf) != NULL) {
1021 found_event = 1;
1022 break;
1023 }
Yonghong Song3c5686d2018-06-05 23:42:20 -07001024 free(cptr);
Song Liu0e9ed202018-01-08 14:25:07 -08001025 fclose(fp);
1026 fp = NULL;
1027
1028 if (!found_event)
1029 return 0;
1030
Brenden Blanco68e2d142016-01-28 10:24:56 -08001031 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
1032 kfd = open(buf, O_WRONLY | O_APPEND, 0);
Brenden Blanco839dd272015-06-11 12:35:55 -07001033 if (kfd < 0) {
Brenden Blanco68e2d142016-01-28 10:24:56 -08001034 fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
Teng Qin0760b752017-09-03 19:07:59 -07001035 goto error;
Brenden Blanco839dd272015-06-11 12:35:55 -07001036 }
1037
Teng Qin0760b752017-09-03 19:07:59 -07001038 res = snprintf(buf, sizeof(buf), "-:%ss/%s_bcc_%d", event_type, ev_name, getpid());
1039 if (res < 0 || res >= sizeof(buf)) {
1040 fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
1041 goto error;
1042 }
Dereke4da6c22017-01-28 16:11:28 -08001043 if (write(kfd, buf, strlen(buf)) < 0) {
Brenden Blanco68e2d142016-01-28 10:24:56 -08001044 fprintf(stderr, "write(%s): %s\n", buf, strerror(errno));
Teng Qin0760b752017-09-03 19:07:59 -07001045 goto error;
Brenden Blanco839dd272015-06-11 12:35:55 -07001046 }
Brenden Blanco839dd272015-06-11 12:35:55 -07001047
Teng Qin0760b752017-09-03 19:07:59 -07001048 close(kfd);
Brenden Blanco75982492015-11-06 10:43:05 -08001049 return 0;
Teng Qin0760b752017-09-03 19:07:59 -07001050
1051error:
1052 if (kfd >= 0)
1053 close(kfd);
Song Liu0e9ed202018-01-08 14:25:07 -08001054 if (fp)
1055 fclose(fp);
Teng Qin0760b752017-09-03 19:07:59 -07001056 return -1;
Brenden Blanco839dd272015-06-11 12:35:55 -07001057}
1058
Dereke4da6c22017-01-28 16:11:28 -08001059int bpf_detach_kprobe(const char *ev_name)
1060{
Teng Qinf4543fc2017-09-03 17:11:59 -07001061 return bpf_detach_probe(ev_name, "kprobe");
Brenden Blanco68e2d142016-01-28 10:24:56 -08001062}
1063
Dereke4da6c22017-01-28 16:11:28 -08001064int bpf_detach_uprobe(const char *ev_name)
1065{
1066 return bpf_detach_probe(ev_name, "uprobe");
Brenden Blanco68e2d142016-01-28 10:24:56 -08001067}
1068
Dereke4da6c22017-01-28 16:11:28 -08001069
Teng Qinfafbf3c2018-02-27 00:26:12 -08001070int bpf_attach_tracepoint(int progfd, const char *tp_category,
1071 const char *tp_name)
1072{
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -07001073 char buf[256];
Teng Qinfafbf3c2018-02-27 00:26:12 -08001074 int pfd = -1;
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -07001075
1076 snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/%s",
1077 tp_category, tp_name);
Teng Qinfafbf3c2018-02-27 00:26:12 -08001078 if (bpf_attach_tracing_event(progfd, buf, -1 /* PID */, &pfd) == 0)
1079 return pfd;
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -07001080
Teng Qinfafbf3c2018-02-27 00:26:12 -08001081 bpf_close_perf_event_fd(pfd);
1082 return -1;
Sasha Goldshtein1198c3c2016-06-30 06:26:28 -07001083}
1084
// Detach counterpart of bpf_attach_tracepoint(). The perf event FD returned
// by attach is owned (and closed) by the caller, so no kernel state remains
// to tear down here.
int bpf_detach_tracepoint(const char *tp_category, const char *tp_name) {
  // Right now, there is nothing to do, but it's a good idea to encourage
  // callers to detach anything they attach.
  return 0;
}
1090
Yonghong Song0d722372018-04-27 04:56:08 -07001091int bpf_attach_raw_tracepoint(int progfd, char *tp_name)
1092{
1093 union bpf_attr attr;
1094 int ret;
1095
1096 bzero(&attr, sizeof(attr));
1097 attr.raw_tracepoint.name = ptr_to_u64(tp_name);
1098 attr.raw_tracepoint.prog_fd = progfd;
1099
1100 ret = syscall(__NR_bpf, BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
1101 if (ret < 0)
1102 fprintf(stderr, "bpf_attach_raw_tracepoint (%s): %s\n", tp_name, strerror(errno));
1103 return ret;
1104}
1105
Teng Qin4b764de2017-04-03 22:10:46 -07001106void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb,
1107 perf_reader_lost_cb lost_cb, void *cb_cookie,
1108 int pid, int cpu, int page_cnt) {
Brenden Blanco75982492015-11-06 10:43:05 -08001109 int pfd;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001110 struct perf_event_attr attr = {};
Brenden Blanco75982492015-11-06 10:43:05 -08001111 struct perf_reader *reader = NULL;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001112
Teng Qind0e48ed2018-02-27 01:05:20 -08001113 reader = perf_reader_new(raw_cb, lost_cb, cb_cookie, page_cnt);
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001114 if (!reader)
Brenden Blanco75982492015-11-06 10:43:05 -08001115 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001116
Brenden Blanco0dd24412016-02-17 00:26:14 -08001117 attr.config = 10;//PERF_COUNT_SW_BPF_OUTPUT;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001118 attr.type = PERF_TYPE_SOFTWARE;
1119 attr.sample_type = PERF_SAMPLE_RAW;
Brenden Blanco75982492015-11-06 10:43:05 -08001120 attr.sample_period = 1;
1121 attr.wakeup_events = 1;
1122 pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001123 if (pfd < 0) {
Brenden Blanco0dd24412016-02-17 00:26:14 -08001124 fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
1125 fprintf(stderr, " (check your kernel for PERF_COUNT_SW_BPF_OUTPUT support, 4.4 or newer)\n");
Brenden Blanco75982492015-11-06 10:43:05 -08001126 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001127 }
1128 perf_reader_set_fd(reader, pfd);
1129
Teng Qind0e48ed2018-02-27 01:05:20 -08001130 if (perf_reader_mmap(reader) < 0)
Brenden Blanco75982492015-11-06 10:43:05 -08001131 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001132
1133 if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
1134 perror("ioctl(PERF_EVENT_IOC_ENABLE)");
Brenden Blanco75982492015-11-06 10:43:05 -08001135 goto error;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001136 }
1137
1138 return reader;
Brenden Blanco75982492015-11-06 10:43:05 -08001139
1140error:
1141 if (reader)
1142 perf_reader_free(reader);
1143
1144 return NULL;
Brenden Blancod0daf6a2015-11-05 23:31:22 -08001145}
Jan Rüthe0724d72016-07-28 22:32:46 +02001146
Brenden Blancofa073452017-05-30 17:35:53 -07001147static int invalid_perf_config(uint32_t type, uint64_t config) {
Teng Qin98752212017-05-19 19:05:24 -07001148 switch (type) {
Teng Qin01b07ba2017-11-20 13:28:03 -08001149 case PERF_TYPE_HARDWARE:
1150 if (config >= PERF_COUNT_HW_MAX) {
1151 fprintf(stderr, "HARDWARE perf event config out of range\n");
1152 goto is_invalid;
1153 }
1154 return 0;
1155 case PERF_TYPE_SOFTWARE:
1156 if (config >= PERF_COUNT_SW_MAX) {
1157 fprintf(stderr, "SOFTWARE perf event config out of range\n");
1158 goto is_invalid;
1159 } else if (config == 10 /* PERF_COUNT_SW_BPF_OUTPUT */) {
1160 fprintf(stderr, "Unable to open or attach perf event for BPF_OUTPUT\n");
1161 goto is_invalid;
1162 }
1163 return 0;
1164 case PERF_TYPE_HW_CACHE:
1165 if (((config >> 16) >= PERF_COUNT_HW_CACHE_RESULT_MAX) ||
1166 (((config >> 8) & 0xff) >= PERF_COUNT_HW_CACHE_OP_MAX) ||
1167 ((config & 0xff) >= PERF_COUNT_HW_CACHE_MAX)) {
1168 fprintf(stderr, "HW_CACHE perf event config out of range\n");
1169 goto is_invalid;
1170 }
1171 return 0;
1172 case PERF_TYPE_TRACEPOINT:
1173 case PERF_TYPE_BREAKPOINT:
1174 fprintf(stderr,
1175 "Unable to open or attach TRACEPOINT or BREAKPOINT events\n");
1176 goto is_invalid;
1177 default:
1178 return 0;
Teng Qin98752212017-05-19 19:05:24 -07001179 }
Teng Qin01b07ba2017-11-20 13:28:03 -08001180is_invalid:
1181 fprintf(stderr, "Invalid perf event type %" PRIu32 " config %" PRIu64 "\n",
1182 type, config);
1183 return 1;
Teng Qin98752212017-05-19 19:05:24 -07001184}
1185
Brenden Blanco3069caa2016-08-01 18:12:11 -07001186int bpf_open_perf_event(uint32_t type, uint64_t config, int pid, int cpu) {
1187 int fd;
1188 struct perf_event_attr attr = {};
Jan Rüthe0724d72016-07-28 22:32:46 +02001189
Teng Qin98752212017-05-19 19:05:24 -07001190 if (invalid_perf_config(type, config)) {
Teng Qin98752212017-05-19 19:05:24 -07001191 return -1;
1192 }
1193
Brenden Blanco3069caa2016-08-01 18:12:11 -07001194 attr.sample_period = LONG_MAX;
1195 attr.type = type;
1196 attr.config = config;
1197
1198 fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
1199 if (fd < 0) {
1200 fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
1201 return -1;
1202 }
1203
1204 if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
1205 perror("ioctl(PERF_EVENT_IOC_ENABLE)");
1206 close(fd);
1207 return -1;
1208 }
1209
1210 return fd;
1211}
Jan Rüthe0724d72016-07-28 22:32:46 +02001212
Andy Gospodarek9f3cab72017-05-17 16:18:45 -04001213int bpf_attach_xdp(const char *dev_name, int progfd, uint32_t flags) {
Jan Rüthe0724d72016-07-28 22:32:46 +02001214 struct sockaddr_nl sa;
1215 int sock, seq = 0, len, ret = -1;
1216 char buf[4096];
1217 struct nlattr *nla, *nla_xdp;
1218 struct {
1219 struct nlmsghdr nh;
1220 struct ifinfomsg ifinfo;
1221 char attrbuf[64];
1222 } req;
1223 struct nlmsghdr *nh;
1224 struct nlmsgerr *err;
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001225 socklen_t addrlen;
Jan Rüthe0724d72016-07-28 22:32:46 +02001226
1227 memset(&sa, 0, sizeof(sa));
1228 sa.nl_family = AF_NETLINK;
1229
1230 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
1231 if (sock < 0) {
1232 fprintf(stderr, "bpf: opening a netlink socket: %s\n", strerror(errno));
1233 return -1;
1234 }
1235
1236 if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
1237 fprintf(stderr, "bpf: bind to netlink: %s\n", strerror(errno));
1238 goto cleanup;
1239 }
1240
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001241 addrlen = sizeof(sa);
1242 if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
1243 fprintf(stderr, "bpf: get sock name of netlink: %s\n", strerror(errno));
1244 goto cleanup;
1245 }
1246
1247 if (addrlen != sizeof(sa)) {
1248 fprintf(stderr, "bpf: wrong netlink address length: %d\n", addrlen);
1249 goto cleanup;
1250 }
1251
Jan Rüthe0724d72016-07-28 22:32:46 +02001252 memset(&req, 0, sizeof(req));
1253 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
1254 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1255 req.nh.nlmsg_type = RTM_SETLINK;
1256 req.nh.nlmsg_pid = 0;
1257 req.nh.nlmsg_seq = ++seq;
1258 req.ifinfo.ifi_family = AF_UNSPEC;
1259 req.ifinfo.ifi_index = if_nametoindex(dev_name);
1260 if (req.ifinfo.ifi_index == 0) {
1261 fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
1262 goto cleanup;
1263 }
1264
1265 nla = (struct nlattr *)(((char *)&req)
1266 + NLMSG_ALIGN(req.nh.nlmsg_len));
1267 nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
1268
1269 nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
Andy Gospodarek9f3cab72017-05-17 16:18:45 -04001270 nla->nla_len = NLA_HDRLEN;
Jan Rüthe0724d72016-07-28 22:32:46 +02001271
1272 // we specify the FD passed over by the user
1273 nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
Arthur Gautierfbd91e22017-04-28 21:39:58 +00001274 nla_xdp->nla_len = NLA_HDRLEN + sizeof(progfd);
Jan Rüthe0724d72016-07-28 22:32:46 +02001275 memcpy((char *)nla_xdp + NLA_HDRLEN, &progfd, sizeof(progfd));
Andy Gospodarek9f3cab72017-05-17 16:18:45 -04001276 nla->nla_len += nla_xdp->nla_len;
1277
1278 // parse flags as passed by the user
1279 if (flags) {
1280 nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
Gary Lindb8353b2017-08-18 18:10:43 +08001281 nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/;
Andy Gospodarek9f3cab72017-05-17 16:18:45 -04001282 nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
1283 memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
1284 nla->nla_len += nla_xdp->nla_len;
1285 }
Jan Rüthe0724d72016-07-28 22:32:46 +02001286
1287 req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
1288
1289 if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
1290 fprintf(stderr, "bpf: send to netlink: %s\n", strerror(errno));
1291 goto cleanup;
1292 }
1293
1294 len = recv(sock, buf, sizeof(buf), 0);
1295 if (len < 0) {
1296 fprintf(stderr, "bpf: recv from netlink: %s\n", strerror(errno));
1297 goto cleanup;
1298 }
1299
1300 for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
1301 nh = NLMSG_NEXT(nh, len)) {
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001302 if (nh->nlmsg_pid != sa.nl_pid) {
Toshiaki Makita890c76a2017-07-31 20:20:55 +09001303 fprintf(stderr, "bpf: Wrong pid %u, expected %u\n",
Toshiaki Makitabb9b92a2017-07-31 20:20:55 +09001304 nh->nlmsg_pid, sa.nl_pid);
Jan Rüthe0724d72016-07-28 22:32:46 +02001305 errno = EBADMSG;
1306 goto cleanup;
1307 }
1308 if (nh->nlmsg_seq != seq) {
1309 fprintf(stderr, "bpf: Wrong seq %d, expected %d\n",
1310 nh->nlmsg_seq, seq);
1311 errno = EBADMSG;
1312 goto cleanup;
1313 }
1314 switch (nh->nlmsg_type) {
1315 case NLMSG_ERROR:
1316 err = (struct nlmsgerr *)NLMSG_DATA(nh);
1317 if (!err->error)
1318 continue;
1319 fprintf(stderr, "bpf: nlmsg error %s\n", strerror(-err->error));
1320 errno = -err->error;
1321 goto cleanup;
1322 case NLMSG_DONE:
1323 break;
1324 }
1325 }
1326
1327 ret = 0;
1328
1329cleanup:
1330 close(sock);
1331 return ret;
1332}
Teng Qin206b0202016-10-18 16:06:57 -07001333
Teng Qin5b7c6782018-03-09 16:57:11 -08001334int bpf_attach_perf_event_raw(int progfd, void *perf_event_attr, pid_t pid,
Teng Qinbf2513d2018-05-16 13:09:09 -07001335 int cpu, int group_fd, unsigned long extra_flags) {
Teng Qin5b7c6782018-03-09 16:57:11 -08001336 int fd = syscall(__NR_perf_event_open, perf_event_attr, pid, cpu, group_fd,
Teng Qinbf2513d2018-05-16 13:09:09 -07001337 PERF_FLAG_FD_CLOEXEC | extra_flags);
Teng Qin5b7c6782018-03-09 16:57:11 -08001338 if (fd < 0) {
1339 perror("perf_event_open failed");
1340 return -1;
1341 }
1342 if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, progfd) != 0) {
1343 perror("ioctl(PERF_EVENT_IOC_SET_BPF) failed");
1344 close(fd);
1345 return -1;
1346 }
1347 if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) != 0) {
1348 perror("ioctl(PERF_EVENT_IOC_ENABLE) failed");
1349 close(fd);
1350 return -1;
1351 }
1352
1353 return fd;
1354}
1355
Teng Qin206b0202016-10-18 16:06:57 -07001356int bpf_attach_perf_event(int progfd, uint32_t ev_type, uint32_t ev_config,
1357 uint64_t sample_period, uint64_t sample_freq,
1358 pid_t pid, int cpu, int group_fd) {
Teng Qin98752212017-05-19 19:05:24 -07001359 if (invalid_perf_config(ev_type, ev_config)) {
Teng Qin206b0202016-10-18 16:06:57 -07001360 return -1;
1361 }
1362 if (!((sample_period > 0) ^ (sample_freq > 0))) {
1363 fprintf(
1364 stderr, "Exactly one of sample_period / sample_freq should be set\n"
1365 );
1366 return -1;
1367 }
1368
1369 struct perf_event_attr attr = {};
1370 attr.type = ev_type;
1371 attr.config = ev_config;
Teng Qin99978d22017-12-15 00:22:13 -08001372 if (pid > 0)
1373 attr.inherit = 1;
Teng Qin206b0202016-10-18 16:06:57 -07001374 if (sample_freq > 0) {
1375 attr.freq = 1;
1376 attr.sample_freq = sample_freq;
1377 } else {
1378 attr.sample_period = sample_period;
1379 }
1380
Teng Qinbf2513d2018-05-16 13:09:09 -07001381 return bpf_attach_perf_event_raw(progfd, &attr, pid, cpu, group_fd, 0);
Teng Qin206b0202016-10-18 16:06:57 -07001382}
1383
Teng Qind6827332017-05-23 16:35:11 -07001384int bpf_close_perf_event_fd(int fd) {
1385 int res, error = 0;
1386 if (fd >= 0) {
1387 res = ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
1388 if (res != 0) {
1389 perror("ioctl(PERF_EVENT_IOC_DISABLE) failed");
1390 error = res;
1391 }
1392 res = close(fd);
1393 if (res != 0) {
1394 perror("close perf event FD failed");
1395 error = (res && !error) ? res : error;
1396 }
1397 }
1398 return error;
Teng Qin206b0202016-10-18 16:06:57 -07001399}
Huapeng Zhou37dcac02016-12-20 13:42:01 -08001400
// Pin BPF object `fd` (map or program) to `pathname` on a bpffs mount via
// BPF_OBJ_PIN. Returns the syscall result: 0 on success, negative on error.
int bpf_obj_pin(int fd, const char *pathname)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.bpf_fd = fd;
  attr.pathname = ptr_to_u64((void *)pathname);

  return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
}
1411
// Fetch a new FD for the BPF object pinned at `pathname` via BPF_OBJ_GET.
// Returns the FD on success, negative on error.
int bpf_obj_get(const char *pathname)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.pathname = ptr_to_u64((void *)pathname);

  return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}
Martin KaFai Lau3c24ad92017-10-19 23:47:39 -07001421
// Iterate loaded BPF program IDs: store the first ID greater than start_id
// into *next_id. Returns 0 on success; negative (errno == ENOENT at the end
// of the list) on failure, leaving *next_id untouched.
int bpf_prog_get_next_id(uint32_t start_id, uint32_t *next_id)
{
  union bpf_attr attr;
  int rc;

  memset(&attr, 0, sizeof(attr));
  attr.start_id = start_id;

  rc = syscall(__NR_bpf, BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
  if (rc == 0)
    *next_id = attr.next_id;

  return rc;
}
1436
// Open an FD for the loaded BPF program with the given kernel-assigned ID.
// Returns the FD on success, negative on error.
int bpf_prog_get_fd_by_id(uint32_t id)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.prog_id = id;

  return syscall(__NR_bpf, BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
}
1446
// Open an FD for the BPF map with the given kernel-assigned ID.
// Returns the FD on success, negative on error.
int bpf_map_get_fd_by_id(uint32_t id)
{
  union bpf_attr attr;

  memset(&attr, 0, sizeof(attr));
  attr.map_id = id;

  return syscall(__NR_bpf, BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
}