/*
 * Copyright (c) 2015 PLUMgrid, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <linux/bpf.h>
#include <linux/if_packet.h>
#include <linux/pkt_cls.h>
#include <linux/perf_event.h>
#include <linux/rtnetlink.h>
#include <linux/unistd.h>
#include <linux/version.h>
#include <linux/bpf_common.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <unistd.h>
#include <stdbool.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "bcc_proc.h"
#include "libbpf.h"
#include "perf_reader.h"

// TODO: remove these defines when linux-libc-dev exports them properly

#ifndef __NR_bpf
#if defined(__powerpc64__)
#define __NR_bpf 361
#elif defined(__s390x__)
#define __NR_bpf 351
#elif defined(__aarch64__)
#define __NR_bpf 280
#else
#define __NR_bpf 321
#endif
#endif

#ifndef SO_ATTACH_BPF
#define SO_ATTACH_BPF 50
#endif

#ifndef PERF_EVENT_IOC_SET_BPF
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
#define PERF_FLAG_FD_CLOEXEC (1UL << 3)
#endif

static int probe_perf_reader_page_cnt = 8;

static __u64 ptr_to_u64(void *ptr)
{
  return (__u64) (unsigned long) ptr;
}

int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, int map_flags)
{
  union bpf_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.map_type = map_type;
  attr.key_size = key_size;
  attr.value_size = value_size;
  attr.max_entries = max_entries;
  attr.map_flags = map_flags;

  int ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
  if (ret < 0 && errno == EPERM) {
    // see note below about the rationale for this retry

    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
    }
  }
  return ret;
}

int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
{
  union bpf_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.map_fd = fd;
  attr.key = ptr_to_u64(key);
  attr.value = ptr_to_u64(value);
  attr.flags = flags;

  return syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
}

int bpf_lookup_elem(int fd, void *key, void *value)
{
  union bpf_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.map_fd = fd;
  attr.key = ptr_to_u64(key);
  attr.value = ptr_to_u64(value);

  return syscall(__NR_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
}

int bpf_delete_elem(int fd, void *key)
{
  union bpf_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.map_fd = fd;
  attr.key = ptr_to_u64(key);

  return syscall(__NR_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
}

int bpf_get_next_key(int fd, void *key, void *next_key)
{
  union bpf_attr attr;
  memset(&attr, 0, sizeof(attr));
  attr.map_fd = fd;
  attr.key = ptr_to_u64(key);
  attr.next_key = ptr_to_u64(next_key);

  return syscall(__NR_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
}
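
/*
 * Illustrative sketch (not part of this file): the wrappers above are thin
 * shims over the bpf(2) syscall. A map is typically walked by seeding
 * bpf_get_next_key() with a key that is assumed not to exist, then looking
 * up each returned key. The map fd and the key/value types below are
 * assumptions for the example only.
 *
 *   int map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(uint32_t),
 *                               sizeof(uint64_t), 1024, 0);
 *   uint32_t key = UINT32_MAX, next_key;  // UINT32_MAX assumed absent
 *   uint64_t value;
 *   while (bpf_get_next_key(map_fd, &key, &next_key) == 0) {
 *     if (bpf_lookup_elem(map_fd, &next_key, &value) == 0)
 *       printf("key %u -> %llu\n", next_key, (unsigned long long)value);
 *     key = next_key;
 *   }
 */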

void bpf_print_hints(char *log)
{
  if (log == NULL)
    return;

  // The following error strings will need maintenance to match LLVM.

  // stack busting
  if (strstr(log, "invalid stack off=-") != NULL) {
    fprintf(stderr, "HINT: Looks like you exceeded the BPF stack limit. "
      "This can happen if you allocate too much local variable storage. "
      "For example, if you allocated a 1 Kbyte struct (maybe for "
      "BPF_PERF_OUTPUT), busting a max stack of 512 bytes.\n\n");
  }

  // didn't check NULL on map lookup
  if (strstr(log, "invalid mem access 'map_value_or_null'") != NULL) {
    fprintf(stderr, "HINT: The 'map_value_or_null' error can happen if "
      "you dereference a pointer value from a map lookup without first "
      "checking if that pointer is NULL.\n\n");
  }

  // lacking a bpf_probe_read
  if (strstr(log, "invalid mem access 'inv'") != NULL) {
    fprintf(stderr, "HINT: The invalid mem access 'inv' error can happen "
      "if you try to dereference memory without first using "
      "bpf_probe_read() to copy it to the BPF stack. Sometimes the "
      "bpf_probe_read is automatic by the bcc rewriter, other times "
      "you'll need to be explicit.\n\n");
  }
}
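
/*
 * Illustrative sketch (not part of this file): the 'map_value_or_null' hint
 * above refers to BPF C of the following shape, where the pointer returned
 * by a map lookup must be NULL-checked before it is dereferenced. The map
 * name and types are assumptions for the example only.
 *
 *   u64 *count = counts.lookup(&key);
 *   if (count)        // without this check the verifier rejects the program
 *     (*count)++;
 */
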
#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))

int bpf_prog_load(enum bpf_prog_type prog_type,
                  const struct bpf_insn *insns, int prog_len,
                  const char *license, unsigned kern_version,
                  char *log_buf, unsigned log_buf_size)
{
  union bpf_attr attr;
  char *bpf_log_buffer = NULL;
  unsigned buffer_size = 0;
  int ret = 0;

  memset(&attr, 0, sizeof(attr));
  attr.prog_type = prog_type;
  attr.insns = ptr_to_u64((void *) insns);
  attr.insn_cnt = prog_len / sizeof(struct bpf_insn);
  attr.license = ptr_to_u64((void *) license);
  attr.log_buf = ptr_to_u64(log_buf);
  attr.log_size = log_buf_size;
  attr.log_level = log_buf ? 1 : 0;

  attr.kern_version = kern_version;
  if (log_buf)
    log_buf[0] = 0;

  if (attr.insn_cnt > BPF_MAXINSNS) {
    ret = -1;
    errno = EINVAL;
    fprintf(stderr,
            "bpf: %s. Program too large (%d insns), at most %d insns\n\n",
            strerror(errno), attr.insn_cnt, BPF_MAXINSNS);
    return ret;
  }

  ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));

  if (ret < 0 && errno == EPERM) {
    // When EPERM is returned, two reasons are possible:
    //  1. user has no permissions for bpf()
    //  2. user has insufficient rlimit for locked memory
    // Unfortunately, there is no api to inspect the current usage of locked
    // mem for the user, so an accurate calculation of how much memory to lock
    // for this new program is difficult. As a hack, bump the limit to
    // unlimited. If program load fails again, return the error.

    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
    }
  }

  if (ret < 0 && !log_buf) {

    buffer_size = LOG_BUF_SIZE;
    // caller did not specify log_buf but failure should be printed,
    // so repeat the syscall and print the result to stderr
    for (;;) {
      bpf_log_buffer = malloc(buffer_size);
      if (!bpf_log_buffer) {
        fprintf(stderr,
                "bpf: buffer log memory allocation failed for error %s\n\n",
                strerror(errno));
        return ret;
      }
      bpf_log_buffer[0] = 0;

      attr.log_buf = ptr_to_u64(bpf_log_buffer);
      attr.log_size = buffer_size;
      attr.log_level = bpf_log_buffer ? 1 : 0;

      ret = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
      if (ret < 0 && errno == ENOSPC) {
        free(bpf_log_buffer);
        bpf_log_buffer = NULL;
        buffer_size <<= 1;
      } else {
        break;
      }
    }

    fprintf(stderr, "bpf: %s\n%s\n", strerror(errno), bpf_log_buffer);
    bpf_print_hints(bpf_log_buffer);

    free(bpf_log_buffer);
  }
  return ret;
}
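
/*
 * Illustrative sketch (not part of this file): loading a minimal "return 0"
 * program through the wrapper above. The raw instruction encoding and the
 * choice of BPF_PROG_TYPE_SOCKET_FILTER are assumptions for the example
 * only; prog_len is passed in bytes, as the insn_cnt computation above
 * expects.
 *
 *   struct bpf_insn prog[] = {
 *     { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
 *     { .code = BPF_JMP | BPF_EXIT },
 *   };
 *   char log[4096];
 *   int prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog,
 *                               sizeof(prog), "GPL", LINUX_VERSION_CODE,
 *                               log, sizeof(log));
 *   if (prog_fd < 0)
 *     fprintf(stderr, "load failed: %s\n%s\n", strerror(errno), log);
 */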

int bpf_open_raw_sock(const char *name)
{
  struct sockaddr_ll sll;
  int sock;

  sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
  if (sock < 0) {
    printf("cannot create raw socket\n");
    return -1;
  }

  memset(&sll, 0, sizeof(sll));
  sll.sll_family = AF_PACKET;
  sll.sll_ifindex = if_nametoindex(name);
  sll.sll_protocol = htons(ETH_P_ALL);
  if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
    printf("bind to %s: %s\n", name, strerror(errno));
    close(sock);
    return -1;
  }

  return sock;
}

int bpf_attach_socket(int sock, int prog) {
  return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
}

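/*
 * Illustrative sketch (not part of this file): wiring the two helpers above
 * together to run a socket-filter program on one interface. The interface
 * name is an assumption for the example only; progfd would come from
 * bpf_prog_load() with BPF_PROG_TYPE_SOCKET_FILTER.
 *
 *   int sock = bpf_open_raw_sock("eth0");
 *   if (sock < 0 || bpf_attach_socket(sock, progfd) < 0)
 *     perror("attach socket filter");
 */
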
static int bpf_attach_tracing_event(int progfd, const char *event_path,
                                    struct perf_reader *reader, int pid, int cpu, int group_fd) {
  int efd, pfd;
  ssize_t bytes;
  char buf[256];
  struct perf_event_attr attr = {};

  snprintf(buf, sizeof(buf), "%s/id", event_path);
  efd = open(buf, O_RDONLY, 0);
  if (efd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    return -1;
  }

  bytes = read(efd, buf, sizeof(buf));
  if (bytes <= 0 || bytes >= sizeof(buf)) {
    fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
    close(efd);
    return -1;
  }
  close(efd);
  buf[bytes] = '\0';
  attr.config = strtol(buf, NULL, 0);
  attr.type = PERF_TYPE_TRACEPOINT;
  attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
  attr.sample_period = 1;
  attr.wakeup_events = 1;
  pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
  if (pfd < 0) {
    fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
    return -1;
  }
  perf_reader_set_fd(reader, pfd);

  if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
    return -1;

  if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF)");
    return -1;
  }
  if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    return -1;
  }

  return 0;
}

void * bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type, const char *ev_name,
                         const char *fn_name,
                         pid_t pid, int cpu, int group_fd,
                         perf_reader_cb cb, void *cb_cookie)
{
  int kfd;
  char buf[256];
  char new_name[128];
  struct perf_reader *reader = NULL;
  static char *event_type = "kprobe";
  int n;

  snprintf(new_name, sizeof(new_name), "%s_bcc_%d", ev_name, getpid());
  reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
  if (!reader)
    goto error;

  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
  kfd = open(buf, O_WRONLY | O_APPEND, 0);
  if (kfd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    goto error;
  }

  snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
           event_type, new_name, fn_name);
  if (write(kfd, buf, strlen(buf)) < 0) {
    if (errno == EINVAL)
      fprintf(stderr, "check dmesg output for possible cause\n");
    close(kfd);
    goto error;
  }
  close(kfd);

  if (access("/sys/kernel/debug/tracing/instances", F_OK) != -1) {
    snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/instances/bcc_%d", getpid());
    if (access(buf, F_OK) == -1) {
      if (mkdir(buf, 0755) == -1)
        goto retry;
    }
    n = snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/instances/bcc_%d/events/%ss/%s",
                 getpid(), event_type, new_name);
    if (n < sizeof(buf) && bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) == 0)
      goto out;
    snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/instances/bcc_%d", getpid());
    rmdir(buf);
  }
retry:
  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, new_name);
  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
    goto error;
out:
  return reader;

error:
  perf_reader_free(reader);
  return NULL;
}
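
/*
 * Illustrative sketch (not part of this file): a typical attach/detach pair
 * for the kprobe API above. The event name, target kernel function, and
 * callback are assumptions for the example only; progfd would come from
 * bpf_prog_load() with BPF_PROG_TYPE_KPROBE.
 *
 *   struct perf_reader *r =
 *       bpf_attach_kprobe(progfd, BPF_PROBE_ENTRY, "p_do_sys_open",
 *                         "do_sys_open", -1, 0, -1, my_cb, my_cookie);
 *   ...
 *   perf_reader_free(r);
 *   bpf_detach_kprobe("p_do_sys_open");
 */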

void * bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type, const char *ev_name,
                         const char *binary_path, uint64_t offset,
                         pid_t pid, int cpu, int group_fd,
                         perf_reader_cb cb, void *cb_cookie)
{
  int kfd;
  char buf[PATH_MAX];
  char new_name[128];
  struct perf_reader *reader = NULL;
  static char *event_type = "uprobe";
  struct ns_cookie nsc = {-1, -1};
  int n;

  snprintf(new_name, sizeof(new_name), "%s_bcc_%d", ev_name, getpid());
  reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
  if (!reader)
    goto error;

  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
  kfd = open(buf, O_WRONLY | O_APPEND, 0);
  if (kfd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    goto error;
  }

  n = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
               event_type, new_name, binary_path, offset);
  if (n >= sizeof(buf)) {
    close(kfd);
    goto error;
  }

  bcc_procutils_enter_mountns(pid, &nsc);
  if (write(kfd, buf, strlen(buf)) < 0) {
    if (errno == EINVAL)
      fprintf(stderr, "check dmesg output for possible cause\n");
    close(kfd);
    goto error;
  }
  bcc_procutils_exit_mountns(&nsc);
  close(kfd);

  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, new_name);
  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
    goto error;

  return reader;

error:
  bcc_procutils_exit_mountns(&nsc);
  perf_reader_free(reader);
  return NULL;
}

static int bpf_detach_probe(const char *ev_name, const char *event_type)
{
  int kfd;
  char buf[256];
  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
  kfd = open(buf, O_WRONLY | O_APPEND, 0);
  if (kfd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    return -1;
  }

  snprintf(buf, sizeof(buf), "-:%ss/%s_bcc_%d", event_type, ev_name, getpid());
  if (write(kfd, buf, strlen(buf)) < 0) {
    fprintf(stderr, "write(%s): %s\n", buf, strerror(errno));
    close(kfd);
    return -1;
  }
  close(kfd);

  return 0;
}

int bpf_detach_kprobe(const char *ev_name)
{
  char buf[256];
  int ret = bpf_detach_probe(ev_name, "kprobe");
  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/instances/bcc_%d", getpid());
  if (access(buf, F_OK) != -1) {
    rmdir(buf);
  }

  return ret;
}

int bpf_detach_uprobe(const char *ev_name)
{
  return bpf_detach_probe(ev_name, "uprobe");
}

void * bpf_attach_tracepoint(int progfd, const char *tp_category,
                             const char *tp_name, int pid, int cpu,
                             int group_fd, perf_reader_cb cb, void *cb_cookie) {
  char buf[256];
  struct perf_reader *reader = NULL;

  reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
  if (!reader)
    goto error;

  snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/%s",
           tp_category, tp_name);
  if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
    goto error;

  return reader;

error:
  perf_reader_free(reader);
  return NULL;
}

int bpf_detach_tracepoint(const char *tp_category, const char *tp_name) {
  // Right now, there is nothing to do, but it's a good idea to encourage
  // callers to detach anything they attach.
  return 0;
}

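/*
 * Illustrative sketch (not part of this file): tp_category and tp_name name
 * a directory under /sys/kernel/debug/tracing/events, e.g. the
 * syscalls:sys_enter_open tracepoint. The callback and cookie are
 * assumptions for the example only; progfd would come from bpf_prog_load()
 * with BPF_PROG_TYPE_TRACEPOINT.
 *
 *   struct perf_reader *r =
 *       bpf_attach_tracepoint(progfd, "syscalls", "sys_enter_open",
 *                             -1, 0, -1, my_cb, my_cookie);
 */
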
void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb,
                            perf_reader_lost_cb lost_cb, void *cb_cookie,
                            int pid, int cpu, int page_cnt) {
  int pfd;
  struct perf_event_attr attr = {};
  struct perf_reader *reader = NULL;

  reader = perf_reader_new(NULL, raw_cb, lost_cb, cb_cookie, page_cnt);
  if (!reader)
    goto error;

  attr.config = 10; // PERF_COUNT_SW_BPF_OUTPUT
  attr.type = PERF_TYPE_SOFTWARE;
  attr.sample_type = PERF_SAMPLE_RAW;
  attr.sample_period = 1;
  attr.wakeup_events = 1;
  pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
  if (pfd < 0) {
    fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
    fprintf(stderr, "   (check your kernel for PERF_COUNT_SW_BPF_OUTPUT support, 4.4 or newer)\n");
    goto error;
  }
  perf_reader_set_fd(reader, pfd);

  if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
    goto error;

  if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    goto error;
  }

  return reader;

error:
  if (reader)
    perf_reader_free(reader);

  return NULL;
}

int bpf_open_perf_event(uint32_t type, uint64_t config, int pid, int cpu) {
  int fd;
  struct perf_event_attr attr = {};

  attr.sample_period = LONG_MAX;
  attr.type = type;
  attr.config = config;

  fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
  if (fd < 0) {
    fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
    return -1;
  }

  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    close(fd);
    return -1;
  }

  return fd;
}

int bpf_attach_xdp(const char *dev_name, int progfd) {
  struct sockaddr_nl sa;
  int sock, seq = 0, len, ret = -1;
  char buf[4096];
  struct nlattr *nla, *nla_xdp;
  struct {
    struct nlmsghdr nh;
    struct ifinfomsg ifinfo;
    char attrbuf[64];
  } req;
  struct nlmsghdr *nh;
  struct nlmsgerr *err;

  memset(&sa, 0, sizeof(sa));
  sa.nl_family = AF_NETLINK;

  sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  if (sock < 0) {
    fprintf(stderr, "bpf: opening a netlink socket: %s\n", strerror(errno));
    return -1;
  }

  if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
    fprintf(stderr, "bpf: bind to netlink: %s\n", strerror(errno));
    goto cleanup;
  }

  memset(&req, 0, sizeof(req));
  req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
  req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
  req.nh.nlmsg_type = RTM_SETLINK;
  req.nh.nlmsg_pid = 0;
  req.nh.nlmsg_seq = ++seq;
  req.ifinfo.ifi_family = AF_UNSPEC;
  req.ifinfo.ifi_index = if_nametoindex(dev_name);
  if (req.ifinfo.ifi_index == 0) {
    fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
    goto cleanup;
  }

  nla = (struct nlattr *)(((char *)&req)
                          + NLMSG_ALIGN(req.nh.nlmsg_len));
  nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;

  nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);

  // we specify the FD passed over by the user
  nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
  nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
  memcpy((char *)nla_xdp + NLA_HDRLEN, &progfd, sizeof(progfd));
  nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;

  req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);

  if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
    fprintf(stderr, "bpf: send to netlink: %s\n", strerror(errno));
    goto cleanup;
  }

  len = recv(sock, buf, sizeof(buf), 0);
  if (len < 0) {
    fprintf(stderr, "bpf: recv from netlink: %s\n", strerror(errno));
    goto cleanup;
  }

  for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
       nh = NLMSG_NEXT(nh, len)) {
    if (nh->nlmsg_pid != getpid()) {
      fprintf(stderr, "bpf: Wrong pid %d, expected %d\n",
              nh->nlmsg_pid, getpid());
      errno = EBADMSG;
      goto cleanup;
    }
    if (nh->nlmsg_seq != seq) {
      fprintf(stderr, "bpf: Wrong seq %d, expected %d\n",
              nh->nlmsg_seq, seq);
      errno = EBADMSG;
      goto cleanup;
    }
    switch (nh->nlmsg_type) {
      case NLMSG_ERROR:
        err = (struct nlmsgerr *)NLMSG_DATA(nh);
        if (!err->error)
          continue;
        fprintf(stderr, "bpf: nlmsg error %s\n", strerror(-err->error));
        errno = -err->error;
        goto cleanup;
      case NLMSG_DONE:
        break;
    }
  }

  ret = 0;

cleanup:
  close(sock);
  return ret;
}
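
/*
 * Illustrative sketch (not part of this file): attaching an XDP program to a
 * device and clearing it again. Passing a negative fd in IFLA_XDP_FD asks
 * the kernel to remove the attached program; the device name is an
 * assumption for the example only.
 *
 *   if (bpf_attach_xdp("eth0", progfd) < 0)
 *     fprintf(stderr, "failed to attach xdp program\n");
 *   ...
 *   bpf_attach_xdp("eth0", -1);  // detach
 */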

int bpf_attach_perf_event(int progfd, uint32_t ev_type, uint32_t ev_config,
                          uint64_t sample_period, uint64_t sample_freq,
                          pid_t pid, int cpu, int group_fd) {
  if (ev_type != PERF_TYPE_HARDWARE && ev_type != PERF_TYPE_SOFTWARE) {
    fprintf(stderr, "Unsupported perf event type\n");
    return -1;
  }
  if ((ev_type == PERF_TYPE_HARDWARE && ev_config >= PERF_COUNT_HW_MAX) ||
      (ev_type == PERF_TYPE_SOFTWARE && ev_config >= PERF_COUNT_SW_MAX)) {
    fprintf(stderr, "Invalid perf event config\n");
    return -1;
  }
  if (!((sample_period > 0) ^ (sample_freq > 0))) {
    fprintf(
      stderr, "Exactly one of sample_period / sample_freq should be set\n"
    );
    return -1;
  }

  struct perf_event_attr attr = {};
  attr.type = ev_type;
  attr.config = ev_config;
  attr.inherit = 1;
  if (sample_freq > 0) {
    attr.freq = 1;
    attr.sample_freq = sample_freq;
  } else {
    attr.sample_period = sample_period;
  }

  int fd = syscall(
    __NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC
  );
  if (fd < 0) {
    perror("perf_event_open failed");
    return -1;
  }
  if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, progfd) != 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF) failed");
    close(fd);
    return -1;
  }
  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) != 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE) failed");
    close(fd);
    return -1;
  }

  return fd;
}
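
/*
 * Illustrative sketch (not part of this file): attaching a BPF program to a
 * software clock event sampled at 49 Hz on CPU 0. The event type, frequency,
 * and CPU are assumptions for the example only; progfd would come from
 * bpf_prog_load() with BPF_PROG_TYPE_PERF_EVENT.
 *
 *   // args: progfd, ev_type, ev_config, sample_period, sample_freq, pid, cpu, group_fd
 *   int fd = bpf_attach_perf_event(progfd, PERF_TYPE_SOFTWARE,
 *                                  PERF_COUNT_SW_CPU_CLOCK, 0, 49, -1, 0, -1);
 *   if (fd >= 0) {
 *     // ... run, then disable and close to detach
 *     ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 *     close(fd);
 *   }
 */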

int bpf_detach_perf_event(uint32_t ev_type, uint32_t ev_config) {
  // Right now, there is nothing to do, but it's a good idea to encourage
  // callers to detach anything they attach.
  return 0;
}

int bpf_obj_pin(int fd, const char *pathname)
{
  union bpf_attr attr = {
    .pathname = ptr_to_u64((void *)pathname),
    .bpf_fd = fd,
  };

  return syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
}

int bpf_obj_get(const char *pathname)
{
  union bpf_attr attr = {
    .pathname = ptr_to_u64((void *)pathname),
  };

  return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
}
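
/*
 * Illustrative sketch (not part of this file): pinning a map fd into the BPF
 * filesystem so another process can retrieve it later. The path is an
 * assumption for the example only; bpffs is conventionally mounted at
 * /sys/fs/bpf.
 *
 *   if (bpf_obj_pin(map_fd, "/sys/fs/bpf/my_map") < 0)
 *     perror("bpf_obj_pin");
 *
 *   // in another process:
 *   int fd = bpf_obj_get("/sys/fs/bpf/my_map");
 */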