blob: 7eb1cd74acca0ca54e509c54a2a628f4b97bdd8a [file] [log] [blame]
/*
 * tc_bpf.c	BPF common code
 *
 *		This program is free software; you can distribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Daniel Borkmann <dborkman@redhat.com>
 *		Jiri Pirko <jiri@resnulli.us>
 *		Alexei Starovoitov <ast@plumgrid.com>
 */
13
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <string.h>
18#include <stdbool.h>
Daniel Borkmann473d7842015-05-29 15:15:44 +020019#include <stdint.h>
Jiri Pirko1d129d12015-01-19 16:56:29 +010020#include <errno.h>
Daniel Borkmann11c39b52015-03-16 19:37:41 +010021#include <fcntl.h>
22#include <stdarg.h>
Gustavo Zacarias5c5a0f32016-04-08 09:59:33 -030023#include <limits.h>
Jiri Pirko1d129d12015-01-19 16:56:29 +010024
Daniel Borkmann11c39b52015-03-16 19:37:41 +010025#ifdef HAVE_ELF
26#include <libelf.h>
27#include <gelf.h>
28#endif
29
Daniel Borkmann32e93fb2015-11-13 00:39:29 +010030#include <sys/types.h>
31#include <sys/stat.h>
32#include <sys/un.h>
33#include <sys/vfs.h>
34#include <sys/mount.h>
35#include <sys/syscall.h>
36#include <sys/sendfile.h>
37#include <sys/resource.h>
38
39#include <linux/bpf.h>
40#include <linux/filter.h>
41#include <linux/if_alg.h>
42
Daniel Borkmann8187b012016-01-12 02:03:08 +010043#include <arpa/inet.h>
44
Jiri Pirko1d129d12015-01-19 16:56:29 +010045#include "utils.h"
Daniel Borkmann6256f8c2015-04-01 17:57:44 +020046
47#include "bpf_elf.h"
48#include "bpf_scm.h"
49
Jiri Pirko1d129d12015-01-19 16:56:29 +010050#include "tc_util.h"
51#include "tc_bpf.h"
52
Nicolas Dichtel67584e32016-02-03 09:25:00 +010053#ifndef AF_ALG
54#define AF_ALG 38
55#endif
56
Daniel Borkmanne77fa412016-07-19 01:09:52 +020057#ifndef EM_BPF
58#define EM_BPF 247
59#endif
60
#ifdef HAVE_ELF
/*
 * With libelf available only the prototype is needed at this point; the
 * definition is expected later in the file (not visible from here).
 */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose);
#else
/*
 * Stand-in used when the build has no libelf support: report the missing
 * feature on stderr, set errno to ENOSYS and fail with -1.
 */
static int bpf_obj_open(const char *path, enum bpf_prog_type type,
			const char *sec, bool verbose)
{
	fputs("No ELF library support compiled in.\n", stderr);
	errno = ENOSYS;

	return -1;
}
#endif
73
/*
 * Convert a pointer into the 64-bit integer representation that the
 * pointer-carrying members of union bpf_attr expect.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}
78
/*
 * Thin wrapper around the bpf(2) system call.
 *
 * When the build-time kernel headers do not define __NR_bpf, nothing is
 * invoked: a hint is printed, errno is set to ENOSYS and -1 is returned.
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifndef __NR_bpf
	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_bpf, cmd, attr, size);
#endif
}
89
/*
 * Issue BPF_MAP_UPDATE_ELEM on map @fd, storing @value under @key with the
 * given update @flags. Returns whatever bpf() returns.
 */
static int bpf_map_update(int fd, const void *key, const void *value,
			  uint64_t flags)
{
	union bpf_attr req;

	/* Zero the whole union first so unused members are well defined. */
	memset(&req, 0, sizeof(req));
	req.flags = flags;
	req.value = bpf_ptr_to_u64(value);
	req.key = bpf_ptr_to_u64(key);
	req.map_fd = fd;

	return bpf(BPF_MAP_UPDATE_ELEM, &req, sizeof(req));
}
103
/*
 * Locate the textual cBPF program and parse its leading length field.
 *
 * @arg:          the program text itself, or a file name if @from_file
 * @from_file:    read the first line of file @arg instead of using @arg
 * @bpf_len:      receives the number that prefixes the program text
 * @bpf_string:   receives the program text (heap buffer if read from file)
 * @need_release: set to true when *@bpf_string must be free()d by the caller
 * @separator:    character that must directly follow the length field
 *
 * Returns 0 on success or a negative errno value. When the length prefix
 * cannot be parsed, a buffer allocated here is released before returning.
 */
static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
			    char **bpf_string, bool *need_release,
			    const char separator)
{
	char sp;

	if (!from_file) {
		*need_release = false;
		*bpf_string = arg;
	} else {
		/* Room for the length prefix plus BPF_MAXINSNS encoded ops. */
		const size_t op_len = sizeof("65535 255 255 4294967295,");
		const size_t buf_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		char *buf = calloc(1, buf_len);
		FILE *fp;

		if (buf == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(buf);
			return -ENOENT;
		}

		if (fgets(buf, buf_len, fp) == NULL) {
			free(buf);
			fclose(fp);
			return -EIO;
		}

		fclose(fp);

		*need_release = true;
		*bpf_string = buf;
	}

	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) == 2 &&
	    sp == separator)
		return 0;

	if (*need_release)
		free(*bpf_string);

	return -EINVAL;
}
153
/*
 * Parse a textual cBPF program ("<len>,<code> <jt> <jf> <k>,...") taken
 * from argv[0] (or from the file it names when @from_file is set) into
 * @bpf_ops.
 *
 * Returns the number of instructions on success, -EINVAL when the text is
 * malformed, the length is 0 or above BPF_MAXINSNS, or the number of
 * instructions found does not match the encoded length.
 */
static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
			 bool from_file)
{
	const char sep = ',';
	char *text, *cursor;
	__u16 expected = 0;
	int count = 0, ret;
	bool owned;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &expected, &text,
			     &owned, sep))
		return -EINVAL;

	/* Every exit below this point is a failure unless stated otherwise. */
	ret = -EINVAL;
	if (expected == 0 || expected > BPF_MAXINSNS)
		goto out;

	/* Each separator followed by more text introduces one instruction. */
	for (cursor = strchr(text, sep); cursor && *++cursor;
	     cursor = strchr(cursor, sep)) {
		struct sock_filter *op;

		if (count >= expected) {
			fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
			goto out;
		}

		op = &bpf_ops[count];
		if (sscanf(cursor, "%hu %hhu %hhu %u,",
			   &op->code, &op->jt, &op->jf, &op->k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", count);
			goto out;
		}

		count++;
	}

	if (count != expected) {
		fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
		goto out;
	}

	ret = expected;
out:
	if (owned)
		free(text);

	return ret;
}
203
/*
 * Print the @len cBPF instructions carried in the payload of attribute
 * @bpf_ops to @f as: bytecode '<len>,<code> <jt> <jf> <k>,...'
 * Nothing is printed when @len is 0.
 */
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	const struct sock_filter *ops = RTA_DATA(bpf_ops);
	int i;

	if (len == 0)
		return;

	fprintf(f, "bytecode \'%u,", len);

	for (i = 0; i < len; i++) {
		const struct sock_filter *op = &ops[i];
		/* Comma between instructions, closing quote after the last. */
		char tail = (i + 1 < len) ? ',' : '\'';

		fprintf(f, "%hu %hhu %hhu %u%c", op->code, op->jt,
			op->jf, op->k, tail);
	}
}
Daniel Borkmann11c39b52015-03-16 19:37:41 +0100221
Daniel Borkmannafc1a202016-04-09 00:32:04 +0200222static void bpf_map_pin_report(const struct bpf_elf_map *pin,
223 const struct bpf_elf_map *obj)
224{
225 fprintf(stderr, "Map specification differs from pinned file!\n");
226
227 if (obj->type != pin->type)
228 fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
229 obj->type, pin->type);
230 if (obj->size_key != pin->size_key)
231 fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
232 obj->size_key, pin->size_key);
233 if (obj->size_value != pin->size_value)
234 fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
235 obj->size_value, pin->size_value);
236 if (obj->max_elem != pin->max_elem)
237 fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
238 obj->max_elem, pin->max_elem);
Daniel Borkmann4dd3f502016-04-09 00:32:05 +0200239 if (obj->flags != pin->flags)
240 fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
241 obj->flags, pin->flags);
Daniel Borkmannafc1a202016-04-09 00:32:04 +0200242
243 fprintf(stderr, "\n");
244}
245
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100246static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
247 int length)
Daniel Borkmann9e607f22015-11-26 15:38:43 +0100248{
249 char file[PATH_MAX], buff[4096];
250 struct bpf_elf_map tmp, zero;
251 unsigned int val;
252 FILE *fp;
253
254 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
255
256 fp = fopen(file, "r");
257 if (!fp) {
258 fprintf(stderr, "No procfs support?!\n");
259 return -EIO;
260 }
261
262 memset(&tmp, 0, sizeof(tmp));
263 while (fgets(buff, sizeof(buff), fp)) {
264 if (sscanf(buff, "map_type:\t%u", &val) == 1)
265 tmp.type = val;
266 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
267 tmp.size_key = val;
268 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
269 tmp.size_value = val;
270 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
271 tmp.max_elem = val;
Daniel Borkmann4dd3f502016-04-09 00:32:05 +0200272 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
273 tmp.flags = val;
Daniel Borkmann9e607f22015-11-26 15:38:43 +0100274 }
275
276 fclose(fp);
277
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100278 if (!memcmp(&tmp, map, length)) {
Daniel Borkmann9e607f22015-11-26 15:38:43 +0100279 return 0;
280 } else {
281 memset(&zero, 0, sizeof(zero));
282 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
283 * so just accept it. We know we do have an eBPF fd and in this
284 * case, everything is 0. It is guaranteed that no such map exists
285 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
286 */
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100287 if (!memcmp(&tmp, &zero, length))
Daniel Borkmann9e607f22015-11-26 15:38:43 +0100288 return 0;
289
Daniel Borkmannafc1a202016-04-09 00:32:04 +0200290 bpf_map_pin_report(&tmp, map);
Daniel Borkmann9e607f22015-11-26 15:38:43 +0100291 return -EINVAL;
292 }
293}
294
/*
 * Mount a bpf filesystem on @target.
 *
 * @target is first made a private mount; if that fails with EINVAL it is
 * bind-mounted onto itself once and the private remount is retried.
 * Returns 0 on success, -1 (after printing the failing step) otherwise.
 */
static int bpf_mnt_fs(const char *target)
{
	bool bind_done = false;

	for (;;) {
		if (mount("", target, "none", MS_PRIVATE | MS_REC, NULL) == 0)
			break;

		/* Only EINVAL is worth a retry, and only once. */
		if (bind_done || errno != EINVAL) {
			fprintf(stderr, "mount --make-private %s failed: %s\n",
				target, strerror(errno));
			return -1;
		}

		if (mount(target, target, "none", MS_BIND, NULL) != 0) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target, target, strerror(errno));
			return -1;
		}

		bind_done = true;
	}

	if (mount("bpf", target, "bpf", 0, NULL) != 0) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target, strerror(errno));
		return -1;
	}

	return 0;
}
323
/*
 * Check that @mnt can be statfs()ed and that its filesystem type equals
 * @magic. Returns 0 if so, -ENOENT otherwise.
 */
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs info;

	if (statfs(mnt, &info) < 0 || (unsigned long)info.f_type != magic)
		return -ENOENT;

	return 0;
}
335
336static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
337 char *mnt, int len,
338 const char * const *known_mnts)
339{
340 const char * const *ptr;
341 char type[100];
342 FILE *fp;
343
344 if (known_mnts) {
345 ptr = known_mnts;
346 while (*ptr) {
347 if (bpf_valid_mntpt(*ptr, magic) == 0) {
348 strncpy(mnt, *ptr, len - 1);
349 mnt[len - 1] = 0;
350 return mnt;
351 }
352 ptr++;
353 }
354 }
355
356 fp = fopen("/proc/mounts", "r");
357 if (fp == NULL || len != PATH_MAX)
358 return NULL;
359
360 while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
361 mnt, type) == 2) {
362 if (strcmp(type, fstype) == 0)
363 break;
364 }
365
366 fclose(fp);
367 if (strcmp(type, fstype) != 0)
368 return NULL;
369
370 return mnt;
371}
372
373int bpf_trace_pipe(void)
374{
375 char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
376 static const char * const tracefs_known_mnts[] = {
377 TRACE_DIR_MNT,
378 "/sys/kernel/debug/tracing",
379 "/tracing",
380 "/trace",
381 0,
382 };
383 char tpipe[PATH_MAX];
384 const char *mnt;
385 int fd;
386
387 mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
388 sizeof(tracefs_mnt), tracefs_known_mnts);
389 if (!mnt) {
390 fprintf(stderr, "tracefs not mounted?\n");
391 return -1;
392 }
393
394 snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
395
396 fd = open(tpipe, O_RDONLY);
397 if (fd < 0)
398 return -1;
399
400 fprintf(stderr, "Running! Hang up with ^C!\n\n");
401 while (1) {
402 static char buff[4096];
403 ssize_t ret;
404
405 ret = read(fd, buff, sizeof(buff) - 1);
406 if (ret > 0) {
407 write(2, buff, ret);
408 fflush(stderr);
409 }
410 }
411
412 return 0;
413}
414
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100415static const char *bpf_get_tc_dir(void)
416{
Stephen Hemminger32a121c2016-03-21 11:48:36 -0700417 static bool bpf_mnt_cached;
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100418 static char bpf_tc_dir[PATH_MAX];
419 static const char *mnt;
420 static const char * const bpf_known_mnts[] = {
421 BPF_DIR_MNT,
422 0,
423 };
424 char bpf_mnt[PATH_MAX] = BPF_DIR_MNT;
425 char bpf_glo_dir[PATH_MAX];
426 int ret;
427
428 if (bpf_mnt_cached)
429 goto done;
430
431 mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt),
432 bpf_known_mnts);
433 if (!mnt) {
434 mnt = getenv(BPF_ENV_MNT);
435 if (!mnt)
436 mnt = BPF_DIR_MNT;
437 ret = bpf_mnt_fs(mnt);
438 if (ret) {
439 mnt = NULL;
440 goto out;
441 }
442 }
443
444 snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC);
445 ret = mkdir(bpf_tc_dir, S_IRWXU);
446 if (ret && errno != EEXIST) {
447 fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir,
448 strerror(errno));
449 mnt = NULL;
450 goto out;
451 }
452
453 snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s",
454 bpf_tc_dir, BPF_DIR_GLOBALS);
455 ret = mkdir(bpf_glo_dir, S_IRWXU);
456 if (ret && errno != EEXIST) {
457 fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
458 strerror(errno));
459 mnt = NULL;
460 goto out;
461 }
462
463 mnt = bpf_tc_dir;
464out:
465 bpf_mnt_cached = true;
466done:
467 return mnt;
468}
469
470static int bpf_obj_get(const char *pathname)
471{
472 union bpf_attr attr;
473 char tmp[PATH_MAX];
474
475 if (strlen(pathname) > 2 && pathname[0] == 'm' &&
476 pathname[1] == ':' && bpf_get_tc_dir()) {
477 snprintf(tmp, sizeof(tmp), "%s/%s",
478 bpf_get_tc_dir(), pathname + 2);
479 pathname = tmp;
480 }
481
482 memset(&attr, 0, sizeof(attr));
483 attr.pathname = bpf_ptr_to_u64(pathname);
484
485 return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
486}
487
Daniel Borkmann6256f8c2015-04-01 17:57:44 +0200488const char *bpf_default_section(const enum bpf_prog_type type)
Daniel Borkmann11c39b52015-03-16 19:37:41 +0100489{
490 switch (type) {
491 case BPF_PROG_TYPE_SCHED_CLS:
492 return ELF_SECTION_CLASSIFIER;
Daniel Borkmann6256f8c2015-04-01 17:57:44 +0200493 case BPF_PROG_TYPE_SCHED_ACT:
494 return ELF_SECTION_ACTION;
Daniel Borkmann11c39b52015-03-16 19:37:41 +0100495 default:
496 return NULL;
497 }
498}
499
/*
 * Ways a BPF program can be handed to tc. The values double as indices
 * into the opt_tbl[] arrays that enable or disable each mode per caller.
 */
enum bpf_mode {
	CBPF_BYTECODE = 0,	/* "bytecode" / "bc": cBPF ops on the command line */
	CBPF_FILE,		/* "bytecode-file" / "bcf": cBPF ops read from a file */
	EBPF_OBJECT,		/* "object-file" / "obj": eBPF ELF object */
	EBPF_PINNED,		/* "object-pinned" / "pinned" / "fd": pinned eBPF object */
	__BPF_MODE_MAX,		/* number of modes, not a mode itself */
};

#define BPF_MODE_MAX	__BPF_MODE_MAX
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100508
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100509static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl,
510 enum bpf_prog_type *type, enum bpf_mode *mode,
511 const char **ptr_object, const char **ptr_section,
512 const char **ptr_uds_name, struct sock_filter *opcodes)
513{
514 const char *file, *section, *uds_name;
515 bool verbose = false;
516 int ret, argc;
517 char **argv;
518
519 argv = *ptr_argv;
520 argc = *ptr_argc;
521
522 if (opt_tbl[CBPF_BYTECODE] &&
523 (matches(*argv, "bytecode") == 0 ||
524 strcmp(*argv, "bc") == 0)) {
525 *mode = CBPF_BYTECODE;
526 } else if (opt_tbl[CBPF_FILE] &&
527 (matches(*argv, "bytecode-file") == 0 ||
528 strcmp(*argv, "bcf") == 0)) {
529 *mode = CBPF_FILE;
530 } else if (opt_tbl[EBPF_OBJECT] &&
531 (matches(*argv, "object-file") == 0 ||
532 strcmp(*argv, "obj") == 0)) {
533 *mode = EBPF_OBJECT;
534 } else if (opt_tbl[EBPF_PINNED] &&
535 (matches(*argv, "object-pinned") == 0 ||
536 matches(*argv, "pinned") == 0 ||
537 matches(*argv, "fd") == 0)) {
538 *mode = EBPF_PINNED;
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100539 } else {
540 fprintf(stderr, "What mode is \"%s\"?\n", *argv);
541 return -1;
542 }
543
544 NEXT_ARG();
545 file = section = uds_name = NULL;
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100546 if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100547 file = *argv;
548 NEXT_ARG_FWD();
549
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100550 if (*type == BPF_PROG_TYPE_UNSPEC) {
551 if (argc > 0 && matches(*argv, "type") == 0) {
552 NEXT_ARG();
553 if (matches(*argv, "cls") == 0) {
554 *type = BPF_PROG_TYPE_SCHED_CLS;
555 } else if (matches(*argv, "act") == 0) {
556 *type = BPF_PROG_TYPE_SCHED_ACT;
557 } else {
558 fprintf(stderr, "What type is \"%s\"?\n",
559 *argv);
560 return -1;
561 }
562 NEXT_ARG_FWD();
563 } else {
564 *type = BPF_PROG_TYPE_SCHED_CLS;
565 }
566 }
567
568 section = bpf_default_section(*type);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100569 if (argc > 0 && matches(*argv, "section") == 0) {
570 NEXT_ARG();
571 section = *argv;
572 NEXT_ARG_FWD();
573 }
574
575 uds_name = getenv(BPF_ENV_UDS);
576 if (argc > 0 && !uds_name &&
577 matches(*argv, "export") == 0) {
578 NEXT_ARG();
579 uds_name = *argv;
580 NEXT_ARG_FWD();
581 }
582
583 if (argc > 0 && matches(*argv, "verbose") == 0) {
584 verbose = true;
585 NEXT_ARG_FWD();
586 }
587
588 PREV_ARG();
589 }
590
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100591 if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
592 ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE);
593 else if (*mode == EBPF_OBJECT)
594 ret = bpf_obj_open(file, *type, section, verbose);
595 else if (*mode == EBPF_PINNED)
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100596 ret = bpf_obj_get(file);
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100597 else
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100598 return -1;
599
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100600 if (ptr_object)
601 *ptr_object = file;
602 if (ptr_section)
603 *ptr_section = section;
604 if (ptr_uds_name)
605 *ptr_uds_name = uds_name;
606
607 *ptr_argc = argc;
608 *ptr_argv = argv;
609
610 return ret;
611}
612
613int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
614 enum bpf_prog_type type, const char **ptr_object,
615 const char **ptr_uds_name, struct nlmsghdr *n)
616{
617 struct sock_filter opcodes[BPF_MAXINSNS];
618 const bool opt_tbl[BPF_MODE_MAX] = {
619 [CBPF_BYTECODE] = true,
620 [CBPF_FILE] = true,
621 [EBPF_OBJECT] = true,
622 [EBPF_PINNED] = true,
623 };
624 char annotation[256];
625 const char *section;
626 enum bpf_mode mode;
627 int ret;
628
629 ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode,
630 ptr_object, &section, ptr_uds_name, opcodes);
631 if (ret < 0)
632 return ret;
633
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100634 if (mode == CBPF_BYTECODE || mode == CBPF_FILE) {
635 addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret);
636 addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes,
637 ret * sizeof(struct sock_filter));
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100638 }
639
640 if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100641 snprintf(annotation, sizeof(annotation), "%s:[%s]",
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100642 basename(*ptr_object), mode == EBPF_PINNED ?
643 "*fsobj" : section);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100644
645 addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret);
646 addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation);
647 }
648
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100649 return 0;
650}
651
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100652int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
653{
654 enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
655 const bool opt_tbl[BPF_MODE_MAX] = {
656 [CBPF_BYTECODE] = false,
657 [CBPF_FILE] = false,
658 [EBPF_OBJECT] = true,
659 [EBPF_PINNED] = true,
660 };
661 const struct bpf_elf_map test = {
662 .type = BPF_MAP_TYPE_PROG_ARRAY,
663 .size_key = sizeof(int),
664 .size_value = sizeof(int),
665 };
666 int ret, prog_fd, map_fd;
667 const char *section;
668 enum bpf_mode mode;
669 uint32_t map_key;
670
671 prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode,
672 NULL, &section, NULL, NULL);
673 if (prog_fd < 0)
674 return prog_fd;
675 if (key) {
676 map_key = *key;
677 } else {
678 ret = sscanf(section, "%*i/%i", &map_key);
679 if (ret != 1) {
Stephen Hemminger32a121c2016-03-21 11:48:36 -0700680 fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
Daniel Borkmann91d88ee2015-11-26 15:38:45 +0100681 ret = -EINVAL;
682 goto out_prog;
683 }
684 }
685
686 map_fd = bpf_obj_get(map_path);
687 if (map_fd < 0) {
688 fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
689 map_path, strerror(errno));
690 ret = map_fd;
691 goto out_prog;
692 }
693
694 ret = bpf_map_selfcheck_pinned(map_fd, &test,
695 offsetof(struct bpf_elf_map, max_elem));
696 if (ret < 0) {
697 fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
698 goto out_map;
699 }
700
701 ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
702 if (ret < 0)
703 fprintf(stderr, "Map update failed: %s\n", strerror(errno));
704out_map:
705 close(map_fd);
706out_prog:
707 close(prog_fd);
708 return ret;
709}
710
Daniel Borkmann6256f8c2015-04-01 17:57:44 +0200711#ifdef HAVE_ELF
/* Description of one eBPF program that is about to be loaded. */
struct bpf_elf_prog {
	enum bpf_prog_type	type;		/* program type to load it as */
	const struct bpf_insn	*insns;		/* instruction array */
	size_t			size;		/* presumably the size of insns in bytes - confirm at the use site */
	const char		*license;	/* license string */
};
718
/*
 * One custom pinning rule: maps a pinning id to a sub-path. Entries that
 * fall into the same hash bucket are chained through @next.
 */
struct bpf_hash_entry {
	unsigned int		pinning;	/* pinning id this entry answers for */
	const char		*subpath;	/* sub-path associated with the id */
	struct bpf_hash_entry	*next;		/* next entry in the bucket, or NULL */
};
724
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100725struct bpf_elf_ctx {
726 Elf *elf_fd;
727 GElf_Ehdr elf_hdr;
728 Elf_Data *sym_tab;
729 Elf_Data *str_tab;
730 int obj_fd;
731 int map_fds[ELF_MAX_MAPS];
732 struct bpf_elf_map maps[ELF_MAX_MAPS];
733 int sym_num;
734 int map_num;
735 bool *sec_done;
736 int sec_maps;
737 char license[ELF_MAX_LICENSE_LEN];
738 enum bpf_prog_type type;
739 bool verbose;
740 struct bpf_elf_st stat;
Daniel Borkmannf6793ee2015-11-26 15:38:44 +0100741 struct bpf_hash_entry *ht[256];
Daniel Borkmannf31645d2016-02-07 02:11:51 +0100742 char *log;
743 size_t log_size;
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100744};
745
Daniel Borkmann6256f8c2015-04-01 17:57:44 +0200746struct bpf_elf_sec_data {
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100747 GElf_Shdr sec_hdr;
748 Elf_Data *sec_data;
749 const char *sec_name;
Daniel Borkmann6256f8c2015-04-01 17:57:44 +0200750};
751
/*
 * Bundle of map-related pointers; its users lie outside this part of the
 * file, so the member notes are tentative.
 */
struct bpf_map_data {
	int			*fds;	/* presumably the map file descriptors - confirm */
	const char		*obj;	/* presumably the object file name - confirm */
	struct bpf_elf_st	*st;
	struct bpf_elf_map	*ent;	/* presumably the map specifications - confirm */
};
758
Daniel Borkmannf31645d2016-02-07 02:11:51 +0100759static __check_format_string(2, 3) void
760bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
Daniel Borkmann11c39b52015-03-16 19:37:41 +0100761{
762 va_list vl;
763
764 va_start(vl, format);
765 vfprintf(stderr, format, vl);
766 va_end(vl);
767
Daniel Borkmannf31645d2016-02-07 02:11:51 +0100768 if (ctx->log && ctx->log[0]) {
Daniel Borkmannafc1a202016-04-09 00:32:04 +0200769 if (ctx->verbose) {
770 fprintf(stderr, "%s\n", ctx->log);
771 } else {
772 unsigned int off = 0, len = strlen(ctx->log);
773
774 if (len > BPF_MAX_LOG) {
775 off = len - BPF_MAX_LOG;
776 fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
777 off);
778 }
779 fprintf(stderr, "%s\n", ctx->log + off);
780 }
781
Daniel Borkmannf31645d2016-02-07 02:11:51 +0100782 memset(ctx->log, 0, ctx->log_size);
Daniel Borkmannd937a742015-04-28 13:37:42 +0200783 }
Daniel Borkmann11c39b52015-03-16 19:37:41 +0100784}
785
Daniel Borkmannf31645d2016-02-07 02:11:51 +0100786static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
787{
788 size_t log_size = ctx->log_size;
789 void *ptr;
790
791 if (!ctx->log) {
792 log_size = 65536;
793 } else {
794 log_size <<= 1;
795 if (log_size > (UINT_MAX >> 8))
796 return -EINVAL;
797 }
798
799 ptr = realloc(ctx->log, log_size);
800 if (!ptr)
801 return -ENOMEM;
802
803 ctx->log = ptr;
804 ctx->log_size = log_size;
805
806 return 0;
807}
808
/*
 * Create a map through BPF_MAP_CREATE with the given type, key size,
 * value size, maximum number of entries and flags. Returns whatever
 * bpf() returns.
 */
static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
			  uint32_t size_value, uint32_t max_elem,
			  uint32_t flags)
{
	union bpf_attr req;

	memset(&req, 0, sizeof(req));
	req.map_flags = flags;
	req.max_entries = max_elem;
	req.value_size = size_value;
	req.key_size = size_key;
	req.map_type = type;

	return bpf(BPF_MAP_CREATE, &req, sizeof(req));
}
824
/*
 * Load a program through BPF_PROG_LOAD.
 *
 * @insns/@size_insns: instruction array and its size in bytes
 * @license:           license string handed to the kernel
 * @log/@size_log:     log buffer; logging (level 1) is requested only
 *                     when @size_log is non-zero
 *
 * Returns whatever bpf() returns.
 */
static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
			 size_t size_insns, const char *license, char *log,
			 size_t size_log)
{
	union bpf_attr req;

	memset(&req, 0, sizeof(req));
	req.license = bpf_ptr_to_u64(license);
	req.insn_cnt = size_insns / sizeof(struct bpf_insn);
	req.insns = bpf_ptr_to_u64(insns);
	req.prog_type = type;

	if (size_log > 0) {
		req.log_level = 1;
		req.log_size = size_log;
		req.log_buf = bpf_ptr_to_u64(log);
	}

	return bpf(BPF_PROG_LOAD, &req, sizeof(req));
}
845
/* Pin the BPF object behind @fd at @pathname through BPF_OBJ_PIN. */
static int bpf_obj_pin(int fd, const char *pathname)
{
	union bpf_attr req;

	memset(&req, 0, sizeof(req));
	req.bpf_fd = fd;
	req.pathname = bpf_ptr_to_u64(pathname);

	return bpf(BPF_OBJ_PIN, &req, sizeof(req));
}
856
/*
 * Compute the SHA-1 digest of file @object using the kernel crypto API
 * (an AF_ALG "hash" socket): the file is pushed into the operation socket
 * with sendfile() and the digest is read back from it.
 *
 * @out/@len: digest buffer; @len must be exactly 20.
 *
 * Returns 0 on success, -EINVAL for bad arguments and a negative value
 * when any socket or file operation fails (a message is printed).
 */
static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family	= AF_ALG,
		.salg_type	= "hash",
		.salg_name	= "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0) {
		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
			strerror(errno));
		return cfd;
	}

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0) {
		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
		goto out_cfd;
	}

	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		fprintf(stderr, "Error accepting socket: %s\n",
			strerror(errno));
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		/* st_size is an off_t, which need not match size_t; print
		 * it through intmax_t so the format is always correct.
		 */
		fprintf(stderr, "Error from sendfile (%zd vs %jd bytes): %s\n",
			size, (intmax_t)stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	size = read(ofd, out, len);
	if (size < 0 || (size_t)size != len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}
931
/*
 * Return a hexadecimal identifier derived from the hash of the object
 * file at @pathname. The string is computed on the first successful call
 * and returned unchanged afterwards, whatever @pathname is passed later.
 * Returns NULL when hashing fails; a later call will then try again.
 */
static const char *bpf_get_obj_uid(const char *pathname)
{
	static bool bpf_uid_cached;
	static char bpf_uid[64];
	uint8_t digest[20];

	if (!bpf_uid_cached) {
		if (bpf_obj_hash(pathname, digest, sizeof(digest))) {
			fprintf(stderr, "Object hashing failed!\n");
			return NULL;
		}

		hexstring_n2a(digest, sizeof(digest), bpf_uid,
			      sizeof(bpf_uid));
		bpf_uid_cached = true;
	}

	return bpf_uid;
}
953
/*
 * Prepare the process for loading object @pathname: lift the locked
 * memory limit (best effort), make sure the tc directory in the bpf
 * filesystem exists and compute the object's identifier.
 *
 * A missing bpf filesystem only produces a notice and still counts as
 * success. Returns -1 only when the object identifier cannot be computed.
 */
static int bpf_init_env(const char *pathname)
{
	struct rlimit limit = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};

	/* Don't bother in case we fail! */
	setrlimit(RLIMIT_MEMLOCK, &limit);

	if (bpf_get_tc_dir() == NULL) {
		fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
		return 0;
	}

	return bpf_get_obj_uid(pathname) ? 0 : -1;
}
974
Daniel Borkmannf6793ee2015-11-26 15:38:44 +0100975static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
976 uint32_t pinning)
977{
978 struct bpf_hash_entry *entry;
979
980 entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
981 while (entry && entry->pinning != pinning)
982 entry = entry->next;
983
984 return entry ? entry->subpath : NULL;
985}
986
987static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
988 uint32_t pinning)
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100989{
990 switch (pinning) {
991 case PIN_OBJECT_NS:
992 case PIN_GLOBAL_NS:
993 return false;
994 case PIN_NONE:
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100995 return true;
Daniel Borkmannf6793ee2015-11-26 15:38:44 +0100996 default:
997 return !bpf_custom_pinning(ctx, pinning);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +0100998 }
999}
1000
1001static void bpf_make_pathname(char *pathname, size_t len, const char *name,
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001002 const struct bpf_elf_ctx *ctx, uint32_t pinning)
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001003{
1004 switch (pinning) {
1005 case PIN_OBJECT_NS:
1006 snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
1007 bpf_get_obj_uid(NULL), name);
1008 break;
1009 case PIN_GLOBAL_NS:
1010 snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
1011 BPF_DIR_GLOBALS, name);
1012 break;
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001013 default:
1014 snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(),
1015 bpf_custom_pinning(ctx, pinning), name);
1016 break;
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001017 }
1018}
1019
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001020static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
1021 uint32_t pinning)
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001022{
1023 char pathname[PATH_MAX];
1024
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001025 if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001026 return 0;
1027
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001028 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001029 return bpf_obj_get(pathname);
1030}
1031
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001032static int bpf_make_obj_path(void)
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001033{
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001034 char tmp[PATH_MAX];
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001035 int ret;
1036
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001037 snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(),
1038 bpf_get_obj_uid(NULL));
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001039
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001040 ret = mkdir(tmp, S_IRWXU);
1041 if (ret && errno != EEXIST) {
1042 fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
1043 return ret;
1044 }
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001045
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001046 return 0;
1047}
1048
1049static int bpf_make_custom_path(const char *todo)
1050{
1051 char tmp[PATH_MAX], rem[PATH_MAX], *sub;
1052 int ret;
1053
1054 snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir());
1055 snprintf(rem, sizeof(rem), "%s/", todo);
1056 sub = strtok(rem, "/");
1057
1058 while (sub) {
1059 if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
1060 return -EINVAL;
1061
1062 strcat(tmp, sub);
1063 strcat(tmp, "/");
1064
1065 ret = mkdir(tmp, S_IRWXU);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001066 if (ret && errno != EEXIST) {
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001067 fprintf(stderr, "mkdir %s failed: %s\n", tmp,
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001068 strerror(errno));
1069 return ret;
1070 }
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001071
1072 sub = strtok(NULL, "/");
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001073 }
1074
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001075 return 0;
1076}
1077
1078static int bpf_place_pinned(int fd, const char *name,
1079 const struct bpf_elf_ctx *ctx, uint32_t pinning)
1080{
1081 char pathname[PATH_MAX];
1082 const char *tmp;
1083 int ret = 0;
1084
1085 if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
1086 return 0;
1087
1088 if (pinning == PIN_OBJECT_NS)
1089 ret = bpf_make_obj_path();
1090 else if ((tmp = bpf_custom_pinning(ctx, pinning)))
1091 ret = bpf_make_custom_path(tmp);
1092 if (ret < 0)
1093 return ret;
1094
1095 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001096 return bpf_obj_pin(fd, pathname);
1097}
1098
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001099static void bpf_prog_report(int fd, const char *section,
1100 const struct bpf_elf_prog *prog,
1101 struct bpf_elf_ctx *ctx)
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001102{
Daniel Borkmannafc1a202016-04-09 00:32:04 +02001103 unsigned int insns = prog->size / sizeof(struct bpf_insn);
1104
1105 fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001106 fd < 0 ? "rejected: " : "loaded",
1107 fd < 0 ? strerror(errno) : "",
1108 fd < 0 ? errno : fd);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001109
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001110 fprintf(stderr, " - Type: %u\n", prog->type);
Daniel Borkmannafc1a202016-04-09 00:32:04 +02001111 fprintf(stderr, " - Instructions: %u (%u over limit)\n",
1112 insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001113 fprintf(stderr, " - License: %s\n\n", prog->license);
1114
1115 bpf_dump_error(ctx, "Verifier analysis:\n\n");
1116}
1117
1118static int bpf_prog_attach(const char *section,
1119 const struct bpf_elf_prog *prog,
1120 struct bpf_elf_ctx *ctx)
1121{
1122 int tries = 0, fd;
1123retry:
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001124 errno = 0;
1125 fd = bpf_prog_load(prog->type, prog->insns, prog->size,
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001126 prog->license, ctx->log, ctx->log_size);
1127 if (fd < 0 || ctx->verbose) {
1128 /* The verifier log is pretty chatty, sometimes so chatty
1129 * on larger programs, that we could fail to dump everything
1130 * into our buffer. Still, try to give a debuggable error
1131 * log for the user, so enlarge it and re-fail.
1132 */
1133 if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
1134 if (tries++ < 6 && !bpf_log_realloc(ctx))
1135 goto retry;
1136
Stephen Hemminger32a121c2016-03-21 11:48:36 -07001137 fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001138 ctx->log_size, tries);
1139 return fd;
1140 }
1141
1142 bpf_prog_report(fd, section, prog, ctx);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001143 }
1144
1145 return fd;
1146}
1147
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001148static void bpf_map_report(int fd, const char *name,
1149 const struct bpf_elf_map *map,
1150 struct bpf_elf_ctx *ctx)
1151{
1152 fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
1153 fd < 0 ? "rejected: " : "loaded",
1154 fd < 0 ? strerror(errno) : "",
1155 fd < 0 ? errno : fd);
1156
1157 fprintf(stderr, " - Type: %u\n", map->type);
1158 fprintf(stderr, " - Identifier: %u\n", map->id);
1159 fprintf(stderr, " - Pinning: %u\n", map->pinning);
1160 fprintf(stderr, " - Size key: %u\n", map->size_key);
1161 fprintf(stderr, " - Size value: %u\n", map->size_value);
Daniel Borkmann4dd3f502016-04-09 00:32:05 +02001162 fprintf(stderr, " - Max elems: %u\n", map->max_elem);
1163 fprintf(stderr, " - Flags: %#x\n\n", map->flags);
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001164}
1165
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001166static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001167 struct bpf_elf_ctx *ctx)
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001168{
1169 int fd, ret;
1170
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001171 fd = bpf_probe_pinned(name, ctx, map->pinning);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001172 if (fd > 0) {
Daniel Borkmann91d88ee2015-11-26 15:38:45 +01001173 ret = bpf_map_selfcheck_pinned(fd, map,
1174 offsetof(struct bpf_elf_map,
1175 id));
Daniel Borkmann9e607f22015-11-26 15:38:43 +01001176 if (ret < 0) {
1177 close(fd);
1178 fprintf(stderr, "Map \'%s\' self-check failed!\n",
1179 name);
1180 return ret;
1181 }
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001182 if (ctx->verbose)
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001183 fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
1184 name);
1185 return fd;
1186 }
1187
1188 errno = 0;
1189 fd = bpf_map_create(map->type, map->size_key, map->size_value,
Daniel Borkmann4dd3f502016-04-09 00:32:05 +02001190 map->max_elem, map->flags);
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001191 if (fd < 0 || ctx->verbose) {
1192 bpf_map_report(fd, name, map, ctx);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001193 if (fd < 0)
1194 return fd;
1195 }
1196
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001197 ret = bpf_place_pinned(fd, name, ctx, map->pinning);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001198 if (ret < 0 && errno != EEXIST) {
1199 fprintf(stderr, "Could not pin %s map: %s\n", name,
1200 strerror(errno));
1201 close(fd);
1202 return ret;
1203 }
1204
1205 return fd;
1206}
1207
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001208static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
1209 const GElf_Sym *sym)
1210{
1211 return ctx->str_tab->d_buf + sym->st_name;
1212}
1213
1214static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
1215{
1216 GElf_Sym sym;
1217 int i;
1218
1219 for (i = 0; i < ctx->sym_num; i++) {
1220 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
1221 continue;
1222
Daniel Borkmann5230a2e2016-02-07 02:11:53 +01001223 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
1224 GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001225 sym.st_shndx != ctx->sec_maps ||
1226 sym.st_value / sizeof(struct bpf_elf_map) != which)
1227 continue;
1228
1229 return bpf_str_tab_name(ctx, &sym);
1230 }
1231
1232 return NULL;
1233}
1234
1235static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
1236{
1237 const char *map_name;
1238 int i, fd;
1239
1240 for (i = 0; i < ctx->map_num; i++) {
1241 map_name = bpf_map_fetch_name(ctx, i);
1242 if (!map_name)
1243 return -EIO;
1244
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001245 fd = bpf_map_attach(map_name, &ctx->maps[i], ctx);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001246 if (fd < 0)
1247 return fd;
1248
1249 ctx->map_fds[i] = fd;
1250 }
1251
1252 return 0;
1253}
1254
1255static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
1256 struct bpf_elf_sec_data *data)
1257{
1258 Elf_Data *sec_edata;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001259 GElf_Shdr sec_hdr;
1260 Elf_Scn *sec_fd;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001261 char *sec_name;
1262
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001263 memset(data, 0, sizeof(*data));
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001264
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001265 sec_fd = elf_getscn(ctx->elf_fd, section);
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001266 if (!sec_fd)
1267 return -EINVAL;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001268 if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
1269 return -EIO;
1270
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001271 sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001272 sec_hdr.sh_name);
1273 if (!sec_name || !sec_hdr.sh_size)
1274 return -ENOENT;
1275
1276 sec_edata = elf_getdata(sec_fd, NULL);
1277 if (!sec_edata || elf_getdata(sec_fd, sec_edata))
1278 return -EIO;
1279
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001280 memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001281
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001282 data->sec_name = sec_name;
1283 data->sec_data = sec_edata;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001284 return 0;
1285}
1286
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001287static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section,
1288 struct bpf_elf_sec_data *data)
1289{
1290 if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0)
1291 return -EINVAL;
1292
1293 ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map);
1294 ctx->sec_maps = section;
1295 ctx->sec_done[section] = true;
1296
1297 if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) {
1298 fprintf(stderr, "Too many BPF maps in ELF section!\n");
1299 return -ENOMEM;
1300 }
1301
1302 memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size);
1303 return 0;
1304}
1305
1306static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
1307 struct bpf_elf_sec_data *data)
1308{
1309 if (data->sec_data->d_size > sizeof(ctx->license))
1310 return -ENOMEM;
1311
1312 memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
1313 ctx->sec_done[section] = true;
1314 return 0;
1315}
1316
1317static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
1318 struct bpf_elf_sec_data *data)
1319{
1320 ctx->sym_tab = data->sec_data;
1321 ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
1322 ctx->sec_done[section] = true;
1323 return 0;
1324}
1325
1326static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
1327 struct bpf_elf_sec_data *data)
1328{
1329 ctx->str_tab = data->sec_data;
1330 ctx->sec_done[section] = true;
1331 return 0;
1332}
1333
Daniel Borkmannafc1a202016-04-09 00:32:04 +02001334static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
1335{
1336 return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
1337}
1338
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001339static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
1340{
1341 struct bpf_elf_sec_data data;
1342 int i, ret = -1;
1343
1344 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1345 ret = bpf_fill_section_data(ctx, i, &data);
1346 if (ret < 0)
1347 continue;
1348
Daniel Borkmanncce3d462016-01-12 02:03:07 +01001349 if (data.sec_hdr.sh_type == SHT_PROGBITS &&
1350 !strcmp(data.sec_name, ELF_SECTION_MAPS))
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001351 ret = bpf_fetch_maps(ctx, i, &data);
Daniel Borkmanncce3d462016-01-12 02:03:07 +01001352 else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
1353 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001354 ret = bpf_fetch_license(ctx, i, &data);
Daniel Borkmanncce3d462016-01-12 02:03:07 +01001355 else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
1356 !strcmp(data.sec_name, ".symtab"))
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001357 ret = bpf_fetch_symtab(ctx, i, &data);
1358 else if (data.sec_hdr.sh_type == SHT_STRTAB &&
Daniel Borkmanncce3d462016-01-12 02:03:07 +01001359 !strcmp(data.sec_name, ".strtab"))
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001360 ret = bpf_fetch_strtab(ctx, i, &data);
1361 if (ret < 0) {
Daniel Borkmannafc1a202016-04-09 00:32:04 +02001362 fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
Stephen Hemminger32a121c2016-03-21 11:48:36 -07001363 i);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001364 break;
1365 }
1366 }
1367
Daniel Borkmannafc1a202016-04-09 00:32:04 +02001368 if (bpf_has_map_data(ctx)) {
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001369 ret = bpf_maps_attach_all(ctx);
1370 if (ret < 0) {
1371 fprintf(stderr, "Error loading maps into kernel!\n");
1372 return ret;
1373 }
1374 }
1375
1376 return ret;
1377}
1378
1379static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section)
1380{
1381 struct bpf_elf_sec_data data;
1382 struct bpf_elf_prog prog;
1383 int ret, i, fd = -1;
1384
1385 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1386 if (ctx->sec_done[i])
1387 continue;
1388
1389 ret = bpf_fill_section_data(ctx, i, &data);
Daniel Borkmanncce3d462016-01-12 02:03:07 +01001390 if (ret < 0 ||
1391 !(data.sec_hdr.sh_type == SHT_PROGBITS &&
1392 data.sec_hdr.sh_flags & SHF_EXECINSTR &&
1393 !strcmp(data.sec_name, section)))
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001394 continue;
1395
1396 memset(&prog, 0, sizeof(prog));
1397 prog.type = ctx->type;
1398 prog.insns = data.sec_data->d_buf;
1399 prog.size = data.sec_data->d_size;
1400 prog.license = ctx->license;
1401
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001402 fd = bpf_prog_attach(section, &prog, ctx);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001403 if (fd < 0)
Daniel Borkmannafc1a202016-04-09 00:32:04 +02001404 break;
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001405
1406 ctx->sec_done[i] = true;
1407 break;
1408 }
1409
1410 return fd;
1411}
1412
1413static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
1414 struct bpf_elf_sec_data *data_relo,
1415 struct bpf_elf_sec_data *data_insn)
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001416{
1417 Elf_Data *idata = data_insn->sec_data;
1418 GElf_Shdr *rhdr = &data_relo->sec_hdr;
1419 int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
1420 struct bpf_insn *insns = idata->d_buf;
1421 unsigned int num_insns = idata->d_size / sizeof(*insns);
1422
1423 for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001424 unsigned int ioff, rmap;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001425 GElf_Rel relo;
1426 GElf_Sym sym;
1427
1428 if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
1429 return -EIO;
1430
1431 ioff = relo.r_offset / sizeof(struct bpf_insn);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001432 if (ioff >= num_insns ||
Daniel Borkmanna576c6b2016-02-07 02:11:52 +01001433 insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
Stephen Hemminger32a121c2016-03-21 11:48:36 -07001434 fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
Daniel Borkmanna576c6b2016-02-07 02:11:52 +01001435 ioff);
1436 if (ioff < num_insns &&
1437 insns[ioff].code == (BPF_JMP | BPF_CALL))
Stephen Hemminger32a121c2016-03-21 11:48:36 -07001438 fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001439 return -EINVAL;
Daniel Borkmanna576c6b2016-02-07 02:11:52 +01001440 }
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001441
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001442 if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001443 return -EIO;
Daniel Borkmann24863372016-01-22 00:46:28 +01001444 if (sym.st_shndx != ctx->sec_maps) {
Stephen Hemminger32a121c2016-03-21 11:48:36 -07001445 fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
Daniel Borkmann24863372016-01-22 00:46:28 +01001446 relo_ent, sym.st_shndx);
1447 return -EIO;
1448 }
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001449
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001450 rmap = sym.st_value / sizeof(struct bpf_elf_map);
1451 if (rmap >= ARRAY_SIZE(ctx->map_fds))
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001452 return -EINVAL;
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001453 if (!ctx->map_fds[rmap])
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001454 return -EINVAL;
1455
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001456 if (ctx->verbose)
Stephen Hemminger32a121c2016-03-21 11:48:36 -07001457 fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001458 bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
1459 data_insn->sec_name, ioff);
1460
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001461 insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001462 insns[ioff].imm = ctx->map_fds[rmap];
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001463 }
1464
1465 return 0;
1466}
1467
Daniel Borkmannafc1a202016-04-09 00:32:04 +02001468static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
1469 bool *lderr)
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001470{
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001471 struct bpf_elf_sec_data data_relo, data_insn;
1472 struct bpf_elf_prog prog;
1473 int ret, idx, i, fd = -1;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001474
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001475 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1476 ret = bpf_fill_section_data(ctx, i, &data_relo);
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001477 if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
1478 continue;
1479
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001480 idx = data_relo.sec_hdr.sh_info;
1481 ret = bpf_fill_section_data(ctx, idx, &data_insn);
Daniel Borkmanncce3d462016-01-12 02:03:07 +01001482 if (ret < 0 ||
1483 !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
1484 data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
1485 !strcmp(data_insn.sec_name, section)))
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001486 continue;
1487
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001488 ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001489 if (ret < 0)
1490 continue;
1491
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001492 memset(&prog, 0, sizeof(prog));
1493 prog.type = ctx->type;
1494 prog.insns = data_insn.sec_data->d_buf;
1495 prog.size = data_insn.sec_data->d_size;
1496 prog.license = ctx->license;
1497
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001498 fd = bpf_prog_attach(section, &prog, ctx);
Daniel Borkmannafc1a202016-04-09 00:32:04 +02001499 if (fd < 0) {
1500 *lderr = true;
1501 break;
1502 }
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001503
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001504 ctx->sec_done[i] = true;
1505 ctx->sec_done[idx] = true;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001506 break;
1507 }
1508
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001509 return fd;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001510}
1511
/*
 * Load the program in "section" and return its fd (negative on failure).
 *
 * With map data available the relocation based path is tried first. The
 * plain path is used when there is no map data, or when the relocation path
 * found nothing to load; it is not used after an actual load failure.
 */
static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
{
	bool load_failed = false;
	int fd;

	if (bpf_has_map_data(ctx)) {
		fd = bpf_fetch_prog_relo(ctx, section, &load_failed);
		if (fd >= 0 || load_failed)
			return fd;
	}

	return bpf_fetch_prog(ctx, section);
}
1524
Daniel Borkmann910b5432015-11-26 15:38:42 +01001525static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
1526{
1527 int i;
1528
1529 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
1530 if (ctx->map_fds[i] && ctx->maps[i].id == id &&
1531 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
1532 return i;
1533 return -1;
1534}
1535
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001536static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
Daniel Borkmann473d7842015-05-29 15:15:44 +02001537{
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001538 struct bpf_elf_sec_data data;
1539 uint32_t map_id, key_id;
Daniel Borkmann910b5432015-11-26 15:38:42 +01001540 int fd, i, ret, idx;
Daniel Borkmann473d7842015-05-29 15:15:44 +02001541
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001542 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1543 if (ctx->sec_done[i])
Daniel Borkmann473d7842015-05-29 15:15:44 +02001544 continue;
1545
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001546 ret = bpf_fill_section_data(ctx, i, &data);
Daniel Borkmann473d7842015-05-29 15:15:44 +02001547 if (ret < 0)
1548 continue;
1549
Daniel Borkmann910b5432015-11-26 15:38:42 +01001550 ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
1551 if (ret != 2)
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001552 continue;
Daniel Borkmann910b5432015-11-26 15:38:42 +01001553
1554 idx = bpf_find_map_by_id(ctx, map_id);
1555 if (idx < 0)
Daniel Borkmann473d7842015-05-29 15:15:44 +02001556 continue;
1557
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001558 fd = bpf_fetch_prog_sec(ctx, data.sec_name);
1559 if (fd < 0)
Daniel Borkmann473d7842015-05-29 15:15:44 +02001560 return -EIO;
1561
Daniel Borkmann910b5432015-11-26 15:38:42 +01001562 ret = bpf_map_update(ctx->map_fds[idx], &key_id,
1563 &fd, BPF_ANY);
Daniel Borkmannafc1a202016-04-09 00:32:04 +02001564 if (ret < 0) {
1565 if (errno == E2BIG)
1566 fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
1567 key_id, map_id);
1568 return -errno;
1569 }
Daniel Borkmann473d7842015-05-29 15:15:44 +02001570
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001571 ctx->sec_done[i] = true;
Daniel Borkmann473d7842015-05-29 15:15:44 +02001572 }
1573
1574 return 0;
1575}
1576
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001577static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001578{
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001579 struct stat st;
1580 int ret;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001581
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001582 memset(&ctx->stat, 0, sizeof(ctx->stat));
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001583
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001584 ret = fstat(ctx->obj_fd, &st);
1585 if (ret < 0) {
1586 fprintf(stderr, "Stat of elf file failed: %s\n",
1587 strerror(errno));
1588 return;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001589 }
1590
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001591 ctx->stat.st_dev = st.st_dev;
1592 ctx->stat.st_ino = st.st_ino;
1593}
1594
/*
 * Read the next "<id> <path>" mapping from the pinning database fp.
 *
 * Leading blanks are ignored, as are empty lines and lines starting with
 * '#'. Anything following the path on the same line is ignored as well.
 *
 * Returns 1 with *id and path filled in, 0 at end of file, or -1 for a line
 * that cannot be parsed, in which case path receives the offending line.
 * path has to provide room for PATH_MAX bytes, the size of the line buffer.
 */
static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
{
	char buff[PATH_MAX];

	while (fgets(buff, sizeof(buff), fp)) {
		char *ptr = buff;
		int value;

		while (*ptr == ' ' || *ptr == '\t')
			ptr++;

		if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
			continue;

		/* %i stores through an int pointer, so parse into an int
		 * instead of handing it the uint32_t directly.
		 */
		if (sscanf(ptr, "%i %s", &value, path) != 2) {
			strcpy(path, ptr);
			return -1;
		}

		*id = (uint32_t)value;
		return 1;
	}

	return 0;
}
1619
1620static bool bpf_pinning_reserved(uint32_t pinning)
1621{
1622 switch (pinning) {
1623 case PIN_NONE:
1624 case PIN_OBJECT_NS:
1625 case PIN_GLOBAL_NS:
1626 return true;
1627 default:
1628 return false;
1629 }
1630}
1631
1632static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
1633{
1634 struct bpf_hash_entry *entry;
1635 char subpath[PATH_MAX];
1636 uint32_t pinning;
1637 FILE *fp;
1638 int ret;
1639
1640 fp = fopen(db_file, "r");
1641 if (!fp)
1642 return;
1643
1644 memset(subpath, 0, sizeof(subpath));
1645 while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
1646 if (ret == -1) {
1647 fprintf(stderr, "Database %s is corrupted at: %s\n",
1648 db_file, subpath);
1649 fclose(fp);
1650 return;
1651 }
1652
1653 if (bpf_pinning_reserved(pinning)) {
Stephen Hemminger32a121c2016-03-21 11:48:36 -07001654 fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
1655 db_file, pinning);
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001656 continue;
1657 }
1658
1659 entry = malloc(sizeof(*entry));
1660 if (!entry) {
1661 fprintf(stderr, "No memory left for db entry!\n");
1662 continue;
1663 }
1664
1665 entry->pinning = pinning;
1666 entry->subpath = strdup(subpath);
1667 if (!entry->subpath) {
1668 fprintf(stderr, "No memory left for db entry!\n");
1669 free(entry);
1670 continue;
1671 }
1672
1673 entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
1674 ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
1675 }
1676
1677 fclose(fp);
1678}
1679
1680static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
1681{
1682 struct bpf_hash_entry *entry;
1683 int i;
1684
1685 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
1686 while ((entry = ctx->ht[i]) != NULL) {
1687 ctx->ht[i] = entry->next;
1688 free((char *)entry->subpath);
1689 free(entry);
1690 }
1691 }
1692}
1693
Daniel Borkmann8187b012016-01-12 02:03:08 +01001694static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
1695{
1696 if (ctx->elf_hdr.e_type != ET_REL ||
Daniel Borkmanne77fa412016-07-19 01:09:52 +02001697 (ctx->elf_hdr.e_machine != EM_NONE &&
1698 ctx->elf_hdr.e_machine != EM_BPF) ||
Daniel Borkmann8187b012016-01-12 02:03:08 +01001699 ctx->elf_hdr.e_version != EV_CURRENT) {
1700 fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
1701 return -EINVAL;
1702 }
1703
1704 switch (ctx->elf_hdr.e_ident[EI_DATA]) {
1705 default:
1706 fprintf(stderr, "ELF format error, wrong endianness info?\n");
1707 return -EINVAL;
1708 case ELFDATA2LSB:
1709 if (htons(1) == 1) {
1710 fprintf(stderr,
1711 "We are big endian, eBPF object is little endian!\n");
1712 return -EIO;
1713 }
1714 break;
1715 case ELFDATA2MSB:
1716 if (htons(1) != 1) {
1717 fprintf(stderr,
1718 "We are little endian, eBPF object is big endian!\n");
1719 return -EIO;
1720 }
1721 break;
1722 }
1723
1724 return 0;
1725}
1726
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001727static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
1728 enum bpf_prog_type type, bool verbose)
1729{
1730 int ret = -EINVAL;
1731
1732 if (elf_version(EV_CURRENT) == EV_NONE ||
1733 bpf_init_env(pathname))
1734 return ret;
1735
1736 memset(ctx, 0, sizeof(*ctx));
1737 ctx->verbose = verbose;
1738 ctx->type = type;
1739
1740 ctx->obj_fd = open(pathname, O_RDONLY);
1741 if (ctx->obj_fd < 0)
1742 return ctx->obj_fd;
1743
1744 ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
1745 if (!ctx->elf_fd) {
1746 ret = -EINVAL;
1747 goto out_fd;
1748 }
1749
Daniel Borkmann8187b012016-01-12 02:03:08 +01001750 if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
1751 ret = -EINVAL;
1752 goto out_fd;
1753 }
1754
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001755 if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
1756 &ctx->elf_hdr) {
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001757 ret = -EIO;
1758 goto out_elf;
1759 }
1760
Daniel Borkmann8187b012016-01-12 02:03:08 +01001761 ret = bpf_elf_check_ehdr(ctx);
1762 if (ret < 0)
1763 goto out_elf;
1764
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001765 ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
1766 sizeof(*(ctx->sec_done)));
1767 if (!ctx->sec_done) {
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001768 ret = -ENOMEM;
1769 goto out_elf;
1770 }
1771
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001772 if (ctx->verbose && bpf_log_realloc(ctx)) {
1773 ret = -ENOMEM;
1774 goto out_free;
1775 }
1776
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001777 bpf_save_finfo(ctx);
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001778 bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
1779
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001780 return 0;
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001781out_free:
1782 free(ctx->sec_done);
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001783out_elf:
1784 elf_end(ctx->elf_fd);
1785out_fd:
1786 close(ctx->obj_fd);
1787 return ret;
1788}
Daniel Borkmannd937a742015-04-28 13:37:42 +02001789
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001790static int bpf_maps_count(struct bpf_elf_ctx *ctx)
1791{
1792 int i, count = 0;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001793
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001794 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
1795 if (!ctx->map_fds[i])
1796 break;
1797 count++;
Daniel Borkmann473d7842015-05-29 15:15:44 +02001798 }
1799
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001800 return count;
1801}
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001802
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001803static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
1804{
1805 int i;
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001806
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001807 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
1808 if (ctx->map_fds[i])
1809 close(ctx->map_fds[i]);
1810 }
1811}
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001812
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001813static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
1814{
1815 if (failure)
1816 bpf_maps_teardown(ctx);
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001817
Daniel Borkmannf6793ee2015-11-26 15:38:44 +01001818 bpf_hash_destroy(ctx);
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001819
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001820 free(ctx->sec_done);
Daniel Borkmannf31645d2016-02-07 02:11:51 +01001821 free(ctx->log);
1822
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001823 elf_end(ctx->elf_fd);
1824 close(ctx->obj_fd);
1825}
1826
1827static struct bpf_elf_ctx __ctx;
1828
1829static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
1830 const char *section, bool verbose)
1831{
1832 struct bpf_elf_ctx *ctx = &__ctx;
1833 int fd = 0, ret;
1834
1835 ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
1836 if (ret < 0) {
1837 fprintf(stderr, "Cannot initialize ELF context!\n");
1838 return ret;
1839 }
1840
1841 ret = bpf_fetch_ancillary(ctx);
1842 if (ret < 0) {
1843 fprintf(stderr, "Error fetching ELF ancillary data!\n");
1844 goto out;
1845 }
1846
1847 fd = bpf_fetch_prog_sec(ctx, section);
1848 if (fd < 0) {
1849 fprintf(stderr, "Error fetching program/map!\n");
1850 ret = fd;
1851 goto out;
1852 }
1853
1854 ret = bpf_fill_prog_arrays(ctx);
1855 if (ret < 0)
1856 fprintf(stderr, "Error filling program arrays!\n");
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001857out:
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001858 bpf_elf_ctx_destroy(ctx, ret < 0);
1859 if (ret < 0) {
1860 if (fd)
1861 close(fd);
1862 return ret;
1863 }
1864
1865 return fd;
Daniel Borkmann11c39b52015-03-16 19:37:41 +01001866}
1867
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001868static int
Daniel Borkmann4bd62442015-04-16 21:20:06 +02001869bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
1870 const struct bpf_map_data *aux, unsigned int entries)
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001871{
1872 struct bpf_map_set_msg msg;
1873 int *cmsg_buf, min_fd;
1874 char *amsg_buf;
1875 int i;
1876
1877 memset(&msg, 0, sizeof(msg));
1878
1879 msg.aux.uds_ver = BPF_SCM_AUX_VER;
Daniel Borkmann4bd62442015-04-16 21:20:06 +02001880 msg.aux.num_ent = entries;
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001881
1882 strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
1883 memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
1884
1885 cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
1886 amsg_buf = (char *)msg.aux.ent;
1887
Daniel Borkmann4bd62442015-04-16 21:20:06 +02001888 for (i = 0; i < entries; i += min_fd) {
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001889 int ret;
1890
Daniel Borkmann4bd62442015-04-16 21:20:06 +02001891 min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001892 bpf_map_set_init_single(&msg, min_fd);
1893
1894 memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
1895 memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
1896
1897 ret = sendmsg(fd, &msg.hdr, 0);
1898 if (ret <= 0)
1899 return ret ? : -1;
1900 }
1901
1902 return 0;
1903}
1904
Daniel Borkmann4bd62442015-04-16 21:20:06 +02001905static int
1906bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
1907 unsigned int entries)
1908{
1909 struct bpf_map_set_msg msg;
1910 int *cmsg_buf, min_fd;
1911 char *amsg_buf, *mmsg_buf;
1912 unsigned int needed = 1;
1913 int i;
1914
1915 cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
1916 amsg_buf = (char *)msg.aux.ent;
1917 mmsg_buf = (char *)&msg.aux;
1918
1919 for (i = 0; i < min(entries, needed); i += min_fd) {
1920 struct cmsghdr *cmsg;
1921 int ret;
1922
1923 min_fd = min(entries, entries - i);
1924 bpf_map_set_init_single(&msg, min_fd);
1925
1926 ret = recvmsg(fd, &msg.hdr, 0);
1927 if (ret <= 0)
1928 return ret ? : -1;
1929
1930 cmsg = CMSG_FIRSTHDR(&msg.hdr);
1931 if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
1932 return -EINVAL;
1933 if (msg.hdr.msg_flags & MSG_CTRUNC)
1934 return -EIO;
1935 if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
1936 return -ENOSYS;
1937
1938 min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
1939 if (min_fd > entries || min_fd <= 0)
1940 return -EINVAL;
1941
1942 memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
1943 memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
1944 memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
1945
1946 needed = aux->num_ent;
1947 }
1948
1949 return 0;
1950}
1951
1952int bpf_send_map_fds(const char *path, const char *obj)
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001953{
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001954 struct bpf_elf_ctx *ctx = &__ctx;
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001955 struct sockaddr_un addr;
1956 struct bpf_map_data bpf_aux;
1957 int fd, ret;
1958
1959 fd = socket(AF_UNIX, SOCK_DGRAM, 0);
1960 if (fd < 0) {
1961 fprintf(stderr, "Cannot open socket: %s\n",
1962 strerror(errno));
1963 return -1;
1964 }
1965
1966 memset(&addr, 0, sizeof(addr));
1967 addr.sun_family = AF_UNIX;
1968 strncpy(addr.sun_path, path, sizeof(addr.sun_path));
1969
1970 ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
1971 if (ret < 0) {
1972 fprintf(stderr, "Cannot connect to %s: %s\n",
1973 path, strerror(errno));
1974 return -1;
1975 }
1976
1977 memset(&bpf_aux, 0, sizeof(bpf_aux));
1978
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001979 bpf_aux.fds = ctx->map_fds;
1980 bpf_aux.ent = ctx->maps;
1981 bpf_aux.st = &ctx->stat;
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001982 bpf_aux.obj = obj;
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001983
Daniel Borkmann4bd62442015-04-16 21:20:06 +02001984 ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001985 bpf_maps_count(ctx));
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001986 if (ret < 0)
Daniel Borkmann4bd62442015-04-16 21:20:06 +02001987 fprintf(stderr, "Cannot send fds to %s: %s\n",
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001988 path, strerror(errno));
1989
Daniel Borkmann32e93fb2015-11-13 00:39:29 +01001990 bpf_maps_teardown(ctx);
Daniel Borkmann6256f8c2015-04-01 17:57:44 +02001991 close(fd);
1992 return ret;
1993}
Daniel Borkmann4bd62442015-04-16 21:20:06 +02001994
/*
 * Bind a UNIX datagram socket at @path and receive map descriptors on it.
 *
 * @path:    filesystem path to bind to; a path that does not fit into
 *           sun_path is truncated
 * @fds:     output array for the received descriptors
 * @aux:     output for the received auxiliary data
 * @entries: capacity of @fds
 *
 * Returns -1 if the socket cannot be created or bound, otherwise the
 * result of bpf_map_set_recv(). The socket file is unlinked after a
 * successful bind, regardless of whether receiving succeeded.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr;
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	/*
	 * addr is zeroed, so leaving the last byte alone keeps sun_path
	 * NUL-terminated; unlink() below relies on that.
	 */
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		ret = -1;
		/*
		 * Do not leak the socket, and do not unlink a path we did
		 * not manage to bind.
		 */
		goto out;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
out:
	close(fd);
	return ret;
}
Daniel Borkmann11c39b52015-03-16 19:37:41 +01002028#endif /* HAVE_ELF */