/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>

DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

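/* Illustrative sketch (not part of this file): a map implementation
 * registers itself at boot via an initcall, roughly the way
 * kernel/bpf/arraymap.c does it. The "example" names below are
 * hypothetical, for illustration only.
 *
 *	static const struct bpf_map_ops example_map_ops = {
 *		.map_alloc		= example_map_alloc,
 *		.map_free		= example_map_free,
 *		.map_get_next_key	= example_map_get_next_key,
 *		.map_lookup_elem	= example_map_lookup_elem,
 *		.map_update_elem	= example_map_update_elem,
 *		.map_delete_elem	= example_map_delete_elem,
 *	};
 *
 *	static struct bpf_map_type_list example_type __read_mostly = {
 *		.ops	= &example_map_ops,
 *		.type	= BPF_MAP_TYPE_ARRAY,
 *	};
 *
 *	static int __init register_example_map(void)
 *	{
 *		bpf_register_map_type(&example_type);
 *		return 0;
 *	}
 *	late_initcall(register_example_map);
 */
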
void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

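/* Worked example of the charging arithmetic above (illustrative
 * numbers): with RLIMIT_MEMLOCK = 64 MiB and 4 KiB pages,
 * memlock_limit is 64 MiB >> PAGE_SHIFT = 16384 pages. Charging a
 * map accounted at 100 pages succeeds as long as user->locked_vm
 * stays at or below 16384 pages after the atomic_long_add().
 */
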
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
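/* Illustrative expansion (not compiled here): for BPF_MAP_GET_NEXT_KEY,
 * whose last field is next_key, CHECK_ATTR(BPF_MAP_GET_NEXT_KEY) scans
 * every byte of *attr that lies past attr->next_key and evaluates to
 * true if any of them is non-zero, i.e. if user space set a field this
 * command does not know about.
 */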

#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

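/* Minimal user space sketch of BPF_MAP_CREATE (illustrative only):
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = sizeof(__u32),
 *		.value_size  = sizeof(__u64),
 *		.max_entries = 256,
 *	};
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * On success the return value is a new file descriptor; the map stays
 * alive until the last reference (fd, pinned path, or program) is gone.
 */
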
/* If an error is returned, the fd is released.
 * On success, the caller should complete the fd access with a matching
 * fdput().
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

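/* Worked example of the per-CPU sizing above (illustrative numbers):
 * for a BPF_MAP_TYPE_PERCPU_ARRAY with value_size = 12, the value is
 * rounded up to 16 bytes per CPU; on a machine with 4 possible CPUs
 * the user buffer therefore receives (and must be able to hold)
 * 64 bytes, one 16-byte slot per possible CPU.
 */
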
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid a kprobe+bpf program
	 * triggering from inside this map update or delete; otherwise
	 * deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

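/* attr->flags for BPF_MAP_UPDATE_ELEM selects the update semantics
 * (see uapi/linux/bpf.h): BPF_ANY creates or replaces the element,
 * BPF_NOEXIST only creates it (-EEXIST if present), and BPF_EXIST
 * only replaces it (-ENOENT if absent). Illustrative user space call:
 *
 *	attr.map_fd = map_fd;
 *	attr.key    = (__u64)(unsigned long)&key;
 *	attr.value  = (__u64)(unsigned long)&value;
 *	attr.flags  = BPF_NOEXIST;
 *	err = syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 */
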
#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (ukey) {
		err = -ENOMEM;
		key = kmalloc(map->key_size, GFP_USER);
		if (!key)
			goto err_put;

		err = -EFAULT;
		if (copy_from_user(key, ukey, map->key_size) != 0)
			goto free_key;
	} else {
		key = NULL;
	}

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

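/* The classic user space iteration idiom built on BPF_MAP_GET_NEXT_KEY
 * (illustrative sketch; bpf_get_next_key() is a hypothetical wrapper
 * around the syscall, and a 4-byte key is assumed): pass a NULL key
 * to obtain the first key, then feed each returned key back in until
 * the call fails with -ENOENT:
 *
 *	__u32 key, next_key;
 *	int err = bpf_get_next_key(map_fd, NULL, &next_key);
 *	while (!err) {
 *		key = next_key;
 *		// ... look up or delete 'key' here ...
 *		err = bpf_get_next_key(map_fd, &key, &next_key);
 *	}
 */
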
static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(prog->pages, &user->locked_vm);
	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(prog->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	atomic_long_sub(prog->pages, &user->locked_vm);
	free_uid(user);
}

static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	return __bpf_prog_get(ufd, &type);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

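/* Minimal user space sketch of BPF_PROG_LOAD (illustrative only): a
 * two-instruction socket filter that returns 0, i.e. drops everything.
 * The insn initializers are spelled out by hand here; samples usually
 * use the BPF_MOV64_IMM()/BPF_EXIT_INSN() helper macros instead.
 *
 *	struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
 *		  .dst_reg = BPF_REG_0, .imm = 0 },	// r0 = 0
 *		{ .code = BPF_JMP | BPF_EXIT },		// return r0
 *	};
 *	union bpf_attr attr = {
 *		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 *		.insns     = (__u64)(unsigned long)insns,
 *		.insn_cnt  = 2,
 *		.license   = (__u64)(unsigned long)"GPL",
 *	};
 *	int prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 *
 * kern_version only needs to be set for BPF_PROG_TYPE_KPROBE, per the
 * check in bpf_prog_load() above.
 */
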
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}

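/* Pinning lets a map or program outlive the creating process by
 * anchoring it at a path on a mounted bpf filesystem (illustrative
 * sketch; /sys/fs/bpf is the conventional mount point, not mandated
 * by this code):
 *
 *	attr.pathname = (__u64)(unsigned long)"/sys/fs/bpf/my_map";
 *	attr.bpf_fd   = map_fd;
 *	err = syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr));
 *
 * A later BPF_OBJ_GET with the same pathname (and bpf_fd = 0) returns
 * a fresh fd referencing the same object.
 */
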
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}