/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>

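/* bpf_prog_active is incremented around map update/delete from syscall
 * context (see map_update_elem/map_delete_elem below) so that a BPF
 * program attached to e.g. a kprobe cannot recurse into the same map
 * operations and deadlock.
 */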
DEFINE_PER_CPU(int, bpf_prog_active);

int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);

static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}

/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}

void *bpf_map_area_alloc(size_t size)
{
	/* We definitely need __GFP_NORETRY, so OOM killer doesn't
	 * trigger under memory pressure as we really just want to
	 * fail instead.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
		area = kmalloc(size, GFP_USER | flags);
		if (area != NULL)
			return area;
	}

	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | flags,
			 PAGE_KERNEL);
}

void bpf_map_area_free(void *area)
{
	kvfree(area);
}

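/* check, without charging anything, whether 'pages' additional locked
 * pages would still fit within the current user's RLIMIT_MEMLOCK
 */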
int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit, cur;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	cur = atomic_long_read(&user->locked_vm);
	free_uid(user);
	if (cur + pages > memlock_limit)
		return -EPERM;
	return 0;
}

static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}

static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}

/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}

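/* usercnt tracks references held from user space (fds, pinned files).
 * Once the last one is dropped, clear prog_array maps so the programs
 * they reference are released as well.
 */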
static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}

/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}

void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}

static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	if (map->ops->map_release)
		map->ops->map_release(map, filp);

	bpf_map_put_with_uref(map);
	return 0;
}

#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags);
}
#endif

static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo	= bpf_map_show_fdinfo,
#endif
	.release	= bpf_map_release,
};

int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}

/* helper macro to check that unused fields 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}

/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

/* prog's and map's refcnt limit */
#define BPF_MAX_REFCNT 32768

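/* take an extra map reference; fails with -EBUSY once BPF_MAX_REFCNT is
 * reached so the refcount stays well away from overflowing
 */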
struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
{
	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
		atomic_dec(&map->refcnt);
		return ERR_PTR(-EBUSY);
	}
	if (uref)
		atomic_inc(&map->usercnt);
	return map;
}

struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}

/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}

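/* weak stub; the real implementation is provided when the stack map
 * code is built in
 */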
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value

static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

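	/* per-cpu maps expose one value per possible CPU to user space,
	 * each slot rounded up to 8 bytes
	 */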
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
		err = bpf_stackmap_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags

static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
	 * inside bpf map update or delete otherwise deadlocks are possible
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
		   map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY) {
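		/* these array flavors store kernel objects; the value copied
		 * from user space is an fd that the update helper translates
		 * to the corresponding object
		 */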
		rcu_read_lock();
		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
						   attr->flags);
		rcu_read_unlock();
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}

static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}

/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}

static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(prog->pages, &user->locked_vm);
	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(prog->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	atomic_long_sub(prog->pages, &user->locked_vm);
	free_uid(user);
}

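/* program freeing is deferred through RCU since programs can be invoked
 * from contexts that only hold rcu_read_lock()
 */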
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);

static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put(prog);
	return 0;
}

static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}

static struct bpf_prog *____bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}

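/* take 'i' extra program references; fails with -EBUSY once the
 * BPF_MAX_REFCNT cap would be exceeded
 */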
struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
{
	if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
		atomic_sub(i, &prog->aux->refcnt);
		return ERR_PTR(-EBUSY);
	}
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_add);

struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	return bpf_prog_add(prog, 1);
}

static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = ____bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;
	if (type && prog->type != *type) {
		prog = ERR_PTR(-EINVAL);
		goto out;
	}

	prog = bpf_prog_inc(prog);
out:
	fdput(f);
	return prog;
}

struct bpf_prog *bpf_prog_get(u32 ufd)
{
	return __bpf_prog_get(ufd, NULL);
}

struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	return __bpf_prog_get(ufd, &type);
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	prog = bpf_prog_select_runtime(prog, &err);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}

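/* BPF_OBJ_PIN and BPF_OBJ_GET attach maps/programs to and fetch them from
 * a path in the bpf filesystem, so they can outlive the creating process
 */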
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}

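/* main bpf() syscall entry point: validate the attribute union copied from
 * user space and dispatch to the per-command handlers above
 */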
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}