/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include "percpu_freelist.h"

struct bucket {
	struct hlist_head head;
	raw_spinlock_t lock;
};

struct bpf_htab {
	struct bpf_map map;
	struct bucket *buckets;
	void *elems;
	struct pcpu_freelist freelist;
	void __percpu *extra_elems;
	atomic_t count;	/* number of elements in this hashtable */
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
};

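/* state of the spare element kept per CPU in htab->extra_elems; it lets an
 * update of an existing key succeed even when all regular elements are in
 * use (see alloc_htab_elem())
 */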
enum extra_elem_state {
	HTAB_NOT_AN_EXTRA_ELEM = 0,
	HTAB_EXTRA_ELEM_FREE,
	HTAB_EXTRA_ELEM_USED
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_node hash_node;
		struct {
			void *padding;
			union {
				struct bpf_htab *htab;
				struct pcpu_freelist_node fnode;
			};
		};
	};
	union {
		struct rcu_head rcu;
		enum extra_elem_state state;
	};
	u32 hash;
	char key[0] __aligned(8);
};

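/* for per-cpu maps the area right after the key stores a pointer to the
 * per-cpu copies of the value
 */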
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)(l->key + key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)(l->key + key_size);
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * htab->elem_size);
}

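/* free the preallocated element area, including the per-cpu value storage
 * of BPF_MAP_TYPE_PERCPU_HASH maps
 */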
static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
	}
free_elems:
	bpf_map_area_free(htab->elems);
}

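/* allocate all elements up front (plus per-cpu value storage for per-cpu
 * maps) and hand them to the per-cpu freelist
 */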
static int prealloc_elems_and_freelist(struct bpf_htab *htab)
{
	int err = -ENOMEM, i;

	htab->elems = bpf_map_area_alloc(htab->elem_size *
					 htab->map.max_entries);
	if (!htab->elems)
		return -ENOMEM;

	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
		goto skip_percpu_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		u32 size = round_up(htab->map.value_size, 8);
		void __percpu *pptr;

		pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
		if (!pptr)
			goto free_elems;
		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
				  pptr);
	}

skip_percpu_elems:
	err = pcpu_freelist_init(&htab->freelist);
	if (err)
		goto free_elems;

	pcpu_freelist_populate(&htab->freelist,
			       htab->elems + offsetof(struct htab_elem, fnode),
			       htab->elem_size, htab->map.max_entries);

	return 0;

free_elems:
	htab_free_elems(htab);
	return err;
}

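/* allocate one spare element per possible CPU; only used by non-per-cpu
 * hash maps
 */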
static int alloc_extra_elems(struct bpf_htab *htab)
{
	void __percpu *pptr;
	int cpu;

	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
	if (!pptr)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
			HTAB_EXTRA_ELEM_FREE;
	}
	htab->extra_elems = pptr;
	return 0;
}

/* Called from syscall */
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_HASH;
	struct bpf_htab *htab;
	int err, i;
	u64 cost;

	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
		     offsetof(struct htab_elem, hash_node.pprev));
	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (attr->map_flags & ~BPF_F_NO_PREALLOC)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	htab = kzalloc(sizeof(*htab), GFP_USER);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	/* mandatory map attributes */
	htab->map.map_type = attr->map_type;
	htab->map.key_size = attr->key_size;
	htab->map.value_size = attr->value_size;
	htab->map.max_entries = attr->max_entries;
	htab->map.map_flags = attr->map_flags;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	err = -EINVAL;
	if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
	    htab->map.value_size == 0)
		goto free_htab;

	/* hash table size must be power of 2 */
	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);

	err = -E2BIG;
	if (htab->map.key_size > MAX_BPF_STACK)
		/* eBPF programs initialize keys on stack, so they cannot be
		 * larger than max stack size
		 */
		goto free_htab;

	if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
	    MAX_BPF_STACK - sizeof(struct htab_elem))
		/* if value_size is bigger, the user space won't be able to
		 * access the elements via bpf syscall. This check also makes
		 * sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		goto free_htab;

	if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
		/* make sure the size for pcpu_alloc() is reasonable */
		goto free_htab;

	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	if (percpu)
		htab->elem_size += sizeof(void *);
	else
		htab->elem_size += round_up(htab->map.value_size, 8);

	/* prevent zero size kmalloc and check for u32 overflow */
	if (htab->n_buckets == 0 ||
	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	cost = (u64) htab->n_buckets * sizeof(struct bucket) +
	       (u64) htab->elem_size * htab->map.max_entries;

	if (percpu)
		cost += (u64) round_up(htab->map.value_size, 8) *
			num_possible_cpus() * htab->map.max_entries;
	else
		cost += (u64) htab->elem_size * num_possible_cpus();

	if (cost >= U32_MAX - PAGE_SIZE)
		/* make sure page count doesn't overflow */
		goto free_htab;

	htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	/* if map size is larger than memlock limit, reject it early */
	err = bpf_map_precharge_memlock(htab->map.pages);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
					   sizeof(struct bucket));
	if (!htab->buckets)
		goto free_htab;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_HEAD(&htab->buckets[i].head);
		raw_spin_lock_init(&htab->buckets[i].lock);
	}

	if (!percpu) {
		err = alloc_extra_elems(htab);
		if (err)
			goto free_buckets;
	}

	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
		err = prealloc_elems_and_freelist(htab);
		if (err)
			goto free_extra_elems;
	}

	return &htab->map;

free_extra_elems:
	free_percpu(htab->extra_elems);
free_buckets:
	bpf_map_area_free(htab->buckets);
free_htab:
	kfree(htab);
	return ERR_PTR(err);
}

static inline u32 htab_map_hash(const void *key, u32 key_len)
{
	return jhash(key, key_len, 0);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}

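/* walk one bucket's hlist looking for an exact key match; callers run under
 * rcu_read_lock(), writers additionally hold the bucket lock
 */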
static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct htab_elem *l;

	hlist_for_each_entry_rcu(l, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}

/* Called from syscall or from eBPF program */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	/* Must be called with rcu_read_lock. */
	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	head = select_bucket(htab, hash);

	l = lookup_elem_raw(head, hash, key, key_size);

	return l;
}

static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return l->key + round_up(map->key_size, 8);

	return NULL;
}

/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	int i = 0;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	if (!key)
		goto find_first_elem;

	hash = htab_map_hash(key, key_size);

	head = select_bucket(htab, hash);

	/* lookup the key */
	l = lookup_elem_raw(head, hash, key, key_size);

	if (!l)
		goto find_first_elem;

	/* key was found, get next key in the same bucket */
	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
				  struct htab_elem, hash_node);

	if (next_l) {
		/* if next elem in this hash list is non-zero, just return it */
		memcpy(next_key, next_l->key, key_size);
		return 0;
	}

	/* no more elements in this hash list, go to the next bucket */
	i = hash & (htab->n_buckets - 1);
	i++;

find_first_elem:
	/* iterate over buckets */
	for (; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		/* pick first element in the bucket */
		next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
					  struct htab_elem, hash_node);
		if (next_l) {
			/* if it's not empty, just return it */
			memcpy(next_key, next_l->key, key_size);
			return 0;
		}
	}

	/* iterated over all buckets and all elements */
	return -ENOENT;
}

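/* free an element's memory, including its per-cpu value area for per-cpu
 * maps
 */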
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
	kfree(l);
}

static void htab_elem_free_rcu(struct rcu_head *head)
{
	struct htab_elem *l = container_of(head, struct htab_elem, rcu);
	struct bpf_htab *htab = l->htab;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering while
	 * we're calling kfree, otherwise deadlock is possible if kprobes
	 * are placed somewhere inside of slub
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	htab_elem_free(htab, l);
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
}

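/* release an element that was unlinked from its bucket: hand extra elems
 * back to the per-cpu slot, preallocated elems back to the freelist, and
 * kmalloc'ed elems to kfree after an RCU grace period
 */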
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
	if (l->state == HTAB_EXTRA_ELEM_USED) {
		l->state = HTAB_EXTRA_ELEM_FREE;
		return;
	}

	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
		pcpu_freelist_push(&htab->freelist, &l->fnode);
	} else {
		atomic_dec(&htab->count);
		l->htab = htab;
		call_rcu(&l->rcu, htab_elem_free_rcu);
	}
}

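/* get memory for a new element: pop the freelist for preallocated maps,
 * kmalloc otherwise, and fall back to this CPU's extra element when the
 * table is full but an existing key is being updated
 */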
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
					 void *value, u32 key_size, u32 hash,
					 bool percpu, bool onallcpus,
					 bool old_elem_exists)
{
	u32 size = htab->map.value_size;
	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
	struct htab_elem *l_new;
	void __percpu *pptr;
	int err = 0;

	if (prealloc) {
		struct pcpu_freelist_node *l;

		l = pcpu_freelist_pop(&htab->freelist);
		if (!l)
			err = -E2BIG;
		else
			l_new = container_of(l, struct htab_elem, fnode);
	} else {
		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
			atomic_dec(&htab->count);
			err = -E2BIG;
		} else {
			l_new = kmalloc(htab->elem_size,
					GFP_ATOMIC | __GFP_NOWARN);
			if (!l_new)
				return ERR_PTR(-ENOMEM);
		}
	}

	if (err) {
		if (!old_elem_exists)
			return ERR_PTR(err);

		/* if we're updating the existing element and the hash table
		 * is full, use per-cpu extra elems
		 */
		l_new = this_cpu_ptr(htab->extra_elems);
		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
			return ERR_PTR(-E2BIG);
		l_new->state = HTAB_EXTRA_ELEM_USED;
	} else {
		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
	}

	memcpy(l_new->key, key, key_size);
	if (percpu) {
		/* round up value_size to 8 bytes */
		size = round_up(size, 8);

		if (prealloc) {
			pptr = htab_elem_get_ptr(l_new, key_size);
		} else {
			/* alloc_percpu zero-fills */
			pptr = __alloc_percpu_gfp(size, 8,
						  GFP_ATOMIC | __GFP_NOWARN);
			if (!pptr) {
				kfree(l_new);
				return ERR_PTR(-ENOMEM);
			}
		}

		if (!onallcpus) {
			/* copy true value_size bytes */
			memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
		} else {
			int off = 0, cpu;

			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
						value + off, size);
				off += size;
			}
		}
		if (!prealloc)
			htab_elem_set_ptr(l_new, key_size, pptr);
	} else {
		memcpy(l_new->key + round_up(key_size, 8), value, size);
	}

	l_new->hash = hash;
	return l_new;
}

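/* validate BPF_NOEXIST/BPF_EXIST update flags against whether the element
 * is already present
 */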
static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
		       u64 map_flags)
{
	if (l_old && map_flags == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!l_old && map_flags == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}

/* Called from syscall or from eBPF program */
static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
				u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
				!!l_old);
	if (IS_ERR(l_new)) {
		/* all pre-allocated elements are in use or memory exhausted */
		ret = PTR_ERR(l_new);
		goto err;
	}

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		hlist_del_rcu(&l_old->hash_node);
		free_htab_elem(htab, l_old);
	}
	ret = 0;
err:
	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}

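/* onallcpus is true on the syscall path (bpf_percpu_hash_update()), where
 * the caller supplies a value for every possible CPU; otherwise only this
 * CPU's copy of the value is written
 */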
static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
					 void *value, u64 map_flags,
					 bool onallcpus)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (l_old) {
		void __percpu *pptr = htab_elem_get_ptr(l_old, key_size);
		u32 size = htab->map.value_size;

		/* per-cpu hash map can update value in-place */
		if (!onallcpus) {
			memcpy(this_cpu_ptr(pptr), value, size);
		} else {
			int off = 0, cpu;

			size = round_up(size, 8);
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
						value + off, size);
				off += size;
			}
		}
	} else {
		l_new = alloc_htab_elem(htab, key, value, key_size,
					hash, true, onallcpus, false);
		if (IS_ERR(l_new)) {
			ret = PTR_ERR(l_new);
			goto err;
		}
		hlist_add_head_rcu(&l_new->hash_node, head);
	}
	ret = 0;
err:
	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}

static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 map_flags)
{
	return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
}

/* Called from syscall or from eBPF program */
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret = -ENOENT;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);
	b = __select_bucket(htab, hash);
	head = &b->head;

	raw_spin_lock_irqsave(&b->lock, flags);

	l = lookup_elem_raw(head, hash, key, key_size);

	if (l) {
		hlist_del_rcu(&l->hash_node);
		free_htab_elem(htab, l);
		ret = 0;
	}

	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}

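/* unlink and free every element; called from htab_map_free() for
 * non-preallocated maps, skipping per-cpu extra elems which are freed
 * separately
 */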
static void delete_all_elements(struct bpf_htab *htab)
{
	int i;

	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_head *head = select_bucket(htab, i);
		struct hlist_node *n;
		struct htab_elem *l;

		hlist_for_each_entry_safe(l, n, head, hash_node) {
			hlist_del_rcu(&l->hash_node);
			if (l->state != HTAB_EXTRA_ELEM_USED)
				htab_elem_free(htab, l);
		}
	}
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding critical sections in
	 * these programs to complete
	 */
	synchronize_rcu();

	/* some of free_htab_elem() callbacks for elements of this map may
	 * not have executed. Wait for them.
	 */
	rcu_barrier();
	if (htab->map.map_flags & BPF_F_NO_PREALLOC) {
		delete_all_elements(htab);
	} else {
		htab_free_elems(htab);
		pcpu_freelist_destroy(&htab->freelist);
	}
	free_percpu(htab->extra_elems);
	bpf_map_area_free(htab->buckets);
	kfree(htab);
}

static const struct bpf_map_ops htab_ops = {
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_map_lookup_elem,
	.map_update_elem = htab_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
};

static struct bpf_map_type_list htab_type __read_mostly = {
	.ops = &htab_ops,
	.type = BPF_MAP_TYPE_HASH,
};

/* Called from eBPF program */
static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
	else
		return NULL;
}

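/* gather every CPU's copy of the value into one flat buffer for the
 * syscall lookup path
 */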
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
	struct htab_elem *l;
	void __percpu *pptr;
	int ret = -ENOENT;
	int cpu, off = 0;
	u32 size;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	l = __htab_map_lookup_elem(map, key);
	if (!l)
		goto out;
	pptr = htab_elem_get_ptr(l, map->key_size);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}

int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
			   u64 map_flags)
{
	int ret;

	rcu_read_lock();
	ret = __htab_percpu_map_update_elem(map, key, value, map_flags, true);
	rcu_read_unlock();

	return ret;
}

static const struct bpf_map_ops htab_percpu_ops = {
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_percpu_map_lookup_elem,
	.map_update_elem = htab_percpu_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
};

static struct bpf_map_type_list htab_percpu_type __read_mostly = {
	.ops = &htab_percpu_ops,
	.type = BPF_MAP_TYPE_PERCPU_HASH,
};

static int __init register_htab_map(void)
{
	bpf_register_map_type(&htab_type);
	bpf_register_map_type(&htab_percpu_type);
	return 0;
}
late_initcall(register_htab_map);