/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include "percpu_freelist.h"

#define HTAB_CREATE_FLAG_MASK \
	(BPF_F_NO_PREALLOC | BPF_F_RDONLY | BPF_F_WRONLY)

struct bucket {
	struct hlist_nulls_head head;
	raw_spinlock_t lock;
};

struct bpf_htab {
	struct bpf_map map;
	struct bucket *buckets;
	void *elems;
	struct pcpu_freelist freelist;
	void __percpu *extra_elems;
	atomic_t count;	/* number of elements in this hashtable */
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
};

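/* For a non-percpu hash map every possible CPU gets one spare "extra"
 * element (see alloc_extra_elems()).  When an update replaces an existing
 * key but the table is already full, the new value is written into the
 * current CPU's spare instead of failing with -E2BIG; freeing such an
 * element simply marks the spare free again.  The enum below tracks that
 * per-element state.
 */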
enum extra_elem_state {
	HTAB_NOT_AN_EXTRA_ELEM = 0,
	HTAB_EXTRA_ELEM_FREE,
	HTAB_EXTRA_ELEM_USED
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct bpf_htab *htab;
				struct pcpu_freelist_node fnode;
			};
		};
	};
	union {
		struct rcu_head rcu;
		enum extra_elem_state state;
	};
	u32 hash;
	char key[0] __aligned(8);
};

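/* For BPF_MAP_TYPE_PERCPU_HASH the slot right after the key does not hold
 * the value itself but a pointer to the per-cpu value storage; the two
 * helpers below read and write that pointer.
 */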
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)(l->key + key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)(l->key + key_size);
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * htab->elem_size);
}

static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
	}
free_elems:
	bpf_map_area_free(htab->elems);
}

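/* Unless BPF_F_NO_PREALLOC was requested, all max_entries elements are
 * allocated up front in one contiguous area and pushed onto a per-cpu
 * freelist, so that updates in program context never have to call the
 * allocator.
 */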
static int prealloc_elems_and_freelist(struct bpf_htab *htab)
{
	int err = -ENOMEM, i;

	htab->elems = bpf_map_area_alloc(htab->elem_size *
					 htab->map.max_entries);
	if (!htab->elems)
		return -ENOMEM;

	if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
		goto skip_percpu_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		u32 size = round_up(htab->map.value_size, 8);
		void __percpu *pptr;

		pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
		if (!pptr)
			goto free_elems;
		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
				  pptr);
	}

skip_percpu_elems:
	err = pcpu_freelist_init(&htab->freelist);
	if (err)
		goto free_elems;

	pcpu_freelist_populate(&htab->freelist,
			       htab->elems + offsetof(struct htab_elem, fnode),
			       htab->elem_size, htab->map.max_entries);

	return 0;

free_elems:
	htab_free_elems(htab);
	return err;
}

static int alloc_extra_elems(struct bpf_htab *htab)
{
	void __percpu *pptr;
	int cpu;

	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
	if (!pptr)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
			HTAB_EXTRA_ELEM_FREE;
	}
	htab->extra_elems = pptr;
	return 0;
}

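/* Example (userspace sketch, not part of this file): a map that ends up in
 * htab_map_alloc() below would typically be created like
 *
 *	union bpf_attr attr = {
 *		.map_type    = BPF_MAP_TYPE_HASH,
 *		.key_size    = 4,
 *		.value_size  = 8,
 *		.max_entries = 1024,
 *	};
 *	int fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *
 * The sizes above are illustrative only.
 */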
/* Called from syscall */
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_HASH;
	struct bpf_htab *htab;
	int err, i;
	u64 cost;

	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
		     offsetof(struct htab_elem, hash_node.pprev));
	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	htab = kzalloc(sizeof(*htab), GFP_USER);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	/* mandatory map attributes */
	htab->map.map_type = attr->map_type;
	htab->map.key_size = attr->key_size;
	htab->map.value_size = attr->value_size;
	htab->map.max_entries = attr->max_entries;
	htab->map.map_flags = attr->map_flags;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	err = -EINVAL;
	if (htab->map.max_entries == 0 || htab->map.key_size == 0 ||
	    htab->map.value_size == 0)
		goto free_htab;

	/* hash table size must be power of 2 */
	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);

	err = -E2BIG;
	if (htab->map.key_size > MAX_BPF_STACK)
		/* eBPF programs initialize keys on stack, so they cannot be
		 * larger than max stack size
		 */
		goto free_htab;

	if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
	    MAX_BPF_STACK - sizeof(struct htab_elem))
		/* if value_size is bigger, the user space won't be able to
		 * access the elements via bpf syscall. This check also makes
		 * sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		goto free_htab;

	if (percpu && round_up(htab->map.value_size, 8) > PCPU_MIN_UNIT_SIZE)
		/* make sure the size for pcpu_alloc() is reasonable */
		goto free_htab;

	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	if (percpu)
		htab->elem_size += sizeof(void *);
	else
		htab->elem_size += round_up(htab->map.value_size, 8);

	/* prevent zero size kmalloc and check for u32 overflow */
	if (htab->n_buckets == 0 ||
	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	cost = (u64) htab->n_buckets * sizeof(struct bucket) +
	       (u64) htab->elem_size * htab->map.max_entries;

	if (percpu)
		cost += (u64) round_up(htab->map.value_size, 8) *
			num_possible_cpus() * htab->map.max_entries;
	else
		cost += (u64) htab->elem_size * num_possible_cpus();

	if (cost >= U32_MAX - PAGE_SIZE)
		/* make sure page count doesn't overflow */
		goto free_htab;

	htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	/* if map size is larger than memlock limit, reject it early */
	err = bpf_map_precharge_memlock(htab->map.pages);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
					   sizeof(struct bucket));
	if (!htab->buckets)
		goto free_htab;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		raw_spin_lock_init(&htab->buckets[i].lock);
	}

	if (!percpu) {
		err = alloc_extra_elems(htab);
		if (err)
			goto free_buckets;
	}

	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
		err = prealloc_elems_and_freelist(htab);
		if (err)
			goto free_extra_elems;
	}

	return &htab->map;

free_extra_elems:
	free_percpu(htab->extra_elems);
free_buckets:
	bpf_map_area_free(htab->buckets);
free_htab:
	kfree(htab);
	return ERR_PTR(err);
}

static inline u32 htab_map_hash(const void *key, u32 key_len)
{
	return jhash(key, key_len, 0);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}

/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}

/* Can be called without the bucket lock. Each bucket's nulls value encodes
 * its index (see INIT_HLIST_NULLS_HEAD() in htab_map_alloc()), so in the
 * unlikely event that a recycled element moved the walk into a different
 * bucket while the linked list was being traversed, the check at the end
 * detects it and the loop is repeated.
 */
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
					       u32 hash, void *key,
					       u32 key_size, u32 n_buckets)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

again:
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
		goto again;

	return NULL;
}

/* Called from syscall or from eBPF program */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	/* Must be called with rcu_read_lock. */
	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	head = select_bucket(htab, hash);

	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	return l;
}

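/* Example (BPF program side, sketch): a program reaches the lookup below via
 * the bpf_map_lookup_elem() helper, e.g.
 *
 *	u64 *val = bpf_map_lookup_elem(&my_map, &key);
 *	if (val)
 *		__sync_fetch_and_add(val, 1);
 *
 * "my_map" and the value type are illustrative only.
 */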
static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return l->key + round_up(map->key_size, 8);

	return NULL;
}

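/* Example (userspace sketch): map iteration typically uses the get_next_key
 * operation implemented below, starting from a key that is not in the map
 * (the !key case below returns the first element):
 *
 *	while (bpf_map_get_next_key(fd, prev_key, &key) == 0) {
 *		bpf_map_lookup_elem(fd, &key, &value);
 *		prev_key = &key;
 *	}
 *
 * "fd", "prev_key", "key" and "value" are illustrative only.
 */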
/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	int i = 0;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	if (!key)
		goto find_first_elem;

	hash = htab_map_hash(key, key_size);

	head = select_bucket(htab, hash);

	/* lookup the key */
	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	if (!l)
		goto find_first_elem;

	/* key was found, get next key in the same bucket */
	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
					struct htab_elem, hash_node);

	if (next_l) {
		/* if next elem in this hash list is not NULL, just return it */
		memcpy(next_key, next_l->key, key_size);
		return 0;
	}

	/* no more elements in this hash list, go to the next bucket */
	i = hash & (htab->n_buckets - 1);
	i++;

find_first_elem:
	/* iterate over buckets */
	for (; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		/* pick first element in the bucket */
		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
						struct htab_elem, hash_node);
		if (next_l) {
			/* if it's not empty, just return it */
			memcpy(next_key, next_l->key, key_size);
			return 0;
		}
	}

	/* iterated over all buckets and all elements */
	return -ENOENT;
}

static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
	kfree(l);
}

static void htab_elem_free_rcu(struct rcu_head *head)
{
	struct htab_elem *l = container_of(head, struct htab_elem, rcu);
	struct bpf_htab *htab = l->htab;

	/* must increment bpf_prog_active to avoid kprobe+bpf triggering while
	 * we're calling kfree, otherwise deadlock is possible if kprobes
	 * are placed somewhere inside of slub
	 */
	preempt_disable();
	__this_cpu_inc(bpf_prog_active);
	htab_elem_free(htab, l);
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();
}

static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
	if (l->state == HTAB_EXTRA_ELEM_USED) {
		l->state = HTAB_EXTRA_ELEM_FREE;
		return;
	}

	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
		pcpu_freelist_push(&htab->freelist, &l->fnode);
	} else {
		atomic_dec(&htab->count);
		l->htab = htab;
		call_rcu(&l->rcu, htab_elem_free_rcu);
	}
}

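/* Pick storage for a new element: pop it from the freelist for preallocated
 * maps, kmalloc() it (bounded by max_entries) for BPF_F_NO_PREALLOC maps,
 * and fall back to the current CPU's extra element when the table is full
 * but an existing key is being replaced.
 */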
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
					 void *value, u32 key_size, u32 hash,
					 bool percpu, bool onallcpus,
					 bool old_elem_exists)
{
	u32 size = htab->map.value_size;
	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
	struct htab_elem *l_new;
	void __percpu *pptr;
	int err = 0;

	if (prealloc) {
		struct pcpu_freelist_node *l;

		l = pcpu_freelist_pop(&htab->freelist);
		if (!l)
			err = -E2BIG;
		else
			l_new = container_of(l, struct htab_elem, fnode);
	} else {
		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
			atomic_dec(&htab->count);
			err = -E2BIG;
		} else {
			l_new = kmalloc(htab->elem_size,
					GFP_ATOMIC | __GFP_NOWARN);
			if (!l_new)
				return ERR_PTR(-ENOMEM);
		}
	}

	if (err) {
		if (!old_elem_exists)
			return ERR_PTR(err);

		/* if we're updating the existing element and the hash table
		 * is full, use per-cpu extra elems
		 */
		l_new = this_cpu_ptr(htab->extra_elems);
		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
			return ERR_PTR(-E2BIG);
		l_new->state = HTAB_EXTRA_ELEM_USED;
	} else {
		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
	}

	memcpy(l_new->key, key, key_size);
	if (percpu) {
		/* round up value_size to 8 bytes */
		size = round_up(size, 8);

		if (prealloc) {
			pptr = htab_elem_get_ptr(l_new, key_size);
		} else {
			/* alloc_percpu zero-fills */
			pptr = __alloc_percpu_gfp(size, 8,
						  GFP_ATOMIC | __GFP_NOWARN);
			if (!pptr) {
				kfree(l_new);
				return ERR_PTR(-ENOMEM);
			}
		}

		if (!onallcpus) {
			/* copy true value_size bytes */
			memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
		} else {
			int off = 0, cpu;

			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
						value + off, size);
				off += size;
			}
		}
		if (!prealloc)
			htab_elem_set_ptr(l_new, key_size, pptr);
	} else {
		memcpy(l_new->key + round_up(key_size, 8), value, size);
	}

	l_new->hash = hash;
	return l_new;
}

static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
		       u64 map_flags)
{
	if (l_old && map_flags == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!l_old && map_flags == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}

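/* Update flags accepted by the functions below (anything greater than
 * BPF_EXIST is rejected as unknown):
 *	BPF_ANY     - create the element or update it if it exists
 *	BPF_NOEXIST - create only, -EEXIST if the key is already present
 *	BPF_EXIST   - update only, -ENOENT if the key is absent
 */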
/* Called from syscall or from eBPF program */
static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
				u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
				!!l_old);
	if (IS_ERR(l_new)) {
		/* all pre-allocated elements are in use or memory exhausted */
		ret = PTR_ERR(l_new);
		goto err;
	}

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		hlist_nulls_del_rcu(&l_old->hash_node);
		free_htab_elem(htab, l_old);
	}
	ret = 0;
err:
	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}

static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
					 void *value, u64 map_flags,
					 bool onallcpus)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (l_old) {
		void __percpu *pptr = htab_elem_get_ptr(l_old, key_size);
		u32 size = htab->map.value_size;

		/* per-cpu hash map can update value in-place */
		if (!onallcpus) {
			memcpy(this_cpu_ptr(pptr), value, size);
		} else {
			int off = 0, cpu;

			size = round_up(size, 8);
			for_each_possible_cpu(cpu) {
				bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
						value + off, size);
				off += size;
			}
		}
	} else {
		l_new = alloc_htab_elem(htab, key, value, key_size,
					hash, true, onallcpus, false);
		if (IS_ERR(l_new)) {
			ret = PTR_ERR(l_new);
			goto err;
		}
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	}
	ret = 0;
err:
	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}

static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 map_flags)
{
	return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
}

/* Called from syscall or from eBPF program */
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret = -ENOENT;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size);
	b = __select_bucket(htab, hash);
	head = &b->head;

	raw_spin_lock_irqsave(&b->lock, flags);

	l = lookup_elem_raw(head, hash, key, key_size);

	if (l) {
		hlist_nulls_del_rcu(&l->hash_node);
		free_htab_elem(htab, l);
		ret = 0;
	}

	raw_spin_unlock_irqrestore(&b->lock, flags);
	return ret;
}

static void delete_all_elements(struct bpf_htab *htab)
{
	int i;

	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_nulls_head *head = select_bucket(htab, i);
		struct hlist_nulls_node *n;
		struct htab_elem *l;

		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
			hlist_nulls_del_rcu(&l->hash_node);
			if (l->state != HTAB_EXTRA_ELEM_USED)
				htab_elem_free(htab, l);
		}
	}
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding critical sections in
	 * these programs to complete
	 */
	synchronize_rcu();

	/* some of free_htab_elem() callbacks for elements of this map may
	 * not have executed. Wait for them.
	 */
	rcu_barrier();
	if (htab->map.map_flags & BPF_F_NO_PREALLOC) {
		delete_all_elements(htab);
	} else {
		htab_free_elems(htab);
		pcpu_freelist_destroy(&htab->freelist);
	}
	free_percpu(htab->extra_elems);
	bpf_map_area_free(htab->buckets);
	kfree(htab);
}

static const struct bpf_map_ops htab_ops = {
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_map_lookup_elem,
	.map_update_elem = htab_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
};

static struct bpf_map_type_list htab_type __read_mostly = {
	.ops = &htab_ops,
	.type = BPF_MAP_TYPE_HASH,
};

/* Called from eBPF program */
static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
	else
		return NULL;
}

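/* Syscall-side lookup/update for per-cpu hash maps: user space always sees
 * num_possible_cpus() copies of the value, each padded to a multiple of
 * 8 bytes and laid out back to back in the supplied buffer.
 */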
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
	struct htab_elem *l;
	void __percpu *pptr;
	int ret = -ENOENT;
	int cpu, off = 0;
	u32 size;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	l = __htab_map_lookup_elem(map, key);
	if (!l)
		goto out;
	pptr = htab_elem_get_ptr(l, map->key_size);
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(value + off,
				per_cpu_ptr(pptr, cpu), size);
		off += size;
	}
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}

int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
			   u64 map_flags)
{
	int ret;

	rcu_read_lock();
	ret = __htab_percpu_map_update_elem(map, key, value, map_flags, true);
	rcu_read_unlock();

	return ret;
}

static const struct bpf_map_ops htab_percpu_ops = {
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_percpu_map_lookup_elem,
	.map_update_elem = htab_percpu_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
};

static struct bpf_map_type_list htab_percpu_type __read_mostly = {
	.ops = &htab_percpu_ops,
	.type = BPF_MAP_TYPE_PERCPU_HASH,
};

static int __init register_htab_map(void)
{
	bpf_register_map_type(&htab_type);
	bpf_register_map_type(&htab_percpu_type);
	return 0;
}
late_initcall(register_htab_map);