/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>
#include <linux/export.h>

#define HASH_DEFAULT_SIZE	64UL
#define HASH_MIN_SIZE		4U
#define BUCKET_LOCKS_PER_CPU	32UL

union nested_table {
	union nested_table __rcu *table;
	struct rhash_head __rcu *bucket;
};

static u32 head_hashfn(struct rhashtable *ht,
			const struct bucket_table *tbl,
			const struct rhash_head *he)
{
	return rht_head_hashfn(ht, tbl, he, ht->p);
}

#ifdef CONFIG_PROVE_LOCKING
#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))

int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
	return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
	spinlock_t *lock = rht_bucket_lock(tbl, hash);

	return (debug_locks) ? lockdep_is_held(lock) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#else
#define ASSERT_RHT_MUTEX(HT)
#endif


static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
			      gfp_t gfp)
{
	unsigned int i, size;
#if defined(CONFIG_PROVE_LOCKING)
	unsigned int nr_pcpus = 2;
#else
	unsigned int nr_pcpus = num_possible_cpus();
#endif

	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);

	/* Never allocate more than 0.5 locks per bucket */
	size = min_t(unsigned int, size, tbl->size >> 1);

	if (tbl->nest)
		size = min(size, 1U << tbl->nest);

	if (sizeof(spinlock_t) != 0) {
		tbl->locks = NULL;
#ifdef CONFIG_NUMA
		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
		    gfp == GFP_KERNEL)
			tbl->locks = vmalloc(size * sizeof(spinlock_t));
#endif
		if (gfp != GFP_KERNEL)
			gfp |= __GFP_NOWARN | __GFP_NORETRY;

		if (!tbl->locks)
			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
						   gfp);
		if (!tbl->locks)
			return -ENOMEM;
		for (i = 0; i < size; i++)
			spin_lock_init(&tbl->locks[i]);
	}
	tbl->locks_mask = size - 1;

	return 0;
}

static void nested_table_free(union nested_table *ntbl, unsigned int size)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	const unsigned int len = 1 << shift;
	unsigned int i;

	ntbl = rcu_dereference_raw(ntbl->table);
	if (!ntbl)
		return;

	if (size > len) {
		size >>= shift;
		for (i = 0; i < len; i++)
			nested_table_free(ntbl + i, size);
	}

	kfree(ntbl);
}

static void nested_bucket_table_free(const struct bucket_table *tbl)
{
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int len = 1 << tbl->nest;
	union nested_table *ntbl;
	unsigned int i;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);

	for (i = 0; i < len; i++)
		nested_table_free(ntbl + i, size);

	kfree(ntbl);
}

static void bucket_table_free(const struct bucket_table *tbl)
{
	if (tbl->nest)
		nested_bucket_table_free(tbl);

	kvfree(tbl->locks);
	kvfree(tbl);
}

static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}

static union nested_table *nested_table_alloc(struct rhashtable *ht,
					      union nested_table __rcu **prev,
					      unsigned int shifted,
					      unsigned int nhash)
{
	union nested_table *ntbl;
	int i;

	ntbl = rcu_dereference(*prev);
	if (ntbl)
		return ntbl;

	ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);

	if (ntbl && shifted) {
		for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++)
			INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht,
					    (i << shifted) | nhash);
	}

	rcu_assign_pointer(*prev, ntbl);

	return ntbl;
}

static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
						      size_t nbuckets,
						      gfp_t gfp)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	struct bucket_table *tbl;
	size_t size;

	if (nbuckets < (1 << (shift + 1)))
		return NULL;

	size = sizeof(*tbl) + sizeof(tbl->buckets[0]);

	tbl = kzalloc(size, gfp);
	if (!tbl)
		return NULL;

	if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
				0, 0)) {
		kfree(tbl);
		return NULL;
	}

	tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;

	return tbl;
}

static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets,
					       gfp_t gfp)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;

	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
	    gfp != GFP_KERNEL)
		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
	if (tbl == NULL && gfp == GFP_KERNEL)
		tbl = vzalloc(size);

	size = nbuckets;

	if (tbl == NULL && gfp != GFP_KERNEL) {
		tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
		nbuckets = 0;
	}
	if (tbl == NULL)
		return NULL;

	tbl->size = size;

	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
		bucket_table_free(tbl);
		return NULL;
	}

	INIT_LIST_HEAD(&tbl->walkers);

	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);

	return tbl;
}

static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
						  struct bucket_table *tbl)
{
	struct bucket_table *new_tbl;

	do {
		new_tbl = tbl;
		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	} while (tbl);

	return new_tbl;
}

static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl = rhashtable_last_table(ht,
		rht_dereference_rcu(old_tbl->future_tbl, ht));
	struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash);
	int err = -EAGAIN;
	struct rhash_head *head, *next, *entry;
	spinlock_t *new_bucket_lock;
	unsigned int new_hash;

	if (new_tbl->nest)
		goto out;

	err = -ENOENT;

	rht_for_each(entry, old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);

	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
				      new_tbl, new_hash);

	RCU_INIT_POINTER(entry->next, head);

	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
	spin_unlock(new_bucket_lock);

	rcu_assign_pointer(*pprev, next);

out:
	return err;
}

static int rhashtable_rehash_chain(struct rhashtable *ht,
				   unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	spinlock_t *old_bucket_lock;
	int err;

	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);

	spin_lock_bh(old_bucket_lock);
	while (!(err = rhashtable_rehash_one(ht, old_hash)))
		;

	if (err == -ENOENT) {
		old_tbl->rehash++;
		err = 0;
	}
	spin_unlock_bh(old_bucket_lock);

	return err;
}

static int rhashtable_rehash_attach(struct rhashtable *ht,
				    struct bucket_table *old_tbl,
				    struct bucket_table *new_tbl)
{
	/* Protect future_tbl using the first bucket lock. */
	spin_lock_bh(old_tbl->locks);

	/* Did somebody beat us to it? */
	if (rcu_access_pointer(old_tbl->future_tbl)) {
		spin_unlock_bh(old_tbl->locks);
		return -EEXIST;
	}

	/* Make insertions go into the new, empty table right away. Deletions
	 * and lookups will be attempted in both tables until we synchronize.
	 */
	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);

	spin_unlock_bh(old_tbl->locks);

	return 0;
}

static int rhashtable_rehash_table(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl;
	struct rhashtable_walker *walker;
	unsigned int old_hash;
	int err;

	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
	if (!new_tbl)
		return 0;

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
		err = rhashtable_rehash_chain(ht, old_hash);
		if (err)
			return err;
	}

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	spin_lock(&ht->lock);
	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;
	spin_unlock(&ht->lock);

	/* Wait for readers. All new readers will see the new
	 * table, and thus no references to the old table will
	 * remain.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);

	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}

static int rhashtable_rehash_alloc(struct rhashtable *ht,
				   struct bucket_table *old_tbl,
				   unsigned int size)
{
	struct bucket_table *new_tbl;
	int err;

	ASSERT_RHT_MUTEX(ht);

	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}

/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht:		the hash table to shrink
 *
 * This function shrinks the hash table to fit, i.e., to the smallest
 * size that would not cause it to expand right away automatically.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * The caller must ensure that no concurrent table mutations take place.
 * It is however valid to have concurrent lookups if they are RCU protected.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
static int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	unsigned int nelems = atomic_read(&ht->nelems);
	unsigned int size = 0;

	if (nelems)
		size = roundup_pow_of_two(nelems * 3 / 2);
	if (size < ht->p.min_size)
		size = ht->p.min_size;

	if (old_tbl->size <= size)
		return 0;

	if (rht_dereference(old_tbl->future_tbl, ht))
		return -EEXIST;

	return rhashtable_rehash_alloc(ht, old_tbl, size);
}

static void rht_deferred_worker(struct work_struct *work)
{
	struct rhashtable *ht;
	struct bucket_table *tbl;
	int err = 0;

	ht = container_of(work, struct rhashtable, run_work);
	mutex_lock(&ht->mutex);

	tbl = rht_dereference(ht->tbl, ht);
	tbl = rhashtable_last_table(ht, tbl);

	if (rht_grow_above_75(ht, tbl))
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
		err = rhashtable_shrink(ht);
	else if (tbl->nest)
		err = rhashtable_rehash_alloc(ht, tbl, tbl->size);

	if (!err)
		err = rhashtable_rehash_table(ht);

	mutex_unlock(&ht->mutex);

	if (err)
		schedule_work(&ht->run_work);
}

static int rhashtable_insert_rehash(struct rhashtable *ht,
				    struct bucket_table *tbl)
{
	struct bucket_table *old_tbl;
	struct bucket_table *new_tbl;
	unsigned int size;
	int err;

	old_tbl = rht_dereference_rcu(ht->tbl, ht);

	size = tbl->size;

	err = -EBUSY;

	if (rht_grow_above_75(ht, tbl))
		size *= 2;
	/* Do not schedule more than one rehash */
	else if (old_tbl != tbl)
		goto fail;

	err = -ENOMEM;

	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
	if (new_tbl == NULL)
		goto fail;

	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
	if (err) {
		bucket_table_free(new_tbl);
		if (err == -EEXIST)
			err = 0;
	} else
		schedule_work(&ht->run_work);

	return err;

fail:
	/* Do not fail the insert if someone else did a rehash. */
	if (likely(rcu_dereference_raw(tbl->future_tbl)))
		return 0;

	/* Schedule async rehash to retry allocation in process context. */
	if (err == -ENOMEM)
		schedule_work(&ht->run_work);

	return err;
}

static void *rhashtable_lookup_one(struct rhashtable *ht,
				   struct bucket_table *tbl, unsigned int hash,
				   const void *key, struct rhash_head *obj)
{
	struct rhashtable_compare_arg arg = {
		.ht = ht,
		.key = key,
	};
	struct rhash_head __rcu **pprev;
	struct rhash_head *head;
	int elasticity;

	elasticity = ht->elasticity;
	pprev = rht_bucket_var(tbl, hash);
	rht_for_each_continue(head, *pprev, tbl, hash) {
		struct rhlist_head *list;
		struct rhlist_head *plist;

		elasticity--;
		if (!key ||
		    (ht->p.obj_cmpfn ?
		     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
		     rhashtable_compare(&arg, rht_obj(ht, head))))
			continue;

		if (!ht->rhlist)
			return rht_obj(ht, head);

		list = container_of(obj, struct rhlist_head, rhead);
		plist = container_of(head, struct rhlist_head, rhead);

		RCU_INIT_POINTER(list->next, plist);
		head = rht_dereference_bucket(head->next, tbl, hash);
		RCU_INIT_POINTER(list->rhead.next, head);
		rcu_assign_pointer(*pprev, obj);

		return NULL;
	}

	if (elasticity <= 0)
		return ERR_PTR(-EAGAIN);

	return ERR_PTR(-ENOENT);
}

static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
						  struct bucket_table *tbl,
						  unsigned int hash,
						  struct rhash_head *obj,
						  void *data)
{
	struct rhash_head __rcu **pprev;
	struct bucket_table *new_tbl;
	struct rhash_head *head;

	if (!IS_ERR_OR_NULL(data))
		return ERR_PTR(-EEXIST);

	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	new_tbl = rcu_dereference(tbl->future_tbl);
	if (new_tbl)
		return new_tbl;

	if (PTR_ERR(data) != -ENOENT)
		return ERR_CAST(data);

	if (unlikely(rht_grow_above_max(ht, tbl)))
		return ERR_PTR(-E2BIG);

	if (unlikely(rht_grow_above_100(ht, tbl)))
		return ERR_PTR(-EAGAIN);

	pprev = rht_bucket_insert(ht, tbl, hash);
	if (!pprev)
		return ERR_PTR(-ENOMEM);

	head = rht_dereference_bucket(*pprev, tbl, hash);

	RCU_INIT_POINTER(obj->next, head);
	if (ht->rhlist) {
		struct rhlist_head *list;

		list = container_of(obj, struct rhlist_head, rhead);
		RCU_INIT_POINTER(list->next, NULL);
	}

	rcu_assign_pointer(*pprev, obj);

	atomic_inc(&ht->nelems);
	if (rht_grow_above_75(ht, tbl))
		schedule_work(&ht->run_work);

	return NULL;
}

static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
				   struct rhash_head *obj)
{
	struct bucket_table *new_tbl;
	struct bucket_table *tbl;
	unsigned int hash;
	spinlock_t *lock;
	void *data;

	tbl = rcu_dereference(ht->tbl);

	/* All insertions must grab the oldest table containing
	 * the hashed bucket that is yet to be rehashed.
	 */
	for (;;) {
		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
		lock = rht_bucket_lock(tbl, hash);
		spin_lock_bh(lock);

		if (tbl->rehash <= hash)
			break;

		spin_unlock_bh(lock);
		tbl = rcu_dereference(tbl->future_tbl);
	}

	data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
	new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
	if (PTR_ERR(new_tbl) != -EEXIST)
		data = ERR_CAST(new_tbl);

	while (!IS_ERR_OR_NULL(new_tbl)) {
		tbl = new_tbl;
		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
		spin_lock_nested(rht_bucket_lock(tbl, hash),
				 SINGLE_DEPTH_NESTING);

		data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
		new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
		if (PTR_ERR(new_tbl) != -EEXIST)
			data = ERR_CAST(new_tbl);

		spin_unlock(rht_bucket_lock(tbl, hash));
	}

	spin_unlock_bh(lock);

	if (PTR_ERR(data) == -EAGAIN)
		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
			       -EAGAIN);

	return data;
}

void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
			     struct rhash_head *obj)
{
	void *data;

	do {
		rcu_read_lock();
		data = rhashtable_try_insert(ht, key, obj);
		rcu_read_unlock();
	} while (PTR_ERR(data) == -EAGAIN);

	return data;
}
EXPORT_SYMBOL_GPL(rhashtable_insert_slow);

/**
 * rhashtable_walk_enter - Initialise an iterator
 * @ht:		Table to walk over
 * @iter:	Hash table Iterator
 *
 * This function prepares a hash table walk.
 *
 * Note that if you restart a walk after rhashtable_walk_stop you
 * may see the same object twice. Also, you may miss objects if
 * there are removals in between rhashtable_walk_stop and the next
 * call to rhashtable_walk_start.
 *
 * For a completely stable walk you should construct your own data
 * structure outside the hash table.
 *
 * This function may sleep so you must not call it from interrupt
 * context or with spin locks held.
 *
 * You must call rhashtable_walk_exit after this function returns.
 */
void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
{
	iter->ht = ht;
	iter->p = NULL;
	iter->slot = 0;
	iter->skip = 0;

	spin_lock(&ht->lock);
	iter->walker.tbl =
		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
	list_add(&iter->walker.list, &iter->walker.tbl->walkers);
	spin_unlock(&ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_enter);

/**
 * rhashtable_walk_exit - Free an iterator
 * @iter:	Hash table Iterator
 *
 * This function frees resources allocated by rhashtable_walk_enter.
 */
void rhashtable_walk_exit(struct rhashtable_iter *iter)
{
	spin_lock(&iter->ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&iter->ht->lock);
}
EXPORT_SYMBOL_GPL(rhashtable_walk_exit);

/**
 * rhashtable_walk_start - Start a hash table walk
 * @iter:	Hash table iterator
 *
 * Start a hash table walk. Note that we take the RCU lock in all
 * cases including when we return an error. So you must always call
 * rhashtable_walk_stop to clean up.
 *
 * Returns zero if successful.
 *
 * Returns -EAGAIN if a resize event occurred. Note that the iterator
 * will rewind back to the beginning and you may use it immediately
 * by calling rhashtable_walk_next.
 */
int rhashtable_walk_start(struct rhashtable_iter *iter)
	__acquires(RCU)
{
	struct rhashtable *ht = iter->ht;

	rcu_read_lock();

	spin_lock(&ht->lock);
	if (iter->walker.tbl)
		list_del(&iter->walker.list);
	spin_unlock(&ht->lock);

	if (!iter->walker.tbl) {
		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
		return -EAGAIN;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_start);

/**
 * rhashtable_walk_next - Return the next object and advance the iterator
 * @iter:	Hash table iterator
 *
 * Note that you must call rhashtable_walk_stop when you are finished
 * with the walk.
 *
 * Returns the next object or NULL when the end of the table is reached.
 *
 * Returns -EAGAIN if a resize event occurred. Note that the iterator
 * will rewind back to the beginning and you may continue to use it.
 */
void *rhashtable_walk_next(struct rhashtable_iter *iter)
{
	struct bucket_table *tbl = iter->walker.tbl;
	struct rhlist_head *list = iter->list;
	struct rhashtable *ht = iter->ht;
	struct rhash_head *p = iter->p;
	bool rhlist = ht->rhlist;

	if (p) {
		if (!rhlist || !(list = rcu_dereference(list->next))) {
			p = rcu_dereference(p->next);
			list = container_of(p, struct rhlist_head, rhead);
		}
		goto next;
	}

	for (; iter->slot < tbl->size; iter->slot++) {
		int skip = iter->skip;

		rht_for_each_rcu(p, tbl, iter->slot) {
			if (rhlist) {
				list = container_of(p, struct rhlist_head,
						    rhead);
				do {
					if (!skip)
						goto next;
					skip--;
					list = rcu_dereference(list->next);
				} while (list);

				continue;
			}
			if (!skip)
				break;
			skip--;
		}

next:
		if (!rht_is_a_nulls(p)) {
			iter->skip++;
			iter->p = p;
			iter->list = list;
			return rht_obj(ht, rhlist ? &list->rhead : p);
		}

		iter->skip = 0;
	}

	iter->p = NULL;

	/* Ensure we see any new tables. */
	smp_rmb();

	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	if (iter->walker.tbl) {
		iter->slot = 0;
		iter->skip = 0;
		return ERR_PTR(-EAGAIN);
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(rhashtable_walk_next);

/**
 * rhashtable_walk_stop - Finish a hash table walk
 * @iter:	Hash table iterator
 *
 * Finish a hash table walk.
 */
void rhashtable_walk_stop(struct rhashtable_iter *iter)
	__releases(RCU)
{
	struct rhashtable *ht;
	struct bucket_table *tbl = iter->walker.tbl;

	if (!tbl)
		goto out;

	ht = iter->ht;

	spin_lock(&ht->lock);
	if (tbl->rehash < tbl->size)
		list_add(&iter->walker.list, &tbl->walkers);
	else
		iter->walker.tbl = NULL;
	spin_unlock(&ht->lock);

	iter->p = NULL;

out:
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
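
/* Typical walk usage, as a minimal sketch: "struct test_obj" stands in for
 * the caller's own object type (see the rhashtable_init() examples below).
 * A -EAGAIN from rhashtable_walk_next() means a resize rewound the
 * iterator, so the walk simply continues from the start.
 *
 *	struct rhashtable_iter iter;
 *	struct test_obj *obj;
 *
 *	rhashtable_walk_enter(ht, &iter);
 *	rhashtable_walk_start(&iter);
 *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
 *		if (IS_ERR(obj)) {
 *			if (PTR_ERR(obj) == -EAGAIN)
 *				continue;
 *			break;
 *		}
 *		[... use obj ...]
 *	}
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */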

static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
		   (unsigned long)params->min_size);
}

static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
{
	return jhash2(key, length, seed);
}

/**
 * rhashtable_init - initialize a new hash table
 * @ht:		hash table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters. A table can be configured either with a variable or
 * fixed length key:
 *
 * Configuration Example 1: Fixed length keys
 * struct test_obj {
 *	int			key;
 *	void *			my_member;
 *	struct rhash_head	node;
 * };
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.key_offset = offsetof(struct test_obj, key),
 *	.key_len = sizeof(int),
 *	.hashfn = jhash,
 *	.nulls_base = (1U << RHT_BASE_SHIFT),
 * };
 *
 * Configuration Example 2: Variable length keys
 * struct test_obj {
 *	[...]
 *	struct rhash_head	node;
 * };
 *
 * u32 my_hash_fn(const void *data, u32 len, u32 seed)
 * {
 *	struct test_obj *obj = data;
 *
 *	return [... hash ...];
 * }
 *
 * struct rhashtable_params params = {
 *	.head_offset = offsetof(struct test_obj, node),
 *	.hashfn = jhash,
 *	.obj_hashfn = my_hash_fn,
 * };
 */
int rhashtable_init(struct rhashtable *ht,
		    const struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	size = HASH_DEFAULT_SIZE;

	if ((!params->key_len && !params->obj_hashfn) ||
	    (params->obj_hashfn && !params->obj_cmpfn))
		return -EINVAL;

	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
		return -EINVAL;

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	spin_lock_init(&ht->lock);
	memcpy(&ht->p, params, sizeof(*params));

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	if (params->max_size)
		ht->p.max_size = rounddown_pow_of_two(params->max_size);

	if (params->insecure_max_entries)
		ht->p.insecure_max_entries =
			rounddown_pow_of_two(params->insecure_max_entries);
	else
		ht->p.insecure_max_entries = ht->p.max_size * 2;

	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);

	if (params->nelem_hint)
		size = rounded_hashtable_size(&ht->p);

	/* The maximum (not average) chain length grows with the
	 * size of the hash table, at a rate of (log N)/(log log N).
	 * The value of 16 is selected so that even if the hash
	 * table grew to 2^32 you would not expect the maximum
	 * chain length to exceed it unless we are under attack
	 * (or extremely unlucky).
	 *
	 * As this limit is only to detect attacks, we don't need
	 * to set it to a lower value as you'd need the chain
	 * length to vastly exceed 16 to have any real effect
	 * on the system.
	 */
	if (!params->insecure_elasticity)
		ht->elasticity = 16;

	if (params->locks_mul)
		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
	else
		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;

	ht->key_len = ht->p.key_len;
	if (!params->hashfn) {
		ht->p.hashfn = jhash;

		if (!(ht->key_len & (sizeof(u32) - 1))) {
			ht->key_len /= sizeof(u32);
			ht->p.hashfn = rhashtable_jhash2;
		}
	}

	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (tbl == NULL)
		return -ENOMEM;

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init);
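
/* A minimal end-to-end sketch building on Configuration Example 1 above.
 * The fast-path helpers rhashtable_insert_fast() and rhashtable_lookup_fast()
 * are inline functions from <linux/rhashtable.h>, not this file; "params"
 * and "struct test_obj" are the example definitions above, and error
 * handling beyond the basics is elided.
 *
 *	struct rhashtable ht;
 *	struct test_obj *obj;
 *	int key = 1, err;
 *
 *	err = rhashtable_init(&ht, &params);
 *	if (err)
 *		return err;
 *
 *	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *	if (!obj)
 *		return -ENOMEM;
 *	obj->key = key;
 *	err = rhashtable_insert_fast(&ht, &obj->node, params);
 *
 *	rcu_read_lock();
 *	obj = rhashtable_lookup_fast(&ht, &key, params);
 *	rcu_read_unlock();
 */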

/**
 * rhltable_init - initialize a new hash list table
 * @hlt:	hash list table to be initialized
 * @params:	configuration parameters
 *
 * Initializes a new hash list table.
 *
 * See documentation for rhashtable_init.
 */
int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
{
	int err;

	/* No rhlist NULLs marking for now. */
	if (params->nulls_base)
		return -EINVAL;

	err = rhashtable_init(&hlt->ht, params);
	hlt->ht.rhlist = true;
	return err;
}
EXPORT_SYMBOL_GPL(rhltable_init);
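
/* Rough rhltable sketch (an illustration, not taken from this file): each
 * object embeds a struct rhlist_head so duplicate keys are allowed, and the
 * rhltable_insert()/rhltable_lookup() helpers plus rhl_for_each_entry_rcu()
 * come from <linux/rhashtable.h>. "struct test_obj" is assumed here to
 * embed a struct rhlist_head named "list_node".
 *
 *	struct rhlist_head *list, *pos;
 *	struct test_obj *obj;
 *
 *	err = rhltable_insert(hlt, &obj->list_node, params);
 *
 *	rcu_read_lock();
 *	list = rhltable_lookup(hlt, &key, params);
 *	rhl_for_each_entry_rcu(obj, pos, list, list_node)
 *		[... every obj that shares this key ...]
 *	rcu_read_unlock();
 */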
1041
1042static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
1043 void (*free_fn)(void *ptr, void *arg),
1044 void *arg)
1045{
1046 struct rhlist_head *list;
1047
1048 if (!ht->rhlist) {
1049 free_fn(rht_obj(ht, obj), arg);
1050 return;
1051 }
1052
1053 list = container_of(obj, struct rhlist_head, rhead);
1054 do {
1055 obj = &list->rhead;
1056 list = rht_dereference(list->next, ht);
1057 free_fn(rht_obj(ht, obj), arg);
1058 } while (list);
1059}
1060
1061/**
Thomas Graf6b6f3022015-03-24 14:18:20 +01001062 * rhashtable_free_and_destroy - free elements and destroy hash table
Thomas Graf7e1e7762014-08-02 11:47:44 +02001063 * @ht: the hash table to destroy
Thomas Graf6b6f3022015-03-24 14:18:20 +01001064 * @free_fn: callback to release resources of element
1065 * @arg: pointer passed to free_fn
Thomas Graf7e1e7762014-08-02 11:47:44 +02001066 *
Thomas Graf6b6f3022015-03-24 14:18:20 +01001067 * Stops an eventual async resize. If defined, invokes free_fn for each
1068 * element to releasal resources. Please note that RCU protected
1069 * readers may still be accessing the elements. Releasing of resources
1070 * must occur in a compatible manner. Then frees the bucket array.
1071 *
1072 * This function will eventually sleep to wait for an async resize
1073 * to complete. The caller is responsible that no further write operations
1074 * occurs in parallel.
Thomas Graf7e1e7762014-08-02 11:47:44 +02001075 */
Thomas Graf6b6f3022015-03-24 14:18:20 +01001076void rhashtable_free_and_destroy(struct rhashtable *ht,
1077 void (*free_fn)(void *ptr, void *arg),
1078 void *arg)
Thomas Graf7e1e7762014-08-02 11:47:44 +02001079{
Herbert Xuda204202017-02-11 19:26:47 +08001080 struct bucket_table *tbl;
Thomas Graf6b6f3022015-03-24 14:18:20 +01001081 unsigned int i;
Thomas Graf97defe12015-01-02 23:00:20 +01001082
Daniel Borkmann4c4b52d2015-02-25 16:31:54 +01001083 cancel_work_sync(&ht->run_work);
Ying Xue57699a42015-01-16 11:13:09 +08001084
Thomas Graf97defe12015-01-02 23:00:20 +01001085 mutex_lock(&ht->mutex);
Thomas Graf6b6f3022015-03-24 14:18:20 +01001086 tbl = rht_dereference(ht->tbl, ht);
1087 if (free_fn) {
1088 for (i = 0; i < tbl->size; i++) {
1089 struct rhash_head *pos, *next;
1090
Herbert Xuda204202017-02-11 19:26:47 +08001091 for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
Thomas Graf6b6f3022015-03-24 14:18:20 +01001092 next = !rht_is_a_nulls(pos) ?
1093 rht_dereference(pos->next, ht) : NULL;
1094 !rht_is_a_nulls(pos);
1095 pos = next,
1096 next = !rht_is_a_nulls(pos) ?
1097 rht_dereference(pos->next, ht) : NULL)
Herbert Xuca268932016-09-19 19:00:09 +08001098 rhashtable_free_one(ht, pos, free_fn, arg);
Thomas Graf6b6f3022015-03-24 14:18:20 +01001099 }
1100 }
1101
1102 bucket_table_free(tbl);
Thomas Graf97defe12015-01-02 23:00:20 +01001103 mutex_unlock(&ht->mutex);
Thomas Graf7e1e7762014-08-02 11:47:44 +02001104}
Thomas Graf6b6f3022015-03-24 14:18:20 +01001105EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
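
/* A short teardown sketch: free_fn receives each object pointer (as
 * resolved by rht_obj()) together with the opaque arg, so element memory
 * can be released while the table is torn down. "struct test_obj" is the
 * assumed caller type from the examples above.
 *
 *	static void free_obj(void *ptr, void *arg)
 *	{
 *		struct test_obj *obj = ptr;
 *
 *		kfree(obj);
 *	}
 *
 *	rhashtable_free_and_destroy(&ht, free_obj, NULL);
 */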

void rhashtable_destroy(struct rhashtable *ht)
{
	return rhashtable_free_and_destroy(ht, NULL, NULL);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);

struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
					    unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	static struct rhash_head __rcu *rhnull =
		(struct rhash_head __rcu *)NULLS_MARKER(0);
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	unsigned int subhash = hash;
	union nested_table *ntbl;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
	subhash >>= tbl->nest;

	while (ntbl && size > (1 << shift)) {
		index = subhash & ((1 << shift) - 1);
		ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
						  tbl, hash);
		size >>= shift;
		subhash >>= shift;
	}

	if (!ntbl)
		return &rhnull;

	return &ntbl[subhash].bucket;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested);

struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
						   struct bucket_table *tbl,
						   unsigned int hash)
{
	const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
	unsigned int index = hash & ((1 << tbl->nest) - 1);
	unsigned int size = tbl->size >> tbl->nest;
	union nested_table *ntbl;
	unsigned int shifted;
	unsigned int nhash;

	ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]);
	hash >>= tbl->nest;
	nhash = index;
	shifted = tbl->nest;
	ntbl = nested_table_alloc(ht, &ntbl[index].table,
				  size <= (1 << shift) ? shifted : 0, nhash);

	while (ntbl && size > (1 << shift)) {
		index = hash & ((1 << shift) - 1);
		size >>= shift;
		hash >>= shift;
		nhash |= index << shifted;
		shifted += shift;
		ntbl = nested_table_alloc(ht, &ntbl[index].table,
					  size <= (1 << shift) ? shifted : 0,
					  nhash);
	}

	if (!ntbl)
		return NULL;

	return &ntbl[hash].bucket;
}
EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);