Merge branch 'rhashtable-next'

Thomas Graf says:

====================
rhashtable updates on top of Herbert's work

Patch 1 is a bugfix for an RCU splat I encountered while testing.
Patches 2 & 3 are pure cleanups. Patch 4 disables automatic shrinking
by default, as discussed in the previous thread. Patch 5 removes some
rhashtable internal knowledge from nft_hash and fixes another RCU
splat.

I've pushed various rhashtable tests (Netlink, nft) together with a
Makefile to a git tree [0] for easier stress testing.

[0] https://github.com/tgraf/rhashtable
====================
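
As a rough illustration of the API after this series, here is a minimal
sketch of a user opting back into automatic shrinking and tearing down a
table with rhashtable_free_and_destroy() instead of walking buckets by
hand. The object, table, and function names (example_obj, example_ht,
example_setup, example_free_fn, example_teardown) are hypothetical and
not part of this series, and the teardown assumes no RCU readers can
still reach the elements, as is the case for nft_hash at set destruction:

#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/types.h>

struct example_obj {
	u32			key;
	struct rhash_head	node;
};

static struct rhashtable example_ht;

static const struct rhashtable_params example_params = {
	.head_offset		= offsetof(struct example_obj, node),
	.key_offset		= offsetof(struct example_obj, key),
	.key_len		= sizeof(u32),
	.automatic_shrinking	= true,	/* shrinking is now opt-in */
};

static int example_setup(void)
{
	return rhashtable_init(&example_ht, &example_params);
}

/* Invoked once per element by rhashtable_free_and_destroy() */
static void example_free_fn(void *ptr, void *arg)
{
	kfree(ptr);	/* assumes no RCU readers can still reach ptr */
}

static void example_teardown(void)
{
	/* Replaces the open-coded bucket walk previously needed in nft_hash */
	rhashtable_free_and_destroy(&example_ht, example_free_fn, NULL);
}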

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index d7be9cb..99f2e49 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -2,7 +2,7 @@
  * Resizable, Scalable, Concurrent Hash Table
  *
  * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
- * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
+ * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
  * Code partially derived from nft_hash
@@ -104,6 +104,7 @@
  * @min_size: Minimum size while shrinking
  * @nulls_base: Base value to generate nulls marker
  * @insecure_elasticity: Set to true to disable chain length checks
+ * @automatic_shrinking: Enable automatic shrinking of tables
  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
  * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
  * @obj_hashfn: Function to hash object
@@ -118,6 +119,7 @@
 	unsigned int		min_size;
 	u32			nulls_base;
 	bool			insecure_elasticity;
+	bool			automatic_shrinking;
 	size_t			locks_mul;
 	rht_hashfn_t		hashfn;
 	rht_obj_hashfn_t	obj_hashfn;
@@ -134,12 +136,10 @@
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
  * @lock: Spin lock to protect walker list
- * @being_destroyed: True if table is set up for destruction
  */
 struct rhashtable {
 	struct bucket_table __rcu	*tbl;
 	atomic_t			nelems;
-	bool                            being_destroyed;
 	unsigned int			key_len;
 	unsigned int			elasticity;
 	struct rhashtable_params	p;
@@ -208,13 +208,13 @@
 	struct rhashtable *ht, const struct bucket_table *tbl,
 	const void *key, const struct rhashtable_params params)
 {
-	unsigned hash;
+	unsigned int hash;
 
 	/* params must be equal to ht->p if it isn't constant. */
 	if (!__builtin_constant_p(params.key_len))
 		hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
 	else if (params.key_len) {
-		unsigned key_len = params.key_len;
+		unsigned int key_len = params.key_len;
 
 		if (params.hashfn)
 			hash = params.hashfn(key, key_len, tbl->hash_rnd);
@@ -224,7 +224,7 @@
 			hash = jhash2(key, key_len / sizeof(u32),
 				      tbl->hash_rnd);
 	} else {
-		unsigned key_len = ht->p.key_len;
+		unsigned int key_len = ht->p.key_len;
 
 		if (params.hashfn)
 			hash = params.hashfn(key, key_len, tbl->hash_rnd);
@@ -332,6 +332,9 @@
 void *rhashtable_walk_next(struct rhashtable_iter *iter);
 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
 
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg);
 void rhashtable_destroy(struct rhashtable *ht);
 
 #define rht_dereference(p, ht) \
@@ -512,7 +515,7 @@
 	};
 	const struct bucket_table *tbl;
 	struct rhash_head *he;
-	unsigned hash;
+	unsigned int hash;
 
 	rcu_read_lock();
 
@@ -539,6 +542,7 @@
 	return NULL;
 }
 
+/* Internal function, please use rhashtable_insert_fast() instead */
 static inline int __rhashtable_insert_fast(
 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
 	const struct rhashtable_params params)
@@ -550,8 +554,8 @@
 	struct bucket_table *tbl, *new_tbl;
 	struct rhash_head *head;
 	spinlock_t *lock;
-	unsigned elasticity;
-	unsigned hash;
+	unsigned int elasticity;
+	unsigned int hash;
 	int err;
 
 restart:
@@ -585,8 +589,8 @@
 	if (unlikely(rht_grow_above_100(ht, tbl))) {
 slow_path:
 		spin_unlock_bh(lock);
-		rcu_read_unlock();
 		err = rhashtable_insert_rehash(ht);
+		rcu_read_unlock();
 		if (err)
 			return err;
 
@@ -711,6 +715,7 @@
 	return __rhashtable_insert_fast(ht, key, obj, params);
 }
 
+/* Internal function, please use rhashtable_remove_fast() instead */
 static inline int __rhashtable_remove_fast(
 	struct rhashtable *ht, struct bucket_table *tbl,
 	struct rhash_head *obj, const struct rhashtable_params params)
@@ -718,7 +723,7 @@
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
 	spinlock_t * lock;
-	unsigned hash;
+	unsigned int hash;
 	int err = -ENOENT;
 
 	hash = rht_head_hashfn(ht, tbl, obj, params);
@@ -782,7 +787,8 @@
 		goto out;
 
 	atomic_dec(&ht->nelems);
-	if (rht_shrink_below_30(ht, tbl))
+	if (unlikely(ht->p.automatic_shrinking &&
+		     rht_shrink_below_30(ht, tbl)))
 		schedule_work(&ht->run_work);
 
 out:
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 8514f7c..4b7b7e6 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -153,7 +153,7 @@
 	return new_tbl;
 }
 
-static int rhashtable_rehash_one(struct rhashtable *ht, unsigned old_hash)
+static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl = rhashtable_last_table(ht,
@@ -162,7 +162,7 @@
 	int err = -ENOENT;
 	struct rhash_head *head, *next, *entry;
 	spinlock_t *new_bucket_lock;
-	unsigned new_hash;
+	unsigned int new_hash;
 
 	rht_for_each(entry, old_tbl, old_hash) {
 		err = 0;
@@ -199,7 +199,8 @@
 	return err;
 }
 
-static void rhashtable_rehash_chain(struct rhashtable *ht, unsigned old_hash)
+static void rhashtable_rehash_chain(struct rhashtable *ht,
+				    unsigned int old_hash)
 {
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	spinlock_t *old_bucket_lock;
@@ -244,7 +245,7 @@
 	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
 	struct bucket_table *new_tbl;
 	struct rhashtable_walker *walker;
-	unsigned old_hash;
+	unsigned int old_hash;
 
 	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
 	if (!new_tbl)
@@ -324,11 +325,12 @@
 static int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-	unsigned size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	unsigned int size;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
+	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
 	if (size < ht->p.min_size)
 		size = ht->p.min_size;
 
@@ -357,20 +359,17 @@
 
 	ht = container_of(work, struct rhashtable, run_work);
 	mutex_lock(&ht->mutex);
-	if (ht->being_destroyed)
-		goto unlock;
 
 	tbl = rht_dereference(ht->tbl, ht);
 	tbl = rhashtable_last_table(ht, tbl);
 
 	if (rht_grow_above_75(ht, tbl))
 		rhashtable_expand(ht);
-	else if (rht_shrink_below_30(ht, tbl))
+	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
 		rhashtable_shrink(ht);
 
 	err = rhashtable_rehash_table(ht);
 
-unlock:
 	mutex_unlock(&ht->mutex);
 
 	if (err)
@@ -379,9 +378,9 @@
 
 static bool rhashtable_check_elasticity(struct rhashtable *ht,
 					struct bucket_table *tbl,
-					unsigned hash)
+					unsigned int hash)
 {
-	unsigned elasticity = ht->elasticity;
+	unsigned int elasticity = ht->elasticity;
 	struct rhash_head *head;
 
 	rht_for_each(head, tbl, hash)
@@ -431,7 +430,7 @@
 			   struct bucket_table *tbl)
 {
 	struct rhash_head *head;
-	unsigned hash;
+	unsigned int hash;
 	int err;
 
 	tbl = rhashtable_last_table(ht, tbl);
@@ -781,21 +780,53 @@
 EXPORT_SYMBOL_GPL(rhashtable_init);
 
 /**
- * rhashtable_destroy - destroy hash table
+ * rhashtable_free_and_destroy - free elements and destroy hash table
  * @ht:		the hash table to destroy
+ * @free_fn:	callback to release resources of element
+ * @arg:	pointer passed to free_fn
  *
- * Frees the bucket array. This function is not rcu safe, therefore the caller
- * has to make sure that no resizing may happen by unpublishing the hashtable
- * and waiting for the quiescent cycle before releasing the bucket array.
+ * Stops an eventual async resize. If defined, invokes free_fn for each
+ * element to release resources. Please note that RCU protected
+ * readers may still be accessing the elements. Releasing of resources
+ * must occur in a compatible manner. Then frees the bucket array.
+ *
+ * This function will eventually sleep to wait for an async resize
+ * to complete. The caller is responsible for ensuring that no further
+ * write operations occur in parallel.
  */
-void rhashtable_destroy(struct rhashtable *ht)
+void rhashtable_free_and_destroy(struct rhashtable *ht,
+				 void (*free_fn)(void *ptr, void *arg),
+				 void *arg)
 {
-	ht->being_destroyed = true;
+	const struct bucket_table *tbl;
+	unsigned int i;
 
 	cancel_work_sync(&ht->run_work);
 
 	mutex_lock(&ht->mutex);
-	bucket_table_free(rht_dereference(ht->tbl, ht));
+	tbl = rht_dereference(ht->tbl, ht);
+	if (free_fn) {
+		for (i = 0; i < tbl->size; i++) {
+			struct rhash_head *pos, *next;
+
+			for (pos = rht_dereference(tbl->buckets[i], ht),
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL;
+			     !rht_is_a_nulls(pos);
+			     pos = next,
+			     next = !rht_is_a_nulls(pos) ?
+					rht_dereference(pos->next, ht) : NULL)
+				free_fn(rht_obj(ht, pos), arg);
+		}
+	}
+
+	bucket_table_free(tbl);
 	mutex_unlock(&ht->mutex);
 }
+EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
+
+void rhashtable_destroy(struct rhashtable *ht)
+{
+	return rhashtable_free_and_destroy(ht, NULL, NULL);
+}
 EXPORT_SYMBOL_GPL(rhashtable_destroy);
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index ad39669..f9ce219 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -172,6 +172,7 @@
 	.head_offset = offsetof(struct nft_hash_elem, node),
 	.key_offset = offsetof(struct nft_hash_elem, key),
 	.hashfn = jhash,
+	.automatic_shrinking = true,
 };
 
 static int nft_hash_init(const struct nft_set *set,
@@ -187,26 +188,15 @@
 	return rhashtable_init(priv, &params);
 }
 
+static void nft_free_element(void *ptr, void *arg)
+{
+	nft_hash_elem_destroy((const struct nft_set *)arg, ptr);
+}
+
 static void nft_hash_destroy(const struct nft_set *set)
 {
-	struct rhashtable *priv = nft_set_priv(set);
-	const struct bucket_table *tbl;
-	struct nft_hash_elem *he;
-	struct rhash_head *pos, *next;
-	unsigned int i;
-
-	/* Stop an eventual async resizing */
-	priv->being_destroyed = true;
-	mutex_lock(&priv->mutex);
-
-	tbl = rht_dereference(priv->tbl, priv);
-	for (i = 0; i < tbl->size; i++) {
-		rht_for_each_entry_safe(he, pos, next, tbl, i, node)
-			nft_hash_elem_destroy(set, he);
-	}
-	mutex_unlock(&priv->mutex);
-
-	rhashtable_destroy(priv);
+	rhashtable_free_and_destroy(nft_set_priv(set), nft_free_element,
+				    (void *)set);
 }
 
 static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e2f7f28..4caa809 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3142,6 +3142,7 @@
 	.obj_hashfn = netlink_hash,
 	.obj_cmpfn = netlink_compare,
 	.max_size = 65536,
+	.automatic_shrinking = true,
 };
 
 static int __init netlink_proto_init(void)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 0947105..ee90d74 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2297,6 +2297,7 @@
 	.key_len = sizeof(u32), /* portid */
 	.max_size = 1048576,
 	.min_size = 256,
+	.automatic_shrinking = true,
 };
 
 int tipc_sk_rht_init(struct net *net)