ipc: optimize semget/shmget/msgget for lots of keys

ipc_findkey() used to scan all objects to look for the wanted key.  This
is slow when using a high number of keys.  This change adds an rhashtable
of kern_ipc_perm objects in ipc_ids, so that one lookup cease to be O(n).

This change gives a 865% improvement of benchmark reaim.jobs_per_min on a
56 threads Intel(R) Xeon(R) CPU E5-2695 v3 @ 2.30GHz with 256G memory [1]

Other (more micro) benchmark results, by the author: On an i5 laptop, the
following loop executed right after a reboot took, without and with this
change:

    for (int i = 0, k=0x424242; i < KEYS; ++i)
        semget(k++, 1, IPC_CREAT | 0600);

                 total       total          max single  max single
   KEYS        without        with        call without   call with

      1            3.5         4.9   µs            3.5         4.9
     10            7.6         8.6   µs            3.7         4.7
     32           16.2        15.9   µs            4.3         5.3
    100           72.9        41.8   µs            3.7         4.7
   1000        5,630.0       502.0   µs             *           *
  10000    1,340,000.0     7,240.0   µs             *           *
  31900   17,600,000.0    22,200.0   µs             *           *

 *: unreliable measure: high variance

The duration for a lookup-only usage was obtained by the same loop once
the keys are present:

                 total       total          max single  max single
   KEYS        without        with        call without   call with

      1            2.1         2.5   µs            2.1         2.5
     10            4.5         4.8   µs            2.2         2.3
     32           13.0        10.8   µs            2.3         2.8
    100           82.9        25.1   µs             *          2.3
   1000        5,780.0       217.0   µs             *           *
  10000    1,470,000.0     2,520.0   µs             *           *
  31900   17,400,000.0     7,810.0   µs             *           *

Finally, executing each semget() in a new process gave, when still
summing only the durations of these syscalls:

creation:
                 total       total
   KEYS        without        with

      1            3.7         5.0   µs
     10           32.9        36.7   µs
     32          125.0       109.0   µs
    100          523.0       353.0   µs
   1000       20,300.0     3,280.0   µs
  10000    2,470,000.0    46,700.0   µs
  31900   27,800,000.0   219,000.0   µs

lookup-only:
                 total       total
   KEYS        without        with

      1            2.5         2.7   µs
     10           25.4        24.4   µs
     32          106.0        72.6   µs
    100          591.0       352.0   µs
   1000       22,400.0     2,250.0   µs
  10000    2,510,000.0    25,700.0   µs
  31900   28,200,000.0   115,000.0   µs

[1] http://lkml.kernel.org/r/20170814060507.GE23258@yexl-desktop

Link: http://lkml.kernel.org/r/20170815194954.ck32ta2z35yuzpwp@debix
Signed-off-by: Guillaume Knispel <guillaume.knispel@supersonicimagine.com>
Reviewed-by: Marc Pardo <marc.pardo@supersonicimagine.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: Guillaume Knispel <guillaume.knispel@supersonicimagine.com>
Cc: Marc Pardo <marc.pardo@supersonicimagine.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/ipc/util.c b/ipc/util.c
index 069bb22..7875587 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -83,27 +83,46 @@
  */
 static int __init ipc_init(void)
 {
-	sem_init();
-	msg_init();
+	int err_sem, err_msg;
+
+	err_sem = sem_init();
+	WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem);
+	err_msg = msg_init();
+	WARN(err_msg, "ipc: sysv msg_init failed: %d\n", err_msg);
 	shm_init();
-	return 0;
+
+	return err_msg ? err_msg : err_sem;
 }
 device_initcall(ipc_init);
 
+static const struct rhashtable_params ipc_kht_params = {
+	.head_offset		= offsetof(struct kern_ipc_perm, khtnode),
+	.key_offset		= offsetof(struct kern_ipc_perm, key),
+	.key_len		= FIELD_SIZEOF(struct kern_ipc_perm, key),
+	.locks_mul		= 1,
+	.automatic_shrinking	= true,
+};
+
 /**
  * ipc_init_ids	- initialise ipc identifiers
  * @ids: ipc identifier set
  *
  * Set up the sequence range to use for the ipc identifier range (limited
- * below IPCMNI) then initialise the ids idr.
+ * below IPCMNI) then initialise the keys hashtable and ids idr.
  */
-void ipc_init_ids(struct ipc_ids *ids)
+int ipc_init_ids(struct ipc_ids *ids)
 {
+	int err;
 	ids->in_use = 0;
 	ids->seq = 0;
 	ids->next_id = -1;
 	init_rwsem(&ids->rwsem);
+	err = rhashtable_init(&ids->key_ht, &ipc_kht_params);
+	if (err)
+		return err;
 	idr_init(&ids->ipcs_idr);
+	ids->tables_initialized = true;
+	return 0;
 }
 
 #ifdef CONFIG_PROC_FS
@@ -147,28 +166,20 @@
  * Returns the locked pointer to the ipc structure if found or NULL
  * otherwise. If key is found ipc points to the owning ipc structure
  *
- * Called with ipc_ids.rwsem held.
+ * Called with writer ipc_ids.rwsem held.
  */
 static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
-	struct kern_ipc_perm *ipc;
-	int next_id;
-	int total;
+	struct kern_ipc_perm *ipcp = NULL;
 
-	for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
-		ipc = idr_find(&ids->ipcs_idr, next_id);
+	if (likely(ids->tables_initialized))
+		ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
+					      ipc_kht_params);
 
-		if (ipc == NULL)
-			continue;
-
-		if (ipc->key != key) {
-			total++;
-			continue;
-		}
-
+	if (ipcp) {
 		rcu_read_lock();
-		ipc_lock_object(ipc);
-		return ipc;
+		ipc_lock_object(ipcp);
+		return ipcp;
 	}
 
 	return NULL;
@@ -221,13 +232,13 @@
 {
 	kuid_t euid;
 	kgid_t egid;
-	int id;
+	int id, err;
 	int next_id = ids->next_id;
 
 	if (size > IPCMNI)
 		size = IPCMNI;
 
-	if (ids->in_use >= size)
+	if (!ids->tables_initialized || ids->in_use >= size)
 		return -ENOSPC;
 
 	idr_preload(GFP_KERNEL);
@@ -246,6 +257,15 @@
 		       (next_id < 0) ? 0 : ipcid_to_idx(next_id), 0,
 		       GFP_NOWAIT);
 	idr_preload_end();
+
+	if (id >= 0 && new->key != IPC_PRIVATE) {
+		err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode,
+					     ipc_kht_params);
+		if (err < 0) {
+			idr_remove(&ids->ipcs_idr, id);
+			id = err;
+		}
+	}
 	if (id < 0) {
 		spin_unlock(&new->lock);
 		rcu_read_unlock();
@@ -377,6 +397,20 @@
 	return err;
 }
 
+/**
+ * ipc_kht_remove - remove an ipc from the key hashtable
+ * @ids: ipc identifier set
+ * @ipcp: ipc perm structure containing the key to remove
+ *
+ * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
+ * before this function is called, and remain locked on the exit.
+ */
+static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+{
+	if (ipcp->key != IPC_PRIVATE)
+		rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode,
+				       ipc_kht_params);
+}
 
 /**
  * ipc_rmid - remove an ipc identifier
@@ -391,10 +425,25 @@
 	int lid = ipcid_to_idx(ipcp->id);
 
 	idr_remove(&ids->ipcs_idr, lid);
+	ipc_kht_remove(ids, ipcp);
 	ids->in_use--;
 	ipcp->deleted = true;
 }
 
+/**
+ * ipc_set_key_private - switch the key of an existing ipc to IPC_PRIVATE
+ * @ids: ipc identifier set
+ * @ipcp: ipc perm structure containing the key to modify
+ *
+ * ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
+ * before this function is called, and remain locked on the exit.
+ */
+void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
+{
+	ipc_kht_remove(ids, ipcp);
+	ipcp->key = IPC_PRIVATE;
+}
+
 int ipc_rcu_getref(struct kern_ipc_perm *ptr)
 {
 	return refcount_inc_not_zero(&ptr->refcount);
@@ -485,7 +534,7 @@
 }
 
 /**
- * ipc_obtain_object
+ * ipc_obtain_object_idr
  * @ids: ipc identifier set
  * @id: ipc id to look for
  *
@@ -499,6 +548,9 @@
 	struct kern_ipc_perm *out;
 	int lid = ipcid_to_idx(id);
 
+	if (unlikely(!ids->tables_initialized))
+		return ERR_PTR(-EINVAL);
+
 	out = idr_find(&ids->ipcs_idr, lid);
 	if (!out)
 		return ERR_PTR(-EINVAL);