cpu/hotplug: Serialize callback invocations proper

commit dc434e056fe1dada20df7ba07f32739d3a701adf upstream.

The setup/remove_state/instance() functions in the hotplug core code are
serialized against concurrent CPU hotplug, but unfortunately not serialized
against themself.

As a consequence a concurrent invocation of these function results in
corruption of the callback machinery because two instances try to invoke
callbacks on remote cpus at the same time. This results in missing callback
invocations and initiator threads waiting forever on the completion.

The obvious solution to replace get_cpu_online() with cpu_hotplug_begin()
is not possible because at least one callsite calls into these functions
from a get_online_cpu() locked region.

Extend the protection scope of the cpuhp_state_mutex from solely protecting
the state arrays to cover the callback invocation machinery as well.

Fixes: 5b7aa87e0482 ("cpu/hotplug: Implement setup/removal interface")
Reported-and-tested-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: hpa@zytor.com
Cc: mingo@kernel.org
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Link: http://lkml.kernel.org/r/20170314150645.g4tdyoszlcbajmna@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 217fd2e..99c6c56 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1441,14 +1441,12 @@
 	/* (Un)Install the callbacks for further cpu hotplug operations */
 	struct cpuhp_step *sp;
 
-	mutex_lock(&cpuhp_state_mutex);
 	sp = cpuhp_get_step(state);
 	sp->startup.single = startup;
 	sp->teardown.single = teardown;
 	sp->name = name;
 	sp->multi_instance = multi_instance;
 	INIT_HLIST_HEAD(&sp->list);
-	mutex_unlock(&cpuhp_state_mutex);
 }
 
 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
@@ -1518,16 +1516,13 @@
 {
 	enum cpuhp_state i;
 
-	mutex_lock(&cpuhp_state_mutex);
 	for (i = CPUHP_AP_ONLINE_DYN; i <= CPUHP_AP_ONLINE_DYN_END; i++) {
 		if (cpuhp_ap_states[i].name)
 			continue;
 
 		cpuhp_ap_states[i].name = "Reserved";
-		mutex_unlock(&cpuhp_state_mutex);
 		return i;
 	}
-	mutex_unlock(&cpuhp_state_mutex);
 	WARN(1, "No more dynamic states available for CPU hotplug\n");
 	return -ENOSPC;
 }
@@ -1544,6 +1539,7 @@
 		return -EINVAL;
 
 	get_online_cpus();
+	mutex_lock(&cpuhp_state_mutex);
 
 	if (!invoke || !sp->startup.multi)
 		goto add_node;
@@ -1568,11 +1564,10 @@
 	}
 add_node:
 	ret = 0;
-	mutex_lock(&cpuhp_state_mutex);
 	hlist_add_head(node, &sp->list);
-	mutex_unlock(&cpuhp_state_mutex);
 
 err:
+	mutex_unlock(&cpuhp_state_mutex);
 	put_online_cpus();
 	return ret;
 }
@@ -1601,6 +1596,7 @@
 		return -EINVAL;
 
 	get_online_cpus();
+	mutex_lock(&cpuhp_state_mutex);
 
 	/* currently assignments for the ONLINE state are possible */
 	if (state == CPUHP_AP_ONLINE_DYN) {
@@ -1636,6 +1632,8 @@
 		}
 	}
 out:
+	mutex_unlock(&cpuhp_state_mutex);
+
 	put_online_cpus();
 	if (!ret && dyn_state)
 		return state;
@@ -1655,6 +1653,8 @@
 		return -EINVAL;
 
 	get_online_cpus();
+	mutex_lock(&cpuhp_state_mutex);
+
 	if (!invoke || !cpuhp_get_teardown_cb(state))
 		goto remove;
 	/*
@@ -1671,7 +1671,6 @@
 	}
 
 remove:
-	mutex_lock(&cpuhp_state_mutex);
 	hlist_del(node);
 	mutex_unlock(&cpuhp_state_mutex);
 	put_online_cpus();
@@ -1696,6 +1695,7 @@
 	BUG_ON(cpuhp_cb_check(state));
 
 	get_online_cpus();
+	mutex_lock(&cpuhp_state_mutex);
 
 	if (sp->multi_instance) {
 		WARN(!hlist_empty(&sp->list),
@@ -1721,6 +1721,7 @@
 	}
 remove:
 	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
+	mutex_unlock(&cpuhp_state_mutex);
 	put_online_cpus();
 }
 EXPORT_SYMBOL(__cpuhp_remove_state);