ARM: Fix subtle race in CPU pen_release hotplug code

There is a subtle race in the CPU hotplug code, where a CPU which has
been offlined can online itself before being requested, which results
in things going astray on the next online/offline cycle.

What happens in the normal online/offline/online cycle is:

	CPU0			CPU3
	requests boot of CPU3
	pen_release = 3
	flush cache line
				checks pen_release, reads 3
				starts boot
				pen_release = -1
	... requests CPU3 offline ...
				... dies ...
				checks pen_release, reads -1
	requests boot of CPU3
	pen_release = 3
	flush cache line
				checks pen_release, reads 3
				starts boot
				pen_release = -1

However, as the write of -1 of pen_release is not fully flushed back to
memory, and the checking of pen_release is done with caches disabled,
this allows CPU3 the opportunity to read the old value of pen_release:

	CPU0			CPU3
	requests boot of CPU3
	pen_release = 3
	flush cache line
				checks pen_release, reads 3
				starts boot
				pen_release = -1
	... requests CPU3 offline ...
				... dies ...
				checks pen_release, reads 3
				starts boot
				pen_release = -1
	requests boot of CPU3
	pen_release = 3
	flush cache line

Fix this by grouping the write of pen_release along with its cache line
flushing code to ensure that any update to pen_release is always pushed
out to physical memory.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/mach-s5pv310/platsmp.c b/arch/arm/mach-s5pv310/platsmp.c
index 18aaf5f..98c0474 100644
--- a/arch/arm/mach-s5pv310/platsmp.c
+++ b/arch/arm/mach-s5pv310/platsmp.c
@@ -37,6 +37,19 @@
 
 volatile int __cpuinitdata pen_release = -1;
 
+/*
+ * Write pen_release in a way that is guaranteed to be visible to all
+ * observers, irrespective of whether they're taking part in coherency
+ * or not.  This is necessary for the hotplug code to work reliably.
+ */
+static void write_pen_release(int val)
+{
+	pen_release = val;
+	smp_wmb();
+	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
+	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+}
+
 static void __iomem *scu_base_addr(void)
 {
 	return (void __iomem *)(S5P_VA_SCU);
@@ -57,8 +70,7 @@
 	 * let the primary processor know we're out of the
 	 * pen, then head off into the C entry point
 	 */
-	pen_release = -1;
-	smp_wmb();
+	write_pen_release(-1);
 
 	/*
 	 * Synchronise with the boot thread.
@@ -85,9 +97,7 @@
 	 * Note that "pen_release" is the hardware CPU ID, whereas
 	 * "cpu" is Linux's internal ID.
 	 */
-	pen_release = cpu;
-	__cpuc_flush_dcache_area((void *)&pen_release, sizeof(pen_release));
-	outer_clean_range(__pa(&pen_release), __pa(&pen_release + 1));
+	write_pen_release(cpu);
 
 	/*
 	 * Send the secondary CPU a soft interrupt, thereby causing