s390/time: correct use of store clock fast

The result of the store-clock-fast (STCKF) instruction is a bit fuzzy.
It can happen that the value stored on one CPU is smaller than the value
stored on another CPU, although the order of the stores is the other
way around. This can cause deltas of get_tod_clock() values to become
negative when they should not be.

We need to be more careful with store-clock-fast, this patch partially
reverts git commit e4b7b4238e666682555461fa52eecd74652f36bb "time:
always use stckf instead of stck if available". The get_tod_clock()
function now uses the store-clock-extended (STCKE) instruction.
get_tod_clock_fast() can be used if the fuzziness of store-clock-fast
is acceptable e.g. for wait loops local to a CPU.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 57c87d7..a9f3d00 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -44,7 +44,7 @@
 	do {
 		set_clock_comparator(end);
 		vtime_stop_cpu();
-	} while (get_tod_clock() < end);
+	} while (get_tod_clock_fast() < end);
 	lockdep_on();
 	__ctl_load(cr0, 0, 0);
 	__ctl_load(cr6, 6, 6);
@@ -55,7 +55,7 @@
 {
 	u64 clock_saved, end;
 
-	end = get_tod_clock() + (usecs << 12);
+	end = get_tod_clock_fast() + (usecs << 12);
 	do {
 		clock_saved = 0;
 		if (end < S390_lowcore.clock_comparator) {
@@ -65,7 +65,7 @@
 		vtime_stop_cpu();
 		if (clock_saved)
 			local_tick_enable(clock_saved);
-	} while (get_tod_clock() < end);
+	} while (get_tod_clock_fast() < end);
 }
 
 /*
@@ -109,8 +109,8 @@
 {
 	u64 end;
 
-	end = get_tod_clock() + (usecs << 12);
-	while (get_tod_clock() < end)
+	end = get_tod_clock_fast() + (usecs << 12);
+	while (get_tod_clock_fast() < end)
 		cpu_relax();
 }
 
@@ -120,10 +120,10 @@
 
 	nsecs <<= 9;
 	do_div(nsecs, 125);
-	end = get_tod_clock() + nsecs;
+	end = get_tod_clock_fast() + nsecs;
 	if (nsecs & ~0xfffUL)
 		__udelay(nsecs >> 12);
-	while (get_tod_clock() < end)
+	while (get_tod_clock_fast() < end)
 		barrier();
 }
 EXPORT_SYMBOL(__ndelay);