[S390] rework idle code

Whenever the cpu loads an enabled wait PSW it will appear as idle to the
underlying host system. The code in default_idle calls vtime_stop_cpu,
which does the necessary accounting to get the cpu time right. The
udelay code just loads an enabled wait PSW without any of this
accounting, so the time spent waiting is attributed incorrectly. To
correct this, rework the vtime_stop_cpu/vtime_start_cpu logic and move
the difficult parts to entry[64].S: vtime_stop_cpu can now be called
from anywhere and vtime_start_cpu is gone. The correction of the cpu
time during wakeup from an enabled wait PSW is done with a critical
section in entry[64].S. As vtime_start_cpu is gone, s390_idle_check can
be removed as well.
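
For illustration, the reworked flow looks roughly like the pseudo-C
sketch below. All structure and symbol names are taken from this patch;
old_psw_addr and timer_reprogrammed are illustrative stand-ins for the
values the assembler keeps in %r9 and %r5, and the wakeup half really
runs as the cleanup_idle critical section in entry[64].S, not as C.

	/* First half: vtime_stop_cpu (see the vtime.c hunk below). */
	psw_idle(idle, vq, psw_mask, !list_empty(&vq->list));
	/* psw_idle stores clock and cpu timer to idle_enter, loads the
	 * enabled wait PSW and, once cleanup_idle has run, returns so
	 * that idle_exit - idle_enter can be accounted as idle time
	 * under the idle->sequence counter. */

	/* Second half: pseudo-C for cleanup_idle. The interrupt paths
	 * run it whenever the old PSW address lies between psw_idle
	 * and psw_idle_end. */
	idle->idle_exit = int_clock;	/* mcck_clock for machine checks */
	vq->idle_exit = enter_timer;	/* mcck_enter_timer likewise */
	if (old_psw_addr < psw_idle_lpsw) {
		/* Interrupted before stck/stpt: treat as zero idle time. */
		idle->idle_enter = idle->idle_exit;
		vq->idle_enter = vq->idle_exit;
	} else if (timer_reprogrammed) {
		/* spt vq->idle_enter: undo the spt of .Lvtimer_max that
		 * psw_idle did while virtual timers were pending. */
	}
	/* Account the time up to idle entry: the wall-clock delta goes
	 * to the steal timer, the cpu-timer delta to system time; the
	 * idle interval itself is skipped by both. */
	S390_lowcore.steal_timer +=
		idle->idle_enter - S390_lowcore.last_update_clock;
	S390_lowcore.last_update_clock = idle->idle_exit;
	S390_lowcore.system_timer +=
		S390_lowcore.last_update_timer - vq->idle_enter;
	S390_lowcore.last_update_timer = vq->idle_exit;
	/* Finally clear the wait state bit in the return PSW and set
	 * the return address to the instruction after psw_idle. */

Readers of the idle data (show_idle_count, show_idle_time and
s390_get_idle_time) retry while idle->sequence is odd or changes under
them, so they never see a half-updated idle_enter/idle_exit pair.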

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index c23c390..24ef186 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -170,24 +170,17 @@
 	unsigned int sequence;
 	unsigned long long idle_count;
 	unsigned long long idle_enter;
+	unsigned long long idle_exit;
 	unsigned long long idle_time;
 	int nohz_delay;
 };
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
 
-void vtime_start_cpu(__u64 int_clock, __u64 enter_timer);
 cputime64_t s390_get_idle_time(int cpu);
 
 #define arch_idle_time(cpu) s390_get_idle_time(cpu)
 
-static inline void s390_idle_check(struct pt_regs *regs, __u64 int_clock,
-				   __u64 enter_timer)
-{
-	if (regs->psw.mask & PSW_MASK_WAIT)
-		vtime_start_cpu(int_clock, enter_timer);
-}
-
 static inline int s390_nohz_delay(int cpu)
 {
 	return __get_cpu_var(s390_idle).nohz_delay != 0;
diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h
index 814243c..e63069b 100644
--- a/arch/s390/include/asm/timer.h
+++ b/arch/s390/include/asm/timer.h
@@ -33,8 +33,8 @@
 	spinlock_t lock;
 	__u64 timer;		/* last programmed timer */
 	__u64 elapsed;		/* elapsed time of timer expire values */
-	__u64 idle;		/* temp var for idle */
-	int do_spt;		/* =1: reprogram cpu timer in idle */
+	__u64 idle_enter;	/* cpu timer on idle enter */
+	__u64 idle_exit;	/* cpu timer on idle exit */
 };
 
 extern void init_virt_timer(struct vtimer_list *timer);
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index aeeaf89..ed8c913 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -8,6 +8,8 @@
 
 #include <linux/kbuild.h>
 #include <linux/sched.h>
+#include <asm/cputime.h>
+#include <asm/timer.h>
 #include <asm/vdso.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -70,6 +72,12 @@
 	DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
 	DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
 	BLANK();
+	/* idle data offsets */
+	DEFINE(__IDLE_ENTER, offsetof(struct s390_idle_data, idle_enter));
+	DEFINE(__IDLE_EXIT, offsetof(struct s390_idle_data, idle_exit));
+	/* vtimer queue offsets */
+	DEFINE(__VQ_IDLE_ENTER, offsetof(struct vtimer_queue, idle_enter));
+	DEFINE(__VQ_IDLE_EXIT, offsetof(struct vtimer_queue, idle_exit));
 	/* lowcore offsets */
 	DEFINE(__LC_EXT_PARAMS, offsetof(struct _lowcore, ext_params));
 	DEFINE(__LC_EXT_CPU_ADDR, offsetof(struct _lowcore, ext_cpu_addr));
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6143521..74ee563 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -105,14 +105,14 @@
 
 	.macro	ADD64 high,low,timer
 	al	\high,\timer
-	al	\low,\timer+4
+	al	\low,4+\timer
 	brc	12,.+8
 	ahi	\high,1
 	.endm
 
 	.macro	SUB64 high,low,timer
 	sl	\high,\timer
-	sl	\low,\timer+4
+	sl	\low,4+\timer
 	brc	3,.+8
 	ahi	\high,-1
 	.endm
@@ -471,7 +471,6 @@
 	jnz	io_work			# there is work to do (signals etc.)
 io_restore:
 	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r11)
-	ni	__LC_RETURN_PSW+1,0xfd	# clean wait state bit
 	stpt	__LC_EXIT_TIMER
 	lm	%r0,%r15,__PT_R0(%r11)
 	lpsw	__LC_RETURN_PSW
@@ -612,6 +611,26 @@
 	basr	%r14,%r1		# call do_extint
 	j	io_return
 
+/*
+ * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ */
+ENTRY(psw_idle)
+	st	%r4,__SF_EMPTY(%r15)
+	basr	%r1,0
+	la	%r1,psw_idle_lpsw+4-.(%r1)
+	st	%r1,__SF_EMPTY+4(%r15)
+	oi	__SF_EMPTY+4(%r15),0x80
+	la	%r1,.Lvtimer_max-psw_idle_lpsw-4(%r1)
+	stck	__IDLE_ENTER(%r2)
+	ltr	%r5,%r5
+	stpt	__VQ_IDLE_ENTER(%r3)
+	jz	psw_idle_lpsw
+	spt	0(%r1)
+psw_idle_lpsw:
+	lpsw	__SF_EMPTY(%r15)
+	br	%r14
+psw_idle_end:
+
 __critical_end:
 
 /*
@@ -673,7 +692,6 @@
 	TRACE_IRQS_ON
 mcck_return:
 	mvc	__LC_RETURN_MCCK_PSW(8),__PT_PSW(%r11) # move return PSW
-	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
 	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
 	jno	0f
 	lm	%r0,%r15,__PT_R0(%r11)
@@ -748,6 +766,8 @@
 	.long	io_tif + 0x80000000
 	.long	io_restore + 0x80000000
 	.long	io_done + 0x80000000
+	.long	psw_idle + 0x80000000
+	.long	psw_idle_end + 0x80000000
 
 cleanup_critical:
 	cl	%r9,BASED(cleanup_table)	# system_call
@@ -766,6 +786,10 @@
 	jl	cleanup_io_tif
 	cl	%r9,BASED(cleanup_table+28)	# io_done
 	jl	cleanup_io_restore
+	cl	%r9,BASED(cleanup_table+32)	# psw_idle
+	jl	0f
+	cl	%r9,BASED(cleanup_table+36)	# psw_idle_end
+	jl	cleanup_idle
 0:	br	%r14
 
 cleanup_system_call:
@@ -849,7 +873,6 @@
 	jhe	0f
 	l	%r9,12(%r11)		# get saved r11 pointer to pt_regs
 	mvc	__LC_RETURN_PSW(8),__PT_PSW(%r9)
-	ni	__LC_RETURN_PSW+1,0xfd	# clear wait state bit
 	mvc	0(32,%r11),__PT_R8(%r9)
 	lm	%r0,%r7,__PT_R0(%r9)
 0:	lm	%r8,%r9,__LC_RETURN_PSW
@@ -857,11 +880,52 @@
 cleanup_io_restore_insn:
 	.long	io_done - 4 + 0x80000000
 
+cleanup_idle:
+	# copy interrupt clock & cpu timer
+	mvc	__IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+	mvc	__VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER
+	chi	%r11,__LC_SAVE_AREA_ASYNC
+	je	0f
+	mvc	__IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+	mvc	__VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER
+0:	# check if stck has been executed
+	cl	%r9,BASED(cleanup_idle_insn)
+	jhe	1f
+	mvc	__IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2)
+	mvc	__VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3)
+	j	2f
+1:	# check if the cpu timer has been reprogrammed
+	ltr	%r5,%r5
+	jz	2f
+	spt	__VQ_IDLE_ENTER(%r3)
+2:	# account system time going idle
+	lm	%r9,%r10,__LC_STEAL_TIMER
+	ADD64	%r9,%r10,__IDLE_ENTER(%r2)
+	SUB64	%r9,%r10,__LC_LAST_UPDATE_CLOCK
+	stm	%r9,%r10,__LC_STEAL_TIMER
+	mvc	__LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2)
+	lm	%r9,%r10,__LC_SYSTEM_TIMER
+	ADD64	%r9,%r10,__LC_LAST_UPDATE_TIMER
+	SUB64	%r9,%r10,__VQ_IDLE_ENTER(%r3)
+	stm	%r9,%r10,__LC_SYSTEM_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3)
+	# prepare return psw
+	n	%r8,BASED(cleanup_idle_wait)	# clear wait state bit
+	l	%r9,24(%r11)			# return from psw_idle
+	br	%r14
+cleanup_idle_insn:
+	.long	psw_idle_lpsw + 0x80000000
+cleanup_idle_wait:
+	.long	0xfffdffff
+
 /*
  * Integer constants
  */
 	.align	4
-.Lnr_syscalls:		.long	NR_syscalls
+.Lnr_syscalls:
+	.long	NR_syscalls
+.Lvtimer_max:
+	.quad	0x7fffffffffffffff
 
 /*
  * Symbol constants
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 92b1617..4984785 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -4,7 +4,8 @@
 #include <linux/types.h>
 #include <linux/signal.h>
 #include <asm/ptrace.h>
-
+#include <asm/cputime.h>
+#include <asm/timer.h>
 
 extern void (*pgm_check_table[128])(struct pt_regs *);
 extern void *restart_stack;
@@ -16,6 +17,8 @@
 void mcck_int_handler(void);
 void restart_int_handler(void);
 void restart_call_handler(void);
+void psw_idle(struct s390_idle_data *, struct vtimer_queue *,
+	      unsigned long, int);
 
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index e33789a..4e1c292 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -489,7 +489,6 @@
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r11)
-	ni	__LC_RETURN_PSW+1,0xfd	# clear wait state bit
 	stpt	__LC_EXIT_TIMER
 	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
 	lmg	%r11,%r15,__PT_R11(%r11)
@@ -631,6 +630,24 @@
 	brasl	%r14,do_extint
 	j	io_return
 
+/*
+ * Load idle PSW. The second "half" of this function is in cleanup_idle.
+ */
+ENTRY(psw_idle)
+	stg	%r4,__SF_EMPTY(%r15)
+	larl	%r1,psw_idle_lpsw+4
+	stg	%r1,__SF_EMPTY+8(%r15)
+	larl	%r1,.Lvtimer_max
+	stck	__IDLE_ENTER(%r2)
+	ltr	%r5,%r5
+	stpt	__VQ_IDLE_ENTER(%r3)
+	jz	psw_idle_lpsw
+	spt	0(%r1)
+psw_idle_lpsw:
+	lpswe	__SF_EMPTY(%r15)
+	br	%r14
+psw_idle_end:
+
 __critical_end:
 
 /*
@@ -696,7 +713,6 @@
 	lg	%r14,__LC_VDSO_PER_CPU
 	lmg	%r0,%r10,__PT_R0(%r11)
 	mvc	__LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
-	ni	__LC_RETURN_MCCK_PSW+1,0xfd # clear wait state bit
 	tm	__LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
 	jno	0f
 	stpt	__LC_EXIT_TIMER
@@ -770,6 +786,8 @@
 	.quad	io_tif
 	.quad	io_restore
 	.quad	io_done
+	.quad	psw_idle
+	.quad	psw_idle_end
 
 cleanup_critical:
 	clg	%r9,BASED(cleanup_table)	# system_call
@@ -788,6 +806,10 @@
 	jl	cleanup_io_tif
 	clg	%r9,BASED(cleanup_table+56)	# io_done
 	jl	cleanup_io_restore
+	clg	%r9,BASED(cleanup_table+64)	# psw_idle
+	jl	0f
+	clg	%r9,BASED(cleanup_table+72)	# psw_idle_end
+	jl	cleanup_idle
 0:	br	%r14
 
 
@@ -877,7 +899,6 @@
 	je	0f
 	lg	%r9,24(%r11)		# get saved r11 pointer to pt_regs
 	mvc	__LC_RETURN_PSW(16),__PT_PSW(%r9)
-	ni	__LC_RETURN_PSW+1,0xfd	# clear wait state bit
 	mvc	0(64,%r11),__PT_R8(%r9)
 	lmg	%r0,%r7,__PT_R0(%r9)
 0:	lmg	%r8,%r9,__LC_RETURN_PSW
@@ -885,6 +906,42 @@
 cleanup_io_restore_insn:
 	.quad	io_done - 4
 
+cleanup_idle:
+	# copy interrupt clock & cpu timer
+	mvc	__IDLE_EXIT(8,%r2),__LC_INT_CLOCK
+	mvc	__VQ_IDLE_EXIT(8,%r3),__LC_ASYNC_ENTER_TIMER
+	cghi	%r11,__LC_SAVE_AREA_ASYNC
+	je	0f
+	mvc	__IDLE_EXIT(8,%r2),__LC_MCCK_CLOCK
+	mvc	__VQ_IDLE_EXIT(8,%r3),__LC_MCCK_ENTER_TIMER
+0:	# check if stck & stpt have been executed
+	clg	%r9,BASED(cleanup_idle_insn)
+	jhe	1f
+	mvc	__IDLE_ENTER(8,%r2),__IDLE_EXIT(%r2)
+	mvc	__VQ_IDLE_ENTER(8,%r3),__VQ_IDLE_EXIT(%r3)
+	j	2f
+1:	# check if the cpu timer has been reprogrammed
+	ltr	%r5,%r5
+	jz	2f
+	spt	__VQ_IDLE_ENTER(%r3)
+2:	# account system time going idle
+	lg	%r9,__LC_STEAL_TIMER
+	alg	%r9,__IDLE_ENTER(%r2)
+	slg	%r9,__LC_LAST_UPDATE_CLOCK
+	stg	%r9,__LC_STEAL_TIMER
+	mvc	__LC_LAST_UPDATE_CLOCK(8),__IDLE_EXIT(%r2)
+	lg	%r9,__LC_SYSTEM_TIMER
+	alg	%r9,__LC_LAST_UPDATE_TIMER
+	slg	%r9,__VQ_IDLE_ENTER(%r3)
+	stg	%r9,__LC_SYSTEM_TIMER
+	mvc	__LC_LAST_UPDATE_TIMER(8),__VQ_IDLE_EXIT(%r3)
+	# prepare return psw
+	nihh	%r8,0xfffd		# clear wait state bit
+	lg	%r9,48(%r11)		# return from psw_idle
+	br	%r14
+cleanup_idle_insn:
+	.quad	psw_idle_lpsw
+
 /*
  * Integer constants
  */
@@ -893,6 +950,8 @@
 	.quad	__critical_start
 .Lcritical_length:
 	.quad	__critical_end - __critical_start
+.Lvtimer_max:
+	.quad	0x7fffffffffffffff
 
 
 #if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index b9a7fdd..09a014c 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -219,8 +219,6 @@
 
 	code = (unsigned short) ext_int_code;
 	old_regs = set_irq_regs(regs);
-	s390_idle_check(regs, S390_lowcore.int_clock,
-			S390_lowcore.async_enter_timer);
 	irq_enter();
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 0fd2e86..8c372ca 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -254,8 +254,6 @@
 	int umode;
 
 	nmi_enter();
-	s390_idle_check(regs, S390_lowcore.mcck_clock,
-			S390_lowcore.mcck_enter_timer);
 	kstat_cpu(smp_processor_id()).irqs[NMI_NMI]++;
 	mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
 	mcck = &__get_cpu_var(cpu_mcck);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index e795933..78b3c14 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -77,13 +77,8 @@
 		local_irq_enable();
 		return;
 	}
-	trace_hardirqs_on();
-	/* Don't trace preempt off for idle. */
-	stop_critical_timings();
-	/* Stop virtual timer and halt the cpu. */
+	/* Halt the cpu and keep track of cpu time accounting. */
 	vtime_stop_cpu();
-	/* Reenable preemption tracer. */
-	start_critical_timings();
 }
 
 void cpu_idle(void)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 6db8526..afd6e51 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -972,22 +972,16 @@
 static ssize_t show_idle_count(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct s390_idle_data *idle;
+	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
 	unsigned long long idle_count;
 	unsigned int sequence;
 
-	idle = &per_cpu(s390_idle, dev->id);
-repeat:
-	sequence = idle->sequence;
-	smp_rmb();
-	if (sequence & 1)
-		goto repeat;
-	idle_count = idle->idle_count;
-	if (idle->idle_enter)
-		idle_count++;
-	smp_rmb();
-	if (idle->sequence != sequence)
-		goto repeat;
+	do {
+		sequence = ACCESS_ONCE(idle->sequence);
+		idle_count = ACCESS_ONCE(idle->idle_count);
+		if (ACCESS_ONCE(idle->idle_enter))
+			idle_count++;
+	} while ((sequence & 1) || (idle->sequence != sequence));
 	return sprintf(buf, "%llu\n", idle_count);
 }
 static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -995,24 +989,18 @@
 static ssize_t show_idle_time(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
-	struct s390_idle_data *idle;
-	unsigned long long now, idle_time, idle_enter;
+	struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+	unsigned long long now, idle_time, idle_enter, idle_exit;
 	unsigned int sequence;
 
-	idle = &per_cpu(s390_idle, dev->id);
-	now = get_clock();
-repeat:
-	sequence = idle->sequence;
-	smp_rmb();
-	if (sequence & 1)
-		goto repeat;
-	idle_time = idle->idle_time;
-	idle_enter = idle->idle_enter;
-	if (idle_enter != 0ULL && idle_enter < now)
-		idle_time += now - idle_enter;
-	smp_rmb();
-	if (idle->sequence != sequence)
-		goto repeat;
+	do {
+		now = get_clock();
+		sequence = ACCESS_ONCE(idle->sequence);
+		idle_time = ACCESS_ONCE(idle->idle_time);
+		idle_enter = ACCESS_ONCE(idle->idle_enter);
+		idle_exit = ACCESS_ONCE(idle->idle_exit);
+	} while ((sequence & 1) || (idle->sequence != sequence));
+	idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
 	return sprintf(buf, "%llu\n", idle_time >> 12);
 }
 static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 7bacee9..277ea71 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -26,6 +26,7 @@
 #include <asm/irq_regs.h>
 #include <asm/cputime.h>
 #include <asm/irq.h>
+#include "entry.h"
 
 static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
 
@@ -123,153 +124,53 @@
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-void __kprobes vtime_start_cpu(__u64 int_clock, __u64 enter_timer)
-{
-	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
-	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
-	__u64 idle_time, expires;
-
-	if (idle->idle_enter == 0ULL)
-		return;
-
-	/* Account time spent with enabled wait psw loaded as idle time. */
-	idle_time = int_clock - idle->idle_enter;
-	account_idle_time(idle_time);
-	S390_lowcore.steal_timer +=
-		idle->idle_enter - S390_lowcore.last_update_clock;
-	S390_lowcore.last_update_clock = int_clock;
-
-	/* Account system time spent going idle. */
-	S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
-	S390_lowcore.last_update_timer = enter_timer;
-
-	/* Restart vtime CPU timer */
-	if (vq->do_spt) {
-		/* Program old expire value but first save progress. */
-		expires = vq->idle - enter_timer;
-		expires += get_vtimer();
-		set_vtimer(expires);
-	} else {
-		/* Don't account the CPU timer delta while the cpu was idle. */
-		vq->elapsed -= vq->idle - enter_timer;
-	}
-
-	idle->sequence++;
-	smp_wmb();
-	idle->idle_time += idle_time;
-	idle->idle_enter = 0ULL;
-	idle->idle_count++;
-	smp_wmb();
-	idle->sequence++;
-}
-
 void __kprobes vtime_stop_cpu(void)
 {
 	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
 	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
-	psw_t psw;
+	unsigned long long idle_time;
+	unsigned long psw_mask;
+
+	trace_hardirqs_on();
+	/* Don't trace preempt off for idle. */
+	stop_critical_timings();
 
 	/* Wait for external, I/O or machine check interrupt. */
-	psw.mask = psw_kernel_bits | PSW_MASK_WAIT |
-		PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
-
+	psw_mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_DAT |
+		PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
 	idle->nohz_delay = 0;
 
-	/* Check if the CPU timer needs to be reprogrammed. */
-	if (vq->do_spt) {
-		__u64 vmax = VTIMER_MAX_SLICE;
-		/*
-		 * The inline assembly is equivalent to
-		 *	vq->idle = get_cpu_timer();
-		 *	set_cpu_timer(VTIMER_MAX_SLICE);
-		 *	idle->idle_enter = get_clock();
-		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-		 *			   PSW_MASK_DAT | PSW_MASK_IO |
-		 *			   PSW_MASK_EXT | PSW_MASK_MCHECK);
-		 * The difference is that the inline assembly makes sure that
-		 * the last three instruction are stpt, stck and lpsw in that
-		 * order. This is done to increase the precision.
-		 */
-		asm volatile(
-#ifndef CONFIG_64BIT
-			"	basr	1,0\n"
-			"0:	ahi	1,1f-0b\n"
-			"	st	1,4(%2)\n"
-#else /* CONFIG_64BIT */
-			"	larl	1,1f\n"
-			"	stg	1,8(%2)\n"
-#endif /* CONFIG_64BIT */
-			"	stpt	0(%4)\n"
-			"	spt	0(%5)\n"
-			"	stck	0(%3)\n"
-#ifndef CONFIG_64BIT
-			"	lpsw	0(%2)\n"
-#else /* CONFIG_64BIT */
-			"	lpswe	0(%2)\n"
-#endif /* CONFIG_64BIT */
-			"1:"
-			: "=m" (idle->idle_enter), "=m" (vq->idle)
-			: "a" (&psw), "a" (&idle->idle_enter),
-			  "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
-			: "memory", "cc", "1");
-	} else {
-		/*
-		 * The inline assembly is equivalent to
-		 *	vq->idle = get_cpu_timer();
-		 *	idle->idle_enter = get_clock();
-		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-		 *			   PSW_MASK_DAT | PSW_MASK_IO |
-		 *			   PSW_MASK_EXT | PSW_MASK_MCHECK);
-		 * The difference is that the inline assembly makes sure that
-		 * the last three instruction are stpt, stck and lpsw in that
-		 * order. This is done to increase the precision.
-		 */
-		asm volatile(
-#ifndef CONFIG_64BIT
-			"	basr	1,0\n"
-			"0:	ahi	1,1f-0b\n"
-			"	st	1,4(%2)\n"
-#else /* CONFIG_64BIT */
-			"	larl	1,1f\n"
-			"	stg	1,8(%2)\n"
-#endif /* CONFIG_64BIT */
-			"	stpt	0(%4)\n"
-			"	stck	0(%3)\n"
-#ifndef CONFIG_64BIT
-			"	lpsw	0(%2)\n"
-#else /* CONFIG_64BIT */
-			"	lpswe	0(%2)\n"
-#endif /* CONFIG_64BIT */
-			"1:"
-			: "=m" (idle->idle_enter), "=m" (vq->idle)
-			: "a" (&psw), "a" (&idle->idle_enter),
-			  "a" (&vq->idle), "m" (psw)
-			: "memory", "cc", "1");
-	}
+	/* Call the assembler magic in entry.S */
+	psw_idle(idle, vq, psw_mask, !list_empty(&vq->list));
+
+	/* Reenable preemption tracer. */
+	start_critical_timings();
+
+	/* Account time spent with enabled wait psw loaded as idle time. */
+	idle->sequence++;
+	smp_wmb();
+	idle_time = idle->idle_exit - idle->idle_enter;
+	idle->idle_time += idle_time;
+	idle->idle_enter = idle->idle_exit = 0ULL;
+	idle->idle_count++;
+	account_idle_time(idle_time);
+	smp_wmb();
+	idle->sequence++;
 }
 
 cputime64_t s390_get_idle_time(int cpu)
 {
-	struct s390_idle_data *idle;
-	unsigned long long now, idle_time, idle_enter;
+	struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
+	unsigned long long now, idle_enter, idle_exit;
 	unsigned int sequence;
 
-	idle = &per_cpu(s390_idle, cpu);
-
-	now = get_clock();
-repeat:
-	sequence = idle->sequence;
-	smp_rmb();
-	if (sequence & 1)
-		goto repeat;
-	idle_time = 0;
-	idle_enter = idle->idle_enter;
-	if (idle_enter != 0ULL && idle_enter < now)
-		idle_time = now - idle_enter;
-	smp_rmb();
-	if (idle->sequence != sequence)
-		goto repeat;
-	return idle_time;
+	do {
+		now = get_clock();
+		sequence = ACCESS_ONCE(idle->sequence);
+		idle_enter = ACCESS_ONCE(idle->idle_enter);
+		idle_exit = ACCESS_ONCE(idle->idle_exit);
+	} while ((sequence & 1) || (idle->sequence != sequence));
+	return idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
 }
 
 /*
@@ -346,7 +247,6 @@
 	}
 	spin_unlock(&vq->lock);
 
-	vq->do_spt = list_empty(&cb_list);
 	do_callbacks(&cb_list);
 
 	/* next event is first in list */
@@ -355,8 +255,7 @@
 	if (!list_empty(&vq->list)) {
 		event = list_first_entry(&vq->list, struct vtimer_list, entry);
 		next = event->expires;
-	} else
-		vq->do_spt = 0;
+	}
 	spin_unlock(&vq->lock);
 	/*
 	 * To improve precision add the time spent by the
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index db92f04..9f1f71e 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -13,6 +13,7 @@
 #include <linux/irqflags.h>
 #include <linux/interrupt.h>
 #include <asm/div64.h>
+#include <asm/timer.h>
 
 void __delay(unsigned long loops)
 {
@@ -28,36 +29,33 @@
 
 static void __udelay_disabled(unsigned long long usecs)
 {
-	unsigned long mask, cr0, cr0_saved;
-	u64 clock_saved;
-	u64 end;
+	unsigned long cr0, cr6, new;
+	u64 clock_saved, end;
 
-	mask = psw_kernel_bits | PSW_MASK_DAT | PSW_MASK_WAIT |
-		PSW_MASK_EXT | PSW_MASK_MCHECK;
 	end = get_clock() + (usecs << 12);
 	clock_saved = local_tick_disable();
-	__ctl_store(cr0_saved, 0, 0);
-	cr0 = (cr0_saved & 0xffff00e0) | 0x00000800;
-	__ctl_load(cr0 , 0, 0);
+	__ctl_store(cr0, 0, 0);
+	__ctl_store(cr6, 6, 6);
+	new = (cr0 &  0xffff00e0) | 0x00000800;
+	__ctl_load(new, 0, 0);
+	new = 0;
+	__ctl_load(new, 6, 6);
 	lockdep_off();
 	do {
 		set_clock_comparator(end);
-		trace_hardirqs_on();
-		__load_psw_mask(mask);
+		vtime_stop_cpu();
 		local_irq_disable();
 	} while (get_clock() < end);
 	lockdep_on();
-	__ctl_load(cr0_saved, 0, 0);
+	__ctl_load(cr0, 0, 0);
+	__ctl_load(cr6, 6, 6);
 	local_tick_enable(clock_saved);
 }
 
 static void __udelay_enabled(unsigned long long usecs)
 {
-	unsigned long mask;
-	u64 clock_saved;
-	u64 end;
+	u64 clock_saved, end;
 
-	mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO;
 	end = get_clock() + (usecs << 12);
 	do {
 		clock_saved = 0;
@@ -65,8 +63,7 @@
 			clock_saved = local_tick_disable();
 			set_clock_comparator(end);
 		}
-		trace_hardirqs_on();
-		__load_psw_mask(mask);
+		vtime_stop_cpu();
 		local_irq_disable();
 		if (clock_saved)
 			local_tick_enable(clock_saved);
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index dc67c39..a49c46c 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -601,8 +601,6 @@
 	struct pt_regs *old_regs;
 
 	old_regs = set_irq_regs(regs);
-	s390_idle_check(regs, S390_lowcore.int_clock,
-			S390_lowcore.async_enter_timer);
 	irq_enter();
 	__this_cpu_write(s390_idle.nohz_delay, 1);
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)