s390/uaccess: always load the kernel ASCE after task switch

This patch fixes a problem introduced with git commit beef560b4cdfafb2
"s390/uaccess: simplify control register updates".

The switch_mm function is not called if the next process is a kernel
thread without an attached mm, and it is a nop if the mm does not
change. In either case CR1 still needs to be loaded with the kernel
ASCE in case the code returns to a uaccess function that uses the
secondary space mode.
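
The fix relies on load_kernel_asce(), which finish_arch_post_lock_switch
now calls first. That helper is defined elsewhere in mmu_context.h; as a
rough sketch (reconstructed here for illustration, not part of this
patch) it reloads CR1 only if it does not already contain the kernel
ASCE:

	static inline void load_kernel_asce(void)
	{
		unsigned long asce;

		/* Reload CR1 with the kernel ASCE if needed. */
		__ctl_store(asce, 1, 1);
		if (asce != S390_lowcore.kernel_asce)
			__ctl_load(S390_lowcore.kernel_asce, 1, 1);
		/* Have the user ASCE restored on return to user space. */
		set_cpu_flag(CIF_ASCE);
	}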

In addition, move the set_fs call from finish_arch_switch to
finish_arch_post_lock_switch, and remove the now empty
finish_arch_switch.
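
With both changes applied, finish_arch_post_lock_switch ends up as
follows (assembled from the hunks below for readability):

	static inline void finish_arch_post_lock_switch(void)
	{
		struct task_struct *tsk = current;
		struct mm_struct *mm = tsk->mm;

		/* Always put the kernel ASCE back into CR1 ... */
		load_kernel_asce();
		if (mm) {
			preempt_disable();
			while (atomic_read(&mm->context.attach_count) >> 16)
				cpu_relax();

			cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
			if (mm->context.flush_mm)
				__tlb_flush_mm(mm);
			preempt_enable();
		}
		/* ... and restore the address space mode of the task. */
		set_fs(current->thread.mm_segment);
	}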

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index c28f32a..3815bfe 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -33,10 +33,9 @@
 
 static inline void set_user_asce(struct mm_struct *mm)
 {
-	pgd_t *pgd = mm->pgd;
-
-	S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
-	set_fs(current->thread.mm_segment);
+	S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd);
+	if (current->thread.mm_segment.ar4)
+		__ctl_load(S390_lowcore.user_asce, 7, 7);
 	set_cpu_flag(CIF_ASCE);
 }
 
@@ -70,12 +69,11 @@
 	/* Clear old ASCE by loading the kernel ASCE. */
 	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
 	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
-	/* Delay loading of the new ASCE to control registers CR1 & CR7 */
-	set_cpu_flag(CIF_ASCE);
 	atomic_inc(&next->context.attach_count);
 	atomic_dec(&prev->context.attach_count);
 	if (MACHINE_HAS_TLB_LC)
 		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
+	S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
 }
 
 #define finish_arch_post_lock_switch finish_arch_post_lock_switch
@@ -84,17 +82,18 @@
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 
-	if (!mm)
-		return;
-	preempt_disable();
-	while (atomic_read(&mm->context.attach_count) >> 16)
-		cpu_relax();
+	load_kernel_asce();
+	if (mm) {
+		preempt_disable();
+		while (atomic_read(&mm->context.attach_count) >> 16)
+			cpu_relax();
 
-	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-	set_user_asce(mm);
-	if (mm->context.flush_mm)
-		__tlb_flush_mm(mm);
-	preempt_enable();
+		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+		if (mm->context.flush_mm)
+			__tlb_flush_mm(mm);
+		preempt_enable();
+	}
+	set_fs(current->thread.mm_segment);
 }
 
 #define enter_lazy_tlb(mm,tsk)	do { } while (0)
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 29c81f8..df38c70 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -134,8 +134,4 @@
 	prev = __switch_to(prev,next);					\
 } while (0)
 
-#define finish_arch_switch(prev) do {					     \
-	set_fs(current->thread.mm_segment);				     \
-} while (0)
-
 #endif /* __ASM_SWITCH_TO_H */