s390/uaccess: simplify control register updates

Always switch to the kernel ASCE in switch_mm. Load the secondary
space ASCE in finish_arch_post_lock_switch after checking that
any pending page table operations have completed. The primary
ASCE is loaded in entry[64].S. With this, the update_primary_asce
call can be removed from the switch_to macro and from the start
of the switch_mm function. Remove the load_primary argument from
update_user_asce/clear_user_asce, rename update_user_asce to
set_user_asce, and rename update_primary_asce to load_kernel_asce.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 71be346..93ec0c8 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -30,33 +30,31 @@
 
 #define destroy_context(mm)             do { } while (0)
 
-static inline void update_user_asce(struct mm_struct *mm, int load_primary)
+static inline void set_user_asce(struct mm_struct *mm)
 {
 	pgd_t *pgd = mm->pgd;
 
 	S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
-	if (load_primary)
-		__ctl_load(S390_lowcore.user_asce, 1, 1);
 	set_fs(current->thread.mm_segment);
+	set_thread_flag(TIF_ASCE);
 }
 
-static inline void clear_user_asce(struct mm_struct *mm, int load_primary)
+static inline void clear_user_asce(void)
 {
 	S390_lowcore.user_asce = S390_lowcore.kernel_asce;
 
-	if (load_primary)
-		__ctl_load(S390_lowcore.user_asce, 1, 1);
+	__ctl_load(S390_lowcore.user_asce, 1, 1);
 	__ctl_load(S390_lowcore.user_asce, 7, 7);
 }
 
-static inline void update_primary_asce(struct task_struct *tsk)
+static inline void load_kernel_asce(void)
 {
 	unsigned long asce;
 
 	__ctl_store(asce, 1, 1);
 	if (asce != S390_lowcore.kernel_asce)
 		__ctl_load(S390_lowcore.kernel_asce, 1, 1);
-	set_tsk_thread_flag(tsk, TIF_ASCE);
+	set_thread_flag(TIF_ASCE);
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
@@ -64,25 +62,17 @@
 {
 	int cpu = smp_processor_id();
 
-	update_primary_asce(tsk);
 	if (prev == next)
 		return;
 	if (MACHINE_HAS_TLB_LC)
 		cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
-	if (atomic_inc_return(&next->context.attach_count) >> 16) {
-		/* Delay update_user_asce until all TLB flushes are done. */
-		set_tsk_thread_flag(tsk, TIF_TLB_WAIT);
-		/* Clear old ASCE by loading the kernel ASCE. */
-		clear_user_asce(next, 0);
-	} else {
-		cpumask_set_cpu(cpu, mm_cpumask(next));
-		update_user_asce(next, 0);
-		if (next->context.flush_mm)
-			/* Flush pending TLBs */
-			__tlb_flush_mm(next);
-	}
+	/* Clear old ASCE by loading the kernel ASCE. */
+	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
+	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
+	/* Delay loading of the new ASCE to control registers CR1 & CR7 */
+	set_thread_flag(TIF_ASCE);
+	atomic_inc(&next->context.attach_count);
 	atomic_dec(&prev->context.attach_count);
-	WARN_ON(atomic_read(&prev->context.attach_count) < 0);
 	if (MACHINE_HAS_TLB_LC)
 		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
 }
@@ -93,15 +83,14 @@
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 
-	if (!test_tsk_thread_flag(tsk, TIF_TLB_WAIT))
+	if (!mm)
 		return;
 	preempt_disable();
-	clear_tsk_thread_flag(tsk, TIF_TLB_WAIT);
 	while (atomic_read(&mm->context.attach_count) >> 16)
 		cpu_relax();
 
 	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-	update_user_asce(mm, 0);
+	set_user_asce(mm);
 	if (mm->context.flush_mm)
 		__tlb_flush_mm(mm);
 	preempt_enable();
@@ -113,7 +102,9 @@
 static inline void activate_mm(struct mm_struct *prev,
                                struct mm_struct *next)
 {
-        switch_mm(prev, next, current);
+	switch_mm(prev, next, current);
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+	set_user_asce(next);
 }
 
 static inline void arch_dup_mmap(struct mm_struct *oldmm,