s390/uaccess: simplify control register updates

Always switch to the kernel ASCE in switch_mm. Load the secondary
space ASCE in finish_arch_post_lock_switch after checking that
any pending page table operations have completed. The primary
ASCE is loaded in entry[64].S. With this the update_primary_asce
call can be removed from the switch_to macro and from the start
of switch_mm function. Remove the load_primary argument from
update_user_asce/clear_user_asce, rename update_user_asce to
set_user_asce and rename update_primary_asce to load_kernel_asce.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h
index 69cf5b5..a4811aa 100644
--- a/arch/s390/include/asm/futex.h
+++ b/arch/s390/include/asm/futex.h
@@ -29,7 +29,7 @@
 	int cmparg = (encoded_op << 20) >> 20;
 	int oldval = 0, newval, ret;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 		oparg = 1 << oparg;
 
@@ -79,7 +79,7 @@
 {
 	int ret;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	asm volatile(
 		"   sacf 256\n"
 		"0: cs   %1,%4,0(%5)\n"
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 71be346..93ec0c8 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -30,33 +30,31 @@
 
 #define destroy_context(mm)             do { } while (0)
 
-static inline void update_user_asce(struct mm_struct *mm, int load_primary)
+static inline void set_user_asce(struct mm_struct *mm)
 {
 	pgd_t *pgd = mm->pgd;
 
 	S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
-	if (load_primary)
-		__ctl_load(S390_lowcore.user_asce, 1, 1);
 	set_fs(current->thread.mm_segment);
+	set_thread_flag(TIF_ASCE);
 }
 
-static inline void clear_user_asce(struct mm_struct *mm, int load_primary)
+static inline void clear_user_asce(void)
 {
 	S390_lowcore.user_asce = S390_lowcore.kernel_asce;
 
-	if (load_primary)
-		__ctl_load(S390_lowcore.user_asce, 1, 1);
+	__ctl_load(S390_lowcore.user_asce, 1, 1);
 	__ctl_load(S390_lowcore.user_asce, 7, 7);
 }
 
-static inline void update_primary_asce(struct task_struct *tsk)
+static inline void load_kernel_asce(void)
 {
 	unsigned long asce;
 
 	__ctl_store(asce, 1, 1);
 	if (asce != S390_lowcore.kernel_asce)
 		__ctl_load(S390_lowcore.kernel_asce, 1, 1);
-	set_tsk_thread_flag(tsk, TIF_ASCE);
+	set_thread_flag(TIF_ASCE);
 }
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
@@ -64,25 +62,17 @@
 {
 	int cpu = smp_processor_id();
 
-	update_primary_asce(tsk);
 	if (prev == next)
 		return;
 	if (MACHINE_HAS_TLB_LC)
 		cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
-	if (atomic_inc_return(&next->context.attach_count) >> 16) {
-		/* Delay update_user_asce until all TLB flushes are done. */
-		set_tsk_thread_flag(tsk, TIF_TLB_WAIT);
-		/* Clear old ASCE by loading the kernel ASCE. */
-		clear_user_asce(next, 0);
-	} else {
-		cpumask_set_cpu(cpu, mm_cpumask(next));
-		update_user_asce(next, 0);
-		if (next->context.flush_mm)
-			/* Flush pending TLBs */
-			__tlb_flush_mm(next);
-	}
+	/* Clear old ASCE by loading the kernel ASCE. */
+	__ctl_load(S390_lowcore.kernel_asce, 1, 1);
+	__ctl_load(S390_lowcore.kernel_asce, 7, 7);
+	/* Delay loading of the new ASCE to control registers CR1 & CR7 */
+	set_thread_flag(TIF_ASCE);
+	atomic_inc(&next->context.attach_count);
 	atomic_dec(&prev->context.attach_count);
-	WARN_ON(atomic_read(&prev->context.attach_count) < 0);
 	if (MACHINE_HAS_TLB_LC)
 		cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
 }
@@ -93,15 +83,14 @@
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 
-	if (!test_tsk_thread_flag(tsk, TIF_TLB_WAIT))
+	if (!mm)
 		return;
 	preempt_disable();
-	clear_tsk_thread_flag(tsk, TIF_TLB_WAIT);
 	while (atomic_read(&mm->context.attach_count) >> 16)
 		cpu_relax();
 
 	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-	update_user_asce(mm, 0);
+	set_user_asce(mm);
 	if (mm->context.flush_mm)
 		__tlb_flush_mm(mm);
 	preempt_enable();
@@ -113,7 +102,9 @@
 static inline void activate_mm(struct mm_struct *prev,
                                struct mm_struct *next)
 {
-        switch_mm(prev, next, current);
+	switch_mm(prev, next, current);
+	cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+	set_user_asce(next);
 }
 
 static inline void arch_dup_mmap(struct mm_struct *oldmm,
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index e759181..29c81f8 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -132,7 +132,6 @@
 		update_cr_regs(next);					\
 	}								\
 	prev = __switch_to(prev,next);					\
-	update_primary_asce(current);					\
 } while (0)
 
 #define finish_arch_switch(prev) do {					     \
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 50630e6..f3e5cad 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -81,8 +81,7 @@
 #define TIF_NOTIFY_RESUME	1	/* callback before returning to user */
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
-#define TIF_TLB_WAIT		4	/* wait for TLB flush completion */
-#define TIF_ASCE		5	/* primary asce needs fixup / uaccess */
+#define TIF_ASCE		5	/* user asce needs fixup / uaccess */
 #define TIF_PER_TRAP		6	/* deliver sigtrap on return to user */
 #define TIF_MCCK_PENDING	7	/* machine check handling is pending */
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
@@ -99,7 +98,6 @@
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
-#define _TIF_TLB_WAIT		(1<<TIF_TLB_WAIT)
 #define _TIF_ASCE		(1<<TIF_ASCE)
 #define _TIF_PER_TRAP		(1<<TIF_PER_TRAP)
 #define _TIF_MCCK_PENDING	(1<<TIF_MCCK_PENDING)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 1662038..7006bfd 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -43,7 +43,7 @@
 		 _TIF_MCCK_PENDING | _TIF_ASCE)
 _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		 _TIF_SYSCALL_TRACEPOINT)
-_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_TLB_WAIT)
+_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_ASCE)
 
 STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
 STACK_SIZE  = 1 << STACK_SHIFT
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 5963e43..d15e7bf 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -48,7 +48,7 @@
 		 _TIF_MCCK_PENDING | _TIF_ASCE)
 _TIF_TRACE    = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
 		 _TIF_SYSCALL_TRACEPOINT)
-_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_TLB_WAIT)
+_TIF_TRANSFER = (_TIF_MCCK_PENDING | _TIF_ASCE)
 
 #define BASED(name) name-system_call(%r13)
 
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 7416efe..53dd5d7 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -76,7 +76,7 @@
 {
 	unsigned long tmp1, tmp2;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	tmp1 = -256UL;
 	asm volatile(
 		"   sacf  0\n"
@@ -159,7 +159,7 @@
 {
 	unsigned long tmp1, tmp2;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	tmp1 = -256UL;
 	asm volatile(
 		"   sacf  0\n"
@@ -225,7 +225,7 @@
 {
 	unsigned long tmp1;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	asm volatile(
 		"   sacf  256\n"
 		"  "AHI"  %0,-1\n"
@@ -292,7 +292,7 @@
 {
 	unsigned long tmp1, tmp2;
 
-	update_primary_asce(current);
+	load_kernel_asce();
 	asm volatile(
 		"   sacf  256\n"
 		"  "AHI"  %0,-1\n"
@@ -358,7 +358,7 @@
 {
 	if (unlikely(!size))
 		return 0;
-	update_primary_asce(current);
+	load_kernel_asce();
 	return strnlen_user_srst(src, size);
 }
 EXPORT_SYMBOL(__strnlen_user);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index d7cfd57..7881d4e 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -53,8 +53,10 @@
 {
 	struct mm_struct *mm = arg;
 
-	if (current->active_mm == mm)
-		update_user_asce(mm, 1);
+	if (current->active_mm == mm) {
+		clear_user_asce();
+		set_user_asce(mm);
+	}
 	__tlb_flush_local();
 }
 
@@ -108,7 +110,7 @@
 	pgd_t *pgd;
 
 	if (current->active_mm == mm) {
-		clear_user_asce(mm, 1);
+		clear_user_asce();
 		__tlb_flush_mm(mm);
 	}
 	while (mm->context.asce_limit > limit) {
@@ -134,7 +136,7 @@
 		crst_table_free(mm, (unsigned long *) pgd);
 	}
 	if (current->active_mm == mm)
-		update_user_asce(mm, 1);
+		set_user_asce(mm);
 }
 #endif