[MIPS] FPU ownership management & preemption fixes

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/kernel/r2300_switch.S b/arch/mips/kernel/r2300_switch.S
index 656bde2..28c2e2e 100644
--- a/arch/mips/kernel/r2300_switch.S
+++ b/arch/mips/kernel/r2300_switch.S
@@ -49,8 +49,7 @@
 #ifndef CONFIG_CPU_HAS_LLSC
 	sw      zero, ll_bit
 #endif
-	mfc0	t1, CP0_STATUS
-	sw	t1, THREAD_STATUS(a0)
+	mfc0	t2, CP0_STATUS
 	cpu_save_nonscratch a0
 	sw	ra, THREAD_REG31(a0)
 
@@ -60,8 +59,8 @@
 	lw	t3, TASK_THREAD_INFO(a0)
 	lw	t0, TI_FLAGS(t3)
 	li	t1, _TIF_USEDFPU
-	and	t2, t0, t1
-	beqz	t2, 1f
+	and	t1, t0
+	beqz	t1, 1f
 	nor	t1, zero, t1
 
 	and	t0, t0, t1
@@ -74,10 +73,13 @@
 	li	t1, ~ST0_CU1
 	and	t0, t0, t1
 	sw	t0, ST_OFF(t3)
+	/* clear thread_struct CU1 bit */
+	and	t2, t1
 
 	fpu_save_single a0, t0			# clobbers t0
 
 1:
+	sw	t2, THREAD_STATUS(a0)
 	/*
 	 * The order of restoring the registers takes care of the race
 	 * updating $28, $29 and kernelsp without disabling ints.
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index cc566cf..c7698fd 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -48,8 +48,7 @@
 #ifndef CONFIG_CPU_HAS_LLSC
 	sw	zero, ll_bit
 #endif
-	mfc0	t1, CP0_STATUS
-	LONG_S	t1, THREAD_STATUS(a0)
+	mfc0	t2, CP0_STATUS
 	cpu_save_nonscratch a0
 	LONG_S	ra, THREAD_REG31(a0)
 
@@ -59,8 +58,8 @@
 	PTR_L	t3, TASK_THREAD_INFO(a0)
 	LONG_L	t0, TI_FLAGS(t3)
 	li	t1, _TIF_USEDFPU
-	and	t2, t0, t1
-	beqz	t2, 1f
+	and	t1, t0
+	beqz	t1, 1f
 	nor	t1, zero, t1
 
 	and	t0, t0, t1
@@ -73,10 +72,13 @@
 	li	t1, ~ST0_CU1
 	and	t0, t0, t1
 	LONG_S	t0, ST_OFF(t3)
+	/* clear thread_struct CU1 bit */
+	and	t2, t1
 
 	fpu_save_double a0 t0 t1		# c0_status passed in t0
 						# clobbers t1
 1:
+	LONG_S	t2, THREAD_STATUS(a0)
 
 	/*
 	 * The order of restoring the registers takes care of the race
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index bf094fc..8c3c5a5 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -82,6 +82,7 @@
 {
 	int err = 0;
 	int i;
+	unsigned int used_math;
 
 	err |= __put_user(regs->cp0_epc, &sc->sc_pc);
 
@@ -104,22 +105,18 @@
 		err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp);
 	}
 
-	err |= __put_user(!!used_math(), &sc->sc_used_math);
+	used_math = !!used_math();
+	err |= __put_user(used_math, &sc->sc_used_math);
 
-	if (used_math()) {
+	if (used_math) {
 		/*
 		 * Save FPU state to signal context. Signal handler
 		 * will "inherit" current FPU state.
 		 */
-		preempt_disable();
-
-		if (!is_fpu_owner()) {
-			own_fpu();
-			restore_fp(current);
-		}
+		own_fpu(1);
+		enable_fp_in_kernel();
 		err |= save_fp_context(sc);
-
-		preempt_enable();
+		disable_fp_in_kernel();
 	}
 	return err;
 }
@@ -188,20 +185,18 @@
 	err |= __get_user(used_math, &sc->sc_used_math);
 	conditional_used_math(used_math);
 
-	preempt_disable();
-
-	if (used_math()) {
+	if (used_math) {
 		/* restore fpu context if we have used it before */
-		own_fpu();
+		own_fpu(0);
+		enable_fp_in_kernel();
 		if (!err)
 			err = check_and_restore_fp_context(sc);
+		disable_fp_in_kernel();
 	} else {
 		/* signal handler may have used FPU.  Give it up. */
-		lose_fpu();
+		lose_fpu(0);
 	}
 
-	preempt_enable();
-
 	return err;
 }
 
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 20013b6..151fd2f 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -181,6 +181,7 @@
 {
 	int err = 0;
 	int i;
+	u32 used_math;
 
 	err |= __put_user(regs->cp0_epc, &sc->sc_pc);
 
@@ -200,22 +201,18 @@
 		err |= __put_user(mflo3(), &sc->sc_lo3);
 	}
 
-	err |= __put_user(!!used_math(), &sc->sc_used_math);
+	used_math = !!used_math();
+	err |= __put_user(used_math, &sc->sc_used_math);
 
-	if (used_math()) {
+	if (used_math) {
 		/*
 		 * Save FPU state to signal context.  Signal handler
 		 * will "inherit" current FPU state.
 		 */
-		preempt_disable();
-
-		if (!is_fpu_owner()) {
-			own_fpu();
-			restore_fp(current);
-		}
+		own_fpu(1);
+		enable_fp_in_kernel();
 		err |= save_fp_context32(sc);
-
-		preempt_enable();
+		disable_fp_in_kernel();
 	}
 	return err;
 }
@@ -262,20 +259,18 @@
 	err |= __get_user(used_math, &sc->sc_used_math);
 	conditional_used_math(used_math);
 
-	preempt_disable();
-
-	if (used_math()) {
+	if (used_math) {
 		/* restore fpu context if we have used it before */
-		own_fpu();
+		own_fpu(0);
+		enable_fp_in_kernel();
 		if (!err)
 			err = check_and_restore_fp_context32(sc);
+		disable_fp_in_kernel();
 	} else {
 		/* signal handler may have used FPU.  Give it up. */
-		lose_fpu();
+		lose_fpu(0);
 	}
 
-	preempt_enable();
-
 	return err;
 }
 
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 18f56a9..7d76a85 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -610,16 +610,6 @@
 	if (fcr31 & FPU_CSR_UNI_X) {
 		int sig;
 
-		preempt_disable();
-
-#ifdef CONFIG_PREEMPT
-		if (!is_fpu_owner()) {
-			/* We might lose fpu before disabling preempt... */
-			own_fpu();
-			BUG_ON(!used_math());
-			restore_fp(current);
-		}
-#endif
 		/*
 		 * Unimplemented operation exception.  If we've got the full
 		 * software emulator on-board, let's use it...
@@ -630,18 +620,12 @@
 		 * register operands before invoking the emulator, which seems
 		 * a bit extreme for what should be an infrequent event.
 		 */
-		save_fp(current);
 		/* Ensure 'resume' not overwrite saved fp context again. */
-		lose_fpu();
-
-		preempt_enable();
+		lose_fpu(1);
 
 		/* Run the emulator */
 		sig = fpu_emulator_cop1Handler (regs, &current->thread.fpu, 1);
 
-		preempt_disable();
-
-		own_fpu();	/* Using the FPU again.  */
 		/*
 		 * We can't allow the emulated instruction to leave any of
 		 * the cause bit set in $fcr31.
@@ -649,9 +633,7 @@
 		current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
 
 		/* Restore the hardware register state */
-		restore_fp(current);
-
-		preempt_enable();
+		own_fpu(1);	/* Using the FPU again.  */
 
 		/* If something went wrong, signal */
 		if (sig)
@@ -775,12 +757,11 @@
 {
 	unsigned int cpid;
 
-	die_if_kernel("do_cpu invoked from kernel context!", regs);
-
 	cpid = (regs->cp0_cause >> CAUSEB_CE) & 3;
 
 	switch (cpid) {
 	case 0:
+		die_if_kernel("do_cpu invoked from kernel context!", regs);
 		if (!cpu_has_llsc)
 			if (!simulate_llsc(regs))
 				return;
@@ -791,21 +772,30 @@
 		break;
 
 	case 1:
-		preempt_disable();
-
-		own_fpu();
-		if (used_math()) {	/* Using the FPU again.  */
-			restore_fp(current);
-		} else {			/* First time FPU user.  */
+		if (!test_thread_flag(TIF_ALLOW_FP_IN_KERNEL))
+			die_if_kernel("do_cpu invoked from kernel context!",
+				      regs);
+		if (used_math())	/* Using the FPU again.  */
+			own_fpu(1);
+		else {			/* First time FPU user.  */
 			init_fpu();
 			set_used_math();
 		}
 
-		if (cpu_has_fpu) {
-			preempt_enable();
+		if (raw_cpu_has_fpu) {
+			if (test_thread_flag(TIF_ALLOW_FP_IN_KERNEL)) {
+				local_irq_disable();
+				if (cpu_has_fpu)
+					regs->cp0_status |= ST0_CU1;
+				/*
+				 * We must return without enabling
+				 * interrupts to ensure keep FPU
+				 * ownership until resume.
+				 */
+				return;
+			}
 		} else {
 			int sig;
-			preempt_enable();
 			sig = fpu_emulator_cop1Handler(regs,
 						&current->thread.fpu, 0);
 			if (sig)
@@ -1259,26 +1249,26 @@
 /*
  * This is used by native signal handling
  */
-asmlinkage int (*save_fp_context)(struct sigcontext *sc);
-asmlinkage int (*restore_fp_context)(struct sigcontext *sc);
+asmlinkage int (*save_fp_context)(struct sigcontext __user *sc);
+asmlinkage int (*restore_fp_context)(struct sigcontext __user *sc);
 
-extern asmlinkage int _save_fp_context(struct sigcontext *sc);
-extern asmlinkage int _restore_fp_context(struct sigcontext *sc);
+extern asmlinkage int _save_fp_context(struct sigcontext __user *sc);
+extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc);
 
-extern asmlinkage int fpu_emulator_save_context(struct sigcontext *sc);
-extern asmlinkage int fpu_emulator_restore_context(struct sigcontext *sc);
+extern asmlinkage int fpu_emulator_save_context(struct sigcontext __user *sc);
+extern asmlinkage int fpu_emulator_restore_context(struct sigcontext __user *sc);
 
 #ifdef CONFIG_SMP
-static int smp_save_fp_context(struct sigcontext *sc)
+static int smp_save_fp_context(struct sigcontext __user *sc)
 {
-	return cpu_has_fpu
+	return raw_cpu_has_fpu
 	       ? _save_fp_context(sc)
 	       : fpu_emulator_save_context(sc);
 }
 
-static int smp_restore_fp_context(struct sigcontext *sc)
+static int smp_restore_fp_context(struct sigcontext __user *sc)
 {
-	return cpu_has_fpu
+	return raw_cpu_has_fpu
 	       ? _restore_fp_context(sc)
 	       : fpu_emulator_restore_context(sc);
 }
@@ -1306,14 +1296,14 @@
 /*
  * This is used by 32-bit signal stuff on the 64-bit kernel
  */
-asmlinkage int (*save_fp_context32)(struct sigcontext32 *sc);
-asmlinkage int (*restore_fp_context32)(struct sigcontext32 *sc);
+asmlinkage int (*save_fp_context32)(struct sigcontext32 __user *sc);
+asmlinkage int (*restore_fp_context32)(struct sigcontext32 __user *sc);
 
-extern asmlinkage int _save_fp_context32(struct sigcontext32 *sc);
-extern asmlinkage int _restore_fp_context32(struct sigcontext32 *sc);
+extern asmlinkage int _save_fp_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int _restore_fp_context32(struct sigcontext32 __user *sc);
 
-extern asmlinkage int fpu_emulator_save_context32(struct sigcontext32 *sc);
-extern asmlinkage int fpu_emulator_restore_context32(struct sigcontext32 *sc);
+extern asmlinkage int fpu_emulator_save_context32(struct sigcontext32 __user *sc);
+extern asmlinkage int fpu_emulator_restore_context32(struct sigcontext32 __user *sc);
 
 static inline void signal32_init(void)
 {
diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
index 5b3390f..ed49ef0 100644
--- a/arch/mips/math-emu/kernel_linkage.c
+++ b/arch/mips/math-emu/kernel_linkage.c
@@ -51,7 +51,7 @@
  * with appropriate macros from uaccess.h
  */
 
-int fpu_emulator_save_context(struct sigcontext *sc)
+int fpu_emulator_save_context(struct sigcontext __user *sc)
 {
 	int i;
 	int err = 0;
@@ -65,7 +65,7 @@
 	return err;
 }
 
-int fpu_emulator_restore_context(struct sigcontext *sc)
+int fpu_emulator_restore_context(struct sigcontext __user *sc)
 {
 	int i;
 	int err = 0;
@@ -84,7 +84,7 @@
  * This is the o32 version
  */
 
-int fpu_emulator_save_context32(struct sigcontext32 *sc)
+int fpu_emulator_save_context32(struct sigcontext32 __user *sc)
 {
 	int i;
 	int err = 0;
@@ -98,7 +98,7 @@
 	return err;
 }
 
-int fpu_emulator_restore_context32(struct sigcontext32 *sc)
+int fpu_emulator_restore_context32(struct sigcontext32 __user *sc)
 {
 	int i;
 	int err = 0;
diff --git a/include/asm-mips/cpu-features.h b/include/asm-mips/cpu-features.h
index eadca26..5e4bed1 100644
--- a/include/asm-mips/cpu-features.h
+++ b/include/asm-mips/cpu-features.h
@@ -40,6 +40,9 @@
 #endif
 #ifndef cpu_has_fpu
 #define cpu_has_fpu		(current_cpu_data.options & MIPS_CPU_FPU)
+#define raw_cpu_has_fpu		(raw_current_cpu_data.options & MIPS_CPU_FPU)
+#else
+#define raw_cpu_has_fpu		cpu_has_fpu
 #endif
 #ifndef cpu_has_32fpr
 #define cpu_has_32fpr		(cpu_data[0].options & MIPS_CPU_32FPR)
diff --git a/include/asm-mips/cpu-info.h b/include/asm-mips/cpu-info.h
index 610d0cd..22fe845 100644
--- a/include/asm-mips/cpu-info.h
+++ b/include/asm-mips/cpu-info.h
@@ -87,6 +87,7 @@
 
 extern struct cpuinfo_mips cpu_data[];
 #define current_cpu_data cpu_data[smp_processor_id()]
+#define raw_current_cpu_data cpu_data[raw_smp_processor_id()]
 
 extern void cpu_probe(void);
 extern void cpu_report(void);
diff --git a/include/asm-mips/fpu.h b/include/asm-mips/fpu.h
index efef843..4e12d1f 100644
--- a/include/asm-mips/fpu.h
+++ b/include/asm-mips/fpu.h
@@ -27,11 +27,11 @@
 struct sigcontext;
 struct sigcontext32;
 
-extern asmlinkage int (*save_fp_context)(struct sigcontext *sc);
-extern asmlinkage int (*restore_fp_context)(struct sigcontext *sc);
+extern asmlinkage int (*save_fp_context)(struct sigcontext __user *sc);
+extern asmlinkage int (*restore_fp_context)(struct sigcontext __user *sc);
 
-extern asmlinkage int (*save_fp_context32)(struct sigcontext32 *sc);
-extern asmlinkage int (*restore_fp_context32)(struct sigcontext32 *sc);
+extern asmlinkage int (*save_fp_context32)(struct sigcontext32 __user *sc);
+extern asmlinkage int (*restore_fp_context32)(struct sigcontext32 __user *sc);
 
 extern void fpu_emulator_init_fpu(void);
 extern void _init_fpu(void);
@@ -68,6 +68,8 @@
 	/* We don't care about the c0 hazard here  */			\
 } while (0)
 
+#define __fpu_enabled()	(read_c0_status() & ST0_CU1)
+
 #define enable_fpu()							\
 do {									\
 	if (cpu_has_fpu)						\
@@ -93,31 +95,47 @@
 	return cpu_has_fpu && __is_fpu_owner();
 }
 
-static inline void own_fpu(void)
+static inline void __own_fpu(void)
 {
-	if (cpu_has_fpu) {
-		__enable_fpu();
-		KSTK_STATUS(current) |= ST0_CU1;
-		set_thread_flag(TIF_USEDFPU);
-	}
+	__enable_fpu();
+	KSTK_STATUS(current) |= ST0_CU1;
+	set_thread_flag(TIF_USEDFPU);
 }
 
-static inline void lose_fpu(void)
+static inline void own_fpu(int restore)
 {
-	if (cpu_has_fpu) {
+	preempt_disable();
+	if (cpu_has_fpu && !__is_fpu_owner()) {
+		__own_fpu();
+		if (restore)
+			_restore_fp(current);
+	}
+	preempt_enable();
+}
+
+static inline void lose_fpu(int save)
+{
+	preempt_disable();
+	if (is_fpu_owner()) {
+		if (save)
+			_save_fp(current);
 		KSTK_STATUS(current) &= ~ST0_CU1;
 		clear_thread_flag(TIF_USEDFPU);
 		__disable_fpu();
 	}
+	preempt_enable();
 }
 
 static inline void init_fpu(void)
 {
+	preempt_disable();
 	if (cpu_has_fpu) {
+		__own_fpu();
 		_init_fpu();
 	} else {
 		fpu_emulator_init_fpu();
 	}
+	preempt_enable();
 }
 
 static inline void save_fp(struct task_struct *tsk)
@@ -144,4 +162,18 @@
 	return tsk->thread.fpu.fpr;
 }
 
+static inline void enable_fp_in_kernel(void)
+{
+	set_thread_flag(TIF_ALLOW_FP_IN_KERNEL);
+	/* make sure CU1 and FPU ownership are consistent */
+	if (!__is_fpu_owner() && __fpu_enabled())
+		__disable_fpu();
+}
+
+static inline void disable_fp_in_kernel(void)
+{
+	BUG_ON(!__is_fpu_owner() && __fpu_enabled());
+	clear_thread_flag(TIF_ALLOW_FP_IN_KERNEL);
+}
+
 #endif /* _ASM_FPU_H */
diff --git a/include/asm-mips/thread_info.h b/include/asm-mips/thread_info.h
index fbcda82..6cf05f4 100644
--- a/include/asm-mips/thread_info.h
+++ b/include/asm-mips/thread_info.h
@@ -119,6 +119,7 @@
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		18
 #define TIF_FREEZE		19
+#define TIF_ALLOW_FP_IN_KERNEL	20
 #define TIF_SYSCALL_TRACE	31	/* syscall trace active */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)