sh: Fix occasional FPU register corruption under preempt.

Presently with preempt enabled there's the possibility to be preempted
after the TIF_USEDFPU test and the register save, leading to bogus
state post-__switch_to(). Use an explicit preempt_disable()/enable()
pair around unlazy_fpu()/clear_fpu() to avoid this. Follows the x86
change.

Reported-by: Takuo Koguchi <takuo.koguchi.sw@hitachi.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c
index ff99562..5627c0b 100644
--- a/arch/sh/kernel/cpu/sh2a/fpu.c
+++ b/arch/sh/kernel/cpu/sh2a/fpu.c
@@ -13,6 +13,7 @@
 #include <linux/signal.h>
 #include <asm/processor.h>
 #include <asm/io.h>
+#include <asm/fpu.h>
 
 /* The PR (precision) bit in the FP Status Register must be clear when
  * an frchg instruction is executed, otherwise the instruction is undefined.
diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c
index 817f993..8020796 100644
--- a/arch/sh/kernel/cpu/sh4/fpu.c
+++ b/arch/sh/kernel/cpu/sh4/fpu.c
@@ -16,6 +16,7 @@
 #include <asm/cpu/fpu.h>
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/fpu.h>
 
 /* The PR (precision) bit in the FP Status Register must be clear when
  * an frchg instruction is executed, otherwise the instruction is undefined.
diff --git a/arch/sh/kernel/cpu/sh5/fpu.c b/arch/sh/kernel/cpu/sh5/fpu.c
index 30b76a9..dd4f51f 100644
--- a/arch/sh/kernel/cpu/sh5/fpu.c
+++ b/arch/sh/kernel/cpu/sh5/fpu.c
@@ -17,6 +17,7 @@
 #include <asm/processor.h>
 #include <asm/user.h>
 #include <asm/io.h>
+#include <asm/fpu.h>
 
 /*
  * Initially load the FPU with signalling NANS.  This bit pattern
diff --git a/arch/sh/kernel/dump_task.c b/arch/sh/kernel/dump_task.c
index 4a8a408..1db7ce0 100644
--- a/arch/sh/kernel/dump_task.c
+++ b/arch/sh/kernel/dump_task.c
@@ -1,5 +1,6 @@
 #include <linux/elfcore.h>
 #include <linux/sched.h>
+#include <asm/fpu.h>
 
 /*
  * Capture the user space registers if the task is not running (in user space)
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 9ab1926..b98e37a 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -25,6 +25,7 @@
 #include <asm/pgalloc.h>
 #include <asm/system.h>
 #include <asm/ubc.h>
+#include <asm/fpu.h>
 
 static int hlt_counter;
 int ubc_usercnt = 0;
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index f6b5fbf..f311551 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -29,6 +29,7 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
+#include <asm/fpu.h>
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
diff --git a/include/asm-sh/fpu.h b/include/asm-sh/fpu.h
index f842988..f89abf5 100644
--- a/include/asm-sh/fpu.h
+++ b/include/asm-sh/fpu.h
@@ -1,9 +1,8 @@
 #ifndef __ASM_SH_FPU_H
 #define __ASM_SH_FPU_H
 
-#define SR_FD    0x00008000
-
 #ifndef __ASSEMBLY__
+#include <linux/preempt.h>
 #include <asm/ptrace.h>
 
 #ifdef CONFIG_SH_FPU
@@ -28,18 +27,23 @@
 
 extern int do_fpu_inst(unsigned short, struct pt_regs *);
 
-#define unlazy_fpu(tsk, regs) do {			\
-	if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) {	\
-		save_fpu(tsk, regs);			\
-	}						\
-} while (0)
+static inline void unlazy_fpu(struct task_struct *tsk, struct pt_regs *regs)
+{
+	preempt_disable();
+	if (test_tsk_thread_flag(tsk, TIF_USEDFPU))
+		save_fpu(tsk, regs);
+	preempt_enable();
+}
 
-#define clear_fpu(tsk, regs) do {				\
-	if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) {		\
-		clear_tsk_thread_flag(tsk, TIF_USEDFPU);	\
-		release_fpu(regs);				\
-	}							\
-} while (0)
+static inline void clear_fpu(struct task_struct *tsk, struct pt_regs *regs)
+{
+	preempt_disable();
+	if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) {
+		clear_tsk_thread_flag(tsk, TIF_USEDFPU);
+		release_fpu(regs);
+	}
+	preempt_enable();
+}
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/include/asm-sh/processor.h b/include/asm-sh/processor.h
index 19fe47c..ec707b9 100644
--- a/include/asm-sh/processor.h
+++ b/include/asm-sh/processor.h
@@ -2,7 +2,6 @@
 #define __ASM_SH_PROCESSOR_H
 
 #include <asm/cpu-features.h>
-#include <asm/fpu.h>
 
 #ifndef __ASSEMBLY__
 /*
diff --git a/include/asm-sh/processor_32.h b/include/asm-sh/processor_32.h
index df2d5b0..c09305d 100644
--- a/include/asm-sh/processor_32.h
+++ b/include/asm-sh/processor_32.h
@@ -70,6 +70,7 @@
  */
 #define SR_DSP		0x00001000
 #define SR_IMASK	0x000000f0
+#define SR_FD		0x00008000
 
 /*
  * FPU structure and data
diff --git a/include/asm-sh/processor_64.h b/include/asm-sh/processor_64.h
index eda4bef..88a2edf 100644
--- a/include/asm-sh/processor_64.h
+++ b/include/asm-sh/processor_64.h
@@ -112,6 +112,7 @@
 #endif
 
 #define SR_IMASK 0x000000f0
+#define SR_FD    0x00008000
 #define SR_SSTEP 0x08000000
 
 #ifndef __ASSEMBLY__