[MIPS] FPU affinity for MT ASE.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f9be549..87f0b79 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1464,6 +1464,17 @@
 
 endchoice
 
+config MIPS_MT_FPAFF
+	bool "Dynamic FPU affinity for FP-intensive threads"
+	depends on MIPS_MT
+	default y
+	help
+	  MIPS MT processors may have fewer FPU contexts than CPU threads.
+	  With this option, a thread that has more than a threshold number
+	  of FP instructions emulated is migrated to a "CPU" that has an
+	  FPU.  The binding is dropped again once the thread goes through
+	  a time-slice without using the FPU.
+
 config MIPS_VPE_LOADER_TOM
 	bool "Load VPE program into memory hidden from linux"
 	depends on MIPS_VPE_LOADER
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 8b393df..199a06e 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -185,6 +185,17 @@
 	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
 	clear_tsk_thread_flag(p, TIF_USEDFPU);
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/*
+	 * FPU affinity support is cleaner if we track the
+	 * user-visible CPU affinity from the very beginning.
+	 * The generic cpus_allowed mask will already have
+	 * been copied from the parent before copy_thread
+	 * is invoked.
+	 */
+	p->thread.user_cpus_allowed = p->cpus_allowed;
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
 	if (clone_flags & CLONE_SETTLS)
 		ti->tp_value = regs->regs[7];
 
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 4e36b87..a0ac0e5 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -569,8 +569,19 @@
 	sys	sys_tkill		2
 	sys	sys_sendfile64		5
 	sys	sys_futex		6
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/*
+	 * For FPU affinity scheduling on MIPS MT processors, we need to
+	 * intercept sys_sched_xxxaffinity() calls until we get a proper hook
+	 * in kernel/sched.c.  As this is only temporary, we support the hooks
+	 * for the 32-bit kernel only - there is no MIPS64 MT processor yet.
+	 */
+	sys	mipsmt_sys_sched_setaffinity	3
+	sys	mipsmt_sys_sched_getaffinity	3
+#else
 	sys	sys_sched_setaffinity	3
 	sys	sys_sched_getaffinity	3	/* 4240 */
+#endif /* CONFIG_MIPS_MT_FPAFF */
 	sys	sys_io_setup		2
 	sys	sys_io_destroy		1
 	sys	sys_io_getevents	5
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index dcbfd27..bcf1b10 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -529,7 +529,10 @@
 
 int __init fpu_disable(char *s)
 {
-	cpu_data[0].options &= ~MIPS_CPU_FPU;
+	int i;
+
+	for (i = 0; i < NR_CPUS; i++)	/* every CPU's entry, not only CPU 0 */
+		cpu_data[i].options &= ~MIPS_CPU_FPU;
 
 	return 1;
 }
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 19b8e4b..5777090 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -150,6 +150,11 @@
 	unsigned long val;
 	int i, num;
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* If we have an FPU, enroll ourselves in the FPU-full mask */
+	if (cpu_has_fpu)
+		cpu_set(0, mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
 	if (!cpu_has_mipsmt)
 		return;
 
@@ -312,6 +317,12 @@
 {
 	write_c0_compare(read_c0_count() + (8* mips_hpt_frequency/HZ));
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* If we have an FPU, enroll ourselves in the FPU-full mask */
+	if (cpu_has_fpu)
+		cpu_set(smp_processor_id(), mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
 	local_irq_enable();
 }
 
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 6336fe8..e9902d8 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -758,6 +758,36 @@
 						&current->thread.fpu.soft);
 			if (sig)
 				force_sig(sig, current);
+#ifdef CONFIG_MIPS_MT_FPAFF
+			else {
+			/*
+			 * MIPS MT processors may have fewer FPU contexts
+			 * than CPU threads. If we've emulated more than
+			 * some threshold number of instructions, force
+			 * migration to a "CPU" that has FP support.
+			 */
+			 if(mt_fpemul_threshold > 0
+			 && ((current->thread.emulated_fp++
+			    > mt_fpemul_threshold))) {
+			  /*
+			   * If there's no FPU present, or if the
+			   * application has already restricted
+			   * the allowed set to exclude any CPUs
+			   * with FPUs, we'll skip the procedure.
+			   */
+			  if (cpus_intersects(current->cpus_allowed,
+			  			mt_fpu_cpumask)) {
+			    cpumask_t tmask;
+
+			    cpus_and(tmask,
+					current->thread.user_cpus_allowed,
+					mt_fpu_cpumask);
+			    set_cpus_allowed(current, tmask);
+			    current->thread.mflags |= MF_FPUBOUND;
+			  }
+			 }
+			}
+#endif /* CONFIG_MIPS_MT_FPAFF */
 		}
 
 		return;
diff --git a/include/asm-mips/cpu-features.h b/include/asm-mips/cpu-features.h
index 3f2b6d9..254e11e 100644
--- a/include/asm-mips/cpu-features.h
+++ b/include/asm-mips/cpu-features.h
@@ -40,7 +40,7 @@
 #define cpu_has_sb1_cache	(cpu_data[0].options & MIPS_CPU_SB1_CACHE)
 #endif
 #ifndef cpu_has_fpu
-#define cpu_has_fpu		(cpu_data[0].options & MIPS_CPU_FPU)
+#define cpu_has_fpu		(current_cpu_data.options & MIPS_CPU_FPU)
 #endif
 #ifndef cpu_has_32fpr
 #define cpu_has_32fpr		(cpu_data[0].options & MIPS_CPU_32FPR)
diff --git a/include/asm-mips/fpu.h b/include/asm-mips/fpu.h
index 9c828b1..b0f5001 100644
--- a/include/asm-mips/fpu.h
+++ b/include/asm-mips/fpu.h
@@ -21,6 +21,10 @@
 #include <asm/processor.h>
 #include <asm/current.h>
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+#include <asm/mips_mt.h>
+#endif
+
 struct sigcontext;
 struct sigcontext32;
 
diff --git a/include/asm-mips/processor.h b/include/asm-mips/processor.h
index 7866513..0fb75f0 100644
--- a/include/asm-mips/processor.h
+++ b/include/asm-mips/processor.h
@@ -134,6 +134,12 @@
 
 	/* Saved fpu/fpu emulator stuff. */
 	union mips_fpu_union fpu;
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* Emulated instruction count */
+	unsigned long emulated_fp;
+	/* Saved per-thread scheduler affinity mask */
+	cpumask_t user_cpus_allowed;
+#endif /* CONFIG_MIPS_MT_FPAFF */
 
 	/* Saved state of the DSP ASE, if available. */
 	struct mips_dsp_state dsp;
@@ -159,6 +165,12 @@
 #define MF_N32		MF_32BIT_ADDR
 #define MF_N64		0
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+#define FPAFF_INIT 0, INIT_CPUMASK,	/* emulated_fp, user_cpus_allowed */
+#else
+#define FPAFF_INIT			/* no FPAFF fields to initialize */
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
 #define INIT_THREAD  { \
         /* \
          * saved main processor registers \
@@ -174,6 +186,10 @@
 	 */ \
 	INIT_FPU, \
 	/* \
+	 * fpu affinity state (expands to nothing without MIPS_MT_FPAFF) \
+	 */ \
+	FPAFF_INIT \
+	/* \
 	 * saved dsp/dsp emulator stuff \
 	 */ \
 	INIT_DSP, \
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 3902669..261f71d 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -155,6 +155,37 @@
 
 struct task_struct;
 
+#ifdef CONFIG_MIPS_MT_FPAFF
+
+/*
+ * Scheduler-resume end of FPU affinity management, done inline to keep the
+ * overhead down.  If prev was bound to an FPU-equipped "CPU" because of a
+ * high level of FP computation (MF_FPUBOUND), but did not actually use the
+ * FPU during its most recent time-slice (ST0_CU1 is clear in
+ * KSTK_STATUS(prev)), we undo the restriction by restoring cpus_allowed
+ * from thread.user_cpus_allowed.  next's emulated_fp count restarts at 0.
+ *
+ * set_cpus_allowed() is not called here because there is no need to force
+ * prompt migration - this CPU is already switching to a different thread.
+ */
+
+#define switch_to(prev,next,last)					\
+do {									\
+	if (cpu_has_fpu &&						\
+	    (prev->thread.mflags & MF_FPUBOUND) &&			\
+	     (!(KSTK_STATUS(prev) & ST0_CU1))) {			\
+		prev->thread.mflags &= ~MF_FPUBOUND;			\
+		prev->cpus_allowed = prev->thread.user_cpus_allowed;	\
+	}								\
+	if (cpu_has_dsp)						\
+		__save_dsp(prev);					\
+	next->thread.emulated_fp = 0;					\
+	(last) = resume(prev, next, next->thread_info);			\
+	if (cpu_has_dsp)						\
+		__restore_dsp(current);					\
+} while(0)
+
+#else
 #define switch_to(prev,next,last)					\
 do {									\
 	if (cpu_has_dsp)						\
@@ -163,6 +194,7 @@
 	if (cpu_has_dsp)						\
 		__restore_dsp(current);					\
 } while(0)
+#endif
 
 /*
  * On SMP systems, when the scheduler does migration-cost autodetection,