MIPS: Basic MSA context switching support

This patch adds support for context switching the MSA vector registers.
These 128 bit vector registers are aliased with the FP registers - an
FP register accesses the least significant bits of the vector register
with which it is aliased (i.e. the register with the same index). Due to
both this & the requirement that the scalar FPU must be 64-bit (FR=1) if
enabled at the same time as MSA, the kernel will enable MSA & scalar FP
at the same time for tasks which use MSA. If we restore the MSA vector
context then we might as well enable the scalar FPU since the reason it
was left disabled was to allow for lazy FP context restoring - but we
just restored the FP context as it's a subset of the vector context. If
we restore the FP context and have previously used MSA then we have to
restore the whole vector context anyway (see comment in
enable_restore_fp_context for details) so similarly we might as well
enable MSA.

Thus if a task does not use MSA then it will continue to behave as it
did without this patch - the scalar FP context will be saved & restored
as usual. But once a task executes an MSA instruction, its vector context
will be saved & restored from then on.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/6431/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index cc78dd9..f938ecd 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -29,18 +29,8 @@
 #define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
 
 /*
- * FPU context is saved iff the process has used it's FPU in the current
- * time slice as indicated by _TIF_USEDFPU.  In any case, the CU1 bit for user
- * space STATUS register should be 0, so that a process *always* starts its
- * userland with FPU disabled after each context switch.
- *
- * FPU will be enabled as soon as the process accesses FPU again, through
- * do_cpu() trap.
- */
-
-/*
  * task_struct *resume(task_struct *prev, task_struct *next,
- *		       struct thread_info *next_ti, int usedfpu)
+ *		       struct thread_info *next_ti, s32 fp_save)
  */
 	.align	5
 	LEAF(resume)
@@ -50,23 +40,37 @@
 	LONG_S	ra, THREAD_REG31(a0)
 
 	/*
-	 * check if we need to save FPU registers
+	 * Check whether we need to save any FP context. FP context is saved
+	 * iff the process has used the context with the scalar FPU or the MSA
+	 * ASE in the current time slice, as indicated by _TIF_USEDFPU and
+	 * _TIF_USEDMSA respectively. switch_to will have set fp_save
+	 * accordingly to an FP_SAVE_ enum value.
 	 */
+	beqz	a3, 2f
 
-	beqz	a3, 1f
-
-	PTR_L	t3, TASK_THREAD_INFO(a0)
 	/*
-	 * clear saved user stack CU1 bit
+	 * We do. Clear the saved CU1 bit for prev, such that next time it is
+	 * scheduled it will start in userland with the FPU disabled. If the
+	 * task uses the FPU then it will be enabled again via the do_cpu trap.
+	 * This allows us to lazily restore the FP context.
 	 */
+	PTR_L	t3, TASK_THREAD_INFO(a0)
 	LONG_L	t0, ST_OFF(t3)
 	li	t1, ~ST0_CU1
 	and	t0, t0, t1
 	LONG_S	t0, ST_OFF(t3)
 
+	/* Check whether we're saving scalar or vector context. */
+	bgtz	a3, 1f
+
+	/* Save 128b MSA vector context. */
+	msa_save_all	a0
+	b	2f
+
+1:	/* Save 32b/64b scalar FP context. */
 	fpu_save_double a0 t0 t1		# c0_status passed in t0
 						# clobbers t1
-1:
+2:
 
 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	PTR_LA	t8, __stack_chk_guard
@@ -141,6 +145,26 @@
 	jr	ra
 	END(_restore_fp)
 
+#ifdef CONFIG_CPU_HAS_MSA
+
+/*
+ * Save a thread's MSA vector context: a0 is handed to msa_save_all.
+ */
+LEAF(_save_msa)
+	msa_save_all	a0
+	jr	ra
+	END(_save_msa)
+
+/*
+ * Restore a thread's MSA vector context: a0 is handed to msa_restore_all.
+ */
+LEAF(_restore_msa)
+	msa_restore_all	a0
+	jr	ra
+	END(_restore_msa)
+
+#endif
+
 /*
  * Load the FPU with signalling NANS.  This bit pattern we're using has
  * the property that no matter whether considered as single or as double