powerpc: Put FP/VSX and VR state into structures

This creates new 'thread_fp_state' and 'thread_vr_state' structures
to store FP/VSX state (including FPSCR) and Altivec/VSX state
(including VSCR), and uses them in the thread_struct.  In the
thread_fp_state, the FPRs and VSRs are represented as u64 rather
than double, since we rarely perform floating-point computations
on the values, and this will enable the structures to be used
in KVM code as well.  Similarly FPSCR is now a u64 rather than
a structure of two 32-bit values.

This takes the offsets out of the macros such as SAVE_32FPRS,
REST_32FPRS, etc.  This enables the same macros to be used for normal
and transactional state, enabling us to delete the transactional
versions of the macros.   This also removes the unused do_load_up_fpu
and do_load_up_altivec, which were in fact buggy since they didn't
create large enough stack frames to account for the fact that
load_up_fpu and load_up_altivec are not designed to be called from C
and assume that their caller's stack frame is an interrupt frame.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 5995457..140f670 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -98,123 +98,40 @@
 #define REST_8GPRS(n, base)	REST_4GPRS(n, base); REST_4GPRS(n+4, base)
 #define REST_10GPRS(n, base)	REST_8GPRS(n, base); REST_2GPRS(n+8, base)
 
-#define SAVE_FPR(n, base)	stfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#define SAVE_FPR(n, base)	stfd	n,8*TS_FPRWIDTH*(n)(base)
 #define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
 #define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
 #define SAVE_8FPRS(n, base)	SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
 #define SAVE_16FPRS(n, base)	SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
 #define SAVE_32FPRS(n, base)	SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
-#define REST_FPR(n, base)	lfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#define REST_FPR(n, base)	lfd	n,8*TS_FPRWIDTH*(n)(base)
 #define REST_2FPRS(n, base)	REST_FPR(n, base); REST_FPR(n+1, base)
 #define REST_4FPRS(n, base)	REST_2FPRS(n, base); REST_2FPRS(n+2, base)
 #define REST_8FPRS(n, base)	REST_4FPRS(n, base); REST_4FPRS(n+4, base)
 #define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
 #define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
 
-#define SAVE_VR(n,b,base)	li b,THREAD_VR0+(16*(n));  stvx n,base,b
+#define SAVE_VR(n,b,base)	li b,16*(n);  stvx n,base,b
 #define SAVE_2VRS(n,b,base)	SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
 #define SAVE_4VRS(n,b,base)	SAVE_2VRS(n,b,base); SAVE_2VRS(n+2,b,base)
 #define SAVE_8VRS(n,b,base)	SAVE_4VRS(n,b,base); SAVE_4VRS(n+4,b,base)
 #define SAVE_16VRS(n,b,base)	SAVE_8VRS(n,b,base); SAVE_8VRS(n+8,b,base)
 #define SAVE_32VRS(n,b,base)	SAVE_16VRS(n,b,base); SAVE_16VRS(n+16,b,base)
-#define REST_VR(n,b,base)	li b,THREAD_VR0+(16*(n)); lvx n,base,b
+#define REST_VR(n,b,base)	li b,16*(n); lvx n,base,b
 #define REST_2VRS(n,b,base)	REST_VR(n,b,base); REST_VR(n+1,b,base)
 #define REST_4VRS(n,b,base)	REST_2VRS(n,b,base); REST_2VRS(n+2,b,base)
 #define REST_8VRS(n,b,base)	REST_4VRS(n,b,base); REST_4VRS(n+4,b,base)
 #define REST_16VRS(n,b,base)	REST_8VRS(n,b,base); REST_8VRS(n+8,b,base)
 #define REST_32VRS(n,b,base)	REST_16VRS(n,b,base); REST_16VRS(n+16,b,base)
 
-/* Save/restore FPRs, VRs and VSRs from their checkpointed backups in
- * thread_struct:
- */
-#define SAVE_FPR_TRANSACT(n, base)	stfd n,THREAD_TRANSACT_FPR0+	\
-					8*TS_FPRWIDTH*(n)(base)
-#define SAVE_2FPRS_TRANSACT(n, base)	SAVE_FPR_TRANSACT(n, base);	\
-					SAVE_FPR_TRANSACT(n+1, base)
-#define SAVE_4FPRS_TRANSACT(n, base)	SAVE_2FPRS_TRANSACT(n, base);	\
-					SAVE_2FPRS_TRANSACT(n+2, base)
-#define SAVE_8FPRS_TRANSACT(n, base)	SAVE_4FPRS_TRANSACT(n, base);	\
-					SAVE_4FPRS_TRANSACT(n+4, base)
-#define SAVE_16FPRS_TRANSACT(n, base)	SAVE_8FPRS_TRANSACT(n, base);	\
-					SAVE_8FPRS_TRANSACT(n+8, base)
-#define SAVE_32FPRS_TRANSACT(n, base)	SAVE_16FPRS_TRANSACT(n, base);	\
-					SAVE_16FPRS_TRANSACT(n+16, base)
-
-#define REST_FPR_TRANSACT(n, base)	lfd	n,THREAD_TRANSACT_FPR0+	\
-					8*TS_FPRWIDTH*(n)(base)
-#define REST_2FPRS_TRANSACT(n, base)	REST_FPR_TRANSACT(n, base);	\
-					REST_FPR_TRANSACT(n+1, base)
-#define REST_4FPRS_TRANSACT(n, base)	REST_2FPRS_TRANSACT(n, base);	\
-					REST_2FPRS_TRANSACT(n+2, base)
-#define REST_8FPRS_TRANSACT(n, base)	REST_4FPRS_TRANSACT(n, base);	\
-					REST_4FPRS_TRANSACT(n+4, base)
-#define REST_16FPRS_TRANSACT(n, base)	REST_8FPRS_TRANSACT(n, base);	\
-					REST_8FPRS_TRANSACT(n+8, base)
-#define REST_32FPRS_TRANSACT(n, base)	REST_16FPRS_TRANSACT(n, base);	\
-					REST_16FPRS_TRANSACT(n+16, base)
-
-
-#define SAVE_VR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VR0+(16*(n)); \
-					stvx n,b,base
-#define SAVE_2VRS_TRANSACT(n,b,base)	SAVE_VR_TRANSACT(n,b,base);	\
-					SAVE_VR_TRANSACT(n+1,b,base)
-#define SAVE_4VRS_TRANSACT(n,b,base)	SAVE_2VRS_TRANSACT(n,b,base);	\
-					SAVE_2VRS_TRANSACT(n+2,b,base)
-#define SAVE_8VRS_TRANSACT(n,b,base)	SAVE_4VRS_TRANSACT(n,b,base);	\
-					SAVE_4VRS_TRANSACT(n+4,b,base)
-#define SAVE_16VRS_TRANSACT(n,b,base)	SAVE_8VRS_TRANSACT(n,b,base);	\
-					SAVE_8VRS_TRANSACT(n+8,b,base)
-#define SAVE_32VRS_TRANSACT(n,b,base)	SAVE_16VRS_TRANSACT(n,b,base);	\
-					SAVE_16VRS_TRANSACT(n+16,b,base)
-
-#define REST_VR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VR0+(16*(n)); \
-					lvx n,b,base
-#define REST_2VRS_TRANSACT(n,b,base)	REST_VR_TRANSACT(n,b,base);	\
-					REST_VR_TRANSACT(n+1,b,base)
-#define REST_4VRS_TRANSACT(n,b,base)	REST_2VRS_TRANSACT(n,b,base);	\
-					REST_2VRS_TRANSACT(n+2,b,base)
-#define REST_8VRS_TRANSACT(n,b,base)	REST_4VRS_TRANSACT(n,b,base);	\
-					REST_4VRS_TRANSACT(n+4,b,base)
-#define REST_16VRS_TRANSACT(n,b,base)	REST_8VRS_TRANSACT(n,b,base);	\
-					REST_8VRS_TRANSACT(n+8,b,base)
-#define REST_32VRS_TRANSACT(n,b,base)	REST_16VRS_TRANSACT(n,b,base);	\
-					REST_16VRS_TRANSACT(n+16,b,base)
-
-
-#define SAVE_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
-					STXVD2X(n,R##base,R##b)
-#define SAVE_2VSRS_TRANSACT(n,b,base)	SAVE_VSR_TRANSACT(n,b,base);	\
-	                                SAVE_VSR_TRANSACT(n+1,b,base)
-#define SAVE_4VSRS_TRANSACT(n,b,base)	SAVE_2VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_2VSRS_TRANSACT(n+2,b,base)
-#define SAVE_8VSRS_TRANSACT(n,b,base)	SAVE_4VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_4VSRS_TRANSACT(n+4,b,base)
-#define SAVE_16VSRS_TRANSACT(n,b,base)	SAVE_8VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_8VSRS_TRANSACT(n+8,b,base)
-#define SAVE_32VSRS_TRANSACT(n,b,base)	SAVE_16VSRS_TRANSACT(n,b,base);	\
-	                                SAVE_16VSRS_TRANSACT(n+16,b,base)
-
-#define REST_VSR_TRANSACT(n,b,base)	li b,THREAD_TRANSACT_VSR0+(16*(n)); \
-					LXVD2X(n,R##base,R##b)
-#define REST_2VSRS_TRANSACT(n,b,base)	REST_VSR_TRANSACT(n,b,base);    \
-	                                REST_VSR_TRANSACT(n+1,b,base)
-#define REST_4VSRS_TRANSACT(n,b,base)	REST_2VSRS_TRANSACT(n,b,base);	\
-	                                REST_2VSRS_TRANSACT(n+2,b,base)
-#define REST_8VSRS_TRANSACT(n,b,base)	REST_4VSRS_TRANSACT(n,b,base);	\
-	                                REST_4VSRS_TRANSACT(n+4,b,base)
-#define REST_16VSRS_TRANSACT(n,b,base)	REST_8VSRS_TRANSACT(n,b,base);	\
-	                                REST_8VSRS_TRANSACT(n+8,b,base)
-#define REST_32VSRS_TRANSACT(n,b,base)	REST_16VSRS_TRANSACT(n,b,base);	\
-	                                REST_16VSRS_TRANSACT(n+16,b,base)
-
 /* Save the lower 32 VSRs in the thread VSR region */
-#define SAVE_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n));  STXVD2X(n,R##base,R##b)
+#define SAVE_VSR(n,b,base)	li b,16*(n);  STXVD2X(n,R##base,R##b)
 #define SAVE_2VSRS(n,b,base)	SAVE_VSR(n,b,base); SAVE_VSR(n+1,b,base)
 #define SAVE_4VSRS(n,b,base)	SAVE_2VSRS(n,b,base); SAVE_2VSRS(n+2,b,base)
 #define SAVE_8VSRS(n,b,base)	SAVE_4VSRS(n,b,base); SAVE_4VSRS(n+4,b,base)
 #define SAVE_16VSRS(n,b,base)	SAVE_8VSRS(n,b,base); SAVE_8VSRS(n+8,b,base)
 #define SAVE_32VSRS(n,b,base)	SAVE_16VSRS(n,b,base); SAVE_16VSRS(n+16,b,base)
-#define REST_VSR(n,b,base)	li b,THREAD_VSR0+(16*(n)); LXVD2X(n,R##base,R##b)
+#define REST_VSR(n,b,base)	li b,16*(n); LXVD2X(n,R##base,R##b)
 #define REST_2VSRS(n,b,base)	REST_VSR(n,b,base); REST_VSR(n+1,b,base)
 #define REST_4VSRS(n,b,base)	REST_2VSRS(n,b,base); REST_2VSRS(n+2,b,base)
 #define REST_8VSRS(n,b,base)	REST_4VSRS(n,b,base); REST_4VSRS(n+4,b,base)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ce4de5a..afe695e 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -144,8 +144,20 @@
 
 #define TS_FPROFFSET 0
 #define TS_VSRLOWOFFSET 1
-#define TS_FPR(i) fpr[i][TS_FPROFFSET]
-#define TS_TRANS_FPR(i) transact_fpr[i][TS_FPROFFSET]
+#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
+#define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET]
+
+/* FP and VSX 0-31 register set */
+struct thread_fp_state {
+	u64	fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
+	u64	fpscr;		/* Floating point status */
+};
+
+/* Complete AltiVec register set including VSCR */
+struct thread_vr_state {
+	vector128	vr[32] __attribute__((aligned(16)));
+	vector128	vscr __attribute__((aligned(16)));
+};
 
 struct thread_struct {
 	unsigned long	ksp;		/* Kernel stack pointer */
@@ -198,13 +210,7 @@
 	unsigned long	dvc2;
 #endif
 #endif
-	/* FP and VSX 0-31 register set */
-	double		fpr[32][TS_FPRWIDTH] __attribute__((aligned(16)));
-	struct {
-
-		unsigned int pad;
-		unsigned int val;	/* Floating point status */
-	} fpscr;
+	struct thread_fp_state	fp_state;
 	int		fpexc_mode;	/* floating-point exception mode */
 	unsigned int	align_ctl;	/* alignment handling control */
 #ifdef CONFIG_PPC64
@@ -222,10 +228,7 @@
 	struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
 	unsigned long	trap_nr;	/* last trap # on this thread */
 #ifdef CONFIG_ALTIVEC
-	/* Complete AltiVec register set */
-	vector128	vr[32] __attribute__((aligned(16)));
-	/* AltiVec status */
-	vector128	vscr __attribute__((aligned(16)));
+	struct thread_vr_state vr_state;
 	unsigned long	vrsave;
 	int		used_vr;	/* set if process has used altivec */
 #endif /* CONFIG_ALTIVEC */
@@ -262,13 +265,8 @@
 	 * transact_fpr[] is the new set of transactional values.
 	 * VRs work the same way.
 	 */
-	double		transact_fpr[32][TS_FPRWIDTH];
-	struct {
-		unsigned int pad;
-		unsigned int val;	/* Floating point status */
-	} transact_fpscr;
-	vector128	transact_vr[32] __attribute__((aligned(16)));
-	vector128	transact_vscr __attribute__((aligned(16)));
+	struct thread_fp_state transact_fp;
+	struct thread_vr_state transact_vr;
 	unsigned long	transact_vrsave;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -322,8 +320,6 @@
 	.ksp = INIT_SP, \
 	.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
 	.fs = KERNEL_DS, \
-	.fpr = {{0}}, \
-	.fpscr = { .val = 0, }, \
 	.fpexc_mode = 0, \
 	.ppr = INIT_PPR, \
 }
diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h
index 3a7a67a..d89beab 100644
--- a/arch/powerpc/include/asm/sfp-machine.h
+++ b/arch/powerpc/include/asm/sfp-machine.h
@@ -125,7 +125,7 @@
 #define FP_EX_DIVZERO         (1 << (31 - 5))
 #define FP_EX_INEXACT         (1 << (31 - 6))
 
-#define __FPU_FPSCR	(current->thread.fpscr.val)
+#define __FPU_FPSCR	(current->thread.fp_state.fpscr)
 
 /* We only actually write to the destination register
  * if exceptions signalled (if any) will not trap.