[ARM] 4583/1: ARMv7: Add VFPv3 support

This patch adds the support for VFPv3 (the kernel currently supports
VFPv2). The main difference is 32 double registers (compared to 16).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a04f507..f4eeb03 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -951,7 +951,7 @@
 
 config VFP
 	bool "VFP-format floating point maths"
-	depends on CPU_V6 || CPU_ARM926T
+	depends on CPU_V6 || CPU_ARM926T || CPU_V7
 	help
 	  Say Y to include VFP support code in the kernel. This is needed
 	  if your hardware includes a VFP unit.
@@ -961,6 +961,11 @@
 
 	  Say N if your target does not have VFP hardware.
 
+config VFPv3
+	bool
+	depends on VFP
+	default y if CPU_V7
+
 endmenu
 
 menu "Userspace binary formats"
diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h
index 791d023..c85860b 100644
--- a/arch/arm/vfp/vfp.h
+++ b/arch/arm/vfp/vfp.h
@@ -265,7 +265,11 @@
  * which returns (double)0.0.  This is useful for the compare with
  * zero instructions.
  */
+#ifdef CONFIG_VFPv3
+#define VFP_REG_ZERO	32
+#else
 #define VFP_REG_ZERO	16
+#endif
 extern u64 vfp_get_double(unsigned int reg);
 extern void vfp_put_double(u64 val, unsigned int reg);
 
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 53d9f8e..353f9e5 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -99,12 +99,12 @@
 	DBGSTR1	"save old state %p", r4
 	cmp	r4, #0
 	beq	no_old_VFP_process
+	VFPFSTMIA r4, r5		@ save the working registers
 	VFPFMRX	r5, FPSCR		@ current status
 	tst	r1, #FPEXC_EX		@ is there additional state to save?
 	VFPFMRX	r6, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
 	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
 	VFPFMRX	r8, FPINST2, NE		@ FPINST2 if needed (and present)
-	VFPFSTMIA r4 			@ save the working registers
 	stmia	r4, {r1, r5, r6, r8}	@ save FPEXC, FPSCR, FPINST, FPINST2
 					@ and point r4 at the word at the
 					@ start of the register dump
@@ -114,7 +114,7 @@
 	DBGSTR1	"load state %p", r10
 	str	r10, [r3, r11, lsl #2]	@ update the last_VFP_context pointer
 					@ Load the saved state back into the VFP
-	VFPFLDMIA r10	 		@ reload the working registers while
+	VFPFLDMIA r10, r5		@ reload the working registers while
 					@ FPEXC is in a safe state
 	ldmia	r10, {r1, r5, r6, r8}	@ load FPEXC, FPSCR, FPINST, FPINST2
 	tst	r1, #FPEXC_EX		@ is there additional state to restore?
@@ -174,12 +174,12 @@
 	@ r0 - save location
 	@ r1 - FPEXC
 	DBGSTR1	"save VFP state %p", r0
+	VFPFSTMIA r0, r2		@ save the working registers
 	VFPFMRX	r2, FPSCR		@ current status
 	tst	r1, #FPEXC_EX		@ is there additional state to save?
 	VFPFMRX	r3, FPINST, NE		@ FPINST (only if FPEXC.EX is set)
 	tstne	r1, #FPEXC_FP2V		@ is there an FPINST2 to read?
 	VFPFMRX	r12, FPINST2, NE	@ FPINST2 if needed (and present)
-	VFPFSTMIA r0 			@ save the working registers
 	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
 	mov	pc, lr
 #endif
@@ -217,8 +217,15 @@
 	fmrrd	r0, r1, d\dr
 	mov	pc, lr
 	.endr
+#ifdef CONFIG_VFPv3
+	@ d16 - d31 registers
+	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	mrrc	p11, 3, r0, r1, c\dr	@ fmrrd	r0, r1, d\dr
+	mov	pc, lr
+	.endr
+#endif
 
-	@ virtual register 16 for compare with zero
+	@ virtual register 16 (or 32 if VFPv3) for compare with zero
 	mov	r0, #0
 	mov	r1, #0
 	mov	pc, lr
@@ -231,3 +238,10 @@
 	fmdrr	d\dr, r0, r1
 	mov	pc, lr
 	.endr
+#ifdef CONFIG_VFPv3
+	@ d16 - d31 registers
+	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+	mcrr	p11, 3, r1, r2, c\dr	@ fmdrr	r1, r2, d\dr
+	mov	pc, lr
+	.endr
+#endif
diff --git a/arch/arm/vfp/vfpinstr.h b/arch/arm/vfp/vfpinstr.h
index 7f343a4..15b95b5 100644
--- a/arch/arm/vfp/vfpinstr.h
+++ b/arch/arm/vfp/vfpinstr.h
@@ -52,11 +52,11 @@
 #define FEXT_TO_IDX(inst)	((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
 
 #define vfp_get_sd(inst)	((inst & 0x0000f000) >> 11 | (inst & (1 << 22)) >> 22)
-#define vfp_get_dd(inst)	((inst & 0x0000f000) >> 12)
+#define vfp_get_dd(inst)	((inst & 0x0000f000) >> 12 | (inst & (1 << 22)) >> 18)
 #define vfp_get_sm(inst)	((inst & 0x0000000f) << 1 | (inst & (1 << 5)) >> 5)
-#define vfp_get_dm(inst)	((inst & 0x0000000f))
+#define vfp_get_dm(inst)	((inst & 0x0000000f) | (inst & (1 << 5)) >> 1)
 #define vfp_get_sn(inst)	((inst & 0x000f0000) >> 15 | (inst & (1 << 7)) >> 7)
-#define vfp_get_dn(inst)	((inst & 0x000f0000) >> 16)
+#define vfp_get_dn(inst)	((inst & 0x000f0000) >> 16 | (inst & (1 << 7)) >> 3)
 
 #define vfp_single(inst)	(((inst) & 0x0000f00) == 0xa00)