[ARM] 4584/2: ARMv7: Add Advanced SIMD (NEON) extension support

This patch enables the use of the Advanced SIMD (NEON) extension on
ARMv7. The NEON technology is a 64/128-bit hybrid SIMD architecture
for accelerating the performance of multimedia and signal processing
applications. The extension shares the registers with the VFP unit and
enabling/disabling and saving/restoring follow the same rules. In
addition, there are instructions that do not have the appropriate CP
number encoded, the checks being made in the call_fpe function.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index f4eeb03..709f9d3 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -966,6 +966,13 @@
 	depends on VFP
 	default y if CPU_V7
 
+config NEON
+	bool "Advanced SIMD (NEON) Extension support"
+	depends on VFPv3 && CPU_V7
+	help
+	  Say Y to include support code for NEON, the ARMv7 Advanced SIMD
+	  Extension.
+
 endmenu
 
 menu "Userspace binary formats"
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 29dec08..8de21f5 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -480,6 +480,13 @@
  * co-processor instructions.  However, we have to watch out
  * for the ARM6/ARM7 SWI bug.
  *
+ * NEON is a special case that has to be handled here. Not all
+ * NEON instructions are co-processor instructions, so we have
+ * to make a special case of checking for them. Plus, there's
+ * five groups of them, so we have a table of mask/opcode pairs
+ * to check against, and if any match then we branch off into the
+ * NEON handler code.
+ *
  * Emulators may wish to make use of the following registers:
  *  r0  = instruction opcode.
  *  r2  = PC+4
@@ -488,6 +495,23 @@
  *  lr  = unrecognised instruction return address
  */
 call_fpe:
+#ifdef CONFIG_NEON
+	adr	r6, .LCneon_opcodes
+2:
+	ldr	r7, [r6], #4			@ mask value
+	cmp	r7, #0				@ end mask?
+	beq	1f
+	and	r8, r0, r7
+	ldr	r7, [r6], #4			@ opcode bits matching in mask
+	cmp	r8, r7				@ NEON instruction?
+	bne	2b
+	get_thread_info r10
+	mov	r7, #1
+	strb	r7, [r10, #TI_USED_CP + 10]	@ mark CP#10 as used
+	strb	r7, [r10, #TI_USED_CP + 11]	@ mark CP#11 as used
+	b	do_vfp				@ let VFP handler handle this
+1:
+#endif
 	tst	r0, #0x08000000			@ only CDP/CPRT/LDC/STC have bit 27
 #if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710)
 	and	r8, r0, #0x0f000000		@ mask out op-code bits
@@ -537,6 +561,20 @@
 	mov	pc, lr				@ CP#14 (Debug)
 	mov	pc, lr				@ CP#15 (Control)
 
+#ifdef CONFIG_NEON
+	.align	6
+
+.LCneon_opcodes:
+	.word	0xfe000000			@ mask
+	.word	0xf2000000			@ opcode
+
+	.word	0xff100000			@ mask
+	.word	0xf4000000			@ opcode
+
+	.word	0x00000000			@ mask
+	.word	0x00000000			@ opcode
+#endif
+
 do_fpe:
 	enable_irq
 	ldr	r4, .LCfp