Linux Thumb-2 support for user-space applications

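This patch implements Thumb-2 application support in Linux. The
user-mode undefined instruction handler (__und_usr) no longer rejects
every Thumb-mode instruction: when built for ARMv7 or later it fetches
and decodes 32-bit Thumb-2 instructions and passes them to call_fpe, so
that coprocessor (e.g. VFP) and Neon instructions issued from Thumb-2
code are handled as they are for ARM code. Original implementation by
Paul Brook with fixes for VFP and Neon by Catalin Marinas.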

Signed-off-by: Paul Brook <paul@codesourcery.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 5e647eb..6fd1460 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -462,10 +462,6 @@
 __und_usr:
 	usr_entry
 
-	tst	r3, #PSR_T_BIT			@ Thumb mode?
-	bne	__und_usr_unknown		@ ignore FP
-	sub	r4, r2, #4
-
 	@
 	@ fall through to the emulation code, which returns using r9 if
 	@ it has emulated the instruction, or the more conventional lr
@@ -475,7 +471,24 @@
 	@
 	adr	r9, ret_from_exception
 	adr	lr, __und_usr_unknown
-1:	ldrt	r0, [r4]
+	tst	r3, #PSR_T_BIT			@ Thumb mode?
+	subeq	r4, r2, #4			@ ARM instr at LR - 4
+	subne	r4, r2, #2			@ Thumb instr at LR - 2
+1:	ldreqt	r0, [r4]
+	beq	call_fpe
+	@ Thumb instruction
+#if __LINUX_ARM_ARCH__ >= 7
+2:	ldrht	r5, [r4], #2
+	and	r0, r5, #0xf800			@ mask bits 111x x... .... ....
+	cmp	r0, #0xe800			@ 32bit instruction if xx != 0
+	blo	__und_usr_unknown
+3:	ldrht	r0, [r4]
+	add	r2, r2, #2			@ r2 is PC + 2, make it PC + 4
+	orr	r0, r0, r5, lsl #16
+#else
+	b	__und_usr_unknown
+#endif
+
 	@
 	@ fallthrough to call_fpe
 	@
@@ -484,10 +497,14 @@
  * The out of line fixup for the ldrt above.
  */
 	.section .fixup, "ax"
-2:	mov	pc, r9
+4:	mov	pc, r9
 	.previous
 	.section __ex_table,"a"
-	.long	1b, 2b
+	.long	1b, 4b
+#if __LINUX_ARM_ARCH__ >= 7
+	.long	2b, 4b
+	.long	3b, 4b
+#endif
 	.previous
 
 /*
@@ -514,9 +531,16 @@
  *  r10 = this threads thread_info structure.
  *  lr  = unrecognised instruction return address
  */
+	@
+	@ Fall-through from Thumb-2 __und_usr
+	@
+#ifdef CONFIG_NEON
+	adr	r6, .LCneon_thumb_opcodes
+	b	2f
+#endif
 call_fpe:
 #ifdef CONFIG_NEON
-	adr	r6, .LCneon_opcodes
+	adr	r6, .LCneon_arm_opcodes
 2:
 	ldr	r7, [r6], #4			@ mask value
 	cmp	r7, #0				@ end mask?
@@ -533,6 +557,7 @@
 1:
 #endif
 	tst	r0, #0x08000000			@ only CDP/CPRT/LDC/STC have bit 27
+	tstne	r0, #0x04000000			@ bit 26 set on both ARM and Thumb-2
 #if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710)
 	and	r8, r0, #0x0f000000		@ mask out op-code bits
 	teqne	r8, #0x0f000000			@ SWI (ARM6/7 bug)?
@@ -584,7 +609,7 @@
 #ifdef CONFIG_NEON
 	.align	6
 
-.LCneon_opcodes:
+.LCneon_arm_opcodes:
 	.word	0xfe000000			@ mask
 	.word	0xf2000000			@ opcode
 
@@ -593,6 +618,16 @@
 
 	.word	0x00000000			@ mask
 	.word	0x00000000			@ opcode
+
+.LCneon_thumb_opcodes:
+	.word	0xef000000			@ mask
+	.word	0xef000000			@ opcode
+
+	.word	0xff100000			@ mask
+	.word	0xf9000000			@ opcode
+
+	.word	0x00000000			@ mask
+	.word	0x00000000			@ opcode
 #endif
 
 do_fpe: