ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible

This patch fixes aes-armv4.S and sha1-armv4-large.S to work
natively in Thumb.  This allows ARM/Thumb interworking workarounds
to be removed.

I also take the opportunity to convert some explicit assembler
directives for exported functions to the standard
ENTRY()/ENDPROC().

For the code itself:

  * In sha1_block_data_order, use of TEQ with sp is deprecated in
    ARMv7 and not supported in Thumb.  For the branches back to
    .L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the
    assumption that clobbering the C flag here will not cause
    incorrect behaviour.

    For the first branch back to .L_20_39_or_60_79 the C flag is
    important, so sp is moved temporarily into another register so
    that TEQ can be used for the comparison.

  * In the AES code, most forms of register-indexed addressing with
    shifts and rotates are not permitted for loads and stores in
    Thumb, so the address calculation is done using a separate
    instruction for the Thumb case.

The resulting code is unlikely to be optimally scheduled, but it
should not have a large impact given the overall size of the code.
I haven't run any benchmarks.

Signed-off-by: Dave Martin <dave.martin@linaro.org>
Tested-by: David McCullough <ucdevel@gmail.com> (ARM only)
Acked-by: David McCullough <ucdevel@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S
index e59b1d5..19d6cd6 100644
--- a/arch/arm/crypto/aes-armv4.S
+++ b/arch/arm/crypto/aes-armv4.S
@@ -34,8 +34,9 @@
 @ A little glue here to select the correct code below for the ARM CPU
 @ that is being targetted.
 
+#include <linux/linkage.h>
+
 .text
-.code	32
 
 .type	AES_Te,%object
 .align	5
@@ -145,10 +146,8 @@
 
 @ void AES_encrypt(const unsigned char *in, unsigned char *out,
 @ 		 const AES_KEY *key) {
-.global AES_encrypt
-.type   AES_encrypt,%function
 .align	5
-AES_encrypt:
+ENTRY(AES_encrypt)
 	sub	r3,pc,#8		@ AES_encrypt
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -239,15 +238,8 @@
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia   sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_encrypt,.-AES_encrypt
+ENDPROC(AES_encrypt)
 
 .type   _armv4_AES_encrypt,%function
 .align	2
@@ -386,10 +378,8 @@
 	ldr	pc,[sp],#4		@ pop and return
 .size	_armv4_AES_encrypt,.-_armv4_AES_encrypt
 
-.global private_AES_set_encrypt_key
-.type   private_AES_set_encrypt_key,%function
 .align	5
-private_AES_set_encrypt_key:
+ENTRY(private_AES_set_encrypt_key)
 _armv4_AES_set_encrypt_key:
 	sub	r3,pc,#8		@ AES_set_encrypt_key
 	teq	r0,#0
@@ -658,15 +648,11 @@
 
 .Ldone:	mov	r0,#0
 	ldmia   sp!,{r4-r12,lr}
-.Labrt:	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-.size	private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
+.Labrt:	mov	pc,lr
+ENDPROC(private_AES_set_encrypt_key)
 
-.global private_AES_set_decrypt_key
-.type   private_AES_set_decrypt_key,%function
 .align	5
-private_AES_set_decrypt_key:
+ENTRY(private_AES_set_decrypt_key)
 	str	lr,[sp,#-4]!            @ push lr
 #if 0
 	@ kernel does both of these in setkey so optimise this bit out by
@@ -748,15 +734,8 @@
 	bne	.Lmix
 
 	mov	r0,#0
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia   sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
+ENDPROC(private_AES_set_decrypt_key)
 
 .type	AES_Td,%object
 .align	5
@@ -862,10 +841,8 @@
 
 @ void AES_decrypt(const unsigned char *in, unsigned char *out,
 @ 		 const AES_KEY *key) {
-.global AES_decrypt
-.type   AES_decrypt,%function
 .align	5
-AES_decrypt:
+ENTRY(AES_decrypt)
 	sub	r3,pc,#8		@ AES_decrypt
 	stmdb   sp!,{r1,r4-r12,lr}
 	mov	r12,r0		@ inp
@@ -956,15 +933,8 @@
 	strb	r6,[r12,#14]
 	strb	r3,[r12,#15]
 #endif
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia   sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	AES_decrypt,.-AES_decrypt
+ENDPROC(AES_decrypt)
 
 .type   _armv4_AES_decrypt,%function
 .align	2
@@ -1064,7 +1034,9 @@
 	and	r9,lr,r1,lsr#8
 
 	ldrb	r7,[r10,r7]		@ Td4[s1>>0]
-	ldrb	r1,[r10,r1,lsr#24]	@ Td4[s1>>24]
+ ARM(	ldrb	r1,[r10,r1,lsr#24]  )	@ Td4[s1>>24]
+ THUMB(	add	r1,r10,r1,lsr#24    ) 	@ Td4[s1>>24]
+ THUMB(	ldrb	r1,[r1]		    )
 	ldrb	r8,[r10,r8]		@ Td4[s1>>16]
 	eor	r0,r7,r0,lsl#24
 	ldrb	r9,[r10,r9]		@ Td4[s1>>8]
@@ -1077,7 +1049,9 @@
 	ldrb	r8,[r10,r8]		@ Td4[s2>>0]
 	and	r9,lr,r2,lsr#16
 
-	ldrb	r2,[r10,r2,lsr#24]	@ Td4[s2>>24]
+ ARM(	ldrb	r2,[r10,r2,lsr#24]  )	@ Td4[s2>>24]
+ THUMB(	add	r2,r10,r2,lsr#24    )	@ Td4[s2>>24]
+ THUMB(	ldrb	r2,[r2]		    )
 	eor	r0,r0,r7,lsl#8
 	ldrb	r9,[r10,r9]		@ Td4[s2>>16]
 	eor	r1,r8,r1,lsl#16
@@ -1090,7 +1064,9 @@
 	and	r9,lr,r3		@ i2
 
 	ldrb	r9,[r10,r9]		@ Td4[s3>>0]
-	ldrb	r3,[r10,r3,lsr#24]	@ Td4[s3>>24]
+ ARM(	ldrb	r3,[r10,r3,lsr#24]  )	@ Td4[s3>>24]
+ THUMB(	add	r3,r10,r3,lsr#24    )	@ Td4[s3>>24]
+ THUMB(	ldrb	r3,[r3]		    )
 	eor	r0,r0,r7,lsl#16
 	ldr	r7,[r11,#0]
 	eor	r1,r1,r8,lsl#8