Thumb-2: Implement the unified arch/arm/lib functions

This patch adds ARM/Thumb-2 unified support for the arch/arm/lib/*
files: open-coded STRT/LDRT user-access sequences move into the new
strusr/ldrusr macros in asm/assembler.h (together with their
__ex_table fixup entries), loads and stores used as computed-branch
targets are marked with W() so that they stay 32-bit in Thumb-2, and
instructions that need a register-controlled shift, which Thumb-2
does not provide, gain separate ARM()/THUMB() variants. The copy
templates gain LDR1W_SHIFT/STR1W_SHIFT to scale their computed
branch offsets accordingly.
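
As an illustration (not itself part of the patch), a conditional user
store such as

	strusr	r2, r0, 4, pl

is intended to expand roughly as follows, recording the faulting
address against the default 9001 fixup label in each case:

	@ ARM: predicated post-indexed STRT
9999:	strplt	r2, [r0], #4
	.section __ex_table, "a"
	.align	3
	.long	9999b, 9001f
	.previous

	@ Thumb-2: explicit IT block, offset addressing, separate add
	itt	pl
9999:	strplt	r2, [r0, #0]
	.section __ex_table, "a"
	.align	3
	.long	9999b, 9001f
	.previous
	addpl	r0, #4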

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 1acd1da..2b60c7d 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -138,3 +138,76 @@
 	msr	cpsr_c, #\mode
 	.endm
 #endif
+
+/*
+ * STRT/LDRT access macros with ARM and Thumb-2 variants
+ */
+#ifdef CONFIG_THUMB2_KERNEL
+
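+	@ usraccoff: one user access at [\ptr, #\off]; \inc selects byte
+	@ (1) or word (4) access; a fault at 9999 branches to \abort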
+	.macro	usraccoff, instr, reg, ptr, inc, off, cond, abort
+9999:
+	.if	\inc == 1
+	\instr\cond\()bt \reg, [\ptr, #\off]
+	.elseif	\inc == 4
+	\instr\cond\()t \reg, [\ptr, #\off]
+	.else
+	.error	"Unsupported inc macro argument"
+	.endif
+
+	.section __ex_table,"a"
+	.align	3
+	.long	9999b, \abort
+	.previous
+	.endm
+
+	.macro	usracc, instr, reg, ptr, inc, cond, rept, abort
+	@ explicit IT instruction needed because of the label
+	@ introduced by the USER macro
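+	@ (the IT block covers the \rept accesses plus the final add)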
+	.ifnc	\cond,al
+	.if	\rept == 1
+	itt	\cond
+	.elseif	\rept == 2
+	ittt	\cond
+	.else
+	.error	"Unsupported rept macro argument"
+	.endif
+	.endif
+
+	@ Slightly optimised to avoid incrementing the pointer twice
+	usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort
+	.if	\rept == 2
+	usraccoff \instr, \reg, \ptr, \inc, 4, \cond, \abort
+	.endif
+
+	add\cond \ptr, #\rept * \inc
+	.endm
+
+#else	/* !CONFIG_THUMB2_KERNEL */
+
+	.macro	usracc, instr, reg, ptr, inc, cond, rept, abort
+	.rept	\rept
+9999:
+	.if	\inc == 1
+	\instr\cond\()bt \reg, [\ptr], #\inc
+	.elseif	\inc == 4
+	\instr\cond\()t \reg, [\ptr], #\inc
+	.else
+	.error	"Unsupported inc macro argument"
+	.endif
+
+	.section __ex_table,"a"
+	.align	3
+	.long	9999b, \abort
+	.previous
+	.endr
+	.endm
+
+#endif	/* CONFIG_THUMB2_KERNEL */
+
+	.macro	strusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
+	usracc	str, \reg, \ptr, \inc, \cond, \rept, \abort
+	.endm
+
+	.macro	ldrusr, reg, ptr, inc, cond=al, rept=1, abort=9001f
+	usracc	ldr, \reg, \ptr, \inc, \cond, \rept, \abort
+	.endm
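+
+/*
+ * Example: "strusr r2, r0, 4, pl, rept=2" stores two user words at
+ * [r0] and [r0, #4] when the pl condition holds, advances r0 by 8
+ * and records both fixups against the default 9001 abort label.
+ */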
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 0da9bc9..1d6bd40 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -17,6 +17,7 @@
 #include <asm/memory.h>
 #include <asm/domain.h>
 #include <asm/system.h>
+#include <asm/unified.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
@@ -365,8 +366,10 @@
 
 #define __put_user_asm_dword(x,__pu_addr,err)			\
 	__asm__ __volatile__(					\
-	"1:	strt	" __reg_oper1 ", [%1], #4\n"		\
-	"2:	strt	" __reg_oper0 ", [%1]\n"		\
+ ARM(	"1:	strt	" __reg_oper1 ", [%1], #4\n"	)	\
+ ARM(	"2:	strt	" __reg_oper0 ", [%1]\n"	)	\
+ THUMB(	"1:	strt	" __reg_oper1 ", [%1]\n"	)	\
+ THUMB(	"2:	strt	" __reg_oper0 ", [%1, #4]\n"	)	\
 	"3:\n"							\
 	"	.section .fixup,\"ax\"\n"			\
 	"	.align	2\n"					\
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 1154d92..638deb1 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -43,7 +43,9 @@
 	rsb	ip, r2, #32
 	movmi	ah, ah, lsl r2
 	movpl	ah, al, lsl r3
-	orrmi	ah, ah, al, lsr ip
+ ARM(	orrmi	ah, ah, al, lsr ip	)
+ THUMB(	lsrmi	r3, al, ip		)
+ THUMB(	orrmi	ah, ah, r3		)
 	mov	al, al, lsl r2
 	mov	pc, lr
 
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 9f8b355..015e8aa5 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -43,7 +43,9 @@
 	rsb	ip, r2, #32
 	movmi	al, al, lsr r2
 	movpl	al, ah, asr r3
-	orrmi	al, al, ah, lsl ip
+ ARM(	orrmi	al, al, ah, lsl ip	)
+ THUMB(	lslmi	r3, ah, ip		)
+ THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, asr r2
 	mov	pc, lr
 
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index b0951d0..aaf7220 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -38,7 +38,9 @@
 		beq	no_frame		@ we have no stack frames
 
 		tst	r1, #0x10		@ 26 or 32-bit mode?
-		moveq	mask, #0xfc000003	@ mask for 26-bit
+ ARM(		moveq	mask, #0xfc000003	)
+ THUMB(		moveq	mask, #0xfc000000	)
+ THUMB(		orreq	mask, #0x03		)
 		movne	mask, #0		@ mask for 32-bit
 
 1:		stmfd	sp!, {pc}		@ calculate offset of PC stored
@@ -126,7 +128,9 @@
 		mov	reg, #10
 		mov	r7, #0
 1:		mov	r3, #1
-		tst	instr, r3, lsl reg
+ ARM(		tst	instr, r3, lsl reg	)
+ THUMB(		lsl	r3, reg			)
+ THUMB(		tst	instr, r3		)
 		beq	2f
 		add	r7, r7, #1
 		teq	r7, #6
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index 844f567..1279abd 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -27,21 +27,20 @@
 		ands	ip, r0, #3
 		beq	1f
 		cmp	ip, #2
-USER(		strbt	r2, [r0], #1)
-USER(		strlebt	r2, [r0], #1)
-USER(		strltbt	r2, [r0], #1)
+		strusr	r2, r0, 1
+		strusr	r2, r0, 1, le
+		strusr	r2, r0, 1, lt
 		rsb	ip, ip, #4
 		sub	r1, r1, ip		@  7  6  5  4  3  2  1
 1:		subs	r1, r1, #8		@ -1 -2 -3 -4 -5 -6 -7
-USER(		strplt	r2, [r0], #4)
-USER(		strplt	r2, [r0], #4)
+		strusr	r2, r0, 4, pl, rept=2
 		bpl	1b
 		adds	r1, r1, #4		@  3  2  1  0 -1 -2 -3
-USER(		strplt	r2, [r0], #4)
+		strusr	r2, r0, 4, pl
 2:		tst	r1, #2			@ 1x 1x 0x 0x 1x 1x 0x
-USER(		strnebt	r2, [r0], #1)
-USER(		strnebt	r2, [r0], #1)
+		strusr	r2, r0, 1, ne, rept=2
 		tst	r1, #1			@ x1 x0 x1 x0 x1 x0 x1
+		it	ne			@ explicit IT needed for the label
 USER(		strnebt	r2, [r0])
 		mov	r0, #0
 		ldmfd	sp!, {r1, pc}
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 56799a1..e4fe124 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -33,11 +33,15 @@
  *	Number of bytes NOT copied.
  */
 
+#ifndef CONFIG_THUMB2_KERNEL
+#define LDR1W_SHIFT	0
+#else
+#define LDR1W_SHIFT	1
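+/* in Thumb-2 the ldrusr-based ldr1w expands to ldrt + add = 2 words */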
+#endif
+#define STR1W_SHIFT	0
+
 	.macro ldr1w ptr reg abort
-100:	ldrt \reg, [\ptr], #4
-	.section __ex_table, "a"
-	.long 100b, \abort
-	.previous
+	ldrusr	\reg, \ptr, 4, abort=\abort
 	.endm
 
 	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
@@ -53,14 +57,11 @@
 	.endm
 
 	.macro ldr1b ptr reg cond=al abort
-100:	ldr\cond\()bt \reg, [\ptr], #1
-	.section __ex_table, "a"
-	.long 100b, \abort
-	.previous
+	ldrusr	\reg, \ptr, 1, \cond, abort=\abort
 	.endm
 
 	.macro str1w ptr reg abort
-	str \reg, [\ptr], #4
+	W(str) \reg, [\ptr], #4
 	.endm
 
 	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 139cce6..805e3f8 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -57,6 +57,13 @@
  *
  *	Restore registers with the values previously saved with the
  *	'preserv' macro. Called upon code termination.
+ *
+ * LDR1W_SHIFT
+ * STR1W_SHIFT
+ *
+ *	Correction to be applied to the "ip" register when branching into
+ *	the ldr1w or str1w instructions (some of these macros may expand to
+ *	more than one 32bit instruction in Thumb-2)
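+ *
+ *	(e.g. the Thumb-2 ldrusr-based ldr1w is two 32-bit words, so
+ *	LDR1W_SHIFT is 1 and the branch offset is doubled)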
  */
 
 
@@ -99,9 +106,15 @@
 
 5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+#if LDR1W_SHIFT > 0
+		lsl	ip, ip, #LDR1W_SHIFT
+#endif
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
-6:		nop
+6:
+		.rept	(1 << LDR1W_SHIFT)
+		W(nop)
+		.endr
 		ldr1w	r1, r3, abort=20f
 		ldr1w	r1, r4, abort=20f
 		ldr1w	r1, r5, abort=20f
@@ -110,9 +123,16 @@
 		ldr1w	r1, r8, abort=20f
 		ldr1w	r1, lr, abort=20f
 
+#if LDR1W_SHIFT < STR1W_SHIFT
+		lsl	ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
+#elif LDR1W_SHIFT > STR1W_SHIFT
+		lsr	ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
+#endif
 		add	pc, pc, ip
 		nop
-		nop
+		.rept	(1 << STR1W_SHIFT)
+		W(nop)
+		.endr
 		str1w	r0, r3, abort=20f
 		str1w	r0, r4, abort=20f
 		str1w	r0, r5, abort=20f
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index 878820f..1a71e15 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -33,8 +33,15 @@
  *	Number of bytes NOT copied.
  */
 
+#define LDR1W_SHIFT	0
+#ifndef CONFIG_THUMB2_KERNEL
+#define STR1W_SHIFT	0
+#else
+#define STR1W_SHIFT	1
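+/* in Thumb-2 the strusr-based str1w expands to strt + add = 2 words */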
+#endif
+
 	.macro ldr1w ptr reg abort
-	ldr \reg, [\ptr], #4
+	W(ldr) \reg, [\ptr], #4
 	.endm
 
 	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
@@ -50,10 +57,7 @@
 	.endm
 
 	.macro str1w ptr reg abort
-100:	strt \reg, [\ptr], #4
-	.section __ex_table, "a"
-	.long 100b, \abort
-	.previous
+	strusr	\reg, \ptr, 4, abort=\abort
 	.endm
 
 	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
@@ -68,10 +72,7 @@
 	.endm
 
 	.macro str1b ptr reg cond=al abort
-100:	str\cond\()bt \reg, [\ptr], #1
-	.section __ex_table, "a"
-	.long 100b, \abort
-	.previous
+	strusr	\reg, \ptr, 1, \cond, abort=\abort
 	.endm
 
 	.macro enter reg1 reg2
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 14677fb..fd0e9dc 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -26,50 +26,28 @@
 		.endm
 
 		.macro	load1b,	reg1
-9999:		ldrbt	\reg1, [r0], $1
-		.section __ex_table, "a"
-		.align	3
-		.long	9999b, 6001f
-		.previous
+		ldrusr	\reg1, r0, 1
 		.endm
 
 		.macro	load2b, reg1, reg2
-9999:		ldrbt	\reg1, [r0], $1
-9998:		ldrbt	\reg2, [r0], $1
-		.section __ex_table, "a"
-		.long	9999b, 6001f
-		.long	9998b, 6001f
-		.previous
+		ldrusr	\reg1, r0, 1
+		ldrusr	\reg2, r0, 1
 		.endm
 
 		.macro	load1l, reg1
-9999:		ldrt	\reg1, [r0], $4
-		.section __ex_table, "a"
-		.align	3
-		.long	9999b, 6001f
-		.previous
+		ldrusr	\reg1, r0, 4
 		.endm
 
 		.macro	load2l, reg1, reg2
-9999:		ldrt	\reg1, [r0], $4
-9998:		ldrt	\reg2, [r0], $4
-		.section __ex_table, "a"
-		.long	9999b, 6001f
-		.long	9998b, 6001f
-		.previous
+		ldrusr	\reg1, r0, 4
+		ldrusr	\reg2, r0, 4
 		.endm
 
 		.macro	load4l, reg1, reg2, reg3, reg4
-9999:		ldrt	\reg1, [r0], $4
-9998:		ldrt	\reg2, [r0], $4
-9997:		ldrt	\reg3, [r0], $4
-9996:		ldrt	\reg4, [r0], $4
-		.section __ex_table, "a"
-		.long	9999b, 6001f
-		.long	9998b, 6001f
-		.long	9997b, 6001f
-		.long	9996b, 6001f
-		.previous
+		ldrusr	\reg1, r0, 4
+		ldrusr	\reg2, r0, 4
+		ldrusr	\reg3, r0, 4
+		ldrusr	\reg4, r0, 4
 		.endm
 
 /*
@@ -92,14 +70,14 @@
  */
 		.section .fixup,"ax"
 		.align	4
-6001:		mov	r4, #-EFAULT
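+		@ 9001 matches the default abort label used by ldrusr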
+9001:		mov	r4, #-EFAULT
 		ldr	r5, [fp, #4]		@ *err_ptr
 		str	r4, [r5]
 		ldmia	sp, {r1, r2}		@ retrieve dst, len
 		add	r2, r2, r1
 		mov	r0, #0			@ zero the buffer
-6002:		teq	r2, r1
+9002:		teq	r2, r1
 		strneb	r0, [r1], #1
-		bne	6002b
+		bne	9002b
 		load_regs
 		.previous
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index 1425e78..faa7748 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -177,7 +177,9 @@
 	mov	yh, xh, lsr ip
 	mov	yl, xl, lsr ip
 	rsb	ip, ip, #32
-	orr	yl, yl, xh, lsl ip
+ ARM(	orr	yl, yl, xh, lsl ip	)
+ THUMB(	lsl	xh, xh, ip		)
+ THUMB(	orr	yl, yl, xh		)
 	mov	xh, xl, lsl ip
 	mov	xh, xh, lsr ip
 	mov	pc, lr
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 8c4defc..1e4cbd4 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -25,7 +25,10 @@
 		teq	r1, #0	
 		beq	3f
 		mov	r2, #0
-1:		ldrb	r3, [r0, r2, lsr #3]
+1:
+ ARM(		ldrb	r3, [r0, r2, lsr #3]	)
+ THUMB(		lsr	r3, r2, #3		)
+ THUMB(		ldrb	r3, [r0, r3]		)
 		eors	r3, r3, #0xff		@ invert bits
 		bne	.L_found		@ any now set - found zero bit
 		add	r2, r2, #8		@ next bit pointer
@@ -44,7 +47,9 @@
 		beq	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
-		ldrb	r3, [r0, r2, lsr #3]
+ ARM(		ldrb	r3, [r0, r2, lsr #3]	)
+ THUMB(		lsr	r3, r2, #3		)
+ THUMB(		ldrb	r3, [r0, r3]		)
 		eor	r3, r3, #0xff		@ now looking for a 1 bit
 		movs	r3, r3, lsr ip		@ shift off unused bits
 		bne	.L_found
@@ -61,7 +66,10 @@
 		teq	r1, #0	
 		beq	3f
 		mov	r2, #0
-1:		ldrb	r3, [r0, r2, lsr #3]
+1:
+ ARM(		ldrb	r3, [r0, r2, lsr #3]	)
+ THUMB(		lsr	r3, r2, #3		)
+ THUMB(		ldrb	r3, [r0, r3]		)
 		movs	r3, r3
 		bne	.L_found		@ any now set - found zero bit
 		add	r2, r2, #8		@ next bit pointer
@@ -80,7 +88,9 @@
 		beq	3b
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
-		ldrb	r3, [r0, r2, lsr #3]
+ ARM(		ldrb	r3, [r0, r2, lsr #3]	)
+ THUMB(		lsr	r3, r2, #3		)
+ THUMB(		ldrb	r3, [r0, r3]		)
 		movs	r3, r3, lsr ip		@ shift off unused bits
 		bne	.L_found
 		orr	r2, r2, #7		@ if zero, then no bits here
@@ -95,7 +105,9 @@
 		beq	3f
 		mov	r2, #0
 1:		eor	r3, r2, #0x18		@ big endian byte ordering
-		ldrb	r3, [r0, r3, lsr #3]
+ ARM(		ldrb	r3, [r0, r3, lsr #3]	)
+ THUMB(		lsr	r3, #3			)
+ THUMB(		ldrb	r3, [r0, r3]		)
 		eors	r3, r3, #0xff		@ invert bits
 		bne	.L_found		@ any now set - found zero bit
 		add	r2, r2, #8		@ next bit pointer
@@ -111,7 +123,9 @@
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
 		eor	r3, r2, #0x18		@ big endian byte ordering
-		ldrb	r3, [r0, r3, lsr #3]
+ ARM(		ldrb	r3, [r0, r3, lsr #3]	)
+ THUMB(		lsr	r3, #3			)
+ THUMB(		ldrb	r3, [r0, r3]		)
 		eor	r3, r3, #0xff		@ now looking for a 1 bit
 		movs	r3, r3, lsr ip		@ shift off unused bits
 		bne	.L_found
@@ -125,7 +139,9 @@
 		beq	3f
 		mov	r2, #0
 1:		eor	r3, r2, #0x18		@ big endian byte ordering
-		ldrb	r3, [r0, r3, lsr #3]
+ ARM(		ldrb	r3, [r0, r3, lsr #3]	)
+ THUMB(		lsr	r3, #3			)
+ THUMB(		ldrb	r3, [r0, r3]		)
 		movs	r3, r3
 		bne	.L_found		@ any now set - found zero bit
 		add	r2, r2, #8		@ next bit pointer
@@ -141,7 +157,9 @@
 		ands	ip, r2, #7
 		beq	1b			@ If new byte, goto old routine
 		eor	r3, r2, #0x18		@ big endian byte ordering
-		ldrb	r3, [r0, r3, lsr #3]
+ ARM(		ldrb	r3, [r0, r3, lsr #3]	)
+ THUMB(		lsr	r3, #3			)
+ THUMB(		ldrb	r3, [r0, r3]		)
 		movs	r3, r3, lsr ip		@ shift off unused bits
 		bne	.L_found
 		orr	r2, r2, #7		@ if zero, then no bits here
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 6763088..a1814d9 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -36,8 +36,13 @@
 ENDPROC(__get_user_1)
 
 ENTRY(__get_user_2)
+#ifdef CONFIG_THUMB2_KERNEL
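+	@ Thumb-2 ldrbt has no post-index form; use immediate offsets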
+2:	ldrbt	r2, [r0]
+3:	ldrbt	r3, [r0, #1]
+#else
 2:	ldrbt	r2, [r0], #1
 3:	ldrbt	r3, [r0]
+#endif
 #ifndef __ARMEB__
 	orr	r2, r2, r3, lsl #8
 #else
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index d658561..ff4f71b 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -75,7 +75,10 @@
 #endif
 
 .Loutsw_noalign:
-		ldr	r3, [r1, -r3]!
+ ARM(		ldr	r3, [r1, -r3]!	)
+ THUMB(		rsb	r3, r3, #0	)
+ THUMB(		ldr	r3, [r1, r3]	)
+ THUMB(		sub	r1, r3		)
 		subcs	r2, r2, #1
 		bcs	2f
 		subs	r2, r2, #2
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index 99ea338..f83d449 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -43,7 +43,9 @@
 	rsb	ip, r2, #32
 	movmi	al, al, lsr r2
 	movpl	al, ah, lsr r3
-	orrmi	al, al, ah, lsl ip
+ ARM(	orrmi	al, al, ah, lsl ip	)
+ THUMB(	lslmi	r3, ah, ip		)
+ THUMB(	orrmi	al, al, r3		)
 	mov	ah, ah, lsr r2
 	mov	pc, lr
 
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index e0d0026..a9b9e22 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -13,8 +13,11 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
+#define LDR1W_SHIFT	0
+#define STR1W_SHIFT	0
+
 	.macro ldr1w ptr reg abort
-	ldr \reg, [\ptr], #4
+	W(ldr) \reg, [\ptr], #4
 	.endm
 
 	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
@@ -30,7 +33,7 @@
 	.endm
 
 	.macro str1w ptr reg abort
-	str \reg, [\ptr], #4
+	W(str) \reg, [\ptr], #4
 	.endm
 
 	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 1254918..5025c86 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -75,24 +75,24 @@
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
 6:		nop
-		ldr	r3, [r1, #-4]!
-		ldr	r4, [r1, #-4]!
-		ldr	r5, [r1, #-4]!
-		ldr	r6, [r1, #-4]!
-		ldr	r7, [r1, #-4]!
-		ldr	r8, [r1, #-4]!
-		ldr	lr, [r1, #-4]!
+		W(ldr)	r3, [r1, #-4]!
+		W(ldr)	r4, [r1, #-4]!
+		W(ldr)	r5, [r1, #-4]!
+		W(ldr)	r6, [r1, #-4]!
+		W(ldr)	r7, [r1, #-4]!
+		W(ldr)	r8, [r1, #-4]!
+		W(ldr)	lr, [r1, #-4]!
 
 		add	pc, pc, ip
 		nop
 		nop
-		str	r3, [r0, #-4]!
-		str	r4, [r0, #-4]!
-		str	r5, [r0, #-4]!
-		str	r6, [r0, #-4]!
-		str	r7, [r0, #-4]!
-		str	r8, [r0, #-4]!
-		str	lr, [r0, #-4]!
+		W(str)	r3, [r0, #-4]!
+		W(str)	r4, [r0, #-4]!
+		W(str)	r5, [r0, #-4]!
+		W(str)	r6, [r0, #-4]!
+		W(str)	r7, [r0, #-4]!
+		W(str)	r8, [r0, #-4]!
+		W(str)	lr, [r0, #-4]!
 
 	CALGN(	bcs	2b			)
 
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 864f3c1..02fedbf 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -37,6 +37,15 @@
 
 ENTRY(__put_user_2)
 	mov	ip, r2, lsr #8
+#ifdef CONFIG_THUMB2_KERNEL
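+	@ Thumb-2 strbt/strt have no post-index form; use immediate offsets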
+#ifndef __ARMEB__
+2:	strbt	r2, [r0]
+3:	strbt	ip, [r0, #1]
+#else
+2:	strbt	ip, [r0]
+3:	strbt	r2, [r0, #1]
+#endif
+#else	/* !CONFIG_THUMB2_KERNEL */
 #ifndef __ARMEB__
 2:	strbt	r2, [r0], #1
 3:	strbt	ip, [r0]
@@ -44,6 +53,7 @@
 2:	strbt	ip, [r0], #1
 3:	strbt	r2, [r0]
 #endif
+#endif	/* CONFIG_THUMB2_KERNEL */
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__put_user_2)
@@ -55,8 +65,13 @@
 ENDPROC(__put_user_4)
 
 ENTRY(__put_user_8)
+#ifdef CONFIG_THUMB2_KERNEL
+5:	strt	r2, [r0]
+6:	strt	r3, [r0, #4]
+#else
 5:	strt	r2, [r0], #4
 6:	strt	r3, [r0]
+#endif
 	mov	r0, #0
 	mov	pc, lr
 ENDPROC(__put_user_8)
diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S
index 330373c..1c9814f 100644
--- a/arch/arm/lib/strncpy_from_user.S
+++ b/arch/arm/lib/strncpy_from_user.S
@@ -23,7 +23,7 @@
 ENTRY(__strncpy_from_user)
 	mov	ip, r1
 1:	subs	r2, r2, #1
-USER(	ldrplbt	r3, [r1], #1)
+	ldrusr	r3, r1, 1, pl
 	bmi	2f
 	strb	r3, [r0], #1
 	teq	r3, #0
diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S
index 90bb9d0..7855b29 100644
--- a/arch/arm/lib/strnlen_user.S
+++ b/arch/arm/lib/strnlen_user.S
@@ -23,7 +23,7 @@
 ENTRY(__strnlen_user)
 	mov	r2, r0
 1:
-USER(	ldrbt	r3, [r0], #1)
+	ldrusr	r3, r0, 1
 	teq	r3, #0
 	beq	2f
 	subs	r1, r1, #1