[ARM] 3256/1: Make the function-returning ldm's use sp as the base register

Patch from Catalin Marinas

If the low interrupt latency mode is enabled for the CPU (from ARMv6
onwards), the ldm/stm instructions are no longer atomic. An ldm instruction
restoring the sp and pc registers can be interrupted immediately after sp
has been updated but before pc has been loaded. If this happens, the CPU
restores the base register to its value before the ldm instruction. If the
base register is not sp, however, sp keeps its updated value, so the
interrupt routine will corrupt the stack and the restarted ldm instruction
will load garbage.

Note that future ARM cores might always run in the low interrupt latency
mode.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S
index 990ee63..21effe0 100644
--- a/arch/arm/lib/csumpartialcopy.S
+++ b/arch/arm/lib/csumpartialcopy.S
@@ -18,11 +18,13 @@
  */
 
 		.macro	save_regs
+		mov	ip, sp
 		stmfd	sp!, {r1, r4 - r8, fp, ip, lr, pc}
+		sub	fp, ip, #4
 		.endm
 
-		.macro	load_regs,flags
-		LOADREGS(\flags,fp,{r1, r4 - r8, fp, sp, pc})
+		.macro	load_regs
+		ldmfd	sp, {r1, r4 - r8, fp, sp, pc}
 		.endm
 
 		.macro	load1b, reg1