[ARM] Fix csumpartial corner case

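Ji-In Park discovered a bug in csumpartial which caused wrong
checksums with misaligned buffers (buffers starting at an odd
address).  Fix it by rotating the incoming sum by 8 bits whenever
buf is odd, before any data is accumulated into it.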

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index cb5e370..fe797cf 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -39,6 +39,7 @@
 
 		/* we must have at least one byte. */
 		tst	buf, #1			@ odd address?
+		movne	sum, sum, ror #8
 		ldrneb	td0, [buf], #1
 		subne	len, len, #1
 		adcnes	sum, sum, td0, put_byte_1
@@ -103,6 +104,9 @@
 		cmp	len, #8			@ Ensure that we have at least
 		blo	.less8			@ 8 bytes to copy.
 
+		tst	buf, #1
+		movne	sum, sum, ror #8
+
 		adds	sum, sum, #0		@ C = 0
 		tst	buf, #3			@ Test destination alignment
 		blne	.not_aligned		@ aligh destination, return here