[CRYPTO] rmd128: Fix endian problems

This patch is based on Sebastian Siewior's patch and
fixes endian issues, making rmd128 work properly on
big-endian machines.

Signed-off-by: Adrian-Ken Rueegsegger <rueegsegger@swiss-it.ch>
Acked-by: Sebastian Siewior <sebastian@breakpoint.cc>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/rmd128.c b/crypto/rmd128.c
index f72d2ce8..89a535a 100644
--- a/crypto/rmd128.c
+++ b/crypto/rmd128.c
@@ -44,7 +44,7 @@
 #define F4(x, y, z) (y ^ (z & (x ^ y)))	/* z ? x : y */
 
 #define ROUND(a, b, c, d, f, k, x, s)  { \
-	(a) += f((b), (c), (d)) + (x) + (k); \
+	(a) += f((b), (c), (d)) + le32_to_cpu(x) + (k); \
 	(a) = rol32((a), (s)); \
 }
 
@@ -218,28 +218,6 @@
 	return;
 }
 
-static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		le32_to_cpus(buf);
-		buf++;
-	}
-}
-
-static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		cpu_to_le32s(buf);
-		buf++;
-	}
-}
-
-static inline void rmd128_transform_helper(struct rmd128_ctx *ctx)
-{
-	le32_to_cpu_array(ctx->buffer, sizeof(ctx->buffer) / sizeof(u32));
-	rmd128_transform(ctx->state, ctx->buffer);
-}
-
 static void rmd128_init(struct crypto_tfm *tfm)
 {
 	struct rmd128_ctx *rctx = crypto_tfm_ctx(tfm);
@@ -272,13 +250,13 @@
 	memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail),
 	       data, avail);
 
-	rmd128_transform_helper(rctx);
+	rmd128_transform(rctx->state, rctx->buffer);
 	data += avail;
 	len -= avail;
 
 	while (len >= sizeof(rctx->buffer)) {
 		memcpy(rctx->buffer, data, sizeof(rctx->buffer));
-		rmd128_transform_helper(rctx);
+		rmd128_transform(rctx->state, rctx->buffer);
 		data += sizeof(rctx->buffer);
 		len -= sizeof(rctx->buffer);
 	}
@@ -290,10 +268,12 @@
 static void rmd128_final(struct crypto_tfm *tfm, u8 *out)
 {
 	struct rmd128_ctx *rctx = crypto_tfm_ctx(tfm);
-	u32 index, padlen;
+	u32 i, index, padlen;
 	u64 bits;
+	u32 *dst = (u32 *)out;
 	static const u8 padding[64] = { 0x80, };
-	bits = rctx->byte_count << 3;
+
+	bits = cpu_to_le64(rctx->byte_count << 3);
 
 	/* Pad out to 56 mod 64 */
 	index = rctx->byte_count & 0x3f;
@@ -304,7 +284,8 @@
 	rmd128_update(tfm, (const u8 *)&bits, sizeof(bits));
 
 	/* Store state in digest */
-	memcpy(out, rctx->state, sizeof(rctx->state));
+	for (i = 0; i < 4; i++)
+		dst[i] = cpu_to_le32(rctx->state[i]);
 
 	/* Wipe context */
 	memset(rctx, 0, sizeof(*rctx));