[CRYPTO] rmd320: Fix endian issues

This patch fixes endian issues making rmd320 work
properly on big-endian machines.

Signed-off-by: Adrian-Ken Rueegsegger <rueegsegger@swiss-it.ch>
Acked-by: Sebastian Siewior <sebastian@breakpoint.cc>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/rmd320.c b/crypto/rmd320.c
index b39c054..5b172f8 100644
--- a/crypto/rmd320.c
+++ b/crypto/rmd320.c
@@ -47,7 +47,7 @@
 #define F5(x, y, z) (x ^ (y | ~z))
 
 #define ROUND(a, b, c, d, e, f, k, x, s)  { \
-	(a) += f((b), (c), (d)) + (x) + (k); \
+	(a) += f((b), (c), (d)) + le32_to_cpu(x) + (k); \
 	(a) = rol32((a), (s)) + (e); \
 	(c) = rol32((c), 10); \
 }
@@ -280,28 +280,6 @@
 	return;
 }
 
-static inline void le32_to_cpu_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		le32_to_cpus(buf);
-		buf++;
-	}
-}
-
-static inline void cpu_to_le32_array(u32 *buf, unsigned int words)
-{
-	while (words--) {
-		cpu_to_le32s(buf);
-		buf++;
-	}
-}
-
-static inline void rmd320_transform_helper(struct rmd320_ctx *ctx)
-{
-	le32_to_cpu_array(ctx->buffer, sizeof(ctx->buffer) / sizeof(u32));
-	rmd320_transform(ctx->state, ctx->buffer);
-}
-
 static void rmd320_init(struct crypto_tfm *tfm)
 {
 	struct rmd320_ctx *rctx = crypto_tfm_ctx(tfm);
@@ -340,13 +318,13 @@
 	memcpy((char *)rctx->buffer + (sizeof(rctx->buffer) - avail),
 	       data, avail);
 
-	rmd320_transform_helper(rctx);
+	rmd320_transform(rctx->state, rctx->buffer);
 	data += avail;
 	len -= avail;
 
 	while (len >= sizeof(rctx->buffer)) {
 		memcpy(rctx->buffer, data, sizeof(rctx->buffer));
-		rmd320_transform_helper(rctx);
+		rmd320_transform(rctx->state, rctx->buffer);
 		data += sizeof(rctx->buffer);
 		len -= sizeof(rctx->buffer);
 	}
@@ -358,10 +336,12 @@
 static void rmd320_final(struct crypto_tfm *tfm, u8 *out)
 {
 	struct rmd320_ctx *rctx = crypto_tfm_ctx(tfm);
-	u32 index, padlen;
+	u32 i, index, padlen;
 	u64 bits;
+	u32 *dst = (u32 *)out;
 	static const u8 padding[64] = { 0x80, };
-	bits = rctx->byte_count << 3;
+
+	bits = cpu_to_le64(rctx->byte_count << 3);
 
 	/* Pad out to 56 mod 64 */
 	index = rctx->byte_count & 0x3f;
@@ -372,7 +352,8 @@
 	rmd320_update(tfm, (const u8 *)&bits, sizeof(bits));
 
 	/* Store state in digest */
-	memcpy(out, rctx->state, sizeof(rctx->state));
+	for (i = 0; i < 10; i++)
+		dst[i] = cpu_to_le32(rctx->state[i]);
 
 	/* Wipe context */
 	memset(rctx, 0, sizeof(*rctx));