crypto: sha512 - Move message schedule W[80] to static percpu area

The message schedule W (u64[80]) is too big for the stack. In order
for this algorithm to be used with shash it is moved to a static
percpu area.

Signed-off-by: Adrian-Ken Rueegsegger <ken@codelabs.ch>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index bc36861..cb85516 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -18,16 +18,17 @@
 #include <linux/crypto.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-
+#include <linux/percpu.h>
 #include <asm/byteorder.h>
 
 struct sha512_ctx {
 	u64 state[8];
 	u32 count[4];
 	u8 buf[128];
-	u64 W[80];
 };
 
+static DEFINE_PER_CPU(u64[80], msg_schedule);
+
 static inline u64 Ch(u64 x, u64 y, u64 z)
 {
         return z ^ (x & (y ^ z));
@@ -89,11 +90,12 @@
 }
 
 static void
-sha512_transform(u64 *state, u64 *W, const u8 *input)
+sha512_transform(u64 *state, const u8 *input)
 {
 	u64 a, b, c, d, e, f, g, h, t1, t2;
 
 	int i;
+	u64 *W = get_cpu_var(msg_schedule);
 
 	/* load the input */
         for (i = 0; i < 16; i++)
@@ -132,6 +134,8 @@
 
 	/* erase our data */
 	a = b = c = d = e = f = g = h = t1 = t2 = 0;
+	memset(W, 0, sizeof(__get_cpu_var(msg_schedule)));
+	put_cpu_var(msg_schedule);
 }
 
 static void
@@ -187,10 +191,10 @@
 	/* Transform as many times as possible. */
 	if (len >= part_len) {
 		memcpy(&sctx->buf[index], data, part_len);
-		sha512_transform(sctx->state, sctx->W, sctx->buf);
+		sha512_transform(sctx->state, sctx->buf);
 
 		for (i = part_len; i + 127 < len; i+=128)
-			sha512_transform(sctx->state, sctx->W, &data[i]);
+			sha512_transform(sctx->state, &data[i]);
 
 		index = 0;
 	} else {
@@ -199,9 +203,6 @@
 
 	/* Buffer remaining input */
 	memcpy(&sctx->buf[index], &data[i], len - i);
-
-	/* erase our data */
-	memset(sctx->W, 0, sizeof(sctx->W));
 }
 
 static void