crypto: serpent-sse2 - add lrw support

Patch adds LRW support for serpent-sse2 by using lrw_crypt(). Patch has been
tested with tcrypt and automated filesystem tests.

Tcrypt benchmarks results (serpent-sse2/serpent_generic speed ratios):

Benchmark results with tcrypt:

Intel Celeron T1600 (x86_64) (fam:6, model:15, step:13):
size    lrw-enc lrw-dec
16B     1.00x   0.96x
64B     1.01x   1.01x
256B    3.01x   2.97x
1024B   3.39x   3.33x
8192B   3.35x   3.33x

AMD Phenom II 1055T (x86_64) (fam:16, model:10):
size    lrw-enc lrw-dec
16B     0.98x   1.03x
64B     1.01x   1.04x
256B    2.10x   2.14x
1024B   2.28x   2.33x
8192B   2.30x   2.33x

Intel Atom N270 (i586):
size    lrw-enc lrw-dec
16B     0.97x   0.97x
64B     1.47x   1.50x
256B    1.72x   1.69x
1024B   1.88x   1.81x
8192B   1.84x   1.79x

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/serpent.c b/crypto/serpent.c
index eb61630..7cc2cae 100644
--- a/crypto/serpent.c
+++ b/crypto/serpent.c
@@ -206,9 +206,9 @@
 	x1 ^= x4;	x3 ^= x4;	x4 &= x0;	\
 	x4 ^= x2;
 
-int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
+int __serpent_setkey(struct serpent_ctx *ctx, const u8 *key,
+		     unsigned int keylen)
 {
-	struct serpent_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 *k = ctx->expkey;
 	u8  *k8 = (u8 *)k;
 	u32 r0,r1,r2,r3,r4;
@@ -349,6 +349,12 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__serpent_setkey);
+
+int serpent_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen)
+{
+	return __serpent_setkey(crypto_tfm_ctx(tfm), key, keylen);
+}
 EXPORT_SYMBOL_GPL(serpent_setkey);
 
 void __serpent_encrypt(struct serpent_ctx *ctx, u8 *dst, const u8 *src)