crypto: arc4 - improve performance by using u32 for ctx and variables
This patch changes the u8 members of struct arc4_ctx and the u8 local
variables to u32, as AMD CPUs seem to have problems with u8 arrays.
Below are tcrypt results for the old 1-byte block cipher versus
ecb(arc4) with u8 state and ecb(arc4) with u32 state.
tcrypt results, x86-64 (speed ratios: new-u32/old, new-u8/old):

                      u32    u8
  AMD Phenom II   :  x3.6   x2.7
  Intel Core 2    :  x2.0   x1.9

tcrypt results, i386 (speed ratios: new-u32/old, new-u8/old):

                      u32    u8
  Intel Atom N260 :  x1.5   x1.4
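
For reference, below is a minimal userspace sketch of the widened-state
arc4 (an illustration, not the kernel code: uint32_t stands in for the
kernel's u32, and rc4_sketch_ctx/rc4_ksa/rc4_prga are hypothetical
names). The state array still only ever holds the byte values 0..255;
widening the slots to 32 bits merely lets the compiler emit
full-register loads and stores instead of byte-sized ones.

#include <stdint.h>
#include <stddef.h>

struct rc4_sketch_ctx {
	uint32_t S[256];	/* byte values 0..255, one per 32-bit slot */
	uint32_t x, y;
};

/* Key schedule, mirroring arc4_set_key(): same logic as a classic u8
 * implementation, only the state and temporary types are wider. */
static void rc4_ksa(struct rc4_sketch_ctx *ctx, const uint8_t *key,
		    size_t key_len)
{
	uint32_t i, j = 0;
	size_t k = 0;

	ctx->x = 1;
	ctx->y = 0;

	for (i = 0; i < 256; i++)
		ctx->S[i] = i;

	for (i = 0; i < 256; i++) {
		uint32_t a = ctx->S[i];

		j = (j + key[k] + a) & 0xff;
		ctx->S[i] = ctx->S[j];
		ctx->S[j] = a;
		if (++k >= key_len)
			k = 0;
	}
}

/* Byte-at-a-time PRGA with the same widened locals as arc4_crypt();
 * every value stays in 0..255, so masking with 0xff replaces the
 * implicit wraparound that u8 arithmetic used to provide. */
static void rc4_prga(struct rc4_sketch_ctx *ctx, uint8_t *out,
		     const uint8_t *in, size_t len)
{
	uint32_t *const S = ctx->S;
	uint32_t x = ctx->x, y = ctx->y;

	while (len--) {
		uint32_t a = S[x], b;

		y = (y + a) & 0xff;
		b = S[y];
		S[x] = b;	/* swap S[x] and S[y] */
		S[y] = a;
		*out++ = *in++ ^ (uint8_t)S[(a + b) & 0xff];
		x = (x + 1) & 0xff;
	}

	ctx->x = x;
	ctx->y = y;
}

The generated keystream is identical to that of a u8 implementation;
only the types of the state and temporaries change.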
Cc: Jon Oberheide <jon@oberheide.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/arc4.c b/crypto/arc4.c
index 07913fc5..5a772c3 100644
--- a/crypto/arc4.c
+++ b/crypto/arc4.c
@@ -22,8 +22,8 @@
#define ARC4_BLOCK_SIZE 1
struct arc4_ctx {
- u8 S[256];
- u8 x, y;
+ u32 S[256];
+ u32 x, y;
};
static int arc4_set_key(struct crypto_tfm *tfm, const u8 *in_key,
@@ -39,7 +39,7 @@
ctx->S[i] = i;
for (i = 0; i < 256; i++) {
- u8 a = ctx->S[i];
+ u32 a = ctx->S[i];
j = (j + in_key[k] + a) & 0xff;
ctx->S[i] = ctx->S[j];
ctx->S[j] = a;
@@ -53,9 +53,9 @@
static void arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in,
unsigned int len)
{
- u8 *const S = ctx->S;
- u8 x, y, a, b;
- u8 ty, ta, tb;
+ u32 *const S = ctx->S;
+ u32 x, y, a, b;
+ u32 ty, ta, tb;
if (len == 0)
return;
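
The conversion is meant to be a pure performance change, so the widened
state must produce bit-identical keystream. As a quick self-check, the
hypothetical rc4_ksa()/rc4_prga() helpers sketched above can be run
against the well-known RC4 test vector (key "Key", plaintext
"Plaintext"):

#include <stdio.h>

int main(void)
{
	static const uint8_t key[3] = { 'K', 'e', 'y' };
	static const uint8_t pt[] = "Plaintext";
	uint8_t ct[sizeof(pt) - 1];	/* drop the trailing NUL */
	struct rc4_sketch_ctx ctx;
	size_t i;

	rc4_ksa(&ctx, key, sizeof(key));
	rc4_prga(&ctx, ct, pt, sizeof(ct));

	for (i = 0; i < sizeof(ct); i++)
		printf("%02x", ct[i]);	/* expect bbf316e8d940af0ad3 */
	printf("\n");
	return 0;
}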