Merge master.kernel.org:/pub/scm/linux/kernel/git/herbert/crypto-2.6

* master.kernel.org:/pub/scm/linux/kernel/git/herbert/crypto-2.6:
  [CRYPTO] aes: Fixed array boundary violation
  [CRYPTO] tcrypt: Fix key alignment
  [CRYPTO] all: Add missing cra_alignmask
  [CRYPTO] all: Use kzalloc where possible
  [CRYPTO] api: Align tfm context as wide as possible
  [CRYPTO] twofish: Use rol32/ror32 where appropriate
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c
index fb1b961..6f77e770 100644
--- a/arch/x86_64/crypto/aes.c
+++ b/arch/x86_64/crypto/aes.c
@@ -77,12 +77,11 @@
 struct aes_ctx
 {
 	u32 key_length;
-	u32 E[60];
-	u32 D[60];
+	u32 buf[120];
 };
 
-#define E_KEY ctx->E
-#define D_KEY ctx->D
+#define E_KEY (&ctx->buf[0])
+#define D_KEY (&ctx->buf[60])
 
 static u8 pow_tab[256] __initdata;
 static u8 log_tab[256] __initdata;
diff --git a/crypto/aes.c b/crypto/aes.c
index 0a6a5c1..a501729 100644
--- a/crypto/aes.c
+++ b/crypto/aes.c
@@ -75,12 +75,11 @@
 
 struct aes_ctx {
 	int key_length;
-	u32 E[60];
-	u32 D[60];
+	u32 buf[120];
 };
 
-#define E_KEY ctx->E
-#define D_KEY ctx->D
+#define E_KEY (&ctx->buf[0])
+#define D_KEY (&ctx->buf[60])
 
 static u8 pow_tab[256] __initdata;
 static u8 log_tab[256] __initdata;
diff --git a/crypto/api.c b/crypto/api.c
index e26156f..80bba63 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -165,7 +165,7 @@
 		break;
 	}
 
-	return len + alg->cra_alignmask;
+	return len + (alg->cra_alignmask & ~(crypto_tfm_ctx_alignment() - 1));
 }
 
 struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
@@ -179,12 +179,10 @@
 		goto out;
 
 	tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags);
-	tfm = kmalloc(tfm_size, GFP_KERNEL);
+	tfm = kzalloc(tfm_size, GFP_KERNEL);
 	if (tfm == NULL)
 		goto out_put;
 
-	memset(tfm, 0, tfm_size);
-	
 	tfm->__crt_alg = alg;
 	
 	if (crypto_init_flags(tfm, flags))
diff --git a/crypto/deflate.c b/crypto/deflate.c
index bc73342..f209368 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -73,12 +73,11 @@
 	int ret = 0;
 	struct z_stream_s *stream = &ctx->decomp_stream;
 
-	stream->workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
+	stream->workspace = kzalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
 	if (!stream->workspace ) {
 		ret = -ENOMEM;
 		goto out;
 	}
-	memset(stream->workspace, 0, zlib_inflate_workspacesize());
 	ret = zlib_inflateInit2(stream, -DEFLATE_DEF_WINBITS);
 	if (ret != Z_OK) {
 		ret = -EINVAL;
diff --git a/crypto/des.c b/crypto/des.c
index 7bb5486..2d74cab 100644
--- a/crypto/des.c
+++ b/crypto/des.c
@@ -965,6 +965,7 @@
 	.cra_blocksize		=	DES3_EDE_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct des3_ede_ctx),
 	.cra_module		=	THIS_MODULE,
+	.cra_alignmask		=	3,
 	.cra_list		=	LIST_HEAD_INIT(des3_ede_alg.cra_list),
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	DES3_EDE_KEY_SIZE,
diff --git a/crypto/serpent.c b/crypto/serpent.c
index 52ad1a4..e366406 100644
--- a/crypto/serpent.c
+++ b/crypto/serpent.c
@@ -481,6 +481,7 @@
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	SERPENT_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct serpent_ctx),
+	.cra_alignmask		=	3,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(serpent_alg.cra_list),
 	.cra_u			=	{ .cipher = {
diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h
index 733d07e..1f683ba 100644
--- a/crypto/tcrypt.h
+++ b/crypto/tcrypt.h
@@ -26,37 +26,38 @@
 #define MAX_IVLEN		32
 
 struct hash_testvec {
+	/* only used with keyed hash algorithms */
+	char key[128] __attribute__ ((__aligned__(4)));
 	char plaintext[128];
-	unsigned char psize;
 	char digest[MAX_DIGEST_SIZE];
-	unsigned char np;
 	unsigned char tap[MAX_TAP];
-	char key[128]; /* only used with keyed hash algorithms */
+	unsigned char psize;
+	unsigned char np;
 	unsigned char ksize;
 };
 
 struct hmac_testvec {
 	char key[128];
-	unsigned char ksize;
 	char plaintext[128];
-	unsigned char psize;
 	char digest[MAX_DIGEST_SIZE];
-	unsigned char np;
 	unsigned char tap[MAX_TAP];
+	unsigned char ksize;
+	unsigned char psize;
+	unsigned char np;
 };
 
 struct cipher_testvec {
-	unsigned char fail;
-	unsigned char wk; /* weak key flag */
-	char key[MAX_KEYLEN];
-	unsigned char klen;
+	char key[MAX_KEYLEN] __attribute__ ((__aligned__(4)));
 	char iv[MAX_IVLEN];
 	char input[48];
-	unsigned char ilen;
 	char result[48];
-	unsigned char rlen;
-	int np;
 	unsigned char tap[MAX_TAP];
+	int np;
+	unsigned char fail;
+	unsigned char wk; /* weak key flag */
+	unsigned char klen;
+	unsigned char ilen;
+	unsigned char rlen;
 };
 
 struct cipher_speed {
diff --git a/crypto/twofish.c b/crypto/twofish.c
index a26d885..ddfd5a3 100644
--- a/crypto/twofish.c
+++ b/crypto/twofish.c
@@ -44,6 +44,7 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/crypto.h>
+#include <linux/bitops.h>
 
 
 /* The large precomputed tables for the Twofish cipher (twofish.c)
@@ -542,9 +543,9 @@
 #define CALC_K(a, j, k, l, m, n) \
    x = CALC_K_2 (k, l, k, l, 0); \
    y = CALC_K_2 (m, n, m, n, 4); \
-   y = (y << 8) + (y >> 24); \
+   y = rol32(y, 8); \
    x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = (y << 9) + (y >> 23)
+   ctx->a[(j) + 1] = rol32(y, 9)
 
 #define CALC_K192_2(a, b, c, d, j) \
    CALC_K_2 (q0[a ^ key[(j) + 16]], \
@@ -555,9 +556,9 @@
 #define CALC_K192(a, j, k, l, m, n) \
    x = CALC_K192_2 (l, l, k, k, 0); \
    y = CALC_K192_2 (n, n, m, m, 4); \
-   y = (y << 8) + (y >> 24); \
+   y = rol32(y, 8); \
    x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = (y << 9) + (y >> 23)
+   ctx->a[(j) + 1] = rol32(y, 9)
 
 #define CALC_K256_2(a, b, j) \
    CALC_K192_2 (q1[b ^ key[(j) + 24]], \
@@ -568,9 +569,9 @@
 #define CALC_K256(a, j, k, l, m, n) \
    x = CALC_K256_2 (k, l, 0); \
    y = CALC_K256_2 (m, n, 4); \
-   y = (y << 8) + (y >> 24); \
+   y = rol32(y, 8); \
    x += y; y += x; ctx->a[j] = x; \
-   ctx->a[(j) + 1] = (y << 9) + (y >> 23)
+   ctx->a[(j) + 1] = rol32(y, 9)
 
 
 /* Macros to compute the g() function in the encryption and decryption
@@ -594,15 +595,15 @@
    x = G1 (a); y = G2 (b); \
    x += y; y += x + ctx->k[2 * (n) + 1]; \
    (c) ^= x + ctx->k[2 * (n)]; \
-   (c) = ((c) >> 1) + ((c) << 31); \
-   (d) = (((d) << 1)+((d) >> 31)) ^ y
+   (c) = ror32((c), 1); \
+   (d) = rol32((d), 1) ^ y
 
 #define DECROUND(n, a, b, c, d) \
    x = G1 (a); y = G2 (b); \
    x += y; y += x; \
    (d) ^= y + ctx->k[2 * (n) + 1]; \
-   (d) = ((d) >> 1) + ((d) << 31); \
-   (c) = (((c) << 1)+((c) >> 31)); \
+   (d) = ror32((d), 1); \
+   (c) = rol32((c), 1); \
    (c) ^= (x + ctx->k[2 * (n)])
 
 /* Encryption and decryption cycles; each one is simply two Feistel rounds
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index 0c08c58..5158a9d 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -284,7 +284,11 @@
 
 static inline struct aes_ctx *aes_ctx(void *ctx)
 {
-	return (struct aes_ctx *)ALIGN((unsigned long)ctx, PADLOCK_ALIGNMENT);
+	unsigned long align = PADLOCK_ALIGNMENT;
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+	return (struct aes_ctx *)ALIGN((unsigned long)ctx, align);
 }
 
 static int
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index d88bf8a..0ab1bc1 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -229,6 +229,8 @@
 	} crt_u;
 	
 	struct crypto_alg *__crt_alg;
+
+	char __crt_ctx[] __attribute__ ((__aligned__));
 };
 
 /* 
@@ -301,7 +303,13 @@
 
 static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
 {
-	return (void *)&tfm[1];
+	return tfm->__crt_ctx;
+}
+
+static inline unsigned int crypto_tfm_ctx_alignment(void)
+{
+	struct crypto_tfm *tfm;
+	return __alignof__(tfm->__crt_ctx);
 }
 
 /*