[CRYPTO] Add alignmask for low-level cipher implementations

The VIA Padlock device requires the input and output buffers to
be aligned on 16-byte boundaries.  This patch adds the alignmask
attribute for low-level cipher implementations to indicate their
alignment requirements.

The mid-level crypt() function will copy the input/output buffers
if they are not aligned correctly before they are passed to the
low-level implementation.

Strictly speaking, some of the software implementations require
the buffers to be aligned on 4-byte boundaries as they do 32-bit
loads.  However, it is not clear whether it is better to copy
the buffers or pay the penalty for unaligned loads/stores.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/crypto/api.c b/crypto/api.c
index 394169a..f55856b 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -168,6 +168,12 @@
 {
 	int ret = 0;
 	struct crypto_alg *q;
+
+	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
+		return -EINVAL;
+
+	if (alg->cra_alignmask > PAGE_SIZE)
+		return -EINVAL;
 	
 	down_write(&crypto_alg_sem);
 	
diff --git a/crypto/cipher.c b/crypto/cipher.c
index 54c4a56..85eb12f 100644
--- a/crypto/cipher.c
+++ b/crypto/cipher.c
@@ -41,8 +41,10 @@
 			       struct scatter_walk *in,
 			       struct scatter_walk *out, unsigned int bsize)
 {
-	u8 src[bsize];
-	u8 dst[bsize];
+	unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask;
+	u8 buffer[bsize * 2 + alignmask];
+	u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
+	u8 *dst = src + bsize;
 	unsigned int n;
 
 	n = scatterwalk_copychunks(src, in, bsize, 0);
@@ -59,15 +61,24 @@
 static inline unsigned int crypt_fast(const struct cipher_desc *desc,
 				      struct scatter_walk *in,
 				      struct scatter_walk *out,
-				      unsigned int nbytes)
+				      unsigned int nbytes, u8 *tmp)
 {
 	u8 *src, *dst;
 
 	src = in->data;
 	dst = scatterwalk_samebuf(in, out) ? src : out->data;
 
+	if (tmp) {
+		memcpy(tmp, in->data, nbytes);
+		src = tmp;
+		dst = tmp;
+	}
+
 	nbytes = desc->prfn(desc, dst, src, nbytes);
 
+	if (tmp)
+		memcpy(out->data, tmp, nbytes);
+
 	scatterwalk_advance(in, nbytes);
 	scatterwalk_advance(out, nbytes);
 
@@ -87,6 +98,8 @@
 	struct scatter_walk walk_in, walk_out;
 	struct crypto_tfm *tfm = desc->tfm;
 	const unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+	unsigned int alignmask = tfm->__crt_alg->cra_alignmask;
+	unsigned long buffer = 0;
 
 	if (!nbytes)
 		return 0;
@@ -100,16 +113,27 @@
 	scatterwalk_start(&walk_out, dst);
 
 	for(;;) {
-		unsigned int n;
+		unsigned int n = nbytes;
+		u8 *tmp = NULL;
+
+		if (!scatterwalk_aligned(&walk_in, alignmask) ||
+		    !scatterwalk_aligned(&walk_out, alignmask)) {
+			if (!buffer) {
+				buffer = __get_free_page(GFP_ATOMIC);
+				if (!buffer)
+					n = 0;
+			}
+			tmp = (u8 *)buffer;
+		}
 
 		scatterwalk_map(&walk_in, 0);
 		scatterwalk_map(&walk_out, 1);
 
-		n = scatterwalk_clamp(&walk_in, nbytes);
+		n = scatterwalk_clamp(&walk_in, n);
 		n = scatterwalk_clamp(&walk_out, n);
 
 		if (likely(n >= bsize))
-			n = crypt_fast(desc, &walk_in, &walk_out, n);
+			n = crypt_fast(desc, &walk_in, &walk_out, n, tmp);
 		else
 			n = crypt_slow(desc, &walk_in, &walk_out, bsize);
 
@@ -119,10 +143,15 @@
 		scatterwalk_done(&walk_out, 1, nbytes);
 
 		if (!nbytes)
-			return 0;
+			break;
 
 		crypto_yield(tfm);
 	}
+
+	if (buffer)
+		free_page(buffer);
+
+	return 0;
 }
 
 static unsigned int cbc_process_encrypt(const struct cipher_desc *desc,
diff --git a/crypto/scatterwalk.h b/crypto/scatterwalk.h
index 5495bb9..e79925c 100644
--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
@@ -55,6 +55,12 @@
 	walk->len_this_segment -= nbytes;
 }
 
+static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk,
+					       unsigned int alignmask)
+{
+	return !(walk->offset & alignmask);
+}
+
 void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg);
 int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out);
 void scatterwalk_map(struct scatter_walk *walk, int out);