crypto: drbg - in-place cipher operation for CTR

The cipher implementations of the kernel crypto API favor in-place
cipher operations. Thus, switch the CTR cipher operation in the DRBG to
perform in-place operations. This is implemented by using the output
buffer as input buffer and zeroizing it before the cipher operation to
implement a CTR encryption of a NULL buffer.

The speed improvement is quite visibile with the following comparison
using the LRNG implementation.

Without the patch set:

      16 bytes|           12.267661 MB/s|    61338304 bytes |  5000000213 ns
      32 bytes|           23.603770 MB/s|   118018848 bytes |  5000000073 ns
      64 bytes|           46.732262 MB/s|   233661312 bytes |  5000000241 ns
     128 bytes|           90.038042 MB/s|   450190208 bytes |  5000000244 ns
     256 bytes|          160.399616 MB/s|   801998080 bytes |  5000000393 ns
     512 bytes|          259.878400 MB/s|  1299392000 bytes |  5000001675 ns
    1024 bytes|          386.050662 MB/s|  1930253312 bytes |  5000001661 ns
    2048 bytes|          493.641728 MB/s|  2468208640 bytes |  5000001598 ns
    4096 bytes|          581.835981 MB/s|  2909179904 bytes |  5000003426 ns

With the patch set:

      16 bytes |         17.051142 MB/s |     85255712 bytes |  5000000854 ns
      32 bytes |         32.695898 MB/s |    163479488 bytes |  5000000544 ns
      64 bytes |         64.490739 MB/s |    322453696 bytes |  5000000954 ns
     128 bytes |        123.285043 MB/s |    616425216 bytes |  5000000201 ns
     256 bytes |        233.434573 MB/s |   1167172864 bytes |  5000000573 ns
     512 bytes |        384.405197 MB/s |   1922025984 bytes |  5000000671 ns
    1024 bytes |        566.313370 MB/s |   2831566848 bytes |  5000001080 ns
    2048 bytes |        744.518042 MB/s |   3722590208 bytes |  5000000926 ns
    4096 bytes |        867.501670 MB/s |   4337508352 bytes |  5000002181 ns

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/drbg.c b/crypto/drbg.c
index ee302fd..bc52d95 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -261,8 +261,7 @@
 static int drbg_kcapi_sym_ctr(struct drbg_state *drbg,
 			      u8 *inbuf, u32 inbuflen,
 			      u8 *outbuf, u32 outlen);
-#define DRBG_CTR_NULL_LEN 128
-#define DRBG_OUTSCRATCHLEN DRBG_CTR_NULL_LEN
+#define DRBG_OUTSCRATCHLEN 256
 
 /* BCC function for CTR DRBG as defined in 10.4.3 */
 static int drbg_ctr_bcc(struct drbg_state *drbg,
@@ -555,8 +554,7 @@
 	}
 
 	/* 10.2.1.5.2 step 4.1 */
-	ret = drbg_kcapi_sym_ctr(drbg, drbg->ctr_null_value, DRBG_CTR_NULL_LEN,
-				 buf, len);
+	ret = drbg_kcapi_sym_ctr(drbg, NULL, 0, buf, len);
 	if (ret)
 		return ret;
 
@@ -1644,9 +1642,6 @@
 		skcipher_request_free(drbg->ctr_req);
 	drbg->ctr_req = NULL;
 
-	kfree(drbg->ctr_null_value_buf);
-	drbg->ctr_null_value = NULL;
-
 	kfree(drbg->outscratchpadbuf);
 	drbg->outscratchpadbuf = NULL;
 
@@ -1697,15 +1692,6 @@
 					crypto_req_done, &drbg->ctr_wait);
 
 	alignmask = crypto_skcipher_alignmask(sk_tfm);
-	drbg->ctr_null_value_buf = kzalloc(DRBG_CTR_NULL_LEN + alignmask,
-					   GFP_KERNEL);
-	if (!drbg->ctr_null_value_buf) {
-		drbg_fini_sym_kernel(drbg);
-		return -ENOMEM;
-	}
-	drbg->ctr_null_value = (u8 *)PTR_ALIGN(drbg->ctr_null_value_buf,
-					       alignmask + 1);
-
 	drbg->outscratchpadbuf = kmalloc(DRBG_OUTSCRATCHLEN + alignmask,
 					 GFP_KERNEL);
 	if (!drbg->outscratchpadbuf) {
@@ -1716,7 +1702,7 @@
 					      alignmask + 1);
 
 	sg_init_table(&drbg->sg_in, 1);
-	sg_init_table(&drbg->sg_out, 1);
+	sg_init_one(&drbg->sg_out, drbg->outscratchpad, DRBG_OUTSCRATCHLEN);
 
 	return alignmask;
 }
@@ -1747,10 +1733,18 @@
 			      u8 *outbuf, u32 outlen)
 {
 	struct scatterlist *sg_in = &drbg->sg_in, *sg_out = &drbg->sg_out;
+	u32 scratchpad_use = min_t(u32, outlen, DRBG_OUTSCRATCHLEN);
 	int ret;
 
-	sg_set_buf(sg_in, inbuf, inlen);
-	sg_set_buf(sg_out, drbg->outscratchpad, DRBG_OUTSCRATCHLEN);
+	if (inbuf) {
+		/* Use caller-provided input buffer */
+		sg_set_buf(sg_in, inbuf, inlen);
+	} else {
+		/* Use scratchpad for in-place operation */
+		inlen = scratchpad_use;
+		memset(drbg->outscratchpad, 0, scratchpad_use);
+		sg_set_buf(sg_in, drbg->outscratchpad, scratchpad_use);
+	}
 
 	while (outlen) {
 		u32 cryptlen = min3(inlen, outlen, (u32)DRBG_OUTSCRATCHLEN);
@@ -1766,6 +1760,7 @@
 		crypto_init_wait(&drbg->ctr_wait);
 
 		memcpy(outbuf, drbg->outscratchpad, cryptlen);
+		memzero_explicit(drbg->outscratchpad, cryptlen);
 
 		outlen -= cryptlen;
 		outbuf += cryptlen;
@@ -1773,7 +1768,6 @@
 	ret = 0;
 
 out:
-	memzero_explicit(drbg->outscratchpad, DRBG_OUTSCRATCHLEN);
 	return ret;
 }
 #endif /* CONFIG_CRYPTO_DRBG_CTR */
diff --git a/include/crypto/drbg.h b/include/crypto/drbg.h
index 54b9f5d3..3fb581b 100644
--- a/include/crypto/drbg.h
+++ b/include/crypto/drbg.h
@@ -122,8 +122,6 @@
 
 	struct crypto_skcipher *ctr_handle;	/* CTR mode cipher handle */
 	struct skcipher_request *ctr_req;	/* CTR mode request handle */
-	__u8 *ctr_null_value_buf;		/* CTR mode unaligned buffer */
-	__u8 *ctr_null_value;			/* CTR mode aligned zero buf */
 	__u8 *outscratchpadbuf;			/* CTR mode output scratchpad */
         __u8 *outscratchpad;			/* CTR mode aligned outbuf */
 	struct crypto_wait ctr_wait;		/* CTR mode async wait obj */