crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher

Patch adds AVX2/x86-64 implementation of Blowfish cipher, requiring 32 parallel
blocks for input (256 bytes). Table look-ups are performed using vpgatherdd
instruction directly from vector registers and thus should be faster than
earlier implementations.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 3807084..f3effb42 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1655,6 +1655,9 @@
 		.test = alg_test_null,
 		.fips_allowed = 1,
 	}, {
+		.alg = "__driver-cbc-blowfish-avx2",
+		.test = alg_test_null,
+	}, {
 		.alg = "__driver-cbc-camellia-aesni",
 		.test = alg_test_null,
 	}, {
@@ -1677,6 +1680,9 @@
 		.test = alg_test_null,
 		.fips_allowed = 1,
 	}, {
+		.alg = "__driver-ecb-blowfish-avx2",
+		.test = alg_test_null,
+	}, {
 		.alg = "__driver-ecb-camellia-aesni",
 		.test = alg_test_null,
 	}, {
@@ -1948,6 +1954,9 @@
 		.test = alg_test_null,
 		.fips_allowed = 1,
 	}, {
+		.alg = "cryptd(__driver-cbc-blowfish-avx2)",
+		.test = alg_test_null,
+	}, {
 		.alg = "cryptd(__driver-cbc-camellia-aesni)",
 		.test = alg_test_null,
 	}, {
@@ -1955,6 +1964,9 @@
 		.test = alg_test_null,
 		.fips_allowed = 1,
 	}, {
+		.alg = "cryptd(__driver-ecb-blowfish-avx2)",
+		.test = alg_test_null,
+	}, {
 		.alg = "cryptd(__driver-ecb-camellia-aesni)",
 		.test = alg_test_null,
 	}, {