crypto: camellia - add AVX2/AES-NI/x86_64 assembler implementation of camellia cipher

Patch adds AVX2/AES-NI/x86-64 implementation of Camellia cipher, requiring
32 parallel blocks for input (512 bytes). Compared to AVX implementation, this
version is extended to use the 256-bit wide YMM registers. For AES-NI
instructions data is split to two 128-bit registers and merged afterwards.
Even with this additional handling, performance should be higher compared
to the AES-NI/AVX implementation.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a21af59..a3a0ed8 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -43,6 +43,7 @@
 # These modules require assembler to support AVX2.
 ifeq ($(avx2_supported),yes)
 	obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
+	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
 	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
 	obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
 endif
@@ -73,6 +74,7 @@
 
 ifeq ($(avx2_supported),yes)
 	blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
+	camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
 	serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
 	twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
 endif