Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Merge the crypto tree to pick up inside-secure fixes.
diff --git a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
new file mode 100644
index 0000000..cec8d5d
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
@@ -0,0 +1,22 @@
+Arm TrustZone CryptoCell cryptographic engine
+
+Required properties:
+- compatible: Should be "arm,cryptocell-712-ree".
+- reg: Base physical address of the engine and length of memory mapped region.
+- interrupts: Interrupt number for the device.
+
+Optional properties:
+- interrupt-parent: The phandle for the interrupt controller that services
+  interrupts for this device.
+- clocks: Reference to the crypto engine clock.
+- dma-coherent: Present if dma operations are coherent.
+
+Examples:
+
+       arm_cc712: crypto@80000000 {
+               compatible = "arm,cryptocell-712-ree";
+               interrupt-parent = <&intc>;
+               interrupts = < 0 30 4 >;
+               reg = < 0x80000000 0x10000 >;
+
+       };
diff --git a/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt b/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
index 4ca8dd4..a13fbdb 100644
--- a/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
+++ b/Documentation/devicetree/bindings/crypto/samsung,exynos-rng4.txt
@@ -2,7 +2,9 @@
 
 Required properties:
 
-- compatible  : Should be "samsung,exynos4-rng".
+- compatible  : One of:
+                - "samsung,exynos4-rng" for Exynos4210 and Exynos4412
+                - "samsung,exynos5250-prng" for Exynos5250+
 - reg         : Specifies base physical address and size of the registers map.
 - clocks      : Phandle to clock-controller plus clock-specifier pair.
 - clock-names : "secss" as a clock name.
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
new file mode 100644
index 0000000..970487f
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt
@@ -0,0 +1,19 @@
+* STMicroelectronics STM32 CRYP
+
+Required properties:
+- compatible: Should be "st,stm32f756-cryp".
+- reg: The address and length of the peripheral register space
+- clocks: The input clock of the CRYP instance
+- interrupts: The CRYP interrupt
+
+Optional properties:
+- resets: The input reset of the CRYP instance
+
+Example:
+crypto@50060000 {
+	compatible = "st,stm32f756-cryp";
+	reg = <0x50060000 0x400>;
+	interrupts = <79>;
+	clocks = <&rcc 0 STM32F7_AHB2_CLOCK(CRYP)>;
+	resets = <&rcc STM32F7_AHB2_RESET(CRYP)>;
+};
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
index 2654269..627b295 100644
--- a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
+++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
@@ -1,11 +1,19 @@
-BCM2835 Random number generator
+BCM2835/6368 Random number generator
 
 Required properties:
 
-- compatible : should be "brcm,bcm2835-rng"  or "brcm,bcm-nsp-rng" or
-  "brcm,bcm5301x-rng"
+- compatible : should be one of
+	"brcm,bcm2835-rng"
+	"brcm,bcm-nsp-rng"
+	"brcm,bcm5301x-rng" or
+	"brcm,bcm6368-rng"
 - reg : Specifies base physical address and size of the registers.
 
+Optional properties:
+
+- clocks : phandle to clock-controller plus clock-specifier pair
+- clock-names : "ipsec" as a clock name
+
 Example:
 
 rng {
@@ -17,3 +25,11 @@
 	compatible = "brcm,bcm-nsp-rng";
 	reg = <0x18033000 0x14>;
 };
+
+random: rng@10004180 {
+	compatible = "brcm,bcm6368-rng";
+	reg = <0x10004180 0x14>;
+
+	clocks = <&periph_clk 18>;
+	clock-names = "ipsec";
+};
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm6368.txt b/Documentation/devicetree/bindings/rng/brcm,bcm6368.txt
deleted file mode 100644
index 4b5ac60..0000000
--- a/Documentation/devicetree/bindings/rng/brcm,bcm6368.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-BCM6368 Random number generator
-
-Required properties:
-
-- compatible : should be "brcm,bcm6368-rng"
-- reg : Specifies base physical address and size of the registers
-- clocks : phandle to clock-controller plus clock-specifier pair
-- clock-names : "ipsec" as a clock name
-
-Example:
-	random: rng@10004180 {
-		compatible = "brcm,bcm6368-rng";
-		reg = <0x10004180 0x14>;
-
-		clocks = <&periph_clk 18>;
-		clock-names = "ipsec";
-	};
diff --git a/MAINTAINERS b/MAINTAINERS
index 82ad0ea..e6d849d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11995,6 +11995,7 @@
 SAMSUNG S5P Security SubSystem (SSS) DRIVER
 M:	Krzysztof Kozlowski <krzk@kernel.org>
 M:	Vladimir Zapolskiy <vz@mleia.com>
+M:	Kamil Konieczny <k.konieczny@partner.samsung.com>
 L:	linux-crypto@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
 S:	Maintained
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 18768f3..07e3194 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -181,9 +181,8 @@
 	struct aesbs_cbc_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	ctx->enc_tfm = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(ctx->enc_tfm))
-		return PTR_ERR(ctx->enc_tfm);
-	return 0;
+
+	return PTR_ERR_OR_ZERO(ctx->enc_tfm);
 }
 
 static void cbc_exit(struct crypto_tfm *tfm)
@@ -258,9 +257,8 @@
 	struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(ctx->tweak_tfm))
-		return PTR_ERR(ctx->tweak_tfm);
-	return 0;
+
+	return PTR_ERR_OR_ZERO(ctx->tweak_tfm);
 }
 
 static void xts_exit(struct crypto_tfm *tfm)
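
The two-branch pattern removed above is exactly what the PTR_ERR_OR_ZERO() helper
from <linux/err.h> encapsulates, so the conversion is behaviour-preserving. A
paraphrased sketch of that helper, for reference only (not part of this patch):

	/* roughly what PTR_ERR_OR_ZERO(ptr) evaluates to */
	static inline int PTR_ERR_OR_ZERO(const void *ptr)
	{
		if (IS_ERR(ptr))
			return PTR_ERR(ptr);
		return 0;
	}
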
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index b5edc59..f5e8295 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -24,7 +24,7 @@
 crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
-CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
+aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
 
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
 aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
diff --git a/arch/arm64/crypto/aes-ce-core.S b/arch/arm64/crypto/aes-ce-core.S
new file mode 100644
index 0000000..8efdfda
--- /dev/null
+++ b/arch/arm64/crypto/aes-ce-core.S
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.arch		armv8-a+crypto
+
+ENTRY(__aes_ce_encrypt)
+	sub		w3, w3, #2
+	ld1		{v0.16b}, [x2]
+	ld1		{v1.4s}, [x0], #16
+	cmp		w3, #10
+	bmi		0f
+	bne		3f
+	mov		v3.16b, v1.16b
+	b		2f
+0:	mov		v2.16b, v1.16b
+	ld1		{v3.4s}, [x0], #16
+1:	aese		v0.16b, v2.16b
+	aesmc		v0.16b, v0.16b
+2:	ld1		{v1.4s}, [x0], #16
+	aese		v0.16b, v3.16b
+	aesmc		v0.16b, v0.16b
+3:	ld1		{v2.4s}, [x0], #16
+	subs		w3, w3, #3
+	aese		v0.16b, v1.16b
+	aesmc		v0.16b, v0.16b
+	ld1		{v3.4s}, [x0], #16
+	bpl		1b
+	aese		v0.16b, v2.16b
+	eor		v0.16b, v0.16b, v3.16b
+	st1		{v0.16b}, [x1]
+	ret
+ENDPROC(__aes_ce_encrypt)
+
+ENTRY(__aes_ce_decrypt)
+	sub		w3, w3, #2
+	ld1		{v0.16b}, [x2]
+	ld1		{v1.4s}, [x0], #16
+	cmp		w3, #10
+	bmi		0f
+	bne		3f
+	mov		v3.16b, v1.16b
+	b		2f
+0:	mov		v2.16b, v1.16b
+	ld1		{v3.4s}, [x0], #16
+1:	aesd		v0.16b, v2.16b
+	aesimc		v0.16b, v0.16b
+2:	ld1		{v1.4s}, [x0], #16
+	aesd		v0.16b, v3.16b
+	aesimc		v0.16b, v0.16b
+3:	ld1		{v2.4s}, [x0], #16
+	subs		w3, w3, #3
+	aesd		v0.16b, v1.16b
+	aesimc		v0.16b, v0.16b
+	ld1		{v3.4s}, [x0], #16
+	bpl		1b
+	aesd		v0.16b, v2.16b
+	eor		v0.16b, v0.16b, v3.16b
+	st1		{v0.16b}, [x1]
+	ret
+ENDPROC(__aes_ce_decrypt)
+
+/*
+ * __aes_ce_sub() - use the aese instruction to perform the AES sbox
+ *                  substitution on each byte in 'input'
+ */
+ENTRY(__aes_ce_sub)
+	dup		v1.4s, w0
+	movi		v0.16b, #0
+	aese		v0.16b, v1.16b
+	umov		w0, v0.s[0]
+	ret
+ENDPROC(__aes_ce_sub)
+
+ENTRY(__aes_ce_invert)
+	ld1		{v0.4s}, [x1]
+	aesimc		v1.16b, v0.16b
+	st1		{v1.4s}, [x0]
+	ret
+ENDPROC(__aes_ce_invert)
diff --git a/arch/arm64/crypto/aes-ce-cipher.c b/arch/arm64/crypto/aes-ce-glue.c
similarity index 62%
rename from arch/arm64/crypto/aes-ce-cipher.c
rename to arch/arm64/crypto/aes-ce-glue.c
index 6a75cd7..e6b3227 100644
--- a/arch/arm64/crypto/aes-ce-cipher.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -29,6 +29,13 @@
 	u8 b[AES_BLOCK_SIZE];
 };
 
+asmlinkage void __aes_ce_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_ce_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
+asmlinkage u32 __aes_ce_sub(u32 l);
+asmlinkage void __aes_ce_invert(struct aes_block *out,
+				const struct aes_block *in);
+
 static int num_rounds(struct crypto_aes_ctx *ctx)
 {
 	/*
@@ -44,10 +51,6 @@
 static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct aes_block *out = (struct aes_block *)dst;
-	struct aes_block const *in = (struct aes_block *)src;
-	void *dummy0;
-	int dummy1;
 
 	if (!may_use_simd()) {
 		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
@@ -55,49 +58,13 @@
 	}
 
 	kernel_neon_begin();
-
-	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.4s}, [%[key]], #16		;"
-		"	cmp	%w[rounds], #10			;"
-		"	bmi	0f				;"
-		"	bne	3f				;"
-		"	mov	v3.16b, v1.16b			;"
-		"	b	2f				;"
-		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"1:	aese	v0.16b, v2.16b			;"
-		"	aesmc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.4s}, [%[key]], #16		;"
-		"	aese	v0.16b, v3.16b			;"
-		"	aesmc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.4s}, [%[key]], #16		;"
-		"	subs	%w[rounds], %w[rounds], #3	;"
-		"	aese	v0.16b, v1.16b			;"
-		"	aesmc	v0.16b, v0.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"	bpl	1b				;"
-		"	aese	v0.16b, v2.16b			;"
-		"	eor	v0.16b, v0.16b, v3.16b		;"
-		"	st1	{v0.16b}, %[out]		;"
-
-	:	[out]		"=Q"(*out),
-		[key]		"=r"(dummy0),
-		[rounds]	"=r"(dummy1)
-	:	[in]		"Q"(*in),
-				"1"(ctx->key_enc),
-				"2"(num_rounds(ctx) - 2)
-	:	"cc");
-
+	__aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
 	kernel_neon_end();
 }
 
 static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct aes_block *out = (struct aes_block *)dst;
-	struct aes_block const *in = (struct aes_block *)src;
-	void *dummy0;
-	int dummy1;
 
 	if (!may_use_simd()) {
 		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
@@ -105,62 +72,10 @@
 	}
 
 	kernel_neon_begin();
-
-	__asm__("	ld1	{v0.16b}, %[in]			;"
-		"	ld1	{v1.4s}, [%[key]], #16		;"
-		"	cmp	%w[rounds], #10			;"
-		"	bmi	0f				;"
-		"	bne	3f				;"
-		"	mov	v3.16b, v1.16b			;"
-		"	b	2f				;"
-		"0:	mov	v2.16b, v1.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"1:	aesd	v0.16b, v2.16b			;"
-		"	aesimc	v0.16b, v0.16b			;"
-		"2:	ld1	{v1.4s}, [%[key]], #16		;"
-		"	aesd	v0.16b, v3.16b			;"
-		"	aesimc	v0.16b, v0.16b			;"
-		"3:	ld1	{v2.4s}, [%[key]], #16		;"
-		"	subs	%w[rounds], %w[rounds], #3	;"
-		"	aesd	v0.16b, v1.16b			;"
-		"	aesimc	v0.16b, v0.16b			;"
-		"	ld1	{v3.4s}, [%[key]], #16		;"
-		"	bpl	1b				;"
-		"	aesd	v0.16b, v2.16b			;"
-		"	eor	v0.16b, v0.16b, v3.16b		;"
-		"	st1	{v0.16b}, %[out]		;"
-
-	:	[out]		"=Q"(*out),
-		[key]		"=r"(dummy0),
-		[rounds]	"=r"(dummy1)
-	:	[in]		"Q"(*in),
-				"1"(ctx->key_dec),
-				"2"(num_rounds(ctx) - 2)
-	:	"cc");
-
+	__aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
 	kernel_neon_end();
 }
 
-/*
- * aes_sub() - use the aese instruction to perform the AES sbox substitution
- *             on each byte in 'input'
- */
-static u32 aes_sub(u32 input)
-{
-	u32 ret;
-
-	__asm__("dup	v1.4s, %w[in]		;"
-		"movi	v0.16b, #0		;"
-		"aese	v0.16b, v1.16b		;"
-		"umov	%w[out], v0.4s[0]	;"
-
-	:	[out]	"=r"(ret)
-	:	[in]	"r"(input)
-	:		"v0","v1");
-
-	return ret;
-}
-
 int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		     unsigned int key_len)
 {
@@ -189,7 +104,7 @@
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+		rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -202,7 +117,7 @@
 		} else if (key_len == AES_KEYSIZE_256) {
 			if (i >= 6)
 				break;
-			rko[4] = aes_sub(rko[3]) ^ rki[4];
+			rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
 			rko[5] = rko[4] ^ rki[5];
 			rko[6] = rko[5] ^ rki[6];
 			rko[7] = rko[6] ^ rki[7];
@@ -221,13 +136,7 @@
 
 	key_dec[0] = key_enc[j];
 	for (i = 1, j--; j > 0; i++, j--)
-		__asm__("ld1	{v0.4s}, %[in]		;"
-			"aesimc	v1.16b, v0.16b		;"
-			"st1	{v1.4s}, %[out]	;"
-
-		:	[out]	"=Q"(key_dec[i])
-		:	[in]	"Q"(key_enc[j])
-		:		"v0","v1");
+		__aes_ce_invert(key_dec + i, key_enc + j);
 	key_dec[i] = key_enc[0];
 
 	kernel_neon_end();
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 998ba51..2fa850e 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -665,6 +665,7 @@
 
 unregister_simds:
 	aes_exit();
+	return err;
 unregister_ciphers:
 	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
 	return err;
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 3bf3dcf..a5ee78d7 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -690,8 +690,8 @@
 	       rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
 }
 
-static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
-			   unsigned int key_len)
+static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key,
+				  unsigned int key_len)
 {
 	struct cryptd_aead **ctx = crypto_aead_ctx(parent);
 	struct cryptd_aead *cryptd_tfm = *ctx;
@@ -716,8 +716,8 @@
 
 /* This is the Integrity Check Value (aka the authentication tag length and can
  * be 8, 12 or 16 bytes long. */
-static int rfc4106_set_authsize(struct crypto_aead *parent,
-				unsigned int authsize)
+static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent,
+				       unsigned int authsize)
 {
 	struct cryptd_aead **ctx = crypto_aead_ctx(parent);
 	struct cryptd_aead *cryptd_tfm = *ctx;
@@ -929,7 +929,7 @@
 			      aes_ctx);
 }
 
-static int rfc4106_encrypt(struct aead_request *req)
+static int gcmaes_wrapper_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
@@ -945,7 +945,7 @@
 	return crypto_aead_encrypt(req);
 }
 
-static int rfc4106_decrypt(struct aead_request *req)
+static int gcmaes_wrapper_decrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
@@ -1117,7 +1117,7 @@
 {
 	__be32 counter = cpu_to_be32(1);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
 	void *aes_ctx = &(ctx->aes_key_expanded);
 	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 
@@ -1128,6 +1128,30 @@
 			      aes_ctx);
 }
 
+static int generic_gcmaes_init(struct crypto_aead *aead)
+{
+	struct cryptd_aead *cryptd_tfm;
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni",
+				       CRYPTO_ALG_INTERNAL,
+				       CRYPTO_ALG_INTERNAL);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+
+	*ctx = cryptd_tfm;
+	crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
+
+	return 0;
+}
+
+static void generic_gcmaes_exit(struct crypto_aead *aead)
+{
+	struct cryptd_aead **ctx = crypto_aead_ctx(aead);
+
+	cryptd_free_aead(*ctx);
+}
+
 static struct aead_alg aesni_aead_algs[] = { {
 	.setkey			= common_rfc4106_set_key,
 	.setauthsize		= common_rfc4106_set_authsize,
@@ -1147,10 +1171,10 @@
 }, {
 	.init			= rfc4106_init,
 	.exit			= rfc4106_exit,
-	.setkey			= rfc4106_set_key,
-	.setauthsize		= rfc4106_set_authsize,
-	.encrypt		= rfc4106_encrypt,
-	.decrypt		= rfc4106_decrypt,
+	.setkey			= gcmaes_wrapper_set_key,
+	.setauthsize		= gcmaes_wrapper_set_authsize,
+	.encrypt		= gcmaes_wrapper_encrypt,
+	.decrypt		= gcmaes_wrapper_decrypt,
 	.ivsize			= GCM_RFC4106_IV_SIZE,
 	.maxauthsize		= 16,
 	.base = {
@@ -1170,13 +1194,31 @@
 	.ivsize			= GCM_AES_IV_SIZE,
 	.maxauthsize		= 16,
 	.base = {
+		.cra_name		= "__generic-gcm-aes-aesni",
+		.cra_driver_name	= "__driver-generic-gcm-aes-aesni",
+		.cra_priority		= 0,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
+		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_module		= THIS_MODULE,
+	},
+}, {
+	.init			= generic_gcmaes_init,
+	.exit			= generic_gcmaes_exit,
+	.setkey			= gcmaes_wrapper_set_key,
+	.setauthsize		= gcmaes_wrapper_set_authsize,
+	.encrypt		= gcmaes_wrapper_encrypt,
+	.decrypt		= gcmaes_wrapper_decrypt,
+	.ivsize			= GCM_AES_IV_SIZE,
+	.maxauthsize		= 16,
+	.base = {
 		.cra_name		= "gcm(aes)",
 		.cra_driver_name	= "generic-gcm-aesni",
 		.cra_priority		= 400,
 		.cra_flags		= CRYPTO_ALG_ASYNC,
 		.cra_blocksize		= 1,
-		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
-		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_ctxsize		= sizeof(struct cryptd_aead *),
 		.cra_module		= THIS_MODULE,
 	},
 } };
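
With the extra array entry above, "gcm(aes)" is now served by a cryptd-backed
wrapper (mirroring the existing rfc4106 wrapper) while the internal
"__generic-gcm-aes-aesni" algorithm holds the real key context. A minimal,
illustrative caller through the generic AEAD API is sketched below; the function
name and buffers are placeholders, and assuming the aesni implementation wins the
priority selection, the setkey/encrypt calls land in the gcmaes_wrapper_*
functions above. Per the AEAD API, src/dst must carry the AAD followed by the
payload.

	#include <crypto/aead.h>
	#include <linux/crypto.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	/* sketch only: exercising the async "gcm(aes)" wrapper registered above */
	static int gcm_aes_encrypt_demo(struct scatterlist *src, struct scatterlist *dst,
					unsigned int cryptlen, unsigned int assoclen,
					const u8 *key, unsigned int keylen, u8 iv[12])
	{
		struct crypto_aead *tfm;
		struct aead_request *req;
		DECLARE_CRYPTO_WAIT(wait);
		int err;

		tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		err = crypto_aead_setkey(tfm, key, keylen);	/* gcmaes_wrapper_set_key() */
		if (err)
			goto out_free_tfm;

		req = aead_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			err = -ENOMEM;
			goto out_free_tfm;
		}

		aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					  crypto_req_done, &wait);
		aead_request_set_ad(req, assoclen);
		aead_request_set_crypt(req, src, dst, cryptlen, iv);

		/* wait for the asynchronous completion */
		err = crypto_wait_req(crypto_aead_encrypt(req), &wait);

		aead_request_free(req);
	out_free_tfm:
		crypto_free_aead(tfm);
		return err;
	}
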
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 1e6af1b..dce7c5d 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -107,7 +107,6 @@
 	.base.cra_priority	= 300,
 	.base.cra_blocksize	= 1,
 	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_alignmask	= sizeof(u32) - 1,
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= CHACHA20_KEY_SIZE,
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index 399a29d..cb91a64 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -59,13 +59,6 @@
 
 	salsa20_ivsetup(ctx, walk.iv);
 
-	if (likely(walk.nbytes == nbytes))
-	{
-		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
-				      walk.dst.virt.addr, nbytes);
-		return blkcipher_walk_done(desc, &walk, 0);
-	}
-
 	while (walk.nbytes >= 64) {
 		salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
 				      walk.dst.virt.addr,
diff --git a/crypto/Kconfig b/crypto/Kconfig
index f7911963..9327fbf 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -130,7 +130,7 @@
 
 config CRYPTO_ECDH
 	tristate "ECDH algorithm"
-	select CRYTPO_KPP
+	select CRYPTO_KPP
 	select CRYPTO_RNG_DEFAULT
 	help
 	  Generic implementation of the ECDH algorithm
diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c
index 1441f07..09776bb 100644
--- a/crypto/ablk_helper.c
+++ b/crypto/ablk_helper.c
@@ -18,9 +18,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  */
 
@@ -28,7 +26,6 @@
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/hardirq.h>
 #include <crypto/algapi.h>
 #include <crypto/cryptd.h>
 #include <crypto/ablk_helper.h>
diff --git a/crypto/aead.c b/crypto/aead.c
index f794b30..fe00cbd 100644
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -295,7 +295,7 @@
 	if (err)
 		goto out;
 
-	ctx->sknull = crypto_get_default_null_skcipher2();
+	ctx->sknull = crypto_get_default_null_skcipher();
 	err = PTR_ERR(ctx->sknull);
 	if (IS_ERR(ctx->sknull))
 		goto out;
@@ -315,7 +315,7 @@
 	return err;
 
 drop_null:
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 	goto out;
 }
 EXPORT_SYMBOL_GPL(aead_init_geniv);
@@ -325,7 +325,7 @@
 	struct aead_geniv_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 EXPORT_SYMBOL_GPL(aead_exit_geniv);
 
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index d3f1c43..35d4dce 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -672,14 +672,15 @@
 	}
 
 	tsgl = areq->tsgl;
-	for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
-		if (!sg_page(sg))
-			continue;
-		put_page(sg_page(sg));
-	}
+	if (tsgl) {
+		for_each_sg(tsgl, sg, areq->tsgl_entries, i) {
+			if (!sg_page(sg))
+				continue;
+			put_page(sg_page(sg));
+		}
 
-	if (areq->tsgl && areq->tsgl_entries)
 		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
+	}
 }
 EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
 
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 20df8c1..d963c8c 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -491,7 +491,7 @@
 		return ERR_CAST(aead);
 	}
 
-	null_tfm = crypto_get_default_null_skcipher2();
+	null_tfm = crypto_get_default_null_skcipher();
 	if (IS_ERR(null_tfm)) {
 		crypto_free_aead(aead);
 		kfree(tfm);
@@ -509,6 +509,7 @@
 	struct aead_tfm *tfm = private;
 
 	crypto_free_aead(tfm->aead);
+	crypto_put_default_null_skcipher();
 	kfree(tfm);
 }
 
@@ -541,7 +542,6 @@
 	unsigned int ivlen = crypto_aead_ivsize(tfm);
 
 	af_alg_pull_tsgl(sk, ctx->used, NULL, 0);
-	crypto_put_default_null_skcipher2();
 	sock_kzfree_s(sk, ctx->iv, ivlen);
 	sock_kfree_s(sk, ctx, ctx->len);
 	af_alg_release_parent(sk);
diff --git a/crypto/api.c b/crypto/api.c
index 2a2479d..6da802d 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -205,7 +205,8 @@
 }
 EXPORT_SYMBOL_GPL(crypto_alg_lookup);
 
-struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask)
+static struct crypto_alg *crypto_larval_lookup(const char *name, u32 type,
+					       u32 mask)
 {
 	struct crypto_alg *alg;
 
@@ -231,7 +232,6 @@
 
 	return crypto_larval_add(name, type, mask);
 }
-EXPORT_SYMBOL_GPL(crypto_larval_lookup);
 
 int crypto_probing_notify(unsigned long val, void *v)
 {
diff --git a/crypto/authenc.c b/crypto/authenc.c
index 875470b..d3d6d72 100644
--- a/crypto/authenc.c
+++ b/crypto/authenc.c
@@ -329,7 +329,7 @@
 	if (IS_ERR(enc))
 		goto err_free_ahash;
 
-	null = crypto_get_default_null_skcipher2();
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_skcipher;
@@ -363,7 +363,7 @@
 
 	crypto_free_ahash(ctx->auth);
 	crypto_free_skcipher(ctx->enc);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 
 static void crypto_authenc_free(struct aead_instance *inst)
diff --git a/crypto/authencesn.c b/crypto/authencesn.c
index 0cf5fef..15f91dd 100644
--- a/crypto/authencesn.c
+++ b/crypto/authencesn.c
@@ -352,7 +352,7 @@
 	if (IS_ERR(enc))
 		goto err_free_ahash;
 
-	null = crypto_get_default_null_skcipher2();
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_skcipher;
@@ -389,7 +389,7 @@
 
 	crypto_free_ahash(ctx->auth);
 	crypto_free_skcipher(ctx->enc);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 
 static void crypto_authenc_esn_free(struct aead_instance *inst)
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index 6c43a0a..01c0d4a 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -18,7 +18,6 @@
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <linux/errno.h>
-#include <linux/hardirq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c
index a02286b..32ddd48 100644
--- a/crypto/camellia_generic.c
+++ b/crypto/camellia_generic.c
@@ -13,8 +13,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c
index df5c726..66169c1 100644
--- a/crypto/cast5_generic.c
+++ b/crypto/cast5_generic.c
@@ -16,8 +16,7 @@
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+* along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 
 
diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c
index 058c8d7..c8e5ec6 100644
--- a/crypto/cast6_generic.c
+++ b/crypto/cast6_generic.c
@@ -13,8 +13,7 @@
  * any later version.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
index 4a45fa4..e451c3c 100644
--- a/crypto/chacha20_generic.c
+++ b/crypto/chacha20_generic.c
@@ -9,44 +9,38 @@
  * (at your option) any later version.
  */
 
+#include <asm/unaligned.h>
 #include <crypto/algapi.h>
 #include <crypto/chacha20.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/module.h>
 
-static inline u32 le32_to_cpuvp(const void *p)
-{
-	return le32_to_cpup(p);
-}
-
 static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
 			     unsigned int bytes)
 {
-	u8 stream[CHACHA20_BLOCK_SIZE];
+	u32 stream[CHACHA20_BLOCK_WORDS];
 
 	if (dst != src)
 		memcpy(dst, src, bytes);
 
 	while (bytes >= CHACHA20_BLOCK_SIZE) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
+		crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
 		bytes -= CHACHA20_BLOCK_SIZE;
 		dst += CHACHA20_BLOCK_SIZE;
 	}
 	if (bytes) {
 		chacha20_block(state, stream);
-		crypto_xor(dst, stream, bytes);
+		crypto_xor(dst, (const u8 *)stream, bytes);
 	}
 }
 
 void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
 {
-	static const char constant[16] = "expand 32-byte k";
-
-	state[0]  = le32_to_cpuvp(constant +  0);
-	state[1]  = le32_to_cpuvp(constant +  4);
-	state[2]  = le32_to_cpuvp(constant +  8);
-	state[3]  = le32_to_cpuvp(constant + 12);
+	state[0]  = 0x61707865; /* "expa" */
+	state[1]  = 0x3320646e; /* "nd 3" */
+	state[2]  = 0x79622d32; /* "2-by" */
+	state[3]  = 0x6b206574; /* "te k" */
 	state[4]  = ctx->key[0];
 	state[5]  = ctx->key[1];
 	state[6]  = ctx->key[2];
@@ -55,10 +49,10 @@
 	state[9]  = ctx->key[5];
 	state[10] = ctx->key[6];
 	state[11] = ctx->key[7];
-	state[12] = le32_to_cpuvp(iv +  0);
-	state[13] = le32_to_cpuvp(iv +  4);
-	state[14] = le32_to_cpuvp(iv +  8);
-	state[15] = le32_to_cpuvp(iv + 12);
+	state[12] = get_unaligned_le32(iv +  0);
+	state[13] = get_unaligned_le32(iv +  4);
+	state[14] = get_unaligned_le32(iv +  8);
+	state[15] = get_unaligned_le32(iv + 12);
 }
 EXPORT_SYMBOL_GPL(crypto_chacha20_init);
 
@@ -72,7 +66,7 @@
 		return -EINVAL;
 
 	for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
-		ctx->key[i] = le32_to_cpuvp(key + i * sizeof(u32));
+		ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
 
 	return 0;
 }
@@ -111,7 +105,6 @@
 	.base.cra_priority	= 100,
 	.base.cra_blocksize	= 1,
 	.base.cra_ctxsize	= sizeof(struct chacha20_ctx),
-	.base.cra_alignmask	= sizeof(u32) - 1,
 	.base.cra_module	= THIS_MODULE,
 
 	.min_keysize		= CHACHA20_KEY_SIZE,
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index bd43cf5..552e3a8 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -32,7 +32,9 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 
-#define CRYPTD_MAX_CPU_QLEN 1000
+static unsigned int cryptd_max_cpu_qlen = 1000;
+module_param(cryptd_max_cpu_qlen, uint, 0);
+MODULE_PARM_DESC(cryptd_max_cpu_qlen, "Set cryptd Max queue depth");
 
 struct cryptd_cpu_queue {
 	struct crypto_queue queue;
@@ -116,6 +118,7 @@
 		crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
 		INIT_WORK(&cpu_queue->work, cryptd_queue_worker);
 	}
+	pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen);
 	return 0;
 }
 
@@ -1372,7 +1375,7 @@
 {
 	int err;
 
-	err = cryptd_init_queue(&queue, CRYPTD_MAX_CPU_QLEN);
+	err = cryptd_init_queue(&queue, cryptd_max_cpu_qlen);
 	if (err)
 		return err;
 
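
Since the maximum queue depth is now a module parameter rather than the
hard-coded CRYPTD_MAX_CPU_QLEN, it can be overridden when cryptd is loaded,
e.g. with "cryptd_max_cpu_qlen=2000" passed to modprobe, or as
"cryptd.cryptd_max_cpu_qlen=2000" on the kernel command line when cryptd is
built in (2000 is purely an illustrative value). The permission argument of 0
means the parameter is not exposed or writable via sysfs at run time.
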
diff --git a/crypto/ecc.c b/crypto/ecc.c
index 633a9bc..18f32f2 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -964,7 +964,7 @@
 	 * DRBG with a security strength of 256.
 	 */
 	if (crypto_get_default_rng())
-		err = -EFAULT;
+		return -EFAULT;
 
 	err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes);
 	crypto_put_default_rng();
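
The early return also matters for correctness: previously the -EFAULT was
immediately overwritten by the return value of crypto_rng_get_bytes(), which was
then invoked on a default RNG that had never been successfully acquired.
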
diff --git a/crypto/echainiv.c b/crypto/echainiv.c
index e3d889b..45819e6 100644
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -118,8 +118,6 @@
 				struct rtattr **tb)
 {
 	struct aead_instance *inst;
-	struct crypto_aead_spawn *spawn;
-	struct aead_alg *alg;
 	int err;
 
 	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
@@ -127,9 +125,6 @@
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
-	spawn = aead_instance_ctx(inst);
-	alg = crypto_spawn_aead_alg(spawn);
-
 	err = -EINVAL;
 	if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize)
 		goto free_inst;
diff --git a/crypto/gcm.c b/crypto/gcm.c
index 8589681..0ad879e 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -1101,7 +1101,7 @@
 	if (IS_ERR(aead))
 		return PTR_ERR(aead);
 
-	null = crypto_get_default_null_skcipher2();
+	null = crypto_get_default_null_skcipher();
 	err = PTR_ERR(null);
 	if (IS_ERR(null))
 		goto err_free_aead;
@@ -1129,7 +1129,7 @@
 	struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
 
 	crypto_free_aead(ctx->child);
-	crypto_put_default_null_skcipher2();
+	crypto_put_default_null_skcipher();
 }
 
 static void crypto_rfc4543_free(struct aead_instance *inst)
diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c
index 24e6019..a4b1c02 100644
--- a/crypto/gf128mul.c
+++ b/crypto/gf128mul.c
@@ -160,8 +160,6 @@
 {
 	u64 a = le64_to_cpu(x->a);
 	u64 b = le64_to_cpu(x->b);
-
-	/* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */
 	u64 _tt = gf128mul_table_be[a >> 56];
 
 	r->a = cpu_to_le64((a << 8) | (b >> 56));
diff --git a/crypto/hmac.c b/crypto/hmac.c
index 92871dc..e747302 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -195,11 +195,15 @@
 	salg = shash_attr_alg(tb[1], 0, 0);
 	if (IS_ERR(salg))
 		return PTR_ERR(salg);
+	alg = &salg->base;
 
+	/* The underlying hash algorithm must be unkeyed */
 	err = -EINVAL;
+	if (crypto_shash_alg_has_setkey(salg))
+		goto out_put_alg;
+
 	ds = salg->digestsize;
 	ss = salg->statesize;
-	alg = &salg->base;
 	if (ds > alg->cra_blocksize ||
 	    ss < alg->cra_blocksize)
 		goto out_put_alg;
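
The new check relies on crypto_shash_alg_has_setkey(), whose header definition is
not part of this hunk; presumably it simply compares the algorithm's ->setkey
against the shash_no_setkey() stub that crypto/shash.c begins exporting further
down, roughly along these lines:

	/* assumed shape of the helper used above (internal hash header) */
	static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
	{
		return alg->setkey != shash_no_setkey;
	}
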
diff --git a/crypto/internal.h b/crypto/internal.h
index f073204..ae65e5f 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -78,7 +78,6 @@
 
 struct crypto_larval *crypto_larval_alloc(const char *name, u32 type, u32 mask);
 void crypto_larval_kill(struct crypto_alg *alg);
-struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask);
 void crypto_alg_tested(const char *name, int err);
 
 void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
diff --git a/crypto/keywrap.c b/crypto/keywrap.c
index 744e351..ec5c6a0 100644
--- a/crypto/keywrap.c
+++ b/crypto/keywrap.c
@@ -188,7 +188,7 @@
 	}
 
 	/* Perform authentication check */
-	if (block.A != cpu_to_be64(0xa6a6a6a6a6a6a6a6))
+	if (block.A != cpu_to_be64(0xa6a6a6a6a6a6a6a6ULL))
 		ret = -EBADMSG;
 
 	memzero_explicit(&block, sizeof(struct crypto_kw_block));
@@ -221,7 +221,7 @@
 	 * Place the predefined IV into block A -- for encrypt, the caller
 	 * does not need to provide an IV, but he needs to fetch the final IV.
 	 */
-	block.A = cpu_to_be64(0xa6a6a6a6a6a6a6a6);
+	block.A = cpu_to_be64(0xa6a6a6a6a6a6a6a6ULL);
 
 	/*
 	 * src scatterlist is read-only. dst scatterlist is r/w. During the
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index eca04d3..2908382 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -26,7 +26,6 @@
 #include <linux/sched.h>
 #include <linux/sched/stat.h>
 #include <linux/slab.h>
-#include <linux/hardirq.h>
 
 #define MCRYPTD_MAX_CPU_QLEN 100
 #define MCRYPTD_BATCH 9
diff --git a/crypto/rsa_helper.c b/crypto/rsa_helper.c
index 0b66dc8..cad395d 100644
--- a/crypto/rsa_helper.c
+++ b/crypto/rsa_helper.c
@@ -30,7 +30,7 @@
 		return -EINVAL;
 
 	if (fips_enabled) {
-		while (!*ptr && n_sz) {
+		while (n_sz && !*ptr) {
 			ptr++;
 			n_sz--;
 		}
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index f550b5d..d7da0ee 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -188,13 +188,6 @@
 
 	salsa20_ivsetup(ctx, walk.iv);
 
-	if (likely(walk.nbytes == nbytes))
-	{
-		salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
-				      walk.src.virt.addr, nbytes);
-		return blkcipher_walk_done(desc, &walk, 0);
-	}
-
 	while (walk.nbytes >= 64) {
 		salsa20_encrypt_bytes(ctx, walk.dst.virt.addr,
 				      walk.src.virt.addr,
diff --git a/crypto/seqiv.c b/crypto/seqiv.c
index 570b7d1..39dbf2f 100644
--- a/crypto/seqiv.c
+++ b/crypto/seqiv.c
@@ -144,8 +144,6 @@
 static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
 	struct aead_instance *inst;
-	struct crypto_aead_spawn *spawn;
-	struct aead_alg *alg;
 	int err;
 
 	inst = aead_geniv_alloc(tmpl, tb, 0, 0);
@@ -153,9 +151,6 @@
 	if (IS_ERR(inst))
 		return PTR_ERR(inst);
 
-	spawn = aead_instance_ctx(inst);
-	alg = crypto_spawn_aead_alg(spawn);
-
 	err = -EINVAL;
 	if (inst->alg.ivsize != sizeof(u64))
 		goto free_inst;
diff --git a/crypto/shash.c b/crypto/shash.c
index 325a14d..e849d3e 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -25,11 +25,12 @@
 
 static const struct crypto_type crypto_shash_type;
 
-static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
-			   unsigned int keylen)
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+		    unsigned int keylen)
 {
 	return -ENOSYS;
 }
+EXPORT_SYMBOL_GPL(shash_no_setkey);
 
 static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
 				  unsigned int keylen)
diff --git a/crypto/simd.c b/crypto/simd.c
index 8820337..208226d 100644
--- a/crypto/simd.c
+++ b/crypto/simd.c
@@ -19,9 +19,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  */
 
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 9267cbd..28b4882 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -185,7 +185,8 @@
 }
 
 static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE],
-			unsigned int buflen)
+			 unsigned int buflen, const void *assoc,
+			 unsigned int aad_size)
 {
 	int np = (buflen + PAGE_SIZE - 1)/PAGE_SIZE;
 	int k, rem;
@@ -198,11 +199,16 @@
 	}
 
 	sg_init_table(sg, np + 1);
-	np--;
+
+	sg_set_buf(&sg[0], assoc, aad_size);
+
+	if (rem)
+		np--;
 	for (k = 0; k < np; k++)
 		sg_set_buf(&sg[k + 1], xbuf[k], PAGE_SIZE);
 
-	sg_set_buf(&sg[k + 1], xbuf[k], rem);
+	if (rem)
+		sg_set_buf(&sg[k + 1], xbuf[k], rem);
 }
 
 static void test_aead_speed(const char *algo, int enc, unsigned int secs,
@@ -316,14 +322,12 @@
 				goto out;
 			}
 
-			sg_init_aead(sg, xbuf,
-				    *b_size + (enc ? 0 : authsize));
+			sg_init_aead(sg, xbuf, *b_size + (enc ? 0 : authsize),
+				     assoc, aad_size);
 
 			sg_init_aead(sgout, xoutbuf,
-				    *b_size + (enc ? authsize : 0));
-
-			sg_set_buf(&sg[0], assoc, aad_size);
-			sg_set_buf(&sgout[0], assoc, aad_size);
+				     *b_size + (enc ? authsize : 0), assoc,
+				     aad_size);
 
 			aead_request_set_crypt(req, sg, sgout,
 					       *b_size + (enc ? 0 : authsize),
diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c
index 5f62c4f..f3a0dd2 100644
--- a/crypto/twofish_common.c
+++ b/crypto/twofish_common.c
@@ -24,9 +24,8 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  *
  * This code is a "clean room" implementation, written from the paper
  * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c
index ebf7a3e..07e6243 100644
--- a/crypto/twofish_generic.c
+++ b/crypto/twofish_generic.c
@@ -23,9 +23,8 @@
  * GNU General Public License for more details.
  * 
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
- * USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
  *
  * This code is a "clean room" implementation, written from the paper
  * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey,
diff --git a/crypto/xcbc.c b/crypto/xcbc.c
index df90b33..25c75af 100644
--- a/crypto/xcbc.c
+++ b/crypto/xcbc.c
@@ -12,8 +12,7 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  *
  * Author:
  * 	Kazunori Miyazawa <miyazawa@linux-ipv6.org>
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index f6e3e5a..90e4bb24 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -73,26 +73,14 @@
 
 	  If unsure, say Y.
 
-config HW_RANDOM_BCM63XX
-	tristate "Broadcom BCM63xx Random Number Generator support"
-	depends on BCM63XX || BMIPS_GENERIC
-	default HW_RANDOM
-	---help---
-	  This driver provides kernel-side support for the Random Number
-	  Generator hardware found on the Broadcom BCM63xx SoCs.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called bcm63xx-rng
-
-	  If unusure, say Y.
-
 config HW_RANDOM_BCM2835
-	tristate "Broadcom BCM2835 Random Number Generator support"
-	depends on ARCH_BCM2835 || ARCH_BCM_NSP || ARCH_BCM_5301X
+	tristate "Broadcom BCM2835/BCM63xx Random Number Generator support"
+	depends on ARCH_BCM2835 || ARCH_BCM_NSP || ARCH_BCM_5301X || \
+		   ARCH_BCM_63XX || BCM63XX || BMIPS_GENERIC
 	default HW_RANDOM
 	---help---
 	  This driver provides kernel-side support for the Random Number
-	  Generator hardware found on the Broadcom BCM2835 SoCs.
+	  Generator hardware found on the Broadcom BCM2835 and BCM63xx SoCs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called bcm2835-rng
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index f3728d0..e7146a8 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -9,7 +9,6 @@
 obj-$(CONFIG_HW_RANDOM_INTEL) += intel-rng.o
 obj-$(CONFIG_HW_RANDOM_AMD) += amd-rng.o
 obj-$(CONFIG_HW_RANDOM_ATMEL) += atmel-rng.o
-obj-$(CONFIG_HW_RANDOM_BCM63XX)	+= bcm63xx-rng.o
 obj-$(CONFIG_HW_RANDOM_GEODE) += geode-rng.o
 obj-$(CONFIG_HW_RANDOM_N2RNG) += n2-rng.o
 n2-rng-y := n2-drv.o n2-asm.o
diff --git a/drivers/char/hw_random/bcm2835-rng.c b/drivers/char/hw_random/bcm2835-rng.c
index 574211a..25e5631 100644
--- a/drivers/char/hw_random/bcm2835-rng.c
+++ b/drivers/char/hw_random/bcm2835-rng.c
@@ -15,6 +15,7 @@
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
+#include <linux/clk.h>
 
 #define RNG_CTRL	0x0
 #define RNG_STATUS	0x4
@@ -29,116 +30,182 @@
 
 #define RNG_INT_OFF	0x1
 
-static void __init nsp_rng_init(void __iomem *base)
-{
-	u32 val;
+struct bcm2835_rng_priv {
+	struct hwrng rng;
+	void __iomem *base;
+	bool mask_interrupts;
+	struct clk *clk;
+};
 
-	/* mask the interrupt */
-	val = readl(base + RNG_INT_MASK);
-	val |= RNG_INT_OFF;
-	writel(val, base + RNG_INT_MASK);
+static inline struct bcm2835_rng_priv *to_rng_priv(struct hwrng *rng)
+{
+	return container_of(rng, struct bcm2835_rng_priv, rng);
+}
+
+static inline u32 rng_readl(struct bcm2835_rng_priv *priv, u32 offset)
+{
+	/* MIPS chips strapped for BE will automagically configure the
+	 * peripheral registers for CPU-native byte order.
+	 */
+	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+		return __raw_readl(priv->base + offset);
+	else
+		return readl(priv->base + offset);
+}
+
+static inline void rng_writel(struct bcm2835_rng_priv *priv, u32 val,
+			      u32 offset)
+{
+	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+		__raw_writel(val, priv->base + offset);
+	else
+		writel(val, priv->base + offset);
 }
 
 static int bcm2835_rng_read(struct hwrng *rng, void *buf, size_t max,
 			       bool wait)
 {
-	void __iomem *rng_base = (void __iomem *)rng->priv;
+	struct bcm2835_rng_priv *priv = to_rng_priv(rng);
 	u32 max_words = max / sizeof(u32);
 	u32 num_words, count;
 
-	while ((__raw_readl(rng_base + RNG_STATUS) >> 24) == 0) {
+	while ((rng_readl(priv, RNG_STATUS) >> 24) == 0) {
 		if (!wait)
 			return 0;
 		cpu_relax();
 	}
 
-	num_words = readl(rng_base + RNG_STATUS) >> 24;
+	num_words = rng_readl(priv, RNG_STATUS) >> 24;
 	if (num_words > max_words)
 		num_words = max_words;
 
 	for (count = 0; count < num_words; count++)
-		((u32 *)buf)[count] = readl(rng_base + RNG_DATA);
+		((u32 *)buf)[count] = rng_readl(priv, RNG_DATA);
 
 	return num_words * sizeof(u32);
 }
 
-static struct hwrng bcm2835_rng_ops = {
-	.name	= "bcm2835",
-	.read	= bcm2835_rng_read,
+static int bcm2835_rng_init(struct hwrng *rng)
+{
+	struct bcm2835_rng_priv *priv = to_rng_priv(rng);
+	int ret = 0;
+	u32 val;
+
+	if (!IS_ERR(priv->clk)) {
+		ret = clk_prepare_enable(priv->clk);
+		if (ret)
+			return ret;
+	}
+
+	if (priv->mask_interrupts) {
+		/* mask the interrupt */
+		val = rng_readl(priv, RNG_INT_MASK);
+		val |= RNG_INT_OFF;
+		rng_writel(priv, val, RNG_INT_MASK);
+	}
+
+	/* set warm-up count & enable */
+	rng_writel(priv, RNG_WARMUP_COUNT, RNG_STATUS);
+	rng_writel(priv, RNG_RBGEN, RNG_CTRL);
+
+	return ret;
+}
+
+static void bcm2835_rng_cleanup(struct hwrng *rng)
+{
+	struct bcm2835_rng_priv *priv = to_rng_priv(rng);
+
+	/* disable rng hardware */
+	rng_writel(priv, 0, RNG_CTRL);
+
+	if (!IS_ERR(priv->clk))
+		clk_disable_unprepare(priv->clk);
+}
+
+struct bcm2835_rng_of_data {
+	bool mask_interrupts;
+};
+
+static const struct bcm2835_rng_of_data nsp_rng_of_data = {
+	.mask_interrupts = true,
 };
 
 static const struct of_device_id bcm2835_rng_of_match[] = {
 	{ .compatible = "brcm,bcm2835-rng"},
-	{ .compatible = "brcm,bcm-nsp-rng", .data = nsp_rng_init},
-	{ .compatible = "brcm,bcm5301x-rng", .data = nsp_rng_init},
+	{ .compatible = "brcm,bcm-nsp-rng", .data = &nsp_rng_of_data },
+	{ .compatible = "brcm,bcm5301x-rng", .data = &nsp_rng_of_data },
+	{ .compatible = "brcm,bcm6368-rng"},
 	{},
 };
 
 static int bcm2835_rng_probe(struct platform_device *pdev)
 {
+	const struct bcm2835_rng_of_data *of_data;
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
-	void (*rng_setup)(void __iomem *base);
 	const struct of_device_id *rng_id;
-	void __iomem *rng_base;
+	struct bcm2835_rng_priv *priv;
+	struct resource *r;
 	int err;
 
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
 	/* map peripheral */
-	rng_base = of_iomap(np, 0);
-	if (!rng_base) {
+	priv->base = devm_ioremap_resource(dev, r);
+	if (IS_ERR(priv->base)) {
 		dev_err(dev, "failed to remap rng regs");
-		return -ENODEV;
+		return PTR_ERR(priv->base);
 	}
-	bcm2835_rng_ops.priv = (unsigned long)rng_base;
+
+	/* Clock is optional on most platforms */
+	priv->clk = devm_clk_get(dev, NULL);
+
+	priv->rng.name = pdev->name;
+	priv->rng.init = bcm2835_rng_init;
+	priv->rng.read = bcm2835_rng_read;
+	priv->rng.cleanup = bcm2835_rng_cleanup;
 
 	rng_id = of_match_node(bcm2835_rng_of_match, np);
-	if (!rng_id) {
-		iounmap(rng_base);
+	if (!rng_id)
 		return -EINVAL;
-	}
-	/* Check for rng init function, execute it */
-	rng_setup = rng_id->data;
-	if (rng_setup)
-		rng_setup(rng_base);
 
-	/* set warm-up count & enable */
-	__raw_writel(RNG_WARMUP_COUNT, rng_base + RNG_STATUS);
-	__raw_writel(RNG_RBGEN, rng_base + RNG_CTRL);
+	/* Check for rng init function, execute it */
+	of_data = rng_id->data;
+	if (of_data)
+		priv->mask_interrupts = of_data->mask_interrupts;
 
 	/* register driver */
-	err = hwrng_register(&bcm2835_rng_ops);
-	if (err) {
+	err = devm_hwrng_register(dev, &priv->rng);
+	if (err)
 		dev_err(dev, "hwrng registration failed\n");
-		iounmap(rng_base);
-	} else
+	else
 		dev_info(dev, "hwrng registered\n");
 
 	return err;
 }
 
-static int bcm2835_rng_remove(struct platform_device *pdev)
-{
-	void __iomem *rng_base = (void __iomem *)bcm2835_rng_ops.priv;
-
-	/* disable rng hardware */
-	__raw_writel(0, rng_base + RNG_CTRL);
-
-	/* unregister driver */
-	hwrng_unregister(&bcm2835_rng_ops);
-	iounmap(rng_base);
-
-	return 0;
-}
-
 MODULE_DEVICE_TABLE(of, bcm2835_rng_of_match);
 
+static struct platform_device_id bcm2835_rng_devtype[] = {
+	{ .name = "bcm2835-rng" },
+	{ .name = "bcm63xx-rng" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, bcm2835_rng_devtype);
+
 static struct platform_driver bcm2835_rng_driver = {
 	.driver = {
 		.name = "bcm2835-rng",
 		.of_match_table = bcm2835_rng_of_match,
 	},
 	.probe		= bcm2835_rng_probe,
-	.remove		= bcm2835_rng_remove,
+	.id_table	= bcm2835_rng_devtype,
 };
 module_platform_driver(bcm2835_rng_driver);
 
diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c
deleted file mode 100644
index 5132c9c..0000000
--- a/drivers/char/hw_random/bcm63xx-rng.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Broadcom BCM63xx Random Number Generator support
- *
- * Copyright (C) 2011, Florian Fainelli <florian@openwrt.org>
- * Copyright (C) 2009, Broadcom Corporation
- *
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-#include <linux/hw_random.h>
-#include <linux/of.h>
-
-#define RNG_CTRL			0x00
-#define RNG_EN				(1 << 0)
-
-#define RNG_STAT			0x04
-#define RNG_AVAIL_MASK			(0xff000000)
-
-#define RNG_DATA			0x08
-#define RNG_THRES			0x0c
-#define RNG_MASK			0x10
-
-struct bcm63xx_rng_priv {
-	struct hwrng rng;
-	struct clk *clk;
-	void __iomem *regs;
-};
-
-#define to_rng_priv(rng)	container_of(rng, struct bcm63xx_rng_priv, rng)
-
-static int bcm63xx_rng_init(struct hwrng *rng)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-	u32 val;
-	int error;
-
-	error = clk_prepare_enable(priv->clk);
-	if (error)
-		return error;
-
-	val = __raw_readl(priv->regs + RNG_CTRL);
-	val |= RNG_EN;
-	__raw_writel(val, priv->regs + RNG_CTRL);
-
-	return 0;
-}
-
-static void bcm63xx_rng_cleanup(struct hwrng *rng)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-	u32 val;
-
-	val = __raw_readl(priv->regs + RNG_CTRL);
-	val &= ~RNG_EN;
-	__raw_writel(val, priv->regs + RNG_CTRL);
-
-	clk_disable_unprepare(priv->clk);
-}
-
-static int bcm63xx_rng_data_present(struct hwrng *rng, int wait)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-
-	return __raw_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK;
-}
-
-static int bcm63xx_rng_data_read(struct hwrng *rng, u32 *data)
-{
-	struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-
-	*data = __raw_readl(priv->regs + RNG_DATA);
-
-	return 4;
-}
-
-static int bcm63xx_rng_probe(struct platform_device *pdev)
-{
-	struct resource *r;
-	int ret;
-	struct bcm63xx_rng_priv *priv;
-
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!r) {
-		dev_err(&pdev->dev, "no iomem resource\n");
-		return -ENXIO;
-	}
-
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	priv->rng.name = pdev->name;
-	priv->rng.init = bcm63xx_rng_init;
-	priv->rng.cleanup = bcm63xx_rng_cleanup;
-	priv->rng.data_present = bcm63xx_rng_data_present;
-	priv->rng.data_read = bcm63xx_rng_data_read;
-
-	priv->clk = devm_clk_get(&pdev->dev, "ipsec");
-	if (IS_ERR(priv->clk)) {
-		ret = PTR_ERR(priv->clk);
-		dev_err(&pdev->dev, "no clock for device: %d\n", ret);
-		return ret;
-	}
-
-	if (!devm_request_mem_region(&pdev->dev, r->start,
-					resource_size(r), pdev->name)) {
-		dev_err(&pdev->dev, "request mem failed");
-		return -EBUSY;
-	}
-
-	priv->regs = devm_ioremap_nocache(&pdev->dev, r->start,
-					resource_size(r));
-	if (!priv->regs) {
-		dev_err(&pdev->dev, "ioremap failed");
-		return -ENOMEM;
-	}
-
-	ret = devm_hwrng_register(&pdev->dev, &priv->rng);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to register rng device: %d\n",
-			ret);
-		return ret;
-	}
-
-	dev_info(&pdev->dev, "registered RNG driver\n");
-
-	return 0;
-}
-
-#ifdef CONFIG_OF
-static const struct of_device_id bcm63xx_rng_of_match[] = {
-	{ .compatible = "brcm,bcm6368-rng", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, bcm63xx_rng_of_match);
-#endif
-
-static struct platform_driver bcm63xx_rng_driver = {
-	.probe		= bcm63xx_rng_probe,
-	.driver		= {
-		.name	= "bcm63xx-rng",
-		.of_match_table = of_match_ptr(bcm63xx_rng_of_match),
-	},
-};
-
-module_platform_driver(bcm63xx_rng_driver);
-
-MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
-MODULE_DESCRIPTION("Broadcom BCM63xx RNG driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/char/random.c b/drivers/char/random.c
index ec42c8b..11304bb 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -431,9 +431,9 @@
 static int crng_init_cnt = 0;
 #define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
 static void _extract_crng(struct crng_state *crng,
-			  __u8 out[CHACHA20_BLOCK_SIZE]);
+			  __u32 out[CHACHA20_BLOCK_WORDS]);
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used);
+				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used);
 static void process_random_ready_list(void);
 static void _get_random_bytes(void *buf, int nbytes);
 
@@ -817,7 +817,7 @@
 	unsigned long	flags;
 	int		i, num;
 	union {
-		__u8	block[CHACHA20_BLOCK_SIZE];
+		__u32	block[CHACHA20_BLOCK_WORDS];
 		__u32	key[8];
 	} buf;
 
@@ -851,7 +851,7 @@
 }
 
 static void _extract_crng(struct crng_state *crng,
-			  __u8 out[CHACHA20_BLOCK_SIZE])
+			  __u32 out[CHACHA20_BLOCK_WORDS])
 {
 	unsigned long v, flags;
 
@@ -867,7 +867,7 @@
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
+static void extract_crng(__u32 out[CHACHA20_BLOCK_WORDS])
 {
 	struct crng_state *crng = NULL;
 
@@ -885,7 +885,7 @@
  * enough) to mutate the CRNG key to provide backtracking protection.
  */
 static void _crng_backtrack_protect(struct crng_state *crng,
-				    __u8 tmp[CHACHA20_BLOCK_SIZE], int used)
+				    __u32 tmp[CHACHA20_BLOCK_WORDS], int used)
 {
 	unsigned long	flags;
 	__u32		*s, *d;
@@ -897,14 +897,14 @@
 		used = 0;
 	}
 	spin_lock_irqsave(&crng->lock, flags);
-	s = (__u32 *) &tmp[used];
+	s = &tmp[used / sizeof(__u32)];
 	d = &crng->state[4];
 	for (i=0; i < 8; i++)
 		*d++ ^= *s++;
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
-static void crng_backtrack_protect(__u8 tmp[CHACHA20_BLOCK_SIZE], int used)
+static void crng_backtrack_protect(__u32 tmp[CHACHA20_BLOCK_WORDS], int used)
 {
 	struct crng_state *crng = NULL;
 
@@ -920,7 +920,7 @@
 static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
 {
 	ssize_t ret = 0, i = CHACHA20_BLOCK_SIZE;
-	__u8 tmp[CHACHA20_BLOCK_SIZE];
+	__u32 tmp[CHACHA20_BLOCK_WORDS];
 	int large_request = (nbytes > 256);
 
 	while (nbytes) {
@@ -1507,7 +1507,7 @@
  */
 static void _get_random_bytes(void *buf, int nbytes)
 {
-	__u8 tmp[CHACHA20_BLOCK_SIZE];
+	__u32 tmp[CHACHA20_BLOCK_WORDS];
 
 	trace_get_random_bytes(nbytes, _RET_IP_);
 
@@ -2114,7 +2114,7 @@
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) {
-		extract_crng((u8 *)batch->entropy_u64);
+		extract_crng((__u32 *)batch->entropy_u64);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u64[batch->position++];
@@ -2144,7 +2144,7 @@
 	if (use_lock)
 		read_lock_irqsave(&batched_entropy_reset_lock, flags);
 	if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) {
-		extract_crng((u8 *)batch->entropy_u32);
+		extract_crng(batch->entropy_u32);
 		batch->position = 0;
 	}
 	ret = batch->entropy_u32[batch->position++];
diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c
index 4562784..22df6b5 100644
--- a/drivers/crypto/axis/artpec6_crypto.c
+++ b/drivers/crypto/axis/artpec6_crypto.c
@@ -22,6 +22,7 @@
 #include <linux/slab.h>
 
 #include <crypto/aes.h>
+#include <crypto/gcm.h>
 #include <crypto/internal/aead.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
@@ -1934,7 +1935,7 @@
 
 	memcpy(req_ctx->hw_ctx.J0, areq->iv, crypto_aead_ivsize(cipher));
 	// The HW omits the initial increment of the counter field.
-	crypto_inc(req_ctx->hw_ctx.J0+12, 4);
+	memcpy(req_ctx->hw_ctx.J0 + GCM_AES_IV_SIZE, "\x00\x00\x00\x01", 4);
 
 	ret = artpec6_crypto_setup_out_descr(common, &req_ctx->hw_ctx,
 		sizeof(struct artpec6_crypto_aead_hw_ctx), false, false);
@@ -2956,7 +2957,7 @@
 		.setkey = artpec6_crypto_aead_set_key,
 		.encrypt = artpec6_crypto_aead_encrypt,
 		.decrypt = artpec6_crypto_aead_decrypt,
-		.ivsize = AES_BLOCK_SIZE,
+		.ivsize = GCM_AES_IV_SIZE,
 		.maxauthsize = AES_BLOCK_SIZE,
 
 		.base = {
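
Both hunks follow from how GCM builds J0 for the standard 96-bit IV (NIST SP
800-38D): J0 is the IV concatenated with a 32-bit big-endian block counter
initialised to 1. The driver therefore advertises the 12-byte GCM_AES_IV_SIZE
instead of the full AES block size, and writes the constant 00 00 00 01 directly
after the copied IV rather than incrementing a zeroed counter field.
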
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index ce70b44..2b75f95 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -42,7 +42,6 @@
 #include <crypto/authenc.h>
 #include <crypto/skcipher.h>
 #include <crypto/hash.h>
-#include <crypto/aes.h>
 #include <crypto/sha3.h>
 
 #include "util.h"
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index f9f08fc..ad14b69 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -668,7 +668,7 @@
 	qm_sg_ents = 1 + !!ivsize + mapped_src_nents +
 		     (mapped_dst_nents > 1 ? mapped_dst_nents : 0);
 	if (unlikely(qm_sg_ents > CAAM_QI_MAX_AEAD_SG)) {
-		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+		dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
 			qm_sg_ents, CAAM_QI_MAX_AEAD_SG);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
 			   iv_dma, ivsize, op_type, 0, 0);
@@ -905,7 +905,7 @@
 
 	qm_sg_ents += mapped_dst_nents > 1 ? mapped_dst_nents : 0;
 	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
-		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+		dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
 			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
 			   iv_dma, ivsize, op_type, 0, 0);
@@ -1058,7 +1058,7 @@
 	}
 
 	if (unlikely(qm_sg_ents > CAAM_QI_MAX_ABLKCIPHER_SG)) {
-		dev_err(qidev, "Insufficient S/G entries: %d > %lu\n",
+		dev_err(qidev, "Insufficient S/G entries: %d > %zu\n",
 			qm_sg_ents, CAAM_QI_MAX_ABLKCIPHER_SG);
 		caam_unmap(qidev, req->src, req->dst, src_nents, dst_nents,
 			   iv_dma, ivsize, GIVENCRYPT, 0, 0);
diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
index 169e662..b0ba433 100644
--- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
+++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
@@ -459,7 +459,8 @@
 	info->completion_addr = kzalloc(sizeof(union cpt_res_s), GFP_KERNEL);
 	if (unlikely(!info->completion_addr)) {
 		dev_err(&pdev->dev, "Unable to allocate memory for completion_addr\n");
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto request_cleanup;
 	}
 
 	result = (union cpt_res_s *)info->completion_addr;
diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
index 4addc23..deaefd5 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
@@ -6,7 +6,6 @@
 #include "nitrox_dev.h"
 #include "nitrox_req.h"
 #include "nitrox_csr.h"
-#include "nitrox_req.h"
 
 /* SLC_STORE_INFO */
 #define MIN_UDD_LEN 16
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
index ff02b71..ca1f0d7 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
@@ -21,7 +21,6 @@
 #include <crypto/ctr.h>
 #include <crypto/gcm.h>
 #include <crypto/scatterwalk.h>
-#include <linux/delay.h>
 
 #include "ccp-crypto.h"
 
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index b56b3f7..5ae9f87 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -19,3 +19,13 @@
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called chcr.
+
+config CHELSIO_IPSEC_INLINE
+	bool "Chelsio IPSec XFRM Tx crypto offload"
+	depends on CHELSIO_T4
+	depends on CRYPTO_DEV_CHELSIO
+	depends on XFRM_OFFLOAD
+	depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD
+	default n
+	---help---
+	  Enable support for IPSec Tx Inline.
diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile
index bebdf06..eaecaf1 100644
--- a/drivers/crypto/chelsio/Makefile
+++ b/drivers/crypto/chelsio/Makefile
@@ -2,3 +2,4 @@
 
 obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chcr.o
 chcr-objs :=  chcr_core.o chcr_algo.o
+chcr-$(CONFIG_CHELSIO_IPSEC_INLINE) += chcr_ipsec.o
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 4eed717..b663b93 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -73,6 +73,29 @@
 
 #define IV AES_BLOCK_SIZE
 
+static unsigned int sgl_ent_len[] = {
+	0, 0, 16, 24, 40, 48, 64, 72, 88,
+	96, 112, 120, 136, 144, 160, 168, 184,
+	192, 208, 216, 232, 240, 256, 264, 280,
+	288, 304, 312, 328, 336, 352, 360, 376
+};
+
+static unsigned int dsgl_ent_len[] = {
+	0, 32, 32, 48, 48, 64, 64, 80, 80,
+	112, 112, 128, 128, 144, 144, 160, 160,
+	192, 192, 208, 208, 224, 224, 240, 240,
+	272, 272, 288, 288, 304, 304, 320, 320
+};
+
+static u32 round_constant[11] = {
+	0x01000000, 0x02000000, 0x04000000, 0x08000000,
+	0x10000000, 0x20000000, 0x40000000, 0x80000000,
+	0x1B000000, 0x36000000, 0x6C000000
+};
+
+static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
+				   unsigned char *input, int err);
+
 static inline  struct chcr_aead_ctx *AEAD_CTX(struct chcr_context *ctx)
 {
 	return ctx->crypto_ctx->aeadctx;
@@ -108,18 +131,6 @@
 	return (skb->len <= SGE_MAX_WR_LEN);
 }
 
-/*
- *	sgl_len - calculates the size of an SGL of the given capacity
- *	@n: the number of SGL entries
- *	Calculates the number of flits needed for a scatter/gather list that
- *	can hold the given number of entries.
- */
-static inline unsigned int sgl_len(unsigned int n)
-{
-	n--;
-	return (3 * n) / 2 + (n & 1) + 2;
-}
-
 static int sg_nents_xlen(struct scatterlist *sg, unsigned int reqlen,
 			 unsigned int entlen,
 			 unsigned int skip)
@@ -160,7 +171,6 @@
 
 	if (input == NULL)
 		goto out;
-	reqctx = ahash_request_ctx(req);
 	digestsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
 	if (reqctx->is_sg_map)
 		chcr_hash_dma_unmap(&u_ctx->lldi.pdev->dev, req);
@@ -183,30 +193,17 @@
 	}
 out:
 	req->base.complete(&req->base, err);
+}
 
-	}
-
-static inline void chcr_handle_aead_resp(struct aead_request *req,
-					 unsigned char *input,
-					 int err)
+static inline int get_aead_subtype(struct crypto_aead *aead)
 {
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct uld_ctx *u_ctx = ULD_CTX(a_ctx(tfm));
-
-
-	chcr_aead_dma_unmap(&u_ctx->lldi.pdev->dev, req, reqctx->op);
-	if (reqctx->b0_dma)
-		dma_unmap_single(&u_ctx->lldi.pdev->dev, reqctx->b0_dma,
-				 reqctx->b0_len, DMA_BIDIRECTIONAL);
-	if (reqctx->verify == VERIFY_SW) {
-		chcr_verify_tag(req, input, &err);
-		reqctx->verify = VERIFY_HW;
+	struct aead_alg *alg = crypto_aead_alg(aead);
+	struct chcr_alg_template *chcr_crypto_alg =
+		container_of(alg, struct chcr_alg_template, alg.aead);
+	return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK;
 }
-	req->base.complete(&req->base, err);
 
-}
-static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err)
+void chcr_verify_tag(struct aead_request *req, u8 *input, int *err)
 {
 	u8 temp[SHA512_DIGEST_SIZE];
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
@@ -231,6 +228,25 @@
 		*err = 0;
 }
 
+static inline void chcr_handle_aead_resp(struct aead_request *req,
+					 unsigned char *input,
+					 int err)
+{
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct uld_ctx *u_ctx = ULD_CTX(a_ctx(tfm));
+
+	chcr_aead_dma_unmap(&u_ctx->lldi.pdev->dev, req, reqctx->op);
+	if (reqctx->b0_dma)
+		dma_unmap_single(&u_ctx->lldi.pdev->dev, reqctx->b0_dma,
+				 reqctx->b0_len, DMA_BIDIRECTIONAL);
+	if (reqctx->verify == VERIFY_SW) {
+		chcr_verify_tag(req, input, &err);
+		reqctx->verify = VERIFY_HW;
+	}
+	req->base.complete(&req->base, err);
+}
+
 /*
  *	chcr_handle_resp - Unmap the DMA buffers associated with the request
  *	@req: crypto request
@@ -595,14 +611,6 @@
 	}
 }
 
-static inline int get_aead_subtype(struct crypto_aead *aead)
-{
-	struct aead_alg *alg = crypto_aead_alg(aead);
-	struct chcr_alg_template *chcr_crypto_alg =
-		container_of(alg, struct chcr_alg_template, alg.aead);
-	return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK;
-}
-
 static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm)
 {
 	struct crypto_alg *alg = tfm->__crt_alg;
@@ -1101,7 +1109,6 @@
 
 }
 
-
 static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
 				   unsigned char *input, int err)
 {
@@ -2014,11 +2021,8 @@
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
 	int error = -EINVAL;
-	unsigned int dst_size;
 	unsigned int authsize = crypto_aead_authsize(tfm);
 
-	dst_size = req->assoclen + req->cryptlen + (op_type ?
-					-authsize : authsize);
 	/* validate key size */
 	if (aeadctx->enckey_len == 0)
 		goto err;
@@ -2202,9 +2206,9 @@
 	return ERR_PTR(error);
 }
 
-static int chcr_aead_dma_map(struct device *dev,
-			     struct aead_request *req,
-			     unsigned short op_type)
+int chcr_aead_dma_map(struct device *dev,
+		      struct aead_request *req,
+		      unsigned short op_type)
 {
 	int error;
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
@@ -2246,9 +2250,9 @@
 	return -ENOMEM;
 }
 
-static void chcr_aead_dma_unmap(struct device *dev,
-			     struct aead_request *req,
-			     unsigned short op_type)
+void chcr_aead_dma_unmap(struct device *dev,
+			 struct aead_request *req,
+			 unsigned short op_type)
 {
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
@@ -2273,10 +2277,10 @@
 	}
 }
 
-static inline void chcr_add_aead_src_ent(struct aead_request *req,
-			       struct ulptx_sgl *ulptx,
-			       unsigned int assoclen,
-			       unsigned short op_type)
+void chcr_add_aead_src_ent(struct aead_request *req,
+			   struct ulptx_sgl *ulptx,
+			   unsigned int assoclen,
+			   unsigned short op_type)
 {
 	struct ulptx_walk ulp_walk;
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
@@ -2308,11 +2312,11 @@
 	}
 }
 
-static inline void chcr_add_aead_dst_ent(struct aead_request *req,
-			       struct cpl_rx_phys_dsgl *phys_cpl,
-			       unsigned int assoclen,
-			       unsigned short op_type,
-			       unsigned short qid)
+void chcr_add_aead_dst_ent(struct aead_request *req,
+			   struct cpl_rx_phys_dsgl *phys_cpl,
+			   unsigned int assoclen,
+			   unsigned short op_type,
+			   unsigned short qid)
 {
 	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
@@ -2330,9 +2334,9 @@
 	dsgl_walk_end(&dsgl_walk, qid);
 }
 
-static inline void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
-					   struct ulptx_sgl *ulptx,
-					   struct  cipher_wr_param *wrparam)
+void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
+			     struct ulptx_sgl *ulptx,
+			     struct  cipher_wr_param *wrparam)
 {
 	struct ulptx_walk ulp_walk;
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
@@ -2355,10 +2359,10 @@
 	}
 }
 
-static inline void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
-					   struct cpl_rx_phys_dsgl *phys_cpl,
-					   struct  cipher_wr_param *wrparam,
-					   unsigned short qid)
+void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
+			     struct cpl_rx_phys_dsgl *phys_cpl,
+			     struct  cipher_wr_param *wrparam,
+			     unsigned short qid)
 {
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
 	struct dsgl_walk dsgl_walk;
@@ -2373,9 +2377,9 @@
 	dsgl_walk_end(&dsgl_walk, qid);
 }
 
-static inline void chcr_add_hash_src_ent(struct ahash_request *req,
-					   struct ulptx_sgl *ulptx,
-					   struct hash_wr_param *param)
+void chcr_add_hash_src_ent(struct ahash_request *req,
+			   struct ulptx_sgl *ulptx,
+			   struct hash_wr_param *param)
 {
 	struct ulptx_walk ulp_walk;
 	struct chcr_ahash_req_ctx *reqctx = ahash_request_ctx(req);
@@ -2402,9 +2406,8 @@
 	}
 }
 
-
-static inline int chcr_hash_dma_map(struct device *dev,
-			     struct ahash_request *req)
+int chcr_hash_dma_map(struct device *dev,
+		      struct ahash_request *req)
 {
 	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
 	int error = 0;
@@ -2414,13 +2417,13 @@
 	error = dma_map_sg(dev, req->src, sg_nents(req->src),
 			   DMA_TO_DEVICE);
 	if (!error)
-		return error;
+		return -ENOMEM;
 	req_ctx->is_sg_map = 1;
 	return 0;
 }
 
-static inline void chcr_hash_dma_unmap(struct device *dev,
-			     struct ahash_request *req)
+void chcr_hash_dma_unmap(struct device *dev,
+			 struct ahash_request *req)
 {
 	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
 
@@ -2433,9 +2436,8 @@
 
 }
 
-
-static int chcr_cipher_dma_map(struct device *dev,
-			     struct ablkcipher_request *req)
+int chcr_cipher_dma_map(struct device *dev,
+			struct ablkcipher_request *req)
 {
 	int error;
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
@@ -2469,8 +2471,9 @@
 	dma_unmap_single(dev, reqctx->iv_dma, IV, DMA_BIDIRECTIONAL);
 	return -ENOMEM;
 }
-static void chcr_cipher_dma_unmap(struct device *dev,
-				  struct ablkcipher_request *req)
+
+void chcr_cipher_dma_unmap(struct device *dev,
+			   struct ablkcipher_request *req)
 {
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
 
@@ -3375,6 +3378,40 @@
 	aeadctx->enckey_len = 0;
 	return -EINVAL;
 }
+
+static int chcr_aead_op(struct aead_request *req,
+			unsigned short op_type,
+			int size,
+			create_wr_t create_wr_fn)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct uld_ctx *u_ctx;
+	struct sk_buff *skb;
+
+	if (!a_ctx(tfm)->dev) {
+		pr_err("chcr : %s : No crypto device.\n", __func__);
+		return -ENXIO;
+	}
+	u_ctx = ULD_CTX(a_ctx(tfm));
+	if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+				   a_ctx(tfm)->tx_qidx)) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EBUSY;
+	}
+
+	/* Form a WR from req */
+	skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size,
+			   op_type);
+
+	if (IS_ERR(skb) || !skb)
+		return PTR_ERR(skb);
+
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx);
+	chcr_send_wr(skb);
+	return -EINPROGRESS;
+}
+
 static int chcr_aead_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
@@ -3427,38 +3464,6 @@
 	}
 }
 
-static int chcr_aead_op(struct aead_request *req,
-			  unsigned short op_type,
-			  int size,
-			  create_wr_t create_wr_fn)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct uld_ctx *u_ctx;
-	struct sk_buff *skb;
-
-	if (!a_ctx(tfm)->dev) {
-		pr_err("chcr : %s : No crypto device.\n", __func__);
-		return -ENXIO;
-	}
-	u_ctx = ULD_CTX(a_ctx(tfm));
-	if (cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
-				   a_ctx(tfm)->tx_qidx)) {
-		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
-			return -EBUSY;
-	}
-
-	/* Form a WR from req */
-	skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[a_ctx(tfm)->rx_qidx], size,
-			   op_type);
-
-	if (IS_ERR(skb) || !skb)
-		return PTR_ERR(skb);
-
-	skb->dev = u_ctx->lldi.ports[0];
-	set_wr_txq(skb, CPL_PRIORITY_DATA, a_ctx(tfm)->tx_qidx);
-	chcr_send_wr(skb);
-	return -EINPROGRESS;
-}
 static struct chcr_alg_template driver_algs[] = {
 	/* AES-CBC */
 	{
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
index 96c9335..d1673a5 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -226,15 +226,6 @@
 #define SPACE_LEFT(len) \
 	((SGE_MAX_WR_LEN - WR_MIN_LEN - (len)))
 
-unsigned int sgl_ent_len[] = {0, 0, 16, 24, 40, 48, 64, 72, 88,
-				96, 112, 120, 136, 144, 160, 168, 184,
-				192, 208, 216, 232, 240, 256, 264, 280,
-				288, 304, 312, 328, 336, 352, 360, 376};
-unsigned int dsgl_ent_len[] = {0, 32, 32, 48, 48, 64, 64, 80, 80,
-				112, 112, 128, 128, 144, 144, 160, 160,
-				192, 192, 208, 208, 224, 224, 240, 240,
-				272, 272, 288, 288, 304, 304, 320, 320};
-
 struct algo_param {
 	unsigned int auth_mode;
 	unsigned int mk_size;
@@ -404,10 +395,4 @@
 	return *(u32 *)(&bytes[0]);
 }
 
-static u32 round_constant[11] = {
-	0x01000000, 0x02000000, 0x04000000, 0x08000000,
-	0x10000000, 0x20000000, 0x40000000, 0x80000000,
-	0x1B000000, 0x36000000, 0x6C000000
-};
-
 #endif /* __CHCR_ALGO_H__ */
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index f5a2624..04f277c 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -48,6 +48,9 @@
 	.add = chcr_uld_add,
 	.state_change = chcr_uld_state_change,
 	.rx_handler = chcr_uld_rx_handler,
+#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+	.tx_handler = chcr_uld_tx_handler,
+#endif /* CONFIG_CHELSIO_IPSEC_INLINE */
 };
 
 struct uld_ctx *assign_chcr_device(void)
@@ -164,6 +167,10 @@
 		goto out;
 	}
 	u_ctx->lldi = *lld;
+#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+	if (lld->crypto & ULP_CRYPTO_IPSEC_INLINE)
+		chcr_add_xfrmops(lld);
+#endif /* CONFIG_CHELSIO_IPSEC_INLINE */
 out:
 	return u_ctx;
 }
@@ -187,6 +194,13 @@
 	return 0;
 }
 
+#ifdef CONFIG_CHELSIO_IPSEC_INLINE
+int chcr_uld_tx_handler(struct sk_buff *skb, struct net_device *dev)
+{
+	return chcr_ipsec_xmit(skb, dev);
+}
+#endif /* CONFIG_CHELSIO_IPSEC_INLINE */
+
 static int chcr_uld_state_change(void *handle, enum cxgb4_state state)
 {
 	struct uld_ctx *u_ctx = handle;
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index 94e7412..3c29ee0 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -39,6 +39,7 @@
 #include <crypto/algapi.h>
 #include "t4_hw.h"
 #include "cxgb4.h"
+#include "t4_msg.h"
 #include "cxgb4_uld.h"
 
 #define DRV_MODULE_NAME "chcr"
@@ -89,12 +90,49 @@
 	struct chcr_dev *dev;
 };
 
+struct chcr_ipsec_req {
+	struct ulp_txpkt ulptx;
+	struct ulptx_idata sc_imm;
+	struct cpl_tx_sec_pdu sec_cpl;
+	struct _key_ctx key_ctx;
+};
+
+struct chcr_ipsec_wr {
+	struct fw_ulptx_wr wreq;
+	struct chcr_ipsec_req req;
+};
+
+struct ipsec_sa_entry {
+	int hmac_ctrl;
+	unsigned int enckey_len;
+	unsigned int kctx_len;
+	unsigned int authsize;
+	__be32 key_ctx_hdr;
+	char salt[MAX_SALT];
+	char key[2 * AES_MAX_KEY_SIZE];
+};
+
+/*
+ *      sgl_len - calculates the size of an SGL of the given capacity
+ *      @n: the number of SGL entries
+ *      Calculates the number of flits needed for a scatter/gather list that
+ *      can hold the given number of entries.
+ */
+static inline unsigned int sgl_len(unsigned int n)
+{
+	n--;
+	return (3 * n) / 2 + (n & 1) + 2;
+}
+
 struct uld_ctx *assign_chcr_device(void);
 int chcr_send_wr(struct sk_buff *skb);
 int start_crypto(void);
 int stop_crypto(void);
 int chcr_uld_rx_handler(void *handle, const __be64 *rsp,
 			const struct pkt_gl *pgl);
+int chcr_uld_tx_handler(struct sk_buff *skb, struct net_device *dev);
 int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 		     int err);
+int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev);
+void chcr_add_xfrmops(const struct cxgb4_lld_info *lld);
 #endif /* __CHCR_CORE_H__ */
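
sgl_len(), moved into chcr_core.h above so the inline-IPsec path can share it, counts 8-byte flits for a ULPTX scatter/gather list: the header plus the first address fill two flits and every further pair of entries adds three, with an odd leftover rounded up. A standalone sketch that simply exercises the formula from the header (the printed values are illustrative, not taken from hardware documentation):

#include <stdio.h>

/* Same formula as the sgl_len() helper moved into chcr_core.h. */
static unsigned int sgl_len(unsigned int n)
{
	n--;
	return (3 * n) / 2 + (n & 1) + 2;
}

int main(void)
{
	unsigned int n;

	for (n = 1; n <= 8; n++)
		printf("%u SGL entries -> %2u flits (%3u bytes)\n",
		       n, sgl_len(n), sgl_len(n) * 8);
	return 0;
}
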
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 94a87e3..ea2c578 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -210,8 +210,6 @@
 	struct phys_sge_pairs *to;
 };
 
-
-
 struct chcr_gcm_ctx {
 	u8 ghash_h[AEAD_H_SIZE];
 };
@@ -227,8 +225,6 @@
 	struct chcr_authenc_ctx authenc[0];
 };
 
-
-
 struct chcr_aead_ctx {
 	__be32 key_ctx_hdr;
 	unsigned int enckey_len;
@@ -240,8 +236,6 @@
 	struct	__aead_ctx ctx[0];
 };
 
-
-
 struct hmac_ctx {
 	struct crypto_shash *base_hash;
 	u8 ipad[CHCR_HASH_MAX_BLOCK_SIZE_128];
@@ -307,44 +301,29 @@
 				       int size,
 				       unsigned short op_type);
 
-static int chcr_aead_op(struct aead_request *req_base,
-			  unsigned short op_type,
-			  int size,
-			  create_wr_t create_wr_fn);
-static inline int get_aead_subtype(struct crypto_aead *aead);
-static int chcr_handle_cipher_resp(struct ablkcipher_request *req,
-				   unsigned char *input, int err);
-static void chcr_verify_tag(struct aead_request *req, u8 *input, int *err);
-static int chcr_aead_dma_map(struct device *dev, struct aead_request *req,
-			     unsigned short op_type);
-static void chcr_aead_dma_unmap(struct device *dev, struct aead_request
-				*req, unsigned short op_type);
-static inline void chcr_add_aead_dst_ent(struct aead_request *req,
-				    struct cpl_rx_phys_dsgl *phys_cpl,
-				    unsigned int assoclen,
-				    unsigned short op_type,
-				    unsigned short qid);
-static inline void chcr_add_aead_src_ent(struct aead_request *req,
-				    struct ulptx_sgl *ulptx,
-				    unsigned int assoclen,
-				    unsigned short op_type);
-static inline void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
-					   struct ulptx_sgl *ulptx,
-					   struct  cipher_wr_param *wrparam);
-static int chcr_cipher_dma_map(struct device *dev,
-			       struct ablkcipher_request *req);
-static void chcr_cipher_dma_unmap(struct device *dev,
-				  struct ablkcipher_request *req);
-static inline void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
-					   struct cpl_rx_phys_dsgl *phys_cpl,
-					   struct  cipher_wr_param *wrparam,
-					   unsigned short qid);
+void chcr_verify_tag(struct aead_request *req, u8 *input, int *err);
+int chcr_aead_dma_map(struct device *dev, struct aead_request *req,
+		      unsigned short op_type);
+void chcr_aead_dma_unmap(struct device *dev, struct aead_request *req,
+			 unsigned short op_type);
+void chcr_add_aead_dst_ent(struct aead_request *req,
+			   struct cpl_rx_phys_dsgl *phys_cpl,
+			   unsigned int assoclen, unsigned short op_type,
+			   unsigned short qid);
+void chcr_add_aead_src_ent(struct aead_request *req, struct ulptx_sgl *ulptx,
+			   unsigned int assoclen, unsigned short op_type);
+void chcr_add_cipher_src_ent(struct ablkcipher_request *req,
+			     struct ulptx_sgl *ulptx,
+			     struct  cipher_wr_param *wrparam);
+int chcr_cipher_dma_map(struct device *dev, struct ablkcipher_request *req);
+void chcr_cipher_dma_unmap(struct device *dev, struct ablkcipher_request *req);
+void chcr_add_cipher_dst_ent(struct ablkcipher_request *req,
+			     struct cpl_rx_phys_dsgl *phys_cpl,
+			     struct  cipher_wr_param *wrparam,
+			     unsigned short qid);
 int sg_nents_len_skip(struct scatterlist *sg, u64 len, u64 skip);
-static inline void chcr_add_hash_src_ent(struct ahash_request *req,
-					 struct ulptx_sgl *ulptx,
-					 struct hash_wr_param *param);
-static inline int chcr_hash_dma_map(struct device *dev,
-				    struct ahash_request *req);
-static inline void chcr_hash_dma_unmap(struct device *dev,
-				       struct ahash_request *req);
+void chcr_add_hash_src_ent(struct ahash_request *req, struct ulptx_sgl *ulptx,
+			   struct hash_wr_param *param);
+int chcr_hash_dma_map(struct device *dev, struct ahash_request *req);
+void chcr_hash_dma_unmap(struct device *dev, struct ahash_request *req);
 #endif /* __CHCR_CRYPTO_H__ */
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
new file mode 100644
index 0000000..db1e241
--- /dev/null
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -0,0 +1,654 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Written and Maintained by:
+ *	Atul Gupta (atul.gupta@chelsio.com)
+ */
+
+#define pr_fmt(fmt) "chcr:" fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/highmem.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/netdevice.h>
+#include <net/esp.h>
+#include <net/xfrm.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <crypto/authenc.h>
+#include <crypto/internal/aead.h>
+#include <crypto/null.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/aead.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/hash.h>
+
+#include "chcr_core.h"
+#include "chcr_algo.h"
+#include "chcr_crypto.h"
+
+/*
+ * Max Tx descriptor space we allow for an Ethernet packet to be inlined
+ * into a WR.
+ */
+#define MAX_IMM_TX_PKT_LEN 256
+#define GCM_ESP_IV_SIZE     8
+
+static int chcr_xfrm_add_state(struct xfrm_state *x);
+static void chcr_xfrm_del_state(struct xfrm_state *x);
+static void chcr_xfrm_free_state(struct xfrm_state *x);
+static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
+
+static const struct xfrmdev_ops chcr_xfrmdev_ops = {
+	.xdo_dev_state_add      = chcr_xfrm_add_state,
+	.xdo_dev_state_delete   = chcr_xfrm_del_state,
+	.xdo_dev_state_free     = chcr_xfrm_free_state,
+	.xdo_dev_offload_ok     = chcr_ipsec_offload_ok,
+};
+
+/* Add offload xfrms to Chelsio Interface */
+void chcr_add_xfrmops(const struct cxgb4_lld_info *lld)
+{
+	struct net_device *netdev = NULL;
+	int i;
+
+	for (i = 0; i < lld->nports; i++) {
+		netdev = lld->ports[i];
+		if (!netdev)
+			continue;
+		netdev->xfrmdev_ops = &chcr_xfrmdev_ops;
+		netdev->hw_enc_features |= NETIF_F_HW_ESP;
+		netdev->features |= NETIF_F_HW_ESP;
+		rtnl_lock();
+		netdev_change_features(netdev);
+		rtnl_unlock();
+	}
+}
+
+static inline int chcr_ipsec_setauthsize(struct xfrm_state *x,
+					 struct ipsec_sa_entry *sa_entry)
+{
+	int hmac_ctrl;
+	int authsize = x->aead->alg_icv_len / 8;
+
+	sa_entry->authsize = authsize;
+
+	switch (authsize) {
+	case ICV_8:
+		hmac_ctrl = CHCR_SCMD_HMAC_CTRL_DIV2;
+		break;
+	case ICV_12:
+		hmac_ctrl = CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT;
+		break;
+	case ICV_16:
+		hmac_ctrl = CHCR_SCMD_HMAC_CTRL_NO_TRUNC;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return hmac_ctrl;
+}
+
+static inline int chcr_ipsec_setkey(struct xfrm_state *x,
+				    struct ipsec_sa_entry *sa_entry)
+{
+	struct crypto_cipher *cipher;
+	int keylen = (x->aead->alg_key_len + 7) / 8;
+	unsigned char *key = x->aead->alg_key;
+	int ck_size, key_ctx_size = 0;
+	unsigned char ghash_h[AEAD_H_SIZE];
+	int ret = 0;
+
+	if (keylen > 3) {
+		keylen -= 4;  /* nonce/salt is present in the last 4 bytes */
+		memcpy(sa_entry->salt, key + keylen, 4);
+	}
+
+	if (keylen == AES_KEYSIZE_128) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+	} else if (keylen == AES_KEYSIZE_192) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+	} else if (keylen == AES_KEYSIZE_256) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+	} else {
+		pr_err("GCM: Invalid key length %d\n", keylen);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	memcpy(sa_entry->key, key, keylen);
+	sa_entry->enckey_len = keylen;
+	key_ctx_size = sizeof(struct _key_ctx) +
+			      ((DIV_ROUND_UP(keylen, 16)) << 4) +
+			      AEAD_H_SIZE;
+
+	sa_entry->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size,
+						 CHCR_KEYCTX_MAC_KEY_SIZE_128,
+						 0, 0,
+						 key_ctx_size >> 4);
+
+	/* Calculate the hash subkey H = CIPH_K(0^128).
+	 * It will go into the key context.
+	 */
+	cipher = crypto_alloc_cipher("aes-generic", 0, 0);
+	if (IS_ERR(cipher)) {
+		sa_entry->enckey_len = 0;
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = crypto_cipher_setkey(cipher, key, keylen);
+	if (ret) {
+		sa_entry->enckey_len = 0;
+		goto out1;
+	}
+	memset(ghash_h, 0, AEAD_H_SIZE);
+	crypto_cipher_encrypt_one(cipher, ghash_h, ghash_h);
+	memcpy(sa_entry->key + (DIV_ROUND_UP(sa_entry->enckey_len, 16) *
+	       16), ghash_h, AEAD_H_SIZE);
+	sa_entry->kctx_len = ((DIV_ROUND_UP(sa_entry->enckey_len, 16)) << 4) +
+			      AEAD_H_SIZE;
+out1:
+	crypto_free_cipher(cipher);
+out:
+	return ret;
+}
+
+/*
+ * chcr_xfrm_add_state
+ * returns 0 on success, negative error if the request could not be sent
+ * to the hardware, positive error if the hardware returned a bad response
+ */
+static int chcr_xfrm_add_state(struct xfrm_state *x)
+{
+	struct ipsec_sa_entry *sa_entry;
+	int res = 0;
+
+	if (x->props.aalgo != SADB_AALG_NONE) {
+		pr_debug("CHCR: Cannot offload authenticated xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.calgo != SADB_X_CALG_NONE) {
+		pr_debug("CHCR: Cannot offload compressed xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.flags & XFRM_STATE_ESN) {
+		pr_debug("CHCR: Cannot offload ESN xfrm states\n");
+		return -EINVAL;
+	}
+	if (x->props.family != AF_INET &&
+	    x->props.family != AF_INET6) {
+		pr_debug("CHCR: Only IPv4/6 xfrm state offloaded\n");
+		return -EINVAL;
+	}
+	if (x->props.mode != XFRM_MODE_TRANSPORT &&
+	    x->props.mode != XFRM_MODE_TUNNEL) {
+		pr_debug("CHCR: Only transport and tunnel xfrm offload\n");
+		return -EINVAL;
+	}
+	if (x->id.proto != IPPROTO_ESP) {
+		pr_debug("CHCR: Only ESP xfrm state offloaded\n");
+		return -EINVAL;
+	}
+	if (x->encap) {
+		pr_debug("CHCR: Encapsulated xfrm state not offloaded\n");
+		return -EINVAL;
+	}
+	if (!x->aead) {
+		pr_debug("CHCR: Cannot offload xfrm states without aead\n");
+		return -EINVAL;
+	}
+	if (x->aead->alg_icv_len != 128 &&
+	    x->aead->alg_icv_len != 96) {
+		pr_debug("CHCR: Cannot offload xfrm states with AEAD ICV length other than 96b & 128b\n");
+		return -EINVAL;
+	}
+	if ((x->aead->alg_key_len != 128 + 32) &&
+	    (x->aead->alg_key_len != 256 + 32)) {
+		pr_debug("CHCR: Cannot offload xfrm states with AEAD key length other than 128/256 bit\n");
+		return -EINVAL;
+	}
+	if (x->tfcpad) {
+		pr_debug("CHCR: Cannot offload xfrm states with tfc padding\n");
+		return -EINVAL;
+	}
+	if (!x->geniv) {
+		pr_debug("CHCR: Cannot offload xfrm states without geniv\n");
+		return -EINVAL;
+	}
+	if (strcmp(x->geniv, "seqiv")) {
+		pr_debug("CHCR: Cannot offload xfrm states with geniv other than seqiv\n");
+		return -EINVAL;
+	}
+
+	sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
+	if (!sa_entry) {
+		res = -ENOMEM;
+		goto out;
+	}
+
+	sa_entry->hmac_ctrl = chcr_ipsec_setauthsize(x, sa_entry);
+	chcr_ipsec_setkey(x, sa_entry);
+	x->xso.offload_handle = (unsigned long)sa_entry;
+	try_module_get(THIS_MODULE);
+out:
+	return res;
+}
+
+static void chcr_xfrm_del_state(struct xfrm_state *x)
+{
+	/* do nothing */
+	if (!x->xso.offload_handle)
+		return;
+}
+
+static void chcr_xfrm_free_state(struct xfrm_state *x)
+{
+	struct ipsec_sa_entry *sa_entry;
+
+	if (!x->xso.offload_handle)
+		return;
+
+	sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle;
+	kfree(sa_entry);
+	module_put(THIS_MODULE);
+}
+
+static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
+{
+	/* Offload with IP options is not supported yet */
+	if (ip_hdr(skb)->ihl > 5)
+		return false;
+
+	return true;
+}
+
+static inline int is_eth_imm(const struct sk_buff *skb, unsigned int kctx_len)
+{
+	int hdrlen = sizeof(struct chcr_ipsec_req) + kctx_len;
+
+	hdrlen += sizeof(struct cpl_tx_pkt);
+	if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen)
+		return hdrlen;
+	return 0;
+}
+
+static inline unsigned int calc_tx_sec_flits(const struct sk_buff *skb,
+					     unsigned int kctx_len)
+{
+	unsigned int flits;
+	int hdrlen = is_eth_imm(skb, kctx_len);
+
+	/* If the skb is small enough, we can pump it out as a work request
+	 * with only immediate data.  In that case we only need the TX Packet
+	 * header plus the skb data in the Work Request.
+	 */
+
+	if (hdrlen)
+		return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64));
+
+	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
+
+	/* Otherwise, we're going to have to construct a Scatter gather list
+	 * of the skb body and fragments.  We also include the flits necessary
+	 * for the TX Packet Work Request and CPL.  We always have a firmware
+	 * Write Header (incorporated as part of the cpl_tx_pkt_lso and
+	 * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
+	 * message or, if we're doing a Large Send Offload, an LSO CPL message
+	 * with an embedded TX Packet Write CPL message.
+	 */
+	flits += (sizeof(struct fw_ulptx_wr) +
+		  sizeof(struct chcr_ipsec_req) +
+		  kctx_len +
+		  sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
+	return flits;
+}
+
+inline void *copy_cpltx_pktxt(struct sk_buff *skb,
+				struct net_device *dev,
+				void *pos)
+{
+	struct adapter *adap;
+	struct port_info *pi;
+	struct sge_eth_txq *q;
+	struct cpl_tx_pkt_core *cpl;
+	u64 cntrl = 0;
+	u32 ctrl0, qidx;
+
+	pi = netdev_priv(dev);
+	adap = pi->adapter;
+	qidx = skb->queue_mapping;
+	q = &adap->sge.ethtxq[qidx + pi->first_qset];
+
+	cpl = (struct cpl_tx_pkt_core *)pos;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
+	ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_INTF_V(pi->tx_chan) |
+			       TXPKT_PF_V(adap->pf);
+	if (skb_vlan_tag_present(skb)) {
+		q->vlan_ins++;
+		cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
+	}
+
+	cpl->ctrl0 = htonl(ctrl0);
+	cpl->pack = htons(0);
+	cpl->len = htons(skb->len);
+	cpl->ctrl1 = cpu_to_be64(cntrl);
+
+	pos += sizeof(struct cpl_tx_pkt_core);
+	return pos;
+}
+
+inline void *copy_key_cpltx_pktxt(struct sk_buff *skb,
+				struct net_device *dev,
+				void *pos,
+				struct ipsec_sa_entry *sa_entry)
+{
+	struct adapter *adap;
+	struct port_info *pi;
+	struct sge_eth_txq *q;
+	unsigned int len, qidx;
+	struct _key_ctx *key_ctx;
+	int left, eoq, key_len;
+
+	pi = netdev_priv(dev);
+	adap = pi->adapter;
+	qidx = skb->queue_mapping;
+	q = &adap->sge.ethtxq[qidx + pi->first_qset];
+	len = sa_entry->enckey_len + sizeof(struct cpl_tx_pkt_core);
+	key_len = sa_entry->kctx_len;
+
+	/* end of queue, reset pos to start of queue */
+	eoq = (void *)q->q.stat - pos;
+	left = eoq;
+	if (!eoq) {
+		pos = q->q.desc;
+		left = 64 * q->q.size;
+	}
+
+	/* Copy the Key context header */
+	key_ctx = (struct _key_ctx *)pos;
+	key_ctx->ctx_hdr = sa_entry->key_ctx_hdr;
+	memcpy(key_ctx->salt, sa_entry->salt, MAX_SALT);
+	pos += sizeof(struct _key_ctx);
+	left -= sizeof(struct _key_ctx);
+
+	if (likely(len <= left)) {
+		memcpy(key_ctx->key, sa_entry->key, key_len);
+		pos += key_len;
+	} else {
+		if (key_len <= left) {
+			memcpy(pos, sa_entry->key, key_len);
+			pos += key_len;
+		} else {
+			memcpy(pos, sa_entry->key, left);
+			memcpy(q->q.desc, sa_entry->key + left,
+			       key_len - left);
+			pos = (u8 *)q->q.desc + (key_len - left);
+		}
+	}
+	/* Copy CPL TX PKT XT */
+	pos = copy_cpltx_pktxt(skb, dev, pos);
+
+	return pos;
+}
+
+inline void *chcr_crypto_wreq(struct sk_buff *skb,
+			       struct net_device *dev,
+			       void *pos,
+			       int credits,
+			       struct ipsec_sa_entry *sa_entry)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	unsigned int immdatalen = 0;
+	unsigned int ivsize = GCM_ESP_IV_SIZE;
+	struct chcr_ipsec_wr *wr;
+	unsigned int flits;
+	u32 wr_mid;
+	int qidx = skb_get_queue_mapping(skb);
+	struct sge_eth_txq *q = &adap->sge.ethtxq[qidx + pi->first_qset];
+	unsigned int kctx_len = sa_entry->kctx_len;
+	int qid = q->q.cntxt_id;
+
+	atomic_inc(&adap->chcr_stats.ipsec_cnt);
+
+	flits = calc_tx_sec_flits(skb, kctx_len);
+
+	if (is_eth_imm(skb, kctx_len))
+		immdatalen = skb->len;
+
+	/* WR Header */
+	wr = (struct chcr_ipsec_wr *)pos;
+	wr->wreq.op_to_compl = htonl(FW_WR_OP_V(FW_ULPTX_WR));
+	wr_mid = FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
+
+	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
+		netif_tx_stop_queue(q->txq);
+		q->q.stops++;
+		wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
+	}
+	wr_mid |= FW_ULPTX_WR_DATA_F;
+	wr->wreq.flowid_len16 = htonl(wr_mid);
+
+	/* ULPTX */
+	wr->req.ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(pi->port_id, qid);
+	wr->req.ulptx.len = htonl(DIV_ROUND_UP(flits, 2)  - 1);
+
+	/* Sub-command */
+	wr->req.sc_imm.cmd_more = FILL_CMD_MORE(immdatalen);
+	wr->req.sc_imm.len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) +
+					 sizeof(wr->req.key_ctx) +
+					 kctx_len +
+					 sizeof(struct cpl_tx_pkt_core) +
+					 immdatalen);
+
+	/* CPL_SEC_PDU */
+	wr->req.sec_cpl.op_ivinsrtofst = htonl(
+				CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) |
+				CPL_TX_SEC_PDU_CPLLEN_V(2) |
+				CPL_TX_SEC_PDU_PLACEHOLDER_V(1) |
+				CPL_TX_SEC_PDU_IVINSRTOFST_V(
+				(skb_transport_offset(skb) +
+				sizeof(struct ip_esp_hdr) + 1)));
+
+	wr->req.sec_cpl.pldlen = htonl(skb->len);
+
+	wr->req.sec_cpl.aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(
+				(skb_transport_offset(skb) + 1),
+				(skb_transport_offset(skb) +
+				 sizeof(struct ip_esp_hdr)),
+				(skb_transport_offset(skb) +
+				 sizeof(struct ip_esp_hdr) +
+				 GCM_ESP_IV_SIZE + 1), 0);
+
+	wr->req.sec_cpl.cipherstop_lo_authinsert =
+		FILL_SEC_CPL_AUTHINSERT(0, skb_transport_offset(skb) +
+					   sizeof(struct ip_esp_hdr) +
+					   GCM_ESP_IV_SIZE + 1,
+					   sa_entry->authsize,
+					   sa_entry->authsize);
+	wr->req.sec_cpl.seqno_numivs =
+		FILL_SEC_CPL_SCMD0_SEQNO(CHCR_ENCRYPT_OP, 1,
+					 CHCR_SCMD_CIPHER_MODE_AES_GCM,
+					 CHCR_SCMD_AUTH_MODE_GHASH,
+					 sa_entry->hmac_ctrl,
+					 ivsize >> 1);
+	wr->req.sec_cpl.ivgen_hdrlen =  FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 1,
+								  0, 0, 0);
+
+	pos += sizeof(struct fw_ulptx_wr) +
+	       sizeof(struct ulp_txpkt) +
+	       sizeof(struct ulptx_idata) +
+	       sizeof(struct cpl_tx_sec_pdu);
+
+	pos = copy_key_cpltx_pktxt(skb, dev, pos, sa_entry);
+
+	return pos;
+}
+
+/**
+ *      flits_to_desc - returns the num of Tx descriptors for the given flits
+ *      @n: the number of flits
+ *
+ *      Returns the number of Tx descriptors needed for the supplied number
+ *      of flits.
+ */
+static inline unsigned int flits_to_desc(unsigned int n)
+{
+	WARN_ON(n > SGE_MAX_WR_LEN / 8);
+	return DIV_ROUND_UP(n, 8);
+}
+
+static inline unsigned int txq_avail(const struct sge_txq *q)
+{
+	return q->size - 1 - q->in_use;
+}
+
+static void eth_txq_stop(struct sge_eth_txq *q)
+{
+	netif_tx_stop_queue(q->txq);
+	q->q.stops++;
+}
+
+static inline void txq_advance(struct sge_txq *q, unsigned int n)
+{
+	q->in_use += n;
+	q->pidx += n;
+	if (q->pidx >= q->size)
+		q->pidx -= q->size;
+}
+
+/*
+ *      chcr_ipsec_xmit called from ULD Tx handler
+ */
+int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct xfrm_state *x = xfrm_input_state(skb);
+	struct ipsec_sa_entry *sa_entry;
+	u64 *pos, *end, *before, *sgl;
+	int qidx, left, credits;
+	unsigned int flits = 0, ndesc, kctx_len;
+	struct adapter *adap;
+	struct sge_eth_txq *q;
+	struct port_info *pi;
+	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+	bool immediate = false;
+
+	if (!x->xso.offload_handle)
+		return NETDEV_TX_BUSY;
+
+	sa_entry = (struct ipsec_sa_entry *)x->xso.offload_handle;
+	kctx_len = sa_entry->kctx_len;
+
+	if (skb->sp->len != 1) {
+out_free:       dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	pi = netdev_priv(dev);
+	adap = pi->adapter;
+	qidx = skb->queue_mapping;
+	q = &adap->sge.ethtxq[qidx + pi->first_qset];
+
+	cxgb4_reclaim_completed_tx(adap, &q->q, true);
+
+	flits = calc_tx_sec_flits(skb, sa_entry->kctx_len);
+	ndesc = flits_to_desc(flits);
+	credits = txq_avail(&q->q) - ndesc;
+
+	if (unlikely(credits < 0)) {
+		eth_txq_stop(q);
+		dev_err(adap->pdev_dev,
+			"%s: Tx ring %u full while queue awake! cred:%d %d %d flits:%d\n",
+			dev->name, qidx, credits, ndesc, txq_avail(&q->q),
+			flits);
+		return NETDEV_TX_BUSY;
+	}
+
+	if (is_eth_imm(skb, kctx_len))
+		immediate = true;
+
+	if (!immediate &&
+	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
+		q->mapping_err++;
+		goto out_free;
+	}
+
+	pos = (u64 *)&q->q.desc[q->q.pidx];
+	before = (u64 *)pos;
+	end = (u64 *)pos + flits;
+	/* Setup IPSec CPL */
+	pos = (void *)chcr_crypto_wreq(skb, dev, (void *)pos,
+				       credits, sa_entry);
+	if (before > (u64 *)pos) {
+		left = (u8 *)end - (u8 *)q->q.stat;
+		end = (void *)q->q.desc + left;
+	}
+	if (pos == (u64 *)q->q.stat) {
+		left = (u8 *)end - (u8 *)q->q.stat;
+		end = (void *)q->q.desc + left;
+		pos = (void *)q->q.desc;
+	}
+
+	sgl = (void *)pos;
+	if (immediate) {
+		cxgb4_inline_tx_skb(skb, &q->q, sgl);
+		dev_consume_skb_any(skb);
+	} else {
+		int last_desc;
+
+		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end,
+				0, addr);
+		skb_orphan(skb);
+
+		last_desc = q->q.pidx + ndesc - 1;
+		if (last_desc >= q->q.size)
+			last_desc -= q->q.size;
+		q->q.sdesc[last_desc].skb = skb;
+		q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl;
+	}
+	txq_advance(&q->q, ndesc);
+
+	cxgb4_ring_tx_db(adap, &q->q, ndesc);
+	return NETDEV_TX_OK;
+}
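
chcr_ipsec_setkey() above treats the last four bytes of the xfrm AEAD key as the nonce salt, which matches the rfc4106(gcm(aes)) convention of appending a 4-octet salt to the AES key material. A small standalone sketch of that split (the struct and function names here are hypothetical; only the AES key sizes are accepted):

#include <stdio.h>
#include <string.h>

#define GCM_ESP_SALT_SIZE	4
#define AES_MAX_KEY_SIZE	32	/* mirrors <crypto/aes.h> */

struct gcm_esp_key {			/* hypothetical container */
	unsigned char key[AES_MAX_KEY_SIZE];
	unsigned int keylen;
	unsigned char salt[GCM_ESP_SALT_SIZE];
};

/* Split RFC 4106 key material: AES key followed by a 4-octet salt. */
static int split_rfc4106_key(const unsigned char *blob, unsigned int len,
			     struct gcm_esp_key *out)
{
	unsigned int keylen;

	if (len < GCM_ESP_SALT_SIZE)
		return -1;
	keylen = len - GCM_ESP_SALT_SIZE;
	if (keylen != 16 && keylen != 24 && keylen != 32)
		return -1;		/* not an AES key size */

	out->keylen = keylen;
	memcpy(out->key, blob, keylen);
	memcpy(out->salt, blob + keylen, GCM_ESP_SALT_SIZE);
	return 0;
}

int main(void)
{
	unsigned char blob[20] = { 0 };	/* 128-bit key + salt, all zero */
	struct gcm_esp_key k;

	if (!split_rfc4106_key(blob, sizeof(blob), &k))
		printf("AES key: %u bytes, salt: %02x%02x%02x%02x\n",
		       k.keylen, k.salt[0], k.salt[1], k.salt[2], k.salt[3]);
	return 0;
}
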
diff --git a/drivers/crypto/exynos-rng.c b/drivers/crypto/exynos-rng.c
index 451620b..4a06092 100644
--- a/drivers/crypto/exynos-rng.c
+++ b/drivers/crypto/exynos-rng.c
@@ -22,12 +22,18 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 
 #include <crypto/internal/rng.h>
 
 #define EXYNOS_RNG_CONTROL		0x0
 #define EXYNOS_RNG_STATUS		0x10
+
+#define EXYNOS_RNG_SEED_CONF		0x14
+#define EXYNOS_RNG_GEN_PRNG		BIT(1)
+
 #define EXYNOS_RNG_SEED_BASE		0x140
 #define EXYNOS_RNG_SEED(n)		(EXYNOS_RNG_SEED_BASE + (n * 0x4))
 #define EXYNOS_RNG_OUT_BASE		0x160
@@ -43,13 +49,21 @@
 #define EXYNOS_RNG_SEED_REGS		5
 #define EXYNOS_RNG_SEED_SIZE		(EXYNOS_RNG_SEED_REGS * 4)
 
+enum exynos_prng_type {
+	EXYNOS_PRNG_UNKNOWN = 0,
+	EXYNOS_PRNG_EXYNOS4,
+	EXYNOS_PRNG_EXYNOS5,
+};
+
 /*
- * Driver re-seeds itself with generated random numbers to increase
- * the randomness.
+ * Driver re-seeds itself with generated random numbers to hinder
+ * backtracking of the original seed.
  *
  * Time for next re-seed in ms.
  */
-#define EXYNOS_RNG_RESEED_TIME		100
+#define EXYNOS_RNG_RESEED_TIME		1000
+#define EXYNOS_RNG_RESEED_BYTES		65536
+
 /*
  * In polling mode, do not wait infinitely for the engine to finish the work.
  */
@@ -63,13 +77,17 @@
 /* Device associated memory */
 struct exynos_rng_dev {
 	struct device			*dev;
+	enum exynos_prng_type		type;
 	void __iomem			*mem;
 	struct clk			*clk;
+	struct mutex			lock;
 	/* Generated numbers stored for seeding during resume */
 	u8				seed_save[EXYNOS_RNG_SEED_SIZE];
 	unsigned int			seed_save_len;
 	/* Time of last seeding in jiffies */
 	unsigned long			last_seeding;
+	/* Bytes generated since last seeding */
+	unsigned long			bytes_seeding;
 };
 
 static struct exynos_rng_dev *exynos_rng_dev;
@@ -114,39 +132,12 @@
 	}
 
 	rng->last_seeding = jiffies;
+	rng->bytes_seeding = 0;
 
 	return 0;
 }
 
 /*
- * Read from output registers and put the data under 'dst' array,
- * up to dlen bytes.
- *
- * Returns number of bytes actually stored in 'dst' (dlen
- * or EXYNOS_RNG_SEED_SIZE).
- */
-static unsigned int exynos_rng_copy_random(struct exynos_rng_dev *rng,
-					   u8 *dst, unsigned int dlen)
-{
-	unsigned int cnt = 0;
-	int i, j;
-	u32 val;
-
-	for (j = 0; j < EXYNOS_RNG_SEED_REGS; j++) {
-		val = exynos_rng_readl(rng, EXYNOS_RNG_OUT(j));
-
-		for (i = 0; i < 4; i++) {
-			dst[cnt] = val & 0xff;
-			val >>= 8;
-			if (++cnt >= dlen)
-				return cnt;
-		}
-	}
-
-	return cnt;
-}
-
-/*
  * Start the engine and poll for finish.  Then read from output registers
  * filling the 'dst' buffer up to 'dlen' bytes or up to size of generated
  * random data (EXYNOS_RNG_SEED_SIZE).
@@ -160,8 +151,13 @@
 {
 	int retry = EXYNOS_RNG_WAIT_RETRIES;
 
-	exynos_rng_writel(rng, EXYNOS_RNG_CONTROL_START,
-			  EXYNOS_RNG_CONTROL);
+	if (rng->type == EXYNOS_PRNG_EXYNOS4) {
+		exynos_rng_writel(rng, EXYNOS_RNG_CONTROL_START,
+				  EXYNOS_RNG_CONTROL);
+	} else if (rng->type == EXYNOS_PRNG_EXYNOS5) {
+		exynos_rng_writel(rng, EXYNOS_RNG_GEN_PRNG,
+				  EXYNOS_RNG_SEED_CONF);
+	}
 
 	while (!(exynos_rng_readl(rng,
 			EXYNOS_RNG_STATUS) & EXYNOS_RNG_STATUS_RNG_DONE) && --retry)
@@ -173,7 +169,9 @@
 	/* Clear status bit */
 	exynos_rng_writel(rng, EXYNOS_RNG_STATUS_RNG_DONE,
 			  EXYNOS_RNG_STATUS);
-	*read = exynos_rng_copy_random(rng, dst, dlen);
+	*read = min_t(size_t, dlen, EXYNOS_RNG_SEED_SIZE);
+	memcpy_fromio(dst, rng->mem + EXYNOS_RNG_OUT_BASE, *read);
+	rng->bytes_seeding += *read;
 
 	return 0;
 }
@@ -187,13 +185,18 @@
 	unsigned int read = 0;
 	u8 seed[EXYNOS_RNG_SEED_SIZE];
 
-	if (time_before(now, next_seeding))
+	if (time_before(now, next_seeding) &&
+	    rng->bytes_seeding < EXYNOS_RNG_RESEED_BYTES)
 		return;
 
 	if (exynos_rng_get_random(rng, seed, sizeof(seed), &read))
 		return;
 
 	exynos_rng_set_seed(rng, seed, read);
+
+	/* Drop the lock briefly so other waiters can make progress. */
+	mutex_unlock(&rng->lock);
+	mutex_lock(&rng->lock);
 }
 
 static int exynos_rng_generate(struct crypto_rng *tfm,
@@ -209,6 +212,7 @@
 	if (ret)
 		return ret;
 
+	mutex_lock(&rng->lock);
 	do {
 		ret = exynos_rng_get_random(rng, dst, dlen, &read);
 		if (ret)
@@ -219,6 +223,7 @@
 
 		exynos_rng_reseed(rng);
 	} while (dlen > 0);
+	mutex_unlock(&rng->lock);
 
 	clk_disable_unprepare(rng->clk);
 
@@ -236,7 +241,9 @@
 	if (ret)
 		return ret;
 
+	mutex_lock(&rng->lock);
 	ret = exynos_rng_set_seed(ctx->rng, seed, slen);
+	mutex_unlock(&rng->lock);
 
 	clk_disable_unprepare(rng->clk);
 
@@ -259,7 +266,7 @@
 	.base			= {
 		.cra_name		= "stdrng",
 		.cra_driver_name	= "exynos_rng",
-		.cra_priority		= 100,
+		.cra_priority		= 300,
 		.cra_ctxsize		= sizeof(struct exynos_rng_ctx),
 		.cra_module		= THIS_MODULE,
 		.cra_init		= exynos_rng_kcapi_init,
@@ -279,6 +286,10 @@
 	if (!rng)
 		return -ENOMEM;
 
+	rng->type = (enum exynos_prng_type)of_device_get_match_data(&pdev->dev);
+
+	mutex_init(&rng->lock);
+
 	rng->dev = &pdev->dev;
 	rng->clk = devm_clk_get(&pdev->dev, "secss");
 	if (IS_ERR(rng->clk)) {
@@ -329,9 +340,14 @@
 	if (ret)
 		return ret;
 
+	mutex_lock(&rng->lock);
+
 	/* Get new random numbers and store them for seeding on resume. */
 	exynos_rng_get_random(rng, rng->seed_save, sizeof(rng->seed_save),
 			      &(rng->seed_save_len));
+
+	mutex_unlock(&rng->lock);
+
 	dev_dbg(rng->dev, "Stored %u bytes for seeding on system resume\n",
 		rng->seed_save_len);
 
@@ -354,8 +370,12 @@
 	if (ret)
 		return ret;
 
+	mutex_lock(&rng->lock);
+
 	ret = exynos_rng_set_seed(rng, rng->seed_save, rng->seed_save_len);
 
+	mutex_unlock(&rng->lock);
+
 	clk_disable_unprepare(rng->clk);
 
 	return ret;
@@ -367,6 +387,10 @@
 static const struct of_device_id exynos_rng_dt_match[] = {
 	{
 		.compatible = "samsung,exynos4-rng",
+		.data = (const void *)EXYNOS_PRNG_EXYNOS4,
+	}, {
+		.compatible = "samsung,exynos5250-prng",
+		.data = (const void *)EXYNOS_PRNG_EXYNOS5,
 	},
 	{ },
 };
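
The exynos-rng change reseeds on whichever comes first: EXYNOS_RNG_RESEED_TIME milliseconds elapsed or EXYNOS_RNG_RESEED_BYTES of output produced since the last seeding. A simplified sketch of that time-or-volume policy (plain millisecond arithmetic stands in for the driver's jiffies handling):

#include <stdbool.h>
#include <stdio.h>

#define RESEED_TIME_MS	1000	/* mirrors EXYNOS_RNG_RESEED_TIME */
#define RESEED_BYTES	65536	/* mirrors EXYNOS_RNG_RESEED_BYTES */

struct prng_state {
	unsigned long last_seeding_ms;
	unsigned long bytes_since_seeding;
};

/* Reseed once either the time budget or the output budget is exhausted. */
static bool needs_reseed(const struct prng_state *s, unsigned long now_ms)
{
	return now_ms - s->last_seeding_ms >= RESEED_TIME_MS ||
	       s->bytes_since_seeding >= RESEED_BYTES;
}

int main(void)
{
	struct prng_state s = { .last_seeding_ms = 0,
				.bytes_since_seeding = 70000 };

	printf("after 500 ms, 70000 bytes: reseed %s\n",
	       needs_reseed(&s, 500) ? "needed" : "not needed");
	return 0;
}
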
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index e09d405..a5a36fe 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -2579,6 +2579,7 @@
 	for (i = 0; i < 3; ++i)
 		if (dev->bar[i])
 			iounmap(dev->bar[i]);
+	kfree(dev);
 
 err_out_free_regions:
 	pci_release_regions(pdev);
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
index f2246a5..1e87637 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -743,8 +743,8 @@
 		}
 
 		if (!per_cpu(cpu_txwin, i)) {
-			/* shoudn't happen, Each chip will have NX engine */
-			pr_err("NX engine is not availavle for CPU %d\n", i);
+			/* shouldn't happen; each chip will have an NX engine */
+			pr_err("NX engine is not available for CPU %d\n", i);
 			return -EINVAL;
 		}
 	}
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index 8c4fd25..ff149e1 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -117,19 +117,19 @@
 
 #define CSR_RETRY_TIMES 500
 static int qat_hal_rd_ae_csr(struct icp_qat_fw_loader_handle *handle,
-			     unsigned char ae, unsigned int csr,
-			     unsigned int *value)
+			     unsigned char ae, unsigned int csr)
 {
 	unsigned int iterations = CSR_RETRY_TIMES;
+	int value;
 
 	do {
-		*value = GET_AE_CSR(handle, ae, csr);
+		value = GET_AE_CSR(handle, ae, csr);
 		if (!(GET_AE_CSR(handle, ae, LOCAL_CSR_STATUS) & LCS_STATUS))
-			return 0;
+			return value;
 	} while (iterations--);
 
 	pr_err("QAT: Read CSR timeout\n");
-	return -EFAULT;
+	return 0;
 }
 
 static int qat_hal_wr_ae_csr(struct icp_qat_fw_loader_handle *handle,
@@ -154,9 +154,9 @@
 {
 	unsigned int cur_ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, ctx);
-	qat_hal_rd_ae_csr(handle, ae, CTX_WAKEUP_EVENTS_INDIRECT, events);
+	*events = qat_hal_rd_ae_csr(handle, ae, CTX_WAKEUP_EVENTS_INDIRECT);
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, cur_ctx);
 }
 
@@ -169,13 +169,13 @@
 	int times = MAX_RETRY_TIMES;
 	int elapsed_cycles = 0;
 
-	qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT, &base_cnt);
+	base_cnt = qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT);
 	base_cnt &= 0xffff;
 	while ((int)cycles > elapsed_cycles && times--) {
 		if (chk_inactive)
-			qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &csr);
+			csr = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
 
-		qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT, &cur_cnt);
+		cur_cnt = qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT);
 		cur_cnt &= 0xffff;
 		elapsed_cycles = cur_cnt - base_cnt;
 
@@ -207,7 +207,7 @@
 	}
 
 	/* Sets the acceleration engine context mode to either four or eight */
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &csr);
+	csr = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	csr = IGNORE_W1C_MASK & csr;
 	new_csr = (mode == 4) ?
 		SET_BIT(csr, CE_INUSE_CONTEXTS_BITPOS) :
@@ -221,7 +221,7 @@
 {
 	unsigned int csr, new_csr;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &csr);
+	csr = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	csr &= IGNORE_W1C_MASK;
 
 	new_csr = (mode) ?
@@ -240,7 +240,7 @@
 {
 	unsigned int csr, new_csr;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &csr);
+	csr = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	csr &= IGNORE_W1C_MASK;
 	switch (lm_type) {
 	case ICP_LMEM0:
@@ -328,7 +328,7 @@
 {
 	unsigned int ctx, cur_ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 
 	for (ctx = 0; ctx < ICP_QAT_UCLO_MAX_CTX; ctx++) {
 		if (!(ctx_mask & (1 << ctx)))
@@ -340,16 +340,18 @@
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, cur_ctx);
 }
 
-static void qat_hal_rd_indr_csr(struct icp_qat_fw_loader_handle *handle,
+static unsigned int qat_hal_rd_indr_csr(struct icp_qat_fw_loader_handle *handle,
 				unsigned char ae, unsigned char ctx,
-				unsigned int ae_csr, unsigned int *csr_val)
+				unsigned int ae_csr)
 {
-	unsigned int cur_ctx;
+	unsigned int cur_ctx, csr_val;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, ctx);
-	qat_hal_rd_ae_csr(handle, ae, ae_csr, csr_val);
+	csr_val = qat_hal_rd_ae_csr(handle, ae, ae_csr);
 	qat_hal_wr_ae_csr(handle, ae, CSR_CTX_POINTER, cur_ctx);
+
+	return csr_val;
 }
 
 static void qat_hal_put_sig_event(struct icp_qat_fw_loader_handle *handle,
@@ -358,7 +360,7 @@
 {
 	unsigned int ctx, cur_ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 	for (ctx = 0; ctx < ICP_QAT_UCLO_MAX_CTX; ctx++) {
 		if (!(ctx_mask & (1 << ctx)))
 			continue;
@@ -374,7 +376,7 @@
 {
 	unsigned int ctx, cur_ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER, &cur_ctx);
+	cur_ctx = qat_hal_rd_ae_csr(handle, ae, CSR_CTX_POINTER);
 	for (ctx = 0; ctx < ICP_QAT_UCLO_MAX_CTX; ctx++) {
 		if (!(ctx_mask & (1 << ctx)))
 			continue;
@@ -392,13 +394,11 @@
 	int times = MAX_RETRY_TIMES;
 
 	for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
-		qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT,
-				  (unsigned int *)&base_cnt);
+		base_cnt = qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT);
 		base_cnt &= 0xffff;
 
 		do {
-			qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT,
-					  (unsigned int *)&cur_cnt);
+			cur_cnt = qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT);
 			cur_cnt &= 0xffff;
 		} while (times-- && (cur_cnt == base_cnt));
 
@@ -416,8 +416,8 @@
 {
 	unsigned int enable = 0, active = 0;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &enable);
-	qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &active);
+	enable = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
+	active = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
 	if ((enable & (0xff << CE_ENABLE_BITPOS)) ||
 	    (active & (1 << ACS_ABO_BITPOS)))
 		return 1;
@@ -540,7 +540,7 @@
 {
 	unsigned int ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx);
+	ctx = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx &= IGNORE_W1C_MASK &
 		(~((ctx_mask & ICP_QAT_UCLO_AE_ALL_CTX) << CE_ENABLE_BITPOS));
 	qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, ctx);
@@ -583,7 +583,7 @@
 	unsigned int ustore_addr;
 	unsigned int i;
 
-	qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS, &ustore_addr);
+	ustore_addr = qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS);
 	uaddr |= UA_ECS;
 	qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, uaddr);
 	for (i = 0; i < words_num; i++) {
@@ -604,7 +604,7 @@
 {
 	unsigned int ctx;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx);
+	ctx = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx &= IGNORE_W1C_MASK;
 	ctx_mask &= (ctx & CE_INUSE_CONTEXTS) ? 0x55 : 0xFF;
 	ctx |= (ctx_mask << CE_ENABLE_BITPOS);
@@ -636,10 +636,10 @@
 	int ret = 0;
 
 	for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
-		qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL, &csr_val);
+		csr_val = qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL);
 		csr_val &= ~(1 << MMC_SHARE_CS_BITPOS);
 		qat_hal_wr_ae_csr(handle, ae, AE_MISC_CONTROL, csr_val);
-		qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &csr_val);
+		csr_val = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 		csr_val &= IGNORE_W1C_MASK;
 		csr_val |= CE_NN_MODE;
 		qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, csr_val);
@@ -648,7 +648,7 @@
 		qat_hal_wr_indr_csr(handle, ae, ctx_mask, CTX_STS_INDIRECT,
 				    handle->hal_handle->upc_mask &
 				    INIT_PC_VALUE);
-		qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &savctx);
+		savctx = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
 		qat_hal_wr_ae_csr(handle, ae, ACTIVE_CTX_STATUS, 0);
 		qat_hal_put_wakeup_event(handle, ae, ctx_mask, XCWE_VOLUNTARY);
 		qat_hal_wr_indr_csr(handle, ae, ctx_mask,
@@ -760,7 +760,7 @@
 	for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
 		unsigned int csr_val = 0;
 
-		qat_hal_rd_ae_csr(handle, ae, SIGNATURE_ENABLE, &csr_val);
+		csr_val = qat_hal_rd_ae_csr(handle, ae, SIGNATURE_ENABLE);
 		csr_val |= 0x1;
 		qat_hal_wr_ae_csr(handle, ae, SIGNATURE_ENABLE, csr_val);
 	}
@@ -826,16 +826,16 @@
 	unsigned int i, uwrd_lo, uwrd_hi;
 	unsigned int ustore_addr, misc_control;
 
-	qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL, &misc_control);
+	misc_control = qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL);
 	qat_hal_wr_ae_csr(handle, ae, AE_MISC_CONTROL,
 			  misc_control & 0xfffffffb);
-	qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS, &ustore_addr);
+	ustore_addr = qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS);
 	uaddr |= UA_ECS;
 	for (i = 0; i < words_num; i++) {
 		qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, uaddr);
 		uaddr++;
-		qat_hal_rd_ae_csr(handle, ae, USTORE_DATA_LOWER, &uwrd_lo);
-		qat_hal_rd_ae_csr(handle, ae, USTORE_DATA_UPPER, &uwrd_hi);
+		uwrd_lo = qat_hal_rd_ae_csr(handle, ae, USTORE_DATA_LOWER);
+		uwrd_hi = qat_hal_rd_ae_csr(handle, ae, USTORE_DATA_UPPER);
 		uword[i] = uwrd_hi;
 		uword[i] = (uword[i] << 0x20) | uwrd_lo;
 	}
@@ -849,7 +849,7 @@
 {
 	unsigned int i, ustore_addr;
 
-	qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS, &ustore_addr);
+	ustore_addr = qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS);
 	uaddr |= UA_ECS;
 	qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, uaddr);
 	for (i = 0; i < words_num; i++) {
@@ -890,26 +890,27 @@
 		return -EINVAL;
 	}
 	/* save current context */
-	qat_hal_rd_indr_csr(handle, ae, ctx, LM_ADDR_0_INDIRECT, &ind_lm_addr0);
-	qat_hal_rd_indr_csr(handle, ae, ctx, LM_ADDR_1_INDIRECT, &ind_lm_addr1);
-	qat_hal_rd_indr_csr(handle, ae, ctx, INDIRECT_LM_ADDR_0_BYTE_INDEX,
-			    &ind_lm_addr_byte0);
-	qat_hal_rd_indr_csr(handle, ae, ctx, INDIRECT_LM_ADDR_1_BYTE_INDEX,
-			    &ind_lm_addr_byte1);
+	ind_lm_addr0 = qat_hal_rd_indr_csr(handle, ae, ctx, LM_ADDR_0_INDIRECT);
+	ind_lm_addr1 = qat_hal_rd_indr_csr(handle, ae, ctx, LM_ADDR_1_INDIRECT);
+	ind_lm_addr_byte0 = qat_hal_rd_indr_csr(handle, ae, ctx,
+						INDIRECT_LM_ADDR_0_BYTE_INDEX);
+	ind_lm_addr_byte1 = qat_hal_rd_indr_csr(handle, ae, ctx,
+						INDIRECT_LM_ADDR_1_BYTE_INDEX);
 	if (inst_num <= MAX_EXEC_INST)
 		qat_hal_get_uwords(handle, ae, 0, inst_num, savuwords);
 	qat_hal_get_wakeup_event(handle, ae, ctx, &wakeup_events);
-	qat_hal_rd_indr_csr(handle, ae, ctx, CTX_STS_INDIRECT, &savpc);
+	savpc = qat_hal_rd_indr_csr(handle, ae, ctx, CTX_STS_INDIRECT);
 	savpc = (savpc & handle->hal_handle->upc_mask) >> 0;
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx_enables &= IGNORE_W1C_MASK;
-	qat_hal_rd_ae_csr(handle, ae, CC_ENABLE, &savcc);
-	qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &savctx);
-	qat_hal_rd_ae_csr(handle, ae, CTX_ARB_CNTL, &ctxarb_ctl);
-	qat_hal_rd_indr_csr(handle, ae, ctx, FUTURE_COUNT_SIGNAL_INDIRECT,
-			    &ind_cnt_sig);
-	qat_hal_rd_indr_csr(handle, ae, ctx, CTX_SIG_EVENTS_INDIRECT, &ind_sig);
-	qat_hal_rd_ae_csr(handle, ae, CTX_SIG_EVENTS_ACTIVE, &act_sig);
+	savcc = qat_hal_rd_ae_csr(handle, ae, CC_ENABLE);
+	savctx = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
+	ctxarb_ctl = qat_hal_rd_ae_csr(handle, ae, CTX_ARB_CNTL);
+	ind_cnt_sig = qat_hal_rd_indr_csr(handle, ae, ctx,
+					  FUTURE_COUNT_SIGNAL_INDIRECT);
+	ind_sig = qat_hal_rd_indr_csr(handle, ae, ctx,
+				      CTX_SIG_EVENTS_INDIRECT);
+	act_sig = qat_hal_rd_ae_csr(handle, ae, CTX_SIG_EVENTS_ACTIVE);
 	/* execute micro codes */
 	qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, ctx_enables);
 	qat_hal_wr_uwords(handle, ae, 0, inst_num, micro_inst);
@@ -927,8 +928,8 @@
 	if (endpc) {
 		unsigned int ctx_status;
 
-		qat_hal_rd_indr_csr(handle, ae, ctx, CTX_STS_INDIRECT,
-				    &ctx_status);
+		ctx_status = qat_hal_rd_indr_csr(handle, ae, ctx,
+						 CTX_STS_INDIRECT);
 		*endpc = ctx_status & handle->hal_handle->upc_mask;
 	}
 	/* restore to saved context */
@@ -938,7 +939,7 @@
 	qat_hal_put_wakeup_event(handle, ae, (1 << ctx), wakeup_events);
 	qat_hal_wr_indr_csr(handle, ae, (1 << ctx), CTX_STS_INDIRECT,
 			    handle->hal_handle->upc_mask & savpc);
-	qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL, &csr_val);
+	csr_val = qat_hal_rd_ae_csr(handle, ae, AE_MISC_CONTROL);
 	newcsr_val = CLR_BIT(csr_val, MMC_SHARE_CS_BITPOS);
 	qat_hal_wr_ae_csr(handle, ae, AE_MISC_CONTROL, newcsr_val);
 	qat_hal_wr_ae_csr(handle, ae, CC_ENABLE, savcc);
@@ -986,16 +987,16 @@
 		insts = (uint64_t)0xA030000000ull | ((reg_addr & 0x3ff) << 10);
 		break;
 	}
-	qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS, &savctx);
-	qat_hal_rd_ae_csr(handle, ae, CTX_ARB_CNTL, &ctxarb_cntl);
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	savctx = qat_hal_rd_ae_csr(handle, ae, ACTIVE_CTX_STATUS);
+	ctxarb_cntl = qat_hal_rd_ae_csr(handle, ae, CTX_ARB_CNTL);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx_enables &= IGNORE_W1C_MASK;
 	if (ctx != (savctx & ACS_ACNO))
 		qat_hal_wr_ae_csr(handle, ae, ACTIVE_CTX_STATUS,
 				  ctx & ACS_ACNO);
 	qat_hal_get_uwords(handle, ae, 0, 1, &savuword);
 	qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, ctx_enables);
-	qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS, &ustore_addr);
+	ustore_addr = qat_hal_rd_ae_csr(handle, ae, USTORE_ADDRESS);
 	uaddr = UA_ECS;
 	qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, uaddr);
 	insts = qat_hal_set_uword_ecc(insts);
@@ -1011,7 +1012,7 @@
 	 * the instruction should have been executed
 	 * prior to clearing the ECS in putUwords
 	 */
-	qat_hal_rd_ae_csr(handle, ae, ALU_OUT, data);
+	*data = qat_hal_rd_ae_csr(handle, ae, ALU_OUT);
 	qat_hal_wr_ae_csr(handle, ae, USTORE_ADDRESS, ustore_addr);
 	qat_hal_wr_uwords(handle, ae, 0, 1, &savuword);
 	if (ctx != (savctx & ACS_ACNO))
@@ -1188,7 +1189,7 @@
 	unsigned short mask;
 	unsigned short dr_offset = 0x10;
 
-	status = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	status = ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	if (CE_INUSE_CONTEXTS & ctx_enables) {
 		if (ctx & 0x1) {
 			pr_err("QAT: bad 4-ctx mode,ctx=0x%x\n", ctx);
@@ -1238,7 +1239,7 @@
 	const int num_inst = ARRAY_SIZE(micro_inst), code_off = 1;
 	const unsigned short gprnum = 0, dly = num_inst * 0x5;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	if (CE_INUSE_CONTEXTS & ctx_enables) {
 		if (ctx & 0x1) {
 			pr_err("QAT: 4-ctx mode,ctx=0x%x\n", ctx);
@@ -1282,7 +1283,7 @@
 	unsigned int ctx_enables;
 	int stat = 0;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	ctx_enables &= IGNORE_W1C_MASK;
 	qat_hal_wr_ae_csr(handle, ae, CTX_ENABLES, ctx_enables | CE_NN_MODE);
 
@@ -1299,7 +1300,7 @@
 {
 	unsigned int ctx_enables;
 
-	qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES, &ctx_enables);
+	ctx_enables = qat_hal_rd_ae_csr(handle, ae, CTX_ENABLES);
 	if (ctx_enables & CE_INUSE_CONTEXTS) {
 		/* 4-ctx mode */
 		*relreg = absreg_num & 0x1F;
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 142c602..62830a4 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -1461,7 +1461,7 @@
 				       &dd->hash_flags)) {
 			/* hash or semi-hash ready */
 			clear_bit(HASH_FLAGS_DMA_READY, &dd->hash_flags);
-				goto finish;
+			goto finish;
 		}
 	}
 
diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
index 602332e..61ef00b 100644
--- a/drivers/crypto/stm32/Kconfig
+++ b/drivers/crypto/stm32/Kconfig
@@ -18,3 +18,12 @@
 	help
           This enables support for the HASH hw accelerator which can be found
 	  on STMicroelectronics STM32 SOC.
+
+config CRYP_DEV_STM32
+	tristate "Support for STM32 CRYP accelerators"
+	depends on ARCH_STM32
+	select CRYPTO_HASH
+	select CRYPTO_ENGINE
+	help
+	  This enables support for the CRYP (AES/DES/TDES) hw accelerator which
+	  can be found on STMicroelectronics STM32 SOC.
diff --git a/drivers/crypto/stm32/Makefile b/drivers/crypto/stm32/Makefile
index 73cd56c..2c19fc1 100644
--- a/drivers/crypto/stm32/Makefile
+++ b/drivers/crypto/stm32/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_CRC_DEV_STM32) += stm32_crc32.o
-obj-$(CONFIG_HASH_DEV_STM32) += stm32-hash.o
\ No newline at end of file
+obj-$(CONFIG_HASH_DEV_STM32) += stm32-hash.o
+obj-$(CONFIG_CRYP_DEV_STM32) += stm32-cryp.o
diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
new file mode 100644
index 0000000..cf1dddb
--- /dev/null
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -0,0 +1,1172 @@
+/*
+ * Copyright (C) STMicroelectronics SA 2017
+ * Author: Fabien Dessenne <fabien.dessenne@st.com>
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include <crypto/aes.h>
+#include <crypto/des.h>
+#include <crypto/engine.h>
+#include <crypto/scatterwalk.h>
+
+#define DRIVER_NAME             "stm32-cryp"
+
+/* Bit [0] encrypt / decrypt */
+#define FLG_ENCRYPT             BIT(0)
+/* Bit [8..1] algo & operation mode */
+#define FLG_AES                 BIT(1)
+#define FLG_DES                 BIT(2)
+#define FLG_TDES                BIT(3)
+#define FLG_ECB                 BIT(4)
+#define FLG_CBC                 BIT(5)
+#define FLG_CTR                 BIT(6)
+/* Mode mask = bits [15..0] */
+#define FLG_MODE_MASK           GENMASK(15, 0)
+
+/* Registers */
+#define CRYP_CR                 0x00000000
+#define CRYP_SR                 0x00000004
+#define CRYP_DIN                0x00000008
+#define CRYP_DOUT               0x0000000C
+#define CRYP_DMACR              0x00000010
+#define CRYP_IMSCR              0x00000014
+#define CRYP_RISR               0x00000018
+#define CRYP_MISR               0x0000001C
+#define CRYP_K0LR               0x00000020
+#define CRYP_K0RR               0x00000024
+#define CRYP_K1LR               0x00000028
+#define CRYP_K1RR               0x0000002C
+#define CRYP_K2LR               0x00000030
+#define CRYP_K2RR               0x00000034
+#define CRYP_K3LR               0x00000038
+#define CRYP_K3RR               0x0000003C
+#define CRYP_IV0LR              0x00000040
+#define CRYP_IV0RR              0x00000044
+#define CRYP_IV1LR              0x00000048
+#define CRYP_IV1RR              0x0000004C
+
+/* Registers values */
+#define CR_DEC_NOT_ENC          0x00000004
+#define CR_TDES_ECB             0x00000000
+#define CR_TDES_CBC             0x00000008
+#define CR_DES_ECB              0x00000010
+#define CR_DES_CBC              0x00000018
+#define CR_AES_ECB              0x00000020
+#define CR_AES_CBC              0x00000028
+#define CR_AES_CTR              0x00000030
+#define CR_AES_KP               0x00000038
+#define CR_AES_UNKNOWN          0xFFFFFFFF
+#define CR_ALGO_MASK            0x00080038
+#define CR_DATA32               0x00000000
+#define CR_DATA16               0x00000040
+#define CR_DATA8                0x00000080
+#define CR_DATA1                0x000000C0
+#define CR_KEY128               0x00000000
+#define CR_KEY192               0x00000100
+#define CR_KEY256               0x00000200
+#define CR_FFLUSH               0x00004000
+#define CR_CRYPEN               0x00008000
+
+#define SR_BUSY                 0x00000010
+#define SR_OFNE                 0x00000004
+
+#define IMSCR_IN                BIT(0)
+#define IMSCR_OUT               BIT(1)
+
+#define MISR_IN                 BIT(0)
+#define MISR_OUT                BIT(1)
+
+/* Misc */
+#define AES_BLOCK_32            (AES_BLOCK_SIZE / sizeof(u32))
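+/* Bytes already consumed within the current input/output scatterlist entry */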
+#define _walked_in              (cryp->in_walk.offset - cryp->in_sg->offset)
+#define _walked_out             (cryp->out_walk.offset - cryp->out_sg->offset)
+
+struct stm32_cryp_ctx {
+	struct stm32_cryp       *cryp;
+	int                     keylen;
+	u32                     key[AES_KEYSIZE_256 / sizeof(u32)];
+	unsigned long           flags;
+};
+
+struct stm32_cryp_reqctx {
+	unsigned long mode;
+};
+
+struct stm32_cryp {
+	struct list_head        list;
+	struct device           *dev;
+	void __iomem            *regs;
+	struct clk              *clk;
+	unsigned long           flags;
+	u32                     irq_status;
+	struct stm32_cryp_ctx   *ctx;
+
+	struct crypto_engine    *engine;
+
+	struct mutex            lock; /* protects req */
+	struct ablkcipher_request *req;
+
+	size_t                  hw_blocksize;
+
+	size_t                  total_in;
+	size_t                  total_in_save;
+	size_t                  total_out;
+	size_t                  total_out_save;
+
+	struct scatterlist      *in_sg;
+	struct scatterlist      *out_sg;
+	struct scatterlist      *out_sg_save;
+
+	struct scatterlist      in_sgl;
+	struct scatterlist      out_sgl;
+	bool                    sgs_copied;
+
+	int                     in_sg_len;
+	int                     out_sg_len;
+
+	struct scatter_walk     in_walk;
+	struct scatter_walk     out_walk;
+
+	u32                     last_ctr[4];
+};
+
+struct stm32_cryp_list {
+	struct list_head        dev_list;
+	spinlock_t              lock; /* protect dev_list */
+};
+
+static struct stm32_cryp_list cryp_list = {
+	.dev_list = LIST_HEAD_INIT(cryp_list.dev_list),
+	.lock     = __SPIN_LOCK_UNLOCKED(cryp_list.lock),
+};
+
+static inline bool is_aes(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_AES;
+}
+
+static inline bool is_des(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_DES;
+}
+
+static inline bool is_tdes(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_TDES;
+}
+
+static inline bool is_ecb(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_ECB;
+}
+
+static inline bool is_cbc(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_CBC;
+}
+
+static inline bool is_ctr(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_CTR;
+}
+
+static inline bool is_encrypt(struct stm32_cryp *cryp)
+{
+	return cryp->flags & FLG_ENCRYPT;
+}
+
+static inline bool is_decrypt(struct stm32_cryp *cryp)
+{
+	return !is_encrypt(cryp);
+}
+
+static inline u32 stm32_cryp_read(struct stm32_cryp *cryp, u32 ofst)
+{
+	return readl_relaxed(cryp->regs + ofst);
+}
+
+static inline void stm32_cryp_write(struct stm32_cryp *cryp, u32 ofst, u32 val)
+{
+	writel_relaxed(val, cryp->regs + ofst);
+}
+
+static inline int stm32_cryp_wait_busy(struct stm32_cryp *cryp)
+{
+	u32 status;
+
+	return readl_relaxed_poll_timeout(cryp->regs + CRYP_SR, status,
+			!(status & SR_BUSY), 10, 100000);
+}
+
+static struct stm32_cryp *stm32_cryp_find_dev(struct stm32_cryp_ctx *ctx)
+{
+	struct stm32_cryp *tmp, *cryp = NULL;
+
+	spin_lock_bh(&cryp_list.lock);
+	if (!ctx->cryp) {
+		list_for_each_entry(tmp, &cryp_list.dev_list, list) {
+			cryp = tmp;
+			break;
+		}
+		ctx->cryp = cryp;
+	} else {
+		cryp = ctx->cryp;
+	}
+
+	spin_unlock_bh(&cryp_list.lock);
+
+	return cryp;
+}
+
+static int stm32_cryp_check_aligned(struct scatterlist *sg, size_t total,
+				    size_t align)
+{
+	int len = 0;
+
+	if (!total)
+		return 0;
+
+	if (!IS_ALIGNED(total, align))
+		return -EINVAL;
+
+	while (sg) {
+		if (!IS_ALIGNED(sg->offset, sizeof(u32)))
+			return -EINVAL;
+
+		if (!IS_ALIGNED(sg->length, align))
+			return -EINVAL;
+
+		len += sg->length;
+		sg = sg_next(sg);
+	}
+
+	if (len != total)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int stm32_cryp_check_io_aligned(struct stm32_cryp *cryp)
+{
+	int ret;
+
+	ret = stm32_cryp_check_aligned(cryp->in_sg, cryp->total_in,
+				       cryp->hw_blocksize);
+	if (ret)
+		return ret;
+
+	ret = stm32_cryp_check_aligned(cryp->out_sg, cryp->total_out,
+				       cryp->hw_blocksize);
+
+	return ret;
+}
+
+static void sg_copy_buf(void *buf, struct scatterlist *sg,
+			unsigned int start, unsigned int nbytes, int out)
+{
+	struct scatter_walk walk;
+
+	if (!nbytes)
+		return;
+
+	scatterwalk_start(&walk, sg);
+	scatterwalk_advance(&walk, start);
+	scatterwalk_copychunks(buf, &walk, nbytes, out);
+	scatterwalk_done(&walk, out, 0);
+}
+
+static int stm32_cryp_copy_sgs(struct stm32_cryp *cryp)
+{
+	void *buf_in, *buf_out;
+	int pages, total_in, total_out;
+
+	if (!stm32_cryp_check_io_aligned(cryp)) {
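+		/* I/O already aligned on the HW block size: process the scatterlists in place */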
+		cryp->sgs_copied = 0;
+		return 0;
+	}
+
+	total_in = ALIGN(cryp->total_in, cryp->hw_blocksize);
+	pages = total_in ? get_order(total_in) : 1;
+	buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages);
+
+	total_out = ALIGN(cryp->total_out, cryp->hw_blocksize);
+	pages = total_out ? get_order(total_out) : 1;
+	buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages);
+
+	if (!buf_in || !buf_out) {
+		dev_err(cryp->dev, "Can't allocate pages when unaligned\n");
+		cryp->sgs_copied = 0;
+		return -EFAULT;
+	}
+
+	sg_copy_buf(buf_in, cryp->in_sg, 0, cryp->total_in, 0);
+
+	sg_init_one(&cryp->in_sgl, buf_in, total_in);
+	cryp->in_sg = &cryp->in_sgl;
+	cryp->in_sg_len = 1;
+
+	sg_init_one(&cryp->out_sgl, buf_out, total_out);
+	cryp->out_sg_save = cryp->out_sg;
+	cryp->out_sg = &cryp->out_sgl;
+	cryp->out_sg_len = 1;
+
+	cryp->sgs_copied = 1;
+
+	return 0;
+}
+
+static void stm32_cryp_hw_write_iv(struct stm32_cryp *cryp, u32 *iv)
+{
+	if (!iv)
+		return;
+
+	stm32_cryp_write(cryp, CRYP_IV0LR, cpu_to_be32(*iv++));
+	stm32_cryp_write(cryp, CRYP_IV0RR, cpu_to_be32(*iv++));
+
+	if (is_aes(cryp)) {
+		stm32_cryp_write(cryp, CRYP_IV1LR, cpu_to_be32(*iv++));
+		stm32_cryp_write(cryp, CRYP_IV1RR, cpu_to_be32(*iv++));
+	}
+}
+
+static void stm32_cryp_hw_write_key(struct stm32_cryp *c)
+{
+	unsigned int i;
+	int r_id;
+
+	if (is_des(c)) {
+		stm32_cryp_write(c, CRYP_K1LR, cpu_to_be32(c->ctx->key[0]));
+		stm32_cryp_write(c, CRYP_K1RR, cpu_to_be32(c->ctx->key[1]));
+	} else {
+		r_id = CRYP_K3RR;
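+		/* Key is right-aligned: last key word goes into K3RR, earlier words into the preceding registers */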
+		for (i = c->ctx->keylen / sizeof(u32); i > 0; i--, r_id -= 4)
+			stm32_cryp_write(c, r_id,
+					 cpu_to_be32(c->ctx->key[i - 1]));
+	}
+}
+
+static u32 stm32_cryp_get_hw_mode(struct stm32_cryp *cryp)
+{
+	if (is_aes(cryp) && is_ecb(cryp))
+		return CR_AES_ECB;
+
+	if (is_aes(cryp) && is_cbc(cryp))
+		return CR_AES_CBC;
+
+	if (is_aes(cryp) && is_ctr(cryp))
+		return CR_AES_CTR;
+
+	if (is_des(cryp) && is_ecb(cryp))
+		return CR_DES_ECB;
+
+	if (is_des(cryp) && is_cbc(cryp))
+		return CR_DES_CBC;
+
+	if (is_tdes(cryp) && is_ecb(cryp))
+		return CR_TDES_ECB;
+
+	if (is_tdes(cryp) && is_cbc(cryp))
+		return CR_TDES_CBC;
+
+	dev_err(cryp->dev, "Unknown mode\n");
+	return CR_AES_UNKNOWN;
+}
+
+static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
+{
+	int ret;
+	u32 cfg, hw_mode;
+
+	/* Disable interrupt */
+	stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+
+	/* Set key */
+	stm32_cryp_hw_write_key(cryp);
+
+	/* Set configuration */
+	cfg = CR_DATA8 | CR_FFLUSH;
+
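+	/* Note: the key size field is assumed to matter only in AES mode, so the default below is harmless for (T)DES */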
+	switch (cryp->ctx->keylen) {
+	case AES_KEYSIZE_128:
+		cfg |= CR_KEY128;
+		break;
+
+	case AES_KEYSIZE_192:
+		cfg |= CR_KEY192;
+		break;
+
+	default:
+	case AES_KEYSIZE_256:
+		cfg |= CR_KEY256;
+		break;
+	}
+
+	hw_mode = stm32_cryp_get_hw_mode(cryp);
+	if (hw_mode == CR_AES_UNKNOWN)
+		return -EINVAL;
+
+	/* AES ECB/CBC decrypt: run key preparation first */
+	if (is_decrypt(cryp) &&
+	    ((hw_mode == CR_AES_ECB) || (hw_mode == CR_AES_CBC))) {
+		stm32_cryp_write(cryp, CRYP_CR, cfg | CR_AES_KP | CR_CRYPEN);
+
+		/* Wait for end of processing */
+		ret = stm32_cryp_wait_busy(cryp);
+		if (ret) {
+			dev_err(cryp->dev, "Timeout (key preparation)\n");
+			return ret;
+		}
+	}
+
+	cfg |= hw_mode;
+
+	if (is_decrypt(cryp))
+		cfg |= CR_DEC_NOT_ENC;
+
+	/* Apply config and flush (valid when CRYPEN = 0) */
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	switch (hw_mode) {
+	case CR_DES_CBC:
+	case CR_TDES_CBC:
+	case CR_AES_CBC:
+	case CR_AES_CTR:
+		stm32_cryp_hw_write_iv(cryp, (u32 *)cryp->req->info);
+		break;
+
+	default:
+		break;
+	}
+
+	/* Enable now */
+	cfg |= CR_CRYPEN;
+
+	stm32_cryp_write(cryp, CRYP_CR, cfg);
+
+	return 0;
+}
+
+static void stm32_cryp_finish_req(struct stm32_cryp *cryp)
+{
+	int err = 0;
+
+	if (cryp->sgs_copied) {
+		void *buf_in, *buf_out;
+		int pages, len;
+
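+		/* Copy the result back from the bounce buffer into the caller's scatterlist, then free both bounce buffers */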
+		buf_in = sg_virt(&cryp->in_sgl);
+		buf_out = sg_virt(&cryp->out_sgl);
+
+		sg_copy_buf(buf_out, cryp->out_sg_save, 0,
+			    cryp->total_out_save, 1);
+
+		len = ALIGN(cryp->total_in_save, cryp->hw_blocksize);
+		pages = len ? get_order(len) : 1;
+		free_pages((unsigned long)buf_in, pages);
+
+		len = ALIGN(cryp->total_out_save, cryp->hw_blocksize);
+		pages = len ? get_order(len) : 1;
+		free_pages((unsigned long)buf_out, pages);
+	}
+
+	crypto_finalize_cipher_request(cryp->engine, cryp->req, err);
+	cryp->req = NULL;
+
+	memset(cryp->ctx->key, 0, cryp->ctx->keylen);
+
+	mutex_unlock(&cryp->lock);
+}
+
+static int stm32_cryp_cpu_start(struct stm32_cryp *cryp)
+{
+	/* Enable interrupt and let the IRQ handler do everything */
+	stm32_cryp_write(cryp, CRYP_IMSCR, IMSCR_IN | IMSCR_OUT);
+
+	return 0;
+}
+
+static int stm32_cryp_cra_init(struct crypto_tfm *tfm)
+{
+	tfm->crt_ablkcipher.reqsize = sizeof(struct stm32_cryp_reqctx);
+
+	return 0;
+}
+
+static int stm32_cryp_crypt(struct ablkcipher_request *req, unsigned long mode)
+{
+	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(
+			crypto_ablkcipher_reqtfm(req));
+	struct stm32_cryp_reqctx *rctx = ablkcipher_request_ctx(req);
+	struct stm32_cryp *cryp = stm32_cryp_find_dev(ctx);
+
+	if (!cryp)
+		return -ENODEV;
+
+	rctx->mode = mode;
+
+	return crypto_transfer_cipher_request_to_engine(cryp->engine, req);
+}
+
+static int stm32_cryp_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			     unsigned int keylen)
+{
+	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+
+	memcpy(ctx->key, key, keylen);
+	ctx->keylen = keylen;
+
+	return 0;
+}
+
+static int stm32_cryp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_192 &&
+	    keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+	else
+		return stm32_cryp_setkey(tfm, key, keylen);
+}
+
+static int stm32_cryp_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+				 unsigned int keylen)
+{
+	if (keylen != DES_KEY_SIZE)
+		return -EINVAL;
+	else
+		return stm32_cryp_setkey(tfm, key, keylen);
+}
+
+static int stm32_cryp_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+				  unsigned int keylen)
+{
+	if (keylen != (3 * DES_KEY_SIZE))
+		return -EINVAL;
+	else
+		return stm32_cryp_setkey(tfm, key, keylen);
+}
+
+static int stm32_cryp_aes_ecb_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_ECB | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_aes_ecb_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_ECB);
+}
+
+static int stm32_cryp_aes_cbc_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_CBC | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_aes_cbc_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_CBC);
+}
+
+static int stm32_cryp_aes_ctr_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_CTR | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_aes_ctr_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_AES | FLG_CTR);
+}
+
+static int stm32_cryp_des_ecb_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_DES | FLG_ECB | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_des_ecb_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_DES | FLG_ECB);
+}
+
+static int stm32_cryp_des_cbc_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_DES | FLG_CBC | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_des_cbc_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_DES | FLG_CBC);
+}
+
+static int stm32_cryp_tdes_ecb_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_tdes_ecb_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB);
+}
+
+static int stm32_cryp_tdes_cbc_encrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC | FLG_ENCRYPT);
+}
+
+static int stm32_cryp_tdes_cbc_decrypt(struct ablkcipher_request *req)
+{
+	return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC);
+}
+
+static int stm32_cryp_prepare_req(struct crypto_engine *engine,
+				  struct ablkcipher_request *req)
+{
+	struct stm32_cryp_ctx *ctx;
+	struct stm32_cryp *cryp;
+	struct stm32_cryp_reqctx *rctx;
+	int ret;
+
+	if (!req)
+		return -EINVAL;
+
+	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+
+	cryp = ctx->cryp;
+
+	if (!cryp)
+		return -ENODEV;
+
+	mutex_lock(&cryp->lock);
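+	/* Released in stm32_cryp_finish_req(), or in the error path below */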
+
+	rctx = ablkcipher_request_ctx(req);
+	rctx->mode &= FLG_MODE_MASK;
+
+	ctx->cryp = cryp;
+
+	cryp->flags = (cryp->flags & ~FLG_MODE_MASK) | rctx->mode;
+	cryp->hw_blocksize = is_aes(cryp) ? AES_BLOCK_SIZE : DES_BLOCK_SIZE;
+	cryp->ctx = ctx;
+
+	cryp->req = req;
+	cryp->total_in = req->nbytes;
+	cryp->total_out = cryp->total_in;
+
+	cryp->total_in_save = cryp->total_in;
+	cryp->total_out_save = cryp->total_out;
+
+	cryp->in_sg = req->src;
+	cryp->out_sg = req->dst;
+	cryp->out_sg_save = cryp->out_sg;
+
+	cryp->in_sg_len = sg_nents_for_len(cryp->in_sg, cryp->total_in);
+	if (cryp->in_sg_len < 0) {
+		dev_err(cryp->dev, "Cannot get in_sg_len\n");
+		ret = cryp->in_sg_len;
+		goto out;
+	}
+
+	cryp->out_sg_len = sg_nents_for_len(cryp->out_sg, cryp->total_out);
+	if (cryp->out_sg_len < 0) {
+		dev_err(cryp->dev, "Cannot get out_sg_len\n");
+		ret = cryp->out_sg_len;
+		goto out;
+	}
+
+	ret = stm32_cryp_copy_sgs(cryp);
+	if (ret)
+		goto out;
+
+	scatterwalk_start(&cryp->in_walk, cryp->in_sg);
+	scatterwalk_start(&cryp->out_walk, cryp->out_sg);
+
+	ret = stm32_cryp_hw_init(cryp);
+out:
+	if (ret)
+		mutex_unlock(&cryp->lock);
+
+	return ret;
+}
+
+static int stm32_cryp_prepare_cipher_req(struct crypto_engine *engine,
+					 struct ablkcipher_request *req)
+{
+	return stm32_cryp_prepare_req(engine, req);
+}
+
+static int stm32_cryp_cipher_one_req(struct crypto_engine *engine,
+				     struct ablkcipher_request *req)
+{
+	struct stm32_cryp_ctx *ctx = crypto_ablkcipher_ctx(
+			crypto_ablkcipher_reqtfm(req));
+	struct stm32_cryp *cryp = ctx->cryp;
+
+	if (!cryp)
+		return -ENODEV;
+
+	return stm32_cryp_cpu_start(cryp);
+}
+
+static u32 *stm32_cryp_next_out(struct stm32_cryp *cryp, u32 *dst,
+				unsigned int n)
+{
+	scatterwalk_advance(&cryp->out_walk, n);
+
+	if (unlikely(cryp->out_sg->length == _walked_out)) {
+		cryp->out_sg = sg_next(cryp->out_sg);
+		if (cryp->out_sg) {
+			scatterwalk_start(&cryp->out_walk, cryp->out_sg);
+			return (sg_virt(cryp->out_sg) + _walked_out);
+		}
+	}
+
+	return (u32 *)((u8 *)dst + n);
+}
+
+static u32 *stm32_cryp_next_in(struct stm32_cryp *cryp, u32 *src,
+			       unsigned int n)
+{
+	scatterwalk_advance(&cryp->in_walk, n);
+
+	if (unlikely(cryp->in_sg->length == _walked_in)) {
+		cryp->in_sg = sg_next(cryp->in_sg);
+		if (cryp->in_sg) {
+			scatterwalk_start(&cryp->in_walk, cryp->in_sg);
+			return (sg_virt(cryp->in_sg) + _walked_in);
+		}
+	}
+
+	return (u32 *)((u8 *)src + n);
+}
+
+static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp)
+{
+	u32 cr;
+
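+	/* Low counter word at its maximum: compute the carried value in SW and reload the full IV */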
+	if (unlikely(cryp->last_ctr[3] == 0xFFFFFFFF)) {
+		cryp->last_ctr[3] = 0;
+		cryp->last_ctr[2]++;
+		if (!cryp->last_ctr[2]) {
+			cryp->last_ctr[1]++;
+			if (!cryp->last_ctr[1])
+				cryp->last_ctr[0]++;
+		}
+
+		cr = stm32_cryp_read(cryp, CRYP_CR);
+		stm32_cryp_write(cryp, CRYP_CR, cr & ~CR_CRYPEN);
+
+		stm32_cryp_hw_write_iv(cryp, (u32 *)cryp->last_ctr);
+
+		stm32_cryp_write(cryp, CRYP_CR, cr);
+	}
+
+	cryp->last_ctr[0] = stm32_cryp_read(cryp, CRYP_IV0LR);
+	cryp->last_ctr[1] = stm32_cryp_read(cryp, CRYP_IV0RR);
+	cryp->last_ctr[2] = stm32_cryp_read(cryp, CRYP_IV1LR);
+	cryp->last_ctr[3] = stm32_cryp_read(cryp, CRYP_IV1RR);
+}
+
+static bool stm32_cryp_irq_read_data(struct stm32_cryp *cryp)
+{
+	unsigned int i, j;
+	u32 d32, *dst;
+	u8 *d8;
+
+	dst = sg_virt(cryp->out_sg) + _walked_out;
+
+	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) {
+		if (likely(cryp->total_out >= sizeof(u32))) {
+			/* Read a full u32 */
+			*dst = stm32_cryp_read(cryp, CRYP_DOUT);
+
+			dst = stm32_cryp_next_out(cryp, dst, sizeof(u32));
+			cryp->total_out -= sizeof(u32);
+		} else if (!cryp->total_out) {
+			/* Empty fifo out (data from input padding) */
+			d32 = stm32_cryp_read(cryp, CRYP_DOUT);
+		} else {
+			/* Read less than a full u32 */
+			d32 = stm32_cryp_read(cryp, CRYP_DOUT);
+			d8 = (u8 *)&d32;
+
+			for (j = 0; j < cryp->total_out; j++) {
+				*((u8 *)dst) = *(d8++);
+				dst = stm32_cryp_next_out(cryp, dst, 1);
+			}
+			cryp->total_out = 0;
+		}
+	}
+
+	return !cryp->total_out || !cryp->total_in;
+}
+
+static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp)
+{
+	unsigned int i, j;
+	u32 *src;
+	u8 d8[4];
+
+	src = sg_virt(cryp->in_sg) + _walked_in;
+
+	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) {
+		if (likely(cryp->total_in >= sizeof(u32))) {
+			/* Write a full u32 */
+			stm32_cryp_write(cryp, CRYP_DIN, *src);
+
+			src = stm32_cryp_next_in(cryp, src, sizeof(u32));
+			cryp->total_in -= sizeof(u32);
+		} else if (!cryp->total_in) {
+			/* Write padding data */
+			stm32_cryp_write(cryp, CRYP_DIN, 0);
+		} else {
+			/* Write less than a full u32 */
+			memset(d8, 0, sizeof(u32));
+			for (j = 0; j < cryp->total_in; j++) {
+				d8[j] = *((u8 *)src);
+				src = stm32_cryp_next_in(cryp, src, 1);
+			}
+
+			stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8);
+			cryp->total_in = 0;
+		}
+	}
+}
+
+static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp)
+{
+	if (unlikely(!cryp->total_in)) {
+		dev_warn(cryp->dev, "No more data to process\n");
+		return;
+	}
+
+	if (is_aes(cryp) && is_ctr(cryp))
+		stm32_cryp_check_ctr_counter(cryp);
+
+	stm32_cryp_irq_write_block(cryp);
+}
+
+static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg)
+{
+	struct stm32_cryp *cryp = arg;
+
+	if (cryp->irq_status & MISR_OUT)
+		/* Output FIFO IRQ: read data */
+		if (unlikely(stm32_cryp_irq_read_data(cryp))) {
+			/* All bytes processed, finish */
+			stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+			stm32_cryp_finish_req(cryp);
+			return IRQ_HANDLED;
+		}
+
+	if (cryp->irq_status & MISR_IN) {
+		/* Input FIFO IRQ: write data */
+		stm32_cryp_irq_write_data(cryp);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t stm32_cryp_irq(int irq, void *arg)
+{
+	struct stm32_cryp *cryp = arg;
+
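+	/* Snapshot the masked interrupt status; the FIFOs are serviced in the threaded handler */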
+	cryp->irq_status = stm32_cryp_read(cryp, CRYP_MISR);
+
+	return IRQ_WAKE_THREAD;
+}
+
+static struct crypto_alg crypto_algs[] = {
+{
+	.cra_name		= "ecb(aes)",
+	.cra_driver_name	= "stm32-ecb-aes",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.setkey		= stm32_cryp_aes_setkey,
+		.encrypt	= stm32_cryp_aes_ecb_encrypt,
+		.decrypt	= stm32_cryp_aes_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(aes)",
+	.cra_driver_name	= "stm32-cbc-aes",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_aes_setkey,
+		.encrypt	= stm32_cryp_aes_cbc_encrypt,
+		.decrypt	= stm32_cryp_aes_cbc_decrypt,
+	}
+},
+{
+	.cra_name		= "ctr(aes)",
+	.cra_driver_name	= "stm32-ctr-aes",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_aes_setkey,
+		.encrypt	= stm32_cryp_aes_ctr_encrypt,
+		.decrypt	= stm32_cryp_aes_ctr_decrypt,
+	}
+},
+{
+	.cra_name		= "ecb(des)",
+	.cra_driver_name	= "stm32-ecb-des",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= DES_BLOCK_SIZE,
+		.max_keysize	= DES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_des_setkey,
+		.encrypt	= stm32_cryp_des_ecb_encrypt,
+		.decrypt	= stm32_cryp_des_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(des)",
+	.cra_driver_name	= "stm32-cbc-des",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= DES_BLOCK_SIZE,
+		.max_keysize	= DES_BLOCK_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_des_setkey,
+		.encrypt	= stm32_cryp_des_cbc_encrypt,
+		.decrypt	= stm32_cryp_des_cbc_decrypt,
+	}
+},
+{
+	.cra_name		= "ecb(des3_ede)",
+	.cra_driver_name	= "stm32-ecb-des3",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= 3 * DES_BLOCK_SIZE,
+		.max_keysize	= 3 * DES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_tdes_setkey,
+		.encrypt	= stm32_cryp_tdes_ecb_encrypt,
+		.decrypt	= stm32_cryp_tdes_ecb_decrypt,
+	}
+},
+{
+	.cra_name		= "cbc(des3_ede)",
+	.cra_driver_name	= "stm32-cbc-des3",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= DES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct stm32_cryp_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= stm32_cryp_cra_init,
+	.cra_ablkcipher = {
+		.min_keysize	= 3 * DES_BLOCK_SIZE,
+		.max_keysize	= 3 * DES_BLOCK_SIZE,
+		.ivsize		= DES_BLOCK_SIZE,
+		.setkey		= stm32_cryp_tdes_setkey,
+		.encrypt	= stm32_cryp_tdes_cbc_encrypt,
+		.decrypt	= stm32_cryp_tdes_cbc_decrypt,
+	}
+},
+};
+
+static const struct of_device_id stm32_dt_ids[] = {
+	{ .compatible = "st,stm32f756-cryp", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, stm32_dt_ids);
+
+static int stm32_cryp_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct stm32_cryp *cryp;
+	struct resource *res;
+	struct reset_control *rst;
+	int irq, ret;
+
+	cryp = devm_kzalloc(dev, sizeof(*cryp), GFP_KERNEL);
+	if (!cryp)
+		return -ENOMEM;
+
+	cryp->dev = dev;
+
+	mutex_init(&cryp->lock);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	cryp->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(cryp->regs)) {
+		dev_err(dev, "Cannot map CRYP IO\n");
+		return PTR_ERR(cryp->regs);
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "Cannot get IRQ resource\n");
+		return irq;
+	}
+
+	ret = devm_request_threaded_irq(dev, irq, stm32_cryp_irq,
+					stm32_cryp_irq_thread, IRQF_ONESHOT,
+					dev_name(dev), cryp);
+	if (ret) {
+		dev_err(dev, "Cannot grab IRQ\n");
+		return ret;
+	}
+
+	cryp->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(cryp->clk)) {
+		dev_err(dev, "Could not get clock\n");
+		return PTR_ERR(cryp->clk);
+	}
+
+	ret = clk_prepare_enable(cryp->clk);
+	if (ret) {
+		dev_err(cryp->dev, "Failed to enable clock\n");
+		return ret;
+	}
+
+	rst = devm_reset_control_get(dev, NULL);
+	if (!IS_ERR(rst)) {
+		reset_control_assert(rst);
+		udelay(2);
+		reset_control_deassert(rst);
+	}
+
+	platform_set_drvdata(pdev, cryp);
+
+	spin_lock(&cryp_list.lock);
+	list_add(&cryp->list, &cryp_list.dev_list);
+	spin_unlock(&cryp_list.lock);
+
+	/* Initialize crypto engine */
+	cryp->engine = crypto_engine_alloc_init(dev, 1);
+	if (!cryp->engine) {
+		dev_err(dev, "Could not init crypto engine\n");
+		ret = -ENOMEM;
+		goto err_engine1;
+	}
+
+	cryp->engine->prepare_cipher_request = stm32_cryp_prepare_cipher_req;
+	cryp->engine->cipher_one_request = stm32_cryp_cipher_one_req;
+
+	ret = crypto_engine_start(cryp->engine);
+	if (ret) {
+		dev_err(dev, "Could not start crypto engine\n");
+		goto err_engine2;
+	}
+
+	ret = crypto_register_algs(crypto_algs, ARRAY_SIZE(crypto_algs));
+	if (ret) {
+		dev_err(dev, "Could not register algs\n");
+		goto err_algs;
+	}
+
+	dev_info(dev, "Initialized\n");
+
+	return 0;
+
+err_algs:
+err_engine2:
+	crypto_engine_exit(cryp->engine);
+err_engine1:
+	spin_lock(&cryp_list.lock);
+	list_del(&cryp->list);
+	spin_unlock(&cryp_list.lock);
+
+	clk_disable_unprepare(cryp->clk);
+
+	return ret;
+}
+
+static int stm32_cryp_remove(struct platform_device *pdev)
+{
+	struct stm32_cryp *cryp = platform_get_drvdata(pdev);
+
+	if (!cryp)
+		return -ENODEV;
+
+	crypto_unregister_algs(crypto_algs, ARRAY_SIZE(crypto_algs));
+
+	crypto_engine_exit(cryp->engine);
+
+	spin_lock(&cryp_list.lock);
+	list_del(&cryp->list);
+	spin_unlock(&cryp_list.lock);
+
+	clk_disable_unprepare(cryp->clk);
+
+	return 0;
+}
+
+static struct platform_driver stm32_cryp_driver = {
+	.probe  = stm32_cryp_probe,
+	.remove = stm32_cryp_remove,
+	.driver = {
+		.name           = DRIVER_NAME,
+		.of_match_table = stm32_dt_ids,
+	},
+};
+
+module_platform_driver(stm32_cryp_driver);
+
+MODULE_AUTHOR("Fabien Dessenne <fabien.dessenne@st.com>");
+MODULE_DESCRIPTION("STMicroelectronics STM32 CRYP hardware driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 6f9fa6e..c48a752 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -58,6 +58,13 @@
 extern struct list_head adapter_list;
 extern struct mutex uld_mutex;
 
+/* Suspend an Ethernet Tx queue with fewer available descriptors than this.
+ * This is the same as calc_tx_descs() for a TSO packet with
+ * nr_frags == MAX_SKB_FRAGS.
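+ * For example, assuming MAX_SKB_FRAGS == 17 (4 KiB pages), this evaluates to
+ * 1 + DIV_ROUND_UP(3 * 17 / 2 + 1, 8) = 5 descriptors.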
+ */
+#define ETHTXQ_STOP_THRES \
+	(1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
+
 enum {
 	MAX_NPORTS	= 4,     /* max # of ports */
 	SERNUM_LEN	= 24,    /* Serial # length */
@@ -564,6 +571,7 @@
 
 enum {
 	ULP_CRYPTO_LOOKASIDE = 1 << 0,
+	ULP_CRYPTO_IPSEC_INLINE = 1 << 1,
 };
 
 struct rx_sw_desc;
@@ -968,6 +976,11 @@
 	SCHED_CLASS_RATEMODE_ABS = 1,   /* Kb/s */
 };
 
+struct tx_sw_desc {                /* SW state per Tx descriptor */
+	struct sk_buff *skb;
+	struct ulptx_sgl *sgl;
+};
+
 /* Support for "sched_queue" command to allow one or more NIC TX Queues
  * to be bound to a TX Scheduling Class.
  */
@@ -1700,4 +1713,14 @@
 void free_tx_desc(struct adapter *adap, struct sge_txq *q,
 		  unsigned int n, bool unmap);
 void free_txq(struct adapter *adap, struct sge_txq *q);
+void cxgb4_reclaim_completed_tx(struct adapter *adap,
+				struct sge_txq *q, bool unmap);
+int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb,
+		  dma_addr_t *addr);
+void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
+			 void *pos);
+void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
+		     struct ulptx_sgl *sgl, u64 *end, unsigned int start,
+		     const dma_addr_t *addr);
+void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n);
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 917663b..cf47183 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3096,6 +3096,8 @@
 		   atomic_read(&adap->chcr_stats.error));
 	seq_printf(seq, "Fallback: %10u \n",
 		   atomic_read(&adap->chcr_stats.fallback));
+	seq_printf(seq, "IPSec PDU: %10u\n",
+		   atomic_read(&adap->chcr_stats.ipsec_cnt));
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 6f900ff..05a4abf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4096,7 +4096,7 @@
 		} else {
 			adap->vres.ncrypto_fc = val[0];
 		}
-		adap->params.crypto |= ULP_CRYPTO_LOOKASIDE;
+		adap->params.crypto = ntohs(caps_cmd.cryptocaps);
 		adap->num_uld += 1;
 	}
 #undef FW_PARAM_PFVF
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 71a315b..6b5fea4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -637,6 +637,7 @@
 	lld->nchan = adap->params.nports;
 	lld->nports = adap->params.nports;
 	lld->wr_cred = adap->params.ofldq_wr_cred;
+	lld->crypto = adap->params.crypto;
 	lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
 	lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
 	lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 08e709a..1d37672 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -297,6 +297,7 @@
 	atomic_t complete;
 	atomic_t error;
 	atomic_t fallback;
+	atomic_t ipsec_cnt;
 };
 
 #define OCQ_WIN_OFFSET(pdev, vres) \
@@ -322,6 +323,7 @@
 	unsigned char wr_cred;               /* WR 16-byte credits */
 	unsigned char adapter_type;          /* type of adapter */
 	unsigned char fw_api_ver;            /* FW API version */
+	unsigned char crypto;                /* crypto support */
 	unsigned int fw_vers;                /* FW version */
 	unsigned int iscsi_iolen;            /* iSCSI max I/O length */
 	unsigned int cclk_ps;                /* Core clock period in psec */
@@ -370,6 +372,7 @@
 			      struct t4_lro_mgr *lro_mgr,
 			      struct napi_struct *napi);
 	void (*lro_flush)(struct t4_lro_mgr *);
+	int (*tx_handler)(struct sk_buff *skb, struct net_device *dev);
 };
 
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 922f2f9..6c7b0ac 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -41,6 +41,7 @@
 #include <linux/jiffies.h>
 #include <linux/prefetch.h>
 #include <linux/export.h>
+#include <net/xfrm.h>
 #include <net/ipv6.h>
 #include <net/tcp.h>
 #include <net/busy_poll.h>
@@ -53,6 +54,7 @@
 #include "t4_msg.h"
 #include "t4fw_api.h"
 #include "cxgb4_ptp.h"
+#include "cxgb4_uld.h"
 
 /*
  * Rx buffer size.  We use largish buffers if possible but settle for single
@@ -110,14 +112,6 @@
 #define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
 
 /*
- * Suspend an Ethernet Tx queue with fewer available descriptors than this.
- * This is the same as calc_tx_descs() for a TSO packet with
- * nr_frags == MAX_SKB_FRAGS.
- */
-#define ETHTXQ_STOP_THRES \
-	(1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
-
-/*
  * Suspension threshold for non-Ethernet Tx queues.  We require enough room
  * for a full sized WR.
  */
@@ -134,11 +128,6 @@
  */
 #define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
 
-struct tx_sw_desc {                /* SW state per Tx descriptor */
-	struct sk_buff *skb;
-	struct ulptx_sgl *sgl;
-};
-
 struct rx_sw_desc {                /* SW state per Rx descriptor */
 	struct page *page;
 	dma_addr_t dma_addr;
@@ -248,8 +237,8 @@
 	return fl->avail - fl->pend_cred <= s->fl_starve_thres;
 }
 
-static int map_skb(struct device *dev, const struct sk_buff *skb,
-		   dma_addr_t *addr)
+int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb,
+		  dma_addr_t *addr)
 {
 	const skb_frag_t *fp, *end;
 	const struct skb_shared_info *si;
@@ -277,6 +266,7 @@
 out_err:
 	return -ENOMEM;
 }
+EXPORT_SYMBOL(cxgb4_map_skb);
 
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 static void unmap_skb(struct device *dev, const struct sk_buff *skb,
@@ -411,7 +401,7 @@
 }
 
 /**
- *	reclaim_completed_tx - reclaims completed Tx descriptors
+ *	cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
  *	@adap: the adapter
  *	@q: the Tx queue to reclaim completed descriptors from
  *	@unmap: whether the buffers should be unmapped for DMA
@@ -420,7 +410,7 @@
  *	and frees the associated buffers if possible.  Called with the Tx
  *	queue locked.
  */
-static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
 					bool unmap)
 {
 	int avail = reclaimable(q);
@@ -437,6 +427,7 @@
 		q->in_use -= avail;
 	}
 }
+EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
 
 static inline int get_buf_size(struct adapter *adapter,
 			       const struct rx_sw_desc *d)
@@ -833,7 +824,7 @@
 }
 
 /**
- *	write_sgl - populate a scatter/gather list for a packet
+ *	cxgb4_write_sgl - populate a scatter/gather list for a packet
  *	@skb: the packet
  *	@q: the Tx queue we are writing into
  *	@sgl: starting location for writing the SGL
@@ -849,9 +840,9 @@
  *	right after the end of the SGL but does not account for any potential
  *	wrap around, i.e., @end > @sgl.
  */
-static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
-		      struct ulptx_sgl *sgl, u64 *end, unsigned int start,
-		      const dma_addr_t *addr)
+void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
+		     struct ulptx_sgl *sgl, u64 *end, unsigned int start,
+		     const dma_addr_t *addr)
 {
 	unsigned int i, len;
 	struct ulptx_sge_pair *to;
@@ -903,6 +894,7 @@
 	if ((uintptr_t)end & 8)           /* 0-pad to multiple of 16 */
 		*end = 0;
 }
+EXPORT_SYMBOL(cxgb4_write_sgl);
 
 /* This function copies 64 byte coalesced work request to
  * memory mapped BAR2 space. For coalesced WR SGE fetches
@@ -921,14 +913,14 @@
 }
 
 /**
- *	ring_tx_db - check and potentially ring a Tx queue's doorbell
+ *	cxgb4_ring_tx_db - check and potentially ring a Tx queue's doorbell
  *	@adap: the adapter
  *	@q: the Tx queue
  *	@n: number of new descriptors to give to HW
  *
 *	Ring the doorbell for a Tx queue.
  */
-static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
+inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 {
 	/* Make sure that all writes to the TX Descriptors are committed
 	 * before we tell the hardware about them.
@@ -995,9 +987,10 @@
 		wmb();
 	}
 }
+EXPORT_SYMBOL(cxgb4_ring_tx_db);
 
 /**
- *	inline_tx_skb - inline a packet's data into Tx descriptors
+ *	cxgb4_inline_tx_skb - inline a packet's data into Tx descriptors
  *	@skb: the packet
  *	@q: the Tx queue where the packet will be inlined
  *	@pos: starting position in the Tx queue where to inline the packet
@@ -1007,8 +1000,8 @@
  *	Most of the complexity of this operation is dealing with wrap arounds
  *	in the middle of the packet we want to inline.
  */
-static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
-			  void *pos)
+void cxgb4_inline_tx_skb(const struct sk_buff *skb,
+			 const struct sge_txq *q, void *pos)
 {
 	u64 *p;
 	int left = (void *)q->stat - pos;
@@ -1030,6 +1023,7 @@
 	if ((uintptr_t)p & 8)
 		*p = 0;
 }
+EXPORT_SYMBOL(cxgb4_inline_tx_skb);
 
 static void *inline_tx_skb_header(const struct sk_buff *skb,
 				  const struct sge_txq *q,  void *pos,
@@ -1199,6 +1193,12 @@
 
 	pi = netdev_priv(dev);
 	adap = pi->adapter;
+	ssi = skb_shinfo(skb);
+#ifdef CONFIG_CHELSIO_IPSEC_INLINE
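+	/* Non-GSO packets with an IPsec offload state are handed to the crypto ULD's inline Tx handler */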
+	if (xfrm_offload(skb) && !ssi->gso_size)
+		return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
+#endif /* CHELSIO_IPSEC_INLINE */
+
 	qidx = skb_get_queue_mapping(skb);
 	if (ptp_enabled) {
 		spin_lock(&adap->ptp_lock);
@@ -1215,7 +1215,7 @@
 	}
 	skb_tx_timestamp(skb);
 
-	reclaim_completed_tx(adap, &q->q, true);
+	cxgb4_reclaim_completed_tx(adap, &q->q, true);
 	cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
 
 #ifdef CONFIG_CHELSIO_T4_FCOE
@@ -1245,7 +1245,7 @@
 		immediate = true;
 
 	if (!immediate &&
-	    unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) {
+	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
 		q->mapping_err++;
 		if (ptp_enabled)
 			spin_unlock(&adap->ptp_lock);
@@ -1264,7 +1264,6 @@
 	end = (u64 *)wr + flits;
 
 	len = immediate ? skb->len : 0;
-	ssi = skb_shinfo(skb);
 	if (ssi->gso_size) {
 		struct cpl_tx_pkt_lso *lso = (void *)wr;
 		bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
@@ -1341,13 +1340,13 @@
 	cpl->ctrl1 = cpu_to_be64(cntrl);
 
 	if (immediate) {
-		inline_tx_skb(skb, &q->q, cpl + 1);
+		cxgb4_inline_tx_skb(skb, &q->q, cpl + 1);
 		dev_consume_skb_any(skb);
 	} else {
 		int last_desc;
 
-		write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0,
-			  addr);
+		cxgb4_write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1),
+				end, 0, addr);
 		skb_orphan(skb);
 
 		last_desc = q->q.pidx + ndesc - 1;
@@ -1359,7 +1358,7 @@
 
 	txq_advance(&q->q, ndesc);
 
-	ring_tx_db(adap, &q->q, ndesc);
+	cxgb4_ring_tx_db(adap, &q->q, ndesc);
 	if (ptp_enabled)
 		spin_unlock(&adap->ptp_lock);
 	return NETDEV_TX_OK;
@@ -1369,9 +1368,9 @@
  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
  *	@q: the SGE control Tx queue
  *
- *	This is a variant of reclaim_completed_tx() that is used for Tx queues
- *	that send only immediate data (presently just the control queues) and
- *	thus do not have any sk_buffs to release.
+ *	This is a variant of cxgb4_reclaim_completed_tx() that is used
+ *	for Tx queues that send only immediate data (presently just
+ *	the control queues) and	thus do not have any sk_buffs to release.
  */
 static inline void reclaim_completed_tx_imm(struct sge_txq *q)
 {
@@ -1446,13 +1445,13 @@
 	}
 
 	wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
-	inline_tx_skb(skb, &q->q, wr);
+	cxgb4_inline_tx_skb(skb, &q->q, wr);
 
 	txq_advance(&q->q, ndesc);
 	if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES))
 		ctrlq_check_stop(q, wr);
 
-	ring_tx_db(q->adap, &q->q, ndesc);
+	cxgb4_ring_tx_db(q->adap, &q->q, ndesc);
 	spin_unlock(&q->sendq.lock);
 
 	kfree_skb(skb);
@@ -1487,7 +1486,7 @@
 		txq_advance(&q->q, ndesc);
 		spin_unlock(&q->sendq.lock);
 
-		inline_tx_skb(skb, &q->q, wr);
+		cxgb4_inline_tx_skb(skb, &q->q, wr);
 		kfree_skb(skb);
 
 		if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
@@ -1500,14 +1499,15 @@
 			}
 		}
 		if (written > 16) {
-			ring_tx_db(q->adap, &q->q, written);
+			cxgb4_ring_tx_db(q->adap, &q->q, written);
 			written = 0;
 		}
 		spin_lock(&q->sendq.lock);
 	}
 	q->full = 0;
-ringdb: if (written)
-		ring_tx_db(q->adap, &q->q, written);
+ringdb:
+	if (written)
+		cxgb4_ring_tx_db(q->adap, &q->q, written);
 	spin_unlock(&q->sendq.lock);
 }
 
@@ -1650,7 +1650,7 @@
 		 */
 		spin_unlock(&q->sendq.lock);
 
-		reclaim_completed_tx(q->adap, &q->q, false);
+		cxgb4_reclaim_completed_tx(q->adap, &q->q, false);
 
 		flits = skb->priority;                /* previously saved */
 		ndesc = flits_to_desc(flits);
@@ -1661,9 +1661,9 @@
 
 		pos = (u64 *)&q->q.desc[q->q.pidx];
 		if (is_ofld_imm(skb))
-			inline_tx_skb(skb, &q->q, pos);
-		else if (map_skb(q->adap->pdev_dev, skb,
-				 (dma_addr_t *)skb->head)) {
+			cxgb4_inline_tx_skb(skb, &q->q, pos);
+		else if (cxgb4_map_skb(q->adap->pdev_dev, skb,
+				       (dma_addr_t *)skb->head)) {
 			txq_stop_maperr(q);
 			spin_lock(&q->sendq.lock);
 			break;
@@ -1694,9 +1694,9 @@
 				pos = (void *)txq->desc;
 			}
 
-			write_sgl(skb, &q->q, (void *)pos,
-				  end, hdr_len,
-				  (dma_addr_t *)skb->head);
+			cxgb4_write_sgl(skb, &q->q, (void *)pos,
+					end, hdr_len,
+					(dma_addr_t *)skb->head);
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 			skb->dev = q->adap->port[0];
 			skb->destructor = deferred_unmap_destructor;
@@ -1710,7 +1710,7 @@
 		txq_advance(&q->q, ndesc);
 		written += ndesc;
 		if (unlikely(written > 32)) {
-			ring_tx_db(q->adap, &q->q, written);
+			cxgb4_ring_tx_db(q->adap, &q->q, written);
 			written = 0;
 		}
 
@@ -1725,7 +1725,7 @@
 			kfree_skb(skb);
 	}
 	if (likely(written))
-		ring_tx_db(q->adap, &q->q, written);
+		cxgb4_ring_tx_db(q->adap, &q->q, written);
 
 	/* Indicate that no thread is processing the Pending Send Queue
 	 * currently.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 57eb4ad..be36583 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -513,6 +513,13 @@
 	u64 cookie;
 };
 
+#define FW_ULPTX_WR_DATA_S      28
+#define FW_ULPTX_WR_DATA_M      0x1
+#define FW_ULPTX_WR_DATA_V(x)   ((x) << FW_ULPTX_WR_DATA_S)
+#define FW_ULPTX_WR_DATA_G(x)   \
+	(((x) >> FW_ULPTX_WR_DATA_S) & FW_ULPTX_WR_DATA_M)
+#define FW_ULPTX_WR_DATA_F      FW_ULPTX_WR_DATA_V(1U)
+
 struct fw_tp_wr {
 	__be32 op_to_immdlen;
 	__be32 flowid_len16;
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
index caaa470..b83d660 100644
--- a/include/crypto/chacha20.h
+++ b/include/crypto/chacha20.h
@@ -13,12 +13,13 @@
 #define CHACHA20_IV_SIZE	16
 #define CHACHA20_KEY_SIZE	32
 #define CHACHA20_BLOCK_SIZE	64
+#define CHACHA20_BLOCK_WORDS	(CHACHA20_BLOCK_SIZE / sizeof(u32))
 
 struct chacha20_ctx {
 	u32 key[8];
 };
 
-void chacha20_block(u32 *state, void *stream);
+void chacha20_block(u32 *state, u32 *stream);
 void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
 int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			   unsigned int keysize);
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index f0b44c1..c2bae8d 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -82,6 +82,14 @@
 			    struct ahash_instance *inst);
 void ahash_free_instance(struct crypto_instance *inst);
 
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+		    unsigned int keylen);
+
+static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
+{
+	return alg->setkey != shash_no_setkey;
+}
+
 int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn,
 			    struct hash_alg_common *alg,
 			    struct crypto_instance *inst);
diff --git a/include/crypto/null.h b/include/crypto/null.h
index 5757c0a..15aeef6 100644
--- a/include/crypto/null.h
+++ b/include/crypto/null.h
@@ -12,14 +12,4 @@
 struct crypto_skcipher *crypto_get_default_null_skcipher(void);
 void crypto_put_default_null_skcipher(void);
 
-static inline struct crypto_skcipher *crypto_get_default_null_skcipher2(void)
-{
-	return crypto_get_default_null_skcipher();
-}
-
-static inline void crypto_put_default_null_skcipher2(void)
-{
-	crypto_put_default_null_skcipher();
-}
-
 #endif
diff --git a/lib/chacha20.c b/lib/chacha20.c
index 250ceed..29d3801 100644
--- a/lib/chacha20.c
+++ b/lib/chacha20.c
@@ -21,7 +21,7 @@
 	return (v << n) | (v >> (sizeof(v) * 8 - n));
 }
 
-extern void chacha20_block(u32 *state, void *stream)
+void chacha20_block(u32 *state, u32 *stream)
 {
 	u32 x[16], *out = stream;
 	int i;