Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index cfb0010..1a58ad8 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -12,6 +12,7 @@
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
+obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 
@@ -24,3 +25,5 @@
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
 
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
+
+ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index eb0566e..20bb0e1a 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -16,6 +16,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/inst.h>
 
 .text
 
@@ -122,103 +123,72 @@
 	movups 0x10(%rsi), %xmm2	# other user key
 	movaps %xmm2, (%rcx)
 	add $0x10, %rcx
-	# aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
 	call _key_expansion_256a
-	# aeskeygenassist $0x1, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+	AESKEYGENASSIST 0x1 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
 	call _key_expansion_256a
-	# aeskeygenassist $0x2, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+	AESKEYGENASSIST 0x2 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
 	call _key_expansion_256a
-	# aeskeygenassist $0x4, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+	AESKEYGENASSIST 0x4 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
 	call _key_expansion_256a
-	# aeskeygenassist $0x8, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+	AESKEYGENASSIST 0x8 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
 	call _key_expansion_256a
-	# aeskeygenassist $0x10, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+	AESKEYGENASSIST 0x10 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
 	call _key_expansion_256a
-	# aeskeygenassist $0x20, %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+	AESKEYGENASSIST 0x20 %xmm0 %xmm1
 	call _key_expansion_256b
-	# aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
 	call _key_expansion_256a
 	jmp .Ldec_key
 .Lenc_key192:
 	movq 0x10(%rsi), %xmm2		# other user key
-	# aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+	AESKEYGENASSIST 0x1 %xmm2 %xmm1		# round 1
 	call _key_expansion_192a
-	# aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+	AESKEYGENASSIST 0x2 %xmm2 %xmm1		# round 2
 	call _key_expansion_192b
-	# aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+	AESKEYGENASSIST 0x4 %xmm2 %xmm1		# round 3
 	call _key_expansion_192a
-	# aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+	AESKEYGENASSIST 0x8 %xmm2 %xmm1		# round 4
 	call _key_expansion_192b
-	# aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+	AESKEYGENASSIST 0x10 %xmm2 %xmm1	# round 5
 	call _key_expansion_192a
-	# aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+	AESKEYGENASSIST 0x20 %xmm2 %xmm1	# round 6
 	call _key_expansion_192b
-	# aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
 	call _key_expansion_192a
-	# aeskeygenassist $0x80, %xmm2, %xmm1	# round 8
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
+	AESKEYGENASSIST 0x80 %xmm2 %xmm1	# round 8
 	call _key_expansion_192b
 	jmp .Ldec_key
 .Lenc_key128:
-	# aeskeygenassist $0x1, %xmm0, %xmm1	# round 1
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+	AESKEYGENASSIST 0x1 %xmm0 %xmm1		# round 1
 	call _key_expansion_128
-	# aeskeygenassist $0x2, %xmm0, %xmm1	# round 2
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+	AESKEYGENASSIST 0x2 %xmm0 %xmm1		# round 2
 	call _key_expansion_128
-	# aeskeygenassist $0x4, %xmm0, %xmm1	# round 3
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+	AESKEYGENASSIST 0x4 %xmm0 %xmm1		# round 3
 	call _key_expansion_128
-	# aeskeygenassist $0x8, %xmm0, %xmm1	# round 4
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+	AESKEYGENASSIST 0x8 %xmm0 %xmm1		# round 4
 	call _key_expansion_128
-	# aeskeygenassist $0x10, %xmm0, %xmm1	# round 5
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+	AESKEYGENASSIST 0x10 %xmm0 %xmm1	# round 5
 	call _key_expansion_128
-	# aeskeygenassist $0x20, %xmm0, %xmm1	# round 6
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+	AESKEYGENASSIST 0x20 %xmm0 %xmm1	# round 6
 	call _key_expansion_128
-	# aeskeygenassist $0x40, %xmm0, %xmm1	# round 7
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
+	AESKEYGENASSIST 0x40 %xmm0 %xmm1	# round 7
 	call _key_expansion_128
-	# aeskeygenassist $0x80, %xmm0, %xmm1	# round 8
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
+	AESKEYGENASSIST 0x80 %xmm0 %xmm1	# round 8
 	call _key_expansion_128
-	# aeskeygenassist $0x1b, %xmm0, %xmm1	# round 9
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
+	AESKEYGENASSIST 0x1b %xmm0 %xmm1	# round 9
 	call _key_expansion_128
-	# aeskeygenassist $0x36, %xmm0, %xmm1	# round 10
-	.byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
+	AESKEYGENASSIST 0x36 %xmm0 %xmm1	# round 10
 	call _key_expansion_128
 .Ldec_key:
 	sub $0x10, %rcx
@@ -231,8 +201,7 @@
 .align 4
 .Ldec_key_loop:
 	movaps (%rdi), %xmm0
-	# aesimc %xmm0, %xmm1
-	.byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
+	AESIMC %xmm0 %xmm1
 	movaps %xmm1, (%rsi)
 	add $0x10, %rdi
 	sub $0x10, %rsi
@@ -274,51 +243,37 @@
 	je .Lenc192
 	add $0x20, TKEYP
 	movaps -0x60(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps -0x50(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 .align 4
 .Lenc192:
 	movaps -0x40(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps -0x30(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 .align 4
 .Lenc128:
 	movaps -0x20(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps -0x10(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps (TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x10(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x20(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x30(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x40(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x50(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x60(TKEYP), KEY
-	# aesenc KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
+	AESENC KEY STATE
 	movaps 0x70(TKEYP), KEY
-	# aesenclast KEY, STATE	# last round
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
+	AESENCLAST KEY STATE
 	ret
 
 /*
@@ -353,135 +308,79 @@
 	je .L4enc192
 	add $0x20, TKEYP
 	movaps -0x60(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps -0x50(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 #.align 4
 .L4enc192:
 	movaps -0x40(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps -0x30(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 #.align 4
 .L4enc128:
 	movaps -0x20(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps -0x10(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps (TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x10(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x20(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x30(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x40(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x50(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x60(TKEYP), KEY
-	# aesenc KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xc2
-	# aesenc KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xe2
-	# aesenc KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xea
-	# aesenc KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdc, 0xf2
+	AESENC KEY STATE1
+	AESENC KEY STATE2
+	AESENC KEY STATE3
+	AESENC KEY STATE4
 	movaps 0x70(TKEYP), KEY
-	# aesenclast KEY, STATE1	# last round
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xc2
-	# aesenclast KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xe2
-	# aesenclast KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xea
-	# aesenclast KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdd, 0xf2
+	AESENCLAST KEY STATE1		# last round
+	AESENCLAST KEY STATE2
+	AESENCLAST KEY STATE3
+	AESENCLAST KEY STATE4
 	ret
 
 /*
@@ -518,51 +417,37 @@
 	je .Ldec192
 	add $0x20, TKEYP
 	movaps -0x60(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps -0x50(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 .align 4
 .Ldec192:
 	movaps -0x40(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps -0x30(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 .align 4
 .Ldec128:
 	movaps -0x20(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps -0x10(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps (TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x10(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x20(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x30(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x40(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x50(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x60(TKEYP), KEY
-	# aesdec KEY, STATE
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
+	AESDEC KEY STATE
 	movaps 0x70(TKEYP), KEY
-	# aesdeclast KEY, STATE		# last round
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
+	AESDECLAST KEY STATE
 	ret
 
 /*
@@ -597,135 +482,79 @@
 	je .L4dec192
 	add $0x20, TKEYP
 	movaps -0x60(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps -0x50(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 .align 4
 .L4dec192:
 	movaps -0x40(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps -0x30(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 .align 4
 .L4dec128:
 	movaps -0x20(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps -0x10(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps (TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x10(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x20(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x30(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x40(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x50(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x60(TKEYP), KEY
-	# aesdec KEY, STATE1
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xc2
-	# aesdec KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xe2
-	# aesdec KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xea
-	# aesdec KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xde, 0xf2
+	AESDEC KEY STATE1
+	AESDEC KEY STATE2
+	AESDEC KEY STATE3
+	AESDEC KEY STATE4
 	movaps 0x70(TKEYP), KEY
-	# aesdeclast KEY, STATE1	# last round
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xc2
-	# aesdeclast KEY, STATE2
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xe2
-	# aesdeclast KEY, STATE3
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xea
-	# aesdeclast KEY, STATE4
-	.byte 0x66, 0x0f, 0x38, 0xdf, 0xf2
+	AESDECLAST KEY STATE1		# last round
+	AESDECLAST KEY STATE2
+	AESDECLAST KEY STATE3
+	AESDECLAST KEY STATE4
 	ret
 
 /*
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
new file mode 100644
index 0000000..1eb7f90
--- /dev/null
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -0,0 +1,157 @@
+/*
+ * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
+ * instructions. This file contains accelerated part of ghash
+ * implementation. More information about PCLMULQDQ can be found at:
+ *
+ * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
+ *
+ * Copyright (c) 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *	     Vinodh Gopal
+ *	     Erdinc Ozturk
+ *	     Deniz Karakoyunlu
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/inst.h>
+
+.data
+
+.align 16
+.Lbswap_mask:
+	.octa 0x000102030405060708090a0b0c0d0e0f
+.Lpoly:
+	.octa 0xc2000000000000000000000000000001
+.Ltwo_one:
+	.octa 0x00000001000000000000000000000001
+
+#define DATA	%xmm0
+#define SHASH	%xmm1
+#define T1	%xmm2
+#define T2	%xmm3
+#define T3	%xmm4
+#define BSWAP	%xmm5
+#define IN1	%xmm6
+
+.text
+
+/*
+ * __clmul_gf128mul_ble:	internal ABI
+ * input:
+ *	DATA:			operand1
+ *	SHASH:			operand2, hash_key << 1 mod poly
+ * output:
+ *	DATA:			operand1 * operand2 mod poly
+ * changed:
+ *	T1
+ *	T2
+ *	T3
+ */
+__clmul_gf128mul_ble:
+	movaps DATA, T1
+	pshufd $0b01001110, DATA, T2
+	pshufd $0b01001110, SHASH, T3
+	pxor DATA, T2
+	pxor SHASH, T3
+
+	PCLMULQDQ 0x00 SHASH DATA	# DATA = a0 * b0
+	PCLMULQDQ 0x11 SHASH T1		# T1 = a1 * b1
+	PCLMULQDQ 0x00 T3 T2		# T2 = (a1 + a0) * (b1 + b0)
+	pxor DATA, T2
+	pxor T1, T2			# T2 = a0 * b1 + a1 * b0
+
+	movaps T2, T3
+	pslldq $8, T3
+	psrldq $8, T2
+	pxor T3, DATA
+	pxor T2, T1			# <T1:DATA> is result of
+					# carry-less multiplication
+
+	# first phase of the reduction
+	movaps DATA, T3
+	psllq $1, T3
+	pxor DATA, T3
+	psllq $5, T3
+	pxor DATA, T3
+	psllq $57, T3
+	movaps T3, T2
+	pslldq $8, T2
+	psrldq $8, T3
+	pxor T2, DATA
+	pxor T3, T1
+
+	# second phase of the reduction
+	movaps DATA, T2
+	psrlq $5, T2
+	pxor DATA, T2
+	psrlq $1, T2
+	pxor DATA, T2
+	psrlq $1, T2
+	pxor T2, T1
+	pxor T1, DATA
+	ret
+
+/* void clmul_ghash_mul(char *dst, const be128 *shash) */
+ENTRY(clmul_ghash_mul)
+	movups (%rdi), DATA
+	movups (%rsi), SHASH
+	movaps .Lbswap_mask, BSWAP
+	PSHUFB_XMM BSWAP DATA
+	call __clmul_gf128mul_ble
+	PSHUFB_XMM BSWAP DATA
+	movups DATA, (%rdi)
+	ret
+
+/*
+ * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
+ *			   const be128 *shash);
+ */
+ENTRY(clmul_ghash_update)
+	cmp $16, %rdx
+	jb .Lupdate_just_ret	# check length
+	movaps .Lbswap_mask, BSWAP
+	movups (%rdi), DATA
+	movups (%rcx), SHASH
+	PSHUFB_XMM BSWAP DATA
+.align 4
+.Lupdate_loop:
+	movups (%rsi), IN1
+	PSHUFB_XMM BSWAP IN1
+	pxor IN1, DATA
+	call __clmul_gf128mul_ble
+	sub $16, %rdx
+	add $16, %rsi
+	cmp $16, %rdx
+	jge .Lupdate_loop
+	PSHUFB_XMM BSWAP DATA
+	movups DATA, (%rdi)
+.Lupdate_just_ret:
+	ret
+
+/*
+ * void clmul_ghash_setkey(be128 *shash, const u8 *key);
+ *
+ * Calculate hash_key << 1 mod poly
+ */
+ENTRY(clmul_ghash_setkey)
+	movaps .Lbswap_mask, BSWAP
+	movups (%rsi), %xmm0
+	PSHUFB_XMM BSWAP %xmm0
+	movaps %xmm0, %xmm1
+	psllq $1, %xmm0
+	psrlq $63, %xmm1
+	movaps %xmm1, %xmm2
+	pslldq $8, %xmm1
+	psrldq $8, %xmm2
+	por %xmm1, %xmm0
+	# reduction
+	pshufd $0b00100100, %xmm2, %xmm1
+	pcmpeqd .Ltwo_one, %xmm1
+	pand .Lpoly, %xmm1
+	pxor %xmm1, %xmm0
+	movups %xmm0, (%rdi)
+	ret
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
new file mode 100644
index 0000000..cbcc8d8
--- /dev/null
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -0,0 +1,333 @@
+/*
+ * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
+ * instructions. This file contains glue code.
+ *
+ * Copyright (c) 2009 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/cryptd.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/hash.h>
+#include <asm/i387.h>
+
+#define GHASH_BLOCK_SIZE	16
+#define GHASH_DIGEST_SIZE	16
+
+void clmul_ghash_mul(char *dst, const be128 *shash);
+
+void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
+			const be128 *shash);
+
+void clmul_ghash_setkey(be128 *shash, const u8 *key);
+
+struct ghash_async_ctx {
+	struct cryptd_ahash *cryptd_tfm;
+};
+
+struct ghash_ctx {
+	be128 shash;
+};
+
+struct ghash_desc_ctx {
+	u8 buffer[GHASH_BLOCK_SIZE];
+	u32 bytes;
+};
+
+static int ghash_init(struct shash_desc *desc)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+
+	memset(dctx, 0, sizeof(*dctx));
+
+	return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+			const u8 *key, unsigned int keylen)
+{
+	struct ghash_ctx *ctx = crypto_shash_ctx(tfm);
+
+	if (keylen != GHASH_BLOCK_SIZE) {
+		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	clmul_ghash_setkey(&ctx->shash, key);
+
+	return 0;
+}
+
+static int ghash_update(struct shash_desc *desc,
+			 const u8 *src, unsigned int srclen)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	u8 *dst = dctx->buffer;
+
+	kernel_fpu_begin();
+	if (dctx->bytes) {
+		int n = min(srclen, dctx->bytes);
+		u8 *pos = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		dctx->bytes -= n;
+		srclen -= n;
+
+		while (n--)
+			*pos++ ^= *src++;
+
+		if (!dctx->bytes)
+			clmul_ghash_mul(dst, &ctx->shash);
+	}
+
+	clmul_ghash_update(dst, src, srclen, &ctx->shash);
+	kernel_fpu_end();
+
+	if (srclen & 0xf) {
+		src += srclen - (srclen & 0xf);
+		srclen &= 0xf;
+		dctx->bytes = GHASH_BLOCK_SIZE - srclen;
+		while (srclen--)
+			*dst++ ^= *src++;
+	}
+
+	return 0;
+}
+
+static void ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+{
+	u8 *dst = dctx->buffer;
+
+	if (dctx->bytes) {
+		u8 *tmp = dst + (GHASH_BLOCK_SIZE - dctx->bytes);
+
+		while (dctx->bytes--)
+			*tmp++ ^= 0;
+
+		kernel_fpu_begin();
+		clmul_ghash_mul(dst, &ctx->shash);
+		kernel_fpu_end();
+	}
+
+	dctx->bytes = 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
+	u8 *buf = dctx->buffer;
+
+	ghash_flush(ctx, dctx);
+	memcpy(dst, buf, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg ghash_alg = {
+	.digestsize	= GHASH_DIGEST_SIZE,
+	.init		= ghash_init,
+	.update		= ghash_update,
+	.final		= ghash_final,
+	.setkey		= ghash_setkey,
+	.descsize	= sizeof(struct ghash_desc_ctx),
+	.base		= {
+		.cra_name		= "__ghash",
+		.cra_driver_name	= "__ghash-pclmulqdqni",
+		.cra_priority		= 0,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
+		.cra_blocksize		= GHASH_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct ghash_ctx),
+		.cra_module		= THIS_MODULE,
+		.cra_list		= LIST_HEAD_INIT(ghash_alg.base.cra_list),
+	},
+};
+
+static int ghash_async_init(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+	if (!irq_fpu_usable()) {
+		memcpy(cryptd_req, req, sizeof(*req));
+		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+		return crypto_ahash_init(cryptd_req);
+	} else {
+		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+		struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
+
+		desc->tfm = child;
+		desc->flags = req->base.flags;
+		return crypto_shash_init(desc);
+	}
+}
+
+static int ghash_async_update(struct ahash_request *req)
+{
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+
+	if (!irq_fpu_usable()) {
+		struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+		struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+		struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+		return crypto_ahash_update(cryptd_req);
+	} else {
+		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+		return shash_ahash_update(req, desc);
+	}
+}
+
+static int ghash_async_final(struct ahash_request *req)
+{
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+
+	if (!irq_fpu_usable()) {
+		struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+		struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+		struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+		memcpy(cryptd_req, req, sizeof(*req));
+		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+		return crypto_ahash_final(cryptd_req);
+	} else {
+		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+		return crypto_shash_final(desc, req->result);
+	}
+}
+
+static int ghash_async_digest(struct ahash_request *req)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ahash_request *cryptd_req = ahash_request_ctx(req);
+	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+	if (!irq_fpu_usable()) {
+		memcpy(cryptd_req, req, sizeof(*req));
+		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+		return crypto_ahash_digest(cryptd_req);
+	} else {
+		struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+		struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
+
+		desc->tfm = child;
+		desc->flags = req->base.flags;
+		return shash_ahash_digest(req, desc);
+	}
+}
+
+static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct crypto_ahash *child = &ctx->cryptd_tfm->base;
+	int err;
+
+	crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
+			       & CRYPTO_TFM_REQ_MASK);
+	err = crypto_ahash_setkey(child, key, keylen);
+	crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
+			       & CRYPTO_TFM_RES_MASK);
+
+	return 0;
+}
+
+static int ghash_async_init_tfm(struct crypto_tfm *tfm)
+{
+	struct cryptd_ahash *cryptd_tfm;
+	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
+	if (IS_ERR(cryptd_tfm))
+		return PTR_ERR(cryptd_tfm);
+	ctx->cryptd_tfm = cryptd_tfm;
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct ahash_request) +
+				 crypto_ahash_reqsize(&cryptd_tfm->base));
+
+	return 0;
+}
+
+static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cryptd_free_ahash(ctx->cryptd_tfm);
+}
+
+static struct ahash_alg ghash_async_alg = {
+	.init		= ghash_async_init,
+	.update		= ghash_async_update,
+	.final		= ghash_async_final,
+	.setkey		= ghash_async_setkey,
+	.digest		= ghash_async_digest,
+	.halg = {
+		.digestsize	= GHASH_DIGEST_SIZE,
+		.base = {
+			.cra_name		= "ghash",
+			.cra_driver_name	= "ghash-clmulni",
+			.cra_priority		= 400,
+			.cra_flags		= CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+			.cra_blocksize		= GHASH_BLOCK_SIZE,
+			.cra_type		= &crypto_ahash_type,
+			.cra_module		= THIS_MODULE,
+			.cra_list		= LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list),
+			.cra_init		= ghash_async_init_tfm,
+			.cra_exit		= ghash_async_exit_tfm,
+		},
+	},
+};
+
+static int __init ghash_pclmulqdqni_mod_init(void)
+{
+	int err;
+
+	if (!cpu_has_pclmulqdq) {
+		printk(KERN_INFO "Intel PCLMULQDQ-NI instructions are not"
+		       " detected.\n");
+		return -ENODEV;
+	}
+
+	err = crypto_register_shash(&ghash_alg);
+	if (err)
+		goto err_out;
+	err = crypto_register_ahash(&ghash_async_alg);
+	if (err)
+		goto err_shash;
+
+	return 0;
+
+err_shash:
+	crypto_unregister_shash(&ghash_alg);
+err_out:
+	return err;
+}
+
+static void __exit ghash_pclmulqdqni_mod_exit(void)
+{
+	crypto_unregister_ahash(&ghash_async_alg);
+	crypto_unregister_shash(&ghash_alg);
+}
+
+module_init(ghash_pclmulqdqni_mod_init);
+module_exit(ghash_pclmulqdqni_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
+		   "acclerated by PCLMULQDQ-NI");
+MODULE_ALIAS("ghash");
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 9cfc88b..613700f 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -248,6 +248,7 @@
 #define cpu_has_x2apic		boot_cpu_has(X86_FEATURE_X2APIC)
 #define cpu_has_xsave		boot_cpu_has(X86_FEATURE_XSAVE)
 #define cpu_has_hypervisor	boot_cpu_has(X86_FEATURE_HYPERVISOR)
+#define cpu_has_pclmulqdq	boot_cpu_has(X86_FEATURE_PCLMULQDQ)
 
 #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
 # define cpu_has_invlpg		1
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
index 0b20bbb..ebfb8a9 100644
--- a/arch/x86/include/asm/i387.h
+++ b/arch/x86/include/asm/i387.h
@@ -10,6 +10,8 @@
 #ifndef _ASM_X86_I387_H
 #define _ASM_X86_I387_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/regset.h>
@@ -411,4 +413,9 @@
 	}
 }
 
+#endif /* __ASSEMBLY__ */
+
+#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
+#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5
+
 #endif /* _ASM_X86_I387_H */
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
new file mode 100644
index 0000000..14cf526
--- /dev/null
+++ b/arch/x86/include/asm/inst.h
@@ -0,0 +1,150 @@
+/*
+ * Generate .byte code for some instructions not supported by old
+ * binutils.
+ */
+#ifndef X86_ASM_INST_H
+#define X86_ASM_INST_H
+
+#ifdef __ASSEMBLY__
+
+	.macro XMM_NUM opd xmm
+	.ifc \xmm,%xmm0
+	\opd = 0
+	.endif
+	.ifc \xmm,%xmm1
+	\opd = 1
+	.endif
+	.ifc \xmm,%xmm2
+	\opd = 2
+	.endif
+	.ifc \xmm,%xmm3
+	\opd = 3
+	.endif
+	.ifc \xmm,%xmm4
+	\opd = 4
+	.endif
+	.ifc \xmm,%xmm5
+	\opd = 5
+	.endif
+	.ifc \xmm,%xmm6
+	\opd = 6
+	.endif
+	.ifc \xmm,%xmm7
+	\opd = 7
+	.endif
+	.ifc \xmm,%xmm8
+	\opd = 8
+	.endif
+	.ifc \xmm,%xmm9
+	\opd = 9
+	.endif
+	.ifc \xmm,%xmm10
+	\opd = 10
+	.endif
+	.ifc \xmm,%xmm11
+	\opd = 11
+	.endif
+	.ifc \xmm,%xmm12
+	\opd = 12
+	.endif
+	.ifc \xmm,%xmm13
+	\opd = 13
+	.endif
+	.ifc \xmm,%xmm14
+	\opd = 14
+	.endif
+	.ifc \xmm,%xmm15
+	\opd = 15
+	.endif
+	.endm
+
+	.macro PFX_OPD_SIZE
+	.byte 0x66
+	.endm
+
+	.macro PFX_REX opd1 opd2
+	.if (\opd1 | \opd2) & 8
+	.byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1)
+	.endif
+	.endm
+
+	.macro MODRM mod opd1 opd2
+	.byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
+	.endm
+
+	.macro PSHUFB_XMM xmm1 xmm2
+	XMM_NUM pshufb_opd1 \xmm1
+	XMM_NUM pshufb_opd2 \xmm2
+	PFX_OPD_SIZE
+	PFX_REX pshufb_opd1 pshufb_opd2
+	.byte 0x0f, 0x38, 0x00
+	MODRM 0xc0 pshufb_opd1 pshufb_opd2
+	.endm
+
+	.macro PCLMULQDQ imm8 xmm1 xmm2
+	XMM_NUM clmul_opd1 \xmm1
+	XMM_NUM clmul_opd2 \xmm2
+	PFX_OPD_SIZE
+	PFX_REX clmul_opd1 clmul_opd2
+	.byte 0x0f, 0x3a, 0x44
+	MODRM 0xc0 clmul_opd1 clmul_opd2
+	.byte \imm8
+	.endm
+
+	.macro AESKEYGENASSIST rcon xmm1 xmm2
+	XMM_NUM aeskeygen_opd1 \xmm1
+	XMM_NUM aeskeygen_opd2 \xmm2
+	PFX_OPD_SIZE
+	PFX_REX aeskeygen_opd1 aeskeygen_opd2
+	.byte 0x0f, 0x3a, 0xdf
+	MODRM 0xc0 aeskeygen_opd1 aeskeygen_opd2
+	.byte \rcon
+	.endm
+
+	.macro AESIMC xmm1 xmm2
+	XMM_NUM aesimc_opd1 \xmm1
+	XMM_NUM aesimc_opd2 \xmm2
+	PFX_OPD_SIZE
+	PFX_REX aesimc_opd1 aesimc_opd2
+	.byte 0x0f, 0x38, 0xdb
+	MODRM 0xc0 aesimc_opd1 aesimc_opd2
+	.endm
+
+	.macro AESENC xmm1 xmm2
+	XMM_NUM aesenc_opd1 \xmm1
+	XMM_NUM aesenc_opd2 \xmm2
+	PFX_OPD_SIZE
+	PFX_REX aesenc_opd1 aesenc_opd2
+	.byte 0x0f, 0x38, 0xdc
+	MODRM 0xc0 aesenc_opd1 aesenc_opd2
+	.endm
+
+	.macro AESENCLAST xmm1 xmm2
+	XMM_NUM aesenclast_opd1 \xmm1
+	XMM_NUM aesenclast_opd2 \xmm2
+	PFX_OPD_SIZE
+	PFX_REX aesenclast_opd1 aesenclast_opd2
+	.byte 0x0f, 0x38, 0xdd
+	MODRM 0xc0 aesenclast_opd1 aesenclast_opd2
+	.endm
+
+	.macro AESDEC xmm1 xmm2
+	XMM_NUM aesdec_opd1 \xmm1
+	XMM_NUM aesdec_opd2 \xmm2
+	PFX_OPD_SIZE
+	PFX_REX aesdec_opd1 aesdec_opd2
+	.byte 0x0f, 0x38, 0xde
+	MODRM 0xc0 aesdec_opd1 aesdec_opd2
+	.endm
+
+	.macro AESDECLAST xmm1 xmm2
+	XMM_NUM aesdeclast_opd1 \xmm1
+	XMM_NUM aesdeclast_opd2 \xmm2
+	PFX_OPD_SIZE
+	PFX_REX aesdeclast_opd1 aesdeclast_opd2
+	.byte 0x0f, 0x38, 0xdf
+	MODRM 0xc0 aesdeclast_opd1 aesdeclast_opd2
+	.endm
+#endif
+
+#endif
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 26b5dd0..81c185a 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -440,6 +440,15 @@
 	  See also:
 	  <http://planeta.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html>
 
+config CRYPTO_GHASH_CLMUL_NI_INTEL
+	tristate "GHASH digest algorithm (CLMUL-NI accelerated)"
+	depends on (X86 || UML_X86) && 64BIT
+	select CRYPTO_SHASH
+	select CRYPTO_CRYPTD
+	help
+	  GHASH is message digest algorithm for GCM (Galois/Counter Mode).
+	  The implementation is accelerated by CLMUL-NI of Intel.
+
 comment "Ciphers"
 
 config CRYPTO_AES
diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c
index 3aa6e38..2bc3321 100644
--- a/crypto/ansi_cprng.c
+++ b/crypto/ansi_cprng.c
@@ -85,7 +85,7 @@
  * Returns DEFAULT_BLK_SZ bytes of random data per call
  * returns 0 if generation succeded, <0 if something went wrong
  */
-static int _get_more_prng_bytes(struct prng_context *ctx)
+static int _get_more_prng_bytes(struct prng_context *ctx, int cont_test)
 {
 	int i;
 	unsigned char tmp[DEFAULT_BLK_SZ];
@@ -132,7 +132,7 @@
 			 */
 			if (!memcmp(ctx->rand_data, ctx->last_rand_data,
 					DEFAULT_BLK_SZ)) {
-				if (fips_enabled) {
+				if (cont_test) {
 					panic("cprng %p Failed repetition check!\n",
 						ctx);
 				}
@@ -185,16 +185,14 @@
 }
 
 /* Our exported functions */
-static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx)
+static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx,
+				int do_cont_test)
 {
 	unsigned char *ptr = buf;
 	unsigned int byte_count = (unsigned int)nbytes;
 	int err;
 
 
-	if (nbytes < 0)
-		return -EINVAL;
-
 	spin_lock_bh(&ctx->prng_lock);
 
 	err = -EINVAL;
@@ -220,7 +218,7 @@
 
 remainder:
 	if (ctx->rand_data_valid == DEFAULT_BLK_SZ) {
-		if (_get_more_prng_bytes(ctx) < 0) {
+		if (_get_more_prng_bytes(ctx, do_cont_test) < 0) {
 			memset(buf, 0, nbytes);
 			err = -EINVAL;
 			goto done;
@@ -247,7 +245,7 @@
 	 */
 	for (; byte_count >= DEFAULT_BLK_SZ; byte_count -= DEFAULT_BLK_SZ) {
 		if (ctx->rand_data_valid == DEFAULT_BLK_SZ) {
-			if (_get_more_prng_bytes(ctx) < 0) {
+			if (_get_more_prng_bytes(ctx, do_cont_test) < 0) {
 				memset(buf, 0, nbytes);
 				err = -EINVAL;
 				goto done;
@@ -356,7 +354,7 @@
 {
 	struct prng_context *prng = crypto_rng_ctx(tfm);
 
-	return get_prng_bytes(rdata, dlen, prng);
+	return get_prng_bytes(rdata, dlen, prng, 0);
 }
 
 /*
@@ -404,19 +402,79 @@
 	}
 };
 
+#ifdef CONFIG_CRYPTO_FIPS
+static int fips_cprng_get_random(struct crypto_rng *tfm, u8 *rdata,
+			    unsigned int dlen)
+{
+	struct prng_context *prng = crypto_rng_ctx(tfm);
+
+	return get_prng_bytes(rdata, dlen, prng, 1);
+}
+
+static int fips_cprng_reset(struct crypto_rng *tfm, u8 *seed, unsigned int slen)
+{
+	u8 rdata[DEFAULT_BLK_SZ];
+	int rc;
+
+	struct prng_context *prng = crypto_rng_ctx(tfm);
+
+	rc = cprng_reset(tfm, seed, slen);
+
+	if (!rc)
+		goto out;
+
+	/* this primes our continuity test */
+	rc = get_prng_bytes(rdata, DEFAULT_BLK_SZ, prng, 0);
+	prng->rand_data_valid = DEFAULT_BLK_SZ;
+
+out:
+	return rc;
+}
+
+static struct crypto_alg fips_rng_alg = {
+	.cra_name		= "fips(ansi_cprng)",
+	.cra_driver_name	= "fips_ansi_cprng",
+	.cra_priority		= 300,
+	.cra_flags		= CRYPTO_ALG_TYPE_RNG,
+	.cra_ctxsize		= sizeof(struct prng_context),
+	.cra_type		= &crypto_rng_type,
+	.cra_module		= THIS_MODULE,
+	.cra_list		= LIST_HEAD_INIT(rng_alg.cra_list),
+	.cra_init		= cprng_init,
+	.cra_exit		= cprng_exit,
+	.cra_u			= {
+		.rng = {
+			.rng_make_random	= fips_cprng_get_random,
+			.rng_reset		= fips_cprng_reset,
+			.seedsize = DEFAULT_PRNG_KSZ + 2*DEFAULT_BLK_SZ,
+		}
+	}
+};
+#endif
 
 /* Module initalization */
 static int __init prng_mod_init(void)
 {
-	if (fips_enabled)
-		rng_alg.cra_priority += 200;
+	int rc = 0;
 
-	return crypto_register_alg(&rng_alg);
+	rc = crypto_register_alg(&rng_alg);
+#ifdef CONFIG_CRYPTO_FIPS
+	if (rc)
+		goto out;
+
+	rc = crypto_register_alg(&fips_rng_alg);
+
+out:
+#endif
+	return rc;
 }
 
 static void __exit prng_mod_fini(void)
 {
 	crypto_unregister_alg(&rng_alg);
+#ifdef CONFIG_CRYPTO_FIPS
+	crypto_unregister_alg(&fips_rng_alg);
+#endif
 	return;
 }
 
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 3533582..f8ae0d9 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -711,6 +711,13 @@
 }
 EXPORT_SYMBOL_GPL(cryptd_ahash_child);
 
+struct shash_desc *cryptd_shash_desc(struct ahash_request *req)
+{
+	struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+	return &rctx->desc;
+}
+EXPORT_SYMBOL_GPL(cryptd_shash_desc);
+
 void cryptd_free_ahash(struct cryptd_ahash *tfm)
 {
 	crypto_free_ahash(&tfm->base);
diff --git a/crypto/digest.c b/crypto/digest.c
deleted file mode 100644
index 5d3f130..0000000
--- a/crypto/digest.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Cryptographic API.
- *
- * Digest operations.
- *
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option) 
- * any later version.
- *
- */
-
-#include <crypto/internal/hash.h>
-#include <crypto/scatterwalk.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <linux/hardirq.h>
-#include <linux/highmem.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-
-#include "internal.h"
-
-static int init(struct hash_desc *desc)
-{
-	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);
-
-	tfm->__crt_alg->cra_digest.dia_init(tfm);
-	return 0;
-}
-
-static int update2(struct hash_desc *desc,
-		   struct scatterlist *sg, unsigned int nbytes)
-{
-	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);
-	unsigned int alignmask = crypto_tfm_alg_alignmask(tfm);
-
-	if (!nbytes)
-		return 0;
-
-	for (;;) {
-		struct page *pg = sg_page(sg);
-		unsigned int offset = sg->offset;
-		unsigned int l = sg->length;
-
-		if (unlikely(l > nbytes))
-			l = nbytes;
-		nbytes -= l;
-
-		do {
-			unsigned int bytes_from_page = min(l, ((unsigned int)
-							   (PAGE_SIZE)) - 
-							   offset);
-			char *src = crypto_kmap(pg, 0);
-			char *p = src + offset;
-
-			if (unlikely(offset & alignmask)) {
-				unsigned int bytes =
-					alignmask + 1 - (offset & alignmask);
-				bytes = min(bytes, bytes_from_page);
-				tfm->__crt_alg->cra_digest.dia_update(tfm, p,
-								      bytes);
-				p += bytes;
-				bytes_from_page -= bytes;
-				l -= bytes;
-			}
-			tfm->__crt_alg->cra_digest.dia_update(tfm, p,
-							      bytes_from_page);
-			crypto_kunmap(src, 0);
-			crypto_yield(desc->flags);
-			offset = 0;
-			pg++;
-			l -= bytes_from_page;
-		} while (l > 0);
-
-		if (!nbytes)
-			break;
-		sg = scatterwalk_sg_next(sg);
-	}
-
-	return 0;
-}
-
-static int update(struct hash_desc *desc,
-		  struct scatterlist *sg, unsigned int nbytes)
-{
-	if (WARN_ON_ONCE(in_irq()))
-		return -EDEADLK;
-	return update2(desc, sg, nbytes);
-}
-
-static int final(struct hash_desc *desc, u8 *out)
-{
-	struct crypto_tfm *tfm = crypto_hash_tfm(desc->tfm);
-	unsigned long alignmask = crypto_tfm_alg_alignmask(tfm);
-	struct digest_alg *digest = &tfm->__crt_alg->cra_digest;
-
-	if (unlikely((unsigned long)out & alignmask)) {
-		unsigned long align = alignmask + 1;
-		unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm);
-		u8 *dst = (u8 *)ALIGN(addr, align) +
-			  ALIGN(tfm->__crt_alg->cra_ctxsize, align);
-
-		digest->dia_final(tfm, dst);
-		memcpy(out, dst, digest->dia_digestsize);
-	} else
-		digest->dia_final(tfm, out);
-
-	return 0;
-}
-
-static int nosetkey(struct crypto_hash *tfm, const u8 *key, unsigned int keylen)
-{
-	crypto_hash_clear_flags(tfm, CRYPTO_TFM_RES_MASK);
-	return -ENOSYS;
-}
-
-static int setkey(struct crypto_hash *hash, const u8 *key, unsigned int keylen)
-{
-	struct crypto_tfm *tfm = crypto_hash_tfm(hash);
-
-	crypto_hash_clear_flags(hash, CRYPTO_TFM_RES_MASK);
-	return tfm->__crt_alg->cra_digest.dia_setkey(tfm, key, keylen);
-}
-
-static int digest(struct hash_desc *desc,
-		  struct scatterlist *sg, unsigned int nbytes, u8 *out)
-{
-	if (WARN_ON_ONCE(in_irq()))
-		return -EDEADLK;
-
-	init(desc);
-	update2(desc, sg, nbytes);
-	return final(desc, out);
-}
-
-int crypto_init_digest_ops(struct crypto_tfm *tfm)
-{
-	struct hash_tfm *ops = &tfm->crt_hash;
-	struct digest_alg *dalg = &tfm->__crt_alg->cra_digest;
-
-	if (dalg->dia_digestsize > PAGE_SIZE / 8)
-		return -EINVAL;
-	
-	ops->init	= init;
-	ops->update	= update;
-	ops->final	= final;
-	ops->digest	= digest;
-	ops->setkey	= dalg->dia_setkey ? setkey : nosetkey;
-	ops->digestsize	= dalg->dia_digestsize;
-	
-	return 0;
-}
-
-void crypto_exit_digest_ops(struct crypto_tfm *tfm)
-{
-}
-
-static int digest_async_nosetkey(struct crypto_ahash *tfm_async, const u8 *key,
-			unsigned int keylen)
-{
-	crypto_ahash_clear_flags(tfm_async, CRYPTO_TFM_RES_MASK);
-	return -ENOSYS;
-}
-
-static int digest_async_setkey(struct crypto_ahash *tfm_async, const u8 *key,
-			unsigned int keylen)
-{
-	struct crypto_tfm    *tfm        = crypto_ahash_tfm(tfm_async);
-	struct digest_alg    *dalg       = &tfm->__crt_alg->cra_digest;
-
-	crypto_ahash_clear_flags(tfm_async, CRYPTO_TFM_RES_MASK);
-	return dalg->dia_setkey(tfm, key, keylen);
-}
-
-static int digest_async_init(struct ahash_request *req)
-{
-	struct crypto_tfm *tfm  = req->base.tfm;
-	struct digest_alg *dalg = &tfm->__crt_alg->cra_digest;
-
-	dalg->dia_init(tfm);
-	return 0;
-}
-
-static int digest_async_update(struct ahash_request *req)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	struct hash_desc  desc = {
-		.tfm   = __crypto_hash_cast(tfm),
-		.flags = req->base.flags,
-	};
-
-	update(&desc, req->src, req->nbytes);
-	return 0;
-}
-
-static int digest_async_final(struct ahash_request *req)
-{
-	struct crypto_tfm *tfm  = req->base.tfm;
-	struct hash_desc  desc = {
-		.tfm   = __crypto_hash_cast(tfm),
-		.flags = req->base.flags,
-	};
-
-	final(&desc, req->result);
-	return 0;
-}
-
-static int digest_async_digest(struct ahash_request *req)
-{
-	struct crypto_tfm *tfm  = req->base.tfm;
-	struct hash_desc  desc = {
-		.tfm   = __crypto_hash_cast(tfm),
-		.flags = req->base.flags,
-	};
-
-	return digest(&desc, req->src, req->nbytes, req->result);
-}
-
-int crypto_init_digest_ops_async(struct crypto_tfm *tfm)
-{
-	struct ahash_tfm  *crt  = &tfm->crt_ahash;
-	struct digest_alg *dalg = &tfm->__crt_alg->cra_digest;
-
-	if (dalg->dia_digestsize > PAGE_SIZE / 8)
-		return -EINVAL;
-
-	crt->init       = digest_async_init;
-	crt->update     = digest_async_update;
-	crt->final      = digest_async_final;
-	crt->digest     = digest_async_digest;
-	crt->setkey     = dalg->dia_setkey ? digest_async_setkey :
-						digest_async_nosetkey;
-	crt->digestsize = dalg->dia_digestsize;
-
-	return 0;
-}
diff --git a/crypto/hash.c b/crypto/hash.c
deleted file mode 100644
index cb86b19..0000000
--- a/crypto/hash.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Cryptographic Hash operations.
- * 
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option) 
- * any later version.
- */
-
-#include <crypto/internal/hash.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/seq_file.h>
-
-#include "internal.h"
-
-static unsigned int crypto_hash_ctxsize(struct crypto_alg *alg, u32 type,
-					u32 mask)
-{
-	return alg->cra_ctxsize;
-}
-
-static int hash_setkey_unaligned(struct crypto_hash *crt, const u8 *key,
-		                 unsigned int keylen)
-{
-	struct crypto_tfm *tfm = crypto_hash_tfm(crt);
-	struct hash_alg *alg = &tfm->__crt_alg->cra_hash;
-	unsigned long alignmask = crypto_hash_alignmask(crt);
-	int ret;
-	u8 *buffer, *alignbuffer;
-	unsigned long absize;
-
-	absize = keylen + alignmask;
-	buffer = kmalloc(absize, GFP_ATOMIC);
-	if (!buffer)
-		return -ENOMEM;
-
-	alignbuffer = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1);
-	memcpy(alignbuffer, key, keylen);
-	ret = alg->setkey(crt, alignbuffer, keylen);
-	memset(alignbuffer, 0, keylen);
-	kfree(buffer);
-	return ret;
-}
-
-static int hash_setkey(struct crypto_hash *crt, const u8 *key,
-		       unsigned int keylen)
-{
-	struct crypto_tfm *tfm = crypto_hash_tfm(crt);
-	struct hash_alg *alg = &tfm->__crt_alg->cra_hash;
-	unsigned long alignmask = crypto_hash_alignmask(crt);
-
-	if ((unsigned long)key & alignmask)
-		return hash_setkey_unaligned(crt, key, keylen);
-
-	return alg->setkey(crt, key, keylen);
-}
-
-static int hash_async_setkey(struct crypto_ahash *tfm_async, const u8 *key,
-			unsigned int keylen)
-{
-	struct crypto_tfm  *tfm      = crypto_ahash_tfm(tfm_async);
-	struct crypto_hash *tfm_hash = __crypto_hash_cast(tfm);
-	struct hash_alg    *alg      = &tfm->__crt_alg->cra_hash;
-
-	return alg->setkey(tfm_hash, key, keylen);
-}
-
-static int hash_async_init(struct ahash_request *req)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	struct hash_alg   *alg = &tfm->__crt_alg->cra_hash;
-	struct hash_desc  desc = {
-		.tfm = __crypto_hash_cast(tfm),
-		.flags = req->base.flags,
-	};
-
-	return alg->init(&desc);
-}
-
-static int hash_async_update(struct ahash_request *req)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	struct hash_alg   *alg = &tfm->__crt_alg->cra_hash;
-	struct hash_desc  desc = {
-		.tfm = __crypto_hash_cast(tfm),
-		.flags = req->base.flags,
-	};
-
-	return alg->update(&desc, req->src, req->nbytes);
-}
-
-static int hash_async_final(struct ahash_request *req)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	struct hash_alg   *alg = &tfm->__crt_alg->cra_hash;
-	struct hash_desc  desc = {
-		.tfm = __crypto_hash_cast(tfm),
-		.flags = req->base.flags,
-	};
-
-	return alg->final(&desc, req->result);
-}
-
-static int hash_async_digest(struct ahash_request *req)
-{
-	struct crypto_tfm *tfm = req->base.tfm;
-	struct hash_alg   *alg = &tfm->__crt_alg->cra_hash;
-	struct hash_desc  desc = {
-		.tfm = __crypto_hash_cast(tfm),
-		.flags = req->base.flags,
-	};
-
-	return alg->digest(&desc, req->src, req->nbytes, req->result);
-}
-
-static int crypto_init_hash_ops_async(struct crypto_tfm *tfm)
-{
-	struct ahash_tfm *crt = &tfm->crt_ahash;
-	struct hash_alg  *alg = &tfm->__crt_alg->cra_hash;
-
-	crt->init       = hash_async_init;
-	crt->update     = hash_async_update;
-	crt->final      = hash_async_final;
-	crt->digest     = hash_async_digest;
-	crt->setkey     = hash_async_setkey;
-	crt->digestsize = alg->digestsize;
-
-	return 0;
-}
-
-static int crypto_init_hash_ops_sync(struct crypto_tfm *tfm)
-{
-	struct hash_tfm *crt = &tfm->crt_hash;
-	struct hash_alg *alg = &tfm->__crt_alg->cra_hash;
-
-	crt->init       = alg->init;
-	crt->update     = alg->update;
-	crt->final      = alg->final;
-	crt->digest     = alg->digest;
-	crt->setkey     = hash_setkey;
-	crt->digestsize = alg->digestsize;
-
-	return 0;
-}
-
-static int crypto_init_hash_ops(struct crypto_tfm *tfm, u32 type, u32 mask)
-{
-	struct hash_alg *alg = &tfm->__crt_alg->cra_hash;
-
-	if (alg->digestsize > PAGE_SIZE / 8)
-		return -EINVAL;
-
-	if ((mask & CRYPTO_ALG_TYPE_HASH_MASK) != CRYPTO_ALG_TYPE_HASH_MASK)
-		return crypto_init_hash_ops_async(tfm);
-	else
-		return crypto_init_hash_ops_sync(tfm);
-}
-
-static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg)
-	__attribute__ ((unused));
-static void crypto_hash_show(struct seq_file *m, struct crypto_alg *alg)
-{
-	seq_printf(m, "type         : hash\n");
-	seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
-	seq_printf(m, "digestsize   : %u\n", alg->cra_hash.digestsize);
-}
-
-const struct crypto_type crypto_hash_type = {
-	.ctxsize = crypto_hash_ctxsize,
-	.init = crypto_init_hash_ops,
-#ifdef CONFIG_PROC_FS
-	.show = crypto_hash_show,
-#endif
-};
-EXPORT_SYMBOL_GPL(crypto_hash_type);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Generic cryptographic hash type");
diff --git a/crypto/proc.c b/crypto/proc.c
index 5dc07e4..ff4cb4a 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -115,13 +115,6 @@
 		seq_printf(m, "max keysize  : %u\n",
 					alg->cra_cipher.cia_max_keysize);
 		break;
-		
-	case CRYPTO_ALG_TYPE_DIGEST:
-		seq_printf(m, "type         : digest\n");
-		seq_printf(m, "blocksize    : %u\n", alg->cra_blocksize);
-		seq_printf(m, "digestsize   : %u\n",
-		           alg->cra_digest.dia_digestsize);
-		break;
 	case CRYPTO_ALG_TYPE_COMPRESS:
 		seq_printf(m, "type         : compression\n");
 		break;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 6d5b746..7620bfc 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1201,7 +1201,7 @@
 		      unsigned int tcount)
 {
 	const char *algo = crypto_tfm_alg_driver_name(crypto_rng_tfm(tfm));
-	int err, i, j, seedsize;
+	int err = 0, i, j, seedsize;
 	u8 *seed;
 	char result[32];
 
@@ -1943,6 +1943,15 @@
 			}
 		}
 	}, {
+		.alg = "ghash",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = {
+				.vecs = ghash_tv_template,
+				.count = GHASH_TEST_VECTORS
+			}
+		}
+	}, {
 		.alg = "hmac(md5)",
 		.test = alg_test_hash,
 		.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 9963b18..fb76517 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1003,6 +1003,21 @@
 	},
 };
 
+#define GHASH_TEST_VECTORS 1
+
+static struct hash_testvec ghash_tv_template[] =
+{
+	{
+
+		.key	= "\xdf\xa6\xbf\x4d\xed\x81\xdb\x03\xff\xca\xff\x95\xf8\x30\xf0\x61",
+		.ksize	= 16,
+		.plaintext = "\x95\x2b\x2a\x56\xa5\x60\x04a\xc0\xb3\x2b\x66\x56\xa0\x5b\x40\xb6",
+		.psize	= 16,
+		.digest	= "\xda\x53\xeb\x0a\xd2\xc5\x5b\xb6"
+			  "\x4f\xc4\x80\x2c\xc3\xfe\xda\x60",
+	},
+};
+
 /*
  * HMAC-MD5 test vectors from RFC2202
  * (These need to be fixed to not use strlen).
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 1573aeb..5c2d13c 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -52,7 +52,8 @@
 static struct hwrng *current_rng;
 static LIST_HEAD(rng_list);
 static DEFINE_MUTEX(rng_mutex);
-
+static int data_avail;
+static u8 rng_buffer[SMP_CACHE_BYTES] __cacheline_aligned;
 
 static inline int hwrng_init(struct hwrng *rng)
 {
@@ -67,19 +68,6 @@
 		rng->cleanup(rng);
 }
 
-static inline int hwrng_data_present(struct hwrng *rng, int wait)
-{
-	if (!rng->data_present)
-		return 1;
-	return rng->data_present(rng, wait);
-}
-
-static inline int hwrng_data_read(struct hwrng *rng, u32 *data)
-{
-	return rng->data_read(rng, data);
-}
-
-
 static int rng_dev_open(struct inode *inode, struct file *filp)
 {
 	/* enforce read-only access to this chrdev */
@@ -91,54 +79,87 @@
 	return 0;
 }
 
+static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size,
+			int wait) {
+	int present;
+
+	if (rng->read)
+		return rng->read(rng, (void *)buffer, size, wait);
+
+	if (rng->data_present)
+		present = rng->data_present(rng, wait);
+	else
+		present = 1;
+
+	if (present)
+		return rng->data_read(rng, (u32 *)buffer);
+
+	return 0;
+}
+
 static ssize_t rng_dev_read(struct file *filp, char __user *buf,
 			    size_t size, loff_t *offp)
 {
-	u32 data;
 	ssize_t ret = 0;
 	int err = 0;
-	int bytes_read;
+	int bytes_read, len;
 
 	while (size) {
-		err = -ERESTARTSYS;
-		if (mutex_lock_interruptible(&rng_mutex))
+		if (mutex_lock_interruptible(&rng_mutex)) {
+			err = -ERESTARTSYS;
 			goto out;
+		}
+
 		if (!current_rng) {
-			mutex_unlock(&rng_mutex);
 			err = -ENODEV;
-			goto out;
+			goto out_unlock;
 		}
 
-		bytes_read = 0;
-		if (hwrng_data_present(current_rng,
-				       !(filp->f_flags & O_NONBLOCK)))
-			bytes_read = hwrng_data_read(current_rng, &data);
+		if (!data_avail) {
+			bytes_read = rng_get_data(current_rng, rng_buffer,
+				sizeof(rng_buffer),
+				!(filp->f_flags & O_NONBLOCK));
+			if (bytes_read < 0) {
+				err = bytes_read;
+				goto out_unlock;
+			}
+			data_avail = bytes_read;
+		}
+
+		if (!data_avail) {
+			if (filp->f_flags & O_NONBLOCK) {
+				err = -EAGAIN;
+				goto out_unlock;
+			}
+		} else {
+			len = data_avail;
+			if (len > size)
+				len = size;
+
+			data_avail -= len;
+
+			if (copy_to_user(buf + ret, rng_buffer + data_avail,
+								len)) {
+				err = -EFAULT;
+				goto out_unlock;
+			}
+
+			size -= len;
+			ret += len;
+		}
+
 		mutex_unlock(&rng_mutex);
 
-		err = -EAGAIN;
-		if (!bytes_read && (filp->f_flags & O_NONBLOCK))
-			goto out;
-		if (bytes_read < 0) {
-			err = bytes_read;
-			goto out;
-		}
-
-		err = -EFAULT;
-		while (bytes_read && size) {
-			if (put_user((u8)data, buf++))
-				goto out;
-			size--;
-			ret++;
-			bytes_read--;
-			data >>= 8;
-		}
-
 		if (need_resched())
 			schedule_timeout_interruptible(1);
-		err = -ERESTARTSYS;
-		if (signal_pending(current))
+
+		if (signal_pending(current)) {
+			err = -ERESTARTSYS;
 			goto out;
+		}
 	}
+out_unlock:
+	mutex_unlock(&rng_mutex);
 out:
 	return ret ? : err;
 }
@@ -280,7 +301,7 @@
 	struct hwrng *old_rng, *tmp;
 
 	if (rng->name == NULL ||
-	    rng->data_read == NULL)
+	    (rng->data_read == NULL && rng->read == NULL))
 		goto out;
 
 	mutex_lock(&rng_mutex);
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 1ffb53f..fc0d575 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -106,7 +106,6 @@
 extern const struct crypto_type crypto_ablkcipher_type;
 extern const struct crypto_type crypto_aead_type;
 extern const struct crypto_type crypto_blkcipher_type;
-extern const struct crypto_type crypto_hash_type;
 
 void crypto_mod_put(struct crypto_alg *alg);
 
diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h
index 2f65a6e..1c96b25 100644
--- a/include/crypto/cryptd.h
+++ b/include/crypto/cryptd.h
@@ -39,6 +39,7 @@
 struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name,
 					u32 type, u32 mask);
 struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm);
+struct shash_desc *cryptd_shash_desc(struct ahash_request *req);
 void cryptd_free_ahash(struct cryptd_ahash *tfm);
 
 #endif
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index fd92988..24d2e30 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -250,29 +250,6 @@
 	void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
 };
 
-struct digest_alg {
-	unsigned int dia_digestsize;
-	void (*dia_init)(struct crypto_tfm *tfm);
-	void (*dia_update)(struct crypto_tfm *tfm, const u8 *data,
-			   unsigned int len);
-	void (*dia_final)(struct crypto_tfm *tfm, u8 *out);
-	int (*dia_setkey)(struct crypto_tfm *tfm, const u8 *key,
-	                  unsigned int keylen);
-};
-
-struct hash_alg {
-	int (*init)(struct hash_desc *desc);
-	int (*update)(struct hash_desc *desc, struct scatterlist *sg,
-		      unsigned int nbytes);
-	int (*final)(struct hash_desc *desc, u8 *out);
-	int (*digest)(struct hash_desc *desc, struct scatterlist *sg,
-		      unsigned int nbytes, u8 *out);
-	int (*setkey)(struct crypto_hash *tfm, const u8 *key,
-		      unsigned int keylen);
-
-	unsigned int digestsize;
-};
-
 struct compress_alg {
 	int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src,
 			    unsigned int slen, u8 *dst, unsigned int *dlen);
@@ -293,8 +270,6 @@
 #define cra_aead	cra_u.aead
 #define cra_blkcipher	cra_u.blkcipher
 #define cra_cipher	cra_u.cipher
-#define cra_digest	cra_u.digest
-#define cra_hash	cra_u.hash
 #define cra_compress	cra_u.compress
 #define cra_rng		cra_u.rng
 
@@ -320,8 +295,6 @@
 		struct aead_alg aead;
 		struct blkcipher_alg blkcipher;
 		struct cipher_alg cipher;
-		struct digest_alg digest;
-		struct hash_alg hash;
 		struct compress_alg compress;
 		struct rng_alg rng;
 	} cra_u;
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 7244456..9bede76 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -22,10 +22,12 @@
  * @cleanup:		Cleanup callback (can be NULL).
  * @data_present:	Callback to determine if data is available
  *			on the RNG. If NULL, it is assumed that
- *			there is always data available.
+ *			there is always data available.  *OBSOLETE*
  * @data_read:		Read data from the RNG device.
  *			Returns the number of lower random bytes in "data".
- *			Must not be NULL.
+ *			Must not be NULL.    *OSOLETE*
+ * @read:		New API. drivers can fill up to max bytes of data
+ *			into the buffer. The buffer is aligned for any type.
  * @priv:		Private data, for use by the RNG driver.
  */
 struct hwrng {
@@ -34,6 +36,7 @@
 	void (*cleanup)(struct hwrng *rng);
 	int (*data_present)(struct hwrng *rng, int wait);
 	int (*data_read)(struct hwrng *rng, u32 *data);
+	int (*read)(struct hwrng *rng, void *data, size_t max, bool wait);
 	unsigned long priv;
 
 	/* internal. */