crypto: chacha20-ssse3 - Align stack pointer to 64 bytes

This aligns the stack pointer in chacha20_4block_xor_ssse3 to 64 bytes.
Fixes general protection faults and potential kernel panics.

Cc: stable@vger.kernel.org
Signed-off-by: Eli Cooper <elicooper@gmx.com>
Acked-by: Martin Willi <martin@strongswan.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
index 712b130..3a33124 100644
--- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha20-ssse3-x86_64.S
@@ -157,7 +157,9 @@
 	# done with the slightly better performing SSSE3 byte shuffling,
 	# 7/12-bit word rotation uses traditional shift+OR.
 
-	sub		$0x40,%rsp
+	mov		%rsp,%r11
+	sub		$0x80,%rsp
+	and		$~63,%rsp
 
 	# x0..15[0-3] = s0..3[0..3]
 	movq		0x00(%rdi),%xmm1
@@ -620,6 +622,6 @@
 	pxor		%xmm1,%xmm15
 	movdqu		%xmm15,0xf0(%rsi)
 
-	add		$0x40,%rsp
+	mov		%r11,%rsp
 	ret
 ENDPROC(chacha20_4block_xor_ssse3)