powerpc/64: Align hot loops of memset() and backwards_memcpy()

Align the hot loops in our assembly implementations of memset()
and backwards_memcpy() to a 16-byte boundary.

backwards_memcpy() is called from tcp_v4_rcv(), so we might
want to optimise it a little more in future.

Signed-off-by: Anton Blanchard <anton@samba.org>
Reviewed-by: Nick Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 43435c6..eda7a96 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -37,6 +37,7 @@
 	clrldi	r5,r5,58
 	mtctr	r0
 	beq	5f
+	.balign 16
 4:	std	r4,0(r6)
 	std	r4,8(r6)
 	std	r4,16(r6)
@@ -90,6 +91,7 @@
 	andi.	r0,r6,3
 	mtctr	r7
 	bne	5f
+	.balign 16
 1:	lwz	r7,-4(r4)
 	lwzu	r8,-8(r4)
 	stw	r7,-4(r6)