microblaze: Support word copying in copy_tofrom_user

Word copying is used only for aligned addresses.
Here is space for improving to use any better copying technique.
Look at memcpy implementation.

Signed-off-by: Michal Simek <monstr@monstr.eu>
diff --git a/arch/microblaze/lib/uaccess_old.S b/arch/microblaze/lib/uaccess_old.S
index b327524..5810cec 100644
--- a/arch/microblaze/lib/uaccess_old.S
+++ b/arch/microblaze/lib/uaccess_old.S
@@ -53,7 +53,6 @@
 	nop
 	.size   __strncpy_user, . - __strncpy_user
 
-
 	.section	.fixup, "ax"
 	.align	2
 4:
@@ -95,7 +94,6 @@
 	nop
 	.size   __strnlen_user, . - __strnlen_user
 
-
 	.section	.fixup,"ax"
 4:
 	brid	3b
@@ -121,21 +119,34 @@
 	 * r7, r3 - count
 	 * r4 - tempval
 	 */
-	addik	r3,r7,0
-	beqi	r3,3f
-1:
-	lbu	r4,r6,r0
-	addik	r6,r6,1
-2:
-	sb	r4,r5,r0
-	addik	r3,r3,-1
-	bneid	r3,1b
-	addik	r5,r5,1		/* delay slot */
+	beqid	r7, 3f /* zero size is not likely */
+	andi	r3, r7, 0x3 /* filter add count */
+	bneid	r3, 4f /* if is odd value then byte copying */
+	or	r3, r5, r6 /* find if is any to/from unaligned */
+	andi	r3, r3, 0x3 /* mask unaligned */
+	bneid	r3, 1f /* it is unaligned -> then jump */
+	or	r3, r0, r0
+
+/* at least one 4 byte copy */
+5:	lw	r4, r6, r3
+6:	sw	r4, r5, r3
+	addik	r7, r7, -4
+	bneid	r7, 5b
+	addik	r3, r3, 4
+	addik	r3, r7, 0
+	rtsd	r15, 8
+	nop
+4:	or	r3, r0, r0
+1:	lbu	r4,r6,r3
+2:	sb	r4,r5,r3
+	addik	r7,r7,-1
+	bneid	r7,1b
+	addik	r3,r3,1		/* delay slot */
 3:
+	addik	r3,r7,0
 	rtsd	r15,8
 	nop
 	.size   __copy_tofrom_user, . - __copy_tofrom_user
 
-
 	.section	__ex_table,"a"
-	.word	1b,3b,2b,3b
+	.word	1b,3b,2b,3b,5b,3b,6b,3b