powerpc: Fix load/store float double alignment handler

When we introduced VSX, we changed the way FPRs are stored in the
thread_struct.  Unfortunately we missed the load/store float double
alignment handler code when updating how we access FPRs in the
thread_struct.

Below fixes this and merges the little/big endian case.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 076aa0e..5ffcfaa 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -367,27 +367,24 @@
 static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
 			   unsigned int flags)
 {
-	char *ptr = (char *) &current->thread.TS_FPR(reg);
-	int i, ret;
+	char *ptr0 = (char *) &current->thread.TS_FPR(reg);
+	char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
+	int i, ret, sw = 0;
 
 	if (!(flags & F))
 		return 0;
 	if (reg & 1)
 		return 0;	/* invalid form: FRS/FRT must be even */
-	if (!(flags & SW)) {
-		/* not byte-swapped - easy */
-		if (!(flags & ST))
-			ret = __copy_from_user(ptr, addr, 16);
-		else
-			ret = __copy_to_user(addr, ptr, 16);
-	} else {
-		/* each FPR value is byte-swapped separately */
-		ret = 0;
-		for (i = 0; i < 16; ++i) {
-			if (!(flags & ST))
-				ret |= __get_user(ptr[i^7], addr + i);
-			else
-				ret |= __put_user(ptr[i^7], addr + i);
+	if (flags & SW)
+		sw = 7;
+	ret = 0;
+	for (i = 0; i < 8; ++i) {
+		if (!(flags & ST)) {
+			ret |= __get_user(ptr0[i^sw], addr + i);
+			ret |= __get_user(ptr1[i^sw], addr + i + 8);
+		} else {
+			ret |= __put_user(ptr0[i^sw], addr + i);
+			ret |= __put_user(ptr1[i^sw], addr + i + 8);
 		}
 	}
 	if (ret)