[SPARC64]: Fix futex_atomic_cmpxchg_inatomic implementation.

I copied the logic from ll/sc arch implementations, but that
was wrong and makes no sense at all.  Just do a straight
compare-exchange instruction, just like x86.

Based upon bug reports from Dennis Gilmore and Fabio Massimo.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h
index dee4020..7392fc4 100644
--- a/include/asm-sparc64/futex.h
+++ b/include/asm-sparc64/futex.h
@@ -87,24 +87,22 @@
 futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	__asm__ __volatile__(
-	"\n1:	lduwa	[%2] %%asi, %0\n"
-	"2:	casa	[%2] %%asi, %0, %1\n"
-	"3:\n"
+	"\n1:	casa	[%3] %%asi, %2, %0\n"
+	"2:\n"
 	"	.section .fixup,#alloc,#execinstr\n"
 	"	.align	4\n"
-	"4:	ba	3b\n"
-	"	 mov	%3, %0\n"
+	"3:	ba	2b\n"
+	"	 mov	%4, %0\n"
 	"	.previous\n"
 	"	.section __ex_table,\"a\"\n"
 	"	.align	4\n"
-	"	.word	1b, 4b\n"
-	"	.word	2b, 4b\n"
+	"	.word	1b, 3b\n"
 	"	.previous\n"
-	: "=&r" (oldval)
-	: "r" (newval), "r" (uaddr), "i" (-EFAULT)
+	: "=r" (newval)
+	: "0" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT)
 	: "memory");
 
-	return oldval;
+	return newval;
 }
 
 #endif /* !(_SPARC64_FUTEX_H) */