sparc64: simple microoptimizations for atomic functions

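Simple microoptimizations for the sparc64 atomic functions:
- Save one instruction per function by computing the return value in
  the delay slot of retl, which previously held a nop.
- Compute the return value from %g1 instead of %g7. On the success path
  the two registers have just compared equal, but %g7 is only written
  by the cas/casx, whereas %g1 is written earlier.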

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index 0268210..703c9c3 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -52,10 +52,9 @@
 	cas	[%o1], %g1, %g7
 	cmp	%g1, %g7
 	bne,pn	%icc, 2f
-	 add	%g7, %o0, %g7
-	sra	%g7, 0, %o0
+	 add	%g1, %o0, %g1
 	retl
-	 nop
+	 sra	%g1, 0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
 	.size	atomic_add_ret, .-atomic_add_ret
 
@@ -68,10 +67,9 @@
 	cas	[%o1], %g1, %g7
 	cmp	%g1, %g7
 	bne,pn	%icc, 2f
-	 sub	%g7, %o0, %g7
-	sra	%g7, 0, %o0
+	 sub	%g1, %o0, %g1
 	retl
-	 nop
+	 sra	%g1, 0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
 	.size	atomic_sub_ret, .-atomic_sub_ret
 
@@ -114,10 +112,9 @@
 	casx	[%o1], %g1, %g7
 	cmp	%g1, %g7
 	bne,pn	%xcc, 2f
-	 add	%g7, %o0, %g7
-	mov	%g7, %o0
-	retl
 	 nop
+	retl
+	 add	%g1, %o0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
 	.size	atomic64_add_ret, .-atomic64_add_ret
 
@@ -130,9 +127,8 @@
 	casx	[%o1], %g1, %g7
 	cmp	%g1, %g7
 	bne,pn	%xcc, 2f
-	 sub	%g7, %o0, %g7
-	mov	%g7, %o0
-	retl
 	 nop
+	retl
+	 sub	%g1, %o0, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
 	.size	atomic64_sub_ret, .-atomic64_sub_ret