[PATCH] atomic: add_unless cmpxchg optimise

Without branch hints, the very unlikely chance of the loop repeating due to
cmpxchg failure is unrolled with gcc-4 that I have tested.

Improve this for architectures with a native cas/cmpxchg.  llsc archs
should try to implement this natively.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Andi Kleen <ak@muc.de>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h
index be6fefe..de1d992 100644
--- a/include/asm-s390/atomic.h
+++ b/include/asm-s390/atomic.h
@@ -89,10 +89,15 @@
 static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
-
 	c = atomic_read(v);
-	while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c)
+	for (;;) {
+		if (unlikely(c == u))
+			break;
+		old = atomic_cmpxchg(v, c, c + a);
+		if (likely(old == c))
+			break;
 		c = old;
+	}
 	return c != u;
 }
 
@@ -167,10 +172,15 @@
 					  long long a, long long u)
 {
 	long long c, old;
-
 	c = atomic64_read(v);
-	while (c != u && (old = atomic64_cmpxchg(v, c, c + a)) != c)
+	for (;;) {
+		if (unlikely(c == u))
+			break;
+		old = atomic64_cmpxchg(v, c, c + a);
+		if (likely(old == c))
+			break;
 		c = old;
+	}
 	return c != u;
 }