[PATCH] s390: atomic primitives

      Hugh Dickins <hugh@veritas.com>

Fix the broken atomic_cmpxchg primitive.  Add atomic_sub_and_test,
atomic64_sub_return, atomic64_sub_and_test, atomic64_cmpxchg,
atomic64_add_unless and atomic64_inc_not_zero.  Replace the old-style
atomic_compare_and_swap with atomic_cmpxchg.  Shorten the whole header by
defining most primitives with the two inline functions atomic_add_return and
atomic_sub_return.

In addition, this patch contains the s390-related fixes from Hugh's "mm: fill
arch atomic64 gaps" patch.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h
index 6d07c7d..d82aedf 100644
--- a/include/asm-s390/atomic.h
+++ b/include/asm-s390/atomic.h
@@ -5,7 +5,7 @@
  *  include/asm-s390/atomic.h
  *
  *  S390 version
- *    Copyright (C) 1999-2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *    Copyright (C) 1999-2005 IBM Deutschland Entwicklung GmbH, IBM Corporation
  *    Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
  *               Denis Joseph Barrow,
  *		 Arnd Bergmann (arndb@de.ibm.com)
@@ -45,59 +45,57 @@
 #define atomic_read(v)          ((v)->counter)
 #define atomic_set(v,i)         (((v)->counter) = (i))
 
-static __inline__ void atomic_add(int i, atomic_t * v)
-{
-	       __CS_LOOP(v, i, "ar");
-}
 static __inline__ int atomic_add_return(int i, atomic_t * v)
 {
 	return __CS_LOOP(v, i, "ar");
 }
-static __inline__ int atomic_add_negative(int i, atomic_t * v)
-{
-	return __CS_LOOP(v, i, "ar") < 0;
-}
-static __inline__ void atomic_sub(int i, atomic_t * v)
-{
-	       __CS_LOOP(v, i, "sr");
-}
+#define atomic_add(_i, _v)		atomic_add_return(_i, _v)
+#define atomic_add_negative(_i, _v)	(atomic_add_return(_i, _v) < 0)
+#define atomic_inc(_v)			atomic_add_return(1, _v)
+#define atomic_inc_return(_v)		atomic_add_return(1, _v)
+#define atomic_inc_and_test(_v)		(atomic_add_return(1, _v) == 0)
+
 static __inline__ int atomic_sub_return(int i, atomic_t * v)
 {
 	return __CS_LOOP(v, i, "sr");
 }
-static __inline__ void atomic_inc(volatile atomic_t * v)
-{
-	       __CS_LOOP(v, 1, "ar");
-}
-static __inline__ int atomic_inc_return(volatile atomic_t * v)
-{
-	return __CS_LOOP(v, 1, "ar");
-}
+#define atomic_sub(_i, _v)		atomic_sub_return(_i, _v)
+#define atomic_sub_and_test(_i, _v)	(atomic_sub_return(_i, _v) == 0)
+#define atomic_dec(_v)			atomic_sub_return(1, _v)
+#define atomic_dec_return(_v)		atomic_sub_return(1, _v)
+#define atomic_dec_and_test(_v)		(atomic_sub_return(1, _v) == 0)
 
-static __inline__ int atomic_inc_and_test(volatile atomic_t * v)
-{
-	return __CS_LOOP(v, 1, "ar") == 0;
-}
-static __inline__ void atomic_dec(volatile atomic_t * v)
-{
-	       __CS_LOOP(v, 1, "sr");
-}
-static __inline__ int atomic_dec_return(volatile atomic_t * v)
-{
-	return __CS_LOOP(v, 1, "sr");
-}
-static __inline__ int atomic_dec_and_test(volatile atomic_t * v)
-{
-	return __CS_LOOP(v, 1, "sr") == 0;
-}
 static __inline__ void atomic_clear_mask(unsigned long mask, atomic_t * v)
 {
 	       __CS_LOOP(v, ~mask, "nr");
 }
+
 static __inline__ void atomic_set_mask(unsigned long mask, atomic_t * v)
 {
 	       __CS_LOOP(v, mask, "or");
 }
+
+static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	__asm__ __volatile__("  cs   %0,%3,0(%2)\n"
+			     : "+d" (old), "=m" (v->counter)
+			     : "a" (v), "d" (new), "m" (v->counter)
+			     : "cc", "memory" );
+	return old;
+}
+
+static __inline__ int atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int c, old;
+
+	c = atomic_read(v);
+	while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c)
+		c = old;
+	return c != u;
+}
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
 #undef __CS_LOOP
 
 #ifdef __s390x__
@@ -123,92 +121,61 @@
 #define atomic64_read(v)          ((v)->counter)
 #define atomic64_set(v,i)         (((v)->counter) = (i))
 
-static __inline__ void atomic64_add(long long i, atomic64_t * v)
-{
-	       __CSG_LOOP(v, i, "agr");
-}
 static __inline__ long long atomic64_add_return(long long i, atomic64_t * v)
 {
 	return __CSG_LOOP(v, i, "agr");
 }
-static __inline__ long long atomic64_add_negative(long long i, atomic64_t * v)
+#define atomic64_add(_i, _v)		atomic64_add_return(_i, _v)
+#define atomic64_add_negative(_i, _v)	(atomic64_add_return(_i, _v) < 0)
+#define atomic64_inc(_v)		atomic64_add_return(1, _v)
+#define atomic64_inc_return(_v)		atomic64_add_return(1, _v)
+#define atomic64_inc_and_test(_v)	(atomic64_add_return(1, _v) == 0)
+
+static __inline__ long long atomic64_sub_return(long long i, atomic64_t * v)
 {
-	return __CSG_LOOP(v, i, "agr") < 0;
+	return __CSG_LOOP(v, i, "sgr");
 }
-static __inline__ void atomic64_sub(long long i, atomic64_t * v)
-{
-	       __CSG_LOOP(v, i, "sgr");
-}
-static __inline__ void atomic64_inc(volatile atomic64_t * v)
-{
-	       __CSG_LOOP(v, 1, "agr");
-}
-static __inline__ long long atomic64_inc_return(volatile atomic64_t * v)
-{
-	return __CSG_LOOP(v, 1, "agr");
-}
-static __inline__ long long atomic64_inc_and_test(volatile atomic64_t * v)
-{
-	return __CSG_LOOP(v, 1, "agr") == 0;
-}
-static __inline__ void atomic64_dec(volatile atomic64_t * v)
-{
-	       __CSG_LOOP(v, 1, "sgr");
-}
-static __inline__ long long atomic64_dec_return(volatile atomic64_t * v)
-{
-	return __CSG_LOOP(v, 1, "sgr");
-}
-static __inline__ long long atomic64_dec_and_test(volatile atomic64_t * v)
-{
-	return __CSG_LOOP(v, 1, "sgr") == 0;
-}
+#define atomic64_sub(_i, _v)		atomic64_sub_return(_i, _v)
+#define atomic64_sub_and_test(_i, _v)	(atomic64_sub_return(_i, _v) == 0)
+#define atomic64_dec(_v)		atomic64_sub_return(1, _v)
+#define atomic64_dec_return(_v)		atomic64_sub_return(1, _v)
+#define atomic64_dec_and_test(_v)	(atomic64_sub_return(1, _v) == 0)
+
 static __inline__ void atomic64_clear_mask(unsigned long mask, atomic64_t * v)
 {
 	       __CSG_LOOP(v, ~mask, "ngr");
 }
+
 static __inline__ void atomic64_set_mask(unsigned long mask, atomic64_t * v)
 {
 	       __CSG_LOOP(v, mask, "ogr");
 }
 
-#undef __CSG_LOOP
-#endif
-
-/*
-  returns 0  if expected_oldval==value in *v ( swap was successful )
-  returns 1  if unsuccessful.
-
-  This is non-portable, use bitops or spinlocks instead!
-*/
-static __inline__ int
-atomic_compare_and_swap(int expected_oldval,int new_val,atomic_t *v)
+static __inline__ long long atomic64_cmpxchg(atomic64_t *v,
+					     long long old, long long new)
 {
-        int retval;
-
-        __asm__ __volatile__(
-                "  lr   %0,%3\n"
-                "  cs   %0,%4,0(%2)\n"
-                "  ipm  %0\n"
-                "  srl  %0,28\n"
-                "0:"
-                : "=&d" (retval), "=m" (v->counter)
-                : "a" (v), "d" (expected_oldval) , "d" (new_val),
-		  "m" (v->counter) : "cc", "memory" );
-        return retval;
+	__asm__ __volatile__("  csg  %0,%3,0(%2)\n"
+			     : "+d" (old), "=m" (v->counter)
+			     : "a" (v), "d" (new), "m" (v->counter)
+			     : "cc", "memory" );
+	return old;
 }
 
-#define atomic_cmpxchg(v, o, n) (atomic_compare_and_swap((o), (n), &((v)->counter)))
+static __inline__ int atomic64_add_unless(atomic64_t *v,
+					  long long a, long long u)
+{
+	long long c, old;
 
-#define atomic_add_unless(v, a, u)				\
-({								\
-	int c, old;						\
-	c = atomic_read(v);					\
-	while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
-		c = old;					\
-	c != (u);						\
-})
-#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+	c = atomic64_read(v);
+	while (c != u && (old = atomic64_cmpxchg(v, c, c + a)) != c)
+		c = old;
+	return c != u;
+}
+
+#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
+
+#undef __CSG_LOOP
+#endif
 
 #define smp_mb__before_atomic_dec()	smp_mb()
 #define smp_mb__after_atomic_dec()	smp_mb()