sparc64: Make rwsems 64-bit.

Basically tip-off the powerpc code, use a 64-bit type and atomic64_t
interfaces for the implementation.

This gets us off of the by-hand asm code I wrote, which frankly I
think probably ruins I-cache hit rates.

The idea was the keep the call chains less deep, but anything taking
the rw-semaphores probably is also calling other stuff and therefore
already has allocated a stack-frame.  So no real stack frame savings
ever.

Ben H. has posted patches to make powerpc use 64-bit too and with some
abstractions we can probably use a shared header file somewhere.

With suggestions from Sam Ravnborg.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h
index 6e56210..a2b4302 100644
--- a/arch/sparc/include/asm/rwsem.h
+++ b/arch/sparc/include/asm/rwsem.h
@@ -15,16 +15,21 @@
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
-#include <asm/rwsem-const.h>
 
 struct rwsem_waiter;
 
 struct rw_semaphore {
-	signed int count;
-	spinlock_t		wait_lock;
-	struct list_head	wait_list;
+	signed long			count;
+#define RWSEM_UNLOCKED_VALUE		0x00000000L
+#define RWSEM_ACTIVE_BIAS		0x00000001L
+#define RWSEM_ACTIVE_MASK		0xffffffffL
+#define RWSEM_WAITING_BIAS		(-RWSEM_ACTIVE_MASK-1)
+#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+	spinlock_t			wait_lock;
+	struct list_head		wait_list;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lockdep_map	dep_map;
+	struct lockdep_map		dep_map;
 #endif
 };
 
@@ -41,6 +46,11 @@
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
 
+extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
+
 extern void __init_rwsem(struct rw_semaphore *sem, const char *name,
 			 struct lock_class_key *key);
 
@@ -51,27 +61,103 @@
 	__init_rwsem((sem), #sem, &__key);			\
 } while (0)
 
-extern void __down_read(struct rw_semaphore *sem);
-extern int __down_read_trylock(struct rw_semaphore *sem);
-extern void __down_write(struct rw_semaphore *sem);
-extern int __down_write_trylock(struct rw_semaphore *sem);
-extern void __up_read(struct rw_semaphore *sem);
-extern void __up_write(struct rw_semaphore *sem);
-extern void __downgrade_write(struct rw_semaphore *sem);
+/*
+ * lock for reading
+ */
+static inline void __down_read(struct rw_semaphore *sem)
+{
+	if (unlikely(atomic64_inc_return((atomic64_t *)(&sem->count)) <= 0L))
+		rwsem_down_read_failed(sem);
+}
 
+static inline int __down_read_trylock(struct rw_semaphore *sem)
+{
+	long tmp;
+
+	while ((tmp = sem->count) >= 0L) {
+		if (tmp == cmpxchg(&sem->count, tmp,
+				   tmp + RWSEM_ACTIVE_READ_BIAS)) {
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * lock for writing
+ */
 static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
-	__down_write(sem);
+	long tmp;
+
+	tmp = atomic64_add_return(RWSEM_ACTIVE_WRITE_BIAS,
+				  (atomic64_t *)(&sem->count));
+	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
+		rwsem_down_write_failed(sem);
 }
 
-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
+static inline void __down_write(struct rw_semaphore *sem)
 {
-	return atomic_add_return(delta, (atomic_t *)(&sem->count));
+	__down_write_nested(sem, 0);
 }
 
-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+static inline int __down_write_trylock(struct rw_semaphore *sem)
 {
-	atomic_add(delta, (atomic_t *)(&sem->count));
+	long tmp;
+
+	tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+		      RWSEM_ACTIVE_WRITE_BIAS);
+	return tmp == RWSEM_UNLOCKED_VALUE;
+}
+
+/*
+ * unlock after reading
+ */
+static inline void __up_read(struct rw_semaphore *sem)
+{
+	long tmp;
+
+	tmp = atomic64_dec_return((atomic64_t *)(&sem->count));
+	if (unlikely(tmp < -1L && (tmp & RWSEM_ACTIVE_MASK) == 0L))
+		rwsem_wake(sem);
+}
+
+/*
+ * unlock after writing
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+	if (unlikely(atomic64_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
+					 (atomic64_t *)(&sem->count)) < 0L))
+		rwsem_wake(sem);
+}
+
+/*
+ * implement atomic add functionality
+ */
+static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
+{
+	atomic64_add(delta, (atomic64_t *)(&sem->count));
+}
+
+/*
+ * downgrade write lock to read lock
+ */
+static inline void __downgrade_write(struct rw_semaphore *sem)
+{
+	long tmp;
+
+	tmp = atomic64_add_return(-RWSEM_WAITING_BIAS, (atomic64_t *)(&sem->count));
+	if (tmp < 0L)
+		rwsem_downgrade_wake(sem);
+}
+
+/*
+ * implement exchange and add functionality
+ */
+static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
+{
+	return atomic64_add_return(delta, (atomic64_t *)(&sem->count));
 }
 
 static inline int rwsem_is_locked(struct rw_semaphore *sem)