sh: Overhaul spinlocks and rwlocks for SH-4A SMP.

This rips out some of the old spinlock and rwlock behaviour that the SH-2
parts were using and reworks them for LL/SC semantics on the SH-4A.

This is primarily only useful for SH-X3 multi-cores, but can also be used
for building CONFIG_SMP=y testing kernels on SH-4A UP.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/include/asm-sh/spinlock.h b/include/asm-sh/spinlock.h
index 92f6e20..e793181 100644
--- a/include/asm-sh/spinlock.h
+++ b/include/asm-sh/spinlock.h
@@ -2,6 +2,7 @@
  * include/asm-sh/spinlock.h
  *
  * Copyright (C) 2002, 2003 Paul Mundt
+ * Copyright (C) 2006, 2007 Akio Idehara
  *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
@@ -10,17 +11,22 @@
 #ifndef __ASM_SH_SPINLOCK_H
 #define __ASM_SH_SPINLOCK_H
 
-#include <asm/atomic.h>
-#include <asm/spinlock_types.h>
+/*
+ * The only locking implemented here uses SH-4A opcodes. For others,
+ * split this out as per atomic-*.h.
+ */
+#ifndef CONFIG_CPU_SH4A
+#error "Need movli.l/movco.l for spinlocks"
+#endif
 
 /*
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  */
 
-#define __raw_spin_is_locked(x)	((x)->lock != 0)
+#define __raw_spin_is_locked(x)		((x)->lock <= 0)
 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
 #define __raw_spin_unlock_wait(x) \
-	do { cpu_relax(); } while (__raw_spin_is_locked(x))
+	do { cpu_relax(); } while ((x)->lock)
 
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
@@ -30,12 +36,19 @@
  */
 static inline void __raw_spin_lock(raw_spinlock_t *lock)
 {
+	unsigned long tmp;
+	unsigned long oldval;
+
 	__asm__ __volatile__ (
-		"1:\n\t"
-		"tas.b @%0\n\t"
-		"bf/s 1b\n\t"
-		"nop\n\t"
-		: "=r" (lock->lock)
+		"1:						\n\t"
+		"movli.l	@%2, %0	! __raw_spin_lock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"cmp/pl		%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
 		: "r" (&lock->lock)
 		: "t", "memory"
 	);
@@ -43,12 +56,36 @@
 
 static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
-	//assert_spin_locked(lock);
+	unsigned long tmp;
 
-	lock->lock = 0;
+	__asm__ __volatile__ (
+		"mov		#1, %0 ! __raw_spin_unlock	\n\t"
+		"mov.l		%0, @%1				\n\t"
+		: "=&z" (tmp)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
 }
 
-#define __raw_spin_trylock(x) (!test_and_set_bit(0, &(x)->lock))
+static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	unsigned long tmp, oldval;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! __raw_spin_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"mov		#0, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&lock->lock)
+		: "t", "memory"
+	);
+
+	return oldval;
+}
 
 /*
  * Read-write spinlocks, allowing multiple readers but only one writer.
@@ -59,58 +96,124 @@
  * read-locks.
  */
 
+/**
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define __raw_read_can_lock(x)	((x)->lock > 0)
+
+/**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+#define __raw_write_can_lock(x)	((x)->lock == RW_LOCK_BIAS)
+
 static inline void __raw_read_lock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->lock);
+	unsigned long tmp;
 
-	atomic_inc(&rw->counter);
-
-	__raw_spin_unlock(&rw->lock);
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! __raw_read_lock	\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		1b				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
 }
 
 static inline void __raw_read_unlock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->lock);
+	unsigned long tmp;
 
-	atomic_dec(&rw->counter);
-
-	__raw_spin_unlock(&rw->lock);
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! __raw_read_unlock	\n\t"
+		"add		#1, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
 }
 
 static inline void __raw_write_lock(raw_rwlock_t *rw)
 {
-	__raw_spin_lock(&rw->lock);
-	atomic_set(&rw->counter, -1);
+	unsigned long tmp;
+
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%1, %0	! __raw_write_lock	\n\t"
+		"cmp/hs		%2, %0				\n\t"
+		"bf		1b				\n\t"
+		"sub		%2, %0				\n\t"
+		"movco.l	%0, @%1				\n\t"
+		"bf		1b				\n\t"
+		: "=&z" (tmp)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
 }
 
 static inline void __raw_write_unlock(raw_rwlock_t *rw)
 {
-	atomic_set(&rw->counter, 0);
-	__raw_spin_unlock(&rw->lock);
+	__asm__ __volatile__ (
+		"mov.l		%1, @%0 ! __raw_write_unlock	\n\t"
+		:
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
 }
 
-static inline int __raw_write_can_lock(raw_rwlock_t *rw)
+static inline int __raw_read_trylock(raw_rwlock_t *rw)
 {
-	return (atomic_read(&rw->counter) == RW_LOCK_BIAS);
-}
+	unsigned long tmp, oldval;
 
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
-{
-	atomic_t *count = (atomic_t*)lock;
-	if (atomic_dec_return(count) >= 0)
-		return 1;
-	atomic_inc(count);
-	return 0;
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! __raw_read_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/pl		%0				\n\t"
+		"bf		2f				\n\t"
+		"add		#-1, %0				\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"2:						\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock)
+		: "t", "memory"
+	);
+
+	return (oldval > 0);
 }
 
 static inline int __raw_write_trylock(raw_rwlock_t *rw)
 {
-	if (atomic_sub_and_test(RW_LOCK_BIAS, &rw->counter))
-		return 1;
-	
-	atomic_add(RW_LOCK_BIAS, &rw->counter);
+	unsigned long tmp, oldval;
 
-	return 0;
+	__asm__ __volatile__ (
+		"1:						\n\t"
+		"movli.l	@%2, %0	! __raw_write_trylock	\n\t"
+		"mov		%0, %1				\n\t"
+		"cmp/hs		%3, %0				\n\t"
+		"bf		2f				\n\t"
+		"sub		%3, %0				\n\t"
+		"2:						\n\t"
+		"movco.l	%0, @%2				\n\t"
+		"bf		1b				\n\t"
+		"synco						\n\t"
+		: "=&z" (tmp), "=&r" (oldval)
+		: "r" (&rw->lock), "r" (RW_LOCK_BIAS)
+		: "t", "memory"
+	);
+
+	return (oldval > (RW_LOCK_BIAS - 1));
 }
 
 #define _raw_spin_relax(lock)	cpu_relax()