[SPARC64]: More fully work around Spitfire Errata 51.

It appears that a memory barrier executed soon after a mispredicted
branch, not just one sitting in the branch's delay slot, can trigger
the hang condition of this CPU erratum.
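
For illustration, the problematic shape looks roughly like this
(a hypothetical sequence, not a traced instance):

	brz,pn	%o0, 1f		! conditional branch, may mispredict
	 mov	%o0, %l2	! delay slot
	membar	#LoadLoad	! barrier soon after the branch
1: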

So move the memory barriers out of line, and explicitly place each
one in the delay slot of a "branch always, predicted taken"
instruction, which should fully eliminate the problem (see the
sketch below).
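
A minimal sketch of the intended out-of-line shape (the label and
exact symbol layout here are illustrative; membar_storeload_loadload
is one of the barriers declared below):

	.globl	membar_storeload_loadload
membar_storeload_loadload:
	ba,pt	%xcc, 99f	! branch always, predicted taken
	 membar	#StoreLoad | #LoadLoad	! barrier in the delay slot
99:	retl
	 nop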

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h
index d80f3379..e175afc 100644
--- a/include/asm-sparc64/atomic.h
+++ b/include/asm-sparc64/atomic.h
@@ -72,10 +72,10 @@
 
 /* Atomic operations are already serializing */
 #ifdef CONFIG_SMP
-#define smp_mb__before_atomic_dec()	membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_atomic_dec()	membar("#StoreLoad | #StoreStore")
-#define smp_mb__before_atomic_inc()	membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_atomic_inc()	membar("#StoreLoad | #StoreStore")
+#define smp_mb__before_atomic_dec()	membar_storeload_loadload()
+#define smp_mb__after_atomic_dec()	membar_storeload_storestore()
+#define smp_mb__before_atomic_inc()	membar_storeload_loadload()
+#define smp_mb__after_atomic_inc()	membar_storeload_storestore()
 #else
 #define smp_mb__before_atomic_dec()	barrier()
 #define smp_mb__after_atomic_dec()	barrier()
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index 9c5e719..6388b83 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -72,8 +72,8 @@
 }
 
 #ifdef CONFIG_SMP
-#define smp_mb__before_clear_bit()	membar("#StoreLoad | #LoadLoad")
-#define smp_mb__after_clear_bit()	membar("#StoreLoad | #StoreStore")
+#define smp_mb__before_clear_bit()	membar_storeload_loadload()
+#define smp_mb__after_clear_bit()	membar_storeload_storestore()
 #else
 #define smp_mb__before_clear_bit()	barrier()
 #define smp_mb__after_clear_bit()	barrier()
diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h
index d265bf6..a02c437 100644
--- a/include/asm-sparc64/spinlock.h
+++ b/include/asm-sparc64/spinlock.h
@@ -43,7 +43,7 @@
 #define spin_is_locked(lp)  ((lp)->lock != 0)
 
 #define spin_unlock_wait(lp)	\
-do {	membar("#LoadLoad");	\
+do {	rmb();			\
 } while((lp)->lock)
 
 static inline void _raw_spin_lock(spinlock_t *lock)
@@ -129,7 +129,7 @@
 #define spin_is_locked(__lock)	((__lock)->lock != 0)
 #define spin_unlock_wait(__lock)	\
 do { \
-	membar("#LoadLoad"); \
+	rmb(); \
 } while((__lock)->lock)
 
 extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller);
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index ee4bdfc..5e94c05 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -28,6 +28,14 @@
 #define ARCH_SUN4C_SUN4 0
 #define ARCH_SUN4 0
 
+extern void mb(void);
+extern void rmb(void);
+extern void wmb(void);
+extern void membar_storeload(void);
+extern void membar_storeload_storestore(void);
+extern void membar_storeload_loadload(void);
+extern void membar_storestore_loadstore(void);
+
 #endif
 
 #define setipl(__new_ipl) \
@@ -78,16 +86,11 @@
 
 #define nop() 		__asm__ __volatile__ ("nop")
 
-#define membar(type)	__asm__ __volatile__ ("membar " type : : : "memory")
-#define mb()		\
-	membar("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
-#define rmb()		membar("#LoadLoad")
-#define wmb()		membar("#StoreStore")
 #define read_barrier_depends()		do { } while(0)
 #define set_mb(__var, __value) \
-	do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0)
+	do { __var = __value; membar_storeload_storestore(); } while(0)
 #define set_wmb(__var, __value) \
-	do { __var = __value; membar("#StoreStore"); } while(0)
+	do { __var = __value; wmb(); } while(0)
 
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
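
From C, callers are unchanged.  A usage sketch (drop_ref() is a
made-up example function, not part of this patch), showing that the
barrier now compiles to an out-of-line call instead of an inline
membar:

	#include <asm/atomic.h>

	static void drop_ref(atomic_t *cnt)
	{
		/* Now expands to a call to membar_storeload_loadload() */
		smp_mb__before_atomic_dec();
		atomic_dec(cnt);
	}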