sh: Provide optimized non-atomic bitops for SH-2A.

This wires up the new SH-2A 32-bit non-atomic bitops: compile-time-constant
bit numbers use the byte-wise bset.b/bclr.b/bxor.b instructions directly,
while variable bit numbers fall back to the generic C word operations. The
atomic variants are taken from asm-generic/bitops/atomic.h.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
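---
For reference, a minimal host-side sketch (not part of the patch) showing
that the byte-wise decomposition used for the constant-nr case touches the
same bit as the word-wise BIT_MASK()/BIT_WORD() fallback. It assumes a
little-endian host and uses the little-endian variants of the macros;
BITS_PER_BYTE is taken as 8 as on SH-2A, and BIT_MASK() is re-derived from
the host word size so the sketch builds standalone:

	#include <assert.h>
	#include <string.h>

	#define BITS_PER_BYTE	8
	#define BIT_MASK(nr)	(1UL << ((nr) % (sizeof(long) * BITS_PER_BYTE)))
	#define BYTE_NUMBER(nr)	((nr) / BITS_PER_BYTE)
	#define BYTE_OFFSET(nr)	((nr) % BITS_PER_BYTE)

	int main(void)
	{
		unsigned long word = 0;
		unsigned char bytes[sizeof(word)] = { 0 };
		int nr = 21;

		/* Word-wise fallback path: set bit 21 via BIT_MASK(). */
		word |= BIT_MASK(nr);

		/* Byte-wise path: what bset.b #BYTE_OFFSET(nr) on the
		 * BYTE_NUMBER(nr)'th byte does in hardware. */
		bytes[BYTE_NUMBER(nr)] |= 1 << BYTE_OFFSET(nr);

		/* Both views must agree on a little-endian host. */
		assert(memcmp(&word, bytes, sizeof(word)) == 0);
		return 0;
	}

So for nr = 21, BYTE_NUMBER(21) = 2 and BYTE_OFFSET(21) = 5, and a constant
__set_bit(21, addr) should reduce to a single "bset.b #5, @(2,addr)".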
diff --git a/arch/sh/include/asm/bitops-op32.h b/arch/sh/include/asm/bitops-op32.h
new file mode 100644
index 0000000..f0ae7e9
--- /dev/null
+++ b/arch/sh/include/asm/bitops-op32.h
@@ -0,0 +1,168 @@
+#ifndef __ASM_SH_BITOPS_OP32_H
+#define __ASM_SH_BITOPS_OP32_H
+
+/*
+ * The bit-manipulation instructions on SH-2A only take a 3-bit
+ * immediate, which selects the position of the bit being operated on
+ * within the addressed byte.
+ */
+#if defined(__BIG_ENDIAN)
+#define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
+#define BYTE_NUMBER(nr)		(((nr) ^ BITOP_LE_SWIZZLE) / BITS_PER_BYTE)
+#define BYTE_OFFSET(nr)		(((nr) ^ BITOP_LE_SWIZZLE) % BITS_PER_BYTE)
+#else
+#define BYTE_NUMBER(nr)		((nr) / BITS_PER_BYTE)
+#define BYTE_OFFSET(nr)		((nr) % BITS_PER_BYTE)
+#endif
+
+#define IS_IMMEDIATE(nr)	(__builtin_constant_p(nr))
+
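+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */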
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+	if (IS_IMMEDIATE(nr)) {
+		__asm__ __volatile__ (
+			"bset.b %1, @(%O2,%0)		! __set_bit\n\t"
+			: "+r" (addr)
+			: "i" (BYTE_OFFSET(nr)), "i" (BYTE_NUMBER(nr))
+			: "t", "memory"
+		);
+	} else {
+		unsigned long mask = BIT_MASK(nr);
+		unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+		*p |= mask;
+	}
+}
+
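+/**
+ * __clear_bit - Clears a bit in memory
+ * @nr: the bit to clear
+ * @addr: the address to start counting from
+ *
+ * Unlike clear_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */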
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+	if (IS_IMMEDIATE(nr)) {
+		__asm__ __volatile__ (
+			"bclr.b %1, @(%O2,%0)		! __clear_bit\n\t"
+			: "+r" (addr)
+			: "i" (BYTE_OFFSET(nr)),
+			  "i" (BYTE_NUMBER(nr))
+			: "t", "memory"
+		);
+	} else {
+		unsigned long mask = BIT_MASK(nr);
+		unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+		*p &= ~mask;
+	}
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+	if (IS_IMMEDIATE(nr)) {
+		__asm__ __volatile__ (
+			"bxor.b %1, @(%O2,%0)		! __change_bit\n\t"
+			: "+r" (addr)
+			: "i" (BYTE_OFFSET(nr)),
+			  "i" (BYTE_NUMBER(nr))
+			: "t", "memory"
+		);
+	} else {
+		unsigned long mask = BIT_MASK(nr);
+		unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+		*p ^= mask;
+	}
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old | mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old & ~mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * __test_and_change_bit - Toggle a bit and return its old value
+ * @nr: Bit to toggle
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two instances of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_change_bit(int nr,
+					    volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old ^ mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+#endif /* __ASM_SH_BITOPS_OP32_H */
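For illustration, a hedged usage sketch (hypothetical driver code, not part
of the patch): these are the double-underscore non-atomic variants, so the
caller must serialize access itself. A constant bit number takes the bset.b
path above; a variable one falls back to the C word operation. The names
pending_flags, pending_lock and mark_pending are invented for the example:

	#include <linux/bitops.h>
	#include <linux/spinlock.h>

	static unsigned long pending_flags;	/* protected by pending_lock */
	static DEFINE_SPINLOCK(pending_lock);

	static void mark_pending(int channel)
	{
		spin_lock(&pending_lock);
		/* Constant nr: should emit a single bset.b. */
		__set_bit(0, &pending_flags);
		/* Variable nr: takes the BIT_MASK()/BIT_WORD() fallback. */
		__set_bit(channel, &pending_flags);
		spin_unlock(&pending_lock);
	}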
diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h
index 9b141e0..ebe595b 100644
--- a/arch/sh/include/asm/bitops.h
+++ b/arch/sh/include/asm/bitops.h
@@ -13,6 +13,9 @@
 
 #ifdef CONFIG_GUSA_RB
 #include <asm/bitops-grb.h>
+#elif defined(CONFIG_CPU_SH2A)
+#include <asm-generic/bitops/atomic.h>
+#include <asm/bitops-op32.h>
 #elif defined(CONFIG_CPU_SH4A)
 #include <asm/bitops-llsc.h>
 #else
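With this change, SH-2A takes its atomic bitops from
asm-generic/bitops/atomic.h and the non-atomic double-underscore variants
from the new bitops-op32.h. As a rough sketch of the intended code
generation (assumed output, not verified against a real toolchain), a
compile-time-constant bit number should reduce the whole operation to one
SH-2A instruction; set_ready_flag is a hypothetical helper:

	void set_ready_flag(unsigned long *flags)
	{
		__set_bit(5, flags);	/* IS_IMMEDIATE(5) is true */
	}

	/*
	 * Expected to assemble to roughly:
	 *	bset.b	#5, @(0,r4)	! flags arrives in r4 per the SH ABI
	 *	rts
	 *	 nop
	 */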