MIPS: Octeon: Reverse the order of register accesses to the FAU

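When __BIG_ENDIAN_BITFIELD is not defined (little-endian builds),
64-bit access is unaffected, but for 32-bit access the high and low
words are swapped.  Similarly, for 16-bit access the order of the
four possible 16-bit words is reversed, and for 8-bit access the
order of byte accesses is reversed.  This is done by XORing the FAU
register address with 0x4, 0x6 and 0x7 respectively; big-endian
builds XOR with 0 and are unchanged.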

Signed-off-by: Paul Martin <paul.martin@codethink.co.uk>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/9630/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/include/asm/octeon/cvmx-fau.h b/arch/mips/include/asm/octeon/cvmx-fau.h
index ef98f7f..dafeae3 100644
--- a/arch/mips/include/asm/octeon/cvmx-fau.h
+++ b/arch/mips/include/asm/octeon/cvmx-fau.h
@@ -105,6 +105,16 @@
 	} s;
 } cvmx_fau_async_tagwait_result_t;
 
+#ifdef __BIG_ENDIAN_BITFIELD
+#define SWIZZLE_8  0
+#define SWIZZLE_16 0
+#define SWIZZLE_32 0
+#else
+#define SWIZZLE_8  0x7
+#define SWIZZLE_16 0x6
+#define SWIZZLE_32 0x4
+#endif
+
 /**
  * Builds a store I/O address for writing to the FAU
  *
@@ -175,6 +185,7 @@
 static inline int32_t cvmx_fau_fetch_and_add32(cvmx_fau_reg_32_t reg,
 					       int32_t value)
 {
+	reg ^= SWIZZLE_32;
 	return cvmx_read64_int32(__cvmx_fau_atomic_address(0, reg, value));
 }
 
@@ -189,6 +200,7 @@
 static inline int16_t cvmx_fau_fetch_and_add16(cvmx_fau_reg_16_t reg,
 					       int16_t value)
 {
+	reg ^= SWIZZLE_16;
 	return cvmx_read64_int16(__cvmx_fau_atomic_address(0, reg, value));
 }
 
@@ -201,6 +213,7 @@
  */
 static inline int8_t cvmx_fau_fetch_and_add8(cvmx_fau_reg_8_t reg, int8_t value)
 {
+	reg ^= SWIZZLE_8;
 	return cvmx_read64_int8(__cvmx_fau_atomic_address(0, reg, value));
 }
 
@@ -247,6 +260,7 @@
 		uint64_t i32;
 		cvmx_fau_tagwait32_t t;
 	} result;
+	reg ^= SWIZZLE_32;
 	result.i32 =
 	    cvmx_read64_int32(__cvmx_fau_atomic_address(1, reg, value));
 	return result.t;
@@ -270,6 +284,7 @@
 		uint64_t i16;
 		cvmx_fau_tagwait16_t t;
 	} result;
+	reg ^= SWIZZLE_16;
 	result.i16 =
 	    cvmx_read64_int16(__cvmx_fau_atomic_address(1, reg, value));
 	return result.t;
@@ -292,6 +307,7 @@
 		uint64_t i8;
 		cvmx_fau_tagwait8_t t;
 	} result;
+	reg ^= SWIZZLE_8;
 	result.i8 = cvmx_read64_int8(__cvmx_fau_atomic_address(1, reg, value));
 	return result.t;
 }
@@ -521,6 +537,7 @@
  */
 static inline void cvmx_fau_atomic_add32(cvmx_fau_reg_32_t reg, int32_t value)
 {
+	reg ^= SWIZZLE_32;
 	cvmx_write64_int32(__cvmx_fau_store_address(0, reg), value);
 }
 
@@ -533,6 +550,7 @@
  */
 static inline void cvmx_fau_atomic_add16(cvmx_fau_reg_16_t reg, int16_t value)
 {
+	reg ^= SWIZZLE_16;
 	cvmx_write64_int16(__cvmx_fau_store_address(0, reg), value);
 }
 
@@ -544,6 +562,7 @@
  */
 static inline void cvmx_fau_atomic_add8(cvmx_fau_reg_8_t reg, int8_t value)
 {
+	reg ^= SWIZZLE_8;
 	cvmx_write64_int8(__cvmx_fau_store_address(0, reg), value);
 }
 
@@ -568,6 +587,7 @@
  */
 static inline void cvmx_fau_atomic_write32(cvmx_fau_reg_32_t reg, int32_t value)
 {
+	reg ^= SWIZZLE_32;
 	cvmx_write64_int32(__cvmx_fau_store_address(1, reg), value);
 }
 
@@ -580,6 +600,7 @@
  */
 static inline void cvmx_fau_atomic_write16(cvmx_fau_reg_16_t reg, int16_t value)
 {
+	reg ^= SWIZZLE_16;
 	cvmx_write64_int16(__cvmx_fau_store_address(1, reg), value);
 }
 
@@ -591,6 +612,7 @@
  */
 static inline void cvmx_fau_atomic_write8(cvmx_fau_reg_8_t reg, int8_t value)
 {
+	reg ^= SWIZZLE_8;
 	cvmx_write64_int8(__cvmx_fau_store_address(1, reg), value);
 }