Tidy up memory barriers.

Replace the NEED_SWAP_MUTEXES preprocessor conditionals in atomic.cc with a
kNeedSwapMutexes constant checked at runtime, fold the swap-mutex state
(gSwapMutexes, GetSwapMutex) into QuasiAtomic, and keep only the mutex-striped
64-bit fallbacks (SwapMutexRead64/Write64/Cas64) in this file. The
per-architecture inline assembly for Read64/Write64/Cas64 and
LongAtomicsUseMutexes no longer live here.

Change-Id: I937ea93e6df1835ecfe2d4bb7d84c24fe7fc097b
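
For context, a minimal sketch of the declarations this file now relies on in
runtime/atomic.h. The header hunk is not part of this diff, so the exact
layout, the constexpr spellings, and the inline fast path shown in Read64 are
assumptions; the names (kNeedSwapMutexes, kSwapMutexCount, gSwapMutexes,
GetSwapMutex, SwapMutexRead64/Write64/Cas64) match the definitions below.

  #include <stdint.h>
  #include <stddef.h>

  #include <vector>

  namespace art {

  class Mutex;

  class QuasiAtomic {
   public:
    static void Startup();
    static void Shutdown();

    // Reads the 64-bit value at "addr" without tearing. The plain load below
    // is a placeholder; the real header would use an architecture-specific
    // fast path (e.g. ldrexd on ARM, movq on x86) when no mutex is needed.
    static int64_t Read64(volatile const int64_t* addr) {
      if (kNeedSwapMutexes) {
        return SwapMutexRead64(addr);
      }
      return *addr;  // placeholder for the inline fast path
    }

    static void Write64(volatile int64_t* addr, int64_t value);
    static bool Cas64(int64_t old_value, int64_t new_value,
                      volatile int64_t* addr);

   private:
    // Mirrors the old NEED_SWAP_MUTEXES macro: only ARM and x86 had inline
    // 64-bit atomics in the removed code.
  #if defined(__arm__) || defined(__i386__)
    static constexpr bool kNeedSwapMutexes = false;
  #else
    static constexpr bool kNeedSwapMutexes = true;
  #endif

    // We stripe across a bunch of different mutexes to reduce contention.
    static constexpr size_t kSwapMutexCount = 32;

    static Mutex* GetSwapMutex(const volatile int64_t* addr);
    static int64_t SwapMutexRead64(volatile const int64_t* addr);
    static void SwapMutexWrite64(volatile int64_t* addr, int64_t value);
    static bool SwapMutexCas64(int64_t old_value, int64_t new_value,
                               volatile int64_t* addr);

    static std::vector<Mutex*>* gSwapMutexes;
  };

  }  // namespace art
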
diff --git a/runtime/atomic.cc b/runtime/atomic.cc
index 47cee6a..bac0a99 100644
--- a/runtime/atomic.cc
+++ b/runtime/atomic.cc
@@ -15,135 +15,52 @@
  */
 
 #include "atomic.h"
-
-#define NEED_SWAP_MUTEXES !defined(__arm__) && !defined(__i386__)
-
-#if NEED_SWAP_MUTEXES
-#include <vector>
 #include "base/mutex.h"
 #include "base/stl_util.h"
-#include "base/stringprintf.h"
 #include "thread-inl.h"
-#endif
 
 namespace art {
 
-#if NEED_SWAP_MUTEXES
-// We stripe across a bunch of different mutexes to reduce contention.
-static const size_t kSwapMutexCount = 32;
-static std::vector<Mutex*>* gSwapMutexes;
+std::vector<Mutex*>* QuasiAtomic::gSwapMutexes = nullptr;
 
-static Mutex& GetSwapMutex(const volatile int64_t* addr) {
-  return *(*gSwapMutexes)[(reinterpret_cast<unsigned>(addr) >> 3U) % kSwapMutexCount];
+Mutex* QuasiAtomic::GetSwapMutex(const volatile int64_t* addr) {
+  return (*gSwapMutexes)[(reinterpret_cast<unsigned>(addr) >> 3U) % kSwapMutexCount];
 }
-#endif
 
 void QuasiAtomic::Startup() {
-#if NEED_SWAP_MUTEXES
-  gSwapMutexes = new std::vector<Mutex*>;
-  for (size_t i = 0; i < kSwapMutexCount; ++i) {
-    gSwapMutexes->push_back(new Mutex("QuasiAtomic stripe"));
+  if (kNeedSwapMutexes) {
+    gSwapMutexes = new std::vector<Mutex*>;
+    for (size_t i = 0; i < kSwapMutexCount; ++i) {
+      gSwapMutexes->push_back(new Mutex("QuasiAtomic stripe"));
+    }
   }
-#endif
 }
 
 void QuasiAtomic::Shutdown() {
-#if NEED_SWAP_MUTEXES
-  STLDeleteElements(gSwapMutexes);
-  delete gSwapMutexes;
-#endif
+  if (kNeedSwapMutexes) {
+    STLDeleteElements(gSwapMutexes);
+    delete gSwapMutexes;
+  }
 }
 
-int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
-  int64_t value;
-#if NEED_SWAP_MUTEXES
-  MutexLock mu(Thread::Current(), GetSwapMutex(addr));
-  value = *addr;
-#elif defined(__arm__)
-  // Exclusive loads are defined not to tear, clearing the exclusive state isn't necessary. If we
-  // have LPAE (such as Cortex-A15) then ldrd would suffice.
-  __asm__ __volatile__("@ QuasiAtomic::Read64\n"
-      "ldrexd     %0, %H0, [%1]"
-      : "=&r" (value)
-      : "r" (addr));
-#elif defined(__i386__)
-  __asm__ __volatile__(
-      "movq     %1, %0\n"
-      : "=x" (value)
-      : "m" (*addr));
-#else
-#error Unexpected architecture
-#endif
-  return value;
+int64_t QuasiAtomic::SwapMutexRead64(volatile const int64_t* addr) {
+  MutexLock mu(Thread::Current(), *GetSwapMutex(addr));
+  return *addr;
 }
 
-void QuasiAtomic::Write64(volatile int64_t* addr, int64_t value) {
-#if NEED_SWAP_MUTEXES
-  MutexLock mu(Thread::Current(), GetSwapMutex(addr));
+void QuasiAtomic::SwapMutexWrite64(volatile int64_t* addr, int64_t value) {
+  MutexLock mu(Thread::Current(), *GetSwapMutex(addr));
   *addr = value;
-#elif defined(__arm__)
-  // The write is done as a swap so that the cache-line is in the exclusive state for the store. If
-  // we know that ARM architecture has LPAE (such as Cortex-A15) this isn't necessary and strd will
-  // suffice.
-  int64_t prev;
-  int status;
-  do {
-    __asm__ __volatile__("@ QuasiAtomic::Write64\n"
-        "ldrexd     %0, %H0, [%3]\n"
-        "strexd     %1, %4, %H4, [%3]"
-        : "=&r" (prev), "=&r" (status), "+m"(*addr)
-        : "r" (addr), "r" (value)
-        : "cc");
-  } while (__builtin_expect(status != 0, 0));
-#elif defined(__i386__)
-  __asm__ __volatile__(
-      "movq     %1, %0"
-      : "=m" (*addr)
-      : "x" (value));
-#else
-#error Unexpected architecture
-#endif
 }
 
 
-bool QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
-#if NEED_SWAP_MUTEXES
-  MutexLock mu(Thread::Current(), GetSwapMutex(addr));
+bool QuasiAtomic::SwapMutexCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+  MutexLock mu(Thread::Current(), *GetSwapMutex(addr));
   if (*addr == old_value) {
     *addr = new_value;
     return true;
   }
   return false;
-#elif defined(__arm__)
-  int64_t prev;
-  int status;
-  do {
-    __asm__ __volatile__("@ QuasiAtomic::Cas64\n"
-        "ldrexd     %0, %H0, [%3]\n"
-        "mov        %1, #0\n"
-        "teq        %0, %4\n"
-        "teqeq      %H0, %H4\n"
-        "strexdeq   %1, %5, %H5, [%3]"
-        : "=&r" (prev), "=&r" (status), "+m"(*addr)
-        : "r" (addr), "Ir" (old_value), "r" (new_value)
-        : "cc");
-  } while (__builtin_expect(status != 0, 0));
-  return prev == old_value;
-#elif defined(__i386__)
-  // The compiler does the right job and works better than inline assembly, especially with -O0
-  // compilation.
-  return __sync_bool_compare_and_swap(addr, old_value, new_value);
-#else
-#error Unexpected architecture
-#endif
-}
-
-bool QuasiAtomic::LongAtomicsUseMutexes() {
-#if NEED_SWAP_MUTEXES
-  return true;
-#else
-  return false;
-#endif
 }
 
 }  // namespace art
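
A usage sketch (hypothetical: the AtomicAdd64 helper and its caller are
illustrative, and it assumes Read64/Cas64 remain available as QuasiAtomic
static functions after this change). A 64-bit add is expressed as a
compare-and-swap retry loop, which behaves the same whether the operations
hit an inline fast path or the mutex-striped fallback:

  #include <stdint.h>

  #include "atomic.h"

  namespace art {

  // Hypothetical helper: atomically adds "delta" to the 64-bit value at
  // "addr", retrying whenever a concurrent update wins the race.
  static void AtomicAdd64(volatile int64_t* addr, int64_t delta) {
    int64_t old_value;
    int64_t new_value;
    do {
      old_value = QuasiAtomic::Read64(addr);  // tear-free read of the current value
      new_value = old_value + delta;
    } while (!QuasiAtomic::Cas64(old_value, new_value, addr));  // retry on contention
  }

  }  // namespace art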