Merge "Avoid volatile 64-bit tearing on 32-bit architectures."
diff --git a/build/Android.common.mk b/build/Android.common.mk
index c95b1c6..aaa1490 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -254,6 +254,14 @@
 # To use oprofile_android --callgraph, uncomment this and recompile with "mmm art -B -j16"
 # ART_TARGET_CFLAGS += -fno-omit-frame-pointer -marm -mapcs
 
+# Additional CPU-specific CFLAGS.
+ifeq ($(TARGET_ARCH),arm)
+  ifneq ($(filter cortex-a15, $(TARGET_CPU_VARIANT)),)
+    # Fake an ARM feature define for LPAE support.
+    ART_TARGET_CFLAGS += -D__ARM_FEATURE_LPAE=1
+  endif
+endif
+
 ART_HOST_NON_DEBUG_CFLAGS := $(art_non_debug_cflags)
 ART_TARGET_NON_DEBUG_CFLAGS := $(art_non_debug_cflags)
 
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 795f917..6867fef 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 #include <vector>
 
+#include "base/logging.h"
 #include "base/macros.h"
 
 namespace art {
@@ -110,18 +111,76 @@
   // Reads the 64-bit value at "addr" without tearing.
   static int64_t Read64(volatile const int64_t* addr) {
     if (!kNeedSwapMutexes) {
-      return *addr;
+      int64_t value;
+#if defined(__LP64__)
+      value = *addr;
+#else
+#if defined(__arm__)
+#if defined(__ARM_FEATURE_LPAE)
+      // With LPAE support (such as on Cortex-A15), ldrd is defined not to tear.
+      __asm__ __volatile__("@ QuasiAtomic::Read64\n"
+        "ldrd     %0, %H0, %1"
+        : "=r" (value)
+        : "m" (*addr));
+#else
+      // Exclusive loads are defined not to tear; clearing the exclusive state isn't necessary.
+      __asm__ __volatile__("@ QuasiAtomic::Read64\n"
+        "ldrexd     %0, %H0, %1"
+        : "=r" (value)
+        : "Q" (*addr));
+#endif
+#elif defined(__i386__)
+      __asm__ __volatile__(
+        "movq     %1, %0\n"
+        : "=x" (value)
+        : "m" (*addr));
+#else
+      LOG(FATAL) << "Unsupported architecture";
+#endif
+#endif  // defined(__LP64__)
+      return value;
     } else {
       return SwapMutexRead64(addr);
     }
   }
 
   // Writes to the 64-bit value at "addr" without tearing.
-  static void Write64(volatile int64_t* addr, int64_t val) {
+  static void Write64(volatile int64_t* addr, int64_t value) {
     if (!kNeedSwapMutexes) {
-      *addr = val;
+#if defined(__LP64__)
+      *addr = value;
+#else
+#if defined(__arm__)
+#if defined(__ARM_FEATURE_LPAE)
+      // With LPAE (such as on Cortex-A15), strd is defined not to tear.
+      __asm__ __volatile__("@ QuasiAtomic::Write64\n"
+        "strd     %1, %H1, %0"
+        : "=m" (*addr)
+        : "r" (value));
+#else
+      // The write is done as a swap so that the cache line is in the exclusive state for the store.
+      int64_t prev;
+      int status;
+      do {
+        __asm__ __volatile__("@ QuasiAtomic::Write64\n"
+          "ldrexd     %0, %H0, %2\n"
+          "strexd     %1, %3, %H3, %2"
+          : "=&r" (prev), "=&r" (status), "+Q" (*addr)
+          : "r" (value)
+          : "cc");
+      } while (UNLIKELY(status != 0));
+#endif
+#elif defined(__i386__)
+      __asm__ __volatile__(
+        "movq     %1, %0"
+        : "=m" (*addr)
+        : "x" (value));
+#else
+      LOG(FATAL) << "Unsupported architecture";
+#endif
+#endif  // defined(__LP64__)
     } else {
-      SwapMutexWrite64(addr, val);
+      SwapMutexWrite64(addr, value);
     }
   }
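
When kNeedSwapMutexes is true, both functions instead serialize through SwapMutexRead64 and
SwapMutexWrite64, which this hunk only calls. A conceptual sketch of that fallback, assuming a
single global lock (the names below are illustrative; ART's real implementation may differ, for
example by sharding locks by address):

    #include <cstdint>
    #include <mutex>

    namespace sketch {
    std::mutex g_swap_mutex;  // Hypothetical single lock, for illustration only.

    int64_t SwapMutexRead64(volatile const int64_t* addr) {
      std::lock_guard<std::mutex> lock(g_swap_mutex);
      return *addr;  // Safe: every writer takes the same lock.
    }

    void SwapMutexWrite64(volatile int64_t* addr, int64_t value) {
      std::lock_guard<std::mutex> lock(g_swap_mutex);
      *addr = value;
    }
    }  // namespace sketch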