ART: Simplify atomic.h

Prefer std::atomic operations over the wrappers in atomic.h. The
exceptions are the operations that implement Java data memory
semantics and the CAS (compare-and-set) helpers; those wrappers are
kept.

Bug: 71621075
Test: art/test.py --host -j32
Test: art/test.py --target --64 -j4
Change-Id: I9a157e9dede852c1b2aa67d22e3e604a68a9ef1c
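
(Illustrative only; not part of the patch: a minimal sketch of the mechanical mapping this change
applies, using a hypothetical std::atomic<int32_t> counter. Each replacement line notes the removed
Atomic<T> wrapper it stands in for.)

#include <atomic>
#include <cstdint>

std::atomic<int32_t> counter{0};

void Example() {
  int32_t v = counter.load(std::memory_order_relaxed);  // was counter.LoadRelaxed()
  counter.store(v + 1, std::memory_order_seq_cst);      // was counter.StoreSequentiallyConsistent(v + 1)
  counter.fetch_add(1, std::memory_order_relaxed);      // was counter.FetchAndAddRelaxed(1)
  counter.fetch_sub(1, std::memory_order_seq_cst);      // was counter.FetchAndSubSequentiallyConsistent(1)
}
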
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index bd3a145..83532fd 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1555,7 +1555,7 @@
     self->AssertNoPendingException();
     CHECK_GT(work_units, 0U);
 
-    index_.StoreRelaxed(begin);
+    index_.store(begin, std::memory_order_relaxed);
     for (size_t i = 0; i < work_units; ++i) {
       thread_pool_->AddTask(self, new ForAllClosureLambda<Fn>(this, end, fn));
     }
@@ -1573,7 +1573,7 @@
   }
 
   size_t NextIndex() {
-    return index_.FetchAndAddSequentiallyConsistent(1);
+    return index_.fetch_add(1, std::memory_order_seq_cst);
   }
 
  private:
@@ -2837,7 +2837,8 @@
                                                               /*expected*/ nullptr,
                                                               compiled_method);
   CHECK(result == MethodTable::kInsertResultSuccess);
-  non_relative_linker_patch_count_.FetchAndAddRelaxed(non_relative_linker_patch_count);
+  non_relative_linker_patch_count_.fetch_add(non_relative_linker_patch_count,
+                                             std::memory_order_relaxed);
   DCHECK(GetCompiledMethod(method_ref) != nullptr) << method_ref.PrettyMethod();
 }
 
@@ -2948,7 +2949,7 @@
 }
 
 size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const {
-  return non_relative_linker_patch_count_.LoadRelaxed();
+  return non_relative_linker_patch_count_.load(std::memory_order_relaxed);
 }
 
 void CompilerDriver::SetRequiresConstructorBarrier(Thread* self,
diff --git a/compiler/utils/atomic_dex_ref_map-inl.h b/compiler/utils/atomic_dex_ref_map-inl.h
index 7977e82..4bd323d 100644
--- a/compiler/utils/atomic_dex_ref_map-inl.h
+++ b/compiler/utils/atomic_dex_ref_map-inl.h
@@ -70,7 +70,7 @@
   if (array == nullptr) {
     return false;
   }
-  *out = (*array)[ref.index].LoadRelaxed();
+  *out = (*array)[ref.index].load(std::memory_order_relaxed);
   return true;
 }
 
@@ -81,8 +81,8 @@
   if (array == nullptr) {
     return false;
   }
-  *out = (*array)[ref.index].LoadRelaxed();
-  (*array)[ref.index].StoreSequentiallyConsistent(nullptr);
+  *out = (*array)[ref.index].load(std::memory_order_relaxed);
+  (*array)[ref.index].store(nullptr, std::memory_order_seq_cst);
   return true;
 }
 
@@ -121,7 +121,7 @@
     const DexFile* dex_file = pair.first;
     const ElementArray& elements = pair.second;
     for (size_t i = 0; i < elements.size(); ++i) {
-      visitor(DexFileReference(dex_file, i), elements[i].LoadRelaxed());
+      visitor(DexFileReference(dex_file, i), elements[i].load(std::memory_order_relaxed));
     }
   }
 }
@@ -130,7 +130,7 @@
 inline void AtomicDexRefMap<DexFileReferenceType, Value>::ClearEntries() {
   for (auto& it : arrays_) {
     for (auto& element : it.second) {
-      element.StoreRelaxed(nullptr);
+      element.store(nullptr, std::memory_order_relaxed);
     }
   }
 }
diff --git a/libartbase/base/allocator.cc b/libartbase/base/allocator.cc
index a424145..17da789 100644
--- a/libartbase/base/allocator.cc
+++ b/libartbase/base/allocator.cc
@@ -83,9 +83,9 @@
   if (kEnableTrackingAllocator) {
     os << "Dumping native memory usage\n";
     for (size_t i = 0; i < kAllocatorTagCount; ++i) {
-      uint64_t bytes_used = g_bytes_used[i].LoadRelaxed();
+      uint64_t bytes_used = g_bytes_used[i].load(std::memory_order_relaxed);
       uint64_t max_bytes_used = g_max_bytes_used[i];
-      uint64_t total_bytes_used = g_total_bytes_used[i].LoadRelaxed();
+      uint64_t total_bytes_used = g_total_bytes_used[i].load(std::memory_order_relaxed);
       if (total_bytes_used != 0) {
         os << static_cast<AllocatorTag>(i) << " active=" << bytes_used << " max="
            << max_bytes_used << " total=" << total_bytes_used << "\n";
diff --git a/libartbase/base/allocator.h b/libartbase/base/allocator.h
index d92fe19..7ddbacf 100644
--- a/libartbase/base/allocator.h
+++ b/libartbase/base/allocator.h
@@ -84,15 +84,15 @@
 void Dump(std::ostream& os);
 
 inline void RegisterAllocation(AllocatorTag tag, size_t bytes) {
-  g_total_bytes_used[tag].FetchAndAddSequentiallyConsistent(bytes);
-  size_t new_bytes = g_bytes_used[tag].FetchAndAddSequentiallyConsistent(bytes) + bytes;
+  g_total_bytes_used[tag].fetch_add(bytes, std::memory_order_seq_cst);
+  size_t new_bytes = g_bytes_used[tag].fetch_add(bytes, std::memory_order_seq_cst) + bytes;
   if (g_max_bytes_used[tag] < new_bytes) {
     g_max_bytes_used[tag] = new_bytes;
   }
 }
 
 inline void RegisterFree(AllocatorTag tag, size_t bytes) {
-  g_bytes_used[tag].FetchAndSubSequentiallyConsistent(bytes);
+  g_bytes_used[tag].fetch_sub(bytes, std::memory_order_seq_cst);
 }
 
 }  // namespace TrackedAllocators
diff --git a/libartbase/base/atomic.h b/libartbase/base/atomic.h
index fd34cc6..f736667 100644
--- a/libartbase/base/atomic.h
+++ b/libartbase/base/atomic.h
@@ -35,94 +35,28 @@
 
   explicit Atomic<T>(T value) : std::atomic<T>(value) { }
 
-  // Load from memory without ordering or synchronization constraints.
-  T LoadRelaxed() const {
-    return this->load(std::memory_order_relaxed);
-  }
-
-  // Load from memory with acquire ordering.
-  T LoadAcquire() const {
-    return this->load(std::memory_order_acquire);
-  }
-
-  // Word tearing allowed, but may race.
-  // TODO: Optimize?
-  // There has been some discussion of eventually disallowing word
-  // tearing for Java data loads.
+  // Load data from an atomic variable with Java data memory order semantics.
+  //
+  // Promises memory access semantics of ordinary Java data.
+  // Does not order other memory accesses.
+  // Long and double accesses may be performed 32 bits at a time.
+  // There are no "cache coherence" guarantees; e.g. loads from the same location may be reordered.
+  // In contrast to normal C++ accesses, racing accesses are allowed.
   T LoadJavaData() const {
     return this->load(std::memory_order_relaxed);
   }
 
-  // Load from memory with a total ordering.
-  // Corresponds exactly to a Java volatile load.
-  T LoadSequentiallyConsistent() const {
-    return this->load(std::memory_order_seq_cst);
-  }
-
-  // Store to memory without ordering or synchronization constraints.
-  void StoreRelaxed(T desired_value) {
-    this->store(desired_value, std::memory_order_relaxed);
-  }
-
-  // Word tearing allowed, but may race.
+  // Store data in an atomic variable with Java data memory order semantics.
+  //
+  // Promises memory access semantics of ordinary Java data.
+  // Does not order other memory accesses.
+  // Long and double accesses may be performed 32 bits at a time.
+  // There are no "cache coherence" guarantees; e.g. loads from the same location may be reordered.
+  // In contrast to normal C++ accesses, racing accesses are allowed.
   void StoreJavaData(T desired_value) {
     this->store(desired_value, std::memory_order_relaxed);
   }
 
-  // Store to memory with release ordering.
-  void StoreRelease(T desired_value) {
-    this->store(desired_value, std::memory_order_release);
-  }
-
-  // Store to memory with a total ordering.
-  void StoreSequentiallyConsistent(T desired_value) {
-    this->store(desired_value, std::memory_order_seq_cst);
-  }
-
-  // Atomically replace the value with desired_value.
-  T ExchangeRelaxed(T desired_value) {
-    return this->exchange(desired_value, std::memory_order_relaxed);
-  }
-
-  // Atomically replace the value with desired_value.
-  T ExchangeSequentiallyConsistent(T desired_value) {
-    return this->exchange(desired_value, std::memory_order_seq_cst);
-  }
-
-  // Atomically replace the value with desired_value.
-  T ExchangeAcquire(T desired_value) {
-    return this->exchange(desired_value, std::memory_order_acquire);
-  }
-
-  // Atomically replace the value with desired_value.
-  T ExchangeRelease(T desired_value) {
-    return this->exchange(desired_value, std::memory_order_release);
-  }
-
-  // Atomically replace the value with desired_value if it matches the expected_value.
-  // Participates in total ordering of atomic operations. Returns true on success, false otherwise.
-  // If the value does not match, updates the expected_value argument with the value that was
-  // atomically read for the failed comparison.
-  bool CompareAndExchangeStrongSequentiallyConsistent(T* expected_value, T desired_value) {
-    return this->compare_exchange_strong(*expected_value, desired_value, std::memory_order_seq_cst);
-  }
-
-  // Atomically replace the value with desired_value if it matches the expected_value.
-  // Participates in total ordering of atomic operations. Returns true on success, false otherwise.
-  // If the value does not match, updates the expected_value argument with the value that was
-  // atomically read for the failed comparison.
-  bool CompareAndExchangeStrongAcquire(T* expected_value, T desired_value) {
-    return this->compare_exchange_strong(*expected_value, desired_value, std::memory_order_acquire);
-  }
-
-  // Atomically replace the value with desired_value if it matches the expected_value.
-  // Participates in total ordering of atomic operations. Returns true on success, false otherwise.
-  // If the value does not match, updates the expected_value argument with the value that was
-  // atomically read for the failed comparison.
-  bool CompareAndExchangeStrongRelease(T* expected_value, T desired_value) {
-    return this->compare_exchange_strong(*expected_value, desired_value, std::memory_order_release);
-  }
-
   // Atomically replace the value with desired_value if it matches the expected_value.
   // Participates in total ordering of atomic operations.
   bool CompareAndSetStrongSequentiallyConsistent(T expected_value, T desired_value) {
@@ -166,66 +100,8 @@
     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_release);
   }
 
-  T FetchAndAddSequentiallyConsistent(const T value) {
-    return this->fetch_add(value, std::memory_order_seq_cst);  // Return old_value.
-  }
-
-  T FetchAndAddRelaxed(const T value) {
-    return this->fetch_add(value, std::memory_order_relaxed);  // Return old_value.
-  }
-
-  T FetchAndAddAcquire(const T value) {
-    return this->fetch_add(value, std::memory_order_acquire);  // Return old_value.
-  }
-
-  T FetchAndAddRelease(const T value) {
-    return this->fetch_add(value, std::memory_order_acquire);  // Return old_value.
-  }
-
-  T FetchAndSubSequentiallyConsistent(const T value) {
-    return this->fetch_sub(value, std::memory_order_seq_cst);  // Return old value.
-  }
-
-  T FetchAndSubRelaxed(const T value) {
-    return this->fetch_sub(value, std::memory_order_relaxed);  // Return old value.
-  }
-
-  T FetchAndBitwiseAndSequentiallyConsistent(const T value) {
-    return this->fetch_and(value, std::memory_order_seq_cst);  // Return old_value.
-  }
-
-  T FetchAndBitwiseAndAcquire(const T value) {
-    return this->fetch_and(value, std::memory_order_acquire);  // Return old_value.
-  }
-
-  T FetchAndBitwiseAndRelease(const T value) {
-    return this->fetch_and(value, std::memory_order_release);  // Return old_value.
-  }
-
-  T FetchAndBitwiseOrSequentiallyConsistent(const T value) {
-    return this->fetch_or(value, std::memory_order_seq_cst);  // Return old_value.
-  }
-
-  T FetchAndBitwiseOrAcquire(const T value) {
-    return this->fetch_or(value, std::memory_order_acquire);  // Return old_value.
-  }
-
-  T FetchAndBitwiseOrRelease(const T value) {
-    return this->fetch_or(value, std::memory_order_release);  // Return old_value.
-  }
-
-  T FetchAndBitwiseXorSequentiallyConsistent(const T value) {
-    return this->fetch_xor(value, std::memory_order_seq_cst);  // Return old_value.
-  }
-
-  T FetchAndBitwiseXorAcquire(const T value) {
-    return this->fetch_xor(value, std::memory_order_acquire);  // Return old_value.
-  }
-
-  T FetchAndBitwiseXorRelease(const T value) {
-    return this->fetch_xor(value, std::memory_order_release);  // Return old_value.
-  }
-
+  // Returns the address of the current atomic variable. This is only used by futex(), which is
+  // declared to take a volatile address (see base/mutex-inl.h).
   volatile T* Address() {
     return reinterpret_cast<T*>(this);
   }
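
(Illustrative only; not part of the patch: a sketch of what remains on art::Atomic<T> after this
simplification, namely the Java-data accessors, the CompareAndSet wrappers, and Address(), using a
hypothetical field. The include path and the surrounding function are assumptions for the example.)

#include "base/atomic.h"  // art::Atomic<T>; assumed include path

art::Atomic<int32_t> field{0};

void Example() {
  int32_t v = field.LoadJavaData();         // relaxed load with Java data semantics
  field.StoreJavaData(v + 1);               // relaxed store with Java data semantics
  if (field.CompareAndSetStrongSequentiallyConsistent(v + 1, v + 2)) {
    // The CompareAndSet helpers are among the wrappers this change keeps.
  }
  volatile int32_t* raw = field.Address();  // only meant for futex(), see base/mutex-inl.h
  (void)raw;
}
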
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index 04bb6ba..88075ba 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -69,18 +69,18 @@
     thread_pool.AddTask(self, new CheckWaitTask(&barrier, &count1, &count2));
   }
   thread_pool.StartWorkers(self);
-  while (count1.LoadRelaxed() != num_threads) {
+  while (count1.load(std::memory_order_relaxed) != num_threads) {
     timeout_barrier.Increment(self, 1, 100);  // sleep 100 msecs
   }
   // Count 2 should still be zero since no thread should have gone past the barrier.
-  EXPECT_EQ(0, count2.LoadRelaxed());
+  EXPECT_EQ(0, count2.load(std::memory_order_relaxed));
   // Perform one additional Wait(), allowing pool threads to proceed.
   barrier.Wait(self);
   // Wait for all the threads to finish.
   thread_pool.Wait(self, true, false);
   // Both counts should be equal to num_threads now.
-  EXPECT_EQ(count1.LoadRelaxed(), num_threads);
-  EXPECT_EQ(count2.LoadRelaxed(), num_threads);
+  EXPECT_EQ(count1.load(std::memory_order_relaxed), num_threads);
+  EXPECT_EQ(count2.load(std::memory_order_relaxed), num_threads);
   timeout_barrier.Init(self, 0);  // Reset to zero for destruction.
 }
 
@@ -124,7 +124,7 @@
   // Wait for all the tasks to complete using the barrier.
   barrier.Increment(self, expected_total_tasks);
   // The total number of completed tasks should be equal to expected_total_tasks.
-  EXPECT_EQ(count.LoadRelaxed(), expected_total_tasks);
+  EXPECT_EQ(count.load(std::memory_order_relaxed), expected_total_tasks);
 }
 
 }  // namespace art
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index d6dbab4..dfa14b9 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -161,7 +161,7 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_.LoadRelaxed();
+    int32_t cur_state = state_.load(std::memory_order_relaxed);
     if (LIKELY(cur_state >= 0)) {
       // Add as an extra reader.
       done = state_.CompareAndSetWeakAcquire(cur_state, cur_state + 1);
@@ -185,7 +185,7 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_.LoadRelaxed();
+    int32_t cur_state = state_.load(std::memory_order_relaxed);
     if (LIKELY(cur_state > 0)) {
       // Reduce state by 1 and impose lock release load/store ordering.
       // Note, the relaxed loads below musn't reorder before the CompareAndSet.
@@ -193,8 +193,8 @@
       // a status bit into the state on contention.
       done = state_.CompareAndSetWeakSequentiallyConsistent(cur_state, cur_state - 1);
       if (done && (cur_state - 1) == 0) {  // Weak CAS may fail spuriously.
-        if (num_pending_writers_.LoadRelaxed() > 0 ||
-            num_pending_readers_.LoadRelaxed() > 0) {
+        if (num_pending_writers_.load(std::memory_order_relaxed) > 0 ||
+            num_pending_readers_.load(std::memory_order_relaxed) > 0) {
           // Wake any exclusive waiters as there are now no readers.
           futex(state_.Address(), FUTEX_WAKE, -1, nullptr, nullptr, 0);
         }
@@ -221,7 +221,7 @@
 }
 
 inline pid_t Mutex::GetExclusiveOwnerTid() const {
-  return exclusive_owner_.LoadRelaxed();
+  return exclusive_owner_.load(std::memory_order_relaxed);
 }
 
 inline void Mutex::AssertExclusiveHeld(const Thread* self) const {
@@ -248,16 +248,16 @@
 
 inline pid_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
 #if ART_USE_FUTEXES
-  int32_t state = state_.LoadRelaxed();
+  int32_t state = state_.load(std::memory_order_relaxed);
   if (state == 0) {
     return 0;  // No owner.
   } else if (state > 0) {
     return -1;  // Shared.
   } else {
-    return exclusive_owner_.LoadRelaxed();
+    return exclusive_owner_.load(std::memory_order_relaxed);
   }
 #else
-  return exclusive_owner_.LoadRelaxed();
+  return exclusive_owner_.load(std::memory_order_relaxed);
 #endif
 }
 
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index a1f30b6..73b4641 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -128,15 +128,15 @@
  public:
   explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
     for (uint32_t i = 0;
-         !gAllMutexData->all_mutexes_guard.CompareAndSetWeakAcquire(0, mutex);
+         !gAllMutexData->all_mutexes_guard.CompareAndSetWeakAcquire(nullptr, mutex);
          ++i) {
       BackOff(i);
     }
   }
 
   ~ScopedAllMutexesLock() {
-    DCHECK_EQ(gAllMutexData->all_mutexes_guard.LoadRelaxed(), mutex_);
-    gAllMutexData->all_mutexes_guard.StoreRelease(0);
+    DCHECK_EQ(gAllMutexData->all_mutexes_guard.load(std::memory_order_relaxed), mutex_);
+    gAllMutexData->all_mutexes_guard.store(nullptr, std::memory_order_release);
   }
 
  private:
@@ -147,15 +147,17 @@
  public:
   explicit ScopedExpectedMutexesOnWeakRefAccessLock(const BaseMutex* mutex) : mutex_(mutex) {
     for (uint32_t i = 0;
-         !Locks::expected_mutexes_on_weak_ref_access_guard_.CompareAndSetWeakAcquire(0, mutex);
+         !Locks::expected_mutexes_on_weak_ref_access_guard_.CompareAndSetWeakAcquire(nullptr,
+                                                                                     mutex);
          ++i) {
       BackOff(i);
     }
   }
 
   ~ScopedExpectedMutexesOnWeakRefAccessLock() {
-    DCHECK_EQ(Locks::expected_mutexes_on_weak_ref_access_guard_.LoadRelaxed(), mutex_);
-    Locks::expected_mutexes_on_weak_ref_access_guard_.StoreRelease(0);
+    DCHECK_EQ(Locks::expected_mutexes_on_weak_ref_access_guard_.load(std::memory_order_relaxed),
+              mutex_);
+    Locks::expected_mutexes_on_weak_ref_access_guard_.store(nullptr, std::memory_order_release);
   }
 
  private:
@@ -293,7 +295,7 @@
 void BaseMutex::ContentionLogData::AddToWaitTime(uint64_t value) {
   if (kLogLockContentions) {
     // Atomically add value to wait_time.
-    wait_time.FetchAndAddSequentiallyConsistent(value);
+    wait_time.fetch_add(value, std::memory_order_seq_cst);
   }
 }
 
@@ -306,19 +308,19 @@
     data->AddToWaitTime(nano_time_blocked);
     ContentionLogEntry* log = data->contention_log;
     // This code is intentionally racy as it is only used for diagnostics.
-    uint32_t slot = data->cur_content_log_entry.LoadRelaxed();
+    int32_t slot = data->cur_content_log_entry.load(std::memory_order_relaxed);
     if (log[slot].blocked_tid == blocked_tid &&
         log[slot].owner_tid == blocked_tid) {
       ++log[slot].count;
     } else {
       uint32_t new_slot;
       do {
-        slot = data->cur_content_log_entry.LoadRelaxed();
+        slot = data->cur_content_log_entry.load(std::memory_order_relaxed);
         new_slot = (slot + 1) % kContentionLogSize;
       } while (!data->cur_content_log_entry.CompareAndSetWeakRelaxed(slot, new_slot));
       log[new_slot].blocked_tid = blocked_tid;
       log[new_slot].owner_tid = owner_tid;
-      log[new_slot].count.StoreRelaxed(1);
+      log[new_slot].count.store(1, std::memory_order_relaxed);
     }
   }
 }
@@ -327,8 +329,8 @@
   if (kLogLockContentions) {
     const ContentionLogData* data = contention_log_data_;
     const ContentionLogEntry* log = data->contention_log;
-    uint64_t wait_time = data->wait_time.LoadRelaxed();
-    uint32_t contention_count = data->contention_count.LoadRelaxed();
+    uint64_t wait_time = data->wait_time.load(std::memory_order_relaxed);
+    uint32_t contention_count = data->contention_count.load(std::memory_order_relaxed);
     if (contention_count == 0) {
       os << "never contended";
     } else {
@@ -340,7 +342,7 @@
       for (size_t i = 0; i < kContentionLogSize; ++i) {
         uint64_t blocked_tid = log[i].blocked_tid;
         uint64_t owner_tid = log[i].owner_tid;
-        uint32_t count = log[i].count.LoadRelaxed();
+        uint32_t count = log[i].count.load(std::memory_order_relaxed);
         if (count > 0) {
           auto it = most_common_blocked.find(blocked_tid);
           if (it != most_common_blocked.end()) {
@@ -386,8 +388,8 @@
 Mutex::Mutex(const char* name, LockLevel level, bool recursive)
     : BaseMutex(name, level), exclusive_owner_(0), recursive_(recursive), recursion_count_(0) {
 #if ART_USE_FUTEXES
-  DCHECK_EQ(0, state_.LoadRelaxed());
-  DCHECK_EQ(0, num_contenders_.LoadRelaxed());
+  DCHECK_EQ(0, state_.load(std::memory_order_relaxed));
+  DCHECK_EQ(0, num_contenders_.load(std::memory_order_relaxed));
 #else
   CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, nullptr));
 #endif
@@ -402,7 +404,7 @@
 Mutex::~Mutex() {
   bool safe_to_call_abort = Locks::IsSafeToCallAbortRacy();
 #if ART_USE_FUTEXES
-  if (state_.LoadRelaxed() != 0) {
+  if (state_.load(std::memory_order_relaxed) != 0) {
     LOG(safe_to_call_abort ? FATAL : WARNING)
         << "destroying mutex with owner: " << GetExclusiveOwnerTid();
   } else {
@@ -410,7 +412,7 @@
       LOG(safe_to_call_abort ? FATAL : WARNING)
           << "unexpectedly found an owner on unlocked mutex " << name_;
     }
-    if (num_contenders_.LoadSequentiallyConsistent() != 0) {
+    if (num_contenders_.load(std::memory_order_seq_cst) != 0) {
       LOG(safe_to_call_abort ? FATAL : WARNING)
           << "unexpectedly found a contender on mutex " << name_;
     }
@@ -436,7 +438,7 @@
 #if ART_USE_FUTEXES
     bool done = false;
     do {
-      int32_t cur_state = state_.LoadRelaxed();
+      int32_t cur_state = state_.load(std::memory_order_relaxed);
       if (LIKELY(cur_state == 0)) {
         // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
         done = state_.CompareAndSetWeakAcquire(0 /* cur_state */, 1 /* new state */);
@@ -457,12 +459,12 @@
         num_contenders_--;
       }
     } while (!done);
-    DCHECK_EQ(state_.LoadRelaxed(), 1);
+    DCHECK_EQ(state_.load(std::memory_order_relaxed), 1);
 #else
     CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
 #endif
     DCHECK_EQ(GetExclusiveOwnerTid(), 0);
-    exclusive_owner_.StoreRelaxed(SafeGetTid(self));
+    exclusive_owner_.store(SafeGetTid(self), std::memory_order_relaxed);
     RegisterAsLocked(self);
   }
   recursion_count_++;
@@ -482,7 +484,7 @@
 #if ART_USE_FUTEXES
     bool done = false;
     do {
-      int32_t cur_state = state_.LoadRelaxed();
+      int32_t cur_state = state_.load(std::memory_order_relaxed);
       if (cur_state == 0) {
         // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
         done = state_.CompareAndSetWeakAcquire(0 /* cur_state */, 1 /* new state */);
@@ -490,7 +492,7 @@
         return false;
       }
     } while (!done);
-    DCHECK_EQ(state_.LoadRelaxed(), 1);
+    DCHECK_EQ(state_.load(std::memory_order_relaxed), 1);
 #else
     int result = pthread_mutex_trylock(&mutex_);
     if (result == EBUSY) {
@@ -502,7 +504,7 @@
     }
 #endif
     DCHECK_EQ(GetExclusiveOwnerTid(), 0);
-    exclusive_owner_.StoreRelaxed(SafeGetTid(self));
+    exclusive_owner_.store(SafeGetTid(self), std::memory_order_relaxed);
     RegisterAsLocked(self);
   }
   recursion_count_++;
@@ -539,10 +541,10 @@
 #if ART_USE_FUTEXES
     bool done = false;
     do {
-      int32_t cur_state = state_.LoadRelaxed();
+      int32_t cur_state = state_.load(std::memory_order_relaxed);
       if (LIKELY(cur_state == 1)) {
         // We're no longer the owner.
-        exclusive_owner_.StoreRelaxed(0);
+        exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
         // Change state to 0 and impose load/store ordering appropriate for lock release.
         // Note, the relaxed loads below mustn't reorder before the CompareAndSet.
         // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
@@ -550,7 +552,7 @@
         done = state_.CompareAndSetWeakSequentiallyConsistent(cur_state, 0 /* new state */);
         if (LIKELY(done)) {  // Spurious fail?
           // Wake a contender.
-          if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
+          if (UNLIKELY(num_contenders_.load(std::memory_order_relaxed) > 0)) {
             futex(state_.Address(), FUTEX_WAKE, 1, nullptr, nullptr, 0);
           }
         }
@@ -569,7 +571,7 @@
       }
     } while (!done);
 #else
-    exclusive_owner_.StoreRelaxed(0);
+    exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
     CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_));
 #endif
   }
@@ -593,7 +595,7 @@
 #if ART_USE_FUTEXES
   // Wake up all the waiters so they will respond to the emtpy checkpoint.
   DCHECK(should_respond_to_empty_checkpoint_request_);
-  if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
+  if (UNLIKELY(num_contenders_.load(std::memory_order_relaxed) > 0)) {
     futex(state_.Address(), FUTEX_WAKE, -1, nullptr, nullptr, 0);
   }
 #else
@@ -610,15 +612,15 @@
 #if !ART_USE_FUTEXES
   CHECK_MUTEX_CALL(pthread_rwlock_init, (&rwlock_, nullptr));
 #endif
-  exclusive_owner_.StoreRelaxed(0);
+  exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
 }
 
 ReaderWriterMutex::~ReaderWriterMutex() {
 #if ART_USE_FUTEXES
-  CHECK_EQ(state_.LoadRelaxed(), 0);
+  CHECK_EQ(state_.load(std::memory_order_relaxed), 0);
   CHECK_EQ(GetExclusiveOwnerTid(), 0);
-  CHECK_EQ(num_pending_readers_.LoadRelaxed(), 0);
-  CHECK_EQ(num_pending_writers_.LoadRelaxed(), 0);
+  CHECK_EQ(num_pending_readers_.load(std::memory_order_relaxed), 0);
+  CHECK_EQ(num_pending_writers_.load(std::memory_order_relaxed), 0);
 #else
   // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
   // may still be using locks.
@@ -637,7 +639,7 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_.LoadRelaxed();
+    int32_t cur_state = state_.load(std::memory_order_relaxed);
     if (LIKELY(cur_state == 0)) {
       // Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
       done = state_.CompareAndSetWeakAcquire(0 /* cur_state*/, -1 /* new state */);
@@ -658,12 +660,12 @@
       --num_pending_writers_;
     }
   } while (!done);
-  DCHECK_EQ(state_.LoadRelaxed(), -1);
+  DCHECK_EQ(state_.load(std::memory_order_relaxed), -1);
 #else
   CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
 #endif
   DCHECK_EQ(GetExclusiveOwnerTid(), 0);
-  exclusive_owner_.StoreRelaxed(SafeGetTid(self));
+  exclusive_owner_.store(SafeGetTid(self), std::memory_order_relaxed);
   RegisterAsLocked(self);
   AssertExclusiveHeld(self);
 }
@@ -676,10 +678,10 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_.LoadRelaxed();
+    int32_t cur_state = state_.load(std::memory_order_relaxed);
     if (LIKELY(cur_state == -1)) {
       // We're no longer the owner.
-      exclusive_owner_.StoreRelaxed(0);
+      exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
       // Change state from -1 to 0 and impose load/store ordering appropriate for lock release.
       // Note, the relaxed loads below musn't reorder before the CompareAndSet.
       // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
@@ -687,8 +689,8 @@
       done = state_.CompareAndSetWeakSequentiallyConsistent(-1 /* cur_state*/, 0 /* new state */);
       if (LIKELY(done)) {  // Weak CAS may fail spuriously.
         // Wake any waiters.
-        if (UNLIKELY(num_pending_readers_.LoadRelaxed() > 0 ||
-                     num_pending_writers_.LoadRelaxed() > 0)) {
+        if (UNLIKELY(num_pending_readers_.load(std::memory_order_relaxed) > 0 ||
+                     num_pending_writers_.load(std::memory_order_relaxed) > 0)) {
           futex(state_.Address(), FUTEX_WAKE, -1, nullptr, nullptr, 0);
         }
       }
@@ -697,7 +699,7 @@
     }
   } while (!done);
 #else
-  exclusive_owner_.StoreRelaxed(0);
+  exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
   CHECK_MUTEX_CALL(pthread_rwlock_unlock, (&rwlock_));
 #endif
 }
@@ -710,7 +712,7 @@
   timespec end_abs_ts;
   InitTimeSpec(true, CLOCK_MONOTONIC, ms, ns, &end_abs_ts);
   do {
-    int32_t cur_state = state_.LoadRelaxed();
+    int32_t cur_state = state_.load(std::memory_order_relaxed);
     if (cur_state == 0) {
       // Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
       done = state_.CompareAndSetWeakAcquire(0 /* cur_state */, -1 /* new state */);
@@ -753,7 +755,7 @@
     PLOG(FATAL) << "pthread_rwlock_timedwrlock failed for " << name_;
   }
 #endif
-  exclusive_owner_.StoreRelaxed(SafeGetTid(self));
+  exclusive_owner_.store(SafeGetTid(self), std::memory_order_relaxed);
   RegisterAsLocked(self);
   AssertSharedHeld(self);
   return true;
@@ -782,7 +784,7 @@
 #if ART_USE_FUTEXES
   bool done = false;
   do {
-    int32_t cur_state = state_.LoadRelaxed();
+    int32_t cur_state = state_.load(std::memory_order_relaxed);
     if (cur_state >= 0) {
       // Add as an extra reader and impose load/store ordering appropriate for lock acquisition.
       done = state_.CompareAndSetWeakAcquire(cur_state, cur_state + 1);
@@ -822,9 +824,9 @@
       << " level=" << static_cast<int>(level_)
       << " owner=" << GetExclusiveOwnerTid()
 #if ART_USE_FUTEXES
-      << " state=" << state_.LoadSequentiallyConsistent()
-      << " num_pending_writers=" << num_pending_writers_.LoadSequentiallyConsistent()
-      << " num_pending_readers=" << num_pending_readers_.LoadSequentiallyConsistent()
+      << " state=" << state_.load(std::memory_order_seq_cst)
+      << " num_pending_writers=" << num_pending_writers_.load(std::memory_order_seq_cst)
+      << " num_pending_readers=" << num_pending_readers_.load(std::memory_order_seq_cst)
 #endif
       << " ";
   DumpContention(os);
@@ -844,8 +846,8 @@
 #if ART_USE_FUTEXES
   // Wake up all the waiters so they will respond to the emtpy checkpoint.
   DCHECK(should_respond_to_empty_checkpoint_request_);
-  if (UNLIKELY(num_pending_readers_.LoadRelaxed() > 0 ||
-               num_pending_writers_.LoadRelaxed() > 0)) {
+  if (UNLIKELY(num_pending_readers_.load(std::memory_order_relaxed) > 0 ||
+               num_pending_writers_.load(std::memory_order_relaxed) > 0)) {
     futex(state_.Address(), FUTEX_WAKE, -1, nullptr, nullptr, 0);
   }
 #else
@@ -856,7 +858,7 @@
 ConditionVariable::ConditionVariable(const char* name, Mutex& guard)
     : name_(name), guard_(guard) {
 #if ART_USE_FUTEXES
-  DCHECK_EQ(0, sequence_.LoadRelaxed());
+  DCHECK_EQ(0, sequence_.load(std::memory_order_relaxed));
   num_waiters_ = 0;
 #else
   pthread_condattr_t cond_attrs;
@@ -899,7 +901,7 @@
     sequence_++;  // Indicate the broadcast occurred.
     bool done = false;
     do {
-      int32_t cur_sequence = sequence_.LoadRelaxed();
+      int32_t cur_sequence = sequence_.load(std::memory_order_relaxed);
       // Requeue waiters onto mutex. The waiter holds the contender count on the mutex high ensuring
       // mutex unlocks will awaken the requeued waiter thread.
       done = futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0,
@@ -948,7 +950,7 @@
   // Ensure the Mutex is contended so that requeued threads are awoken.
   guard_.num_contenders_++;
   guard_.recursion_count_ = 1;
-  int32_t cur_sequence = sequence_.LoadRelaxed();
+  int32_t cur_sequence = sequence_.load(std::memory_order_relaxed);
   guard_.ExclusiveUnlock(self);
   if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, nullptr, nullptr, 0) != 0) {
     // Futex failed, check it is an expected error.
@@ -974,14 +976,14 @@
   CHECK_GE(num_waiters_, 0);
   num_waiters_--;
   // We awoke and so no longer require awakes from the guard_'s unlock.
-  CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0);
+  CHECK_GE(guard_.num_contenders_.load(std::memory_order_relaxed), 0);
   guard_.num_contenders_--;
 #else
   pid_t old_owner = guard_.GetExclusiveOwnerTid();
-  guard_.exclusive_owner_.StoreRelaxed(0);
+  guard_.exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
   guard_.recursion_count_ = 0;
   CHECK_MUTEX_CALL(pthread_cond_wait, (&cond_, &guard_.mutex_));
-  guard_.exclusive_owner_.StoreRelaxed(old_owner);
+  guard_.exclusive_owner_.store(old_owner, std::memory_order_relaxed);
 #endif
   guard_.recursion_count_ = old_recursion_count;
 }
@@ -999,7 +1001,7 @@
   // Ensure the Mutex is contended so that requeued threads are awoken.
   guard_.num_contenders_++;
   guard_.recursion_count_ = 1;
-  int32_t cur_sequence = sequence_.LoadRelaxed();
+  int32_t cur_sequence = sequence_.load(std::memory_order_relaxed);
   guard_.ExclusiveUnlock(self);
   if (futex(sequence_.Address(), FUTEX_WAIT, cur_sequence, &rel_ts, nullptr, 0) != 0) {
     if (errno == ETIMEDOUT) {
@@ -1015,7 +1017,7 @@
   CHECK_GE(num_waiters_, 0);
   num_waiters_--;
   // We awoke and so no longer require awakes from the guard_'s unlock.
-  CHECK_GE(guard_.num_contenders_.LoadRelaxed(), 0);
+  CHECK_GE(guard_.num_contenders_.load(std::memory_order_relaxed), 0);
   guard_.num_contenders_--;
 #else
 #if !defined(__APPLE__)
@@ -1024,7 +1026,7 @@
   int clock = CLOCK_REALTIME;
 #endif
   pid_t old_owner = guard_.GetExclusiveOwnerTid();
-  guard_.exclusive_owner_.StoreRelaxed(0);
+  guard_.exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
   guard_.recursion_count_ = 0;
   timespec ts;
   InitTimeSpec(true, clock, ms, ns, &ts);
@@ -1035,7 +1037,7 @@
     errno = rc;
     PLOG(FATAL) << "TimedWait failed for " << name_;
   }
-  guard_.exclusive_owner_.StoreRelaxed(old_owner);
+  guard_.exclusive_owner_.store(old_owner, std::memory_order_relaxed);
 #endif
   guard_.recursion_count_ = old_recursion_count;
   return timed_out;
@@ -1254,12 +1256,13 @@
 }
 
 void Locks::SetClientCallback(ClientCallback* safe_to_call_abort_cb) {
-  safe_to_call_abort_callback.StoreRelease(safe_to_call_abort_cb);
+  safe_to_call_abort_callback.store(safe_to_call_abort_cb, std::memory_order_release);
 }
 
 // Helper to allow checking shutdown while ignoring locking requirements.
 bool Locks::IsSafeToCallAbortRacy() {
-  Locks::ClientCallback* safe_to_call_abort_cb = safe_to_call_abort_callback.LoadAcquire();
+  Locks::ClientCallback* safe_to_call_abort_cb =
+      safe_to_call_abort_callback.load(std::memory_order_acquire);
   return safe_to_call_abort_cb != nullptr && safe_to_call_abort_cb();
 }
 
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 4376617..b0eb23d 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -224,7 +224,7 @@
  public:
   bool HasEverContended() const {
     if (kLogLockContentions) {
-      return contention_log_data_->contention_count.LoadSequentiallyConsistent() > 0;
+      return contention_log_data_->contention_count.load(std::memory_order_seq_cst) > 0;
     }
     return false;
   }
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index c59e2e8..5da5470 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -88,7 +88,7 @@
 
 template<ReadBarrierOption kReadBarrierOption>
 inline mirror::Class* ClassTable::TableSlot::Read() const {
-  const uint32_t before = data_.LoadRelaxed();
+  const uint32_t before = data_.load(std::memory_order_relaxed);
   ObjPtr<mirror::Class> const before_ptr(ExtractPtr(before));
   ObjPtr<mirror::Class> const after_ptr(
       GcRoot<mirror::Class>(before_ptr).Read<kReadBarrierOption>());
@@ -102,7 +102,7 @@
 
 template<typename Visitor>
 inline void ClassTable::TableSlot::VisitRoot(const Visitor& visitor) const {
-  const uint32_t before = data_.LoadRelaxed();
+  const uint32_t before = data_.load(std::memory_order_relaxed);
   ObjPtr<mirror::Class> before_ptr(ExtractPtr(before));
   GcRoot<mirror::Class> root(before_ptr);
   visitor.VisitRoot(root.AddressWithoutBarrier());
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 3e90fe2..0b08041 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -53,14 +53,14 @@
    public:
     TableSlot() : data_(0u) {}
 
-    TableSlot(const TableSlot& copy) : data_(copy.data_.LoadRelaxed()) {}
+    TableSlot(const TableSlot& copy) : data_(copy.data_.load(std::memory_order_relaxed)) {}
 
     explicit TableSlot(ObjPtr<mirror::Class> klass);
 
     TableSlot(ObjPtr<mirror::Class> klass, uint32_t descriptor_hash);
 
     TableSlot& operator=(const TableSlot& copy) {
-      data_.StoreRelaxed(copy.data_.LoadRelaxed());
+      data_.store(copy.data_.load(std::memory_order_relaxed), std::memory_order_relaxed);
       return *this;
     }
 
@@ -69,7 +69,7 @@
     }
 
     uint32_t Hash() const {
-      return MaskHash(data_.LoadRelaxed());
+      return MaskHash(data_.load(std::memory_order_relaxed));
     }
 
     static uint32_t MaskHash(uint32_t hash) {
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index 6b103bf..7a4bd87 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -74,8 +74,8 @@
   void Reset() {
     DCHECK(mem_map_.get() != nullptr);
     DCHECK(begin_ != nullptr);
-    front_index_.StoreRelaxed(0);
-    back_index_.StoreRelaxed(0);
+    front_index_.store(0, std::memory_order_relaxed);
+    back_index_.store(0, std::memory_order_relaxed);
     debug_is_sorted_ = true;
     mem_map_->MadviseDontNeedAndZero();
   }
@@ -103,7 +103,7 @@
     int32_t index;
     int32_t new_index;
     do {
-      index = back_index_.LoadRelaxed();
+      index = back_index_.load(std::memory_order_relaxed);
       new_index = index + num_slots;
       if (UNLIKELY(static_cast<size_t>(new_index) >= growth_limit_)) {
         // Stack overflow.
@@ -134,31 +134,32 @@
     if (kIsDebugBuild) {
       debug_is_sorted_ = false;
     }
-    const int32_t index = back_index_.LoadRelaxed();
+    const int32_t index = back_index_.load(std::memory_order_relaxed);
     DCHECK_LT(static_cast<size_t>(index), growth_limit_);
-    back_index_.StoreRelaxed(index + 1);
+    back_index_.store(index + 1, std::memory_order_relaxed);
     begin_[index].Assign(value);
   }
 
   T* PopBack() REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK_GT(back_index_.LoadRelaxed(), front_index_.LoadRelaxed());
+    DCHECK_GT(back_index_.load(std::memory_order_relaxed),
+              front_index_.load(std::memory_order_relaxed));
     // Decrement the back index non atomically.
-    back_index_.StoreRelaxed(back_index_.LoadRelaxed() - 1);
-    return begin_[back_index_.LoadRelaxed()].AsMirrorPtr();
+    back_index_.store(back_index_.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed);
+    return begin_[back_index_.load(std::memory_order_relaxed)].AsMirrorPtr();
   }
 
   // Take an item from the front of the stack.
   T PopFront() {
-    int32_t index = front_index_.LoadRelaxed();
-    DCHECK_LT(index, back_index_.LoadRelaxed());
-    front_index_.StoreRelaxed(index + 1);
+    int32_t index = front_index_.load(std::memory_order_relaxed);
+    DCHECK_LT(index, back_index_.load(std::memory_order_relaxed));
+    front_index_.store(index + 1, std::memory_order_relaxed);
     return begin_[index];
   }
 
   // Pop a number of elements.
   void PopBackCount(int32_t n) {
     DCHECK_GE(Size(), static_cast<size_t>(n));
-    back_index_.StoreRelaxed(back_index_.LoadRelaxed() - n);
+    back_index_.store(back_index_.load(std::memory_order_relaxed) - n, std::memory_order_relaxed);
   }
 
   bool IsEmpty() const {
@@ -170,15 +171,17 @@
   }
 
   size_t Size() const {
-    DCHECK_LE(front_index_.LoadRelaxed(), back_index_.LoadRelaxed());
-    return back_index_.LoadRelaxed() - front_index_.LoadRelaxed();
+    DCHECK_LE(front_index_.load(std::memory_order_relaxed),
+              back_index_.load(std::memory_order_relaxed));
+    return
+        back_index_.load(std::memory_order_relaxed) - front_index_.load(std::memory_order_relaxed);
   }
 
   StackReference<T>* Begin() const {
-    return begin_ + front_index_.LoadRelaxed();
+    return begin_ + front_index_.load(std::memory_order_relaxed);
   }
   StackReference<T>* End() const {
-    return begin_ + back_index_.LoadRelaxed();
+    return begin_ + back_index_.load(std::memory_order_relaxed);
   }
 
   size_t Capacity() const {
@@ -193,11 +196,11 @@
   }
 
   void Sort() {
-    int32_t start_back_index = back_index_.LoadRelaxed();
-    int32_t start_front_index = front_index_.LoadRelaxed();
+    int32_t start_back_index = back_index_.load(std::memory_order_relaxed);
+    int32_t start_front_index = front_index_.load(std::memory_order_relaxed);
     std::sort(Begin(), End(), ObjectComparator());
-    CHECK_EQ(start_back_index, back_index_.LoadRelaxed());
-    CHECK_EQ(start_front_index, front_index_.LoadRelaxed());
+    CHECK_EQ(start_back_index, back_index_.load(std::memory_order_relaxed));
+    CHECK_EQ(start_front_index, front_index_.load(std::memory_order_relaxed));
     if (kIsDebugBuild) {
       debug_is_sorted_ = true;
     }
@@ -236,7 +239,7 @@
     }
     int32_t index;
     do {
-      index = back_index_.LoadRelaxed();
+      index = back_index_.load(std::memory_order_relaxed);
       if (UNLIKELY(static_cast<size_t>(index) >= limit)) {
         // Stack overflow.
         return false;
diff --git a/runtime/gc/accounting/bitmap-inl.h b/runtime/gc/accounting/bitmap-inl.h
index a71b212..a4273e5 100644
--- a/runtime/gc/accounting/bitmap-inl.h
+++ b/runtime/gc/accounting/bitmap-inl.h
@@ -37,7 +37,7 @@
   auto* atomic_entry = reinterpret_cast<Atomic<uintptr_t>*>(&bitmap_begin_[word_index]);
   uintptr_t old_word;
   do {
-    old_word = atomic_entry->LoadRelaxed();
+    old_word = atomic_entry->load(std::memory_order_relaxed);
     // Fast path: The bit is already set.
     if ((old_word & word_mask) != 0) {
       DCHECK(TestBit(bit_index));
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index 14f5d0e..d9c0418 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -43,7 +43,7 @@
   Atomic<uintptr_t>* word_atomic = reinterpret_cast<Atomic<uintptr_t>*>(address);
 
   // Word with the byte we are trying to cas cleared.
-  const uintptr_t cur_word = word_atomic->LoadRelaxed() &
+  const uintptr_t cur_word = word_atomic->load(std::memory_order_relaxed) &
       ~(static_cast<uintptr_t>(0xFF) << shift_in_bits);
   const uintptr_t old_word = cur_word | (static_cast<uintptr_t>(old_value) << shift_in_bits);
   const uintptr_t new_word = cur_word | (static_cast<uintptr_t>(new_value) << shift_in_bits);
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 384e3c2..d460e00 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -41,7 +41,7 @@
   DCHECK_LT(index, bitmap_size_ / sizeof(intptr_t)) << " bitmap_size_ = " << bitmap_size_;
   uintptr_t old_word;
   do {
-    old_word = atomic_entry->LoadRelaxed();
+    old_word = atomic_entry->load(std::memory_order_relaxed);
     // Fast path: The bit is already set.
     if ((old_word & mask) != 0) {
       DCHECK(Test(obj));
@@ -59,7 +59,8 @@
   DCHECK(bitmap_begin_ != nullptr);
   DCHECK_GE(addr, heap_begin_);
   const uintptr_t offset = addr - heap_begin_;
-  return (bitmap_begin_[OffsetToIndex(offset)].LoadRelaxed() & OffsetToMask(offset)) != 0;
+  size_t index = OffsetToIndex(offset);
+  return (bitmap_begin_[index].load(std::memory_order_relaxed) & OffsetToMask(offset)) != 0;
 }
 
 template<size_t kAlignment>
@@ -119,7 +120,7 @@
 
     // Traverse the middle, full part.
     for (size_t i = index_start + 1; i < index_end; ++i) {
-      uintptr_t w = bitmap_begin_[i].LoadRelaxed();
+      uintptr_t w = bitmap_begin_[i].load(std::memory_order_relaxed);
       if (w != 0) {
         const uintptr_t ptr_base = IndexToOffset(i) + heap_begin_;
         // Iterate on the bits set in word `w`, from the least to the most significant bit.
@@ -168,7 +169,7 @@
   uintptr_t end = OffsetToIndex(HeapLimit() - heap_begin_ - 1);
   Atomic<uintptr_t>* bitmap_begin = bitmap_begin_;
   for (uintptr_t i = 0; i <= end; ++i) {
-    uintptr_t w = bitmap_begin[i].LoadRelaxed();
+    uintptr_t w = bitmap_begin[i].load(std::memory_order_relaxed);
     if (w != 0) {
       uintptr_t ptr_base = IndexToOffset(i) + heap_begin_;
       do {
@@ -192,7 +193,7 @@
   const uintptr_t mask = OffsetToMask(offset);
   DCHECK_LT(index, bitmap_size_ / sizeof(intptr_t)) << " bitmap_size_ = " << bitmap_size_;
   Atomic<uintptr_t>* atomic_entry = &bitmap_begin_[index];
-  uintptr_t old_word = atomic_entry->LoadRelaxed();
+  uintptr_t old_word = atomic_entry->load(std::memory_order_relaxed);
   if (kSetBit) {
     // Check the bit before setting the word incase we are trying to mark a read only bitmap
     // like an image space bitmap. This bitmap is mapped as read only and will fault if we
@@ -200,10 +201,10 @@
     // occur if we check before setting the bit. This also prevents dirty pages that would
     // occur if the bitmap was read write and we did not check the bit.
     if ((old_word & mask) == 0) {
-      atomic_entry->StoreRelaxed(old_word | mask);
+      atomic_entry->store(old_word | mask, std::memory_order_relaxed);
     }
   } else {
-    atomic_entry->StoreRelaxed(old_word & ~mask);
+    atomic_entry->store(old_word & ~mask, std::memory_order_relaxed);
   }
   DCHECK_EQ(Test(obj), kSetBit);
   return (old_word & mask) != 0;
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 0247564..d84288f 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -145,7 +145,7 @@
   Atomic<uintptr_t>* const src = source_bitmap->Begin();
   Atomic<uintptr_t>* const dest = Begin();
   for (size_t i = 0; i < count; ++i) {
-    dest[i].StoreRelaxed(src[i].LoadRelaxed());
+    dest[i].store(src[i].load(std::memory_order_relaxed), std::memory_order_relaxed);
   }
 }
 
@@ -184,7 +184,8 @@
   Atomic<uintptr_t>* live = live_bitmap.bitmap_begin_;
   Atomic<uintptr_t>* mark = mark_bitmap.bitmap_begin_;
   for (size_t i = start; i <= end; i++) {
-    uintptr_t garbage = live[i].LoadRelaxed() & ~mark[i].LoadRelaxed();
+    uintptr_t garbage =
+        live[i].load(std::memory_order_relaxed) & ~mark[i].load(std::memory_order_relaxed);
     if (UNLIKELY(garbage != 0)) {
       uintptr_t ptr_base = IndexToOffset(i) + live_bitmap.heap_begin_;
       do {
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 56983be..6e345fb 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -78,13 +78,13 @@
     if (kIsDebugBuild) {
       if (Thread::Current() == thread_running_gc_) {
         DCHECK(!kGrayImmuneObject ||
-               updated_all_immune_objects_.LoadRelaxed() ||
+               updated_all_immune_objects_.load(std::memory_order_relaxed) ||
                gc_grays_immune_objects_);
       } else {
         DCHECK(kGrayImmuneObject);
       }
     }
-    if (!kGrayImmuneObject || updated_all_immune_objects_.LoadRelaxed()) {
+    if (!kGrayImmuneObject || updated_all_immune_objects_.load(std::memory_order_relaxed)) {
       return ref;
     }
     // This may or may not succeed, which is ok because the object may already be gray.
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index b10c504..bb5167f 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -291,14 +291,14 @@
   rb_mark_bit_stack_full_ = false;
   mark_from_read_barrier_measurements_ = measure_read_barrier_slow_path_;
   if (measure_read_barrier_slow_path_) {
-    rb_slow_path_ns_.StoreRelaxed(0);
-    rb_slow_path_count_.StoreRelaxed(0);
-    rb_slow_path_count_gc_.StoreRelaxed(0);
+    rb_slow_path_ns_.store(0, std::memory_order_relaxed);
+    rb_slow_path_count_.store(0, std::memory_order_relaxed);
+    rb_slow_path_count_gc_.store(0, std::memory_order_relaxed);
   }
 
   immune_spaces_.Reset();
-  bytes_moved_.StoreRelaxed(0);
-  objects_moved_.StoreRelaxed(0);
+  bytes_moved_.store(0, std::memory_order_relaxed);
+  objects_moved_.store(0, std::memory_order_relaxed);
   GcCause gc_cause = GetCurrentIteration()->GetGcCause();
   if (gc_cause == kGcCauseExplicit ||
       gc_cause == kGcCauseCollectorTransition ||
@@ -308,7 +308,7 @@
     force_evacuate_all_ = false;
   }
   if (kUseBakerReadBarrier) {
-    updated_all_immune_objects_.StoreRelaxed(false);
+    updated_all_immune_objects_.store(false, std::memory_order_relaxed);
     // GC may gray immune objects in the thread flip.
     gc_grays_immune_objects_ = true;
     if (kIsDebugBuild) {
@@ -350,7 +350,7 @@
         concurrent_copying_->region_space_->RevokeThreadLocalBuffers(thread);
         reinterpret_cast<Atomic<size_t>*>(
             &concurrent_copying_->from_space_num_objects_at_first_pause_)->
-                FetchAndAddSequentiallyConsistent(thread_local_objects);
+                fetch_add(thread_local_objects, std::memory_order_seq_cst);
       } else {
         concurrent_copying_->region_space_->RevokeThreadLocalBuffers(thread);
       }
@@ -430,7 +430,8 @@
       cc->from_space_num_bytes_at_first_pause_ = cc->region_space_->GetBytesAllocated();
     }
     cc->is_marking_ = true;
-    cc->mark_stack_mode_.StoreRelaxed(ConcurrentCopying::kMarkStackModeThreadLocal);
+    cc->mark_stack_mode_.store(ConcurrentCopying::kMarkStackModeThreadLocal,
+                               std::memory_order_relaxed);
     if (kIsDebugBuild) {
       cc->region_space_->AssertAllRegionLiveBytesZeroOrCleared();
     }
@@ -728,7 +729,7 @@
   }
   // Since all of the objects that may point to other spaces are gray, we can avoid all the read
   // barriers in the immune spaces.
-  updated_all_immune_objects_.StoreRelaxed(true);
+  updated_all_immune_objects_.store(true, std::memory_order_relaxed);
 }
 
 void ConcurrentCopying::SwapStacks() {
@@ -816,7 +817,7 @@
   if (kUseBakerReadBarrier) {
     // This release fence makes the field updates in the above loop visible before allowing mutator
     // getting access to immune objects without graying it first.
-    updated_all_immune_objects_.StoreRelease(true);
+    updated_all_immune_objects_.store(true, std::memory_order_release);
     // Now whiten immune objects concurrently accessed and grayed by mutators. We can't do this in
     // the above loop because we would incorrectly disable the read barrier by whitening an object
     // which may point to an unscanned, white object, breaking the to-space invariant.
@@ -1018,8 +1019,8 @@
     heap_->rb_table_->ClearAll();
     DCHECK(heap_->rb_table_->IsAllCleared());
   }
-  is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(1);
-  mark_stack_mode_.StoreSequentiallyConsistent(kMarkStackModeOff);
+  is_mark_stack_push_disallowed_.store(1, std::memory_order_seq_cst);
+  mark_stack_mode_.store(kMarkStackModeOff, std::memory_order_seq_cst);
 }
 
 void ConcurrentCopying::PushOntoFalseGrayStack(mirror::Object* ref) {
@@ -1069,11 +1070,11 @@
 }
 
 void ConcurrentCopying::PushOntoMarkStack(mirror::Object* to_ref) {
-  CHECK_EQ(is_mark_stack_push_disallowed_.LoadRelaxed(), 0)
+  CHECK_EQ(is_mark_stack_push_disallowed_.load(std::memory_order_relaxed), 0)
       << " " << to_ref << " " << mirror::Object::PrettyTypeOf(to_ref);
   Thread* self = Thread::Current();  // TODO: pass self as an argument from call sites?
   CHECK(thread_running_gc_ != nullptr);
-  MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+  MarkStackMode mark_stack_mode = mark_stack_mode_.load(std::memory_order_relaxed);
   if (LIKELY(mark_stack_mode == kMarkStackModeThreadLocal)) {
     if (LIKELY(self == thread_running_gc_)) {
       // If GC-running thread, use the GC mark stack instead of a thread-local mark stack.
@@ -1412,7 +1413,7 @@
   CHECK(self == thread_running_gc_);
   CHECK(self->GetThreadLocalMarkStack() == nullptr);
   size_t count = 0;
-  MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+  MarkStackMode mark_stack_mode = mark_stack_mode_.load(std::memory_order_relaxed);
   if (mark_stack_mode == kMarkStackModeThreadLocal) {
     // Process the thread-local mark stacks and the GC mark stack.
     count += ProcessThreadLocalMarkStacks(/* disable_weak_ref_access */ false,
@@ -1597,10 +1598,10 @@
   CHECK(thread_running_gc_ != nullptr);
   CHECK_EQ(self, thread_running_gc_);
   CHECK(self->GetThreadLocalMarkStack() == nullptr);
-  MarkStackMode before_mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+  MarkStackMode before_mark_stack_mode = mark_stack_mode_.load(std::memory_order_relaxed);
   CHECK_EQ(static_cast<uint32_t>(before_mark_stack_mode),
            static_cast<uint32_t>(kMarkStackModeThreadLocal));
-  mark_stack_mode_.StoreRelaxed(kMarkStackModeShared);
+  mark_stack_mode_.store(kMarkStackModeShared, std::memory_order_relaxed);
   DisableWeakRefAccessCallback dwrac(this);
   // Process the thread local mark stacks one last time after switching to the shared mark stack
   // mode and disable weak ref accesses.
@@ -1615,10 +1616,10 @@
   CHECK(thread_running_gc_ != nullptr);
   CHECK_EQ(self, thread_running_gc_);
   CHECK(self->GetThreadLocalMarkStack() == nullptr);
-  MarkStackMode before_mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+  MarkStackMode before_mark_stack_mode = mark_stack_mode_.load(std::memory_order_relaxed);
   CHECK_EQ(static_cast<uint32_t>(before_mark_stack_mode),
            static_cast<uint32_t>(kMarkStackModeShared));
-  mark_stack_mode_.StoreRelaxed(kMarkStackModeGcExclusive);
+  mark_stack_mode_.store(kMarkStackModeGcExclusive, std::memory_order_relaxed);
   QuasiAtomic::ThreadFenceForConstructor();
   if (kVerboseMode) {
     LOG(INFO) << "Switched to GC exclusive mark stack mode";
@@ -1630,7 +1631,7 @@
   CHECK(thread_running_gc_ != nullptr);
   CHECK_EQ(self, thread_running_gc_);
   CHECK(self->GetThreadLocalMarkStack() == nullptr);
-  MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
+  MarkStackMode mark_stack_mode = mark_stack_mode_.load(std::memory_order_relaxed);
   if (mark_stack_mode == kMarkStackModeThreadLocal) {
     // Thread-local mark stack mode.
     RevokeThreadLocalMarkStacks(false, nullptr);
@@ -1738,9 +1739,9 @@
     }
     IssueEmptyCheckpoint();
     // Disable the check.
-    is_mark_stack_push_disallowed_.StoreSequentiallyConsistent(0);
+    is_mark_stack_push_disallowed_.store(0, std::memory_order_seq_cst);
     if (kUseBakerReadBarrier) {
-      updated_all_immune_objects_.StoreSequentiallyConsistent(false);
+      updated_all_immune_objects_.store(false, std::memory_order_seq_cst);
     }
     CheckEmptyMarkStack();
   }
@@ -1753,10 +1754,10 @@
     const uint64_t from_objects = region_space_->GetObjectsAllocatedInFromSpace();
     const uint64_t unevac_from_bytes = region_space_->GetBytesAllocatedInUnevacFromSpace();
     const uint64_t unevac_from_objects = region_space_->GetObjectsAllocatedInUnevacFromSpace();
-    uint64_t to_bytes = bytes_moved_.LoadSequentiallyConsistent();
-    cumulative_bytes_moved_.FetchAndAddRelaxed(to_bytes);
-    uint64_t to_objects = objects_moved_.LoadSequentiallyConsistent();
-    cumulative_objects_moved_.FetchAndAddRelaxed(to_objects);
+    uint64_t to_bytes = bytes_moved_.load(std::memory_order_seq_cst);
+    cumulative_bytes_moved_.fetch_add(to_bytes, std::memory_order_relaxed);
+    uint64_t to_objects = objects_moved_.load(std::memory_order_seq_cst);
+    cumulative_objects_moved_.fetch_add(to_objects, std::memory_order_relaxed);
     if (kEnableFromSpaceAccountingCheck) {
       CHECK_EQ(from_space_num_objects_at_first_pause_, from_objects + unevac_from_objects);
       CHECK_EQ(from_space_num_bytes_at_first_pause_, from_bytes + unevac_from_bytes);
@@ -1787,12 +1788,12 @@
                 << " unevac_from_space size=" << region_space_->UnevacFromSpaceSize()
                 << " to_space size=" << region_space_->ToSpaceSize();
       LOG(INFO) << "(before) num_bytes_allocated="
-                << heap_->num_bytes_allocated_.LoadSequentiallyConsistent();
+                << heap_->num_bytes_allocated_.load(std::memory_order_seq_cst);
     }
     RecordFree(ObjectBytePair(freed_objects, freed_bytes));
     if (kVerboseMode) {
       LOG(INFO) << "(after) num_bytes_allocated="
-                << heap_->num_bytes_allocated_.LoadSequentiallyConsistent();
+                << heap_->num_bytes_allocated_.load(std::memory_order_seq_cst);
     }
   }
 
@@ -2042,7 +2043,7 @@
       if (Thread::Current() == thread_running_gc_ && !gc_grays_immune_objects_) {
         return;
       }
-      bool updated_all_immune_objects = updated_all_immune_objects_.LoadSequentiallyConsistent();
+      bool updated_all_immune_objects = updated_all_immune_objects_.load(std::memory_order_seq_cst);
       CHECK(updated_all_immune_objects || ref->GetReadBarrierState() == ReadBarrier::GrayState())
           << "Unmarked immune space ref. obj=" << obj << " rb_state="
           << (obj != nullptr ? obj->GetReadBarrierState() : 0U)
@@ -2165,7 +2166,7 @@
     mirror::Object* expected_ref = ref;
     mirror::Object* new_ref = to_ref;
     do {
-      if (expected_ref != addr->LoadRelaxed()) {
+      if (expected_ref != addr->load(std::memory_order_relaxed)) {
         // It was updated by the mutator.
         break;
       }
@@ -2184,7 +2185,7 @@
     auto new_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(to_ref);
     // If the cas fails, then it was updated by the mutator.
     do {
-      if (ref != addr->LoadRelaxed().AsMirrorPtr()) {
+      if (ref != addr->load(std::memory_order_relaxed).AsMirrorPtr()) {
         // It was updated by the mutator.
         break;
       }
@@ -2378,8 +2379,9 @@
       fall_back_to_non_moving = true;
       if (kVerboseMode) {
         LOG(INFO) << "Out of memory in the to-space. Fall back to non-moving. skipped_bytes="
-                  << to_space_bytes_skipped_.LoadSequentiallyConsistent()
-                  << " skipped_objects=" << to_space_objects_skipped_.LoadSequentiallyConsistent();
+                  << to_space_bytes_skipped_.load(std::memory_order_seq_cst)
+                  << " skipped_objects="
+                  << to_space_objects_skipped_.load(std::memory_order_seq_cst);
       }
       fall_back_to_non_moving = true;
       to_ref = heap_->non_moving_space_->Alloc(Thread::Current(), obj_size,
@@ -2431,9 +2433,9 @@
           region_space_->FreeLarge</*kForEvac*/ true>(to_ref, bytes_allocated);
         } else {
           // Record the lost copy for later reuse.
-          heap_->num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated);
-          to_space_bytes_skipped_.FetchAndAddSequentiallyConsistent(bytes_allocated);
-          to_space_objects_skipped_.FetchAndAddSequentiallyConsistent(1);
+          heap_->num_bytes_allocated_.fetch_add(bytes_allocated, std::memory_order_seq_cst);
+          to_space_bytes_skipped_.fetch_add(bytes_allocated, std::memory_order_seq_cst);
+          to_space_objects_skipped_.fetch_add(1, std::memory_order_seq_cst);
           MutexLock mu(Thread::Current(), skipped_blocks_lock_);
           skipped_blocks_map_.insert(std::make_pair(bytes_allocated,
                                                     reinterpret_cast<uint8_t*>(to_ref)));
@@ -2477,8 +2479,8 @@
     bool success = from_ref->CasLockWordWeakRelaxed(old_lock_word, new_lock_word);
     if (LIKELY(success)) {
       // The CAS succeeded.
-      objects_moved_.FetchAndAddRelaxed(1);
-      bytes_moved_.FetchAndAddRelaxed(region_space_alloc_size);
+      objects_moved_.fetch_add(1, std::memory_order_relaxed);
+      bytes_moved_.fetch_add(region_space_alloc_size, std::memory_order_relaxed);
       if (LIKELY(!fall_back_to_non_moving)) {
         DCHECK(region_space_->IsInToSpace(to_ref));
       } else {
@@ -2704,9 +2706,10 @@
   }
   if (measure_read_barrier_slow_path_) {
     MutexLock mu(self, rb_slow_path_histogram_lock_);
-    rb_slow_path_time_histogram_.AdjustAndAddValue(rb_slow_path_ns_.LoadRelaxed());
-    rb_slow_path_count_total_ += rb_slow_path_count_.LoadRelaxed();
-    rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.LoadRelaxed();
+    rb_slow_path_time_histogram_.AdjustAndAddValue(
+        rb_slow_path_ns_.load(std::memory_order_relaxed));
+    rb_slow_path_count_total_ += rb_slow_path_count_.load(std::memory_order_relaxed);
+    rb_slow_path_count_gc_total_ += rb_slow_path_count_gc_.load(std::memory_order_relaxed);
   }
 }
 
@@ -2760,15 +2763,15 @@
 
 mirror::Object* ConcurrentCopying::MarkFromReadBarrierWithMeasurements(mirror::Object* from_ref) {
   if (Thread::Current() != thread_running_gc_) {
-    rb_slow_path_count_.FetchAndAddRelaxed(1u);
+    rb_slow_path_count_.fetch_add(1u, std::memory_order_relaxed);
   } else {
-    rb_slow_path_count_gc_.FetchAndAddRelaxed(1u);
+    rb_slow_path_count_gc_.fetch_add(1u, std::memory_order_relaxed);
   }
   ScopedTrace tr(__FUNCTION__);
   const uint64_t start_time = measure_read_barrier_slow_path_ ? NanoTime() : 0u;
   mirror::Object* ret = Mark(from_ref);
   if (measure_read_barrier_slow_path_) {
-    rb_slow_path_ns_.FetchAndAddRelaxed(NanoTime() - start_time);
+    rb_slow_path_ns_.fetch_add(NanoTime() - start_time, std::memory_order_relaxed);
   }
   return ret;
 }
@@ -2787,8 +2790,10 @@
   if (rb_slow_path_count_gc_total_ > 0) {
     os << "GC slow path count " << rb_slow_path_count_gc_total_ << "\n";
   }
-  os << "Cumulative bytes moved " << cumulative_bytes_moved_.LoadRelaxed() << "\n";
-  os << "Cumulative objects moved " << cumulative_objects_moved_.LoadRelaxed() << "\n";
+  os << "Cumulative bytes moved "
+     << cumulative_bytes_moved_.load(std::memory_order_relaxed) << "\n";
+  os << "Cumulative objects moved "
+     << cumulative_objects_moved_.load(std::memory_order_relaxed) << "\n";
 
   os << "Peak regions allocated "
      << region_space_->GetMaxPeakNumNonFreeRegions() << " ("
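
The counter changes above all follow the same one-to-one mapping from the removed Atomic<T> wrappers to std::atomic member functions. A minimal standalone sketch of the pattern, using illustrative counters rather than the real ConcurrentCopying fields:

#include <atomic>
#include <cstdint>

// Illustrative stand-ins for GC statistics counters; not the ART declarations.
std::atomic<uint64_t> bytes_moved{0};
std::atomic<uint64_t> cumulative_bytes_moved{0};

void RecordCopy(uint64_t bytes) {
  // Was: bytes_moved_.FetchAndAddRelaxed(bytes)
  bytes_moved.fetch_add(bytes, std::memory_order_relaxed);
}

uint64_t FlushToCumulative() {
  // Was: LoadSequentiallyConsistent() followed by FetchAndAddRelaxed(...)
  uint64_t moved = bytes_moved.load(std::memory_order_seq_cst);
  cumulative_bytes_moved.fetch_add(moved, std::memory_order_relaxed);
  return moved;
}
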
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 9ab965e..2335964 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -116,21 +116,21 @@
   mark_stack_ = heap_->GetMarkStack();
   DCHECK(mark_stack_ != nullptr);
   immune_spaces_.Reset();
-  no_reference_class_count_.StoreRelaxed(0);
-  normal_count_.StoreRelaxed(0);
-  class_count_.StoreRelaxed(0);
-  object_array_count_.StoreRelaxed(0);
-  other_count_.StoreRelaxed(0);
-  reference_count_.StoreRelaxed(0);
-  large_object_test_.StoreRelaxed(0);
-  large_object_mark_.StoreRelaxed(0);
-  overhead_time_ .StoreRelaxed(0);
-  work_chunks_created_.StoreRelaxed(0);
-  work_chunks_deleted_.StoreRelaxed(0);
-  mark_null_count_.StoreRelaxed(0);
-  mark_immune_count_.StoreRelaxed(0);
-  mark_fastpath_count_.StoreRelaxed(0);
-  mark_slowpath_count_.StoreRelaxed(0);
+  no_reference_class_count_.store(0, std::memory_order_relaxed);
+  normal_count_.store(0, std::memory_order_relaxed);
+  class_count_.store(0, std::memory_order_relaxed);
+  object_array_count_.store(0, std::memory_order_relaxed);
+  other_count_.store(0, std::memory_order_relaxed);
+  reference_count_.store(0, std::memory_order_relaxed);
+  large_object_test_.store(0, std::memory_order_relaxed);
+  large_object_mark_.store(0, std::memory_order_relaxed);
+  overhead_time_.store(0, std::memory_order_relaxed);
+  work_chunks_created_.store(0, std::memory_order_relaxed);
+  work_chunks_deleted_.store(0, std::memory_order_relaxed);
+  mark_null_count_.store(0, std::memory_order_relaxed);
+  mark_immune_count_.store(0, std::memory_order_relaxed);
+  mark_fastpath_count_.store(0, std::memory_order_relaxed);
+  mark_slowpath_count_.store(0, std::memory_order_relaxed);
   {
     // TODO: I don't think we should need heap bitmap lock to Get the mark bitmap.
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -724,7 +724,7 @@
         if (kUseFinger) {
           std::atomic_thread_fence(std::memory_order_seq_cst);
           if (reinterpret_cast<uintptr_t>(ref) >=
-              static_cast<uintptr_t>(mark_sweep_->atomic_finger_.LoadRelaxed())) {
+              static_cast<uintptr_t>(mark_sweep_->atomic_finger_.load(std::memory_order_relaxed))) {
             return;
           }
         }
@@ -1046,7 +1046,7 @@
           // This function does not handle heap end increasing, so we must use the space end.
           uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
           uintptr_t end = reinterpret_cast<uintptr_t>(space->End());
-          atomic_finger_.StoreRelaxed(AtomicInteger::MaxValue());
+          atomic_finger_.store(AtomicInteger::MaxValue(), std::memory_order_relaxed);
 
           // Create a few worker tasks.
           const size_t n = thread_count * 2;
@@ -1405,8 +1405,8 @@
   thread_pool->Wait(self, true, true);
   thread_pool->StopWorkers(self);
   mark_stack_->Reset();
-  CHECK_EQ(work_chunks_created_.LoadSequentiallyConsistent(),
-           work_chunks_deleted_.LoadSequentiallyConsistent())
+  CHECK_EQ(work_chunks_created_.load(std::memory_order_seq_cst),
+           work_chunks_deleted_.load(std::memory_order_seq_cst))
       << " some of the work chunks were leaked";
 }
 
@@ -1462,28 +1462,32 @@
   if (kCountScannedTypes) {
     VLOG(gc)
         << "MarkSweep scanned"
-        << " no reference objects=" << no_reference_class_count_.LoadRelaxed()
-        << " normal objects=" << normal_count_.LoadRelaxed()
-        << " classes=" << class_count_.LoadRelaxed()
-        << " object arrays=" << object_array_count_.LoadRelaxed()
-        << " references=" << reference_count_.LoadRelaxed()
-        << " other=" << other_count_.LoadRelaxed();
+        << " no reference objects=" << no_reference_class_count_.load(std::memory_order_relaxed)
+        << " normal objects=" << normal_count_.load(std::memory_order_relaxed)
+        << " classes=" << class_count_.load(std::memory_order_relaxed)
+        << " object arrays=" << object_array_count_.load(std::memory_order_relaxed)
+        << " references=" << reference_count_.load(std::memory_order_relaxed)
+        << " other=" << other_count_.load(std::memory_order_relaxed);
   }
   if (kCountTasks) {
-    VLOG(gc) << "Total number of work chunks allocated: " << work_chunks_created_.LoadRelaxed();
+    VLOG(gc)
+        << "Total number of work chunks allocated: "
+        << work_chunks_created_.load(std::memory_order_relaxed);
   }
   if (kMeasureOverhead) {
-    VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_.LoadRelaxed());
+    VLOG(gc) << "Overhead time " << PrettyDuration(overhead_time_.load(std::memory_order_relaxed));
   }
   if (kProfileLargeObjects) {
-    VLOG(gc) << "Large objects tested " << large_object_test_.LoadRelaxed()
-        << " marked " << large_object_mark_.LoadRelaxed();
+    VLOG(gc)
+        << "Large objects tested " << large_object_test_.load(std::memory_order_relaxed)
+        << " marked " << large_object_mark_.load(std::memory_order_relaxed);
   }
   if (kCountMarkedObjects) {
-    VLOG(gc) << "Marked: null=" << mark_null_count_.LoadRelaxed()
-        << " immune=" <<  mark_immune_count_.LoadRelaxed()
-        << " fastpath=" << mark_fastpath_count_.LoadRelaxed()
-        << " slowpath=" << mark_slowpath_count_.LoadRelaxed();
+    VLOG(gc)
+        << "Marked: null=" << mark_null_count_.load(std::memory_order_relaxed)
+        << " immune=" <<  mark_immune_count_.load(std::memory_order_relaxed)
+        << " fastpath=" << mark_fastpath_count_.load(std::memory_order_relaxed)
+        << " slowpath=" << mark_slowpath_count_.load(std::memory_order_relaxed);
   }
   CHECK(mark_stack_->IsEmpty());  // Ensure that the mark stack is empty.
   mark_stack_->Reset();
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 41ee183..948d233 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -156,7 +156,7 @@
     pre_fence_visitor(obj, usable_size);
     QuasiAtomic::ThreadFenceForConstructor();
     size_t num_bytes_allocated_before =
-        num_bytes_allocated_.FetchAndAddRelaxed(bytes_tl_bulk_allocated);
+        num_bytes_allocated_.fetch_add(bytes_tl_bulk_allocated, std::memory_order_relaxed);
     new_num_bytes_allocated = num_bytes_allocated_before + bytes_tl_bulk_allocated;
     if (bytes_tl_bulk_allocated > 0) {
       // Only trace when we get an increase in the number of bytes allocated. This happens when
@@ -187,7 +187,7 @@
       DCHECK(allocation_records_ != nullptr);
       allocation_records_->RecordAllocation(self, &obj, bytes_allocated);
     }
-    AllocationListener* l = alloc_listener_.LoadSequentiallyConsistent();
+    AllocationListener* l = alloc_listener_.load(std::memory_order_seq_cst);
     if (l != nullptr) {
       // Same as above. We assume that a listener that was once stored will never be deleted.
       // Otherwise we'd have to perform this under a lock.
@@ -393,7 +393,7 @@
 inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type,
                                             size_t alloc_size,
                                             bool grow) {
-  size_t new_footprint = num_bytes_allocated_.LoadSequentiallyConsistent() + alloc_size;
+  size_t new_footprint = num_bytes_allocated_.load(std::memory_order_seq_cst) + alloc_size;
   if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
     if (UNLIKELY(new_footprint > growth_limit_)) {
       return true;
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index a725ec4..52afb38 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -549,7 +549,7 @@
     AddRememberedSet(non_moving_space_rem_set);
   }
   // TODO: Count objects in the image space here?
-  num_bytes_allocated_.StoreRelaxed(0);
+  num_bytes_allocated_.store(0, std::memory_order_relaxed);
   mark_stack_.reset(accounting::ObjectStack::Create("mark stack", kDefaultMarkStackSize,
                                                     kDefaultMarkStackSize));
   const size_t alloc_stack_capacity = max_allocation_stack_size_ + kAllocationStackReserveSize;
@@ -1053,7 +1053,8 @@
   }
 
   os << "Registered native bytes allocated: "
-     << old_native_bytes_allocated_.LoadRelaxed() + new_native_bytes_allocated_.LoadRelaxed()
+     << (old_native_bytes_allocated_.load(std::memory_order_relaxed) +
+         new_native_bytes_allocated_.load(std::memory_order_relaxed))
      << "\n";
 
   BaseMutex::DumpAll(os);
@@ -1120,11 +1121,7 @@
 ALWAYS_INLINE
 static inline AllocationListener* GetAndOverwriteAllocationListener(
     Atomic<AllocationListener*>* storage, AllocationListener* new_value) {
-  AllocationListener* old;
-  do {
-    old = storage->LoadSequentiallyConsistent();
-  } while (!storage->CompareAndSetStrongSequentiallyConsistent(old, new_value));
-  return old;
+  return storage->exchange(new_value);
 }
 
 Heap::~Heap() {
@@ -1142,12 +1139,11 @@
   delete thread_flip_lock_;
   delete pending_task_lock_;
   delete backtrace_lock_;
-  if (unique_backtrace_count_.LoadRelaxed() != 0 || seen_backtrace_count_.LoadRelaxed() != 0) {
-    LOG(INFO) << "gc stress unique=" << unique_backtrace_count_.LoadRelaxed()
-        << " total=" << seen_backtrace_count_.LoadRelaxed() +
-            unique_backtrace_count_.LoadRelaxed();
+  uint64_t unique_count = unique_backtrace_count_.load(std::memory_order_relaxed);
+  uint64_t seen_count = seen_backtrace_count_.load(std::memory_order_relaxed);
+  if (unique_count != 0 || seen_count != 0) {
+    LOG(INFO) << "gc stress unique=" << unique_count << " total=" << (unique_count + seen_count);
   }
-
   VLOG(heap) << "Finished ~Heap()";
 }
 
@@ -1493,7 +1489,7 @@
   }
 
   // Ignore early dawn of the universe verifications.
-  if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.LoadRelaxed()) < 10 * KB)) {
+  if (UNLIKELY(num_bytes_allocated_.load(std::memory_order_relaxed) < 10 * KB)) {
     return;
   }
   CHECK_ALIGNED(obj.Ptr(), kObjectAlignment) << "Object isn't aligned";
@@ -1525,9 +1521,10 @@
   // Use signed comparison since freed bytes can be negative when background compaction foreground
   // transitions occurs. This is caused by the moving objects from a bump pointer space to a
   // free list backed space typically increasing memory footprint due to padding and binning.
-  DCHECK_LE(freed_bytes, static_cast<int64_t>(num_bytes_allocated_.LoadRelaxed()));
+  DCHECK_LE(freed_bytes,
+            static_cast<int64_t>(num_bytes_allocated_.load(std::memory_order_relaxed)));
   // Note: This relies on 2s complement for handling negative freed_bytes.
-  num_bytes_allocated_.FetchAndSubSequentiallyConsistent(static_cast<ssize_t>(freed_bytes));
+  num_bytes_allocated_.fetch_sub(static_cast<ssize_t>(freed_bytes));
   if (Runtime::Current()->HasStatsEnabled()) {
     RuntimeStats* thread_stats = Thread::Current()->GetStats();
     thread_stats->freed_objects += freed_objects;
@@ -1544,10 +1541,10 @@
   // ahead-of-time, bulk counting of bytes allocated in rosalloc thread-local buffers.
   // If there's a concurrent revoke, ok to not necessarily reset num_bytes_freed_revoke_
   // all the way to zero exactly as the remainder will be subtracted at the next GC.
-  size_t bytes_freed = num_bytes_freed_revoke_.LoadSequentiallyConsistent();
-  CHECK_GE(num_bytes_freed_revoke_.FetchAndSubSequentiallyConsistent(bytes_freed),
+  size_t bytes_freed = num_bytes_freed_revoke_.load();
+  CHECK_GE(num_bytes_freed_revoke_.fetch_sub(bytes_freed),
            bytes_freed) << "num_bytes_freed_revoke_ underflow";
-  CHECK_GE(num_bytes_allocated_.FetchAndSubSequentiallyConsistent(bytes_freed),
+  CHECK_GE(num_bytes_allocated_.fetch_sub(bytes_freed),
            bytes_freed) << "num_bytes_allocated_ underflow";
   GetCurrentGcIteration()->SetFreedRevoke(bytes_freed);
 }
@@ -1703,13 +1700,13 @@
           // Always print that we ran homogeneous space compation since this can cause jank.
           VLOG(heap) << "Ran heap homogeneous space compaction, "
                     << " requested defragmentation "
-                    << count_requested_homogeneous_space_compaction_.LoadSequentiallyConsistent()
+                    << count_requested_homogeneous_space_compaction_.load()
                     << " performed defragmentation "
-                    << count_performed_homogeneous_space_compaction_.LoadSequentiallyConsistent()
+                    << count_performed_homogeneous_space_compaction_.load()
                     << " ignored homogeneous space compaction "
-                    << count_ignored_homogeneous_space_compaction_.LoadSequentiallyConsistent()
+                    << count_ignored_homogeneous_space_compaction_.load()
                     << " delayed count = "
-                    << count_delayed_oom_.LoadSequentiallyConsistent();
+                    << count_delayed_oom_.load();
         }
         break;
       }
@@ -1972,7 +1969,7 @@
   VLOG(heap) << "TransitionCollector: " << static_cast<int>(collector_type_)
              << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
-  uint32_t before_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
+  uint32_t before_allocated = num_bytes_allocated_.load();
   Runtime* const runtime = Runtime::Current();
   Thread* const self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
@@ -2110,7 +2107,7 @@
     ScopedObjectAccess soa(self);
     soa.Vm()->UnloadNativeLibraries();
   }
-  int32_t after_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
+  int32_t after_allocated = num_bytes_allocated_.load(std::memory_order_seq_cst);
   int32_t delta_allocated = before_allocated - after_allocated;
   std::string saved_str;
   if (delta_allocated >= 0) {
@@ -2559,7 +2556,9 @@
     // Move all bytes from new_native_bytes_allocated_ to
     // old_native_bytes_allocated_ now that GC has been triggered, resetting
     // new_native_bytes_allocated_ to zero in the process.
-    old_native_bytes_allocated_.FetchAndAddRelaxed(new_native_bytes_allocated_.ExchangeRelaxed(0));
+    old_native_bytes_allocated_.fetch_add(
+        new_native_bytes_allocated_.exchange(0, std::memory_order_relaxed),
+        std::memory_order_relaxed);
   }
 
   DCHECK_LT(gc_type, collector::kGcTypeMax);
@@ -2759,7 +2758,7 @@
       : heap_(heap), fail_count_(fail_count), verify_referent_(verify_referent) {}
 
   size_t GetFailureCount() const {
-    return fail_count_->LoadSequentiallyConsistent();
+    return fail_count_->load(std::memory_order_seq_cst);
   }
 
   void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED, ObjPtr<mirror::Reference> ref) const
@@ -2811,7 +2810,7 @@
       // Verify that the reference is live.
       return true;
     }
-    if (fail_count_->FetchAndAddSequentiallyConsistent(1) == 0) {
+    if (fail_count_->fetch_add(1, std::memory_order_seq_cst) == 0) {
       // Print message on only on first failure to prevent spam.
       LOG(ERROR) << "!!!!!!!!!!!!!!Heap corruption detected!!!!!!!!!!!!!!!!!!!";
     }
@@ -2924,7 +2923,7 @@
   }
 
   size_t GetFailureCount() const {
-    return fail_count_->LoadSequentiallyConsistent();
+    return fail_count_->load(std::memory_order_seq_cst);
   }
 
  private:
@@ -3605,7 +3604,7 @@
 }
 
 void Heap::ClearConcurrentGCRequest() {
-  concurrent_gc_pending_.StoreRelaxed(false);
+  concurrent_gc_pending_.store(false, std::memory_order_relaxed);
 }
 
 void Heap::RequestConcurrentGC(Thread* self, GcCause cause, bool force_full) {
@@ -3732,8 +3731,9 @@
   if (rosalloc_space_ != nullptr) {
     size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread);
     if (freed_bytes_revoke > 0U) {
-      num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke);
-      CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed());
+      num_bytes_freed_revoke_.fetch_add(freed_bytes_revoke, std::memory_order_seq_cst);
+      CHECK_GE(num_bytes_allocated_.load(std::memory_order_relaxed),
+               num_bytes_freed_revoke_.load(std::memory_order_relaxed));
     }
   }
   if (bump_pointer_space_ != nullptr) {
@@ -3748,8 +3748,9 @@
   if (rosalloc_space_ != nullptr) {
     size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread);
     if (freed_bytes_revoke > 0U) {
-      num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke);
-      CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed());
+      num_bytes_freed_revoke_.fetch_add(freed_bytes_revoke, std::memory_order_seq_cst);
+      CHECK_GE(num_bytes_allocated_.load(std::memory_order_relaxed),
+               num_bytes_freed_revoke_.load(std::memory_order_relaxed));
     }
   }
 }
@@ -3758,8 +3759,9 @@
   if (rosalloc_space_ != nullptr) {
     size_t freed_bytes_revoke = rosalloc_space_->RevokeAllThreadLocalBuffers();
     if (freed_bytes_revoke > 0U) {
-      num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke);
-      CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed());
+      num_bytes_freed_revoke_.fetch_add(freed_bytes_revoke, std::memory_order_seq_cst);
+      CHECK_GE(num_bytes_allocated_.load(std::memory_order_relaxed),
+               num_bytes_freed_revoke_.load(std::memory_order_relaxed));
     }
   }
   if (bump_pointer_space_ != nullptr) {
@@ -3771,7 +3773,7 @@
 }
 
 bool Heap::IsGCRequestPending() const {
-  return concurrent_gc_pending_.LoadRelaxed();
+  return concurrent_gc_pending_.load(std::memory_order_relaxed);
 }
 
 void Heap::RunFinalization(JNIEnv* env, uint64_t timeout) {
@@ -3781,7 +3783,7 @@
 }
 
 void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) {
-  size_t old_value = new_native_bytes_allocated_.FetchAndAddRelaxed(bytes);
+  size_t old_value = new_native_bytes_allocated_.fetch_add(bytes, std::memory_order_relaxed);
 
   if (old_value > NativeAllocationGcWatermark() * HeapGrowthMultiplier() &&
              !IsGCRequestPending()) {
@@ -3803,12 +3805,12 @@
   size_t allocated;
   size_t new_freed_bytes;
   do {
-    allocated = new_native_bytes_allocated_.LoadRelaxed();
+    allocated = new_native_bytes_allocated_.load(std::memory_order_relaxed);
     new_freed_bytes = std::min(allocated, bytes);
   } while (!new_native_bytes_allocated_.CompareAndSetWeakRelaxed(allocated,
                                                                    allocated - new_freed_bytes));
   if (new_freed_bytes < bytes) {
-    old_native_bytes_allocated_.FetchAndSubRelaxed(bytes - new_freed_bytes);
+    old_native_bytes_allocated_.fetch_sub(bytes - new_freed_bytes, std::memory_order_relaxed);
   }
 }
 
@@ -3942,9 +3944,9 @@
       StackHandleScope<1> hs(self);
       auto h = hs.NewHandleWrapper(obj);
       CollectGarbage(/* clear_soft_references */ false);
-      unique_backtrace_count_.FetchAndAddSequentiallyConsistent(1);
+      unique_backtrace_count_.fetch_add(1, std::memory_order_seq_cst);
     } else {
-      seen_backtrace_count_.FetchAndAddSequentiallyConsistent(1);
+      seen_backtrace_count_.fetch_add(1, std::memory_order_seq_cst);
     }
   }
 }
@@ -4020,11 +4022,11 @@
 }
 
 void Heap::SetGcPauseListener(GcPauseListener* l) {
-  gc_pause_listener_.StoreRelaxed(l);
+  gc_pause_listener_.store(l, std::memory_order_relaxed);
 }
 
 void Heap::RemoveGcPauseListener() {
-  gc_pause_listener_.StoreRelaxed(nullptr);
+  gc_pause_listener_.store(nullptr, std::memory_order_relaxed);
 }
 
 mirror::Object* Heap::AllocWithNewTLAB(Thread* self,
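
Two details in the heap.cc hunks above deserve a note: the CAS loop in GetAndOverwriteAllocationListener collapses to a single exchange() because std::atomic::exchange already returns the previous value, and any call written without an explicit order (exchange(), load(), fetch_sub()) defaults to std::memory_order_seq_cst, the same ordering the removed SequentiallyConsistent wrappers provided. A standalone sketch of that equivalence, using a placeholder listener type rather than ART's AllocationListener:

#include <atomic>

struct Listener {};  // placeholder; stands in for the real AllocationListener

// CAS-loop form, as removed above: retry until the old value is swapped out.
Listener* SwapWithCas(std::atomic<Listener*>& storage, Listener* new_value) {
  Listener* old = storage.load();  // defaults to std::memory_order_seq_cst
  while (!storage.compare_exchange_strong(old, new_value)) {
    // On failure, compare_exchange_strong writes the observed value into
    // `old`, so the loop simply retries with the refreshed expectation.
  }
  return old;
}

// Single-call form used by the patch; also seq_cst by default.
Listener* SwapWithExchange(std::atomic<Listener*>& storage, Listener* new_value) {
  return storage.exchange(new_value);
}
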
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 021fe58..9af57d1 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -496,7 +496,7 @@
 
   // Returns the number of bytes currently allocated.
   size_t GetBytesAllocated() const {
-    return num_bytes_allocated_.LoadSequentiallyConsistent();
+    return num_bytes_allocated_.load(std::memory_order_seq_cst);
   }
 
   // Returns the number of objects currently allocated.
@@ -546,7 +546,7 @@
   // Returns how much free memory we have until we need to grow the heap to perform an allocation.
   // Similar to GetFreeMemoryUntilGC. Implements java.lang.Runtime.freeMemory.
   size_t GetFreeMemory() const {
-    size_t byte_allocated = num_bytes_allocated_.LoadSequentiallyConsistent();
+    size_t byte_allocated = num_bytes_allocated_.load(std::memory_order_seq_cst);
     size_t total_memory = GetTotalMemory();
     // Make sure we don't get a negative number.
     return total_memory - std::min(total_memory, byte_allocated);
@@ -775,11 +775,11 @@
   // Allocation tracking support
   // Callers to this function use double-checked locking to ensure safety on allocation_records_
   bool IsAllocTrackingEnabled() const {
-    return alloc_tracking_enabled_.LoadRelaxed();
+    return alloc_tracking_enabled_.load(std::memory_order_relaxed);
   }
 
   void SetAllocTrackingEnabled(bool enabled) REQUIRES(Locks::alloc_tracker_lock_) {
-    alloc_tracking_enabled_.StoreRelaxed(enabled);
+    alloc_tracking_enabled_.store(enabled, std::memory_order_relaxed);
   }
 
   AllocRecordObjectMap* GetAllocationRecords() const
@@ -825,7 +825,7 @@
   void SetGcPauseListener(GcPauseListener* l);
   // Get the currently installed gc pause listener, or null.
   GcPauseListener* GetGcPauseListener() {
-    return gc_pause_listener_.LoadAcquire();
+    return gc_pause_listener_.load(std::memory_order_acquire);
   }
   // Remove a gc pause listener. Note: the listener must not be deleted, as for performance
   // reasons, we assume it stays valid when we read it (so that we don't require a lock).
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index 9ebb131..4c58549 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -46,16 +46,18 @@
                                                            size_t* bytes_tl_bulk_allocated) {
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   num_bytes = RoundUp(num_bytes, kAlignment);
-  uint8_t* end = end_.LoadRelaxed();
+  uint8_t* end = end_.load(std::memory_order_relaxed);
   if (end + num_bytes > growth_end_) {
     return nullptr;
   }
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(end);
-  end_.StoreRelaxed(end + num_bytes);
+  end_.store(end + num_bytes, std::memory_order_relaxed);
   *bytes_allocated = num_bytes;
   // Use the CAS free versions as an optimization.
-  objects_allocated_.StoreRelaxed(objects_allocated_.LoadRelaxed() + 1);
-  bytes_allocated_.StoreRelaxed(bytes_allocated_.LoadRelaxed() + num_bytes);
+  objects_allocated_.store(objects_allocated_.load(std::memory_order_relaxed) + 1,
+                           std::memory_order_relaxed);
+  bytes_allocated_.store(bytes_allocated_.load(std::memory_order_relaxed) + num_bytes,
+                         std::memory_order_relaxed);
   if (UNLIKELY(usable_size != nullptr)) {
     *usable_size = num_bytes;
   }
@@ -68,7 +70,7 @@
   uint8_t* old_end;
   uint8_t* new_end;
   do {
-    old_end = end_.LoadRelaxed();
+    old_end = end_.load(std::memory_order_relaxed);
     new_end = old_end + num_bytes;
     // If there is no more room in the region, we are out of memory.
     if (UNLIKELY(new_end > growth_end_)) {
@@ -81,8 +83,8 @@
 inline mirror::Object* BumpPointerSpace::AllocNonvirtual(size_t num_bytes) {
   mirror::Object* ret = AllocNonvirtualWithoutAccounting(num_bytes);
   if (ret != nullptr) {
-    objects_allocated_.FetchAndAddSequentiallyConsistent(1);
-    bytes_allocated_.FetchAndAddSequentiallyConsistent(num_bytes);
+    objects_allocated_.fetch_add(1, std::memory_order_seq_cst);
+    bytes_allocated_.fetch_add(num_bytes, std::memory_order_seq_cst);
   }
   return ret;
 }
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index ce0e0f3..e95da01 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -72,8 +72,8 @@
   // Reset the end of the space back to the beginning, we move the end forward as we allocate
   // objects.
   SetEnd(Begin());
-  objects_allocated_.StoreRelaxed(0);
-  bytes_allocated_.StoreRelaxed(0);
+  objects_allocated_.store(0, std::memory_order_relaxed);
+  bytes_allocated_.store(0, std::memory_order_relaxed);
   growth_end_ = Limit();
   {
     MutexLock mu(Thread::Current(), block_lock_);
@@ -160,7 +160,7 @@
 
 uint64_t BumpPointerSpace::GetBytesAllocated() {
   // Start out pre-determined amount (blocks which are not being allocated into).
-  uint64_t total = static_cast<uint64_t>(bytes_allocated_.LoadRelaxed());
+  uint64_t total = static_cast<uint64_t>(bytes_allocated_.load(std::memory_order_relaxed));
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::runtime_shutdown_lock_);
   MutexLock mu2(self, *Locks::thread_list_lock_);
@@ -178,7 +178,7 @@
 
 uint64_t BumpPointerSpace::GetObjectsAllocated() {
   // Start out pre-determined amount (blocks which are not being allocated into).
-  uint64_t total = static_cast<uint64_t>(objects_allocated_.LoadRelaxed());
+  uint64_t total = static_cast<uint64_t>(objects_allocated_.load(std::memory_order_relaxed));
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::runtime_shutdown_lock_);
   MutexLock mu2(self, *Locks::thread_list_lock_);
@@ -195,8 +195,8 @@
 }
 
 void BumpPointerSpace::RevokeThreadLocalBuffersLocked(Thread* thread) {
-  objects_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalObjectsAllocated());
-  bytes_allocated_.FetchAndAddSequentiallyConsistent(thread->GetThreadLocalBytesAllocated());
+  objects_allocated_.fetch_add(thread->GetThreadLocalObjectsAllocated(), std::memory_order_seq_cst);
+  bytes_allocated_.fetch_add(thread->GetThreadLocalBytesAllocated(), std::memory_order_seq_cst);
   thread->SetTlab(nullptr, nullptr, nullptr);
 }
 
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 7b43362..5ba13ca 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -155,8 +155,8 @@
 
   // Record objects / bytes freed.
   void RecordFree(int32_t objects, int32_t bytes) {
-    objects_allocated_.FetchAndSubSequentiallyConsistent(objects);
-    bytes_allocated_.FetchAndSubSequentiallyConsistent(bytes);
+    objects_allocated_.fetch_sub(objects, std::memory_order_seq_cst);
+    bytes_allocated_.fetch_sub(bytes, std::memory_order_seq_cst);
   }
 
   void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) OVERRIDE
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index c100bc0..e2154b8 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -672,7 +672,7 @@
     // Loaded the map, use the image header from the file now in case we patch it with
     // RelocateInPlace.
     image_header = reinterpret_cast<ImageHeader*>(map->Begin());
-    const uint32_t bitmap_index = ImageSpace::bitmap_index_.FetchAndAddSequentiallyConsistent(1);
+    const uint32_t bitmap_index = ImageSpace::bitmap_index_.fetch_add(1, std::memory_order_seq_cst);
     std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u",
                                          image_filename,
                                          bitmap_index));
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index 410931c..7072a7e 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -100,13 +100,13 @@
   uint8_t* old_top;
   uint8_t* new_top;
   do {
-    old_top = top_.LoadRelaxed();
+    old_top = top_.load(std::memory_order_relaxed);
     new_top = old_top + num_bytes;
     if (UNLIKELY(new_top > end_)) {
       return nullptr;
     }
   } while (!top_.CompareAndSetWeakRelaxed(old_top, new_top));
-  objects_allocated_.FetchAndAddRelaxed(1);
+  objects_allocated_.fetch_add(1, std::memory_order_relaxed);
   DCHECK_LE(Top(), end_);
   DCHECK_LT(old_top, end_);
   DCHECK_LE(new_top, end_);
@@ -365,11 +365,11 @@
 inline size_t RegionSpace::Region::ObjectsAllocated() const {
   if (IsLarge()) {
     DCHECK_LT(begin_ + kRegionSize, Top());
-    DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
+    DCHECK_EQ(objects_allocated_.load(std::memory_order_relaxed), 0U);
     return 1;
   } else if (IsLargeTail()) {
     DCHECK_EQ(begin_, Top());
-    DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
+    DCHECK_EQ(objects_allocated_.load(std::memory_order_relaxed), 0U);
     return 0;
   } else {
     DCHECK(IsAllocated()) << "state=" << state_;
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 8d94c86..5ea434a 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -489,7 +489,7 @@
 void RegionSpace::RecordAlloc(mirror::Object* ref) {
   CHECK(ref != nullptr);
   Region* r = RefToRegion(ref);
-  r->objects_allocated_.FetchAndAddSequentiallyConsistent(1);
+  r->objects_allocated_.fetch_add(1, std::memory_order_seq_cst);
 }
 
 bool RegionSpace::AllocNewTlab(Thread* self, size_t min_bytes) {
@@ -589,10 +589,10 @@
 }
 
 void RegionSpace::Region::Clear(bool zero_and_release_pages) {
-  top_.StoreRelaxed(begin_);
+  top_.store(begin_, std::memory_order_relaxed);
   state_ = RegionState::kRegionStateFree;
   type_ = RegionType::kRegionTypeNone;
-  objects_allocated_.StoreRelaxed(0);
+  objects_allocated_.store(0, std::memory_order_relaxed);
   alloc_time_ = 0;
   live_bytes_ = static_cast<size_t>(-1);
   if (zero_and_release_pages) {
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index d63257d..6a1371a 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -300,11 +300,11 @@
     void Init(size_t idx, uint8_t* begin, uint8_t* end) {
       idx_ = idx;
       begin_ = begin;
-      top_.StoreRelaxed(begin);
+      top_.store(begin, std::memory_order_relaxed);
       end_ = end;
       state_ = RegionState::kRegionStateFree;
       type_ = RegionType::kRegionTypeNone;
-      objects_allocated_.StoreRelaxed(0);
+      objects_allocated_.store(0, std::memory_order_relaxed);
       alloc_time_ = 0;
       live_bytes_ = static_cast<size_t>(-1);
       is_newly_allocated_ = false;
@@ -334,7 +334,7 @@
       if (is_free) {
         DCHECK(IsInNoSpace());
         DCHECK_EQ(begin_, Top());
-        DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
+        DCHECK_EQ(objects_allocated_.load(std::memory_order_relaxed), 0U);
       }
       return is_free;
     }
@@ -461,11 +461,11 @@
     }
 
     ALWAYS_INLINE uint8_t* Top() const {
-      return top_.LoadRelaxed();
+      return top_.load(std::memory_order_relaxed);
     }
 
     void SetTop(uint8_t* new_top) {
-      top_.StoreRelaxed(new_top);
+      top_.store(new_top, std::memory_order_relaxed);
     }
 
     uint8_t* End() const {
@@ -480,10 +480,10 @@
 
     void RecordThreadLocalAllocations(size_t num_objects, size_t num_bytes) {
       DCHECK(IsAllocated());
-      DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
+      DCHECK_EQ(objects_allocated_.load(std::memory_order_relaxed), 0U);
       DCHECK_EQ(Top(), end_);
-      objects_allocated_.StoreRelaxed(num_objects);
-      top_.StoreRelaxed(begin_ + num_bytes);
+      objects_allocated_.store(num_objects, std::memory_order_relaxed);
+      top_.store(begin_ + num_bytes, std::memory_order_relaxed);
       DCHECK_LE(Top(), end_);
     }
 
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 7af19fa..bc3ab48 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -272,7 +272,7 @@
 
   // Current address at which the space ends, which may vary as the space is filled.
   uint8_t* End() const {
-    return end_.LoadRelaxed();
+    return end_.load(std::memory_order_relaxed);
   }
 
   // The end of the address range covered by the space.
@@ -283,7 +283,7 @@
   // Change the end of the space. Be careful with use since changing the end of a space to an
   // invalid value may break the GC.
   void SetEnd(uint8_t* end) {
-    end_.StoreRelaxed(end);
+    end_.store(end, std::memory_order_relaxed);
   }
 
   void SetLimit(uint8_t* limit) {
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index cde155f..8c73ef9 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -122,7 +122,7 @@
     // Need to mark the card since this will update the mod-union table next GC cycle.
     card_table->MarkCard(ptrs[i]);
   }
-  zygote_space->objects_allocated_.FetchAndSubSequentiallyConsistent(num_ptrs);
+  zygote_space->objects_allocated_.fetch_sub(num_ptrs, std::memory_order_seq_cst);
 }
 
 }  // namespace space
diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
index 0823101..10c1398 100644
--- a/runtime/gc/space/zygote_space.h
+++ b/runtime/gc/space/zygote_space.h
@@ -67,7 +67,7 @@
   }
 
   uint64_t GetObjectsAllocated() {
-    return objects_allocated_.LoadSequentiallyConsistent();
+    return objects_allocated_.load(std::memory_order_seq_cst);
   }
 
   void Clear() OVERRIDE;
diff --git a/runtime/gc/task_processor_test.cc b/runtime/gc/task_processor_test.cc
index 77b40e4..38581ce 100644
--- a/runtime/gc/task_processor_test.cc
+++ b/runtime/gc/task_processor_test.cc
@@ -37,7 +37,7 @@
     if (max_recursion_ > 0) {
       task_processor_->AddTask(self,
                                new RecursiveTask(task_processor_, counter_, max_recursion_ - 1));
-      counter_->FetchAndAddSequentiallyConsistent(1U);
+      counter_->fetch_add(1U, std::memory_order_seq_cst);
     }
   }
 
@@ -54,7 +54,7 @@
   }
   virtual void Run(Thread* self) OVERRIDE {
     task_processor_->RunAllTasks(self);
-    done_running_->StoreSequentiallyConsistent(true);
+    done_running_->store(true, std::memory_order_seq_cst);
   }
 
  private:
@@ -76,7 +76,7 @@
   thread_pool.StartWorkers(self);
   ASSERT_FALSE(done_running);
   // Wait until all the tasks are done, but since we didn't interrupt, done_running should be 0.
-  while (counter.LoadSequentiallyConsistent() != kRecursion) {
+  while (counter.load(std::memory_order_seq_cst) != kRecursion) {
     usleep(10);
   }
   ASSERT_FALSE(done_running);
@@ -84,11 +84,11 @@
   thread_pool.Wait(self, true, false);
   // After the interrupt and wait, the WorkUntilInterruptedTasktask should have terminated and
   // set done_running_ to true.
-  ASSERT_TRUE(done_running.LoadSequentiallyConsistent());
+  ASSERT_TRUE(done_running.load(std::memory_order_seq_cst));
 
   // Test that we finish remaining tasks before returning from RunTasksUntilInterrupted.
-  counter.StoreSequentiallyConsistent(0);
-  done_running.StoreSequentiallyConsistent(false);
+  counter.store(0, std::memory_order_seq_cst);
+  done_running.store(false, std::memory_order_seq_cst);
   // Self interrupt before any of the other tasks run, but since we added them we should keep on
   // working until all the tasks are completed.
   task_processor.Stop(self);
@@ -96,8 +96,8 @@
   thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running));
   thread_pool.StartWorkers(self);
   thread_pool.Wait(self, true, false);
-  ASSERT_TRUE(done_running.LoadSequentiallyConsistent());
-  ASSERT_EQ(counter.LoadSequentiallyConsistent(), kRecursion);
+  ASSERT_TRUE(done_running.load(std::memory_order_seq_cst));
+  ASSERT_EQ(counter.load(std::memory_order_seq_cst), kRecursion);
 }
 
 class TestOrderTask : public HeapTask {
@@ -137,10 +137,10 @@
   Atomic<bool> done_running(false);
   // Add a task which will wait until interrupted to the thread pool.
   thread_pool.AddTask(self, new WorkUntilDoneTask(&task_processor, &done_running));
-  ASSERT_FALSE(done_running.LoadSequentiallyConsistent());
+  ASSERT_FALSE(done_running.load(std::memory_order_seq_cst));
   thread_pool.StartWorkers(self);
   thread_pool.Wait(self, true, false);
-  ASSERT_TRUE(done_running.LoadSequentiallyConsistent());
+  ASSERT_TRUE(done_running.load(std::memory_order_seq_cst));
   ASSERT_EQ(counter, kNumTasks);
 }
 
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index da4c4b2..8fe68bd 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -736,14 +736,14 @@
   // mutator lock exclusively held so that we don't have any threads in the middle of
   // DecodeWeakGlobal.
   Locks::mutator_lock_->AssertExclusiveHeld(self);
-  allow_accessing_weak_globals_.StoreSequentiallyConsistent(false);
+  allow_accessing_weak_globals_.store(false, std::memory_order_seq_cst);
 }
 
 void JavaVMExt::AllowNewWeakGlobals() {
   CHECK(!kUseReadBarrier);
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::jni_weak_globals_lock_);
-  allow_accessing_weak_globals_.StoreSequentiallyConsistent(true);
+  allow_accessing_weak_globals_.store(true, std::memory_order_seq_cst);
   weak_globals_add_condition_.Broadcast(self);
 }
 
@@ -770,7 +770,7 @@
   DCHECK(self != nullptr);
   return kUseReadBarrier ?
       self->GetWeakRefAccessEnabled() :
-      allow_accessing_weak_globals_.LoadSequentiallyConsistent();
+      allow_accessing_weak_globals_.load(std::memory_order_seq_cst);
 }
 
 ObjPtr<mirror::Object> JavaVMExt::DecodeWeakGlobal(Thread* self, IndirectRef ref) {
@@ -809,7 +809,7 @@
   }
   // self can be null during a runtime shutdown. ~Runtime()->~ClassLinker()->DecodeWeakGlobal().
   if (!kUseReadBarrier) {
-    DCHECK(allow_accessing_weak_globals_.LoadSequentiallyConsistent());
+    DCHECK(allow_accessing_weak_globals_.load(std::memory_order_seq_cst));
   }
   return weak_globals_.SynchronizedGet(ref);
 }
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 291a983..1e61ba0 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -1625,7 +1625,7 @@
      * so waitForDebugger() doesn't return if we stall for a bit here.
      */
     Dbg::GoActive();
-    last_activity_time_ms_.StoreSequentiallyConsistent(0);
+    last_activity_time_ms_.store(0, std::memory_order_seq_cst);
   }
 
   /*
@@ -1703,7 +1703,7 @@
    * the initial setup.  Only update if this is a non-DDMS packet.
    */
   if (request->GetCommandSet() != kJDWPDdmCmdSet) {
-    last_activity_time_ms_.StoreSequentiallyConsistent(MilliTime());
+    last_activity_time_ms_.store(MilliTime(), std::memory_order_seq_cst);
   }
 
   return replyLength;
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 557b032..447e3bf 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -729,7 +729,7 @@
     return -1;
   }
 
-  int64_t last = last_activity_time_ms_.LoadSequentiallyConsistent();
+  int64_t last = last_activity_time_ms_.load(std::memory_order_seq_cst);
 
   /* initializing or in the middle of something? */
   if (last == 0) {
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index b2d58da..1c4b93e 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -623,7 +623,7 @@
 bool JitCodeCache::IsWeakAccessEnabled(Thread* self) const {
   return kUseReadBarrier
       ? self->GetWeakRefAccessEnabled()
-      : is_weak_access_enabled_.LoadSequentiallyConsistent();
+      : is_weak_access_enabled_.load(std::memory_order_seq_cst);
 }
 
 void JitCodeCache::WaitUntilInlineCacheAccessible(Thread* self) {
@@ -645,13 +645,13 @@
 
 void JitCodeCache::AllowInlineCacheAccess() {
   DCHECK(!kUseReadBarrier);
-  is_weak_access_enabled_.StoreSequentiallyConsistent(true);
+  is_weak_access_enabled_.store(true, std::memory_order_seq_cst);
   BroadcastForInlineCacheAccess();
 }
 
 void JitCodeCache::DisallowInlineCacheAccess() {
   DCHECK(!kUseReadBarrier);
-  is_weak_access_enabled_.StoreSequentiallyConsistent(false);
+  is_weak_access_enabled_.store(false, std::memory_order_seq_cst);
 }
 
 void JitCodeCache::CopyInlineCacheInto(const InlineCache& ic,
@@ -820,7 +820,7 @@
       // code.
       GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr));
     }
-    last_update_time_ns_.StoreRelease(NanoTime());
+    last_update_time_ns_.store(NanoTime(), std::memory_order_release);
     VLOG(jit)
         << "JIT added (osr=" << std::boolalpha << osr << std::noboolalpha << ") "
         << ArtMethod::PrettyMethod(method) << "@" << method
@@ -1647,7 +1647,7 @@
 }
 
 uint64_t JitCodeCache::GetLastUpdateTimeNs() const {
-  return last_update_time_ns_.LoadAcquire();
+  return last_update_time_ns_.load(std::memory_order_acquire);
 }
 
 bool JitCodeCache::IsOsrCompiled(ArtMethod* method) {
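
The last_update_time_ns_ changes above are the one acquire/release pair in this file: the release store publishes the timestamp and the acquire load in GetLastUpdateTimeNs observes it, exactly as the removed StoreRelease/LoadAcquire wrappers did. A minimal standalone illustration of the pattern (the variable here is a stand-in, not the JitCodeCache member):

#include <atomic>
#include <cstdint>

std::atomic<uint64_t> last_update_ns{0};  // stand-in for last_update_time_ns_

void PublishUpdate(uint64_t now_ns) {
  // Was: last_update_time_ns_.StoreRelease(NanoTime())
  last_update_ns.store(now_ns, std::memory_order_release);
}

uint64_t ReadLastUpdate() {
  // Was: last_update_time_ns_.LoadAcquire()
  return last_update_ns.load(std::memory_order_acquire);
}
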
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 3ffedca..7a4876c 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -154,7 +154,7 @@
   GcRoot<mirror::CallSite>& target = GetResolvedCallSites()[call_site_idx];
   Atomic<GcRoot<mirror::CallSite>>& ref =
       reinterpret_cast<Atomic<GcRoot<mirror::CallSite>>&>(target);
-  return ref.LoadSequentiallyConsistent().Read();
+  return ref.load(std::memory_order_seq_cst).Read();
 }
 
 inline CallSite* DexCache::SetResolvedCallSite(uint32_t call_site_idx, CallSite* call_site) {
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 55dd514..c7561f4 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -673,7 +673,7 @@
 inline kSize Object::GetFieldAcquire(MemberOffset field_offset) {
   const uint8_t* raw_addr = reinterpret_cast<const uint8_t*>(this) + field_offset.Int32Value();
   const kSize* addr = reinterpret_cast<const kSize*>(raw_addr);
-  return reinterpret_cast<const Atomic<kSize>*>(addr)->LoadAcquire();
+  return reinterpret_cast<const Atomic<kSize>*>(addr)->load(std::memory_order_acquire);
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
@@ -956,7 +956,7 @@
   uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
-  bool success = atomic_addr->CompareAndExchangeStrongSequentiallyConsistent(&old_ref, new_ref);
+  bool success = atomic_addr->compare_exchange_strong(old_ref, new_ref, std::memory_order_seq_cst);
   ObjPtr<Object> witness_value(PtrCompression<kPoisonHeapReferences, Object>::Decompress(old_ref));
   if (kIsDebugBuild) {
     // Ensure caller has done read barrier on the reference field so it's in the to-space.
@@ -986,7 +986,7 @@
   uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
-  uint32_t old_ref = atomic_addr->ExchangeSequentiallyConsistent(new_ref);
+  uint32_t old_ref = atomic_addr->exchange(new_ref, std::memory_order_seq_cst);
   ObjPtr<Object> old_value(PtrCompression<kPoisonHeapReferences, Object>::Decompress(old_ref));
   if (kIsDebugBuild) {
     // Ensure caller has done read barrier on the reference field so it's in the to-space.
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index f274cfc..0e03e37 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -87,16 +87,18 @@
     DCHECK_ALIGNED(dst_bytes, sizeof(uintptr_t));
     // Use word sized copies to begin.
     while (num_bytes >= sizeof(uintptr_t)) {
-      reinterpret_cast<Atomic<uintptr_t>*>(dst_bytes)->StoreRelaxed(
-          reinterpret_cast<Atomic<uintptr_t>*>(src_bytes)->LoadRelaxed());
+      reinterpret_cast<Atomic<uintptr_t>*>(dst_bytes)->store(
+          reinterpret_cast<Atomic<uintptr_t>*>(src_bytes)->load(std::memory_order_relaxed),
+          std::memory_order_relaxed);
       src_bytes += sizeof(uintptr_t);
       dst_bytes += sizeof(uintptr_t);
       num_bytes -= sizeof(uintptr_t);
     }
     // Copy possible 32 bit word.
     if (sizeof(uintptr_t) != sizeof(uint32_t) && num_bytes >= sizeof(uint32_t)) {
-      reinterpret_cast<Atomic<uint32_t>*>(dst_bytes)->StoreRelaxed(
-          reinterpret_cast<Atomic<uint32_t>*>(src_bytes)->LoadRelaxed());
+      reinterpret_cast<Atomic<uint32_t>*>(dst_bytes)->store(
+          reinterpret_cast<Atomic<uint32_t>*>(src_bytes)->load(std::memory_order_relaxed),
+          std::memory_order_relaxed);
       src_bytes += sizeof(uint32_t);
       dst_bytes += sizeof(uint32_t);
       num_bytes -= sizeof(uint32_t);
@@ -104,8 +106,9 @@
     // Copy remaining bytes, avoid going past the end of num_bytes since there may be a redzone
     // there.
     while (num_bytes > 0) {
-      reinterpret_cast<Atomic<uint8_t>*>(dst_bytes)->StoreRelaxed(
-          reinterpret_cast<Atomic<uint8_t>*>(src_bytes)->LoadRelaxed());
+      reinterpret_cast<Atomic<uint8_t>*>(dst_bytes)->store(
+          reinterpret_cast<Atomic<uint8_t>*>(src_bytes)->load(std::memory_order_relaxed),
+          std::memory_order_relaxed);
       src_bytes += sizeof(uint8_t);
       dst_bytes += sizeof(uint8_t);
       num_bytes -= sizeof(uint8_t);
@@ -173,7 +176,7 @@
 uint32_t Object::GenerateIdentityHashCode() {
   uint32_t expected_value, new_value;
   do {
-    expected_value = hash_code_seed.LoadRelaxed();
+    expected_value = hash_code_seed.load(std::memory_order_relaxed);
     new_value = expected_value * 1103515245 + 12345;
   } while (!hash_code_seed.CompareAndSetWeakRelaxed(expected_value, new_value) ||
       (expected_value & LockWord::kHashMask) == 0);
@@ -181,7 +184,7 @@
 }
 
 void Object::SetHashCodeSeed(uint32_t new_seed) {
-  hash_code_seed.StoreRelaxed(new_seed);
+  hash_code_seed.store(new_seed, std::memory_order_relaxed);
 }
 
 int32_t Object::IdentityHashCode() {
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 95f82cb..d00c90b 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -730,7 +730,7 @@
     uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
     kSize* addr = reinterpret_cast<kSize*>(raw_addr);
     if (kIsVolatile) {
-      reinterpret_cast<Atomic<kSize>*>(addr)->StoreSequentiallyConsistent(new_value);
+      reinterpret_cast<Atomic<kSize>*>(addr)->store(new_value, std::memory_order_seq_cst);
     } else {
       reinterpret_cast<Atomic<kSize>*>(addr)->StoreJavaData(new_value);
     }
@@ -742,7 +742,7 @@
     const uint8_t* raw_addr = reinterpret_cast<const uint8_t*>(this) + field_offset.Int32Value();
     const kSize* addr = reinterpret_cast<const kSize*>(raw_addr);
     if (kIsVolatile) {
-      return reinterpret_cast<const Atomic<kSize>*>(addr)->LoadSequentiallyConsistent();
+      return reinterpret_cast<const Atomic<kSize>*>(addr)->load(std::memory_order_seq_cst);
     } else {
       return reinterpret_cast<const Atomic<kSize>*>(addr)->LoadJavaData();
     }
diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h
index cf1f85d..356fef0 100644
--- a/runtime/mirror/object_reference.h
+++ b/runtime/mirror/object_reference.h
@@ -110,13 +110,13 @@
   template <bool kIsVolatile = false>
   MirrorType* AsMirrorPtr() const REQUIRES_SHARED(Locks::mutator_lock_) {
     return Compression::Decompress(
-        kIsVolatile ? reference_.LoadSequentiallyConsistent() : reference_.LoadJavaData());
+        kIsVolatile ? reference_.load(std::memory_order_seq_cst) : reference_.LoadJavaData());
   }
 
   template <bool kIsVolatile = false>
   void Assign(MirrorType* other) REQUIRES_SHARED(Locks::mutator_lock_) {
     if (kIsVolatile) {
-      reference_.StoreSequentiallyConsistent(Compression::Compress(other));
+      reference_.store(Compression::Compress(other), std::memory_order_seq_cst);
     } else {
       reference_.StoreJavaData(Compression::Compress(other));
     }
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 2a938da..e110763 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -140,7 +140,7 @@
     }
   }
   DCHECK(HasHashCode());
-  return hash_code_.LoadRelaxed();
+  return hash_code_.load(std::memory_order_relaxed);
 }
 
 bool Monitor::Install(Thread* self) {
@@ -155,7 +155,7 @@
       break;
     }
     case LockWord::kHashCode: {
-      CHECK_EQ(hash_code_.LoadRelaxed(), static_cast<int32_t>(lw.GetHashCode()));
+      CHECK_EQ(hash_code_.load(std::memory_order_relaxed), static_cast<int32_t>(lw.GetHashCode()));
       break;
     }
     case LockWord::kFatLocked: {
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 384ebbe..6b7604e 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -130,7 +130,7 @@
   bool IsLocked() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!monitor_lock_);
 
   bool HasHashCode() const {
-    return hash_code_.LoadRelaxed() != 0;
+    return hash_code_.load(std::memory_order_relaxed) != 0;
   }
 
   MonitorId GetMonitorId() const {
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 58f6c04..5035ba0 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -130,7 +130,7 @@
         ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
         // Update the field atomically. This may fail if mutator updates before us, but it's ok.
         if (ref != old_ref) {
-          Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root);
+          Atomic<MirrorType*>* atomic_root = reinterpret_cast<Atomic<MirrorType*>*>(root);
           atomic_root->CompareAndSetStrongRelaxed(old_ref, ref);
         }
       }
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 2f6f50e..e34f32e 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -251,6 +251,7 @@
       union StateAndFlags new_state_and_flags;
       new_state_and_flags.as_int = old_state_and_flags.as_int;
       new_state_and_flags.as_struct.state = kRunnable;
+
       // CAS the value with a memory barrier.
       if (LIKELY(tls32_.state_and_flags.as_atomic_int.CompareAndSetWeakAcquire(
                                                  old_state_and_flags.as_int,
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5b03c2d..af61115 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1280,7 +1280,7 @@
     AtomicClearFlag(kSuspendRequest);
   } else {
     // Two bits might be set simultaneously.
-    tls32_.state_and_flags.as_atomic_int.FetchAndBitwiseOrSequentiallyConsistent(flags);
+    tls32_.state_and_flags.as_atomic_int.fetch_or(flags, std::memory_order_seq_cst);
     TriggerSuspend();
   }
   return true;
@@ -1318,7 +1318,7 @@
     if (pending_threads != nullptr) {
       bool done = false;
       do {
-        int32_t cur_val = pending_threads->LoadRelaxed();
+        int32_t cur_val = pending_threads->load(std::memory_order_relaxed);
         CHECK_GT(cur_val, 0) << "Unexpected value for PassActiveSuspendBarriers(): " << cur_val;
         // Reduce value by 1.
         done = pending_threads->CompareAndSetWeakRelaxed(cur_val, cur_val - 1);
@@ -1558,7 +1558,7 @@
   Atomic<Closure*>* atomic_func = reinterpret_cast<Atomic<Closure*>*>(&tlsPtr_.flip_function);
   Closure* func;
   do {
-    func = atomic_func->LoadRelaxed();
+    func = atomic_func->load(std::memory_order_relaxed);
     if (func == nullptr) {
       return nullptr;
     }
@@ -1570,7 +1570,7 @@
 void Thread::SetFlipFunction(Closure* function) {
   CHECK(function != nullptr);
   Atomic<Closure*>* atomic_func = reinterpret_cast<Atomic<Closure*>*>(&tlsPtr_.flip_function);
-  atomic_func->StoreSequentiallyConsistent(function);
+  atomic_func->store(function, std::memory_order_seq_cst);
 }
 
 void Thread::FullSuspendCheck() {
@@ -2102,7 +2102,7 @@
                 "art::Thread has a size which is not a multiple of 4.");
   tls32_.state_and_flags.as_struct.flags = 0;
   tls32_.state_and_flags.as_struct.state = kNative;
-  tls32_.interrupted.StoreRelaxed(false);
+  tls32_.interrupted.store(false, std::memory_order_relaxed);
   memset(&tlsPtr_.held_mutexes[0], 0, sizeof(tlsPtr_.held_mutexes));
   std::fill(tlsPtr_.rosalloc_runs,
             tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
@@ -2397,24 +2397,24 @@
 bool Thread::Interrupted() {
   DCHECK_EQ(Thread::Current(), this);
   // No other thread can concurrently reset the interrupted flag.
-  bool interrupted = tls32_.interrupted.LoadSequentiallyConsistent();
+  bool interrupted = tls32_.interrupted.load(std::memory_order_seq_cst);
   if (interrupted) {
-    tls32_.interrupted.StoreSequentiallyConsistent(false);
+    tls32_.interrupted.store(false, std::memory_order_seq_cst);
   }
   return interrupted;
 }
 
 // Implements java.lang.Thread.isInterrupted.
 bool Thread::IsInterrupted() {
-  return tls32_.interrupted.LoadSequentiallyConsistent();
+  return tls32_.interrupted.load(std::memory_order_seq_cst);
 }
 
 void Thread::Interrupt(Thread* self) {
   MutexLock mu(self, *wait_mutex_);
-  if (tls32_.interrupted.LoadSequentiallyConsistent()) {
+  if (tls32_.interrupted.load(std::memory_order_seq_cst)) {
     return;
   }
-  tls32_.interrupted.StoreSequentiallyConsistent(true);
+  tls32_.interrupted.store(true, std::memory_order_seq_cst);
   NotifyLocked(self);
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index 6549fc1..a3b0113 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -541,7 +541,7 @@
   bool IsInterrupted();
   void Interrupt(Thread* self) REQUIRES(!*wait_mutex_);
   void SetInterrupted(bool i) {
-    tls32_.interrupted.StoreSequentiallyConsistent(i);
+    tls32_.interrupted.store(i, std::memory_order_seq_cst);
   }
   void Notify() REQUIRES(!*wait_mutex_);
 
@@ -1095,11 +1095,11 @@
   }
 
   void AtomicSetFlag(ThreadFlag flag) {
-    tls32_.state_and_flags.as_atomic_int.FetchAndBitwiseOrSequentiallyConsistent(flag);
+    tls32_.state_and_flags.as_atomic_int.fetch_or(flag, std::memory_order_seq_cst);
   }
 
   void AtomicClearFlag(ThreadFlag flag) {
-    tls32_.state_and_flags.as_atomic_int.FetchAndBitwiseAndSequentiallyConsistent(-1 ^ flag);
+    tls32_.state_and_flags.as_atomic_int.fetch_and(-1 ^ flag, std::memory_order_seq_cst);
   }
 
   void ResetQuickAllocEntryPointsForThread(bool is_marking);
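
In the AtomicSetFlag/AtomicClearFlag hunks above, the old bitwise fetch wrappers map directly onto fetch_or and fetch_and; for a 32-bit int, -1 ^ flag is the same mask as ~flag. A self-contained sketch of the pattern (illustrative only; the enum values are placeholders, not ART's ThreadFlag):

  #include <atomic>
  #include <cstdint>

  // Illustrative sketch: bit flags in an atomic word, as in AtomicSetFlag /
  // AtomicClearFlag above.
  enum FlagBits : int32_t {
    kFlagA = 1 << 0,
    kFlagB = 1 << 1,
  };

  inline void SetFlag(std::atomic<int32_t>& word, int32_t flag) {
    word.fetch_or(flag, std::memory_order_seq_cst);
  }

  inline void ClearFlag(std::atomic<int32_t>& word, int32_t flag) {
    // For a 32-bit int, -1 ^ flag and ~flag produce the same mask.
    word.fetch_and(~flag, std::memory_order_seq_cst);
  }
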
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 8095ef5..44af867 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -732,7 +732,7 @@
     if (reason == SuspendReason::kForDebugger) {
       ++debug_suspend_all_count_;
     }
-    pending_threads.StoreRelaxed(list_.size() - num_ignored);
+    pending_threads.store(list_.size() - num_ignored, std::memory_order_relaxed);
     // Increment everybody's suspend count (except those that should be ignored).
     for (const auto& thread : list_) {
       if (thread == ignore1 || thread == ignore2) {
@@ -748,7 +748,7 @@
       if (thread->IsSuspended()) {
         // Only clear the counter for the current thread.
         thread->ClearSuspendBarrier(&pending_threads);
-        pending_threads.FetchAndSubSequentiallyConsistent(1);
+        pending_threads.fetch_sub(1, std::memory_order_seq_cst);
       }
     }
   }
@@ -761,7 +761,7 @@
 #endif
   const uint64_t start_time = NanoTime();
   while (true) {
-    int32_t cur_val = pending_threads.LoadRelaxed();
+    int32_t cur_val = pending_threads.load(std::memory_order_relaxed);
     if (LIKELY(cur_val > 0)) {
 #if ART_USE_FUTEXES
       if (futex(pending_threads.Address(), FUTEX_WAIT, cur_val, &wait_timeout, nullptr, 0) != 0) {
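
The suspend-barrier hunks above keep the usual retry shape: a relaxed load of the counter followed by a weak CAS that may fail spuriously or lose a race, in which case the loop simply re-reads. A standalone sketch of the same decrement loop on a bare std::atomic (illustrative only; DecrementBarrier is a made-up helper):

  #include <atomic>
  #include <cassert>
  #include <cstdint>

  // Illustrative sketch: relaxed load + weak CAS retry loop, mirroring the
  // suspend-barrier decrement above. compare_exchange_weak may fail
  // spuriously, so the loop re-reads and retries.
  inline void DecrementBarrier(std::atomic<int32_t>& pending) {
    bool done = false;
    do {
      int32_t cur_val = pending.load(std::memory_order_relaxed);
      assert(cur_val > 0);
      done = pending.compare_exchange_weak(cur_val, cur_val - 1,
                                           std::memory_order_relaxed);
    } while (!done);
  }
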
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index 895a108..d784200 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -71,7 +71,7 @@
   // Wait for tasks to complete.
   thread_pool.Wait(self, true, false);
   // Make sure that we finished all the work.
-  EXPECT_EQ(num_tasks, count.LoadSequentiallyConsistent());
+  EXPECT_EQ(num_tasks, count.load(std::memory_order_seq_cst));
 }
 
 TEST_F(ThreadPoolTest, StopStart) {
@@ -84,7 +84,7 @@
   }
   usleep(200);
   // Check that no threads started prematurely.
-  EXPECT_EQ(0, count.LoadSequentiallyConsistent());
+  EXPECT_EQ(0, count.load(std::memory_order_seq_cst));
   // Signal the threads to start processing tasks.
   thread_pool.StartWorkers(self);
   usleep(200);
@@ -93,7 +93,7 @@
   thread_pool.AddTask(self, new CountTask(&bad_count));
   usleep(200);
   // Ensure that the task added after the workers were stopped doesn't get run.
-  EXPECT_EQ(0, bad_count.LoadSequentiallyConsistent());
+  EXPECT_EQ(0, bad_count.load(std::memory_order_seq_cst));
   // Allow tasks to finish up and delete themselves.
   thread_pool.StartWorkers(self);
   thread_pool.Wait(self, false, false);
@@ -157,7 +157,7 @@
   thread_pool.AddTask(self, new TreeTask(&thread_pool, &count, depth));
   thread_pool.StartWorkers(self);
   thread_pool.Wait(self, true, false);
-  EXPECT_EQ((1 << depth) - 1, count.LoadSequentiallyConsistent());
+  EXPECT_EQ((1 << depth) - 1, count.load(std::memory_order_seq_cst));
 }
 
 class PeerTask : public Task {
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 91d2b37..bea510a 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -675,7 +675,7 @@
   static_assert(18 <= kMinBufSize, "Minimum buffer size not large enough for trace header");
 
   // Update current offset.
-  cur_offset_.StoreRelaxed(kTraceHeaderLength);
+  cur_offset_.store(kTraceHeaderLength, std::memory_order_relaxed);
 
   if (output_mode == TraceOutputMode::kStreaming) {
     streaming_lock_ = new Mutex("tracing lock", LockLevel::kTracingStreamingLock);
@@ -717,7 +717,7 @@
     // Clean up.
     STLDeleteValues(&seen_methods_);
   } else {
-    final_offset = cur_offset_.LoadRelaxed();
+    final_offset = cur_offset_.load(std::memory_order_relaxed);
     GetVisitedMethods(final_offset, &visited_methods);
   }
 
@@ -944,7 +944,7 @@
 }
 
 void Trace::WriteToBuf(const uint8_t* src, size_t src_size) {
-  int32_t old_offset = cur_offset_.LoadRelaxed();
+  int32_t old_offset = cur_offset_.load(std::memory_order_relaxed);
   int32_t new_offset = old_offset + static_cast<int32_t>(src_size);
   if (dchecked_integral_cast<size_t>(new_offset) > buffer_size_) {
     // Flush buffer.
@@ -957,24 +957,24 @@
       if (!trace_file_->WriteFully(src, src_size)) {
         PLOG(WARNING) << "Failed streaming a tracing event.";
       }
-      cur_offset_.StoreRelease(0);  // Buffer is empty now.
+      cur_offset_.store(0, std::memory_order_release);  // Buffer is empty now.
       return;
     }
 
     old_offset = 0;
     new_offset = static_cast<int32_t>(src_size);
   }
-  cur_offset_.StoreRelease(new_offset);
+  cur_offset_.store(new_offset, std::memory_order_release);
   // Fill in data.
   memcpy(buf_.get() + old_offset, src, src_size);
 }
 
 void Trace::FlushBuf() {
-  int32_t offset = cur_offset_.LoadRelaxed();
+  int32_t offset = cur_offset_.load(std::memory_order_relaxed);
   if (!trace_file_->WriteFully(buf_.get(), offset)) {
     PLOG(WARNING) << "Failed flush the remaining data in streaming.";
   }
-  cur_offset_.StoreRelease(0);
+  cur_offset_.store(0, std::memory_order_release);
 }
 
 void Trace::LogMethodTraceEvent(Thread* thread, ArtMethod* method,
@@ -990,7 +990,7 @@
   // We do a busy loop here trying to acquire the next offset.
   if (trace_output_mode_ != TraceOutputMode::kStreaming) {
     do {
-      old_offset = cur_offset_.LoadRelaxed();
+      old_offset = cur_offset_.load(std::memory_order_relaxed);
       new_offset = old_offset + GetRecordSize(clock_source_);
       if (static_cast<size_t>(new_offset) > buffer_size_) {
         overflow_ = true;