Revert "Add spin loop to mutex, overhaul monitor"

This reverts commit 0d508a01106746e0d8865752850f4f03bcce1e01.

Reason for revert: Preparing a revert in case the art/ci/host-x86_64-cdex-fast failure recurs.

Bug: 140590186

Change-Id: I404b6ee498ff4eda73ec3b8fc4bf8e92efb2705f
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index abae75d..0d5ce15 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -82,27 +82,6 @@
   }
 }
 
-// Wait until pred(testLoc->load(std::memory_order_relaxed)) holds, or until a
-// short time interval, on the order of kernel context-switch time, passes.
-// Return true if the predicate test succeeded, false if we timed out.
-template<typename Pred>
-static inline bool WaitBrieflyFor(AtomicInteger* testLoc, Pred pred) {
-  // TODO: Tune these parameters correctly. BackOff(3) should take on the order of 100 cycles. So
-  // this should result in retrying <= 10 times, usually waiting around 100 cycles each. The
-  // maximum delay should be significantly less than the expected futex() context switch time, so
-  // there should be little danger of this worsening things appreciably. If the lock was only
-  // held briefly by a running thread, this should help immensely.
-  static constexpr uint32_t kMaxBackOff = 3;  // Should probably be <= kSpinMax above.
-  static constexpr uint32_t kMaxIters = 50;
-  for (uint32_t i = 1; i <= kMaxIters; ++i) {
-    BackOff(std::min(i, kMaxBackOff));
-    if (pred(testLoc->load(std::memory_order_relaxed))) {
-      return true;
-    }
-  }
-  return false;
-}
-
 class ScopedAllMutexesLock final {
  public:
   explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
@@ -402,31 +381,24 @@
       } else {
         // Failed to acquire, hang up.
         ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
-        if (!WaitBrieflyFor(&state_and_contenders_,
-            [](int32_t v) { return (v & kHeldMask) == 0; })) {
-          // Increment contender count. We can't create enough threads for this to overflow.
-          increment_contenders();
-          // Make cur_state again reflect the expected value of state_and_contenders.
-          cur_state += kContenderIncrement;
-          if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
-            self->CheckEmptyCheckpointFromMutex();
-          }
-          do {
-            if (futex(state_and_contenders_.Address(), FUTEX_WAIT_PRIVATE, cur_state,
-                      nullptr, nullptr, 0) != 0) {
-              // We only went to sleep after incrementing and contenders and checking that the
-              // lock is still held by someone else.  EAGAIN and EINTR both indicate a spurious
-              // failure, try again from the beginning.  We don't use TEMP_FAILURE_RETRY so we can
-              // intentionally retry to acquire the lock.
-              if ((errno != EAGAIN) && (errno != EINTR)) {
-                PLOG(FATAL) << "futex wait failed for " << name_;
-              }
-            }
-            // Retry until not held. In heavy contention situations we otherwise get redundant
-            // futex wakeups as a result of repeatedly decrementing and incrementing contenders.
-          } while ((state_and_contenders_.load(std::memory_order_relaxed) & kHeldMask) != 0);
-          decrement_contenders();
+        // Increment contender count. We can't create enough threads for this to overflow.
+        increment_contenders();
+        // Make cur_state again reflect the expected value of state_and_contenders.
+        cur_state += kContenderIncrement;
+        if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
+          self->CheckEmptyCheckpointFromMutex();
         }
+        if (futex(state_and_contenders_.Address(), FUTEX_WAIT_PRIVATE, cur_state,
+                  nullptr, nullptr, 0) != 0) {
+          // We only went to sleep after incrementing and contenders and checking that the lock
+          // is still held by someone else.
+          // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
+          // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
+          if ((errno != EAGAIN) && (errno != EINTR)) {
+            PLOG(FATAL) << "futex wait failed for " << name_;
+          }
+        }
+        decrement_contenders();
       }
     } while (!done);
     // Confirm that lock is now held.
@@ -487,50 +459,6 @@
   return true;
 }
 
-bool Mutex::ExclusiveTryLockWithSpinning(Thread* self) {
-  // Spin a small number of times, since this affects our ability to respond to suspension
-  // requests. We spin repeatedly only if the mutex repeatedly becomes available and unavailable
-  // in rapid succession, and then we will typically not spin for the maximal period.
-  const int max_spins = 5;
-  for (int i = 0; i < max_spins; ++i) {
-    if (ExclusiveTryLock(self)) {
-      return true;
-    }
-#if ART_USE_FUTEXES
-    if (!WaitBrieflyFor(&state_and_contenders_,
-            [](int32_t v) { return (v & kHeldMask) == 0; })) {
-      return false;
-    }
-#endif
-  }
-  return ExclusiveTryLock(self);
-}
-
-#if ART_USE_FUTEXES
-void Mutex::ExclusiveLockUncontendedFor(Thread* new_owner, unsigned int n) {
-  DCHECK_EQ(GetDepth(), 0u);
-  DCHECK(recursive_ || n == 1);
-  DCHECK_GE(n, 1u);
-  DCHECK_EQ(level_, kMonitorLock);
-  state_and_contenders_.store(kHeldMask, std::memory_order_relaxed);
-  recursion_count_ = n;
-  exclusive_owner_.store(SafeGetTid(new_owner), std::memory_order_relaxed);
-  // Don't call RegisterAsLocked(). It wouldn't register anything anyway.  And
-  // this happens as we're inflating a monitor, which doesn't logically affect
-  // held "locks"; it effectively just converts a thin lock to a mutex.  By doing
-  // this while the lock is already held, we're delaying the acquisition of a
-  // logically held mutex, which can introduce bogus lock order violations.
-}
-
-void Mutex::ExclusiveUnlockUncontended() {
-  DCHECK_EQ(level_, kMonitorLock);
-  state_and_contenders_.store(0, std::memory_order_relaxed);
-  recursion_count_ = 0;
-  exclusive_owner_.store(0 /* pid */, std::memory_order_relaxed);
-  // Skip RegisterAsUnlocked(), which wouldn't do anything anyway.
-}
-#endif  // ART_USE_FUTEXES
-
 void Mutex::ExclusiveUnlock(Thread* self) {
   if (kIsDebugBuild && self != nullptr && self != Thread::Current()) {
     std::string name1 = "<null>";
@@ -661,20 +589,18 @@
     } else {
       // Failed to acquire, hang up.
       ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
-      if (!WaitBrieflyFor(&state_, [](int32_t v) { return v == 0; })) {
-        num_contenders_.fetch_add(1);
-        if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
-          self->CheckEmptyCheckpointFromMutex();
-        }
-        if (futex(state_.Address(), FUTEX_WAIT_PRIVATE, cur_state, nullptr, nullptr, 0) != 0) {
-          // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
-          // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
-          if ((errno != EAGAIN) && (errno != EINTR)) {
-            PLOG(FATAL) << "futex wait failed for " << name_;
-          }
-        }
-        num_contenders_.fetch_sub(1);
+      num_contenders_.fetch_add(1);
+      if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
+        self->CheckEmptyCheckpointFromMutex();
       }
+      if (futex(state_.Address(), FUTEX_WAIT_PRIVATE, cur_state, nullptr, nullptr, 0) != 0) {
+        // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
+        // We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
+        if ((errno != EAGAIN) && (errno != EINTR)) {
+          PLOG(FATAL) << "futex wait failed for " << name_;
+        }
+      }
+      num_contenders_.fetch_sub(1);
     }
   } while (!done);
   DCHECK_EQ(state_.load(std::memory_order_relaxed), -1);
@@ -739,24 +665,22 @@
         return false;  // Timed out.
       }
       ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
-      if (!WaitBrieflyFor(&state_, [](int32_t v) { return v == 0; })) {
-        num_contenders_.fetch_add(1);
-        if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
-          self->CheckEmptyCheckpointFromMutex();
-        }
-        if (futex(state_.Address(), FUTEX_WAIT_PRIVATE, cur_state, &rel_ts, nullptr, 0) != 0) {
-          if (errno == ETIMEDOUT) {
-            num_contenders_.fetch_sub(1);
-            return false;  // Timed out.
-          } else if ((errno != EAGAIN) && (errno != EINTR)) {
-            // EAGAIN and EINTR both indicate a spurious failure,
-            // recompute the relative time out from now and try again.
-            // We don't use TEMP_FAILURE_RETRY so we can recompute rel_ts;
-            PLOG(FATAL) << "timed futex wait failed for " << name_;
-          }
-        }
-        num_contenders_.fetch_sub(1);
+      num_contenders_.fetch_add(1);
+      if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
+        self->CheckEmptyCheckpointFromMutex();
       }
+      if (futex(state_.Address(), FUTEX_WAIT_PRIVATE, cur_state, &rel_ts, nullptr, 0) != 0) {
+        if (errno == ETIMEDOUT) {
+          num_contenders_.fetch_sub(1);
+          return false;  // Timed out.
+        } else if ((errno != EAGAIN) && (errno != EINTR)) {
+          // EAGAIN and EINTR both indicate a spurious failure,
+          // recompute the relative time out from now and try again.
+          // We don't use TEMP_FAILURE_RETRY so we can recompute rel_ts;
+          PLOG(FATAL) << "timed futex wait failed for " << name_;
+        }
+      }
+      num_contenders_.fetch_sub(1);
     }
   } while (!done);
 #else
@@ -782,18 +706,16 @@
 void ReaderWriterMutex::HandleSharedLockContention(Thread* self, int32_t cur_state) {
   // Owner holds it exclusively, hang up.
   ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
-  if (!WaitBrieflyFor(&state_, [](int32_t v) { return v >= 0; })) {
-    num_contenders_.fetch_add(1);
-    if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
-      self->CheckEmptyCheckpointFromMutex();
-    }
-    if (futex(state_.Address(), FUTEX_WAIT_PRIVATE, cur_state, nullptr, nullptr, 0) != 0) {
-      if (errno != EAGAIN && errno != EINTR) {
-        PLOG(FATAL) << "futex wait failed for " << name_;
-      }
-    }
-    num_contenders_.fetch_sub(1);
+  num_contenders_.fetch_add(1);
+  if (UNLIKELY(should_respond_to_empty_checkpoint_request_)) {
+    self->CheckEmptyCheckpointFromMutex();
   }
+  if (futex(state_.Address(), FUTEX_WAIT_PRIVATE, cur_state, nullptr, nullptr, 0) != 0) {
+    if (errno != EAGAIN && errno != EINTR) {
+      PLOG(FATAL) << "futex wait failed for " << name_;
+    }
+  }
+  num_contenders_.fetch_sub(1);
 }
 #endif
 
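For context, here is a minimal sketch of the futex protocol the restored ExclusiveLock slow path relies on: CAS the state word on the fast path, futex-wait on the observed value when contended, and treat EAGAIN/EINTR as spurious wakeups. This is an illustrative Linux-only toy with no contender counting and no empty-checkpoint handling; the names are placeholders, not ART's Mutex.

#include <atomic>
#include <cerrno>
#include <cstdlib>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

namespace sketch {

// Thin wrapper over the raw syscall; glibc exposes no futex() function.
static long Futex(std::atomic<int32_t>* uaddr, int op, int32_t val) {
  return syscall(SYS_futex, reinterpret_cast<int32_t*>(uaddr), op, val,
                 /*timeout=*/nullptr, /*uaddr2=*/nullptr, /*val3=*/0);
}

class FutexLock {
 public:
  void Lock() {
    int32_t expected = 0;
    // Fast path: 0 -> 1 with acquire ordering.
    while (!state_.compare_exchange_weak(expected, 1, std::memory_order_acquire)) {
      if (expected != 0) {
        // Contended: sleep until the word no longer equals the value we saw.
        // EAGAIN and EINTR indicate spurious failures; simply retry the CAS.
        if (Futex(&state_, FUTEX_WAIT_PRIVATE, expected) != 0 &&
            errno != EAGAIN && errno != EINTR) {
          std::abort();
        }
      }
      expected = 0;
    }
  }

  void Unlock() {
    state_.store(0, std::memory_order_release);
    // Wake at most one sleeper. ART additionally packs a contender count into
    // the same word so it can skip this syscall when nobody is waiting.
    Futex(&state_, FUTEX_WAKE_PRIVATE, 1);
  }

 private:
  std::atomic<int32_t> state_{0};  // 0 = unheld, 1 = held.
};

}  // namespace sketch
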
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index b889660..136e17a 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -57,7 +57,7 @@
 constexpr bool kDebugLocking = kIsDebugBuild;
 
 // Record Log contention information, dumpable via SIGQUIT.
-#if ART_USE_FUTEXES
+#ifdef ART_USE_FUTEXES
 // To enable lock contention logging, set this to true.
 constexpr bool kLogLockContentions = false;
 // FUTEX_WAKE first argument:
@@ -102,11 +102,7 @@
 
   BaseMutex(const char* name, LockLevel level);
   virtual ~BaseMutex();
-
-  // Add this mutex to those owned by self, and perform appropriate checking.
-  // For this call only, self may also be another suspended thread.
   void RegisterAsLocked(Thread* self);
-
   void RegisterAsUnlocked(Thread* self);
   void CheckSafeToWait(Thread* self);
 
@@ -177,8 +173,6 @@
   // Returns true if acquires exclusive access, false otherwise.
   bool ExclusiveTryLock(Thread* self) TRY_ACQUIRE(true);
   bool TryLock(Thread* self) TRY_ACQUIRE(true) { return ExclusiveTryLock(self); }
-  // Equivalent to ExclusiveTryLock, but retry for a short period before giving up.
-  bool ExclusiveTryLockWithSpinning(Thread* self) TRY_ACQUIRE(true);
 
   // Release exclusive access.
   void ExclusiveUnlock(Thread* self) RELEASE();
@@ -206,9 +200,7 @@
   // whether we hold the lock; any other information may be invalidated before we return.
   pid_t GetExclusiveOwnerTid() const;
 
-  // Returns how many times this Mutex has been locked, it is typically better to use
-  // AssertHeld/NotHeld. For a simply held mutex this method returns 1. Should only be called
-  // while holding the mutex or threads are suspended.
+  // Returns how many times this Mutex has been locked, it is better to use AssertHeld/NotHeld.
   unsigned int GetDepth() const {
     return recursion_count_;
   }
@@ -220,18 +212,6 @@
 
   void WakeupToRespondToEmptyCheckpoint() override;
 
-#if ART_USE_FUTEXES
-  // Acquire the mutex n times, possibly on behalf of another thread. Acquisition must be
-  // uncontended. New_owner must be current thread or suspended.
-  // n must be >= 1. Mutex must be at level kMonitorLock.
-  // Not implementable for the pthreads version, so we must avoid calling it there.
-  void ExclusiveLockUncontendedFor(Thread* new_owner, unsigned int n);
-
-  // Undo the effect of the previous calling, setting the mutex back to unheld.
-  // Still assumes no concurrent access.
-  void ExclusiveUnlockUncontended();
-#endif  // ART_USE_FUTEXES
-
  private:
 #if ART_USE_FUTEXES
   // Low order bit: 0 is unheld, 1 is held.
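The state_and_contenders_ word referenced above packs the held bit and the contender count into one 32-bit value. A tiny sketch of that packing, assuming the count simply occupies the bits above the held bit (the exact shift and constants in ART's mutex.h may differ):

#include <cstdint>

// Low order bit: held flag. Remaining bits: number of contenders (assumed layout).
constexpr int32_t kHeldMask = 1;
constexpr int32_t kContenderShift = 1;           // Assumption for illustration.
constexpr int32_t kContenderIncrement = 1 << kContenderShift;

constexpr bool IsHeld(int32_t state) { return (state & kHeldMask) != 0; }
constexpr int32_t Contenders(int32_t state) { return state >> kContenderShift; }

// One holder plus three threads blocked in futex wait:
static_assert(IsHeld(kHeldMask + 3 * kContenderIncrement));
static_assert(Contenders(kHeldMask + 3 * kContenderIncrement) == 3);
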
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index ba2ae86..85b79da 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -2266,12 +2266,6 @@
                                 std::memory_order_release);
 }
 
-#pragma clang diagnostic push
-#if !ART_USE_FUTEXES
-// Frame gets too large, perhaps due to Bionic pthread_mutex_lock size. We don't care.
-#  pragma clang diagnostic ignored "-Wframe-larger-than="
-#endif
-// This has a large frame, but shouldn't be run anywhere near the stack limit.
 void Heap::PreZygoteFork() {
   if (!HasZygoteSpace()) {
     // We still want to GC in case there is some unreachable non moving objects that could cause a
@@ -2432,7 +2426,6 @@
     AddRememberedSet(post_zygote_non_moving_space_rem_set);
   }
 }
-#pragma clang diagnostic pop
 
 void Heap::FlushAllocStack() {
   MarkAllocStackAsLive(allocation_stack_.get());
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 30559a0..ac7890c 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -42,7 +42,6 @@
  *  |10|9|8|765432109876|5432109876543210|
  *  |00|m|r| lock count |thread id owner |
  *
- * The lock count is zero, but the owner is nonzero for a simply held lock.
  * When the lock word is in the "fat" state and its bits are formatted as follows:
  *
  *  |33|2|2|2222222211111111110000000000|
@@ -73,8 +72,7 @@
     kMarkBitStateSize = 1,
     // Number of bits to encode the thin lock owner.
     kThinLockOwnerSize = 16,
-    // Remaining bits are the recursive lock count. Zero means it is locked exactly once
-    // and not recursively.
+    // Remaining bits are the recursive lock count.
     kThinLockCountSize = 32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize -
         kMarkBitStateSize,
 
@@ -236,8 +234,7 @@
   // Return the owner thin lock thread id.
   uint32_t ThinLockOwner() const;
 
-  // Return the number of times a lock value has been re-locked. Only valid in thin-locked state.
-  // If the lock is held only once the return value is zero.
+  // Return the number of times a lock value has been locked.
   uint32_t ThinLockCount() const;
 
   // Return the Monitor encoded in a fat lock.
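As a concrete illustration of the thin-lock layout drawn in the comment above (owner thread id in the low 16 bits, recursive count above it, then read-barrier, mark, and state bits), here is a small decoding sketch. It only covers the thin-locked state; the real LockWord class also encodes hash, fat-lock, and forwarding-address states.

#include <cstdint>

constexpr uint32_t kStateSize = 2;
constexpr uint32_t kReadBarrierStateSize = 1;
constexpr uint32_t kMarkBitStateSize = 1;
constexpr uint32_t kThinLockOwnerSize = 16;
constexpr uint32_t kThinLockCountSize =
    32 - kThinLockOwnerSize - kStateSize - kReadBarrierStateSize - kMarkBitStateSize;  // 12 bits

constexpr uint32_t ThinLockOwner(uint32_t word) {
  return word & ((1u << kThinLockOwnerSize) - 1);
}

constexpr uint32_t ThinLockCount(uint32_t word) {
  return (word >> kThinLockOwnerSize) & ((1u << kThinLockCountSize) - 1);
}

// Thread 42 holding the lock, with 1 encoded in the recursive-count field:
constexpr uint32_t kExample = (1u << kThinLockOwnerSize) | 42u;
static_assert(ThinLockOwner(kExample) == 42u);
static_assert(ThinLockCount(kExample) == 1u);
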
diff --git a/runtime/monitor-inl.h b/runtime/monitor-inl.h
index 2a6feda..e8ffafa 100644
--- a/runtime/monitor-inl.h
+++ b/runtime/monitor-inl.h
@@ -29,74 +29,6 @@
   return obj_.Read<kReadBarrierOption>();
 }
 
-// Lock monitor lock n more times.
-void Monitor::LockMonitorLock(Thread* thread, int n) NO_THREAD_SAFETY_ANALYSIS {
-  // Since this only adjusts the number of times a lock is held, we pretend it
-  // doesn't acquire any locks.
-  // The expected value of n is zero; the obvious inefficiency doesn't matter.
-  for (int i = 0; i < n; ++i) {
-    monitor_lock_.Lock(thread);
-  }
-}
-
-// Unlock monitor n times, but not completely.
-void Monitor::UnlockMonitorLock(Thread* thread, int n) NO_THREAD_SAFETY_ANALYSIS {
-  // We lie about locking behavior as in UnlockMonitorLock().
-  for (int i = 0; i < n; ++i) {
-    monitor_lock_.Unlock(thread);
-  }
-}
-
-// Check for request to set lock owner info.
-void Monitor::CheckLockOwnerRequest(Thread* self) {
-  uint32_t request_tid = lock_owner_request_.load(std::memory_order_relaxed);
-  if (request_tid != 0 && request_tid == self->GetThreadId()) {
-    SetLockingMethod(self);
-    // Only do this the first time after a request.
-    lock_owner_request_.store(0, std::memory_order_relaxed);
-  }
-}
-
-uintptr_t Monitor::LockOwnerInfoChecksum(ArtMethod* m, uint32_t dex_pc, uint32_t thread_id) {
-  uintptr_t dpc_and_thread_id = static_cast<uintptr_t>((dex_pc << 8) ^ thread_id);
-  return reinterpret_cast<uintptr_t>(m) ^ dpc_and_thread_id
-      ^ (dpc_and_thread_id << (/* ptr_size / 2 */ (sizeof m) << 2));
-}
-
-void Monitor::SetLockOwnerInfo(ArtMethod* method, uint32_t dex_pc, uint32_t thread_id) {
-  lock_owner_method_.store(method, std::memory_order_relaxed);
-  lock_owner_dex_pc_.store(dex_pc, std::memory_order_relaxed);
-  lock_owner_thread_id_.store(thread_id, std::memory_order_relaxed);
-  uintptr_t sum = LockOwnerInfoChecksum(method, dex_pc, thread_id);
-  lock_owner_sum_.store(sum, std::memory_order_relaxed);
-}
-
-void Monitor::GetLockOwnerInfo(/*out*/ArtMethod** method, /*out*/uint32_t* dex_pc,
-                               uint32_t thread_id) {
-  ArtMethod* owners_method;
-  uint32_t owners_dex_pc;
-  uint32_t owners_thread_id;
-  uintptr_t owners_sum;
-  DCHECK_NE(thread_id, 0u);
-  do {
-    owners_thread_id = lock_owner_thread_id_.load(std::memory_order_relaxed);
-    if (owners_thread_id == 0u) {
-      break;
-    }
-    owners_method = lock_owner_method_.load(std::memory_order_relaxed);
-    owners_dex_pc = lock_owner_dex_pc_.load(std::memory_order_relaxed);
-    owners_sum = lock_owner_sum_.load(std::memory_order_relaxed);
-  } while (owners_sum != LockOwnerInfoChecksum(owners_method, owners_dex_pc, owners_thread_id));
-  if (owners_thread_id == thread_id) {
-    *method = owners_method;
-    *dex_pc = owners_dex_pc;
-  } else {
-    *method = nullptr;
-    *dex_pc = 0;
-  }
-}
-
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_MONITOR_INL_H_
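The removed SetLockOwnerInfo/GetLockOwnerInfo pair above publishes several fields with relaxed stores and validates a racy read against a checksum, a lighter-weight cousin of a seqlock. A stripped-down sketch of that pattern, with a hypothetical Info struct standing in for the method/dex-pc/thread-id triple:

#include <atomic>
#include <cstdint>

// Hypothetical payload; the real code stores ArtMethod*, a dex pc, and a thread id.
struct Info {
  uintptr_t a = 0;
  uint32_t b = 0;
  uint32_t c = 0;
};

static uintptr_t Checksum(const Info& i) {
  // Any mixing function works as long as writer and reader agree on it; a torn
  // read that still matches the checksum is assumed to be vanishingly unlikely.
  return i.a ^ (static_cast<uintptr_t>(i.b) << 1) ^ (static_cast<uintptr_t>(i.c) << 2);
}

static std::atomic<uintptr_t> g_a{0};
static std::atomic<uint32_t> g_b{0};
static std::atomic<uint32_t> g_c{0};
static std::atomic<uintptr_t> g_sum{0};

// Writer: publish the fields and their checksum, all with relaxed ordering.
void Publish(const Info& i) {
  g_a.store(i.a, std::memory_order_relaxed);
  g_b.store(i.b, std::memory_order_relaxed);
  g_c.store(i.c, std::memory_order_relaxed);
  g_sum.store(Checksum(i), std::memory_order_relaxed);
}

// Reader: re-read until the snapshot is internally consistent.
Info ReadConsistent() {
  Info i;
  do {
    i.a = g_a.load(std::memory_order_relaxed);
    i.b = g_b.load(std::memory_order_relaxed);
    i.c = g_c.load(std::memory_order_relaxed);
  } while (g_sum.load(std::memory_order_relaxed) != Checksum(i));
  return i;
}
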
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index b82296e..9d114ed 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -52,8 +52,7 @@
 /*
  * Every Object has a monitor associated with it, but not every Object is actually locked.  Even
  * the ones that are locked do not need a full-fledged monitor until a) there is actual contention
- * or b) wait() is called on the Object, or (c) we need to lock an object that also has an
- * identity hashcode.
+ * or b) wait() is called on the Object.
  *
  * For Android, we have implemented a scheme similar to the one described in Bacon et al.'s
  * "Thin locks: featherweight synchronization for Java" (ACM 1998).  Things are even easier for us,
@@ -91,15 +90,17 @@
 }
 
 Monitor::Monitor(Thread* self, Thread* owner, ObjPtr<mirror::Object> obj, int32_t hash_code)
-    : monitor_lock_("a monitor lock", kMonitorLock, /*recursive=*/ true),
+    : monitor_lock_("a monitor lock", kMonitorLock),
+      monitor_contenders_("monitor contenders", monitor_lock_),
       num_waiters_(0),
       owner_(owner),
+      lock_count_(0),
       obj_(GcRoot<mirror::Object>(obj)),
       wait_set_(nullptr),
       wake_set_(nullptr),
       hash_code_(hash_code),
-      lock_owner_method_(nullptr),
-      lock_owner_dex_pc_(0),
+      locking_method_(nullptr),
+      locking_dex_pc_(0),
       monitor_id_(MonitorPool::ComputeMonitorId(this, self)) {
 #ifdef __LP64__
   DCHECK(false) << "Should not be reached in 64b";
@@ -116,15 +117,17 @@
                  ObjPtr<mirror::Object> obj,
                  int32_t hash_code,
                  MonitorId id)
-    : monitor_lock_("a monitor lock", kMonitorLock, /*recursive=*/ true),
+    : monitor_lock_("a monitor lock", kMonitorLock),
+      monitor_contenders_("monitor contenders", monitor_lock_),
       num_waiters_(0),
       owner_(owner),
+      lock_count_(0),
       obj_(GcRoot<mirror::Object>(obj)),
       wait_set_(nullptr),
       wake_set_(nullptr),
       hash_code_(hash_code),
-      lock_owner_thread_id_(0),
-      lock_owner_request_(0),
+      locking_method_(nullptr),
+      locking_dex_pc_(0),
       monitor_id_(id) {
 #ifdef __LP64__
   next_free_ = nullptr;
@@ -147,105 +150,20 @@
   return hc;
 }
 
-void Monitor::SetLockingMethod(Thread* owner) {
-  DCHECK(owner == Thread::Current() || owner->IsSuspended());
-  // Do not abort on dex pc errors. This can easily happen when we want to dump a stack trace on
-  // abort.
-  ArtMethod* lock_owner_method;
-  uint32_t lock_owner_dex_pc;
-  lock_owner_method = owner->GetCurrentMethod(&lock_owner_dex_pc, false);
-  if (lock_owner_method != nullptr && UNLIKELY(lock_owner_method->IsProxyMethod())) {
-    // Grab another frame. Proxy methods are not helpful for lock profiling. This should be rare
-    // enough that it's OK to walk the stack twice.
-    struct NextMethodVisitor final : public StackVisitor {
-      explicit NextMethodVisitor(Thread* thread) REQUIRES_SHARED(Locks::mutator_lock_)
-          : StackVisitor(thread,
-                         nullptr,
-                         StackVisitor::StackWalkKind::kIncludeInlinedFrames,
-                         false),
-            count_(0),
-            method_(nullptr),
-            dex_pc_(0) {}
-      bool VisitFrame() override REQUIRES_SHARED(Locks::mutator_lock_) {
-        ArtMethod* m = GetMethod();
-        if (m->IsRuntimeMethod()) {
-          // Continue if this is a runtime method.
-          return true;
-        }
-        count_++;
-        if (count_ == 2u) {
-          method_ = m;
-          dex_pc_ = GetDexPc(false);
-          return false;
-        }
-        return true;
-      }
-      size_t count_;
-      ArtMethod* method_;
-      uint32_t dex_pc_;
-    };
-    NextMethodVisitor nmv(owner_.load(std::memory_order_relaxed));
-    nmv.WalkStack();
-    lock_owner_method = nmv.method_;
-    lock_owner_dex_pc = nmv.dex_pc_;
-  }
-  SetLockOwnerInfo(lock_owner_method, lock_owner_dex_pc, owner->GetThreadId());
-  DCHECK(lock_owner_method == nullptr || !lock_owner_method->IsProxyMethod());
-}
-
-void Monitor::SetLockingMethodNoProxy(Thread *owner) {
-  DCHECK(owner == Thread::Current());
-  uint32_t lock_owner_dex_pc;
-  ArtMethod* lock_owner_method = owner->GetCurrentMethod(&lock_owner_dex_pc);
-  // We don't expect a proxy method here.
-  DCHECK(lock_owner_method == nullptr || !lock_owner_method->IsProxyMethod());
-  SetLockOwnerInfo(lock_owner_method, lock_owner_dex_pc, owner->GetThreadId());
-}
-
-bool Monitor::Install(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
-  // This may or may not result in acquiring monitor_lock_. Its behavior is much more complicated
-  // than what clang thread safety analysis understands.
-  // Monitor is not yet public.
-  Thread* owner = owner_.load(std::memory_order_relaxed);
-  CHECK(owner == nullptr || owner == self || (ART_USE_FUTEXES && owner->IsSuspended()));
+bool Monitor::Install(Thread* self) {
+  MutexLock mu(self, monitor_lock_);  // Uncontended mutex acquisition as monitor isn't yet public.
+  CHECK(owner_ == nullptr || owner_ == self || owner_->IsSuspended());
   // Propagate the lock state.
   LockWord lw(GetObject()->GetLockWord(false));
   switch (lw.GetState()) {
     case LockWord::kThinLocked: {
-      DCHECK(owner != nullptr);
-      CHECK_EQ(owner->GetThreadId(), lw.ThinLockOwner());
-      uint32_t lockCount = lw.ThinLockCount();
-      // lockCount is 0 for a simply held lock.
-#if ART_USE_FUTEXES
-      monitor_lock_.ExclusiveLockUncontendedFor(owner, lockCount + 1);
-#else
-      for (uint32_t i = 0; i <= lockCount; ++i) {
-        monitor_lock_.ExclusiveLock(owner);
-      }
-#endif
-      LockWord fat(this, lw.GCState());
-      // Publish the updated lock word, which may race with other threads.
-      bool success = GetObject()->CasLockWord(lw, fat, CASMode::kWeak, std::memory_order_release);
-      if (success) {
-        if (ATraceEnabled()) {
-          SetLockingMethod(owner);
-        }
-        return true;
-      } else {
-#if ART_USE_FUTEXES
-        monitor_lock_.ExclusiveUnlockUncontended();
-#else
-        for (uint32_t i = 0; i <= lockCount; ++i) {
-          monitor_lock_.ExclusiveUnlock(owner);
-        }
-#endif
-        return false;
-      }
+      CHECK_EQ(owner_->GetThreadId(), lw.ThinLockOwner());
+      lock_count_ = lw.ThinLockCount();
+      break;
     }
     case LockWord::kHashCode: {
       CHECK_EQ(hash_code_.load(std::memory_order_relaxed), static_cast<int32_t>(lw.GetHashCode()));
-      LockWord fat(this, lw.GCState());
-      return GetObject()->CasLockWord(lw, fat, CASMode::kWeak, std::memory_order_release);
+      break;
     }
     case LockWord::kFatLocked: {
       // The owner_ is suspended but another thread beat us to install a monitor.
@@ -260,6 +178,52 @@
       UNREACHABLE();
     }
   }
+  LockWord fat(this, lw.GCState());
+  // Publish the updated lock word, which may race with other threads.
+  bool success = GetObject()->CasLockWord(lw, fat, CASMode::kWeak, std::memory_order_release);
+  // Lock profiling.
+  if (success && owner_ != nullptr && lock_profiling_threshold_ != 0) {
+    // Do not abort on dex pc errors. This can easily happen when we want to dump a stack trace on
+    // abort.
+    locking_method_ = owner_->GetCurrentMethod(&locking_dex_pc_, false);
+    if (locking_method_ != nullptr && UNLIKELY(locking_method_->IsProxyMethod())) {
+      // Grab another frame. Proxy methods are not helpful for lock profiling. This should be rare
+      // enough that it's OK to walk the stack twice.
+      struct NextMethodVisitor final : public StackVisitor {
+        explicit NextMethodVisitor(Thread* thread) REQUIRES_SHARED(Locks::mutator_lock_)
+            : StackVisitor(thread,
+                           nullptr,
+                           StackVisitor::StackWalkKind::kIncludeInlinedFrames,
+                           false),
+              count_(0),
+              method_(nullptr),
+              dex_pc_(0) {}
+        bool VisitFrame() override REQUIRES_SHARED(Locks::mutator_lock_) {
+          ArtMethod* m = GetMethod();
+          if (m->IsRuntimeMethod()) {
+            // Continue if this is a runtime method.
+            return true;
+          }
+          count_++;
+          if (count_ == 2u) {
+            method_ = m;
+            dex_pc_ = GetDexPc(false);
+            return false;
+          }
+          return true;
+        }
+        size_t count_;
+        ArtMethod* method_;
+        uint32_t dex_pc_;
+      };
+      NextMethodVisitor nmv(owner_);
+      nmv.WalkStack();
+      locking_method_ = nmv.method_;
+      locking_dex_pc_ = nmv.dex_pc_;
+    }
+    DCHECK(locking_method_ == nullptr || !locking_method_->IsProxyMethod());
+  }
+  return success;
 }
 
 Monitor::~Monitor() {
@@ -407,222 +371,226 @@
   return oss.str();
 }
 
-bool Monitor::TryLock(Thread* self, bool spin) {
-  // The monitor_lock_ acquisition handles the recursive case.
-  bool success = spin ? monitor_lock_.ExclusiveTryLockWithSpinning(self)
-      : monitor_lock_.ExclusiveTryLock(self);
-  if (success) {
-    Thread * owner = owner_.load(std::memory_order_relaxed);
-    if (owner == nullptr) {  // Unowned.
-      owner_.store(self, std::memory_order_relaxed);
-      DCHECK_EQ(monitor_lock_.GetDepth(), 1u);
-      if (ATraceEnabled()) {
-        SetLockingMethodNoProxy(self);
-      }
-    } else {
-      DCHECK_EQ(owner, self);
+bool Monitor::TryLockLocked(Thread* self) {
+  if (owner_ == nullptr) {  // Unowned.
+    owner_ = self;
+    CHECK_EQ(lock_count_, 0);
+    // When debugging, save the current monitor holder for future
+    // acquisition failures to use in sampled logging.
+    if (lock_profiling_threshold_ != 0) {
+      locking_method_ = self->GetCurrentMethod(&locking_dex_pc_);
+      // We don't expect a proxy method here.
+      DCHECK(locking_method_ == nullptr || !locking_method_->IsProxyMethod());
     }
-    AtraceMonitorLock(self, GetObject(), /* is_wait= */ false);
-    DCHECK(monitor_lock_.IsExclusiveHeld(self));
+  } else if (owner_ == self) {  // Recursive.
+    lock_count_++;
+  } else {
+    return false;
   }
-  return success;
+  AtraceMonitorLock(self, GetObject(), /* is_wait= */ false);
+  return true;
 }
 
+bool Monitor::TryLock(Thread* self) {
+  MutexLock mu(self, monitor_lock_);
+  return TryLockLocked(self);
+}
+
+// Asserts that a mutex isn't held when the class comes into and out of scope.
+class ScopedAssertNotHeld {
+ public:
+  ScopedAssertNotHeld(Thread* self, Mutex& mu) : self_(self), mu_(mu) {
+    mu_.AssertNotHeld(self_);
+  }
+
+  ~ScopedAssertNotHeld() {
+    mu_.AssertNotHeld(self_);
+  }
+
+ private:
+  Thread* const self_;
+  Mutex& mu_;
+  DISALLOW_COPY_AND_ASSIGN(ScopedAssertNotHeld);
+};
+
 template <LockReason reason>
 void Monitor::Lock(Thread* self) {
+  ScopedAssertNotHeld sanh(self, monitor_lock_);
   bool called_monitors_callback = false;
-  if (TryLock(self, /*spin=*/ true)) {
-    // TODO: This preserves original behavior. Correct?
-    if (called_monitors_callback) {
-      CHECK(reason == LockReason::kForLock);
-      Runtime::Current()->GetRuntimeCallbacks()->MonitorContendedLocked(this);
+  monitor_lock_.Lock(self);
+  while (true) {
+    if (TryLockLocked(self)) {
+      break;
     }
-    return;
-  }
-  // Contended. We hold no locks, so tread carefully.
-  const bool log_contention = (lock_profiling_threshold_ != 0);
-  uint64_t wait_start_ms = log_contention ? MilliTime() : 0;
+    // Contended.
+    const bool log_contention = (lock_profiling_threshold_ != 0);
+    uint64_t wait_start_ms = log_contention ? MilliTime() : 0;
+    ArtMethod* owners_method = locking_method_;
+    uint32_t owners_dex_pc = locking_dex_pc_;
+    // Do this before releasing the lock so that we don't get deflated.
+    size_t num_waiters = num_waiters_;
+    ++num_waiters_;
 
-  Thread* orig_owner = owner_.load(std::memory_order_relaxed);
-  const uint32_t orig_owner_thread_id = orig_owner == nullptr? 0u : orig_owner->GetThreadId();
-  ArtMethod* owners_method;
-  uint32_t owners_dex_pc;
-
-  // Do this before releasing the mutator lock so that we don't get deflated.
-  size_t num_waiters = num_waiters_.fetch_add(1, std::memory_order_relaxed);
-
-  bool started_trace = false;
-  if (ATraceEnabled()) {
-    if (orig_owner != nullptr) {  // Did the owner_ give the lock up?
-      GetLockOwnerInfo(&owners_method, &owners_dex_pc, orig_owner_thread_id);
-      // Acquiring thread_list_lock_ ensures that owner doesn't disappear while
-      // we're looking at it.
-      Locks::thread_list_lock_->ExclusiveLock(self);
-      std::ostringstream oss;
-      std::string name;
-      orig_owner->GetThreadName(name);
-      oss << PrettyContentionInfo(name,
-                                  orig_owner_thread_id,
-                                  owners_method,
-                                  owners_dex_pc,
-                                  num_waiters);
-      Locks::thread_list_lock_->ExclusiveUnlock(self);
-      // Add info for contending thread.
-      uint32_t pc;
-      ArtMethod* m = self->GetCurrentMethod(&pc);
-      const char* filename;
-      int32_t line_number;
-      TranslateLocation(m, pc, &filename, &line_number);
-      oss << " blocking from "
-          << ArtMethod::PrettyMethod(m) << "(" << (filename != nullptr ? filename : "null")
-          << ":" << line_number << ")";
-      ATraceBegin(oss.str().c_str());
-      started_trace = true;
+    // If systrace logging is enabled, first look at the lock owner. Acquiring the monitor's
+    // lock and then re-acquiring the mutator lock can deadlock.
+    bool started_trace = false;
+    if (ATraceEnabled()) {
+      if (owner_ != nullptr) {  // Did the owner_ give the lock up?
+        std::ostringstream oss;
+        std::string name;
+        owner_->GetThreadName(name);
+        oss << PrettyContentionInfo(name,
+                                    owner_->GetTid(),
+                                    owners_method,
+                                    owners_dex_pc,
+                                    num_waiters);
+        // Add info for contending thread.
+        uint32_t pc;
+        ArtMethod* m = self->GetCurrentMethod(&pc);
+        const char* filename;
+        int32_t line_number;
+        TranslateLocation(m, pc, &filename, &line_number);
+        oss << " blocking from "
+            << ArtMethod::PrettyMethod(m) << "(" << (filename != nullptr ? filename : "null")
+            << ":" << line_number << ")";
+        ATraceBegin(oss.str().c_str());
+        started_trace = true;
+      }
     }
-  }
-  // Call the contended locking cb once and only once. Also only call it if we are locking for
-  // the first time, not during a Wait wakeup.
-  if (reason == LockReason::kForLock && !called_monitors_callback) {
-    called_monitors_callback = true;
-    Runtime::Current()->GetRuntimeCallbacks()->MonitorContendedLocking(this);
-  }
-  if (log_contention) {
-    // Request the current holder to set lock_owner_info.
-    // Do this even if tracing is enabled, so we semi-consistently get the information
-    // corresponding to MonitorExit.
-    // TODO: Consider optionally obtaining a stack trace here via a checkpoint.  That would allow
-    // us to see what the other thread is doing while we're waiting.
-    lock_owner_request_.store(orig_owner_thread_id, std::memory_order_relaxed);
-  }
-  self->SetMonitorEnterObject(GetObject().Ptr());
-  {
-    ScopedThreadSuspension tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
 
-    // Acquire monitor_lock_ without mutator_lock_, expecting to block this time.
-    monitor_lock_.Lock(self);
-    owner_.store(self, std::memory_order_relaxed);
-
-    if (orig_owner_thread_id != 0u) {
-      // Woken from contention.
-      if (log_contention) {
-        uint64_t wait_ms = MilliTime() - wait_start_ms;
-        uint32_t sample_percent;
-        if (wait_ms >= lock_profiling_threshold_) {
-          sample_percent = 100;
-        } else {
-          sample_percent = 100 * wait_ms / lock_profiling_threshold_;
+    monitor_lock_.Unlock(self);  // Let go of locks in order.
+    // Call the contended locking cb once and only once. Also only call it if we are locking for
+    // the first time, not during a Wait wakeup.
+    if (reason == LockReason::kForLock && !called_monitors_callback) {
+      called_monitors_callback = true;
+      Runtime::Current()->GetRuntimeCallbacks()->MonitorContendedLocking(this);
+    }
+    self->SetMonitorEnterObject(GetObject().Ptr());
+    {
+      ScopedThreadSuspension tsc(self, kBlocked);  // Change to blocked and give up mutator_lock_.
+      uint32_t original_owner_thread_id = 0u;
+      {
+        // Reacquire monitor_lock_ without mutator_lock_ for Wait.
+        MutexLock mu2(self, monitor_lock_);
+        if (owner_ != nullptr) {  // Did the owner_ give the lock up?
+          original_owner_thread_id = owner_->GetThreadId();
+          monitor_contenders_.Wait(self);  // Still contended so wait.
         }
-        if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
-          // Do this unconditionally for consistency. It's possible another thread
-          // snuck in in the middle, and tracing was enabled. In that case, we may get its
-          // MonitorEnter information. We can live with that.
-          GetLockOwnerInfo(&owners_method, &owners_dex_pc, orig_owner_thread_id);
-
-          // Reacquire mutator_lock_ for logging.
-          ScopedObjectAccess soa(self);
-
-          bool owner_alive = false;
-          uint32_t original_owner_tid;  // System thread id, not original_owner_thread_id!
-          std::string original_owner_name;
-
-          const bool should_dump_stacks = stack_dump_lock_profiling_threshold_ > 0 &&
-              wait_ms > stack_dump_lock_profiling_threshold_;
-          std::string owner_stack_dump;
-
-          // Acquire thread-list lock to find thread and keep it from dying until we've got all
-          // the info we need.
-          {
-            Locks::thread_list_lock_->ExclusiveLock(self);
-
-            // Re-find the owner in case the thread got killed.
-            Thread* original_owner = Runtime::Current()->GetThreadList()->FindThreadByThreadId(
-                orig_owner_thread_id);
-
-            if (original_owner != nullptr) {
-              owner_alive = true;
-              original_owner_tid = original_owner->GetTid();
-              original_owner->GetThreadName(original_owner_name);
-
-              if (should_dump_stacks) {
-                // Very long contention. Dump stacks.
-                struct CollectStackTrace : public Closure {
-                  void Run(art::Thread* thread) override
-                      REQUIRES_SHARED(art::Locks::mutator_lock_) {
-                    thread->DumpJavaStack(oss);
-                  }
-
-                  std::ostringstream oss;
-                };
-                CollectStackTrace owner_trace;
-                // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its
-                // execution.
-                original_owner->RequestSynchronousCheckpoint(&owner_trace);
-                owner_stack_dump = owner_trace.oss.str();
-              } else {
-                Locks::thread_list_lock_->ExclusiveUnlock(self);
-              }
-            } else {
-              Locks::thread_list_lock_->ExclusiveUnlock(self);
-            }
-            // This is all the data we need. Now drop the thread-list lock, it's OK for the
-            // owner to go away now.
+      }
+      if (original_owner_thread_id != 0u) {
+        // Woken from contention.
+        if (log_contention) {
+          uint64_t wait_ms = MilliTime() - wait_start_ms;
+          uint32_t sample_percent;
+          if (wait_ms >= lock_profiling_threshold_) {
+            sample_percent = 100;
+          } else {
+            sample_percent = 100 * wait_ms / lock_profiling_threshold_;
           }
+          if (sample_percent != 0 && (static_cast<uint32_t>(rand() % 100) < sample_percent)) {
+            // Reacquire mutator_lock_ for logging.
+            ScopedObjectAccess soa(self);
 
-          // If we found the owner (and thus have owner data), go and log now.
-          if (owner_alive) {
-            // Give the detailed traces for really long contention.
-            if (should_dump_stacks) {
-              // This must be here (and not above) because we cannot hold the thread-list lock
-              // while running the checkpoint.
-              std::ostringstream self_trace_oss;
-              self->DumpJavaStack(self_trace_oss);
+            bool owner_alive = false;
+            pid_t original_owner_tid = 0;
+            std::string original_owner_name;
 
-              uint32_t pc;
-              ArtMethod* m = self->GetCurrentMethod(&pc);
+            const bool should_dump_stacks = stack_dump_lock_profiling_threshold_ > 0 &&
+                wait_ms > stack_dump_lock_profiling_threshold_;
+            std::string owner_stack_dump;
 
-              LOG(WARNING) << "Long "
-                  << PrettyContentionInfo(original_owner_name,
-                                          original_owner_tid,
-                                          owners_method,
-                                          owners_dex_pc,
-                                          num_waiters)
-                  << " in " << ArtMethod::PrettyMethod(m) << " for "
-                  << PrettyDuration(MsToNs(wait_ms)) << "\n"
-                  << "Current owner stack:\n" << owner_stack_dump
-                  << "Contender stack:\n" << self_trace_oss.str();
-            } else if (wait_ms > kLongWaitMs && owners_method != nullptr) {
-              uint32_t pc;
-              ArtMethod* m = self->GetCurrentMethod(&pc);
-              // TODO: We should maybe check that original_owner is still a live thread.
-              LOG(WARNING) << "Long "
-                  << PrettyContentionInfo(original_owner_name,
-                                          original_owner_tid,
-                                          owners_method,
-                                          owners_dex_pc,
-                                          num_waiters)
-                  << " in " << ArtMethod::PrettyMethod(m) << " for "
-                  << PrettyDuration(MsToNs(wait_ms));
+            // Acquire thread-list lock to find thread and keep it from dying until we've got all
+            // the info we need.
+            {
+              Locks::thread_list_lock_->ExclusiveLock(Thread::Current());
+
+              // Re-find the owner in case the thread got killed.
+              Thread* original_owner = Runtime::Current()->GetThreadList()->FindThreadByThreadId(
+                  original_owner_thread_id);
+
+              if (original_owner != nullptr) {
+                owner_alive = true;
+                original_owner_tid = original_owner->GetTid();
+                original_owner->GetThreadName(original_owner_name);
+
+                if (should_dump_stacks) {
+                  // Very long contention. Dump stacks.
+                  struct CollectStackTrace : public Closure {
+                    void Run(art::Thread* thread) override
+                        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+                      thread->DumpJavaStack(oss);
+                    }
+
+                    std::ostringstream oss;
+                  };
+                  CollectStackTrace owner_trace;
+                  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its
+                  // execution.
+                  original_owner->RequestSynchronousCheckpoint(&owner_trace);
+                  owner_stack_dump = owner_trace.oss.str();
+                } else {
+                  Locks::thread_list_lock_->ExclusiveUnlock(Thread::Current());
+                }
+              } else {
+                Locks::thread_list_lock_->ExclusiveUnlock(Thread::Current());
+              }
+              // This is all the data we need. Now drop the thread-list lock, it's OK for the
+              // owner to go away now.
             }
-            LogContentionEvent(self,
-                              wait_ms,
-                              sample_percent,
-                              owners_method,
-                              owners_dex_pc);
+
+            // If we found the owner (and thus have owner data), go and log now.
+            if (owner_alive) {
+              // Give the detailed traces for really long contention.
+              if (should_dump_stacks) {
+                // This must be here (and not above) because we cannot hold the thread-list lock
+                // while running the checkpoint.
+                std::ostringstream self_trace_oss;
+                self->DumpJavaStack(self_trace_oss);
+
+                uint32_t pc;
+                ArtMethod* m = self->GetCurrentMethod(&pc);
+
+                LOG(WARNING) << "Long "
+                    << PrettyContentionInfo(original_owner_name,
+                                            original_owner_tid,
+                                            owners_method,
+                                            owners_dex_pc,
+                                            num_waiters)
+                    << " in " << ArtMethod::PrettyMethod(m) << " for "
+                    << PrettyDuration(MsToNs(wait_ms)) << "\n"
+                    << "Current owner stack:\n" << owner_stack_dump
+                    << "Contender stack:\n" << self_trace_oss.str();
+              } else if (wait_ms > kLongWaitMs && owners_method != nullptr) {
+                uint32_t pc;
+                ArtMethod* m = self->GetCurrentMethod(&pc);
+                // TODO: We should maybe check that original_owner is still a live thread.
+                LOG(WARNING) << "Long "
+                    << PrettyContentionInfo(original_owner_name,
+                                            original_owner_tid,
+                                            owners_method,
+                                            owners_dex_pc,
+                                            num_waiters)
+                    << " in " << ArtMethod::PrettyMethod(m) << " for "
+                    << PrettyDuration(MsToNs(wait_ms));
+              }
+              LogContentionEvent(self,
+                                wait_ms,
+                                sample_percent,
+                                owners_method,
+                                owners_dex_pc);
+            }
           }
         }
       }
     }
+    if (started_trace) {
+      ATraceEnd();
+    }
+    self->SetMonitorEnterObject(nullptr);
+    monitor_lock_.Lock(self);  // Reacquire locks in order.
+    --num_waiters_;
   }
-  // We've successfully acquired monitor_lock_ and set owner_.
-  if (ATraceEnabled()) {
-    SetLockingMethodNoProxy(self);
-  }
-  if (started_trace) {
-    ATraceEnd();
-  }
-  self->SetMonitorEnterObject(nullptr);
-  num_waiters_.fetch_sub(1, std::memory_order_relaxed);
-  DCHECK(monitor_lock_.IsExclusiveHeld(self));
+  monitor_lock_.Unlock(self);
   // We need to pair this with a single contended locking call. NB we match the RI behavior and call
   // this even if MonitorEnter failed.
   if (called_monitors_callback) {
@@ -666,6 +634,7 @@
                            uint32_t expected_owner_thread_id,
                            uint32_t found_owner_thread_id,
                            Monitor* monitor) {
+  // Acquire thread list lock so threads won't disappear from under us.
   std::string current_owner_string;
   std::string expected_owner_string;
   std::string found_owner_string;
@@ -731,37 +700,39 @@
 
 bool Monitor::Unlock(Thread* self) {
   DCHECK(self != nullptr);
-  Thread* owner = owner_.load(std::memory_order_relaxed);
-  if (owner == self) {
-    // We own the monitor, so nobody else can be in here.
-    CheckLockOwnerRequest(self);
-    AtraceMonitorUnlock();
-    if (monitor_lock_.GetDepth() == 1) {
-      owner_.store(nullptr, std::memory_order_relaxed);
-      SignalWaiterAndReleaseMonitorLock(self);
-    } else {
-      monitor_lock_.Unlock(self);
-      DCHECK(monitor_lock_.IsExclusiveHeld(self));
-      DCHECK_EQ(owner_.load(std::memory_order_relaxed), self);
-    }
-    return true;
-  }
-  // We don't own this, so we're not allowed to unlock it.
-  // The JNI spec says that we should throw IllegalMonitorStateException in this case.
   uint32_t owner_thread_id = 0u;
+  DCHECK(!monitor_lock_.IsExclusiveHeld(self));
+  monitor_lock_.Lock(self);
+  Thread* owner = owner_;
   if (owner != nullptr) {
     owner_thread_id = owner->GetThreadId();
   }
-  // Pretends to release monitor_lock_, which we should not.
+  if (owner == self) {
+    // We own the monitor, so nobody else can be in here.
+    AtraceMonitorUnlock();
+    if (lock_count_ == 0) {
+      owner_ = nullptr;
+      locking_method_ = nullptr;
+      locking_dex_pc_ = 0;
+      SignalContendersAndReleaseMonitorLock(self);
+      return true;
+    } else {
+      --lock_count_;
+      monitor_lock_.Unlock(self);
+      return true;
+    }
+  }
+  // We don't own this, so we're not allowed to unlock it.
+  // The JNI spec says that we should throw IllegalMonitorStateException in this case.
   FailedUnlock(GetObject(), self->GetThreadId(), owner_thread_id, this);
-  FakeUnlockMonitorLock();
+  monitor_lock_.Unlock(self);
   return false;
 }
 
-void Monitor::SignalWaiterAndReleaseMonitorLock(Thread* self) {
-  // We want to release the monitor and signal up to one thread that was waiting
-  // but has since been notified.
-  DCHECK_EQ(monitor_lock_.GetDepth(), 1u);
+void Monitor::SignalContendersAndReleaseMonitorLock(Thread* self) {
+  // We want to signal one thread to wake up, to acquire the monitor that
+  // we are releasing. This could either be a Thread waiting on its own
+  // ConditionVariable, or a thread waiting on monitor_contenders_.
   while (wake_set_ != nullptr) {
     // No risk of waking ourselves here; since monitor_lock_ is not released until we're ready to
     // return, notify can't move the current thread from wait_set_ to wake_set_ until this
@@ -769,7 +740,6 @@
     Thread* thread = wake_set_;
     wake_set_ = thread->GetWaitNext();
     thread->SetWaitNext(nullptr);
-    owner_.store(nullptr, std::memory_order_relaxed);
 
     // Check to see if the thread is still waiting.
     {
@@ -794,14 +764,16 @@
         // Release the lock, so that a potentially awakened thread will not
         // immediately contend on it. The lock ordering here is:
         // monitor_lock_, self->GetWaitMutex, thread->GetWaitMutex
-        monitor_lock_.Unlock(self);  // Releases contenders.
+        monitor_lock_.Unlock(self);
         thread->GetWaitConditionVariable()->Signal(self);
         return;
       }
     }
   }
+  // If we didn't wake any threads that were originally waiting on us,
+  // wake a contender.
+  monitor_contenders_.Signal(self);
   monitor_lock_.Unlock(self);
-  DCHECK(!monitor_lock_.IsExclusiveHeld(self));
 }
 
 void Monitor::Wait(Thread* self, int64_t ms, int32_t ns,
@@ -809,8 +781,11 @@
   DCHECK(self != nullptr);
   DCHECK(why == kTimedWaiting || why == kWaiting || why == kSleeping);
 
+  monitor_lock_.Lock(self);
+
   // Make sure that we hold the lock.
-  if (owner_.load(std::memory_order_relaxed) != self) {
+  if (owner_ != self) {
+    monitor_lock_.Unlock(self);
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
     return;
   }
@@ -823,18 +798,23 @@
 
   // Enforce the timeout range.
   if (ms < 0 || ns < 0 || ns > 999999) {
+    monitor_lock_.Unlock(self);
     self->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
                              "timeout arguments out of range: ms=%" PRId64 " ns=%d", ms, ns);
     return;
   }
 
-  CheckLockOwnerRequest(self);
-
   /*
    * Release our hold - we need to let it go even if we're a few levels
    * deep in a recursive lock, and we need to restore that later.
    */
-  unsigned int prev_lock_count = monitor_lock_.GetDepth();
+  int prev_lock_count = lock_count_;
+  lock_count_ = 0;
+  owner_ = nullptr;
+  ArtMethod* saved_method = locking_method_;
+  locking_method_ = nullptr;
+  uintptr_t saved_dex_pc = locking_dex_pc_;
+  locking_dex_pc_ = 0;
 
   AtraceMonitorUnlock();  // For the implict Unlock() just above. This will only end the deepest
                           // nesting, but that is enough for the visualization, and corresponds to
@@ -860,7 +840,7 @@
      * until we've signalled contenders on this monitor.
      */
     AppendToWaitSet(self);
-    num_waiters_.fetch_add(1, std::memory_order_relaxed);
+    ++num_waiters_;
 
 
     // Set wait_monitor_ to the monitor object we will be waiting on. When wait_monitor_ is
@@ -870,10 +850,7 @@
     self->SetWaitMonitor(this);
 
     // Release the monitor lock.
-    owner_.store(nullptr, std::memory_order_relaxed);
-    UnlockMonitorLock(self, prev_lock_count - 1);
-    DCHECK(monitor_lock_.IsExclusiveHeld(self));
-    SignalWaiterAndReleaseMonitorLock(self);
+    SignalContendersAndReleaseMonitorLock(self);
 
     // Handle the case where the thread was interrupted before we called wait().
     if (self->IsInterrupted()) {
@@ -922,19 +899,30 @@
 
   // Re-acquire the monitor and lock.
   Lock<LockReason::kForWait>(self);
-  LockMonitorLock(self, prev_lock_count - 1);
-  DCHECK(monitor_lock_.IsExclusiveHeld(self));
-  DCHECK_EQ(monitor_lock_.GetDepth(), prev_lock_count);
+  monitor_lock_.Lock(self);
   self->GetWaitMutex()->AssertNotHeld(self);
 
-  num_waiters_.fetch_sub(1, std::memory_order_relaxed);
+  /*
+   * We remove our thread from wait set after restoring the count
+   * and owner fields so the subroutine can check that the calling
+   * thread owns the monitor. Aside from that, the order of member
+   * updates is not order sensitive as we hold the pthread mutex.
+   */
+  owner_ = self;
+  lock_count_ = prev_lock_count;
+  locking_method_ = saved_method;
+  locking_dex_pc_ = saved_dex_pc;
+  --num_waiters_;
   RemoveFromWaitSet(self);
+
+  monitor_lock_.Unlock(self);
 }
 
 void Monitor::Notify(Thread* self) {
   DCHECK(self != nullptr);
+  MutexLock mu(self, monitor_lock_);
   // Make sure that we hold the lock.
-  if (owner_.load(std::memory_order_relaxed) != self) {
+  if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
     return;
   }
@@ -949,8 +937,9 @@
 
 void Monitor::NotifyAll(Thread* self) {
   DCHECK(self != nullptr);
+  MutexLock mu(self, monitor_lock_);
   // Make sure that we hold the lock.
-  if (owner_.load(std::memory_order_relaxed) != self) {
+  if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
     return;
   }
@@ -979,20 +968,29 @@
   if (lw.GetState() == LockWord::kFatLocked) {
     Monitor* monitor = lw.FatLockMonitor();
     DCHECK(monitor != nullptr);
-    // Can't deflate if we have anybody waiting on the CV or trying to acquire the monitor.
-    if (monitor->num_waiters_.load(std::memory_order_relaxed) > 0) {
+    MutexLock mu(self, monitor->monitor_lock_);
+    // Can't deflate if we have anybody waiting on the CV.
+    if (monitor->num_waiters_ > 0) {
       return false;
     }
-    if (!monitor->monitor_lock_.ExclusiveTryLock(self)) {
-      // We cannot deflate a monitor that's currently held. It's unclear whether we should if
-      // we could.
-      return false;
-    }
-    Thread* owner = monitor->owner_.load(std::memory_order_relaxed);
+    Thread* owner = monitor->owner_;
     if (owner != nullptr) {
-      DCHECK(owner == self);
-      monitor->monitor_lock_.ExclusiveUnlock(self);
-      return false;
+      // Can't deflate if we are locked and have a hash code.
+      if (monitor->HasHashCode()) {
+        return false;
+      }
+      // Can't deflate if our lock count is too high.
+      if (static_cast<uint32_t>(monitor->lock_count_) > LockWord::kThinLockMaxCount) {
+        return false;
+      }
+      // Deflate to a thin lock.
+      LockWord new_lw = LockWord::FromThinLockId(owner->GetThreadId(),
+                                                 monitor->lock_count_,
+                                                 lw.GCState());
+      // Assume no concurrent read barrier state changes as mutators are suspended.
+      obj->SetLockWord(new_lw, false);
+      VLOG(monitor) << "Deflated " << obj << " to thin lock " << owner->GetTid() << " / "
+          << monitor->lock_count_;
     } else if (monitor->HasHashCode()) {
       LockWord new_lw = LockWord::FromHashCode(monitor->GetHashCode(), lw.GCState());
       // Assume no concurrent read barrier state changes as mutators are suspended.
@@ -1005,10 +1003,6 @@
       obj->SetLockWord(new_lw, false);
       VLOG(monitor) << "Deflated" << obj << " to empty lock word";
     }
-    DCHECK_EQ(monitor->monitor_lock_.GetDepth(), 1u);
-    DCHECK_EQ(monitor->owner_.load(std::memory_order_relaxed), static_cast<Thread*>(nullptr));
-    monitor->monitor_lock_.ExclusiveUnlock(self);
-    DCHECK(!(monitor->monitor_lock_.IsExclusiveHeld(self)));
     // The monitor is deflated, mark the object as null so that we know to delete it during the
     // next GC.
     monitor->obj_ = GcRoot<mirror::Object>(nullptr);
@@ -1094,10 +1088,6 @@
   size_t contention_count = 0;
   StackHandleScope<1> hs(self);
   Handle<mirror::Object> h_obj(hs.NewHandle(obj));
-#if !ART_USE_FUTEXES
-  // In this case we cannot inflate an unowned monitor, so we sometimes defer inflation.
-  bool should_inflate = false;
-#endif
   while (true) {
     // We initially read the lockword with ordinary Java/relaxed semantics. When stronger
     // semantics are needed, we address it below. Since GetLockWord bottoms out to a relaxed load,
@@ -1108,11 +1098,6 @@
         // No ordering required for preceding lockword read, since we retest.
         LockWord thin_locked(LockWord::FromThinLockId(thread_id, 0, lock_word.GCState()));
         if (h_obj->CasLockWord(lock_word, thin_locked, CASMode::kWeak, std::memory_order_acquire)) {
-#if !ART_USE_FUTEXES
-          if (should_inflate) {
-            InflateThinLocked(self, h_obj, lock_word, 0);
-          }
-#endif
           AtraceMonitorLock(self, h_obj.Get(), /* is_wait= */ false);
           return h_obj.Get();  // Success!
         }
@@ -1167,16 +1152,9 @@
             // of nanoseconds or less.
             sched_yield();
           } else {
-#if ART_USE_FUTEXES
             contention_count = 0;
             // No ordering required for initial lockword read. Install rereads it anyway.
             InflateThinLocked(self, h_obj, lock_word, 0);
-#else
-            // Can't inflate from non-owning thread. Keep waiting. Bad for power, but this code
-            // isn't used on-device.
-            should_inflate = true;
-            usleep(10);
-#endif
           }
         }
         continue;  // Start from the beginning.
@@ -1190,7 +1168,6 @@
           return mon->TryLock(self) ? h_obj.Get() : nullptr;
         } else {
           mon->Lock(self);
-          DCHECK(mon->monitor_lock_.IsExclusiveHeld(self));
           return h_obj.Get();  // Success!
         }
       }
@@ -1554,7 +1531,8 @@
 }
 
 bool Monitor::IsLocked() REQUIRES_SHARED(Locks::mutator_lock_) {
-  return GetOwner() != nullptr;
+  MutexLock mu(Thread::Current(), monitor_lock_);
+  return owner_ != nullptr;
 }
 
 void Monitor::TranslateLocation(ArtMethod* method,
@@ -1575,7 +1553,8 @@
 }
 
 uint32_t Monitor::GetOwnerThreadId() {
-  Thread* owner = GetOwner();
+  MutexLock mu(Thread::Current(), monitor_lock_);
+  Thread* owner = owner_;
   if (owner != nullptr) {
     return owner->GetThreadId();
   } else {
@@ -1703,14 +1682,14 @@
       break;
     case LockWord::kFatLocked: {
       Monitor* mon = lock_word.FatLockMonitor();
-      owner_ = mon->owner_.load(std::memory_order_relaxed);
+      owner_ = mon->owner_;
       // Here it is okay for the owner to be null since we don't reset the LockWord back to
       // kUnlocked until we get a GC. In cases where this hasn't happened yet we will have a fat
       // lock without an owner.
       if (owner_ != nullptr) {
-        entry_count_ = mon->monitor_lock_.GetDepth();
+        entry_count_ = 1 + mon->lock_count_;
       } else {
-        DCHECK_EQ(mon->monitor_lock_.GetDepth(), 0u) << "Monitor is fat-locked without any owner!";
+        DCHECK_EQ(mon->lock_count_, 0) << "Monitor is fat-locked without any owner!";
       }
       for (Thread* waiter = mon->wait_set_; waiter != nullptr; waiter = waiter->GetWaitNext()) {
         waiters_.push_back(waiter);
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 2d7fdb1..4187f27 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -21,7 +21,6 @@
 #include <stdint.h>
 #include <stdlib.h>
 
-#include <atomic>
 #include <iosfwd>
 #include <list>
 #include <vector>
@@ -130,14 +129,12 @@
 
   void SetObject(ObjPtr<mirror::Object> object);
 
-  // Provides no memory ordering guarantees.
-  Thread* GetOwner() const {
-    return owner_.load(std::memory_order_relaxed);
+  Thread* GetOwner() const NO_THREAD_SAFETY_ANALYSIS {
+    return owner_;
   }
 
   int32_t GetHashCode();
 
-  // Is the monitor currently locked? Debug only, provides no memory ordering guarantees.
   bool IsLocked() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!monitor_lock_);
 
   bool HasHashCode() const {
@@ -179,7 +176,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Install the monitor into its object, may fail if another thread installs a different monitor
-  // first. Monitor remains in the same logical state as before, i.e. held the same # of times.
+  // first.
   bool Install(Thread* self)
       REQUIRES(!monitor_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -192,9 +189,7 @@
   // this routine.
   void RemoveFromWaitSet(Thread* thread) REQUIRES(monitor_lock_);
 
-  // Release the monitor lock and signal a waiting thread that has been notified
-  // and now needs the lock. Assumes the monitor lock is held exactly once.
-  void SignalWaiterAndReleaseMonitorLock(Thread* self) RELEASE(monitor_lock_);
+  void SignalContendersAndReleaseMonitorLock(Thread* self) RELEASE(monitor_lock_);
 
   // Changes the shape of a monitor from thin to fat, preserving the internal lock state. The
   // calling thread must own the lock or the owner must be suspended. There's a race with other
@@ -215,33 +210,37 @@
                            uint32_t expected_owner_thread_id,
                            uint32_t found_owner_thread_id,
                            Monitor* mon)
-      REQUIRES(!Locks::thread_list_lock_)
+      REQUIRES(!Locks::thread_list_lock_,
+               !monitor_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Try to lock without blocking, returns true if we acquired the lock.
-  // If spin is true, then we spin for a short period before failing.
-  bool TryLock(Thread* self, bool spin = false)
-      TRY_ACQUIRE(true, monitor_lock_)
+  bool TryLock(Thread* self)
+      REQUIRES(!monitor_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // Variant for already holding the monitor lock.
+  bool TryLockLocked(Thread* self)
+      REQUIRES(monitor_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<LockReason reason = LockReason::kForLock>
   void Lock(Thread* self)
-      ACQUIRE(monitor_lock_)
+      REQUIRES(!monitor_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool Unlock(Thread* thread)
-      RELEASE(monitor_lock_)
+      REQUIRES(!monitor_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   static void DoNotify(Thread* self, ObjPtr<mirror::Object> obj, bool notify_all)
       REQUIRES_SHARED(Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS;  // For mon->Notify.
 
   void Notify(Thread* self)
-      REQUIRES(monitor_lock_)
+      REQUIRES(!monitor_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void NotifyAll(Thread* self)
-      REQUIRES(monitor_lock_)
+      REQUIRES(!monitor_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   static std::string PrettyContentionInfo(const std::string& owner_name,
@@ -271,7 +270,7 @@
   // Since we're allowed to wake up "early", we clamp extremely long durations to return at the end
   // of the 32-bit time epoch.
   void Wait(Thread* self, int64_t msec, int32_t nsec, bool interruptShouldThrow, ThreadState why)
-      REQUIRES(monitor_lock_)
+      REQUIRES(!monitor_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Translates the provided method and pc into its declaring class' source file and line number.
@@ -280,18 +279,8 @@
                                 int32_t* line_number)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Provides no memory ordering guarantees.
   uint32_t GetOwnerThreadId() REQUIRES(!monitor_lock_);
 
-  // Set locking_method_ and locking_dex_pc_ corresponding to owner's current stack.
-  // owner is either self or suspended.
-  void SetLockingMethod(Thread* owner) REQUIRES(monitor_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // The same, but without checking for a proxy method. Currently requires owner == self.
-  void SetLockingMethodNoProxy(Thread* owner) REQUIRES(monitor_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Support for systrace output of monitor operations.
   ALWAYS_INLINE static void AtraceMonitorLock(Thread* self,
                                               ObjPtr<mirror::Object> obj,
@@ -305,32 +294,19 @@
 
   static uint32_t lock_profiling_threshold_;
   static uint32_t stack_dump_lock_profiling_threshold_;
-  static bool capture_method_eagerly_;
 
-  // Holding the monitor N times is represented by holding monitor_lock_ N times.
   Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
-  // Lock monitor lock n more times.
-  void LockMonitorLock(Thread* thread, int n)
-      REQUIRES(monitor_lock_);
+  ConditionVariable monitor_contenders_ GUARDED_BY(monitor_lock_);
 
-  // Unlock monitor n times, but not completely.
-  void UnlockMonitorLock(Thread* thread, int n)
-      REQUIRES(monitor_lock_);
+  // Number of people waiting on the condition.
+  size_t num_waiters_ GUARDED_BY(monitor_lock_);
 
-  // Pretend to unlock monitor lock.
-  void FakeUnlockMonitorLock() RELEASE(monitor_lock_) NO_THREAD_SAFETY_ANALYSIS {}
+  // Which thread currently owns the lock?
+  Thread* volatile owner_ GUARDED_BY(monitor_lock_);
 
-  // Number of threads either waiting on the condition or waiting on a contended
-  // monitor acquisition. Prevents deflation.
-  std::atomic<size_t> num_waiters_;
-
-  // Which thread currently owns the lock? monitor_lock_ only keeps the tid.
-  // Only set while holding monitor_lock_. Non-locking readers only use it to
-  // compare to self or for debugging.
-  std::atomic<Thread*> owner_;
-
-  // Owner's recursive lock depth is given by monitor_lock_.GetDepth().
+  // Owner's recursive lock depth.
+  int lock_count_ GUARDED_BY(monitor_lock_);
 
   // What object are we part of. This is a weak root. Do not access
   // this directly, use GetObject() to read it so it will be guarded
@@ -346,73 +322,11 @@
   // Stored object hash code, generated lazily by GetHashCode.
   AtomicInteger hash_code_;
 
-  // Data structure used to remember the method and dex pc of the thread currently holding the
-  // lock. Used for tracing and contention reporting. Setting these is expensive, since it
-  // involves a partial stack walk. We set them only as follows, to minimize the cost:
-  // - If tracing is enabled, they are needed immediately when we first notice contention, so we
-  //   set them unconditionally when a monitor is acquired.
-  // - If contention reporting is enabled, we use the lock_owner_request_ field to have the
-  //   contending thread request them. The current owner then sets them when releasing the monitor,
-  //   making them available when the contending thread acquires the monitor.
-  // - If both are enabled, we blindly do both. This usually prevents us from switching between
-  //   reporting the end and beginning of critical sections for contention logging when tracing is
-  //   enabled.  We expect that tracing overhead is normally much higher than for contention
-  //   logging, so the added cost should be small. It also minimizes glitches when enabling and
-  //   disabling traces.
-  // We're tolerant of missing information. E.g. when tracing is initially turned on, we may
-  // not have the lock holder information if the holder acquired the lock with tracing off.
-  //
-  // We make this data unconditionally atomic; for contention logging all accesses are in fact
-  // protected by the monitor, but for tracing, reads are not. Writes are always
-  // protected by the monitor.
-  //
-  // The fields are always accessed without memory ordering. We store a checksum, and reread if
-  // the checksum doesn't correspond to the values.  This results in values that are correct with
-  // very high probability, but not certainty.
-  //
-  // If we need lock_owner information for a certain thread for contention logging, we store its
-  // tid in lock_owner_request_. To satisfy the request, we store lock_owner_tid_,
-  // lock_owner_method_, and lock_owner_dex_pc_ and the corresponding checksum while holding the
-  // monitor.
-  //
-  // At all times, either lock_owner_tid_ is zero, the checksum is valid, or a thread is actively
-  // in the process of establishing one of those states. Only one thread at a time can be actively
-  // establishing such a state, since writes are protected by the monitor.
-  std::atomic<uint32_t> lock_owner_thread_id_;  // Thin lock id, not system tid.
-  std::atomic<ArtMethod*> lock_owner_method_;
-  std::atomic<uint32_t> lock_owner_dex_pc_;
-  std::atomic<uintptr_t> lock_owner_sum_;
-
-  // Request lock owner save method and dex_pc. Written asynchronously.
-  std::atomic<uint32_t> lock_owner_request_;
-
-  // Compute method, dex pc, and tid "checksum".
-  uintptr_t LockOwnerInfoChecksum(ArtMethod* m, uint32_t dpc, uint32_t thread_id);
-
-  // Set owning method, dex pc, and tid. owner_ field is set and points to current thread.
-  void SetLockOwnerInfo(ArtMethod* method, uint32_t dex_pc, uint32_t thread_id)
-      REQUIRES(monitor_lock_);
-
-  // Get owning method and dex pc for the given tid, if available.
-  void GetLockOwnerInfo(/*out*/ArtMethod** method, /*out*/uint32_t* dex_pc, uint32_t thread_id);
-
-  // Do the same, while holding the monitor. There are no concurrent updates.
-  void GetLockOwnerInfoLocked(/*out*/ArtMethod** method, /*out*/uint32_t* dex_pc,
-                              uint32_t thread_id)
-      REQUIRES(monitor_lock_);
-
-  // We never clear lock_owner method and dex pc. With capture_method_eagerly_, the information
-  // should always be fresh, though in racy cases, it can be inconsistent with owner_,
-  // so it's not 100% reliable. For lock contention monitoring, in the absence of tracing,
-  // there is a small risk that the current owner may finish before noticing the request,
-  // or the information will be overwritten by another intervening request and monitor
-  // release, so it's also not 100% reliable. But if we report information at all, it
-  // should always (modulo accidental checksum matches) pertain to an acquisition of the
-  // right monitor by the right thread, so it's extremely unlikely to be seriously misleading.
-
-  // Check for and act on a pending lock_owner_request_
-  void CheckLockOwnerRequest(Thread* self)
-      REQUIRES(monitor_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
+  // Method and dex pc where the lock owner acquired the lock, used when lock
+  // sampling is enabled. locking_method_ may be null if the lock is currently
+  // unlocked, or if the lock is acquired by the system when the stack is empty.
+  ArtMethod* locking_method_ GUARDED_BY(monitor_lock_);
+  uint32_t locking_dex_pc_ GUARDED_BY(monitor_lock_);
 
   // The denser encoded version of this monitor as stored in the lock word.
   MonitorId monitor_id_;
diff --git a/test/2029-contended-monitors/expected.txt b/test/2029-contended-monitors/expected.txt
deleted file mode 100644
index bc31e70..0000000
--- a/test/2029-contended-monitors/expected.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-Starting
-Atomic increments
-Hold time 2, shared lock
-Hold time 20, shared lock
-Hold time 200, shared lock
-Hold time 2000, shared lock
-Hold time 20000, shared lock
-Hold time 200000, shared lock
-Hold time 2000000, shared lock
-Hold for 2 msecs while sleeping, shared lock
-Hold for 2 msecs while sleeping, private lock
diff --git a/test/2029-contended-monitors/info.txt b/test/2029-contended-monitors/info.txt
deleted file mode 100644
index f6ccdd3..0000000
--- a/test/2029-contended-monitors/info.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-Checks that monitor-protected increments at various granularities are indeed
-atomic. Also checks j.u.c. increments. Can be configured to print execution
-times for contended and uncontended monitor acquisition under different
-circumstances.
diff --git a/test/2029-contended-monitors/src/Main.java b/test/2029-contended-monitors/src/Main.java
deleted file mode 100644
index 11db33e..0000000
--- a/test/2029-contended-monitors/src/Main.java
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (C) 2019 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-import java.util.concurrent.atomic.AtomicInteger;
-
-public class Main {
-
-  private final boolean PRINT_TIMES = false;  // False for use as run test.
-
-  private final int TOTAL_ITERS = 16_000_000; // Number of increments done by each thread.
-                                              // Must be a multiple of the largest hold time below,
-                                              // currently 2_000_000, times any possible thread
-                                              // count.
-
-  private int counter;
-
-  private AtomicInteger atomicCounter = new AtomicInteger();
-
-  private Object lock;
-
-  private int currentThreadCount = 0;
-
-  // A function such that if we repeatedly apply it to -1, the value oscillates
-  // between -1 and 3. Thus the average value is 1.
-  // This is designed to make it hard for the compiler to predict the values in
-  // the sequence.
-  private int nextInt(int x) {
-    if (x < 0) {
-      return x * x + 2;
-    } else {
-      return x - 4;
-    }
-  }
-
-  // Increment counter by n, holding the lock for time roughly proportional to n.
-  // n must be even.
-  private void holdFor(Object lock, int n) {
-    synchronized(lock) {
-      int y = -1;
-      for (int i = 0; i < n; ++i) {
-        counter += y;
-        y = nextInt(y);
-      }
-    }
-  }
-
-  private class RepeatedLockHolder implements Runnable {
-    RepeatedLockHolder(boolean shared, int n /* even */) {
-      sharedLock = shared;
-      holdTime = n;
-    }
-    @Override
-    public void run() {
-      Object myLock = sharedLock ? lock : new Object();
-      int nIters = TOTAL_ITERS / currentThreadCount / holdTime;
-      for (int i = 0; i < nIters; ++i) {
-        holdFor(myLock, holdTime);
-      }
-    }
-    private boolean sharedLock;
-    private int holdTime;
-  }
-
-  private class SleepyLockHolder implements Runnable {
-    SleepyLockHolder(boolean shared) {
-      sharedLock = shared;
-    }
-    @Override
-    public void run() {
-      Object myLock = sharedLock ? lock : new Object();
-      int nIters = TOTAL_ITERS / currentThreadCount / 10_000;
-      for (int i = 0; i < nIters; ++i) {
-        synchronized(myLock) {
-          try {
-            Thread.sleep(2);
-          } catch(InterruptedException e) {
-            throw new AssertionError("Unexpected interrupt");
-          }
-          counter += 10_000;
-        }
-      }
-    }
-    private boolean sharedLock;
-  }
-
-  // Increment atomicCounter n times, on average by 1 each time.
-  private class RepeatedIncrementer implements Runnable {
-    @Override
-    public void run() {
-      int y = -1;
-      int nIters = TOTAL_ITERS / currentThreadCount;
-      for (int i = 0; i < nIters; ++i) {
-        atomicCounter.addAndGet(y);
-        y = nextInt(y);
-      }
-    }
-  }
-
-  // Run n threads doing work. Return the elapsed time this took, in milliseconds.
-  private long runMultiple(int n, Runnable work) {
-    Thread[] threads = new Thread[n];
-    // Replace lock, so that we start with a clean, uninflated lock each time.
-    lock = new Object();
-    for (int i = 0; i < n; ++i) {
-      threads[i] = new Thread(work);
-    }
-    long startTime = System.currentTimeMillis();
-    for (int i = 0; i < n; ++i) {
-      threads[i].start();
-    }
-    for (int i = 0; i < n; ++i) {
-      try {
-        threads[i].join();
-      } catch(InterruptedException e) {
-        throw new AssertionError("Unexpected interrupt");
-      }
-    }
-    return System.currentTimeMillis() - startTime;
-  }
-
-  // Run on different numbers of threads.
-  private void runAll(Runnable work, Runnable init, Runnable checker) {
-    for (int i = 1; i <= 8; i *= 2) {
-      currentThreadCount = i;
-      init.run();
-      long time = runMultiple(i, work);
-      if (PRINT_TIMES) {
-        System.out.print(time + (i == 8 ? "\n" : "\t"));
-      }
-      checker.run();
-    }
-  }
-
-  private class CheckAtomicCounter implements Runnable {
-    @Override
-    public void run() {
-      if (atomicCounter.get() != TOTAL_ITERS) {
-        throw new AssertionError("Failed atomicCounter postcondition check for "
-            + currentThreadCount + " threads");
-      }
-    }
-  }
-
-  private class CheckCounter implements Runnable {
-    @Override
-    public void run() {
-      if (counter != TOTAL_ITERS) {
-        throw new AssertionError("Failed counter postcondition check for "
-            + currentThreadCount + " threads");
-      }
-    }
-  }
-
-  private void run() {
-    if (PRINT_TIMES) {
-      System.out.println("All times in milliseconds for 1, 2, 4 and 8 threads");
-    }
-    System.out.println("Atomic increments");
-    runAll(new RepeatedIncrementer(), () -> { atomicCounter.set(0); }, new CheckAtomicCounter());
-    for (int i = 2; i <= 2_000_000; i *= 10) {
-      // i * 8 (max thread count) divides TOTAL_ITERS
-      System.out.println("Hold time " + i + ", shared lock");
-      runAll(new RepeatedLockHolder(true, i), () -> { counter = 0; }, new CheckCounter());
-    }
-    if (PRINT_TIMES) {
-      for (int i = 2; i <= 2_000_000; i *= 1000) {
-        // i divides TOTAL_ITERS
-        System.out.println("Hold time " + i + ", private lock");
-        // Since there is no mutual exclusion, the final counter value is unpredictable.
-        runAll(new RepeatedLockHolder(false, i), () -> { counter = 0; }, () -> {});
-      }
-    }
-    System.out.println("Hold for 2 msecs while sleeping, shared lock");
-    runAll(new SleepyLockHolder(true), () -> { counter = 0; }, new CheckCounter());
-    System.out.println("Hold for 2 msecs while sleeping, private lock");
-    runAll(new SleepyLockHolder(false), () -> { counter = 0; }, () -> {});
-  }
-
-  public static void main(String[] args) {
-    System.out.println("Starting");
-    new Main().run();
-  }
-}