Pass self to lock methods.

This avoids frequently re-deriving the calling thread via
Thread::Current()/pthread_getspecific().
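
For example, a typical call site changes as follows (pattern taken from
heap.cc in this change; the thread is still looked up once, then threaded
through to each lock operation):

  // Before: MutexLock re-derives the calling thread internally.
  MutexLock mu(*gc_complete_lock_);
  // After: the caller passes the Thread* it already holds.
  Thread* self = Thread::Current();
  MutexLock mu(self, *gc_complete_lock_);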

Also add a futex-based reader/writer mutex implementation that is disabled
for now (ART_USE_FUTEXES is defined to 0).
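
As a rough standalone sketch (using std::atomic as a stand-in, and omitting
the futex wait/wake path and the pending reader/writer counters) of the state
encoding the new ReaderWriterMutex uses: 0 = unlocked, -1 = held exclusively,
n > 0 = n shared readers. In the real code, contended threads FUTEX_WAIT on
state_ and are woken with FUTEX_WAKE when it drops back to 0.

  #include <atomic>
  #include <cstdint>

  class RwStateSketch {
   public:
    bool TryExclusiveLock() {
      int32_t expected = 0;
      return state_.compare_exchange_strong(expected, -1);  // 0 -> -1
    }
    void ExclusiveUnlock() { state_.store(0); }              // -1 -> 0
    bool TrySharedLock() {
      int32_t cur = state_.load();
      while (cur >= 0) {                                     // -1 means a writer holds it
        if (state_.compare_exchange_weak(cur, cur + 1)) {    // n -> n + 1
          return true;
        }
      }
      return false;
    }
    void SharedUnlock() { state_.fetch_sub(1); }             // n -> n - 1
   private:
    std::atomic<int32_t> state_{0};
  };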

Change-Id: I118fdb99ef1d1c4bfda6446ba3a0d8b6ab31eaee
diff --git a/src/compiler.h b/src/compiler.h
index a2cc317..c5f19f7 100644
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -27,6 +27,7 @@
 #include "dex_file.h"
 #include "instruction_set.h"
 #include "invoke_type.h"
+#include "mutex.h"
 #include "oat_file.h"
 #include "object.h"
 #include "runtime.h"
diff --git a/src/debugger.cc b/src/debugger.cc
index aad75b1..8477054 100644
--- a/src/debugger.cc
+++ b/src/debugger.cc
@@ -2416,7 +2416,7 @@
 
       // Wait for the request to finish executing.
       while (req->invoke_needed_) {
-        req->cond_.Wait(req->lock_);
+        req->cond_.Wait(self, req->lock_);
       }
     }
     VLOG(jdwp) << "    Control has returned from event thread";
diff --git a/src/dex_file.h b/src/dex_file.h
index fad6fa9..cca3935 100644
--- a/src/dex_file.h
+++ b/src/dex_file.h
@@ -26,7 +26,6 @@
 #include "logging.h"
 #include "mem_map.h"
 #include "modifiers.h"
-#include "mutex.h"
 #include "safe_map.h"
 #include "stringpiece.h"
 #include "UniquePtr.h"
diff --git a/src/heap.cc b/src/heap.cc
index 0f9b65b..3ab6419 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -461,7 +461,7 @@
 }
 
 bool Heap::IsLiveObjectLocked(const Object* obj) {
-  Locks::heap_bitmap_lock_->AssertReaderHeld();
+  Locks::heap_bitmap_lock_->AssertReaderHeld(Thread::Current());
   return IsHeapAddress(obj) && GetLiveBitmap()->Test(obj);
 }
 
@@ -604,10 +604,7 @@
   // done in the runnable state where suspension is expected.
 #ifndef NDEBUG
   Thread* self = Thread::Current();
-  {
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
-    CHECK_EQ(self->GetState(), kRunnable);
-  }
+  DCHECK_EQ(self->GetState(), kRunnable);
   self->AssertThreadSuspensionIsAllowable();
 #endif
 
@@ -618,7 +615,10 @@
 
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
-  GcType last_gc = WaitForConcurrentGcToComplete();
+#ifdef NDEBUG
+  Thread* self = Thread::Current();
+#endif
+  GcType last_gc = WaitForConcurrentGcToComplete(self);
   if (last_gc != kGcTypeNone) {
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
     ptr = TryToAllocate(space, alloc_size, false);
@@ -628,9 +628,6 @@
   }
 
   // Loop through our different Gc types and try to Gc until we get enough free memory.
-#ifdef NDEBUG
-  Thread* self = Thread::Current();
-#endif
   for (size_t i = static_cast<size_t>(last_gc) + 1; i < static_cast<size_t>(kGcTypeMax); ++i) {
     bool run_gc = false;
     GcType gc_type = static_cast<GcType>(i);
@@ -772,14 +769,16 @@
 void Heap::CollectGarbage(bool clear_soft_references) {
   // Even if we waited for a GC we still need to do another GC since weaks allocated during the
   // last GC will not have necessarily been cleared.
-  WaitForConcurrentGcToComplete();
-  ScopedThreadStateChange tsc(Thread::Current(), kWaitingPerformingGc);
+  Thread* self = Thread::Current();
+  WaitForConcurrentGcToComplete(self);
+  ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   CollectGarbageInternal(have_zygote_space_ ? kGcTypePartial : kGcTypeFull, clear_soft_references);
 }
 
 void Heap::PreZygoteFork() {
   static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
-  MutexLock mu(zygote_creation_lock_);
+  Thread* self = Thread::Current();
+  MutexLock mu(self, zygote_creation_lock_);
 
   // Try to see if we have any Zygote spaces.
   if (have_zygote_space_) {
@@ -790,7 +789,7 @@
 
   {
     // Flush the alloc stack.
-    WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     FlushAllocStack();
   }
 
@@ -866,31 +865,27 @@
 }
 
 GcType Heap::CollectGarbageInternal(GcType gc_type, bool clear_soft_references) {
-  Locks::mutator_lock_->AssertNotHeld();
-#ifndef NDEBUG
-  {
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
-    CHECK_EQ(Thread::Current()->GetState(), kWaitingPerformingGc);
-  }
-#endif
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  DCHECK_EQ(self->GetState(), kWaitingPerformingGc);
 
   // Ensure there is only one GC at a time.
   bool start_collect = false;
   while (!start_collect) {
     {
-      MutexLock mu(*gc_complete_lock_);
+      MutexLock mu(self, *gc_complete_lock_);
       if (!is_gc_running_) {
         is_gc_running_ = true;
         start_collect = true;
       }
     }
     if (!start_collect) {
-      WaitForConcurrentGcToComplete();
+      WaitForConcurrentGcToComplete(self);
       // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
       //       Not doing at the moment to ensure soft references are cleared.
     }
   }
-  gc_complete_lock_->AssertNotHeld();
+  gc_complete_lock_->AssertNotHeld(self);
 
   // We need to do partial GCs every now and then to avoid the heap growing too much and
   // fragmenting.
@@ -902,14 +897,14 @@
   }
 
   if (concurrent_gc_) {
-    CollectGarbageConcurrentMarkSweepPlan(gc_type, clear_soft_references);
+    CollectGarbageConcurrentMarkSweepPlan(self, gc_type, clear_soft_references);
   } else {
-    CollectGarbageMarkSweepPlan(gc_type, clear_soft_references);
+    CollectGarbageMarkSweepPlan(self, gc_type, clear_soft_references);
   }
   bytes_since_last_gc_ = 0;
 
   {
-    MutexLock mu(*gc_complete_lock_);
+    MutexLock mu(self, *gc_complete_lock_);
     is_gc_running_ = false;
     last_gc_type_ = gc_type;
     // Wake anyone who may have been waiting for the GC to complete.
@@ -920,7 +915,7 @@
   return gc_type;
 }
 
-void Heap::CollectGarbageMarkSweepPlan(GcType gc_type, bool clear_soft_references) {
+void Heap::CollectGarbageMarkSweepPlan(Thread* self, GcType gc_type, bool clear_soft_references) {
   TimingLogger timings("CollectGarbageInternal", true);
 
   std::stringstream gc_type_str;
@@ -931,7 +926,7 @@
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   thread_list->SuspendAll();
   timings.AddSplit("SuspendAll");
-  Locks::mutator_lock_->AssertExclusiveHeld();
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
 
   size_t bytes_freed = 0;
   Object* cleared_references = NULL;
@@ -942,7 +937,7 @@
     timings.AddSplit("Init");
 
     if (verify_pre_gc_heap_) {
-      WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       if (!VerifyHeapReferences()) {
         LOG(FATAL) << "Pre " << gc_type_str.str() << "Gc verification failed";
       }
@@ -1049,7 +1044,7 @@
 
     const bool swap = true;
     if (swap) {
-      SwapBitmaps();
+      SwapBitmaps(self);
     }
 
 #ifndef NDEBUG
@@ -1077,7 +1072,7 @@
   }
 
   if (verify_post_gc_heap_) {
-    WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     if (!VerifyHeapReferences()) {
       LOG(FATAL) << "Post " + gc_type_str.str() + "Gc verification failed";
     }
@@ -1286,7 +1281,7 @@
 
 // Must do this with mutators suspended since we are directly accessing the allocation stacks.
 bool Heap::VerifyHeapReferences() {
-  Locks::mutator_lock_->AssertExclusiveHeld();
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
   // Lets sort our allocation stacks so that we can efficiently binary search them.
   std::sort(allocation_stack_->Begin(), allocation_stack_->End());
   std::sort(live_stack_->Begin(), live_stack_->End());
@@ -1389,7 +1384,7 @@
 };
 
 bool Heap::VerifyMissingCardMarks() {
-  Locks::mutator_lock_->AssertExclusiveHeld();
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
 
   VerifyLiveStackReferences visitor(this);
   GetLiveBitmap()->Visit(visitor);
@@ -1406,12 +1401,12 @@
   return true;
 }
 
-void Heap::SwapBitmaps() {
+void Heap::SwapBitmaps(Thread* self) {
   // Swap the live and mark bitmaps for each alloc space. This is needed since sweep re-swaps
   // these bitmaps. Doing this enables us to sweep with the heap unlocked since new allocations
   // set the live bit, but since we have the bitmaps reversed at this point, this sets the mark bit
   // instead, resulting in no new allocated objects being incorrectly freed by sweep.
-  WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
   for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
     Space* space = *it;
     // We never allocate into zygote spaces.
@@ -1438,7 +1433,7 @@
   }
 }
 
-void Heap::CollectGarbageConcurrentMarkSweepPlan(GcType gc_type, bool clear_soft_references) {
+void Heap::CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_type, bool clear_soft_references) {
   TimingLogger timings("ConcurrentCollectGarbageInternal", true);
   uint64_t root_begin = NanoTime(), root_end = 0, dirty_begin = 0, dirty_end = 0;
   std::stringstream gc_type_str;
@@ -1448,7 +1443,7 @@
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   thread_list->SuspendAll();
   timings.AddSplit("SuspendAll");
-  Locks::mutator_lock_->AssertExclusiveHeld();
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
 
   size_t bytes_freed = 0;
   Object* cleared_references = NULL;
@@ -1460,7 +1455,7 @@
     timings.AddSplit("Init");
 
     if (verify_pre_gc_heap_) {
-      WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       if (!VerifyHeapReferences()) {
         LOG(FATAL) << "Pre " << gc_type_str.str() << "Gc verification failed";
       }
@@ -1472,7 +1467,7 @@
 
     // Check that all objects which reference things in the live stack are on dirty cards.
     if (verify_missing_card_marks_) {
-      ReaderMutexLock mu(*Locks::heap_bitmap_lock_);
+      ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
       // Sort the live stack so that we can quickly binary search it later.
       std::sort(live_stack_->Begin(), live_stack_->End());
       if (!VerifyMissingCardMarks()) {
@@ -1509,7 +1504,7 @@
     }
 
     {
-      WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
 
       for (Object** it = live_stack_->Begin(); it != live_stack_->End(); ++it) {
         CHECK(!GetLiveBitmap()->Test(*it));
@@ -1560,11 +1555,11 @@
     // Allow mutators to go again, acquire share on mutator_lock_ to continue.
     thread_list->ResumeAll();
     {
-      ReaderMutexLock reader_lock(*Locks::mutator_lock_);
+      ReaderMutexLock reader_lock(self, *Locks::mutator_lock_);
       root_end = NanoTime();
       timings.AddSplit("RootEnd");
 
-      WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       UpdateAndMarkModUnion(timings, gc_type);
 
       // Mark everything as live so that sweeping system weak works correctly for sticky mark bit
@@ -1585,10 +1580,10 @@
     dirty_begin = NanoTime();
     thread_list->SuspendAll();
     timings.AddSplit("ReSuspend");
-    Locks::mutator_lock_->AssertExclusiveHeld();
+    Locks::mutator_lock_->AssertExclusiveHeld(self);
 
     {
-      WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
 
       // Re-mark root set.
       mark_sweep.ReMarkRoots();
@@ -1607,7 +1602,7 @@
     }
 
     {
-      ReaderMutexLock mu(*Locks::heap_bitmap_lock_);
+      ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
 
       mark_sweep.ProcessReferences(clear_soft_references);
       timings.AddSplit("ProcessReferences");
@@ -1623,15 +1618,15 @@
     // bit instead, resulting in no new allocated objects being incorrectly freed by sweep.
     const bool swap = true;
     if (swap) {
-      SwapBitmaps();
+      SwapBitmaps(self);
     }
 
     // Only need to do this if we have the card mark verification on, and only during concurrent GC.
     if (verify_missing_card_marks_) {
-      WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       mark_sweep.SweepArray(timings, allocation_stack_.get(), swap);
     } else {
-      WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       // We only sweep over the live stack, and the live stack should not intersect with the
       // allocation stack, so it should be safe to UnMark anything in the allocation stack as live.
       UnMarkAllocStack(alloc_space_->GetLiveBitmap(), large_object_space_->GetLiveObjects(),
@@ -1641,13 +1636,13 @@
 
     if (kIsDebugBuild) {
       // Verify that we only reach marked objects from the image space.
-      ReaderMutexLock mu(*Locks::heap_bitmap_lock_);
+      ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
       mark_sweep.VerifyImageRoots();
       timings.AddSplit("VerifyImageRoots");
     }
 
     if (verify_post_gc_heap_) {
-      WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       if (!VerifyHeapReferences()) {
         LOG(FATAL) << "Post " << gc_type_str.str() << "Gc verification failed";
       }
@@ -1656,11 +1651,11 @@
 
     thread_list->ResumeAll();
     dirty_end = NanoTime();
-    Locks::mutator_lock_->AssertNotHeld();
+    Locks::mutator_lock_->AssertNotHeld(self);
 
     {
       // TODO: this lock shouldn't be necessary (it's why we did the bitmap flip above).
-      WriterMutexLock mu(*Locks::heap_bitmap_lock_);
+      WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
       if (gc_type != kGcTypeSticky) {
         mark_sweep.SweepLargeObjects(swap);
         timings.AddSplit("SweepLargeObjects");
@@ -1674,7 +1669,7 @@
     }
 
     if (verify_system_weaks_) {
-      ReaderMutexLock mu(*Locks::heap_bitmap_lock_);
+      ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
       mark_sweep.VerifySystemWeaks();
       timings.AddSplit("VerifySystemWeaks");
     }
@@ -1714,23 +1709,23 @@
   logger->End(); // Next iteration.
 }
 
-GcType Heap::WaitForConcurrentGcToComplete() {
+GcType Heap::WaitForConcurrentGcToComplete(Thread* self) {
   GcType last_gc_type = kGcTypeNone;
   if (concurrent_gc_) {
     bool do_wait;
     uint64_t wait_start = NanoTime();
     {
       // Check if GC is running holding gc_complete_lock_.
-      MutexLock mu(*gc_complete_lock_);
+      MutexLock mu(self, *gc_complete_lock_);
       do_wait = is_gc_running_;
     }
     if (do_wait) {
       // We must wait, change thread state then sleep on gc_complete_cond_;
       ScopedThreadStateChange tsc(Thread::Current(), kWaitingForGcToComplete);
       {
-        MutexLock mu(*gc_complete_lock_);
+        MutexLock mu(self, *gc_complete_lock_);
         while (is_gc_running_) {
-          gc_complete_cond_->Wait(*gc_complete_lock_);
+          gc_complete_cond_->Wait(self, *gc_complete_lock_);
         }
         last_gc_type = last_gc_type_;
       }
@@ -1809,7 +1804,7 @@
 }
 
 void Heap::ClearGrowthLimit() {
-  WaitForConcurrentGcToComplete();
+  WaitForConcurrentGcToComplete(Thread::Current());
   alloc_space_->ClearGrowthLimit();
 }
 
@@ -1941,7 +1936,7 @@
   requesting_gc_ = false;
 }
 
-void Heap::ConcurrentGC() {
+void Heap::ConcurrentGC(Thread* self) {
   if (Runtime::Current()->IsShuttingDown() || !concurrent_gc_) {
     return;
   }
@@ -1949,9 +1944,9 @@
   // TODO: We shouldn't need a WaitForConcurrentGcToComplete here since only
   //       concurrent GC resumes threads before the GC is completed and this function
   //       is only called within the GC daemon thread.
-  if (WaitForConcurrentGcToComplete() == kGcTypeNone) {
+  if (WaitForConcurrentGcToComplete(self) == kGcTypeNone) {
     // Start a concurrent GC as one wasn't in progress
-    ScopedThreadStateChange tsc(Thread::Current(), kWaitingPerformingGc);
+    ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
     if (alloc_space_->Size() > min_alloc_space_size_for_sticky_gc_) {
       CollectGarbageInternal(kGcTypeSticky, false);
     } else {
@@ -1960,8 +1955,8 @@
   }
 }
 
-void Heap::Trim() {
-  WaitForConcurrentGcToComplete();
+void Heap::Trim(Thread* self) {
+  WaitForConcurrentGcToComplete(self);
   alloc_space_->Trim();
 }
 
diff --git a/src/heap.h b/src/heap.h
index b905952..0a054e2 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -25,7 +25,7 @@
 #include "globals.h"
 #include "gtest/gtest.h"
 #include "heap_bitmap.h"
-#include "mutex.h"
+#include "locks.h"
 #include "offsets.h"
 #include "safe_map.h"
 #include "timing_logger.h"
@@ -39,12 +39,13 @@
 
 class AllocSpace;
 class Class;
+class ConditionVariable;
 class HeapBitmap;
 class ImageSpace;
 class LargeObjectSpace;
 class MarkStack;
 class ModUnionTable;
-
+class Mutex;
 class Object;
 class Space;
 class SpaceTest;
@@ -122,7 +123,7 @@
 
   // Does a concurrent GC, should only be called by the GC daemon thread
   // through runtime.
-  void ConcurrentGC();
+  void ConcurrentGC(Thread* self);
 
   // Implements java.lang.Runtime.maxMemory.
   int64_t GetMaxMemory();
@@ -159,7 +160,7 @@
 
   // Blocks the caller until the garbage collector becomes idle and returns
   // true if we waited for the GC to complete.
-  GcType WaitForConcurrentGcToComplete();
+  GcType WaitForConcurrentGcToComplete(Thread* self);
 
   const Spaces& GetSpaces() {
     return spaces_;
@@ -247,7 +248,7 @@
 
   void DumpForSigQuit(std::ostream& os);
 
-  void Trim();
+  void Trim(Thread* self);
 
   HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     return live_bitmap_.get();
@@ -303,7 +304,7 @@
   void RequestConcurrentGC();
 
   // Swap bitmaps (if we are a full Gc then we swap the zygote bitmap too).
-  void SwapBitmaps() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
+  void SwapBitmaps(Thread* self) EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   void RecordAllocation(size_t size, const Object* object)
       LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_);
@@ -315,10 +316,11 @@
                      Locks::heap_bitmap_lock_,
                      Locks::mutator_lock_,
                      Locks::thread_suspend_count_lock_);
-  void CollectGarbageMarkSweepPlan(GcType gc_plan, bool clear_soft_references)
+  void CollectGarbageMarkSweepPlan(Thread* self, GcType gc_plan, bool clear_soft_references)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_,
                      Locks::mutator_lock_);
-  void CollectGarbageConcurrentMarkSweepPlan(GcType gc_plan, bool clear_soft_references)
+  void CollectGarbageConcurrentMarkSweepPlan(Thread* self, GcType gc_plan,
+                                             bool clear_soft_references)
       LOCKS_EXCLUDED(Locks::heap_bitmap_lock_,
                      Locks::mutator_lock_);
 
diff --git a/src/jdwp/jdwp_event.cc b/src/jdwp/jdwp_event.cc
index 4f11a65..19f10eb 100644
--- a/src/jdwp/jdwp_event.cc
+++ b/src/jdwp/jdwp_event.cc
@@ -585,7 +585,8 @@
   bool waited = false;
 
   /* this is held for very brief periods; contention is unlikely */
-  MutexLock mu(event_thread_lock_);
+  Thread* self = Thread::Current();
+  MutexLock mu(self, event_thread_lock_);
 
   /*
    * If another thread is already doing stuff, wait for it.  This can
@@ -594,7 +595,7 @@
   while (event_thread_id_ != 0) {
     VLOG(jdwp) << StringPrintf("event in progress (%#llx), %#llx sleeping", event_thread_id_, threadId);
     waited = true;
-    event_thread_cond_.Wait(event_thread_lock_);
+    event_thread_cond_.Wait(self, event_thread_lock_);
   }
 
   if (waited || threadId != 0) {
@@ -1075,7 +1076,7 @@
   Thread* self = Thread::Current();
   bool safe_to_release_mutator_lock_over_send;
   for (size_t i=0; i < kMutatorLock; ++i) {
-    if (self->GetHeldMutex(static_cast<MutexLevel>(i)) != NULL) {
+    if (self->GetHeldMutex(static_cast<LockLevel>(i)) != NULL) {
       safe_to_release_mutator_lock_over_send = false;
       break;
     }
diff --git a/src/jdwp/jdwp_main.cc b/src/jdwp/jdwp_main.cc
index 4fec005..a09c488 100644
--- a/src/jdwp/jdwp_main.cc
+++ b/src/jdwp/jdwp_main.cc
@@ -118,7 +118,8 @@
  * the thread is accepting network connections.
  */
 JdwpState* JdwpState::Create(const JdwpOptions* options) {
-  Locks::mutator_lock_->AssertNotHeld();
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
   UniquePtr<JdwpState> state(new JdwpState(options));
   switch (options->transport) {
   case kJdwpTransportSocket:
@@ -157,7 +158,7 @@
        * Wait until the thread finishes basic initialization.
        * TODO: cond vars should be waited upon in a loop
        */
-      state->thread_start_cond_.Wait(state->thread_start_lock_);
+      state->thread_start_cond_.Wait(self, state->thread_start_lock_);
     } else {
       {
         MutexLock attach_locker(state->attach_lock_);
@@ -171,7 +172,7 @@
          * Wait until the thread finishes basic initialization.
          * TODO: cond vars should be waited upon in a loop
          */
-        state->thread_start_cond_.Wait(state->thread_start_lock_);
+        state->thread_start_cond_.Wait(self, state->thread_start_lock_);
 
         /*
          * For suspend=y, wait for the debugger to connect to us or for us to
@@ -182,8 +183,8 @@
          * when we wake up.
          */
         {
-          ScopedThreadStateChange tsc(Thread::Current(), kWaitingForDebuggerToAttach);
-          state->attach_cond_.Wait(state->attach_lock_);
+          ScopedThreadStateChange tsc(self, kWaitingForDebuggerToAttach);
+          state->attach_cond_.Wait(self, state->attach_lock_);
         }
       }
       if (!state->IsActive()) {
@@ -294,14 +295,15 @@
   thread_ = Thread::Current();
   run = true;
 
-  thread_start_lock_.Lock();
-  debug_thread_started_ = true;
-  thread_start_cond_.Broadcast();
-  thread_start_lock_.Unlock();
+  {
+    MutexLock locker(thread_, thread_start_lock_);
+    debug_thread_started_ = true;
+    thread_start_cond_.Broadcast();
+  }
 
   /* set the thread state to kWaitingInMainDebuggerLoop so GCs don't wait for us */
   {
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
+    MutexLock mu(thread_, *Locks::thread_suspend_count_lock_);
     CHECK_EQ(thread_->GetState(), kNative);
     thread_->SetState(kWaitingInMainDebuggerLoop);
   }
@@ -346,7 +348,7 @@
     while (!Dbg::IsDisposed()) {
       {
         // sanity check -- shouldn't happen?
-        MutexLock mu(*Locks::thread_suspend_count_lock_);
+        MutexLock mu(thread_, *Locks::thread_suspend_count_lock_);
         CHECK_EQ(thread_->GetState(), kWaitingInMainDebuggerLoop);
       }
 
@@ -366,7 +368,7 @@
         }
 
         /* wake anybody who's waiting for us */
-        MutexLock mu(attach_lock_);
+        MutexLock mu(thread_, attach_lock_);
         attach_cond_.Broadcast();
       }
     }
@@ -400,11 +402,8 @@
   }
 
   /* back to native, for thread shutdown */
-  {
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
-    CHECK_EQ(thread_->GetState(), kWaitingInMainDebuggerLoop);
-    thread_->SetState(kNative);
-  }
+  CHECK_EQ(thread_->GetState(), kWaitingInMainDebuggerLoop);
+  thread_->SetState(kNative);
 
   VLOG(jdwp) << "JDWP: thread detaching and exiting...";
   runtime->DetachCurrentThread();
diff --git a/src/jni_compiler_test.cc b/src/jni_compiler_test.cc
index 31b14c9..81941c0 100644
--- a/src/jni_compiler_test.cc
+++ b/src/jni_compiler_test.cc
@@ -118,7 +118,7 @@
   {
     MutexLock mu(*Locks::thread_suspend_count_lock_);
     EXPECT_EQ(kNative, Thread::Current()->GetState());
-    Locks::mutator_lock_->AssertNotHeld();
+    Locks::mutator_lock_->AssertNotHeld(Thread::Current());
   }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
diff --git a/src/jni_internal.cc b/src/jni_internal.cc
index 77066c4..0f93461 100644
--- a/src/jni_internal.cc
+++ b/src/jni_internal.cc
@@ -527,7 +527,7 @@
       } else {
         while (jni_on_load_result_ == kPending) {
           VLOG(jni) << "[" << *self << " waiting for \"" << path_ << "\" " << "JNI_OnLoad...]";
-          jni_on_load_cond_.Wait(jni_on_load_lock_);
+          jni_on_load_cond_.Wait(self, jni_on_load_lock_);
         }
 
         okay = (jni_on_load_result_ == kOkay);
diff --git a/src/locks.cc b/src/locks.cc
new file mode 100644
index 0000000..20bf81c
--- /dev/null
+++ b/src/locks.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "locks.h"
+
+#include "mutex.h"
+
+namespace art {
+
+ReaderWriterMutex* Locks::mutator_lock_ = NULL;
+Mutex* Locks::thread_list_lock_ = NULL;
+Mutex* Locks::classlinker_classes_lock_ = NULL;
+ReaderWriterMutex* Locks::heap_bitmap_lock_ = NULL;
+Mutex* Locks::abort_lock_ = NULL;
+Mutex* Locks::logging_lock_ = NULL;
+Mutex* Locks::unexpected_signal_lock_ = NULL;
+Mutex* Locks::thread_suspend_count_lock_ = NULL;
+
+void Locks::Init() {
+  if (logging_lock_ != NULL) {
+    // Already initialized.
+    DCHECK(mutator_lock_ != NULL);
+    DCHECK(thread_list_lock_ != NULL);
+    DCHECK(classlinker_classes_lock_ != NULL);
+    DCHECK(heap_bitmap_lock_ != NULL);
+    DCHECK(abort_lock_ != NULL);
+    DCHECK(logging_lock_ != NULL);
+    DCHECK(unexpected_signal_lock_ != NULL);
+    DCHECK(thread_suspend_count_lock_ != NULL);
+  } else {
+    logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
+    abort_lock_ = new Mutex("abort lock", kAbortLock, true);
+    DCHECK(mutator_lock_ == NULL);
+    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
+    DCHECK(thread_list_lock_ == NULL);
+    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
+    DCHECK(classlinker_classes_lock_ == NULL);
+    classlinker_classes_lock_ = new Mutex("ClassLinker classes lock", kClassLinkerClassesLock);
+    DCHECK(heap_bitmap_lock_ == NULL);
+    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock);
+    DCHECK(unexpected_signal_lock_ == NULL);
+    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
+    DCHECK(thread_suspend_count_lock_ == NULL);
+    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
+  }
+}
+
+}  // namespace art
diff --git a/src/locks.h b/src/locks.h
new file mode 100644
index 0000000..c5821d8
--- /dev/null
+++ b/src/locks.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_SRC_LOCKS_H_
+#define ART_SRC_LOCKS_H_
+
+#include <ostream>
+
+#include "macros.h"
+
+namespace art {
+
+class LOCKABLE Mutex;
+class LOCKABLE ReaderWriterMutex;
+
+// LockLevel is used to impose a lock hierarchy [1] where acquisition of a Mutex at a higher or
+// equal level to a lock a thread holds is invalid. The lock hierarchy achieves a cycle-free
+// partial ordering and thereby causes deadlock situations to fail checks.
+//
+// [1] http://www.drdobbs.com/parallel/use-lock-hierarchies-to-avoid-deadlock/204801163
+enum LockLevel {
+  kLoggingLock = 0,
+  kUnexpectedSignalLock = 1,
+  kThreadSuspendCountLock = 2,
+  kAbortLock = 3,
+  kDefaultMutexLevel = 4,
+  kJdwpSerialLock = 5,
+  kAllocSpaceLock = 6,
+  kLoadLibraryLock = 7,
+  kClassLinkerClassesLock = 8,
+  kThreadListLock = 9,
+  kHeapBitmapLock = 10,
+  kMonitorLock = 11,
+  kMutatorLock = 12,
+  kZygoteCreationLock = 13,
+  kMaxMutexLevel = kMutatorLock,
+};
+std::ostream& operator<<(std::ostream& os, const LockLevel& rhs);
+
+// Global mutexes corresponding to the levels above.
+class Locks {
+ public:
+  static void Init();
+
+  // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block
+  // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds
+  // a share on the mutator_lock_. The garbage collector may also execute with shared access but
+  // at times requires exclusive access to the heap (not to be confused with the heap meta-data
+  // guarded by the heap_lock_ below). When the garbage collector requires exclusive access it asks
+  // the mutators to suspend themselves which also involves usage of the thread_suspend_count_lock_
+  // to cover weaknesses in using ReaderWriterMutexes with ConditionVariables. We use a condition
+  // variable to wait upon in the suspension logic as releasing and then re-acquiring a share on
+  // the mutator lock doesn't necessarily allow the exclusive user (e.g. the garbage collector)
+  // a chance to acquire the lock.
+  //
+  // Thread suspension:
+  // Shared users                                  | Exclusive user
+  // (holding mutator lock and in kRunnable state) |   .. running ..
+  //   .. running ..                               | Request thread suspension by:
+  //   .. running ..                               |   - acquiring thread_suspend_count_lock_
+  //   .. running ..                               |   - incrementing Thread::suspend_count_ on
+  //   .. running ..                               |     all mutator threads
+  //   .. running ..                               |   - releasing thread_suspend_count_lock_
+  //   .. running ..                               | Block trying to acquire exclusive mutator lock
+  // Poll Thread::suspend_count_ and enter full    |   .. blocked ..
+  // suspend code.                                 |   .. blocked ..
+  // Change state to kSuspended                    |   .. blocked ..
+  // x: Release share on mutator_lock_             | Carry out exclusive access
+  // Acquire thread_suspend_count_lock_            |   .. exclusive ..
+  // while Thread::suspend_count_ > 0              |   .. exclusive ..
+  //   - wait on Thread::resume_cond_              |   .. exclusive ..
+  //     (releases thread_suspend_count_lock_)     |   .. exclusive ..
+  //   .. waiting ..                               | Release mutator_lock_
+  //   .. waiting ..                               | Request thread resumption by:
+  //   .. waiting ..                               |   - acquiring thread_suspend_count_lock_
+  //   .. waiting ..                               |   - decrementing Thread::suspend_count_ on
+  //   .. waiting ..                               |     all mutator threads
+  //   .. waiting ..                               |   - notifying on Thread::resume_cond_
+  //    - re-acquire thread_suspend_count_lock_    |   - releasing thread_suspend_count_lock_
+  // Release thread_suspend_count_lock_            |  .. running ..
+  // Acquire share on mutator_lock_                |  .. running ..
+  //  - This could block but the thread still      |  .. running ..
+  //    has a state of kSuspended and so this      |  .. running ..
+  //    isn't an issue.                            |  .. running ..
+  // Acquire thread_suspend_count_lock_            |  .. running ..
+  //  - we poll here as we're transitioning into   |  .. running ..
+  //    kRunnable and an individual thread suspend |  .. running ..
+  //    request (e.g for debugging) won't try      |  .. running ..
+  //    to acquire the mutator lock (which would   |  .. running ..
+  //    block as we hold the mutator lock). This   |  .. running ..
+  //    poll ensures that if the suspender thought |  .. running ..
+  //    we were suspended by incrementing our      |  .. running ..
+  //    Thread::suspend_count_ and then reading    |  .. running ..
+  //    our state we go back to waiting on         |  .. running ..
+  //    Thread::resume_cond_.                      |  .. running ..
+  // can_go_runnable = Thread::suspend_count_ == 0 |  .. running ..
+  // Release thread_suspend_count_lock_            |  .. running ..
+  // if can_go_runnable                            |  .. running ..
+  //   Change state to kRunnable                   |  .. running ..
+  // else                                          |  .. running ..
+  //   Goto x                                      |  .. running ..
+  //  .. running ..                                |  .. running ..
+  static ReaderWriterMutex* mutator_lock_;
+
+  // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap.
+  static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_);
+
+  // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
+  // attaching and detaching.
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
+
+  // Guards lists of classes within the class linker.
+  static Mutex* classlinker_classes_lock_ ACQUIRED_AFTER(thread_list_lock_);
+
+  // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
+  // doesn't try to hold a higher level Mutex.
+  #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(classlinker_classes_lock_)
+
+  // Have an exclusive aborting thread.
+  static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+
+  // Allow mutual exclusion when manipulating Thread::suspend_count_.
+  // TODO: Does the trade-off of a per-thread lock make sense?
+  static Mutex* thread_suspend_count_lock_ ACQUIRED_AFTER(abort_lock_);
+
+  // One unexpected signal at a time lock.
+  static Mutex* unexpected_signal_lock_ ACQUIRED_AFTER(thread_suspend_count_lock_);
+
+  // Have an exclusive logging thread.
+  static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_);
+};
+
+}  // namespace art
+
+#endif  // ART_SRC_LOCKS_H_
diff --git a/src/logging.cc b/src/logging.cc
index a0c07cf..48785c5 100644
--- a/src/logging.cc
+++ b/src/logging.cc
@@ -16,6 +16,7 @@
 
 #include "logging.h"
 
+#include "mutex.h"
 #include "runtime.h"
 #include "thread.h"
 #include "utils.h"
diff --git a/src/mark_sweep.cc b/src/mark_sweep.cc
index 2c280a2..cdb73db 100644
--- a/src/mark_sweep.cc
+++ b/src/mark_sweep.cc
@@ -25,10 +25,12 @@
 #include "heap.h"
 #include "indirect_reference_table.h"
 #include "intern_table.h"
+#include "jni_internal.h"
 #include "logging.h"
 #include "macros.h"
 #include "mark_stack.h"
 #include "monitor.h"
+#include "mutex.h"
 #include "object.h"
 #include "runtime.h"
 #include "space.h"
@@ -456,7 +458,7 @@
 };
 
 void MarkSweep::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
-  Locks::heap_bitmap_lock_->AssertExclusiveHeld();
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
 
   size_t freed_objects = num_ptrs;
   size_t freed_bytes = 0;
@@ -490,7 +492,7 @@
 }
 
 void MarkSweep::ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
-  Locks::heap_bitmap_lock_->AssertExclusiveHeld();
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
 
   SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
   Heap* heap = context->mark_sweep->GetHeap();
diff --git a/src/monitor.cc b/src/monitor.cc
index 6a18a90..c0484af 100644
--- a/src/monitor.cc
+++ b/src/monitor.cc
@@ -124,7 +124,7 @@
       wait_set_(NULL),
       locking_method_(NULL),
       locking_dex_pc_(0) {
-  monitor_lock_.Lock();
+  monitor_lock_.Lock(owner);
   // Propagate the lock state.
   uint32_t thin = *obj->GetRawLockWordAddress();
   lock_count_ = LW_LOCK_COUNT(thin);
@@ -201,7 +201,7 @@
     return;
   }
 
-  if (!monitor_lock_.TryLock()) {
+  if (!monitor_lock_.TryLock(self)) {
     uint64_t waitStart = 0;
     uint64_t waitEnd = 0;
     uint32_t wait_threshold = lock_profiling_threshold_;
@@ -215,7 +215,7 @@
       current_locking_method = locking_method_;
       current_locking_dex_pc = locking_dex_pc_;
 
-      monitor_lock_.Lock();
+      monitor_lock_.Lock(self);
       if (wait_threshold != 0) {
         waitEnd = NanoTime() / 1000;
       }
@@ -343,7 +343,7 @@
       owner_ = NULL;
       locking_method_ = NULL;
       locking_dex_pc_ = 0;
-      monitor_lock_.Unlock();
+      monitor_lock_.Unlock(self);
     } else {
       --lock_count_;
     }
@@ -353,7 +353,7 @@
     DCHECK(owner == NULL);
     DCHECK(locking_method_ == NULL);
     DCHECK_EQ(locking_dex_pc_, 0u);
-    monitor_lock_.Unlock();
+    monitor_lock_.Unlock(self);
   } else {
     // We don't own this, so we're not allowed to unlock it.
     // The JNI spec says that we should throw IllegalMonitorStateException
@@ -496,9 +496,9 @@
     } else {
       // Wait for a notification or a timeout to occur.
       if (!timed) {
-        self->wait_cond_->Wait(*self->wait_mutex_);
+        self->wait_cond_->Wait(self, *self->wait_mutex_);
       } else {
-        self->wait_cond_->TimedWait(*self->wait_mutex_, ts);
+        self->wait_cond_->TimedWait(self, *self->wait_mutex_, ts);
       }
       if (self->interrupted_) {
         wasInterrupted = true;
@@ -515,7 +515,7 @@
   Lock(self);
 
 
-  self->wait_mutex_->AssertNotHeld();
+  self->wait_mutex_->AssertNotHeld(self);
 
   /*
    * We remove our thread from wait set after restoring the count
diff --git a/src/mutex.cc b/src/mutex.cc
index c066bec..ccb913f 100644
--- a/src/mutex.cc
+++ b/src/mutex.cc
@@ -18,6 +18,9 @@
 
 #include <errno.h>
 
+#include "cutils/atomic.h"
+#include "cutils/atomic-inline.h"
+#include "linux/futex.h"
 #include "logging.h"
 #include "runtime.h"
 #include "thread.h"
@@ -33,6 +36,16 @@
 extern int pthread_mutex_unlock(pthread_mutex_t* mutex) UNLOCK_FUNCTION(1);
 extern int pthread_mutex_trylock(pthread_mutex_t* mutex) EXCLUSIVE_TRYLOCK_FUNCTION(0, mutex);
 
+#if ART_USE_FUTEXES
+#include "sys/syscall.h"
+#ifndef SYS_futex
+#define SYS_futex __NR_futex
+#endif
+int futex(volatile int *uaddr, int op, int val, const struct timespec *timeout, int *, int ) {
+  return syscall(SYS_futex, uaddr, op, val, timeout, NULL, NULL);
+}
+#endif  // ART_USE_FUTEXES
+
 namespace art {
 
 // This works on Mac OS 10.6 but hasn't been tested on older releases.
@@ -75,47 +88,9 @@
   // ...other stuff we don't care about.
 };
 
-ReaderWriterMutex* Locks::mutator_lock_ = NULL;
-Mutex* Locks::thread_list_lock_ = NULL;
-Mutex* Locks::classlinker_classes_lock_ = NULL;
-ReaderWriterMutex* Locks::heap_bitmap_lock_ = NULL;
-Mutex* Locks::abort_lock_ = NULL;
-Mutex* Locks::logging_lock_ = NULL;
-Mutex* Locks::unexpected_signal_lock_ = NULL;
-Mutex* Locks::thread_suspend_count_lock_ = NULL;
+BaseMutex::BaseMutex(const char* name, LockLevel level) : level_(level), name_(name) {}
 
-void Locks::Init() {
-  if (logging_lock_ != NULL) {
-    // Already initialized.
-    DCHECK(mutator_lock_ != NULL);
-    DCHECK(thread_list_lock_ != NULL);
-    DCHECK(classlinker_classes_lock_ != NULL);
-    DCHECK(heap_bitmap_lock_ != NULL);
-    DCHECK(abort_lock_ != NULL);
-    DCHECK(logging_lock_ != NULL);
-    DCHECK(unexpected_signal_lock_ != NULL);
-    DCHECK(thread_suspend_count_lock_ != NULL);
-  } else {
-    logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
-    abort_lock_ = new Mutex("abort lock", kAbortLock, true);
-    DCHECK(mutator_lock_ == NULL);
-    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
-    DCHECK(thread_list_lock_ == NULL);
-    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
-    DCHECK(classlinker_classes_lock_ == NULL);
-    classlinker_classes_lock_ = new Mutex("ClassLinker classes lock", kClassLinkerClassesLock);
-    DCHECK(heap_bitmap_lock_ == NULL);
-    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock);
-    DCHECK(unexpected_signal_lock_ == NULL);
-    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
-    DCHECK(thread_suspend_count_lock_ == NULL);
-    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
-  }
-}
-
-BaseMutex::BaseMutex(const char* name, MutexLevel level) : level_(level), name_(name) {}
-
-static void CheckUnattachedThread(MutexLevel level) {
+static void CheckUnattachedThread(LockLevel level) {
   // The check below enumerates the cases where we expect not to be able to sanity check locks
   // on a thread. TODO: tighten this check.
   if (kDebugLocking) {
@@ -126,9 +101,8 @@
   }
 }
 
-void BaseMutex::RegisterAsLockedWithCurrentThread() {
-  Thread* self = Thread::Current();
-  if (self == NULL) {
+void BaseMutex::RegisterAsLocked(Thread* self) {
+  if (UNLIKELY(self == NULL)) {
     CheckUnattachedThread(level_);
     return;
   }
@@ -136,7 +110,7 @@
     // Check if a bad Mutex of this level or lower is held.
     bool bad_mutexes_held = false;
     for (int i = level_; i >= 0; --i) {
-      BaseMutex* held_mutex = self->GetHeldMutex(static_cast<MutexLevel>(i));
+      BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
       if (UNLIKELY(held_mutex != NULL)) {
         LOG(ERROR) << "Lock level violation: holding \"" << held_mutex->name_ << "\" (level " << i
             << ") while locking \"" << name_ << "\" (level " << static_cast<int>(level_) << ")";
@@ -155,9 +129,8 @@
   }
 }
 
-void BaseMutex::RegisterAsUnlockedWithCurrentThread() {
-  Thread* self = Thread::Current();
-  if (self == NULL) {
+void BaseMutex::RegisterAsUnlocked(Thread* self) {
+  if (UNLIKELY(self == NULL)) {
     CheckUnattachedThread(level_);
     return;
   }
@@ -169,8 +142,7 @@
   }
 }
 
-void BaseMutex::CheckSafeToWait() {
-  Thread* self = Thread::Current();
+void BaseMutex::CheckSafeToWait(Thread* self) {
   if (self == NULL) {
     CheckUnattachedThread(level_);
     return;
@@ -180,7 +152,7 @@
     bool bad_mutexes_held = false;
     for (int i = kMaxMutexLevel; i >= 0; --i) {
       if (i != level_) {
-        BaseMutex* held_mutex = self->GetHeldMutex(static_cast<MutexLevel>(i));
+        BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
         if (held_mutex != NULL) {
           LOG(ERROR) << "Holding " << held_mutex->name_ << " (level " << i
               << ") while performing wait on: "
@@ -193,7 +165,7 @@
   }
 }
 
-Mutex::Mutex(const char* name, MutexLevel level, bool recursive)
+Mutex::Mutex(const char* name, LockLevel level, bool recursive)
     : BaseMutex(name, level), recursive_(recursive), recursion_count_(0) {
 #if defined(__BIONIC__) || defined(__APPLE__)
   // Use recursive mutexes for bionic and Apple otherwise the
@@ -220,27 +192,27 @@
   }
 }
 
-void Mutex::ExclusiveLock() {
+void Mutex::ExclusiveLock(Thread* self) {
   if (kDebugLocking && !recursive_) {
-    AssertNotHeld();
+    AssertNotHeld(self);
   }
-  if (!recursive_ || !IsExclusiveHeld()) {
+  if (!recursive_ || !IsExclusiveHeld(self)) {
     CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
-    RegisterAsLockedWithCurrentThread();
+    RegisterAsLocked(self);
   }
   recursion_count_++;
   if (kDebugLocking) {
     CHECK(recursion_count_ == 1 || recursive_) << "Unexpected recursion count on mutex: "
         << name_ << " " << recursion_count_;
-    AssertHeld();
+    AssertHeld(self);
   }
 }
 
-bool Mutex::ExclusiveTryLock() {
+bool Mutex::ExclusiveTryLock(Thread* self) {
   if (kDebugLocking && !recursive_) {
-    AssertNotHeld();
+    AssertNotHeld(self);
   }
-  if (!recursive_ || !IsExclusiveHeld()) {
+  if (!recursive_ || !IsExclusiveHeld(self)) {
     int result = pthread_mutex_trylock(&mutex_);
     if (result == EBUSY) {
       return false;
@@ -249,32 +221,31 @@
       errno = result;
       PLOG(FATAL) << "pthread_mutex_trylock failed for " << name_;
     }
-    RegisterAsLockedWithCurrentThread();
+    RegisterAsLocked(self);
   }
   recursion_count_++;
   if (kDebugLocking) {
     CHECK(recursion_count_ == 1 || recursive_) << "Unexpected recursion count on mutex: "
         << name_ << " " << recursion_count_;
-    AssertHeld();
+    AssertHeld(self);
   }
   return true;
 }
 
-void Mutex::ExclusiveUnlock() {
-  AssertHeld();
+void Mutex::ExclusiveUnlock(Thread* self) {
+  AssertHeld(self);
   recursion_count_--;
   if (!recursive_ || recursion_count_ == 0) {
     if (kDebugLocking) {
       CHECK(recursion_count_ == 0 || recursive_) << "Unexpected recursion count on mutex: "
           << name_ << " " << recursion_count_;
     }
-    RegisterAsUnlockedWithCurrentThread();
+    RegisterAsUnlocked(self);
     CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_));
   }
 }
 
-bool Mutex::IsExclusiveHeld() const {
-  Thread* self = Thread::Current();
+bool Mutex::IsExclusiveHeld(const Thread* self) const {
   bool result;
   if (self == NULL || level_ == kMonitorLock) {  // Handle unattached threads and monitors.
     result = (GetExclusiveOwnerTid() == static_cast<uint64_t>(GetTid()));
@@ -309,11 +280,24 @@
 #endif
 }
 
-ReaderWriterMutex::ReaderWriterMutex(const char* name, MutexLevel level) : BaseMutex(name, level) {
+ReaderWriterMutex::ReaderWriterMutex(const char* name, LockLevel level) :
+    BaseMutex(name, level)
+#if ART_USE_FUTEXES
+    , state_(0), exclusive_owner_(0), num_pending_readers_(0), num_pending_writers_(0)
+#endif
+{
+#if !ART_USE_FUTEXES
   CHECK_MUTEX_CALL(pthread_rwlock_init, (&rwlock_, NULL));
+#endif
 }
 
 ReaderWriterMutex::~ReaderWriterMutex() {
+#if ART_USE_FUTEXES
+  CHECK_EQ(state_, 0);
+  CHECK_EQ(exclusive_owner_, 0U);
+  CHECK_EQ(num_pending_readers_, 0);
+  CHECK_EQ(num_pending_writers_, 0);
+#else
   // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
   // may still be using locks.
   int rc = pthread_rwlock_destroy(&rwlock_);
@@ -323,23 +307,89 @@
     bool shutting_down = Runtime::Current()->IsShuttingDown();
     PLOG(shutting_down ? WARNING : FATAL) << "pthread_mutex_destroy failed for " << name_;
   }
+#endif
 }
 
-void ReaderWriterMutex::ExclusiveLock() {
-  AssertNotExclusiveHeld();
+void ReaderWriterMutex::ExclusiveLock(Thread* self) {
+  AssertNotExclusiveHeld(self);
+#if ART_USE_FUTEXES
+  bool done = false;
+  do {
+    int32_t cur_state = state_;
+    if (cur_state == 0) {
+      // Change state from 0 to -1.
+      done = android_atomic_cmpxchg(0, -1, &state_) == 0;
+    } else {
+      // Failed to acquire, hang up.
+      android_atomic_inc(&num_pending_writers_);
+      if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+        if (errno != EAGAIN) {
+          PLOG(FATAL) << "futex wait failed for " << name_;
+        }
+      }
+      android_atomic_dec(&num_pending_writers_);
+    }
+  } while(!done);
+  exclusive_owner_ = static_cast<uint64_t>(GetTid());
+#else
   CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
-  RegisterAsLockedWithCurrentThread();
-  AssertExclusiveHeld();
+#endif
+  RegisterAsLocked(self);
+  AssertExclusiveHeld(self);
 }
 
-void ReaderWriterMutex::ExclusiveUnlock() {
-  AssertExclusiveHeld();
-  RegisterAsUnlockedWithCurrentThread();
+void ReaderWriterMutex::ExclusiveUnlock(Thread* self) {
+  AssertExclusiveHeld(self);
+  RegisterAsUnlocked(self);
+#if ART_USE_FUTEXES
+  bool done = false;
+  do {
+    int32_t cur_state = state_;
+    if (cur_state == -1) {
+      // We're no longer the owner.
+      exclusive_owner_ = 0;
+      // Change state from -1 to 0.
+      done = android_atomic_cmpxchg(-1, 0, &state_) == 0;
+      if (done) { // cmpxchg may fail due to noise?
+        // Wake any waiters.
+        if (num_pending_readers_ > 0 || num_pending_writers_ > 0) {
+          futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
+        }
+      }
+    } else {
+      LOG(FATAL) << "Unexpected state_:" << cur_state << " for " << name_;
+    }
+  } while(!done);
+#else
   CHECK_MUTEX_CALL(pthread_rwlock_unlock, (&rwlock_));
+#endif
 }
 
 #if HAVE_TIMED_RWLOCK
-bool ReaderWriterMutex::ExclusiveLockWithTimeout(const timespec& abs_timeout) {
+bool ReaderWriterMutex::ExclusiveLockWithTimeout(Thread* self, const timespec& abs_timeout) {
+#if ART_USE_FUTEXES
+  bool done = false;
+  do {
+    int32_t cur_state = state_;
+    if (cur_state == 0) {
+      // Change state from 0 to -1.
+      done = android_atomic_cmpxchg(0, -1, &state_) == 0;
+    } else {
+      // Failed to acquire, hang up.
+      android_atomic_inc(&num_pending_writers_);
+      if (futex(&state_, FUTEX_WAIT, cur_state, &abs_timeout, NULL, 0) != 0) {
+        if (errno == ETIMEDOUT) {
+          android_atomic_dec(&num_pending_writers_);
+          return false;
+        } else if (errno != EAGAIN) {
+          PLOG(FATAL) << "timed futex wait failed for " << name_;
+        }
+      }
+      android_atomic_dec(&num_pending_writers_);
+    }
+  } while(!done);
+  exclusive_owner_ = static_cast<uint64_t>(GetTid());
+#else
   int result = pthread_rwlock_timedwrlock(&rwlock_, &abs_timeout);
   if (result == ETIMEDOUT) {
     return false;
@@ -348,19 +398,53 @@
     errno = result;
     PLOG(FATAL) << "pthread_rwlock_timedwrlock failed for " << name_;
   }
-  RegisterAsLockedWithCurrentThread();
-  AssertSharedHeld();
+#endif
+  RegisterAsLocked(self);
+  AssertSharedHeld(self);
   return true;
 }
 #endif
 
-void ReaderWriterMutex::SharedLock() {
+void ReaderWriterMutex::SharedLock(Thread* self) {
+#if ART_USE_FUTEXES
+  bool done = false;
+  do {
+    int32_t cur_state = state_;
+    if (cur_state >= 0) {
+      // Add as an extra reader.
+      done = android_atomic_cmpxchg(cur_state, cur_state + 1, &state_) == 0;
+    } else {
+      // Owner holds it exclusively, hang up.
+      android_atomic_inc(&num_pending_readers_);
+      if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+        if (errno != EAGAIN) {
+          PLOG(FATAL) << "futex wait failed for " << name_;
+        }
+      }
+      android_atomic_dec(&num_pending_readers_);
+    }
+  } while(!done);
+#else
   CHECK_MUTEX_CALL(pthread_rwlock_rdlock, (&rwlock_));
-  RegisterAsLockedWithCurrentThread();
-  AssertSharedHeld();
+#endif
+  RegisterAsLocked(self);
+  AssertSharedHeld(self);
 }
 
-bool ReaderWriterMutex::SharedTryLock() {
+bool ReaderWriterMutex::SharedTryLock(Thread* self) {
+#if ART_USE_FUTEXES
+  bool done = false;
+  do {
+    int32_t cur_state = state_;
+    if (cur_state >= 0) {
+      // Add as an extra reader.
+      done = android_atomic_cmpxchg(cur_state, cur_state + 1, &state_) == 0;
+    } else {
+      // Owner holds it exclusively.
+      return false;
+    }
+  } while(!done);
+#else
   int result = pthread_rwlock_tryrdlock(&rwlock_);
   if (result == EBUSY) {
     return false;
@@ -369,32 +453,50 @@
     errno = result;
     PLOG(FATAL) << "pthread_mutex_trylock failed for " << name_;
   }
-  RegisterAsLockedWithCurrentThread();
-  AssertSharedHeld();
+#endif
+  RegisterAsLocked(self);
+  AssertSharedHeld(self);
   return true;
 }
 
-void ReaderWriterMutex::SharedUnlock() {
-  AssertSharedHeld();
-  RegisterAsUnlockedWithCurrentThread();
+void ReaderWriterMutex::SharedUnlock(Thread* self) {
+  AssertSharedHeld(self);
+  RegisterAsUnlocked(self);
+#if ART_USE_FUTEXES
+  bool done = false;
+  do {
+    int32_t cur_state = state_;
+    if (LIKELY(cur_state > 0)) {
+      // Reduce state by 1.
+      done = android_atomic_cmpxchg(cur_state, cur_state - 1, &state_) == 0;
+      if (done && (cur_state - 1) == 0) { // cmpxchg may fail due to noise?
+        if (num_pending_writers_ > 0 || num_pending_readers_ > 0) {
+          // Wake any exclusive waiters as there are now no readers.
+          futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
+        }
+      }
+    } else {
+      LOG(FATAL) << "Unexpected state_:" << cur_state << " for " << name_;
+    }
+  } while(!done);
+#else
   CHECK_MUTEX_CALL(pthread_rwlock_unlock, (&rwlock_));
+#endif
 }
 
-bool ReaderWriterMutex::IsExclusiveHeld() const {
+bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
   bool result = (GetExclusiveOwnerTid() == static_cast<uint64_t>(GetTid()));
   if (kDebugLocking) {
     // Sanity that if the pthread thinks we own the lock the Thread agrees.
-    Thread* self = Thread::Current();
     CHECK((self == NULL) || !result || (self->GetHeldMutex(level_) == this));
   }
   return result;
 }
 
-bool ReaderWriterMutex::IsSharedHeld() const {
-  Thread* self = Thread::Current();
+bool ReaderWriterMutex::IsSharedHeld(const Thread* self) const {
   bool result;
   if (UNLIKELY(self == NULL)) {  // Handle unattached threads.
-    result = IsExclusiveHeld(); // TODO: a better best effort here.
+    result = IsExclusiveHeld(self); // TODO: a better best effort here.
   } else {
     result = (self->GetHeldMutex(level_) == this);
   }
@@ -402,12 +504,16 @@
 }
 
 uint64_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
+#if ART_USE_FUTEXES
+  return exclusive_owner_;
+#else
 #if defined(__BIONIC__)
   return rwlock_.writerThreadId;
 #elif defined(__GLIBC__)
   return reinterpret_cast<const glibc_pthread_rwlock_t*>(&rwlock_)->writer;
 #elif defined(__APPLE__)
-  const darwin_pthread_rwlock_t* dprwlock = reinterpret_cast<const darwin_pthread_rwlock_t*>(&rwlock_);
+  const darwin_pthread_rwlock_t*
+      dprwlock = reinterpret_cast<const darwin_pthread_rwlock_t*>(&rwlock_);
   pthread_t owner = dprwlock->darwin_pthread_rwlock_owner;
   if (owner == (pthread_t)0) {
     return 0;
@@ -418,6 +524,7 @@
 #else
 #error unsupported C library
 #endif
+#endif
 }
 
 ConditionVariable::ConditionVariable(const std::string& name) : name_(name) {
@@ -443,21 +550,21 @@
   CHECK_MUTEX_CALL(pthread_cond_signal, (&cond_));
 }
 
-void ConditionVariable::Wait(Mutex& mutex) {
-  mutex.CheckSafeToWait();
+void ConditionVariable::Wait(Thread* self, Mutex& mutex) {
+  mutex.CheckSafeToWait(self);
   unsigned int old_recursion_count = mutex.recursion_count_;
   mutex.recursion_count_ = 0;
   CHECK_MUTEX_CALL(pthread_cond_wait, (&cond_, &mutex.mutex_));
   mutex.recursion_count_ = old_recursion_count;
 }
 
-void ConditionVariable::TimedWait(Mutex& mutex, const timespec& ts) {
+void ConditionVariable::TimedWait(Thread* self, Mutex& mutex, const timespec& ts) {
 #ifdef HAVE_TIMEDWAIT_MONOTONIC
 #define TIMEDWAIT pthread_cond_timedwait_monotonic
 #else
 #define TIMEDWAIT pthread_cond_timedwait
 #endif
-  mutex.CheckSafeToWait();
+  mutex.CheckSafeToWait(self);
   unsigned int old_recursion_count = mutex.recursion_count_;
   mutex.recursion_count_ = 0;
   int rc = TIMEDWAIT(&cond_, &mutex.mutex_, &ts);
diff --git a/src/mutex.h b/src/mutex.h
index 85d75ab..af2b352 100644
--- a/src/mutex.h
+++ b/src/mutex.h
@@ -24,8 +24,11 @@
 #include <string>
 
 #include "globals.h"
-#include "logging.h"
+#include "locks.h"
 #include "macros.h"
+#include "thread.h"
+
+#define ART_USE_FUTEXES 0
 
 // Currently Darwin doesn't support locks with timeouts.
 #if !defined(__APPLE__)
@@ -38,126 +41,6 @@
 
 const bool kDebugLocking = kIsDebugBuild;
 
-class LOCKABLE Mutex;
-class LOCKABLE ReaderWriterMutex;
-
-// MutexLevel is used to impose a lock hierarchy [1] where acquisition of a Mutex at a higher or
-// equal level to a lock a thread holds is invalid. The lock hierarchy achieves a cycle free
-// partial ordering and thereby cause deadlock situations to fail checks.
-//
-// [1] http://www.drdobbs.com/parallel/use-lock-hierarchies-to-avoid-deadlock/204801163
-enum MutexLevel {
-  kLoggingLock = 0,
-  kUnexpectedSignalLock = 1,
-  kThreadSuspendCountLock = 2,
-  kAbortLock = 3,
-  kDefaultMutexLevel = 4,
-  kJdwpSerialLock = 5,
-  kAllocSpaceLock = 6,
-  kLoadLibraryLock = 7,
-  kClassLinkerClassesLock = 8,
-  kThreadListLock = 9,
-  kHeapBitmapLock = 10,
-  kMonitorLock = 11,
-  kMutatorLock = 12,
-  kZygoteCreationLock = 13,
-  kMaxMutexLevel = kMutatorLock,
-};
-std::ostream& operator<<(std::ostream& os, const MutexLevel& rhs);
-
-// Global mutexes corresponding to the levels above.
-class Locks {
- public:
-  static void Init();
-
-  // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block
-  // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds
-  // a share on the mutator_lock_. The garbage collector may also execute with shared access but
-  // at times requires exclusive access to the heap (not to be confused with the heap meta-data
-  // guarded by the heap_lock_ below). When the garbage collector requires exclusive access it asks
-  // the mutators to suspend themselves which also involves usage of the thread_suspend_count_lock_
-  // to cover weaknesses in using ReaderWriterMutexes with ConditionVariables. We use a condition
-  // variable to wait upon in the suspension logic as releasing and then re-acquiring a share on
-  // the mutator lock doesn't necessarily allow the exclusive user (e.g the garbage collector)
-  // chance to acquire the lock.
-  //
-  // Thread suspension:
-  // Shared users                                  | Exclusive user
-  // (holding mutator lock and in kRunnable state) |   .. running ..
-  //   .. running ..                               | Request thread suspension by:
-  //   .. running ..                               |   - acquiring thread_suspend_count_lock_
-  //   .. running ..                               |   - incrementing Thread::suspend_count_ on
-  //   .. running ..                               |     all mutator threads
-  //   .. running ..                               |   - releasing thread_suspend_count_lock_
-  //   .. running ..                               | Block trying to acquire exclusive mutator lock
-  // Poll Thread::suspend_count_ and enter full    |   .. blocked ..
-  // suspend code.                                 |   .. blocked ..
-  // Change state to kSuspended                    |   .. blocked ..
-  // x: Release share on mutator_lock_             | Carry out exclusive access
-  // Acquire thread_suspend_count_lock_            |   .. exclusive ..
-  // while Thread::suspend_count_ > 0              |   .. exclusive ..
-  //   - wait on Thread::resume_cond_              |   .. exclusive ..
-  //     (releases thread_suspend_count_lock_)     |   .. exclusive ..
-  //   .. waiting ..                               | Release mutator_lock_
-  //   .. waiting ..                               | Request thread resumption by:
-  //   .. waiting ..                               |   - acquiring thread_suspend_count_lock_
-  //   .. waiting ..                               |   - decrementing Thread::suspend_count_ on
-  //   .. waiting ..                               |     all mutator threads
-  //   .. waiting ..                               |   - notifying on Thread::resume_cond_
-  //    - re-acquire thread_suspend_count_lock_    |   - releasing thread_suspend_count_lock_
-  // Release thread_suspend_count_lock_            |  .. running ..
-  // Acquire share on mutator_lock_                |  .. running ..
-  //  - This could block but the thread still      |  .. running ..
-  //    has a state of kSuspended and so this      |  .. running ..
-  //    isn't an issue.                            |  .. running ..
-  // Acquire thread_suspend_count_lock_            |  .. running ..
-  //  - we poll here as we're transitioning into   |  .. running ..
-  //    kRunnable and an individual thread suspend |  .. running ..
-  //    request (e.g for debugging) won't try      |  .. running ..
-  //    to acquire the mutator lock (which would   |  .. running ..
-  //    block as we hold the mutator lock). This   |  .. running ..
-  //    poll ensures that if the suspender thought |  .. running ..
-  //    we were suspended by incrementing our      |  .. running ..
-  //    Thread::suspend_count_ and then reading    |  .. running ..
-  //    our state we go back to waiting on         |  .. running ..
-  //    Thread::resume_cond_.                      |  .. running ..
-  // can_go_runnable = Thread::suspend_count_ == 0 |  .. running ..
-  // Release thread_suspend_count_lock_            |  .. running ..
-  // if can_go_runnable                            |  .. running ..
-  //   Change state to kRunnable                   |  .. running ..
-  // else                                          |  .. running ..
-  //   Goto x                                      |  .. running ..
-  //  .. running ..                                |  .. running ..
-  static ReaderWriterMutex* mutator_lock_;
-
-  // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap.
-  static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_);
-
-  // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
-  // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
-
-  // Guards lists of classes within the class linker.
-  static Mutex* classlinker_classes_lock_ ACQUIRED_AFTER(thread_list_lock_);
-
-  // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
-  // doesn't try to hold a higher level Mutex.
-  #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(classlinker_classes_lock_)
-
-  // Have an exclusive aborting thread.
-  static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
-
-  // Allow mutual exclusion when manipulating Thread::suspend_count_.
-  // TODO: Does the trade-off of a per-thread lock make sense?
-  static Mutex* thread_suspend_count_lock_ ACQUIRED_AFTER(abort_lock_);
-
-  // One unexpected signal at a time lock.
-  static Mutex* unexpected_signal_lock_ ACQUIRED_AFTER(thread_suspend_count_lock_);
-
-  // Have an exclusive logging thread.
-  static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_);
-};
-
 // Base class for all Mutex implementations
 class BaseMutex {
  public:
@@ -171,13 +54,13 @@
  protected:
   friend class ConditionVariable;
 
-  BaseMutex(const char* name, MutexLevel level);
+  BaseMutex(const char* name, LockLevel level);
   virtual ~BaseMutex() {}
-  void RegisterAsLockedWithCurrentThread();
-  void RegisterAsUnlockedWithCurrentThread();
-  void CheckSafeToWait();
+  void RegisterAsLocked(Thread* self);
+  void RegisterAsUnlocked(Thread* self);
+  void CheckSafeToWait(Thread* self);
 
-  const MutexLevel level_;  // Support for lock hierarchy.
+  const LockLevel level_;  // Support for lock hierarchy.
   const std::string name_;
 };
 
@@ -195,41 +78,42 @@
 //   an error. Being non-reentrant simplifies Waiting on ConditionVariables.
 class LOCKABLE Mutex : public BaseMutex {
  public:
-  explicit Mutex(const char* name, MutexLevel level = kDefaultMutexLevel, bool recursive = false);
+  explicit Mutex(const char* name, LockLevel level = kDefaultMutexLevel, bool recursive = false);
   ~Mutex();
 
   virtual bool IsMutex() const { return true; }
 
   // Block until mutex is free then acquire exclusive access.
-  void ExclusiveLock() EXCLUSIVE_LOCK_FUNCTION();
-  void Lock() EXCLUSIVE_LOCK_FUNCTION() {  ExclusiveLock(); }
+  void ExclusiveLock(Thread* self) EXCLUSIVE_LOCK_FUNCTION();
+  void Lock(Thread* self) EXCLUSIVE_LOCK_FUNCTION() {  ExclusiveLock(self); }
 
   // Returns true if acquires exclusive access, false otherwise.
-  bool ExclusiveTryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true);
-  bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) { return ExclusiveTryLock(); }
+  bool ExclusiveTryLock(Thread* self) EXCLUSIVE_TRYLOCK_FUNCTION(true);
+  bool TryLock(Thread* self) EXCLUSIVE_TRYLOCK_FUNCTION(true) { return ExclusiveTryLock(self); }
 
   // Release exclusive access.
-  void ExclusiveUnlock() UNLOCK_FUNCTION();
-  void Unlock() UNLOCK_FUNCTION() {  ExclusiveUnlock(); }
+  void ExclusiveUnlock(Thread* self) UNLOCK_FUNCTION();
+  void Unlock(Thread* self) UNLOCK_FUNCTION() {  ExclusiveUnlock(self); }
 
   // Is the current thread the exclusive holder of the Mutex.
-  bool IsExclusiveHeld() const;
+  bool IsExclusiveHeld(const Thread* self) const;
 
   // Assert that the Mutex is exclusively held by the current thread.
-  void AssertExclusiveHeld() {
+  void AssertExclusiveHeld(const Thread* self) {
     if (kDebugLocking) {
-      CHECK(IsExclusiveHeld());
+      CHECK(IsExclusiveHeld(self));
     }
   }
-  void AssertHeld() { AssertExclusiveHeld(); }
+  void AssertHeld(const Thread* self) { AssertExclusiveHeld(self); }
+  void AssertHeld() { AssertExclusiveHeld(Thread::Current()); }
 
   // Assert that the Mutex is not held by the current thread.
-  void AssertNotHeldExclusive() {
+  void AssertNotHeldExclusive(const Thread* self) {
     if (kDebugLocking) {
-      CHECK(!IsExclusiveHeld());
+      CHECK(!IsExclusiveHeld(self));
     }
   }
-  void AssertNotHeld() { AssertNotHeldExclusive(); }
+  void AssertNotHeld(const Thread* self) { AssertNotHeldExclusive(self); }
 
   // Id associated with exclusive owner.
   uint64_t GetExclusiveOwnerTid() const;
@@ -266,79 +150,91 @@
 // * for large values of n the SharedLock may block.
 class LOCKABLE ReaderWriterMutex : public BaseMutex {
  public:
-  explicit ReaderWriterMutex(const char* name, MutexLevel level = kDefaultMutexLevel);
+  explicit ReaderWriterMutex(const char* name, LockLevel level = kDefaultMutexLevel);
   ~ReaderWriterMutex();
 
   virtual bool IsReaderWriterMutex() const { return true; }
 
   // Block until ReaderWriterMutex is free then acquire exclusive access.
-  void ExclusiveLock() EXCLUSIVE_LOCK_FUNCTION();
-  void WriterLock() EXCLUSIVE_LOCK_FUNCTION() {  ExclusiveLock(); }
+  void ExclusiveLock(Thread* self) EXCLUSIVE_LOCK_FUNCTION();
+  void WriterLock(Thread* self) EXCLUSIVE_LOCK_FUNCTION() {  ExclusiveLock(self); }
 
   // Release exclusive access.
-  void ExclusiveUnlock() UNLOCK_FUNCTION();
-  void WriterUnlock() UNLOCK_FUNCTION() {  ExclusiveUnlock(); }
+  void ExclusiveUnlock(Thread* self) UNLOCK_FUNCTION();
+  void WriterUnlock(Thread* self) UNLOCK_FUNCTION() {  ExclusiveUnlock(self); }
 
   // Block until ReaderWriterMutex is free and acquire exclusive access. Returns true on success
   // or false if timeout is reached.
 #if HAVE_TIMED_RWLOCK
-  bool ExclusiveLockWithTimeout(const timespec& abs_timeout) EXCLUSIVE_TRYLOCK_FUNCTION(true);
+  bool ExclusiveLockWithTimeout(Thread* self, const timespec& abs_timeout)
+      EXCLUSIVE_TRYLOCK_FUNCTION(true);
 #endif
 
   // Block until ReaderWriterMutex is shared or free then acquire a share on the access.
-  void SharedLock() SHARED_LOCK_FUNCTION();
-  void ReaderLock() SHARED_LOCK_FUNCTION() { SharedLock(); }
+  void SharedLock(Thread* self) SHARED_LOCK_FUNCTION();
+  void ReaderLock(Thread* self) SHARED_LOCK_FUNCTION() { SharedLock(self); }
 
   // Try to acquire share of ReaderWriterMutex.
-  bool SharedTryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true);
+  bool SharedTryLock(Thread* self) EXCLUSIVE_TRYLOCK_FUNCTION(true);
 
   // Release a share of the access.
-  void SharedUnlock() UNLOCK_FUNCTION();
-  void ReaderUnlock() UNLOCK_FUNCTION() { SharedUnlock(); }
+  void SharedUnlock(Thread* self) UNLOCK_FUNCTION();
+  void ReaderUnlock(Thread* self) UNLOCK_FUNCTION() { SharedUnlock(self); }
 
   // Is the current thread the exclusive holder of the ReaderWriterMutex.
-  bool IsExclusiveHeld() const;
+  bool IsExclusiveHeld(const Thread* self) const;
 
   // Assert the current thread has exclusive access to the ReaderWriterMutex.
-  void AssertExclusiveHeld() {
+  void AssertExclusiveHeld(const Thread* self) {
     if (kDebugLocking) {
-      CHECK(IsExclusiveHeld());
+      CHECK(IsExclusiveHeld(self));
     }
   }
-  void AssertWriterHeld() { AssertExclusiveHeld(); }
+  void AssertWriterHeld(const Thread* self) { AssertExclusiveHeld(self); }
 
   // Assert the current thread doesn't have exclusive access to the ReaderWriterMutex.
-  void AssertNotExclusiveHeld() {
+  void AssertNotExclusiveHeld(const Thread* self) {
     if (kDebugLocking) {
-      CHECK(!IsExclusiveHeld());
+      CHECK(!IsExclusiveHeld(self));
     }
   }
-  void AssertNotWriterHeld() { AssertNotExclusiveHeld(); }
+  void AssertNotWriterHeld(const Thread* self) { AssertNotExclusiveHeld(self); }
 
   // Is the current thread a shared holder of the ReaderWriterMutex.
-  bool IsSharedHeld() const;
+  bool IsSharedHeld(const Thread* self) const;
 
   // Assert the current thread has shared access to the ReaderWriterMutex.
-  void AssertSharedHeld() {
+  void AssertSharedHeld(const Thread* self) {
     if (kDebugLocking) {
-      CHECK(IsSharedHeld());
+      CHECK(IsSharedHeld(self));
     }
   }
-  void AssertReaderHeld() { AssertSharedHeld(); }
+  void AssertReaderHeld(const Thread* self) { AssertSharedHeld(self); }
 
   // Assert the current thread doesn't hold this ReaderWriterMutex either in shared or exclusive
   // mode.
-  void AssertNotHeld() {
+  void AssertNotHeld(const Thread* self) {
     if (kDebugLocking) {
-      CHECK(!IsSharedHeld());
+      CHECK(!IsSharedHeld(self));
     }
   }
 
   // Id associated with exclusive owner.
   uint64_t GetExclusiveOwnerTid() const;
- private:
-  pthread_rwlock_t rwlock_;
 
+ private:
+#if ART_USE_FUTEXES
+  // -1 implies held exclusively, a positive value is the number of shared (reader) holders.
+  volatile int32_t state_;
+  // Exclusive owner.
+  volatile uint64_t exclusive_owner_;
+  // Pending readers.
+  volatile int32_t num_pending_readers_;
+  // Pending writers.
+  volatile int32_t num_pending_writers_;
+#else
+  pthread_rwlock_t rwlock_;
+#endif
   friend class MutexTester;
   DISALLOW_COPY_AND_ASSIGN(ReaderWriterMutex);
 };
@@ -352,8 +248,8 @@
 
   void Broadcast();
   void Signal();
-  void Wait(Mutex& mutex);
-  void TimedWait(Mutex& mutex, const timespec& ts);
+  void Wait(Thread* self, Mutex& mutex);
+  void TimedWait(Thread* self, Mutex& mutex, const timespec& ts);
 
  private:
   pthread_cond_t cond_;
@@ -365,15 +261,20 @@
 // upon destruction.
 class SCOPED_LOCKABLE MutexLock {
  public:
-  explicit MutexLock(Mutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(mu) {
-    mu_.ExclusiveLock();
+  explicit MutexLock(Thread* self, Mutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : self_(self), mu_(mu) {
+    mu_.ExclusiveLock(self_);
+  }
+
+  explicit MutexLock(Mutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : self_(Thread::Current()), mu_(mu) {
+    mu_.ExclusiveLock(self_);
   }
 
   ~MutexLock() UNLOCK_FUNCTION() {
-    mu_.ExclusiveUnlock();
+    mu_.ExclusiveUnlock(self_);
   }
 
  private:
+  Thread* const self_;
   Mutex& mu_;
   DISALLOW_COPY_AND_ASSIGN(MutexLock);
 };
@@ -384,15 +285,22 @@
 // construction and releases it upon destruction.
 class SCOPED_LOCKABLE ReaderMutexLock {
  public:
-  explicit ReaderMutexLock(ReaderWriterMutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(mu) {
-    mu_.SharedLock();
+  explicit ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) :
+      self_(self), mu_(mu) {
+    mu_.SharedLock(self_);
+  }
+
+  explicit ReaderMutexLock(ReaderWriterMutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) :
+      self_(Thread::Current()), mu_(mu) {
+    mu_.SharedLock(self_);
   }
 
   ~ReaderMutexLock() UNLOCK_FUNCTION() {
-    mu_.SharedUnlock();
+    mu_.SharedUnlock(self_);
   }
 
  private:
+  Thread* const self_;
   ReaderWriterMutex& mu_;
   DISALLOW_COPY_AND_ASSIGN(ReaderMutexLock);
 };
@@ -404,15 +312,22 @@
 // construction and releases it upon destruction.
 class SCOPED_LOCKABLE WriterMutexLock {
  public:
-  explicit WriterMutexLock(ReaderWriterMutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(mu) {
-    mu_.ExclusiveLock();
+  explicit WriterMutexLock(Thread* self, ReaderWriterMutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) :
+      self_(self), mu_(mu) {
+    mu_.ExclusiveLock(self_);
+  }
+
+  explicit WriterMutexLock(ReaderWriterMutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) :
+      self_(Thread::Current()), mu_(mu) {
+    mu_.ExclusiveLock(self_);
   }
 
   ~WriterMutexLock() UNLOCK_FUNCTION() {
-    mu_.ExclusiveUnlock();
+    mu_.ExclusiveUnlock(self_);
   }
 
  private:
+  Thread* const self_;
   ReaderWriterMutex& mu_;
   DISALLOW_COPY_AND_ASSIGN(WriterMutexLock);
 };
@@ -420,27 +335,6 @@
 // "WriterMutexLock mu(lock)".
 #define WriterMutexLock(x) COMPILE_ASSERT(0, writer_mutex_lock_declaration_missing_variable_name)
 
-// Scoped unlocker/locker for a ReaderWriterMutex that releases read access to mu upon
-// construction and acquires it again upon destruction.
-class ReaderMutexUnlock {
- public:
-  explicit ReaderMutexUnlock(ReaderWriterMutex& mu) UNLOCK_FUNCTION(mu) : mu_(mu) {
-    mu_.SharedUnlock();
-  }
-
-  ~ReaderMutexUnlock() SHARED_LOCK_FUNCTION(mu_) {
-    mu_.SharedLock();
-  }
-
- private:
-  ReaderWriterMutex& mu_;
-  DISALLOW_COPY_AND_ASSIGN(ReaderMutexUnlock);
-};
-// Catch bug where variable name is omitted. "ReaderMutexUnlock (lock);" instead of
-// "ReaderMutexUnlock mu(lock)".
-#define ReaderMutexUnlock(x) \
-    COMPILE_ASSERT(0, reader_mutex_unlock_declaration_missing_variable_name)
-
 }  // namespace art
 
 #endif  // ART_SRC_MUTEX_H_
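For readers unfamiliar with the futex scheme outlined by the new ReaderWriterMutex fields above, the following is a minimal, hypothetical illustration of how a shared acquisition could be built on state_ and num_pending_readers_. It assumes Linux futex(2) reached via syscall(2) and GCC atomic builtins rather than the runtime's own atomics; the function names are illustrative and are not part of this patch, which keeps ART_USE_FUTEXES defined to 0.

#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>

static void FutexWait(volatile int32_t* addr, int32_t expected) {
  // Sleep until *addr is observed to differ from 'expected' (or a spurious wakeup).
  syscall(SYS_futex, const_cast<int32_t*>(addr), FUTEX_WAIT, expected, NULL, NULL, 0);
}

// state: -1 when held exclusively, otherwise the number of shared holders.
static void SharedLockSketch(volatile int32_t* state, volatile int32_t* num_pending_readers) {
  for (;;) {
    int32_t cur = *state;
    if (cur >= 0) {
      // Free or already shared: try to register one more reader.
      if (__sync_bool_compare_and_swap(state, cur, cur + 1)) {
        return;
      }
    } else {
      // Held exclusively: record a pending reader, then sleep until the writer's
      // unlock path futex-wakes the state word.
      __sync_fetch_and_add(num_pending_readers, 1);
      FutexWait(state, cur);
      __sync_fetch_and_sub(num_pending_readers, 1);
    }
  }
}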
diff --git a/src/mutex_test.cc b/src/mutex_test.cc
index a998939..4dac3c6 100644
--- a/src/mutex_test.cc
+++ b/src/mutex_test.cc
@@ -28,9 +28,9 @@
 
     // This test is single-threaded, so we also know _who_ should hold the lock.
     if (expected_depth == 0) {
-      mu.AssertNotHeld();
+      mu.AssertNotHeld(Thread::Current());
     } else {
-      mu.AssertHeld();
+      mu.AssertHeld(Thread::Current());
     }
   }
 };
@@ -38,9 +38,9 @@
 TEST_F(MutexTest, LockUnlock) {
   Mutex mu("test mutex");
   MutexTester::AssertDepth(mu, 0U);
-  mu.Lock();
+  mu.Lock(Thread::Current());
   MutexTester::AssertDepth(mu, 1U);
-  mu.Unlock();
+  mu.Unlock(Thread::Current());
   MutexTester::AssertDepth(mu, 0U);
 }
 
@@ -48,9 +48,9 @@
 static void TryLockUnlockTest() NO_THREAD_SAFETY_ANALYSIS {
   Mutex mu("test mutex");
   MutexTester::AssertDepth(mu, 0U);
-  ASSERT_TRUE(mu.TryLock());
+  ASSERT_TRUE(mu.TryLock(Thread::Current()));
   MutexTester::AssertDepth(mu, 1U);
-  mu.Unlock();
+  mu.Unlock(Thread::Current());
   MutexTester::AssertDepth(mu, 0U);
 }
 
@@ -62,13 +62,13 @@
 static void RecursiveLockUnlockTest() NO_THREAD_SAFETY_ANALYSIS {
   Mutex mu("test mutex", kDefaultMutexLevel, true);
   MutexTester::AssertDepth(mu, 0U);
-  mu.Lock();
+  mu.Lock(Thread::Current());
   MutexTester::AssertDepth(mu, 1U);
-  mu.Lock();
+  mu.Lock(Thread::Current());
   MutexTester::AssertDepth(mu, 2U);
-  mu.Unlock();
+  mu.Unlock(Thread::Current());
   MutexTester::AssertDepth(mu, 1U);
-  mu.Unlock();
+  mu.Unlock(Thread::Current());
   MutexTester::AssertDepth(mu, 0U);
 }
 
@@ -80,13 +80,13 @@
 static void RecursiveTryLockUnlockTest() NO_THREAD_SAFETY_ANALYSIS {
   Mutex mu("test mutex", kDefaultMutexLevel, true);
   MutexTester::AssertDepth(mu, 0U);
-  ASSERT_TRUE(mu.TryLock());
+  ASSERT_TRUE(mu.TryLock(Thread::Current()));
   MutexTester::AssertDepth(mu, 1U);
-  ASSERT_TRUE(mu.TryLock());
+  ASSERT_TRUE(mu.TryLock(Thread::Current()));
   MutexTester::AssertDepth(mu, 2U);
-  mu.Unlock();
+  mu.Unlock(Thread::Current());
   MutexTester::AssertDepth(mu, 1U);
-  mu.Unlock();
+  mu.Unlock(Thread::Current());
   MutexTester::AssertDepth(mu, 0U);
 }
 
@@ -102,9 +102,9 @@
 
   static void* Callback(void* arg) {
     RecursiveLockWait* state = reinterpret_cast<RecursiveLockWait*>(arg);
-    state->mu.Lock();
+    state->mu.Lock(Thread::Current());
     state->cv.Signal();
-    state->mu.Unlock();
+    state->mu.Unlock(Thread::Current());
     return NULL;
   }
 
@@ -115,17 +115,17 @@
 // GCC has trouble with our mutex tests, so we have to turn off thread safety analysis.
 static void RecursiveLockWaitTest() NO_THREAD_SAFETY_ANALYSIS {
   RecursiveLockWait state;
-  state.mu.Lock();
-  state.mu.Lock();
+  state.mu.Lock(Thread::Current());
+  state.mu.Lock(Thread::Current());
 
   pthread_t pthread;
   int pthread_create_result = pthread_create(&pthread, NULL, RecursiveLockWait::Callback, &state);
   ASSERT_EQ(0, pthread_create_result);
 
-  state.cv.Wait(state.mu);
+  state.cv.Wait(Thread::Current(), state.mu);
 
-  state.mu.Unlock();
-  state.mu.Unlock();
+  state.mu.Unlock(Thread::Current());
+  state.mu.Unlock(Thread::Current());
 }
 
 // This ensures we don't hang when waiting on a recursively locked mutex,
@@ -136,33 +136,33 @@
 
 TEST_F(MutexTest, SharedLockUnlock) {
   ReaderWriterMutex mu("test rwmutex");
-  mu.AssertNotHeld();
-  mu.AssertNotExclusiveHeld();
-  mu.SharedLock();
-  mu.AssertSharedHeld();
-  mu.AssertNotExclusiveHeld();
-  mu.SharedUnlock();
-  mu.AssertNotHeld();
+  mu.AssertNotHeld(Thread::Current());
+  mu.AssertNotExclusiveHeld(Thread::Current());
+  mu.SharedLock(Thread::Current());
+  mu.AssertSharedHeld(Thread::Current());
+  mu.AssertNotExclusiveHeld(Thread::Current());
+  mu.SharedUnlock(Thread::Current());
+  mu.AssertNotHeld(Thread::Current());
 }
 
 TEST_F(MutexTest, ExclusiveLockUnlock) {
   ReaderWriterMutex mu("test rwmutex");
-  mu.AssertNotHeld();
-  mu.ExclusiveLock();
-  mu.AssertSharedHeld();
-  mu.AssertExclusiveHeld();
-  mu.ExclusiveUnlock();
-  mu.AssertNotHeld();
+  mu.AssertNotHeld(Thread::Current());
+  mu.ExclusiveLock(Thread::Current());
+  mu.AssertSharedHeld(Thread::Current());
+  mu.AssertExclusiveHeld(Thread::Current());
+  mu.ExclusiveUnlock(Thread::Current());
+  mu.AssertNotHeld(Thread::Current());
 }
 
 // GCC has trouble with our mutex tests, so we have to turn off thread safety analysis.
 static void SharedTryLockUnlockTest() NO_THREAD_SAFETY_ANALYSIS {
   ReaderWriterMutex mu("test rwmutex");
-  mu.AssertNotHeld();
-  ASSERT_TRUE(mu.SharedTryLock());
-  mu.AssertSharedHeld();
-  mu.SharedUnlock();
-  mu.AssertNotHeld();
+  mu.AssertNotHeld(Thread::Current());
+  ASSERT_TRUE(mu.SharedTryLock(Thread::Current()));
+  mu.AssertSharedHeld(Thread::Current());
+  mu.SharedUnlock(Thread::Current());
+  mu.AssertNotHeld(Thread::Current());
 }
 
 TEST_F(MutexTest, SharedTryLockUnlock) {
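The scoped lockers above are meant to be used with a Thread* that the caller fetches once and reuses, as in this hypothetical sketch; the no-self constructors remain as a fallback for call sites that have no self in scope.

#include "mutex.h"
#include "thread.h"

void ForEachThreadSketch() {
  Thread* self = Thread::Current();               // single thread-local lookup
  MutexLock mu(self, *Locks::thread_list_lock_);  // ExclusiveLock(self)
  // ... walk the thread list under the lock ...
}                                                 // ~MutexLock -> ExclusiveUnlock(self)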
diff --git a/src/native/dalvik_system_VMRuntime.cc b/src/native/dalvik_system_VMRuntime.cc
index fae06f6..f37b237 100644
--- a/src/native/dalvik_system_VMRuntime.cc
+++ b/src/native/dalvik_system_VMRuntime.cc
@@ -154,14 +154,15 @@
   }
 }
 
-static void VMRuntime_trimHeap(JNIEnv*, jobject) {
+static void VMRuntime_trimHeap(JNIEnv* env, jobject) {
   // Trim the managed heap.
   Heap* heap = Runtime::Current()->GetHeap();
   uint64_t start_ns = NanoTime();
   AllocSpace* alloc_space = heap->GetAllocSpace();
   size_t alloc_space_size = alloc_space->Size();
   float utilization = static_cast<float>(heap->GetBytesAllocated()) / alloc_space_size;
-  heap->Trim();
+  Thread* self = static_cast<JNIEnvExt*>(env)->self;
+  heap->Trim(self);
   // Trim the native heap.
   dlmalloc_trim(0);
   dlmalloc_inspect_all(MspaceMadviseCallback, NULL);
@@ -170,8 +171,9 @@
             << " alloc space with " << static_cast<int>(100 * utilization) << "% utilization";
 }
 
-static void VMRuntime_concurrentGC(JNIEnv*, jobject) {
-  Runtime::Current()->GetHeap()->ConcurrentGC();
+static void VMRuntime_concurrentGC(JNIEnv* env, jobject) {
+  Thread* self = static_cast<JNIEnvExt*>(env)->self;
+  Runtime::Current()->GetHeap()->ConcurrentGC(self);
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/src/native/java_lang_Thread.cc b/src/native/java_lang_Thread.cc
index 2a6f177..edf55c3 100644
--- a/src/native/java_lang_Thread.cc
+++ b/src/native/java_lang_Thread.cc
@@ -98,7 +98,7 @@
 
 static void Thread_nativeInterrupt(JNIEnv* env, jobject java_thread) {
   ScopedObjectAccess soa(env);
-  MutexLock mu(*Locks::thread_list_lock_);
+  MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
   Thread* thread = Thread::FromManagedThread(soa, java_thread);
   if (thread != NULL) {
     thread->Interrupt();
diff --git a/src/oat/jni/arm/jni_internal_arm.cc b/src/oat/jni/arm/jni_internal_arm.cc
index 5220669..61f29af 100644
--- a/src/oat/jni/arm/jni_internal_arm.cc
+++ b/src/oat/jni/arm/jni_internal_arm.cc
@@ -21,6 +21,7 @@
 #include "asm_support.h"
 #include "compiled_method.h"
 #include "compiler.h"
+#include "jni_internal.h"
 #include "oat/utils/arm/assembler_arm.h"
 #include "oat/utils/assembler.h"
 #include "object.h"
diff --git a/src/oat/jni/mips/jni_internal_mips.cc b/src/oat/jni/mips/jni_internal_mips.cc
index 6021cc1..a1fc0bf 100644
--- a/src/oat/jni/mips/jni_internal_mips.cc
+++ b/src/oat/jni/mips/jni_internal_mips.cc
@@ -21,6 +21,7 @@
 #include "asm_support.h"
 #include "compiled_method.h"
 #include "compiler.h"
+#include "jni_internal.h"
 #include "oat/utils/mips/assembler_mips.h"
 #include "oat/utils/assembler.h"
 #include "object.h"
diff --git a/src/oat/jni/x86/jni_internal_x86.cc b/src/oat/jni/x86/jni_internal_x86.cc
index a9d4004..c34112b 100644
--- a/src/oat/jni/x86/jni_internal_x86.cc
+++ b/src/oat/jni/x86/jni_internal_x86.cc
@@ -16,6 +16,7 @@
 
 #include "compiled_method.h"
 #include "compiler.h"
+#include "jni_internal.h"
 #include "oat/utils/assembler.h"
 #include "oat/utils/x86/assembler_x86.h"
 #include "object.h"
diff --git a/src/oat/runtime/callee_save_frame.h b/src/oat/runtime/callee_save_frame.h
index 28bcda6..a8ebce8 100644
--- a/src/oat/runtime/callee_save_frame.h
+++ b/src/oat/runtime/callee_save_frame.h
@@ -17,6 +17,7 @@
 #ifndef ART_SRC_OAT_RUNTIME_CALLEE_SAVE_FRAME_H_
 #define ART_SRC_OAT_RUNTIME_CALLEE_SAVE_FRAME_H_
 
+#include "../src/mutex.h"
 #include "thread.h"
 
 namespace art {
@@ -24,10 +25,11 @@
 class AbstractMethod;
 
 // Place a special frame at the TOS that will save the callee saves for the given type.
-static void  FinishCalleeSaveFrameSetup(Thread* self, AbstractMethod** sp, Runtime::CalleeSaveType type)
+static void FinishCalleeSaveFrameSetup(Thread* self, AbstractMethod** sp,
+                                       Runtime::CalleeSaveType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Be aware the store below may well stomp on an incoming argument.
-  Locks::mutator_lock_->AssertSharedHeld();
+  Locks::mutator_lock_->AssertSharedHeld(self);
   *sp = Runtime::Current()->GetCalleeSaveMethod(type);
   self->SetTopOfStack(sp, 0);
   self->VerifyStack();
diff --git a/src/oat/runtime/support_jni.cc b/src/oat/runtime/support_jni.cc
index 6116d56..60bcf08 100644
--- a/src/oat/runtime/support_jni.cc
+++ b/src/oat/runtime/support_jni.cc
@@ -24,7 +24,7 @@
 
 // Used by the JNI dlsym stub to find the native method to invoke if none is registered.
 extern void* FindNativeMethod(Thread* self) LOCKS_EXCLUDED(Locks::mutator_lock_) {
-  Locks::mutator_lock_->AssertNotHeld();  // We come here as Native.
+  Locks::mutator_lock_->AssertNotHeld(self);  // We come here as Native.
   DCHECK(Thread::Current() == self);
   ScopedObjectAccess soa(self);
 
diff --git a/src/object.cc b/src/object.cc
index eb11469..284f221 100644
--- a/src/object.cc
+++ b/src/object.cc
@@ -582,7 +582,6 @@
 void AbstractMethod::Invoke(Thread* self, Object* receiver, JValue* args, JValue* result) const {
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
     CHECK_EQ(kRunnable, self->GetState());
   }
 
diff --git a/src/reference_table.cc b/src/reference_table.cc
index 1f6cab7..19b6d42 100644
--- a/src/reference_table.cc
+++ b/src/reference_table.cc
@@ -17,6 +17,7 @@
 #include "reference_table.h"
 
 #include "indirect_reference_table.h"
+#include "mutex.h"
 
 #include "object.h"
 
@@ -63,7 +64,7 @@
   bool operator()(const Object* obj1, const Object* obj2)
     // TODO: enable analysis when analysis can work with the STL.
       NO_THREAD_SAFETY_ANALYSIS {
-    Locks::mutator_lock_->AssertSharedHeld();
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
     // Ensure null references and cleared jweaks appear at the end.
     if (obj1 == NULL) {
       return true;
diff --git a/src/runtime.cc b/src/runtime.cc
index b4dfcfe..2b9a28d 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -118,7 +118,7 @@
   }
 
   // Make sure to let the GC complete if it is running.
-  heap_->WaitForConcurrentGcToComplete();
+  heap_->WaitForConcurrentGcToComplete(Thread::Current());
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
   Dbg::StopJdwp();
diff --git a/src/runtime_linux.cc b/src/runtime_linux.cc
index e503358..85eeb8f 100644
--- a/src/runtime_linux.cc
+++ b/src/runtime_linux.cc
@@ -21,6 +21,7 @@
 #include <sys/utsname.h>
 
 #include "logging.h"
+#include "mutex.h"
 #include "stringprintf.h"
 #include "utils.h"
 
diff --git a/src/runtime_support.h b/src/runtime_support.h
index b4a23ff..eff50b3 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -20,7 +20,9 @@
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file.h"
+#include "indirect_reference_table.h"
 #include "invoke_type.h"
+#include "jni_internal.h"
 #include "object.h"
 #include "object_utils.h"
 #include "thread.h"
diff --git a/src/scoped_thread_state_change.h b/src/scoped_thread_state_change.h
index 14956e4..b36922e 100644
--- a/src/scoped_thread_state_change.h
+++ b/src/scoped_thread_state_change.h
@@ -18,6 +18,7 @@
 #define ART_SRC_SCOPED_THREAD_STATE_CHANGE_H_
 
 #include "casts.h"
+#include "jni_internal.h"
 #include "thread.h"
 
 namespace art {
@@ -197,7 +198,7 @@
       LOCKS_EXCLUDED(JavaVMExt::globals_lock,
                      JavaVMExt::weak_globals_lock)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Locks::mutator_lock_->AssertSharedHeld();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
     return down_cast<T>(Self()->DecodeJObject(obj));
   }
@@ -206,7 +207,7 @@
       LOCKS_EXCLUDED(JavaVMExt::globals_lock,
                      JavaVMExt::weak_globals_lock)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Locks::mutator_lock_->AssertSharedHeld();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
 #ifdef MOVING_GARBAGE_COLLECTOR
     // TODO: we should make these unique weak globals if Field instances can ever move.
@@ -219,7 +220,7 @@
       LOCKS_EXCLUDED(JavaVMExt::globals_lock,
                      JavaVMExt::weak_globals_lock)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Locks::mutator_lock_->AssertSharedHeld();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
 #ifdef MOVING_GARBAGE_COLLECTOR
     UNIMPLEMENTED(WARNING);
@@ -231,7 +232,7 @@
       LOCKS_EXCLUDED(JavaVMExt::globals_lock,
                      JavaVMExt::weak_globals_lock)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Locks::mutator_lock_->AssertSharedHeld();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
 #ifdef MOVING_GARBAGE_COLLECTOR
     // TODO: we should make these unique weak globals if Method instances can ever move.
@@ -242,7 +243,7 @@
 
   jmethodID EncodeMethod(AbstractMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Locks::mutator_lock_->AssertSharedHeld();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
 #ifdef MOVING_GARBAGE_COLLECTOR
     UNIMPLEMENTED(WARNING);
@@ -285,14 +286,14 @@
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
       : ScopedObjectAccessUnchecked(env) {
-    Locks::mutator_lock_->AssertSharedHeld();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
   }
 
   explicit ScopedObjectAccess(Thread* self)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
       : ScopedObjectAccessUnchecked(self) {
-    Locks::mutator_lock_->AssertSharedHeld();
+    Locks::mutator_lock_->AssertSharedHeld(Self());
   }
 
   ~ScopedObjectAccess() UNLOCK_FUNCTION(Locks::mutator_lock_) {
diff --git a/src/signal_catcher.cc b/src/signal_catcher.cc
index 7239374..57cae76 100644
--- a/src/signal_catcher.cc
+++ b/src/signal_catcher.cc
@@ -69,9 +69,10 @@
   // Create a raw pthread; its start routine will attach to the runtime.
   CHECK_PTHREAD_CALL(pthread_create, (&pthread_, NULL, &Run, this), "signal catcher thread");
 
-  MutexLock mu(lock_);
+  Thread* self = Thread::Current();
+  MutexLock mu(self, lock_);
   while (thread_ == NULL) {
-    cond_.Wait(lock_);
+    cond_.Wait(self, lock_);
   }
 }
 
@@ -122,12 +123,12 @@
 
   // We should exclusively hold the mutator lock, set state to Runnable without a pending
   // suspension to avoid giving away or trying to re-acquire the mutator lock.
-  Locks::mutator_lock_->AssertExclusiveHeld();
   Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertExclusiveHeld(self);
   ThreadState old_state;
   int suspend_count;
   {
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
+    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
     suspend_count = self->GetSuspendCount();
     if (suspend_count != 0) {
       CHECK_EQ(suspend_count, 1);
@@ -155,7 +156,7 @@
 
   os << "----- end " << getpid() << " -----\n";
   {
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
+    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
     self->SetState(old_state);
     if (suspend_count != 0) {
       self->ModifySuspendCount(+1, false);
@@ -201,7 +202,7 @@
   Thread* self = Thread::Current();
 
   {
-    MutexLock mu(signal_catcher->lock_);
+    MutexLock mu(self, signal_catcher->lock_);
     signal_catcher->thread_ = self;
     signal_catcher->cond_.Broadcast();
   }
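The signal catcher above shows the wait-loop shape that ConditionVariable::Wait(Thread*, Mutex&) expects; a stripped-down, hypothetical sketch of the same pattern:

#include "mutex.h"
#include "thread.h"

// The same Thread* is passed to the MutexLock and to Wait(); Wait() releases
// 'lock' while blocked and re-acquires it before returning.
void WaitUntilAttachedSketch(Mutex& lock, ConditionVariable& cond, Thread** attached) {
  Thread* self = Thread::Current();
  MutexLock mu(self, lock);
  while (*attached == NULL) {
    cond.Wait(self, lock);
  }
}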
diff --git a/src/space.h b/src/space.h
index c3c31a8..d6c7f98 100644
--- a/src/space.h
+++ b/src/space.h
@@ -23,6 +23,7 @@
 #include "globals.h"
 #include "image.h"
 #include "macros.h"
+#include "mutex.h"
 #include "dlmalloc.h"
 #include "mem_map.h"
 
diff --git a/src/stack.cc b/src/stack.cc
index 2567c50..7ec57b4 100644
--- a/src/stack.cc
+++ b/src/stack.cc
@@ -24,21 +24,6 @@
 
 namespace art {
 
-void ManagedStack::PushManagedStackFragment(ManagedStack* fragment) {
-  // Copy this top fragment into given fragment.
-  memcpy(fragment, this, sizeof(ManagedStack));
-  // Clear this fragment, which has become the top.
-  memset(this, 0, sizeof(ManagedStack));
-  // Link our top fragment onto the given fragment.
-  link_ = fragment;
-}
-
-void ManagedStack::PopManagedStackFragment(const ManagedStack& fragment) {
-  DCHECK(&fragment == link_);
-  // Copy this given fragment back to the top.
-  memcpy(this, &fragment, sizeof(ManagedStack));
-}
-
 size_t ManagedStack::NumShadowFrameReferences() const {
   size_t count = 0;
   for (const ManagedStack* current_fragment = this; current_fragment != NULL;
diff --git a/src/stack.h b/src/stack.h
index 4686c6b..ca379d4 100644
--- a/src/stack.h
+++ b/src/stack.h
@@ -147,8 +147,21 @@
  public:
   ManagedStack()
       : link_(NULL), top_shadow_frame_(NULL), top_quick_frame_(NULL), top_quick_frame_pc_(0) {}
-  void PushManagedStackFragment(ManagedStack* fragment);
-  void PopManagedStackFragment(const ManagedStack& record);
+
+  void PushManagedStackFragment(ManagedStack* fragment) {
+    // Copy this top fragment into given fragment.
+    memcpy(fragment, this, sizeof(ManagedStack));
+    // Clear this fragment, which has become the top.
+    memset(this, 0, sizeof(ManagedStack));
+    // Link our top fragment onto the given fragment.
+    link_ = fragment;
+  }
+
+  void PopManagedStackFragment(const ManagedStack& fragment) {
+    DCHECK(&fragment == link_);
+    // Copy this given fragment back to the top.
+    memcpy(this, &fragment, sizeof(ManagedStack));
+  }
 
   ManagedStack* GetLink() const {
     return link_;
diff --git a/src/thread.cc b/src/thread.cc
index f879ee2..bc5b68e 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -37,6 +37,7 @@
 #include "heap.h"
 #include "jni_internal.h"
 #include "monitor.h"
+#include "mutex.h"
 #include "oat/runtime/context.h"
 #include "object.h"
 #include "object_utils.h"
@@ -139,9 +140,11 @@
   Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetInt(thread_peer)));
   // Sanity check that if we have a result it is either suspended or we hold the thread_list_lock_
   // to stop it from going away.
-  MutexLock mu(*Locks::thread_suspend_count_lock_);
-  if (result != NULL && !result->IsSuspended()) {
-    Locks::thread_list_lock_->AssertHeld();
+  if (kIsDebugBuild) {
+    MutexLock mu(soa.Self(), *Locks::thread_suspend_count_lock_);
+    if (result != NULL && !result->IsSuspended()) {
+      Locks::thread_list_lock_->AssertHeld(soa.Self());
+    }
   }
   return result;
 }
@@ -453,13 +456,13 @@
 
 // Attempt to rectify locks so that we dump thread list with required locks before exiting.
 static void UnsafeLogFatalForSuspendCount(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
-  Locks::thread_suspend_count_lock_->Unlock();
-  Locks::mutator_lock_->SharedTryLock();
-  if (!Locks::mutator_lock_->IsSharedHeld()) {
+  Locks::thread_suspend_count_lock_->Unlock(self);
+  Locks::mutator_lock_->SharedTryLock(self);
+  if (!Locks::mutator_lock_->IsSharedHeld(self)) {
     LOG(WARNING) << "Dumping thread list without holding mutator_lock_";
   }
-  Locks::thread_list_lock_->TryLock();
-  if (!Locks::thread_list_lock_->IsExclusiveHeld()) {
+  Locks::thread_list_lock_->TryLock(self);
+  if (!Locks::thread_list_lock_->IsExclusiveHeld(self)) {
     LOG(WARNING) << "Dumping thread list without holding thread_list_lock_";
   }
   std::ostringstream ss;
@@ -526,7 +529,7 @@
   DCHECK_EQ(GetState(), kRunnable);
   state_and_flags_.as_struct.state = new_state;
   // Release share on mutator_lock_.
-  Locks::mutator_lock_->SharedUnlock();
+  Locks::mutator_lock_->SharedUnlock(this);
 }
 
 ThreadState Thread::TransitionFromSuspendedToRunnable() {
@@ -534,33 +537,33 @@
   ThreadState old_state = GetState();
   DCHECK_NE(old_state, kRunnable);
   do {
-    Locks::mutator_lock_->AssertNotHeld();  // Otherwise we starve GC..
+    Locks::mutator_lock_->AssertNotHeld(this);  // Otherwise we starve GC.
     DCHECK_EQ(GetState(), old_state);
     if (ReadFlag(kSuspendRequest)) {
       // Wait while our suspend count is non-zero.
-      MutexLock mu(*Locks::thread_suspend_count_lock_);
+      MutexLock mu(this, *Locks::thread_suspend_count_lock_);
       DCHECK_EQ(GetState(), old_state);
       while (ReadFlag(kSuspendRequest)) {
         // Re-check when Thread::resume_cond_ is notified.
-        Thread::resume_cond_->Wait(*Locks::thread_suspend_count_lock_);
+        Thread::resume_cond_->Wait(this, *Locks::thread_suspend_count_lock_);
         DCHECK_EQ(GetState(), old_state);
       }
       DCHECK_EQ(GetSuspendCount(), 0);
     }
     // Re-acquire shared mutator_lock_ access.
-    Locks::mutator_lock_->SharedLock();
+    Locks::mutator_lock_->SharedLock(this);
     // Atomically change from suspended to runnable if no suspend request pending.
     int16_t old_flags = state_and_flags_.as_struct.flags;
     if ((old_flags & kSuspendRequest) == 0) {
       int32_t old_state_and_flags = old_flags | (old_state << 16);
       int32_t new_state_and_flags = old_flags | (kRunnable << 16);
       done = android_atomic_cmpxchg(old_state_and_flags, new_state_and_flags,
-                                    reinterpret_cast<volatile int32_t*>(&state_and_flags_))
+                                    &state_and_flags_.as_int)
                                         == 0;
     }
     if (!done) {
       // Failed to transition to Runnable. Release shared mutator_lock_ access and try again.
-      Locks::mutator_lock_->SharedUnlock();
+      Locks::mutator_lock_->SharedUnlock(this);
     }
   } while (!done);
   return old_state;
@@ -576,14 +579,14 @@
     Thread* thread;
     {
       ScopedObjectAccess soa(Thread::Current());
-      MutexLock mu(*Locks::thread_list_lock_);
+      MutexLock mu(soa.Self(), *Locks::thread_list_lock_);
       thread = Thread::FromManagedThread(soa, peer);
       if (thread == NULL) {
         LOG(WARNING) << "No such thread for suspend: " << peer;
         return NULL;
       }
       {
-        MutexLock mu(*Locks::thread_suspend_count_lock_);
+        MutexLock mu(soa.Self(), *Locks::thread_suspend_count_lock_);
         if (request_suspension) {
           thread->ModifySuspendCount(+1, true /* for_debugger */);
           request_suspension = false;
@@ -612,7 +615,7 @@
       // Release locks and come out of runnable state.
     }
     for (int i = kMaxMutexLevel; i >= 0; --i) {
-      BaseMutex* held_mutex = Thread::Current()->GetHeldMutex(static_cast<MutexLevel>(i));
+      BaseMutex* held_mutex = Thread::Current()->GetHeldMutex(static_cast<LockLevel>(i));
       if (held_mutex != NULL) {
         LOG(FATAL) << "Holding " << held_mutex->GetName()
             << " while sleeping for thread suspension";
@@ -640,9 +643,10 @@
   std::string group_name;
   int priority;
   bool is_daemon = false;
+  Thread* self = Thread::Current();
 
   if (thread != NULL && thread->peer_ != NULL) {
-    ScopedObjectAccess soa(Thread::Current());
+    ScopedObjectAccess soa(self);
     Object* native_peer = soa.Decode<Object*>(thread->peer_);
     priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)->GetInt(native_peer);
     is_daemon = soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->GetBoolean(native_peer);
@@ -667,7 +671,7 @@
     if (is_daemon) {
       os << " daemon";
     }
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
+    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
     os << " prio=" << priority
        << " tid=" << thread->GetThinLockId()
        << " " << thread->GetState() << "\n";
@@ -678,7 +682,7 @@
   }
 
   if (thread != NULL) {
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
+    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
     os << "  | group=\"" << group_name << "\""
        << " sCount=" << thread->suspend_count_
        << " dsCount=" << thread->debug_suspend_count_
@@ -1059,7 +1063,7 @@
 }
 
 Object* Thread::DecodeJObject(jobject obj) {
-  Locks::mutator_lock_->AssertSharedHeld();
+  Locks::mutator_lock_->AssertSharedHeld(this);
   if (obj == NULL) {
     return NULL;
   }
@@ -1077,7 +1081,7 @@
     {
       JavaVMExt* vm = Runtime::Current()->GetJavaVM();
       IndirectReferenceTable& globals = vm->globals;
-      MutexLock mu(vm->globals_lock);
+      MutexLock mu(this, vm->globals_lock);
       result = const_cast<Object*>(globals.Get(ref));
       break;
     }
@@ -1085,7 +1089,7 @@
     {
       JavaVMExt* vm = Runtime::Current()->GetJavaVM();
       IndirectReferenceTable& weak_globals = vm->weak_globals;
-      MutexLock mu(vm->weak_globals_lock);
+      MutexLock mu(this, vm->weak_globals_lock);
       result = const_cast<Object*>(weak_globals.Get(ref));
       if (result == kClearedJniWeakGlobal) {
         // This is a special case where it's okay to return NULL.
@@ -1117,6 +1121,40 @@
   return result;
 }
 
+// Implements java.lang.Thread.interrupted.
+bool Thread::Interrupted() {
+  MutexLock mu(*wait_mutex_);
+  bool interrupted = interrupted_;
+  interrupted_ = false;
+  return interrupted;
+}
+
+// Implements java.lang.Thread.isInterrupted.
+bool Thread::IsInterrupted() {
+  MutexLock mu(*wait_mutex_);
+  return interrupted_;
+}
+
+void Thread::Interrupt() {
+  MutexLock mu(*wait_mutex_);
+  if (interrupted_) {
+    return;
+  }
+  interrupted_ = true;
+  NotifyLocked();
+}
+
+void Thread::Notify() {
+  MutexLock mu(*wait_mutex_);
+  NotifyLocked();
+}
+
+void Thread::NotifyLocked() {
+  if (wait_monitor_ != NULL) {
+    wait_cond_->Signal();
+  }
+}
+
 class CountStackDepthVisitor : public StackVisitor {
  public:
   CountStackDepthVisitor(const ManagedStack* stack,
@@ -1874,7 +1912,7 @@
     for (int i = kMaxMutexLevel; i >= 0; --i) {
       // We expect no locks except the mutator_lock_.
       if (i != kMutatorLock) {
-        BaseMutex* held_mutex = GetHeldMutex(static_cast<MutexLevel>(i));
+        BaseMutex* held_mutex = GetHeldMutex(static_cast<LockLevel>(i));
         if (held_mutex != NULL) {
           LOG(ERROR) << "holding \"" << held_mutex->GetName()
                   << "\" at point where thread suspension is expected";
diff --git a/src/thread.h b/src/thread.h
index 1b9bb74..257dee4 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -25,14 +25,10 @@
 #include <string>
 #include <vector>
 
-#include "dex_file.h"
 #include "globals.h"
-#include "jni_internal.h"
-#include "logging.h"
 #include "macros.h"
-#include "mutex.h"
-#include "mem_map.h"
 #include "oat/runtime/oat_support_entrypoints.h"
+#include "locks.h"
 #include "offsets.h"
 #include "runtime_stats.h"
 #include "stack.h"
@@ -44,13 +40,16 @@
 
 namespace art {
 
+class AbstractMethod;
 class Array;
+class BaseMutex;
 class Class;
 class ClassLinker;
 class ClassLoader;
 class Context;
 struct DebugInvokeReq;
-class AbstractMethod;
+class DexFile;
+struct JNIEnvExt;
 class Monitor;
 class Object;
 class Runtime;
@@ -158,22 +157,16 @@
 
   ThreadState SetState(ThreadState new_state);
 
-  int GetSuspendCount() const
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_) {
-    Locks::thread_suspend_count_lock_->AssertHeld();
+  int GetSuspendCount() const EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_) {
     return suspend_count_;
   }
 
-  int GetDebugSuspendCount() const
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_) {
-    Locks::thread_suspend_count_lock_->AssertHeld();
+  int GetDebugSuspendCount() const EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_) {
     return debug_suspend_count_;
   }
 
-  bool IsSuspended() const
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_) {
-    int suspend_count = GetSuspendCount();
-    return suspend_count != 0 && GetState() != kRunnable;
+  bool IsSuspended() const EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_) {
+    return GetState() != kRunnable && ReadFlag(kSuspendRequest);
   }
 
   void ModifySuspendCount(int delta, bool for_debugger)
@@ -386,38 +379,14 @@
   }
 
   // Convert a jobject into a Object*
-  Object* DecodeJObject(jobject obj)
-      LOCKS_EXCLUDED(JavaVMExt::globals_lock,
-                     JavaVMExt::weak_globals_lock)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* DecodeJObject(jobject obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Implements java.lang.Thread.interrupted.
-  bool Interrupted() {
-    MutexLock mu(*wait_mutex_);
-    bool interrupted = interrupted_;
-    interrupted_ = false;
-    return interrupted;
-  }
-
+  bool Interrupted();
   // Implements java.lang.Thread.isInterrupted.
-  bool IsInterrupted() {
-    MutexLock mu(*wait_mutex_);
-    return interrupted_;
-  }
-
-  void Interrupt() {
-    MutexLock mu(*wait_mutex_);
-    if (interrupted_) {
-      return;
-    }
-    interrupted_ = true;
-    NotifyLocked();
-  }
-
-  void Notify() {
-    MutexLock mu(*wait_mutex_);
-    NotifyLocked();
-  }
+  bool IsInterrupted();
+  void Interrupt();
+  void Notify();
 
   ClassLoader* GetClassLoaderOverride() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return class_loader_override_;
@@ -579,11 +548,11 @@
     return frame;
   }
 
-  BaseMutex* GetHeldMutex(MutexLevel level) const {
+  BaseMutex* GetHeldMutex(LockLevel level) const {
     return held_mutexes_[level];
   }
 
-  void SetHeldMutex(MutexLevel level, BaseMutex* mutex) {
+  void SetHeldMutex(LockLevel level, BaseMutex* mutex) {
     held_mutexes_[level] = mutex;
   }
 
@@ -634,11 +603,7 @@
   void InitPthreadKeySelf();
   void InitStackHwm();
 
-  void NotifyLocked() EXCLUSIVE_LOCKS_REQUIRED(wait_mutex_) {
-    if (wait_monitor_ != NULL) {
-      wait_cond_->Signal();
-    }
-  }
+  void NotifyLocked() EXCLUSIVE_LOCKS_REQUIRED(wait_mutex_);
 
   bool ReadFlag(ThreadFlag flag) const {
     return (state_and_flags_.as_struct.flags & flag) != 0;
diff --git a/src/thread_list.cc b/src/thread_list.cc
index 550d5c7..082d7af 100644
--- a/src/thread_list.cc
+++ b/src/thread_list.cc
@@ -128,18 +128,18 @@
 
 #if HAVE_TIMED_RWLOCK
 // Attempt to rectify locks so that we dump thread list with required locks before exiting.
-static void UnsafeLogFatalForThreadSuspendAllTimeout() NO_THREAD_SAFETY_ANALYSIS {
+static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
   Runtime* runtime = Runtime::Current();
   std::ostringstream ss;
   ss << "Thread suspend timeout\n";
   runtime->DumpLockHolders(ss);
   ss << "\n";
-  Locks::mutator_lock_->SharedTryLock();
-  if (!Locks::mutator_lock_->IsSharedHeld()) {
+  Locks::mutator_lock_->SharedTryLock(self);
+  if (!Locks::mutator_lock_->IsSharedHeld(self)) {
     LOG(WARNING) << "Dumping thread list without holding mutator_lock_";
   }
-  Locks::thread_list_lock_->TryLock();
-  if (!Locks::thread_list_lock_->IsExclusiveHeld()) {
+  Locks::thread_list_lock_->TryLock(self);
+  if (!Locks::thread_list_lock_->IsExclusiveHeld(self)) {
     LOG(WARNING) << "Dumping thread list without holding thread_list_lock_";
   }
   runtime->GetThreadList()->DumpLocked(ss);
@@ -153,16 +153,15 @@
   VLOG(threads) << *self << " SuspendAll starting...";
 
   if (kIsDebugBuild) {
-    Locks::mutator_lock_->AssertNotHeld();
-    Locks::thread_list_lock_->AssertNotHeld();
-    Locks::thread_suspend_count_lock_->AssertNotHeld();
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
+    Locks::mutator_lock_->AssertNotHeld(self);
+    Locks::thread_list_lock_->AssertNotHeld(self);
+    Locks::thread_suspend_count_lock_->AssertNotHeld(self);
     CHECK_NE(self->GetState(), kRunnable);
   }
   {
-    MutexLock mu(*Locks::thread_list_lock_);
+    MutexLock mu(self, *Locks::thread_list_lock_);
     {
-      MutexLock mu2(*Locks::thread_suspend_count_lock_);
+      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
       // Update global suspend all state for attaching threads.
       ++suspend_all_count_;
       // Increment everybody's suspend count (except our own).
@@ -183,11 +182,11 @@
   timespec timeout;
   clock_gettime(CLOCK_REALTIME, &timeout);
   timeout.tv_sec += 30;
-  if (UNLIKELY(!Locks::mutator_lock_->ExclusiveLockWithTimeout(timeout))) {
-    UnsafeLogFatalForThreadSuspendAllTimeout();
+  if (UNLIKELY(!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, timeout))) {
+    UnsafeLogFatalForThreadSuspendAllTimeout(self);
   }
 #else
-  Locks::mutator_lock_->ExclusiveLock();
+  Locks::mutator_lock_->ExclusiveLock(self);
 #endif
 
   // Debug check that all threads are suspended.
@@ -200,9 +199,10 @@
   Thread* self = Thread::Current();
 
   VLOG(threads) << *self << " ResumeAll starting";
+  Locks::mutator_lock_->ExclusiveUnlock(self);
   {
-    MutexLock mu(*Locks::thread_list_lock_);
-    MutexLock mu2(*Locks::thread_suspend_count_lock_);
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     // Update global suspend all state for attaching threads.
     --suspend_all_count_;
     // Decrement the suspend counts for all threads.
@@ -219,20 +219,20 @@
     VLOG(threads) << *self << " ResumeAll waking others";
     Thread::resume_cond_->Broadcast();
   }
-  Locks::mutator_lock_->ExclusiveUnlock();
   VLOG(threads) << *self << " ResumeAll complete";
 }
 
 void ThreadList::Resume(Thread* thread, bool for_debugger) {
-  DCHECK(thread != Thread::Current());
+  Thread* self = Thread::Current();
+  DCHECK_NE(thread, self);
   VLOG(threads) << "Resume(" << *thread << ") starting..." << (for_debugger ? " (debugger)" : "");
 
   {
     // To check Contains.
-    MutexLock mu(*Locks::thread_list_lock_);
+    MutexLock mu(self, *Locks::thread_list_lock_);
     // To check IsSuspended.
-    MutexLock mu2(*Locks::thread_suspend_count_lock_);
-    CHECK(thread->IsSuspended());
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    DCHECK(thread->IsSuspended());
     if (!Contains(thread)) {
       return;
     }
@@ -241,7 +241,7 @@
 
   {
     VLOG(threads) << "Resume(" << *thread << ") waking others";
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
+    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
     Thread::resume_cond_->Broadcast();
   }
 
@@ -255,9 +255,9 @@
   VLOG(threads) << *self << " SuspendAllForDebugger starting...";
 
   {
-    MutexLock mu(*Locks::thread_list_lock_);
+    MutexLock mu(self, *Locks::thread_list_lock_);
     {
-      MutexLock mu(*Locks::thread_suspend_count_lock_);
+      MutexLock mu(self, *Locks::thread_suspend_count_lock_);
       // Update global suspend all state for attaching threads.
       ++suspend_all_count_;
       ++debug_suspend_all_count_;
@@ -280,14 +280,14 @@
   timespec timeout;
   clock_gettime(CLOCK_REALTIME, &timeout);
   timeout.tv_sec += 30;
-  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(timeout)) {
-    UnsafeLogFatalForThreadSuspendAllTimeout();
+  if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, timeout)) {
+    UnsafeLogFatalForThreadSuspendAllTimeout(self);
   } else {
-    Locks::mutator_lock_->ExclusiveUnlock();
+    Locks::mutator_lock_->ExclusiveUnlock(self);
   }
 #else
-  Locks::mutator_lock_->ExclusiveLock();
-  Locks::mutator_lock_->ExclusiveUnlock();
+  Locks::mutator_lock_->ExclusiveLock(self);
+  Locks::mutator_lock_->ExclusiveUnlock(self);
 #endif
   AssertThreadsAreSuspended();
 
@@ -305,7 +305,7 @@
   // Collisions with other suspends aren't really interesting. We want
   // to ensure that we're the only one fiddling with the suspend count
   // though.
-  MutexLock mu(*Locks::thread_suspend_count_lock_);
+  MutexLock mu(self, *Locks::thread_suspend_count_lock_);
   self->ModifySuspendCount(+1, true);
 
   // Suspend ourselves.
@@ -319,7 +319,7 @@
   Dbg::ClearWaitForEventThread();
 
   while (self->suspend_count_ != 0) {
-    Thread::resume_cond_->Wait(*Locks::thread_suspend_count_lock_);
+    Thread::resume_cond_->Wait(self, *Locks::thread_suspend_count_lock_);
     if (self->suspend_count_ != 0) {
       // The condition was signaled but we're still suspended. This
       // can happen if the debugger lets go while a SIGQUIT thread
@@ -340,8 +340,8 @@
   VLOG(threads) << *self << " UndoDebuggerSuspensions starting";
 
   {
-    MutexLock mu(*Locks::thread_list_lock_);
-    MutexLock mu2(*Locks::thread_suspend_count_lock_);
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     // Update global suspend all state for attaching threads.
     suspend_all_count_ -= debug_suspend_all_count_;
     debug_suspend_all_count_ = 0;
@@ -356,7 +356,7 @@
   }
 
   {
-    MutexLock mu(*Locks::thread_suspend_count_lock_);
+    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
     Thread::resume_cond_->Broadcast();
   }
 
@@ -364,8 +364,9 @@
 }
 
 void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
-  Locks::mutator_lock_->AssertNotHeld();
-  MutexLock mu(*Locks::thread_list_lock_);
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
+  MutexLock mu(self, *Locks::thread_list_lock_);
   bool all_threads_are_daemons;
   do {
     all_threads_are_daemons = true;
@@ -373,28 +374,29 @@
       // TODO: there's a race here with thread exit that's being worked around by checking if the
       // thread has a peer.
       Thread* thread = *it;
-      if (thread != Thread::Current() && thread->HasPeer() && !thread->IsDaemon()) {
+      if (thread != self && thread->HasPeer() && !thread->IsDaemon()) {
         all_threads_are_daemons = false;
         break;
       }
     }
     if (!all_threads_are_daemons) {
       // Wait for another thread to exit before re-checking.
-      thread_exit_cond_.Wait(*Locks::thread_list_lock_);
+      thread_exit_cond_.Wait(self, *Locks::thread_list_lock_);
     }
   } while(!all_threads_are_daemons);
 }
 
 void ThreadList::SuspendAllDaemonThreads() {
-  MutexLock mu(*Locks::thread_list_lock_);
+  Thread* self = Thread::Current();
+  MutexLock mu(self, *Locks::thread_list_lock_);
   { // Tell all the daemons it's time to suspend.
-    MutexLock mu2(*Locks::thread_suspend_count_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
     for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
       Thread* thread = *it;
       // This is only run after all non-daemon threads have exited, so the remainder should all be
       // daemons.
       CHECK(thread->IsDaemon());
-      if (thread != Thread::Current()) {
+      if (thread != self) {
         thread->ModifySuspendCount(+1, false);
       }
     }
@@ -406,8 +408,8 @@
     bool all_suspended = true;
     for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
       Thread* thread = *it;
-      MutexLock mu2(*Locks::thread_suspend_count_lock_);
-      if (thread != Thread::Current() && thread->GetState() == kRunnable) {
+      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+      if (thread != self && thread->GetState() == kRunnable) {
         if (!have_complained) {
           LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
           have_complained = true;
@@ -432,8 +434,8 @@
 
   // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
   // SuspendAll requests.
-  MutexLock mu(*Locks::thread_list_lock_);
-  MutexLock mu2(*Locks::thread_suspend_count_lock_);
+  MutexLock mu(self, *Locks::thread_list_lock_);
+  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
   self->suspend_count_ = suspend_all_count_;
   self->debug_suspend_count_ = debug_suspend_all_count_;
   CHECK(!Contains(self));
@@ -451,7 +453,7 @@
 
   {
     // Remove this thread from the list.
-    MutexLock mu(*Locks::thread_list_lock_);
+    MutexLock mu(self, *Locks::thread_list_lock_);
     CHECK(Contains(self));
     list_.remove(self);
   }
@@ -466,7 +468,7 @@
   CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, NULL), "detach self");
 
   // Signal that a thread just detached.
-  MutexLock mu(*Locks::thread_list_lock_);
+  MutexLock mu(NULL, *Locks::thread_list_lock_);
   thread_exit_cond_.Signal();
 }
 
@@ -477,14 +479,14 @@
 }
 
 void ThreadList::VisitRoots(Heap::RootVisitor* visitor, void* arg) const {
-  MutexLock mu(*Locks::thread_list_lock_);
+  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
   for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
     (*it)->VisitRoots(visitor, arg);
   }
 }
 
 uint32_t ThreadList::AllocThreadId() {
-  MutexLock mu(allocated_ids_lock_);
+  MutexLock mu(Thread::Current(), allocated_ids_lock_);
   for (size_t i = 0; i < allocated_ids_.size(); ++i) {
     if (!allocated_ids_[i]) {
       allocated_ids_.set(i);
@@ -496,7 +498,7 @@
 }
 
 void ThreadList::ReleaseThreadId(uint32_t id) {
-  MutexLock mu(allocated_ids_lock_);
+  MutexLock mu(Thread::Current(), allocated_ids_lock_);
   --id; // Zero is reserved to mean "invalid".
   DCHECK(allocated_ids_[id]) << id;
   allocated_ids_.reset(id);
diff --git a/src/trace.cc b/src/trace.cc
index d1f3f50..d0132e1 100644
--- a/src/trace.cc
+++ b/src/trace.cc
@@ -489,8 +489,9 @@
 }
 
 void Trace::DumpThreadList(std::ostream& os) {
-  Locks::thread_list_lock_->AssertNotHeld();
-  MutexLock mu(*Locks::thread_list_lock_);
+  Thread* self = Thread::Current();
+  Locks::thread_list_lock_->AssertNotHeld(self);
+  MutexLock mu(self, *Locks::thread_list_lock_);
   Runtime::Current()->GetThreadList()->ForEach(DumpThread, &os);
 }
 
@@ -499,9 +500,10 @@
 }
 
 void Trace::UninstallStubs() {
-  Locks::thread_list_lock_->AssertNotHeld();
+  Thread* self = Thread::Current();
+  Locks::thread_list_lock_->AssertNotHeld(self);
   Runtime::Current()->GetClassLinker()->VisitClasses(UninstallStubsClassVisitor, NULL);
-  MutexLock mu(*Locks::thread_list_lock_);
+  MutexLock mu(self, *Locks::thread_list_lock_);
   Runtime::Current()->GetThreadList()->ForEach(TraceRestoreStack, NULL);
 }