Global lock levels.

Introduce the notion of the mutators/GC relationship being governed by
a shared-exclusive (aka reader-writer) lock. Introduce globally ordered
locks that annotalysis can analyse statically at compile time. Add
locking attributes to methods.

More subtly, remove the heap_lock_ and split its responsibilities
between various locks that are held for shorter periods (so work
doesn't get blocked). Remove buggy Dalvik-style thread transitions.
Make GC use CMS in all cases when concurrent is enabled. Fix a bug
where suspend counts, rather than debug suspend counts, were sent to
JDWP. Move the PathClassLoader to WellKnownClasses. In the debugger,
refactor calls that send a request and possibly suspend. Break apart
the different VmWait thread states. Move identity hash code to a
shared method.

Change-Id: Icdbfc3ce3fcccd14341860ac7305d8e97b51f5c6
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 34e8627..c5a6407 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -147,7 +147,6 @@
 	src/card_table.cc \
 	src/check_jni.cc \
 	src/class_linker.cc \
-	src/class_loader.cc \
 	src/compiled_method.cc \
 	src/compiler.cc \
 	src/debugger.cc \
@@ -175,6 +174,7 @@
 	src/jdwp/jdwp_main.cc \
 	src/jdwp/jdwp_socket.cc \
 	src/jni_internal.cc \
+	src/jobject_comparator.cc \
 	src/logging.cc \
 	src/mark_stack.cc \
 	src/mark_sweep.cc \
@@ -223,8 +223,6 @@
 	src/reflection.cc \
 	src/runtime.cc \
 	src/runtime_support.cc \
-	src/scoped_thread_list_lock.cc \
-	src/scoped_thread_list_lock_releaser.cc \
 	src/signal_catcher.cc \
 	src/space.cc \
 	src/space_bitmap.cc \
diff --git a/src/card_table.h b/src/card_table.h
index d065bed..e1d0646 100644
--- a/src/card_table.h
+++ b/src/card_table.h
@@ -74,7 +74,9 @@
 
   // For every dirty card between begin and end invoke the visitor with the specified argument.
   template <typename Visitor>
-  void Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end, const Visitor& visitor) const {
+  void Scan(SpaceBitmap* bitmap, byte* scan_begin, byte* scan_end, const Visitor& visitor) const
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(bitmap->HasAddress(scan_begin));
     DCHECK(bitmap->HasAddress(scan_end - 1));  // scan_end is the byte after the last byte we scan.
     byte* card_cur = CardFromAddr(scan_begin);
diff --git a/src/check_jni.cc b/src/check_jni.cc
index 47f20e1..b387f5f 100644
--- a/src/check_jni.cc
+++ b/src/check_jni.cc
@@ -22,7 +22,7 @@
 #include "class_linker.h"
 #include "logging.h"
 #include "object_utils.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "space.h"
 #include "thread.h"
 #include "runtime.h"
@@ -35,6 +35,7 @@
 
 static void JniAbort(const char* jni_function_name, const char* msg) {
   Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
   Method* current_method = self->GetCurrentMethod();
 
   std::ostringstream os;
@@ -54,7 +55,11 @@
   if (vm->check_jni_abort_hook != NULL) {
     vm->check_jni_abort_hook(vm->check_jni_abort_hook_data, os.str());
   } else {
-    self->SetState(kNative); // Ensure that we get a native stack trace for this thread.
+    {
+      MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+      CHECK_NE(self->GetState(), kRunnable);
+      self->SetState(kNative); // Ensure that we get a native stack trace for this thread.
+    }
     LOG(FATAL) << os.str();
   }
 }
@@ -120,7 +125,8 @@
   NULL
 };
 
-static bool ShouldTrace(JavaVMExt* vm, const Method* method) {
+static bool ShouldTrace(JavaVMExt* vm, const Method* method)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // If both "-Xcheck:jni" and "-Xjnitrace:" are enabled, we print trace messages
   // when a native method that matches the -Xjnitrace argument calls a JNI function
   // such as NewByteArray.
@@ -146,16 +152,27 @@
 class ScopedCheck {
  public:
   // For JNIEnv* functions.
-  explicit ScopedCheck(JNIEnv* env, int flags, const char* functionName) : ts_(env) {
+  explicit ScopedCheck(JNIEnv* env, int flags, const char* functionName)
+      SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_)
+      : soa_(env) {
     Init(flags, functionName, true);
     CheckThread(flags);
   }
 
   // For JavaVM* functions.
-  explicit ScopedCheck(JavaVM* vm, bool has_method, const char* functionName) : ts_(vm) {
+  // TODO: it's not correct that this is a lock function, but making it so aids annotalysis.
+  explicit ScopedCheck(JavaVM* vm, bool has_method, const char* functionName)
+      SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_)
+      : soa_(vm) {
     Init(kFlag_Invocation, functionName, has_method);
   }
 
+  ~ScopedCheck() UNLOCK_FUNCTION(GlobalSynchronization::mutator_lock_) {}
+
+  const ScopedObjectAccess& soa() {
+    return soa_;
+  }
+
   bool ForceCopy() {
     return Runtime::Current()->GetJavaVM()->force_copy;
   }
@@ -179,7 +196,8 @@
    *
    * Works for both static and instance fields.
    */
-  void CheckFieldType(jobject java_object, jfieldID fid, char prim, bool isStatic) {
+  void CheckFieldType(jobject java_object, jfieldID fid, char prim, bool isStatic)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Field* f = CheckFieldID(fid);
     if (f == NULL) {
       return;
@@ -187,7 +205,7 @@
     Class* field_type = FieldHelper(f).GetType();
     if (!field_type->IsPrimitive()) {
       if (java_object != NULL) {
-        Object* obj = ts_.Decode<Object*>(java_object);
+        Object* obj = soa_.Decode<Object*>(java_object);
         // If java_object is a weak global ref whose referent has been cleared,
         // obj will be NULL.  Otherwise, obj should always be non-NULL
         // and valid.
@@ -224,8 +242,9 @@
    *
    * Assumes "jobj" has already been validated.
    */
-  void CheckInstanceFieldID(jobject java_object, jfieldID fid) {
-    Object* o = ts_.Decode<Object*>(java_object);
+  void CheckInstanceFieldID(jobject java_object, jfieldID fid)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    Object* o = soa_.Decode<Object*>(java_object);
     if (o == NULL || !Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
       JniAbortF(function_name_, "field operation on invalid %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
@@ -257,7 +276,8 @@
    * Verify that the method's return type matches the type of call.
    * 'expectedType' will be "L" for all objects, including arrays.
    */
-  void CheckSig(jmethodID mid, const char* expectedType, bool isStatic) {
+  void CheckSig(jmethodID mid, const char* expectedType, bool isStatic)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* m = CheckMethodID(mid);
     if (m == NULL) {
       return;
@@ -282,8 +302,9 @@
    *
    * Assumes "java_class" has already been validated.
    */
-  void CheckStaticFieldID(jclass java_class, jfieldID fid) {
-    Class* c = ts_.Decode<Class*>(java_class);
+  void CheckStaticFieldID(jclass java_class, jfieldID fid)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    Class* c = soa_.Decode<Class*>(java_class);
     const Field* f = CheckFieldID(fid);
     if (f == NULL) {
       return;
@@ -303,12 +324,13 @@
    *
    * Instances of "java_class" must be instances of the method's declaring class.
    */
-  void CheckStaticMethod(jclass java_class, jmethodID mid) {
+  void CheckStaticMethod(jclass java_class, jmethodID mid)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const Method* m = CheckMethodID(mid);
     if (m == NULL) {
       return;
     }
-    Class* c = ts_.Decode<Class*>(java_class);
+    Class* c = soa_.Decode<Class*>(java_class);
     if (!c->IsAssignableFrom(m->GetDeclaringClass())) {
       JniAbortF(function_name_, "can't call static %s on class %s",
                 PrettyMethod(m).c_str(), PrettyClass(c).c_str());
@@ -322,12 +344,13 @@
    * (Note the mid might point to a declaration in an interface; this
    * will be handled automatically by the instanceof check.)
    */
-  void CheckVirtualMethod(jobject java_object, jmethodID mid) {
+  void CheckVirtualMethod(jobject java_object, jmethodID mid)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const Method* m = CheckMethodID(mid);
     if (m == NULL) {
       return;
     }
-    Object* o = ts_.Decode<Object*>(java_object);
+    Object* o = soa_.Decode<Object*>(java_object);
     if (!o->InstanceOf(m->GetDeclaringClass())) {
       JniAbortF(function_name_, "can't call %s on instance of %s",
                 PrettyMethod(m).c_str(), PrettyTypeOf(o).c_str());
@@ -370,11 +393,12 @@
    *
    * Use the kFlag_NullableUtf flag where 'u' field(s) are nullable.
    */
-  void Check(bool entry, const char* fmt0, ...) {
+  void Check(bool entry, const char* fmt0, ...)
+      SHARED_LOCKS_REQUIRED (GlobalSynchronization::mutator_lock_) {
     va_list ap;
 
     const Method* traceMethod = NULL;
-    if ((!ts_.Vm()->trace.empty() || VLOG_IS_ON(third_party_jni)) && has_method_) {
+    if ((!soa_.Vm()->trace.empty() || VLOG_IS_ON(third_party_jni)) && has_method_) {
       // We need to guard some of the invocation interface's calls: a bad caller might
       // use DetachCurrentThread or GetEnv on a thread that's not yet attached.
       Thread* self = Thread::Current();
@@ -383,7 +407,7 @@
       }
     }
 
-    if (((flags_ & kFlag_ForceTrace) != 0) || (traceMethod != NULL && ShouldTrace(ts_.Vm(), traceMethod))) {
+    if (((flags_ & kFlag_ForceTrace) != 0) || (traceMethod != NULL && ShouldTrace(soa_.Vm(), traceMethod))) {
       va_start(ap, fmt0);
       std::string msg;
       for (const char* fmt = fmt0; *fmt;) {
@@ -571,7 +595,8 @@
    * Because we're looking at an object on the GC heap, we have to switch
    * to "running" mode before doing the checks.
    */
-  bool CheckInstance(InstanceKind kind, jobject java_object) {
+  bool CheckInstance(InstanceKind kind, jobject java_object)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const char* what = NULL;
     switch (kind) {
     case kClass:
@@ -598,7 +623,7 @@
       return false;
     }
 
-    Object* obj = ts_.Decode<Object*>(java_object);
+    Object* obj = soa_.Decode<Object*>(java_object);
     if (!Runtime::Current()->GetHeap()->IsHeapAddress(obj)) {
       JniAbortF(function_name_, "%s is an invalid %s: %p (%p)",
                 what, ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object, obj);
@@ -645,13 +670,13 @@
    *
    * Since we're dealing with objects, switch to "running" mode.
    */
-  void CheckArray(jarray java_array) {
+  void CheckArray(jarray java_array) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (java_array == NULL) {
       JniAbortF(function_name_, "jarray was NULL");
       return;
     }
 
-    Array* a = ts_.Decode<Array*>(java_array);
+    Array* a = soa_.Decode<Array*>(java_array);
     if (!Runtime::Current()->GetHeap()->IsHeapAddress(a)) {
       JniAbortF(function_name_, "jarray is an invalid %s: %p (%p)",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_array)).c_str(), java_array, a);
@@ -666,12 +691,12 @@
     }
   }
 
-  Field* CheckFieldID(jfieldID fid) {
+  Field* CheckFieldID(jfieldID fid) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (fid == NULL) {
       JniAbortF(function_name_, "jfieldID was NULL");
       return NULL;
     }
-    Field* f = ts_.DecodeField(fid);
+    Field* f = soa_.DecodeField(fid);
     if (!Runtime::Current()->GetHeap()->IsHeapAddress(f) || !f->IsField()) {
       JniAbortF(function_name_, "invalid jfieldID: %p", fid);
       return NULL;
@@ -679,12 +704,12 @@
     return f;
   }
 
-  Method* CheckMethodID(jmethodID mid) {
+  Method* CheckMethodID(jmethodID mid) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (mid == NULL) {
       JniAbortF(function_name_, "jmethodID was NULL");
       return NULL;
     }
-    Method* m = ts_.DecodeMethod(mid);
+    Method* m = soa_.DecodeMethod(mid);
     if (!Runtime::Current()->GetHeap()->IsHeapAddress(m) || !m->IsMethod()) {
       JniAbortF(function_name_, "invalid jmethodID: %p", mid);
       return NULL;
@@ -698,12 +723,13 @@
    *
    * Switches to "running" mode before performing checks.
    */
-  void CheckObject(jobject java_object) {
+  void CheckObject(jobject java_object)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (java_object == NULL) {
       return;
     }
 
-    Object* o = ts_.Decode<Object*>(java_object);
+    Object* o = soa_.Decode<Object*>(java_object);
     if (!Runtime::Current()->GetHeap()->IsHeapAddress(o)) {
       // TODO: when we remove work_around_app_jni_bugs, this should be impossible.
       JniAbortF(function_name_, "native code passing in reference to invalid %s: %p",
@@ -721,7 +747,7 @@
     }
   }
 
-  void CheckThread(int flags) {
+  void CheckThread(int flags) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Thread* self = Thread::Current();
     if (self == NULL) {
       JniAbortF(function_name_, "a thread (tid %d) is making JNI calls without being attached", GetTid());
@@ -733,13 +759,13 @@
 
     // Verify that the current thread is (a) attached and (b) associated with
     // this particular instance of JNIEnv.
-    if (ts_.Env() != threadEnv) {
-      if (ts_.Vm()->work_around_app_jni_bugs) {
+    if (soa_.Env() != threadEnv) {
+      if (soa_.Vm()->work_around_app_jni_bugs) {
         // If we're keeping broken code limping along, we need to suppress the abort...
-        LOG(ERROR) << "APP BUG DETECTED: thread " << *self << " using JNIEnv* from thread " << *ts_.Self();
+        LOG(ERROR) << "APP BUG DETECTED: thread " << *self << " using JNIEnv* from thread " << *soa_.Self();
       } else {
         JniAbortF(function_name_, "thread %s using JNIEnv* from thread %s",
-                  ToStr<Thread>(*self).c_str(), ToStr<Thread>(*ts_.Self()).c_str());
+                  ToStr<Thread>(*self).c_str(), ToStr<Thread>(*soa_.Self()).c_str());
         return;
       }
     }
@@ -778,7 +804,7 @@
       // TODO: do we care any more? art always dumps pending exceptions on aborting threads.
       if (type != "java.lang.OutOfMemoryError") {
         JniAbortF(function_name_, "JNI %s called with pending exception: %s",
-                  function_name_, type.c_str(), jniGetStackTrace(ts_.Env()).c_str());
+                  function_name_, type.c_str(), jniGetStackTrace(soa_.Env()).c_str());
       } else {
         JniAbortF(function_name_, "JNI %s called with %s pending", function_name_, type.c_str());
       }
@@ -855,7 +881,7 @@
     return 0;
   }
 
-  const ScopedJniThreadState ts_;
+  const ScopedObjectAccess soa_;
   const char* function_name_;
   int flags_;
   bool has_method_;
@@ -1051,9 +1077,9 @@
  * data are allowed.  Returns a pointer to the copied data.
  */
 static void* CreateGuardedPACopy(JNIEnv* env, const jarray java_array, jboolean* isCopy) {
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
 
-  Array* a = ts.Decode<Array*>(java_array);
+  Array* a = soa.Decode<Array*>(java_array);
   size_t component_size = a->GetClass()->GetComponentSize();
   size_t byte_count = a->GetLength() * component_size;
   void* result = GuardedCopy::Create(a->GetRawData(component_size), byte_count, true);
@@ -1072,8 +1098,8 @@
     return;
   }
 
-  ScopedJniThreadState ts(env);
-  Array* a = ts.Decode<Array*>(java_array);
+  ScopedObjectAccess soa(env);
+  Array* a = soa.Decode<Array*>(java_array);
 
   GuardedCopy::Check(__FUNCTION__, dataBuf, true);
 
@@ -1461,8 +1487,7 @@
     CHECK_JNI_ENTRY(kFlag_CritOkay, "Esp", env, java_string, isCopy);
     const jchar* result = baseEnv(env)->GetStringChars(env, java_string, isCopy);
     if (sc.ForceCopy() && result != NULL) {
-      ScopedJniThreadState ts(env);
-      String* s = ts.Decode<String*>(java_string);
+      String* s = sc.soa().Decode<String*>(java_string);
       int byteCount = s->GetLength() * 2;
       result = (const jchar*) GuardedCopy::Create(result, byteCount, false);
       if (isCopy != NULL) {
@@ -1689,8 +1714,7 @@
     CHECK_JNI_ENTRY(kFlag_CritGet, "Esp", env, java_string, isCopy);
     const jchar* result = baseEnv(env)->GetStringCritical(env, java_string, isCopy);
     if (sc.ForceCopy() && result != NULL) {
-      ScopedJniThreadState ts(env);
-      String* s = ts.Decode<String*>(java_string);
+      String* s = sc.soa().Decode<String*>(java_string);
       int byteCount = s->GetLength() * 2;
       result = (const jchar*) GuardedCopy::Create(result, byteCount, false);
       if (isCopy != NULL) {
diff --git a/src/class_linker.cc b/src/class_linker.cc
index df14a41..3514612 100644
--- a/src/class_linker.cc
+++ b/src/class_linker.cc
@@ -45,8 +45,8 @@
 #if defined(ART_USE_LLVM_COMPILER)
 #include "compiler_llvm/runtime_support_llvm.h"
 #endif
-#include "scoped_jni_thread_state.h"
 #include "ScopedLocalRef.h"
+#include "scoped_thread_state_change.h"
 #include "space.h"
 #include "space_bitmap.h"
 #include "stack_indirect_reference_table.h"
@@ -58,7 +58,9 @@
 
 namespace art {
 
-static void ThrowNoClassDefFoundError(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2)));
+static void ThrowNoClassDefFoundError(const char* fmt, ...)
+    __attribute__((__format__(__printf__, 1, 2)))
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 static void ThrowNoClassDefFoundError(const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
@@ -66,7 +68,9 @@
   va_end(args);
 }
 
-static void ThrowClassFormatError(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2)));
+static void ThrowClassFormatError(const char* fmt, ...)
+    __attribute__((__format__(__printf__, 1, 2)))
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 static void ThrowClassFormatError(const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
@@ -74,7 +78,9 @@
   va_end(args);
 }
 
-static void ThrowLinkageError(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2)));
+static void ThrowLinkageError(const char* fmt, ...)
+    __attribute__((__format__(__printf__, 1, 2)))
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 static void ThrowLinkageError(const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
@@ -83,7 +89,8 @@
 }
 
 static void ThrowNoSuchMethodError(bool is_direct, Class* c, const StringPiece& name,
-                                   const StringPiece& signature) {
+                                   const StringPiece& signature)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ClassHelper kh(c);
   std::ostringstream msg;
   msg << "no " << (is_direct ? "direct" : "virtual") << " method " << name << signature
@@ -96,7 +103,8 @@
 }
 
 static void ThrowNoSuchFieldError(const StringPiece& scope, Class* c, const StringPiece& type,
-                                  const StringPiece& name) {
+                                  const StringPiece& name)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ClassHelper kh(c);
   std::ostringstream msg;
   msg << "no " << scope << "field " << name << " of type " << type
@@ -108,7 +116,9 @@
   Thread::Current()->ThrowNewException("Ljava/lang/NoSuchFieldError;", msg.str().c_str());
 }
 
-static void ThrowNullPointerException(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2)));
+static void ThrowNullPointerException(const char* fmt, ...)
+    __attribute__((__format__(__printf__, 1, 2)))
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 static void ThrowNullPointerException(const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
@@ -116,7 +126,8 @@
   va_end(args);
 }
 
-static void ThrowEarlierClassFailure(Class* c) {
+static void ThrowEarlierClassFailure(Class* c)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // The class failed to initialize on a previous attempt, so we want to throw
   // a NoClassDefFoundError (v2 2.17.5).  The exception to this rule is if we
   // failed in verification, in which case v2 5.4.1 says we need to re-throw
@@ -134,7 +145,8 @@
   }
 }
 
-static void WrapExceptionInInitializer() {
+static void WrapExceptionInInitializer()
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Thread* self = Thread::Current();
   JNIEnv* env = self->GetJniEnv();
 
@@ -172,8 +184,6 @@
   "Ljava/lang/reflect/Method;",
   "Ljava/lang/reflect/Proxy;",
   "Ljava/lang/ClassLoader;",
-  "Ldalvik/system/BaseDexClassLoader;",
-  "Ldalvik/system/PathClassLoader;",
   "Ljava/lang/Throwable;",
   "Ljava/lang/ClassNotFoundException;",
   "Ljava/lang/StackTraceElement;",
@@ -212,8 +222,8 @@
 }
 
 ClassLinker::ClassLinker(InternTable* intern_table)
-    : dex_lock_("ClassLinker dex lock"),
-      classes_lock_("ClassLinker classes lock"),
+    // dex_lock_ is recursive as it may be used in stack dumping.
+    : dex_lock_("ClassLinker dex lock", kDefaultMutexLevel, true),
       class_roots_(NULL),
       array_iftable_(NULL),
       init_done_(false),
@@ -433,20 +443,11 @@
       java_lang_ref_WeakReference->GetAccessFlags() |
           kAccClassIsReference | kAccClassIsWeakReference);
 
-  // Setup the ClassLoaders, verifying the object_size_
+  // Setup the ClassLoader, verifying the object_size_
   Class* java_lang_ClassLoader = FindSystemClass("Ljava/lang/ClassLoader;");
   CHECK_EQ(java_lang_ClassLoader->GetObjectSize(), sizeof(ClassLoader));
   SetClassRoot(kJavaLangClassLoader, java_lang_ClassLoader);
 
-  Class* dalvik_system_BaseDexClassLoader = FindSystemClass("Ldalvik/system/BaseDexClassLoader;");
-  CHECK_EQ(dalvik_system_BaseDexClassLoader->GetObjectSize(), sizeof(BaseDexClassLoader));
-  SetClassRoot(kDalvikSystemBaseDexClassLoader, dalvik_system_BaseDexClassLoader);
-
-  Class* dalvik_system_PathClassLoader = FindSystemClass("Ldalvik/system/PathClassLoader;");
-  CHECK_EQ(dalvik_system_PathClassLoader->GetObjectSize(), sizeof(PathClassLoader));
-  SetClassRoot(kDalvikSystemPathClassLoader, dalvik_system_PathClassLoader);
-  PathClassLoader::SetClass(dalvik_system_PathClassLoader);
-
   // Set up java.lang.Throwable, java.lang.ClassNotFoundException, and
   // java.lang.StackTraceElement as a convenience
   SetClassRoot(kJavaLangThrowable, FindSystemClass("Ljava/lang/Throwable;"));
@@ -536,7 +537,7 @@
     Class* c = GetClassRoot(ClassRoot(i));
     if (!c->IsArrayClass() && !c->IsPrimitive()) {
       EnsureInitialized(GetClassRoot(ClassRoot(i)), true, true);
-      CHECK(!self->IsExceptionPending()) << PrettyTypeOf(self->GetException());
+      self->AssertNoPendingException();
     }
   }
 }
@@ -656,11 +657,11 @@
 }
 
 const OatFile* ClassLinker::FindOpenedOatFileForDexFile(const DexFile& dex_file) {
+  MutexLock mu(dex_lock_);
   return FindOpenedOatFileFromDexLocation(dex_file.GetLocation());
 }
 
 const OatFile* ClassLinker::FindOpenedOatFileFromDexLocation(const std::string& dex_location) {
-  MutexLock mu(dex_lock_);
   for (size_t i = 0; i < oat_files_.size(); i++) {
     const OatFile* oat_file = oat_files_[i];
     DCHECK(oat_file != NULL);
@@ -698,6 +699,12 @@
 
 const DexFile* ClassLinker::FindOrCreateOatFileForDexLocation(const std::string& dex_location,
                                                               const std::string& oat_location) {
+  MutexLock mu(dex_lock_);
+  return FindOrCreateOatFileForDexLocationLocked(dex_location, oat_location);
+}
+
+const DexFile* ClassLinker::FindOrCreateOatFileForDexLocationLocked(const std::string& dex_location,
+                                                                    const std::string& oat_location) {
   uint32_t dex_location_checksum;
   if (!DexFile::GetChecksum(dex_location, dex_location_checksum)) {
     LOG(ERROR) << "Failed to compute checksum '" << dex_location << "'";
@@ -713,13 +720,12 @@
   }
 
   // Generate the output oat file for the dex file
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   UniquePtr<File> file(OS::OpenFile(oat_location.c_str(), true));
   if (file.get() == NULL) {
     LOG(ERROR) << "Failed to create oat file: " << oat_location;
     return NULL;
   }
-  if (!class_linker->GenerateOatFile(dex_location, file->Fd(), oat_location)) {
+  if (!GenerateOatFile(dex_location, file->Fd(), oat_location)) {
     LOG(ERROR) << "Failed to generate oat file: " << oat_location;
     return NULL;
   }
@@ -734,7 +740,7 @@
     LOG(ERROR) << "Failed to open generated oat file: " << oat_location;
     return NULL;
   }
-  class_linker->RegisterOatFile(*oat_file);
+  RegisterOatFileLocked(*oat_file);
   const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_location);
   if (oat_dex_file == NULL) {
     LOG(ERROR) << "Failed to find dex file in generated oat file: " << oat_location;
@@ -808,7 +814,7 @@
   // Look for an existing file next to dex. for example, for
   // /foo/bar/baz.jar, look for /foo/bar/baz.jar.oat.
   std::string oat_filename(OatFile::DexFilenameToOatFilename(dex_location));
-  const OatFile* oat_file = FindOatFileFromOatLocation(oat_filename);
+  const OatFile* oat_file = FindOatFileFromOatLocationLocked(oat_filename);
   if (oat_file != NULL) {
     uint32_t dex_location_checksum;
     if (!DexFile::GetChecksum(dex_location, dex_location_checksum)) {
@@ -829,7 +835,7 @@
   // Look for an existing file in the art-cache, validating the result if found
   // not found in /foo/bar/baz.oat? try /data/art-cache/foo@bar@baz.oat
   std::string cache_location(GetArtCacheFilenameOrDie(oat_filename));
-  oat_file = FindOatFileFromOatLocation(cache_location);
+  oat_file = FindOatFileFromOatLocationLocked(cache_location);
   if (oat_file != NULL) {
     uint32_t dex_location_checksum;
     if (!DexFile::GetChecksum(dex_location, dex_location_checksum)) {
@@ -850,11 +856,10 @@
 
   // Try to generate oat file if it wasn't found or was obsolete.
   std::string oat_cache_filename(GetArtCacheFilenameOrDie(oat_filename));
-  return FindOrCreateOatFileForDexLocation(dex_location, oat_cache_filename);
+  return FindOrCreateOatFileForDexLocationLocked(dex_location, oat_cache_filename);
 }
 
 const OatFile* ClassLinker::FindOpenedOatFileFromOatLocation(const std::string& oat_location) {
-  MutexLock mu(dex_lock_);
   for (size_t i = 0; i < oat_files_.size(); i++) {
     const OatFile* oat_file = oat_files_[i];
     DCHECK(oat_file != NULL);
@@ -867,6 +872,10 @@
 
 const OatFile* ClassLinker::FindOatFileFromOatLocation(const std::string& oat_location) {
   MutexLock mu(dex_lock_);
+  return FindOatFileFromOatLocationLocked(oat_location);
+}
+
+const OatFile* ClassLinker::FindOatFileFromOatLocationLocked(const std::string& oat_location) {
   const OatFile* oat_file = FindOpenedOatFileFromOatLocation(oat_location);
   if (oat_file != NULL) {
     return oat_file;
@@ -942,7 +951,6 @@
   IntArray::SetArrayClass(GetClassRoot(kIntArrayClass));
   LongArray::SetArrayClass(GetClassRoot(kLongArrayClass));
   ShortArray::SetArrayClass(GetClassRoot(kShortArrayClass));
-  PathClassLoader::SetClass(GetClassRoot(kDalvikSystemPathClassLoader));
   Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
@@ -984,7 +992,7 @@
   }
 
   {
-    MutexLock mu(classes_lock_);
+    MutexLock mu(*GlobalSynchronization::classlinker_classes_lock_);
     typedef Table::const_iterator It;  // TODO: C++0x auto
     for (It it = classes_.begin(), end = classes_.end(); it != end; ++it) {
       visitor(it->second, arg);
@@ -998,7 +1006,7 @@
 }
 
 void ClassLinker::VisitClasses(ClassVisitor* visitor, void* arg) const {
-  MutexLock mu(classes_lock_);
+  MutexLock mu(*GlobalSynchronization::classlinker_classes_lock_);
   typedef Table::const_iterator It;  // TODO: C++0x auto
   for (It it = classes_.begin(), end = classes_.end(); it != end; ++it) {
     if (!visitor(it->second, arg)) {
@@ -1012,6 +1020,24 @@
   }
 }
 
+static bool GetClassesVisitor(Class* c, void* arg) {
+  std::set<Class*>* classes = reinterpret_cast<std::set<Class*>*>(arg);
+  classes->insert(c);
+  return true;
+}
+
+void ClassLinker::VisitClassesWithoutClassesLock(ClassVisitor* visitor, void* arg) const {
+  std::set<Class*> classes;
+  VisitClasses(GetClassesVisitor, &classes);
+  typedef std::set<Class*>::const_iterator It;  // TODO: C++0x auto
+  for (It it = classes.begin(), end = classes.end(); it != end; ++it) {
+    if (!visitor(*it, arg)) {
+      return;
+    }
+  }
+}
+
+
 ClassLinker::~ClassLinker() {
   String::ResetClass();
   Field::ResetClass();
@@ -1024,7 +1050,6 @@
   IntArray::ResetArrayClass();
   LongArray::ResetArrayClass();
   ShortArray::ResetArrayClass();
-  PathClassLoader::ResetClass();
   Throwable::ResetClass();
   StackTraceElement::ResetClass();
   STLDeleteElements(&boot_class_path_);
@@ -1105,7 +1130,8 @@
       length);
 }
 
-static Class* EnsureResolved(Class* klass) {
+static Class* EnsureResolved(Class* klass)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   DCHECK(klass != NULL);
   // Wait for the class if it has not already been linked.
   Thread* self = Thread::Current();
@@ -1130,7 +1156,8 @@
   // Return the loaded class.  No exceptions should be pending.
   CHECK(klass->IsResolved()) << PrettyClass(klass);
   CHECK(!self->IsExceptionPending())
-      << PrettyClass(klass) << " " << PrettyTypeOf(self->GetException());
+      << PrettyClass(klass) << " " << PrettyTypeOf(self->GetException()) << "\n"
+      << self->GetException()->Dump();
   return klass;
 }
 
@@ -1142,7 +1169,7 @@
   DCHECK_NE(*descriptor, '\0') << "descriptor is empty string";
   Thread* self = Thread::Current();
   DCHECK(self != NULL);
-  CHECK(!self->IsExceptionPending()) << PrettyTypeOf(self->GetException());
+  self->AssertNoPendingException();
   if (descriptor[1] == '\0') {
     // only the descriptors of primitive types should be 1 character long, also avoid class lookup
     // for primitive classes that aren't backed by dex files.
@@ -1173,32 +1200,37 @@
     self->ClearException();
 
     // next try the compile time class path
-    const std::vector<const DexFile*>& class_path
-        = Runtime::Current()->GetCompileTimeClassPath(class_loader);
-    DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, class_path);
+    const std::vector<const DexFile*>* class_path;
+    {
+      ScopedObjectAccessUnchecked soa(Thread::Current());
+      ScopedLocalRef<jobject> jclass_loader(soa.Env(), soa.AddLocalReference<jobject>(class_loader));
+      class_path = &Runtime::Current()->GetCompileTimeClassPath(jclass_loader.get());
+    }
+
+    DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, *class_path);
     if (pair.second != NULL) {
       return DefineClass(descriptor, class_loader, *pair.first, *pair.second);
     }
 
   } else {
-    ScopedJniThreadState ts(self->GetJniEnv());
-    ScopedLocalRef<jobject> class_loader_object(ts.Env(),
-                                                ts.AddLocalReference<jobject>(class_loader));
+    ScopedObjectAccessUnchecked soa(self->GetJniEnv());
+    ScopedLocalRef<jobject> class_loader_object(soa.Env(),
+                                                soa.AddLocalReference<jobject>(class_loader));
     std::string class_name_string(DescriptorToDot(descriptor));
-    ScopedLocalRef<jobject> result(ts.Env(), NULL);
+    ScopedLocalRef<jobject> result(soa.Env(), NULL);
     {
       ScopedThreadStateChange tsc(self, kNative);
-      ScopedLocalRef<jobject> class_name_object(ts.Env(),
-                                                ts.Env()->NewStringUTF(class_name_string.c_str()));
+      ScopedLocalRef<jobject> class_name_object(soa.Env(),
+                                                soa.Env()->NewStringUTF(class_name_string.c_str()));
       if (class_name_object.get() == NULL) {
         return NULL;
       }
       CHECK(class_loader_object.get() != NULL);
-      result.reset(ts.Env()->CallObjectMethod(class_loader_object.get(),
-                                              WellKnownClasses::java_lang_ClassLoader_loadClass,
-                                              class_name_object.get()));
+      result.reset(soa.Env()->CallObjectMethod(class_loader_object.get(),
+                                               WellKnownClasses::java_lang_ClassLoader_loadClass,
+                                               class_name_object.get()));
     }
-    if (ts.Env()->ExceptionCheck()) {
+    if (soa.Env()->ExceptionCheck()) {
       // If the ClassLoader threw, pass that exception up.
       return NULL;
     } else if (result.get() == NULL) {
@@ -1208,7 +1240,7 @@
       return NULL;
     } else {
       // success, return Class*
-      return ts.Decode<Class*>(result.get());
+      return soa.Decode<Class*>(result.get());
     }
   }
 
@@ -1428,7 +1460,9 @@
   }
 }
 
-static void LinkCode(SirtRef<Method>& method, const OatFile::OatClass* oat_class, uint32_t method_index) {
+static void LinkCode(SirtRef<Method>& method, const OatFile::OatClass* oat_class,
+                     uint32_t method_index)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // Every kind of method should at least get an invoke stub from the oat_method.
   // non-abstract methods also get their code pointers.
   const OatFile::OatMethod oat_method = oat_class->GetOatMethod(method_index);
@@ -1863,7 +1897,7 @@
     LOG(INFO) << "Loaded class " << descriptor << source;
   }
   size_t hash = StringPieceHash()(descriptor);
-  MutexLock mu(classes_lock_);
+  MutexLock mu(*GlobalSynchronization::classlinker_classes_lock_);
   Table& classes = image_class ? image_classes_ : classes_;
   Class* existing = LookupClassLocked(descriptor.data(), klass->GetClassLoader(), hash, classes);
 #ifndef NDEBUG
@@ -1880,7 +1914,7 @@
 
 bool ClassLinker::RemoveClass(const char* descriptor, const ClassLoader* class_loader) {
   size_t hash = Hash(descriptor);
-  MutexLock mu(classes_lock_);
+  MutexLock mu(*GlobalSynchronization::classlinker_classes_lock_);
   typedef Table::iterator It;  // TODO: C++0x auto
   // TODO: determine if its better to search classes_ or image_classes_ first
   ClassHelper kh;
@@ -1905,7 +1939,7 @@
 
 Class* ClassLinker::LookupClass(const char* descriptor, const ClassLoader* class_loader) {
   size_t hash = Hash(descriptor);
-  MutexLock mu(classes_lock_);
+  MutexLock mu(*GlobalSynchronization::classlinker_classes_lock_);
   // TODO: determine if its better to search classes_ or image_classes_ first
   Class* klass = LookupClassLocked(descriptor, class_loader, hash, classes_);
   if (klass != NULL) {
@@ -1940,7 +1974,7 @@
 void ClassLinker::LookupClasses(const char* descriptor, std::vector<Class*>& classes) {
   classes.clear();
   size_t hash = Hash(descriptor);
-  MutexLock mu(classes_lock_);
+  MutexLock mu(*GlobalSynchronization::classlinker_classes_lock_);
   typedef Table::const_iterator It;  // TODO: C++0x auto
   // TODO: determine if its better to search classes_ or image_classes_ first
   ClassHelper kh(NULL, this);
@@ -1961,7 +1995,8 @@
 }
 
 #if !defined(NDEBUG) && !defined(ART_USE_LLVM_COMPILER)
-static void CheckMethodsHaveGcMaps(Class* klass) {
+static void CheckMethodsHaveGcMaps(Class* klass)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (!Runtime::Current()->IsStarted()) {
     return;
   }
@@ -2050,7 +2085,7 @@
           << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
           << " because: " << error_msg;
     }
-    DCHECK(!Thread::Current()->IsExceptionPending());
+    Thread::Current()->AssertNoPendingException();
     CHECK(verifier_failure == verifier::MethodVerifier::kNoFailure ||
           Runtime::Current()->IsCompiler());
     // Make sure all classes referenced by catch blocks are resolved
@@ -2064,7 +2099,7 @@
         << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
         << " because: " << error_msg;
     Thread* self = Thread::Current();
-    CHECK(!self->IsExceptionPending());
+    self->AssertNoPendingException();
     self->ThrowNewException("Ljava/lang/VerifyError;", error_msg.c_str());
     CHECK_EQ(klass->GetStatus(), Class::kStatusVerifying) << PrettyDescriptor(klass);
     klass->SetStatus(Class::kStatusError);
@@ -2300,7 +2335,8 @@
   return constructor;
 }
 
-static void CheckProxyConstructor(Method* constructor) {
+static void CheckProxyConstructor(Method* constructor)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   CHECK(constructor->IsConstructor());
   MethodHelper mh(constructor);
   CHECK_STREQ(mh.GetName(), "<init>");
@@ -2338,7 +2374,8 @@
   return method;
 }
 
-static void CheckProxyMethod(Method* method, SirtRef<Method>& prototype) {
+static void CheckProxyMethod(Method* method, SirtRef<Method>& prototype)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // Basic sanity
   CHECK(!prototype->IsFinal());
   CHECK(method->IsFinal());
@@ -2485,9 +2522,10 @@
   return success;
 }
 
-bool ClassLinker::WaitForInitializeClass(Class* klass, Thread* self, ObjectLock& lock) {
+bool ClassLinker::WaitForInitializeClass(Class* klass, Thread* self, ObjectLock& lock)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   while (true) {
-    CHECK(!self->IsExceptionPending()) << PrettyTypeOf(self->GetException());
+    self->AssertNoPendingException();
     lock.Wait();
 
     // When we wake up, repeat the test for init-in-progress.  If
@@ -3096,8 +3134,11 @@
 }
 
 struct LinkFieldsComparator {
-  explicit LinkFieldsComparator(FieldHelper* fh) : fh_(fh) {}
-  bool operator()(const Field* field1, const Field* field2) {
+  explicit LinkFieldsComparator(FieldHelper* fh)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+      : fh_(fh) {}
+  // No thread safety analysis, as this will be called from STL. The lock was checked as held in
+  // the constructor.
+  bool operator()(const Field* field1, const Field* field2) NO_THREAD_SAFETY_ANALYSIS {
     // First come reference fields, then 64-bit, and finally 32-bit
     fh_->ChangeField(field1);
     Primitive::Type type1 = fh_->GetTypeAsPrimitiveType();
@@ -3497,7 +3538,7 @@
   // lock held, because it might need to resolve a field's type, which would try to take the lock.
   std::vector<Class*> all_classes;
   {
-    MutexLock mu(classes_lock_);
+    MutexLock mu(*GlobalSynchronization::classlinker_classes_lock_);
     typedef Table::const_iterator It;  // TODO: C++0x auto
     for (It it = classes_.begin(), end = classes_.end(); it != end; ++it) {
       all_classes.push_back(it->second);
@@ -3513,22 +3554,22 @@
 }
 
 void ClassLinker::DumpForSigQuit(std::ostream& os) const {
-  MutexLock mu(classes_lock_);
+  MutexLock mu(*GlobalSynchronization::classlinker_classes_lock_);
   os << "Loaded classes: " << image_classes_.size() << " image classes; "
      << classes_.size() << " allocated classes\n";
 }
 
 size_t ClassLinker::NumLoadedClasses() const {
-  MutexLock mu(classes_lock_);
+  MutexLock mu(*GlobalSynchronization::classlinker_classes_lock_);
   return classes_.size() + image_classes_.size();
 }
 
 pid_t ClassLinker::GetClassesLockOwner() {
-  return classes_lock_.GetOwner();
+  return GlobalSynchronization::classlinker_classes_lock_->GetExclusiveOwnerTid();
 }
 
 pid_t ClassLinker::GetDexLockOwner() {
-  return dex_lock_.GetOwner();
+  return dex_lock_.GetExclusiveOwnerTid();
 }
 
 void ClassLinker::SetClassRoot(ClassRoot class_root, Class* klass) {
diff --git a/src/class_linker.h b/src/class_linker.h
index 8ad8d2d..c69c442 100644
--- a/src/class_linker.h
+++ b/src/class_linker.h
@@ -45,46 +45,61 @@
  public:
   // Creates the class linker by boot strapping from dex files.
   static ClassLinker* CreateFromCompiler(const std::vector<const DexFile*>& boot_class_path,
-                                         InternTable* intern_table);
+                                         InternTable* intern_table)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Creates the class linker from an image.
-  static ClassLinker* CreateFromImage(InternTable* intern_table);
+  static ClassLinker* CreateFromImage(InternTable* intern_table)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   ~ClassLinker();
 
   // Finds a class by its descriptor, loading it if necessary.
   // If class_loader is null, searches boot_class_path_.
-  Class* FindClass(const char* descriptor, ClassLoader* class_loader);
+  Class* FindClass(const char* descriptor, ClassLoader* class_loader)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Class* FindSystemClass(const char* descriptor);
+  Class* FindSystemClass(const char* descriptor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  // Define a new class based on a ClassDef from a DexFile
   Class* DefineClass(const StringPiece& descriptor, ClassLoader* class_loader,
-                     const DexFile& dex_file, const DexFile::ClassDef& dex_class_def);
+                     const DexFile& dex_file, const DexFile::ClassDef& dex_class_def)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  // Finds a class by its descriptor, returning NULL if it wasn't loaded
   // by the given 'class_loader'.
-  Class* LookupClass(const char* descriptor, const ClassLoader* class_loader);
+  Class* LookupClass(const char* descriptor, const ClassLoader* class_loader)
+      LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Finds all the classes with the given descriptor, regardless of ClassLoader.
-  void LookupClasses(const char* descriptor, std::vector<Class*>& classes);
+  void LookupClasses(const char* descriptor, std::vector<Class*>& classes)
+      LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Class* FindPrimitiveClass(char type);
+  Class* FindPrimitiveClass(char type) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // General class unloading is not supported, this is used to prune
   // unwanted classes during image writing.
-  bool RemoveClass(const char* descriptor, const ClassLoader* class_loader);
+  bool RemoveClass(const char* descriptor, const ClassLoader* class_loader)
+      LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void DumpAllClasses(int flags) const;
+  void DumpAllClasses(int flags) const
+      LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void DumpForSigQuit(std::ostream& os) const;
+  void DumpForSigQuit(std::ostream& os) const
+      LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_);
 
-  size_t NumLoadedClasses() const;
+  size_t NumLoadedClasses() const LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_);
 
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  String* ResolveString(uint32_t string_idx, const Method* referrer) {
+  String* ResolveString(uint32_t string_idx, const Method* referrer)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     String* resolved_string = referrer->GetDexCacheStrings()->Get(string_idx);
     if (UNLIKELY(resolved_string == NULL)) {
       Class* declaring_class = referrer->GetDeclaringClass();
@@ -97,14 +112,14 @@
 
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache.
-  String* ResolveString(const DexFile& dex_file, uint32_t string_idx, DexCache* dex_cache);
+  String* ResolveString(const DexFile& dex_file, uint32_t string_idx, DexCache* dex_cache)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
  // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  Class* ResolveType(const DexFile& dex_file,
-                     uint16_t type_idx,
-                     const Class* referrer) {
+  Class* ResolveType(const DexFile& dex_file, uint16_t type_idx, const Class* referrer)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return ResolveType(dex_file,
                        type_idx,
                        referrer->GetDexCache(),
@@ -114,7 +129,8 @@
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  Class* ResolveType(uint16_t type_idx, const Method* referrer) {
+  Class* ResolveType(uint16_t type_idx, const Method* referrer)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Class* resolved_type = referrer->GetDexCacheResolvedTypes()->Get(type_idx);
     if (UNLIKELY(resolved_type == NULL)) {
       Class* declaring_class = referrer->GetDeclaringClass();
@@ -126,7 +142,8 @@
     return resolved_type;
   }
 
-  Class* ResolveType(uint16_t type_idx, const Field* referrer) {
+  Class* ResolveType(uint16_t type_idx, const Field* referrer)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Class* declaring_class = referrer->GetDeclaringClass();
     DexCache* dex_cache = declaring_class->GetDexCache();
     Class* resolved_type = dex_cache->GetResolvedType(type_idx);
@@ -145,7 +162,8 @@
   Class* ResolveType(const DexFile& dex_file,
                      uint16_t type_idx,
                      DexCache* dex_cache,
-                     ClassLoader* class_loader);
+                     ClassLoader* class_loader)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Resolve a method with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
@@ -156,9 +174,11 @@
                         uint32_t method_idx,
                         DexCache* dex_cache,
                         ClassLoader* class_loader,
-                        bool is_direct);
+                        bool is_direct)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* ResolveMethod(uint32_t method_idx, const Method* referrer, bool is_direct) {
+  Method* ResolveMethod(uint32_t method_idx, const Method* referrer, bool is_direct)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* resolved_method = referrer->GetDexCacheResolvedMethods()->Get(method_idx);
     if (UNLIKELY(resolved_method == NULL || resolved_method->IsRuntimeMethod())) {
       Class* declaring_class = referrer->GetDeclaringClass();
@@ -170,7 +190,8 @@
     return resolved_method;
   }
 
-  Field* ResolveField(uint32_t field_idx, const Method* referrer, bool is_static) {
+  Field* ResolveField(uint32_t field_idx, const Method* referrer, bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Field* resolved_field =
         referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
     if (UNLIKELY(resolved_field == NULL)) {
@@ -192,7 +213,8 @@
                       uint32_t field_idx,
                       DexCache* dex_cache,
                       ClassLoader* class_loader,
-                      bool is_static);
+                      bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Resolve a field with a given ID from the DexFile, storing the
   // result in DexCache. The ClassLinker and ClassLoader are used as
@@ -201,89 +223,132 @@
   Field* ResolveFieldJLS(const DexFile& dex_file,
                          uint32_t field_idx,
                          DexCache* dex_cache,
-                         ClassLoader* class_loader);
+                         ClassLoader* class_loader)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Get shorty from method index without resolution. Used to do handlerization.
-  const char* MethodShorty(uint32_t method_idx, Method* referrer, uint32_t* length);
+  const char* MethodShorty(uint32_t method_idx, Method* referrer, uint32_t* length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Returns true on success, false if there's an exception pending.
   // can_run_clinit=false allows the compiler to attempt to init a class,
   // given the restriction that no <clinit> execution is possible.
-  bool EnsureInitialized(Class* c, bool can_run_clinit, bool can_init_fields);
+  bool EnsureInitialized(Class* c, bool can_run_clinit, bool can_init_fields)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Initializes classes that have instances in the image but that have
   // <clinit> methods so they could not be initialized by the compiler.
-  void RunRootClinits();
+  void RunRootClinits() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void RegisterDexFile(const DexFile& dex_file);
-  void RegisterDexFile(const DexFile& dex_file, SirtRef<DexCache>& dex_cache);
+  void RegisterDexFile(const DexFile& dex_file)
+      LOCKS_EXCLUDED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void RegisterDexFile(const DexFile& dex_file, SirtRef<DexCache>& dex_cache)
+      LOCKS_EXCLUDED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void RegisterOatFile(const OatFile& oat_file);
+  void RegisterOatFile(const OatFile& oat_file)
+      LOCKS_EXCLUDED(dex_lock_);
 
   const std::vector<const DexFile*>& GetBootClassPath() {
     return boot_class_path_;
   }
 
-  void VisitClasses(ClassVisitor* visitor, void* arg) const;
+  void VisitClasses(ClassVisitor* visitor, void* arg) const
+      LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_);
+  // Less efficient variant of VisitClasses that doesn't hold the classlinker_classes_lock_
+  // when calling the visitor.
+  void VisitClassesWithoutClassesLock(ClassVisitor* visitor, void* arg) const
+      LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_);
 
-  void VisitRoots(Heap::RootVisitor* visitor, void* arg) const;
+  void VisitRoots(Heap::RootVisitor* visitor, void* arg) const
+      LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_, dex_lock_);
 
-  const DexFile& FindDexFile(const DexCache* dex_cache) const;
-  DexCache* FindDexCache(const DexFile& dex_file) const;
-  bool IsDexFileRegistered(const DexFile& dex_file) const;
-  void FixupDexCaches(Method* resolution_method) const;
+  const DexFile& FindDexFile(const DexCache* dex_cache) const
+      LOCKS_EXCLUDED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  DexCache* FindDexCache(const DexFile& dex_file) const
+      LOCKS_EXCLUDED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  bool IsDexFileRegistered(const DexFile& dex_file) const
+      LOCKS_EXCLUDED(dex_lock_);
+  void FixupDexCaches(Method* resolution_method) const
+      LOCKS_EXCLUDED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Generate an oat file from a dex file
   bool GenerateOatFile(const std::string& dex_filename,
                        int oat_fd,
                        const std::string& oat_cache_filename);
 
-  const OatFile* FindOatFileFromOatLocation(const std::string& location);
+  const OatFile* FindOatFileFromOatLocation(const std::string& location)
+      LOCKS_EXCLUDED(dex_lock_);
+
+  const OatFile* FindOatFileFromOatLocationLocked(const std::string& location)
+      EXCLUSIVE_LOCKS_REQUIRED(dex_lock_);
 
   // Finds the oat file for a dex location, generating the oat file if
   // it is missing or out of date. Returns the DexFile from within the
   // created oat file.
   const DexFile* FindOrCreateOatFileForDexLocation(const std::string& dex_location,
-                                                   const std::string& oat_location);
+                                                   const std::string& oat_location)
+      LOCKS_EXCLUDED(dex_lock_);
+  const DexFile* FindOrCreateOatFileForDexLocationLocked(const std::string& dex_location,
+                                                         const std::string& oat_location)
+      EXCLUSIVE_LOCKS_REQUIRED(dex_lock_);
   // Find a DexFile within an OatFile given a DexFile location. Note
   // that this returns null if the location checksum of the DexFile
   // does not match the OatFile.
-  const DexFile* FindDexFileInOatFileFromDexLocation(const std::string& location);
+  const DexFile* FindDexFileInOatFileFromDexLocation(const std::string& location)
+      LOCKS_EXCLUDED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 
   // Returns true if oat file contains the dex file with the given location and checksum
   static bool VerifyOatFileChecksums(const OatFile* oat_file,
                                      const std::string& dex_location,
-                                     uint32_t dex_location_checksum);
+                                     uint32_t dex_location_checksum)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // TODO: replace this with multiple methods that allocate the correct managed type.
   template <class T>
-  ObjectArray<T>* AllocObjectArray(size_t length) {
+  ObjectArray<T>* AllocObjectArray(size_t length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return ObjectArray<T>::Alloc(GetClassRoot(kObjectArrayClass), length);
   }
 
-  ObjectArray<Class>* AllocClassArray(size_t length) {
+  ObjectArray<Class>* AllocClassArray(size_t length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return ObjectArray<Class>::Alloc(GetClassRoot(kClassArrayClass), length);
   }
 
-  ObjectArray<StackTraceElement>* AllocStackTraceElementArray(size_t length);
+  ObjectArray<StackTraceElement>* AllocStackTraceElementArray(size_t length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void VerifyClass(Class* klass);
+  void VerifyClass(Class* klass) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   bool VerifyClassUsingOatFile(const DexFile& dex_file, Class* klass,
-                               Class::Status& oat_file_class_status);
-  void ResolveClassExceptionHandlerTypes(const DexFile& dex_file, Class* klass);
-  void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, Method* klass);
+                               Class::Status& oat_file_class_status)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void ResolveClassExceptionHandlerTypes(const DexFile& dex_file, Class* klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void ResolveMethodExceptionHandlerTypes(const DexFile& dex_file, Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   Class* CreateProxyClass(String* name, ObjectArray<Class>* interfaces, ClassLoader* loader,
-                          ObjectArray<Method>* methods, ObjectArray<ObjectArray<Class> >* throws);
-  std::string GetDescriptorForProxy(const Class* proxy_class);
-  Method* FindMethodForProxy(const Class* proxy_class, const Method* proxy_method);
+                          ObjectArray<Method>* methods, ObjectArray<ObjectArray<Class> >* throws)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  std::string GetDescriptorForProxy(const Class* proxy_class)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  Method* FindMethodForProxy(const Class* proxy_class, const Method* proxy_method)
+      LOCKS_EXCLUDED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Get the oat code for a method when its class isn't yet initialized
-  const void* GetOatCodeFor(const Method* method);
+  const void* GetOatCodeFor(const Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Relocate the OatFiles (ELF images)
-  void RelocateExecutable();
+  void RelocateExecutable() LOCKS_EXCLUDED(dex_lock_);
 
   pid_t GetClassesLockOwner(); // For SignalCatcher.
   pid_t GetDexLockOwner(); // For SignalCatcher.
@@ -291,47 +356,60 @@
  private:
   explicit ClassLinker(InternTable*);
 
-  const OatFile::OatMethod GetOatMethodFor(const Method* method);
+  const OatFile::OatMethod GetOatMethodFor(const Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Initialize class linker by bootstraping from dex files
-  void InitFromCompiler(const std::vector<const DexFile*>& boot_class_path);
+  void InitFromCompiler(const std::vector<const DexFile*>& boot_class_path)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Initialize class linker from one or more images.
-  void InitFromImage();
-  OatFile* OpenOat(const ImageSpace* space);
-  static void InitFromImageCallback(Object* obj, void* arg);
+  void InitFromImage() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  OatFile* OpenOat(const ImageSpace* space)
+      LOCKS_EXCLUDED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void InitFromImageCallback(Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void FinishInit();
+  void FinishInit() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // For early bootstrapping by Init
-  Class* AllocClass(Class* java_lang_Class, size_t class_size);
+  Class* AllocClass(Class* java_lang_Class, size_t class_size)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Alloc* convenience functions to avoid needing to pass in Class*
   // values that are known to the ClassLinker such as
   // kObjectArrayClass and kJavaLangString etc.
-  Class* AllocClass(size_t class_size);
-  DexCache* AllocDexCache(const DexFile& dex_file);
-  Field* AllocField();
+  Class* AllocClass(size_t class_size) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  DexCache* AllocDexCache(const DexFile& dex_file)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  Field* AllocField() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  Method* AllocMethod() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* AllocMethod();
+  InterfaceEntry* AllocInterfaceEntry(Class* interface)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  InterfaceEntry* AllocInterfaceEntry(Class* interface);
-
-  Class* CreatePrimitiveClass(const char* descriptor, Primitive::Type type) {
+  Class* CreatePrimitiveClass(const char* descriptor, Primitive::Type type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return InitializePrimitiveClass(AllocClass(sizeof(Class)), descriptor, type);
   }
   Class* InitializePrimitiveClass(Class* primitive_class,
                                   const char* descriptor,
-                                  Primitive::Type type);
+                                  Primitive::Type type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 
-  Class* CreateArrayClass(const std::string& descriptor, ClassLoader* class_loader);
+  Class* CreateArrayClass(const std::string& descriptor, ClassLoader* class_loader)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void AppendToBootClassPath(const DexFile& dex_file);
-  void AppendToBootClassPath(const DexFile& dex_file, SirtRef<DexCache>& dex_cache);
+  void AppendToBootClassPath(const DexFile& dex_file)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void AppendToBootClassPath(const DexFile& dex_file, SirtRef<DexCache>& dex_cache)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void ConstructFieldMap(const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
-                         Class* c, SafeMap<uint32_t, Field*>& field_map);
+                         Class* c, SafeMap<uint32_t, Field*>& field_map)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   size_t SizeOfClass(const DexFile& dex_file,
                      const DexFile::ClassDef& dex_class_def);
@@ -339,15 +417,18 @@
   void LoadClass(const DexFile& dex_file,
                  const DexFile::ClassDef& dex_class_def,
                  SirtRef<Class>& klass,
-                 ClassLoader* class_loader);
+                 ClassLoader* class_loader)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void LoadField(const DexFile& dex_file, const ClassDataItemIterator& it, SirtRef<Class>& klass,
                  SirtRef<Field>& dst);
 
   void LoadMethod(const DexFile& dex_file, const ClassDataItemIterator& dex_method,
-                  SirtRef<Class>& klass, SirtRef<Method>& dst);
+                  SirtRef<Class>& klass, SirtRef<Method>& dst)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void FixupStaticTrampolines(Class* klass);
+  void FixupStaticTrampolines(Class* klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Finds the associated oat class for a dex_file and descriptor
   const OatFile::OatClass* GetOatClass(const DexFile& dex_file, const char* descriptor);
@@ -355,68 +436,97 @@
   // Attempts to insert a class into a class table.  Returns NULL if
   // the class was inserted, otherwise returns an existing class with
   // the same descriptor and ClassLoader.
-  Class* InsertClass(const StringPiece& descriptor, Class* klass, bool image_class);
+  Class* InsertClass(const StringPiece& descriptor, Class* klass, bool image_class)
+      LOCKS_EXCLUDED(GlobalSynchronization::classlinker_classes_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void RegisterDexFileLocked(const DexFile& dex_file, SirtRef<DexCache>& dex_cache) EXCLUSIVE_LOCKS_REQUIRED(dex_lock_);
+  void RegisterDexFileLocked(const DexFile& dex_file, SirtRef<DexCache>& dex_cache)
+      EXCLUSIVE_LOCKS_REQUIRED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   bool IsDexFileRegisteredLocked(const DexFile& dex_file) const EXCLUSIVE_LOCKS_REQUIRED(dex_lock_);
-  void RegisterOatFileLocked(const OatFile& oat_file) EXCLUSIVE_LOCKS_REQUIRED(dex_lock_);
+  void RegisterOatFileLocked(const OatFile& oat_file)
+      EXCLUSIVE_LOCKS_REQUIRED(dex_lock_);
 
-  bool InitializeClass(Class* klass, bool can_run_clinit, bool can_init_statics);
+  bool InitializeClass(Class* klass, bool can_run_clinit, bool can_init_statics)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   bool WaitForInitializeClass(Class* klass, Thread* self, ObjectLock& lock);
-  bool ValidateSuperClassDescriptors(const Class* klass);
-  bool InitializeSuperClass(Class* klass, bool can_run_clinit, bool can_init_fields);
+  bool ValidateSuperClassDescriptors(const Class* klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  bool InitializeSuperClass(Class* klass, bool can_run_clinit, bool can_init_fields)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   // Initialize static fields, returns true if fields were initialized.
-  bool InitializeStaticFields(Class* klass);
+  bool InitializeStaticFields(Class* klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool IsSameDescriptorInDifferentClassContexts(const char* descriptor,
                                                 const Class* klass1,
-                                                const Class* klass2);
+                                                const Class* klass2)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool IsSameMethodSignatureInDifferentClassContexts(const Method* descriptor,
                                                      const Class* klass1,
-                                                     const Class* klass2);
+                                                     const Class* klass2)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  bool LinkClass(SirtRef<Class>& klass, ObjectArray<Class>* interfaces);
+  bool LinkClass(SirtRef<Class>& klass, ObjectArray<Class>* interfaces)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  bool LinkSuperClass(SirtRef<Class>& klass);
+  bool LinkSuperClass(SirtRef<Class>& klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  bool LoadSuperAndInterfaces(SirtRef<Class>& klass, const DexFile& dex_file);
+  bool LoadSuperAndInterfaces(SirtRef<Class>& klass, const DexFile& dex_file)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  bool LinkMethods(SirtRef<Class>& klass, ObjectArray<Class>* interfaces);
+  bool LinkMethods(SirtRef<Class>& klass, ObjectArray<Class>* interfaces)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  bool LinkVirtualMethods(SirtRef<Class>& klass);
+  bool LinkVirtualMethods(SirtRef<Class>& klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  bool LinkInterfaceMethods(SirtRef<Class>& klass, ObjectArray<Class>* interfaces);
+  bool LinkInterfaceMethods(SirtRef<Class>& klass, ObjectArray<Class>* interfaces)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  bool LinkStaticFields(SirtRef<Class>& klass);
-  bool LinkInstanceFields(SirtRef<Class>& klass);
-  bool LinkFields(SirtRef<Class>& klass, bool is_static);
+  bool LinkStaticFields(SirtRef<Class>& klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  bool LinkInstanceFields(SirtRef<Class>& klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  bool LinkFields(SirtRef<Class>& klass, bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 
-  void CreateReferenceInstanceOffsets(SirtRef<Class>& klass);
-  void CreateReferenceStaticOffsets(SirtRef<Class>& klass);
+  void CreateReferenceInstanceOffsets(SirtRef<Class>& klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void CreateReferenceStaticOffsets(SirtRef<Class>& klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   void CreateReferenceOffsets(SirtRef<Class>& klass, bool is_static,
-                              uint32_t reference_offsets);
+                              uint32_t reference_offsets)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // For use by ImageWriter to find DexCaches for its roots
   const std::vector<DexCache*>& GetDexCaches() {
     return dex_caches_;
   }
 
-  const OatFile* FindOpenedOatFileForDexFile(const DexFile& dex_file);
-  const OatFile* FindOpenedOatFileFromDexLocation(const std::string& dex_location);
-  const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location);
+  const OatFile* FindOpenedOatFileForDexFile(const DexFile& dex_file)
+      LOCKS_EXCLUDED(dex_lock_);
+  const OatFile* FindOpenedOatFileFromDexLocation(const std::string& dex_location)
+      EXCLUSIVE_LOCKS_REQUIRED(dex_lock_);
+  const OatFile* FindOpenedOatFileFromOatLocation(const std::string& oat_location)
+      EXCLUSIVE_LOCKS_REQUIRED(dex_lock_);
   const DexFile* VerifyAndOpenDexFileFromOatFile(const OatFile* oat_file,
                                                  const std::string& dex_location,
                                                  uint32_t dex_location_checksum)
-      EXCLUSIVE_LOCKS_REQUIRED(dex_lock_);
+      EXCLUSIVE_LOCKS_REQUIRED(dex_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* CreateProxyConstructor(SirtRef<Class>& klass, Class* proxy_class);
-  Method* CreateProxyMethod(SirtRef<Class>& klass, SirtRef<Method>& prototype);
+  Method* CreateProxyConstructor(SirtRef<Class>& klass, Class* proxy_class)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  Method* CreateProxyMethod(SirtRef<Class>& klass, SirtRef<Method>& prototype)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   std::vector<const DexFile*> boot_class_path_;
 
-  mutable Mutex dex_lock_;
+  mutable Mutex dex_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::vector<const DexFile*> dex_files_ GUARDED_BY(dex_lock_);
   std::vector<DexCache*> dex_caches_ GUARDED_BY(dex_lock_);
   std::vector<const OatFile*> oat_files_ GUARDED_BY(dex_lock_);
@@ -425,13 +535,14 @@
   // multimap from a string hash code of a class descriptor to
   // Class* instances. Results should be compared for a matching
   // Class::descriptor_ and Class::class_loader_.
-  mutable Mutex classes_lock_;
   typedef std::multimap<size_t, Class*> Table;
-  Table image_classes_  GUARDED_BY(classes_lock_);
-  Table classes_ GUARDED_BY(classes_lock_);
+  Table image_classes_  GUARDED_BY(GlobalSynchronization::classlinker_classes_lock_);
+  Table classes_ GUARDED_BY(GlobalSynchronization::classlinker_classes_lock_);
 
   Class* LookupClassLocked(const char* descriptor, const ClassLoader* class_loader,
-                           size_t hash, const Table& classes) EXCLUSIVE_LOCKS_REQUIRED(classes_lock_);
+                           size_t hash, const Table& classes)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::classlinker_classes_lock_);
 
   // indexes into class_roots_.
   // needs to be kept in sync with class_roots_descriptors_.
@@ -447,8 +558,6 @@
     kJavaLangReflectMethod,
     kJavaLangReflectProxy,
     kJavaLangClassLoader,
-    kDalvikSystemBaseDexClassLoader,
-    kDalvikSystemPathClassLoader,
     kJavaLangThrowable,
     kJavaLangClassNotFoundException,
     kJavaLangStackTraceElement,
@@ -474,14 +583,16 @@
   };
   ObjectArray<Class>* class_roots_;
 
-  Class* GetClassRoot(ClassRoot class_root) {
+  Class* GetClassRoot(ClassRoot class_root)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(class_roots_ != NULL);
     Class* klass = class_roots_->Get(class_root);
     DCHECK(klass != NULL);
     return klass;
   }
 
-  void SetClassRoot(ClassRoot class_root, Class* klass);
+  void SetClassRoot(ClassRoot class_root, Class* klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   ObjectArray<Class>* GetClassRoots() {
     DCHECK(class_roots_ != NULL);
diff --git a/src/class_linker_test.cc b/src/class_linker_test.cc
index a7f9c66..1eb5e0d 100644
--- a/src/class_linker_test.cc
+++ b/src/class_linker_test.cc
@@ -29,7 +29,8 @@
 
 class ClassLinkerTest : public CommonTest {
  protected:
-  void AssertNonExistentClass(const std::string& descriptor) {
+  void AssertNonExistentClass(const std::string& descriptor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     EXPECT_TRUE(class_linker_->FindSystemClass(descriptor.c_str()) == NULL);
     Thread* self = Thread::Current();
     EXPECT_TRUE(self->IsExceptionPending());
@@ -39,11 +40,13 @@
     EXPECT_TRUE(exception->InstanceOf(exception_class));
   }
 
-  void AssertPrimitiveClass(const std::string& descriptor) {
+  void AssertPrimitiveClass(const std::string& descriptor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     AssertPrimitiveClass(descriptor, class_linker_->FindSystemClass(descriptor.c_str()));
   }
 
-  void AssertPrimitiveClass(const std::string& descriptor, const Class* primitive) {
+  void AssertPrimitiveClass(const std::string& descriptor, const Class* primitive)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ClassHelper primitive_ch(primitive);
     ASSERT_TRUE(primitive != NULL);
     ASSERT_TRUE(primitive->GetClass() != NULL);
@@ -79,7 +82,8 @@
 
   void AssertArrayClass(const std::string& array_descriptor,
                         const std::string& component_type,
-                        ClassLoader* class_loader) {
+                        ClassLoader* class_loader)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Class* array = class_linker_->FindClass(array_descriptor.c_str(), class_loader);
     ClassHelper array_component_ch(array->GetComponentType());
     EXPECT_STREQ(component_type.c_str(), array_component_ch.GetDescriptor());
@@ -87,7 +91,8 @@
     AssertArrayClass(array_descriptor, array);
   }
 
-  void AssertArrayClass(const std::string& array_descriptor, Class* array) {
+  void AssertArrayClass(const std::string& array_descriptor, Class* array)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ClassHelper kh(array);
     ASSERT_TRUE(array != NULL);
     ASSERT_TRUE(array->GetClass() != NULL);
@@ -130,7 +135,7 @@
     EXPECT_STREQ(kh.GetDescriptor(), "Ljava/io/Serializable;");
   }
 
-  void AssertMethod(Method* method) {
+  void AssertMethod(Method* method) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     MethodHelper mh(method);
     EXPECT_TRUE(method != NULL);
     EXPECT_TRUE(method->GetClass() != NULL);
@@ -151,7 +156,8 @@
               method->GetDexCacheInitializedStaticStorage());
   }
 
-  void AssertField(Class* klass, Field* field) {
+  void AssertField(Class* klass, Field* field)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     FieldHelper fh(field);
     EXPECT_TRUE(field != NULL);
     EXPECT_TRUE(field->GetClass() != NULL);
@@ -160,7 +166,8 @@
     EXPECT_TRUE(fh.GetType() != NULL);
   }
 
-  void AssertClass(const std::string& descriptor, Class* klass) {
+  void AssertClass(const std::string& descriptor, Class* klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ClassHelper kh(klass);
     EXPECT_STREQ(descriptor.c_str(), kh.GetDescriptor());
     if (descriptor == "Ljava/lang/Object;") {
@@ -283,7 +290,8 @@
               total_num_reference_instance_fields == 0);
   }
 
-  void AssertDexFileClass(ClassLoader* class_loader, const std::string& descriptor) {
+  void AssertDexFileClass(ClassLoader* class_loader, const std::string& descriptor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ASSERT_TRUE(descriptor != NULL);
     Class* klass = class_linker_->FindSystemClass(descriptor.c_str());
     ASSERT_TRUE(klass != NULL);
@@ -298,7 +306,8 @@
     }
   }
 
-  void AssertDexFile(const DexFile* dex, ClassLoader* class_loader) {
+  void AssertDexFile(const DexFile* dex, ClassLoader* class_loader)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ASSERT_TRUE(dex != NULL);
 
     // Verify all the classes defined in this file
@@ -341,7 +350,7 @@
   std::string class_descriptor;
   std::vector<CheckOffset> offsets;
 
-  bool Check() {
+  bool Check() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Class* klass = Runtime::Current()->GetClassLinker()->FindSystemClass(class_descriptor.c_str());
     CHECK(klass != NULL) << class_descriptor;
 
@@ -549,21 +558,6 @@
   };
 };
 
-struct BaseDexClassLoaderOffsets : public CheckOffsets<BaseDexClassLoader> {
-  BaseDexClassLoaderOffsets()
-    : CheckOffsets<BaseDexClassLoader>(false, "Ldalvik/system/BaseDexClassLoader;") {
-    // alphabetical references
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(BaseDexClassLoader, original_library_path_), "originalLibraryPath"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(BaseDexClassLoader, original_path_),         "originalPath"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(BaseDexClassLoader, path_list_),             "pathList"));
-  };
-};
-
-struct PathClassLoaderOffsets : public CheckOffsets<PathClassLoader> {
-  PathClassLoaderOffsets()
-    : CheckOffsets<PathClassLoader>(false, "Ldalvik/system/PathClassLoader;") {}
-};
-
 struct ProxyOffsets : public CheckOffsets<Proxy> {
   ProxyOffsets() : CheckOffsets<Proxy>(false, "Ljava/lang/reflect/Proxy;") {
     // alphabetical references
@@ -614,6 +608,7 @@
 // reorder the fields in the C++ class. Managed class fields are ordered by
 // ClassLinker::LinkFields.
 TEST_F(ClassLinkerTest, ValidateFieldOrderOfJavaCppUnionClasses) {
+  ScopedObjectAccess soa(Thread::Current());
   EXPECT_TRUE(ObjectOffsets().Check());
   EXPECT_TRUE(ConstructorOffsets().Check());
   EXPECT_TRUE(FieldOffsets().Check());
@@ -623,8 +618,6 @@
   EXPECT_TRUE(ThrowableOffsets().Check());
   EXPECT_TRUE(StackTraceElementOffsets().Check());
   EXPECT_TRUE(ClassLoaderOffsets().Check());
-  EXPECT_TRUE(BaseDexClassLoaderOffsets().Check());
-  EXPECT_TRUE(PathClassLoaderOffsets().Check());
   EXPECT_TRUE(ProxyOffsets().Check());
 
   EXPECT_TRUE(ClassClassOffsets().Check());
@@ -634,12 +627,14 @@
 }
 
 TEST_F(ClassLinkerTest, FindClassNonexistent) {
+  ScopedObjectAccess soa(Thread::Current());
   AssertNonExistentClass("NoSuchClass;");
   AssertNonExistentClass("LNoSuchClass;");
 }
 
 TEST_F(ClassLinkerTest, FindClassNested) {
-  SirtRef<ClassLoader> class_loader(LoadDex("Nested"));
+  ScopedObjectAccess soa(Thread::Current());
+  SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(LoadDex("Nested")));
 
   Class* outer = class_linker_->FindClass("LNested;", class_loader.get());
   ASSERT_TRUE(outer != NULL);
@@ -653,6 +648,7 @@
 }
 
 TEST_F(ClassLinkerTest, FindClass_Primitives) {
+  ScopedObjectAccess soa(Thread::Current());
   const std::string expected("BCDFIJSZV");
   for (int ch = 1; ch < 256; ++ch) {
     std::string descriptor;
@@ -666,6 +662,7 @@
 }
 
 TEST_F(ClassLinkerTest, FindClass) {
+  ScopedObjectAccess soa(Thread::Current());
   Class* JavaLangObject = class_linker_->FindSystemClass("Ljava/lang/Object;");
   ClassHelper kh(JavaLangObject);
   ASSERT_TRUE(JavaLangObject != NULL);
@@ -701,7 +698,7 @@
   EXPECT_EQ(0U, JavaLangObject->NumStaticFields());
   EXPECT_EQ(0U, kh.NumDirectInterfaces());
 
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClass"));
+  SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(LoadDex("MyClass")));
   AssertNonExistentClass("LMyClass;");
   Class* MyClass = class_linker_->FindClass("LMyClass;", class_loader.get());
   kh.ChangeClass(MyClass);
@@ -746,12 +743,14 @@
 }
 
 TEST_F(ClassLinkerTest, LibCore) {
+  ScopedObjectAccess soa(Thread::Current());
   AssertDexFile(java_lang_dex_file_, NULL);
 }
 
 // The first reference array element must be a multiple of 4 bytes from the
 // start of the object
 TEST_F(ClassLinkerTest, ValidateObjectArrayElementsOffset) {
+  ScopedObjectAccess soa(Thread::Current());
   Class* array_class = class_linker_->FindSystemClass("[Ljava/lang/String;");
   ObjectArray<String>* array = ObjectArray<String>::Alloc(array_class, 0);
   uint32_t array_offset = reinterpret_cast<uint32_t>(array);
@@ -765,6 +764,7 @@
 }
 
 TEST_F(ClassLinkerTest, ValidatePrimitiveArrayElementsOffset) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<LongArray> long_array(LongArray::Alloc(0));
   EXPECT_EQ(class_linker_->FindSystemClass("[J"), long_array->GetClass());
   uintptr_t data_offset = reinterpret_cast<uintptr_t>(long_array->GetData());
@@ -796,6 +796,7 @@
 TEST_F(ClassLinkerTest, ValidateBoxedTypes) {
   // Validate that the "value" field is always the 0th field in each of java.lang's box classes.
   // This lets UnboxPrimitive avoid searching for the field by name at runtime.
+  ScopedObjectAccess soa(Thread::Current());
   Class* c;
   c = class_linker_->FindClass("Ljava/lang/Boolean;", NULL);
   FieldHelper fh(c->GetIFields()->Get(0));
@@ -824,8 +825,9 @@
 }
 
 TEST_F(ClassLinkerTest, TwoClassLoadersOneClass) {
-  SirtRef<ClassLoader> class_loader_1(LoadDex("MyClass"));
-  SirtRef<ClassLoader> class_loader_2(LoadDex("MyClass"));
+  ScopedObjectAccess soa(Thread::Current());
+  SirtRef<ClassLoader> class_loader_1(soa.Decode<ClassLoader*>(LoadDex("MyClass")));
+  SirtRef<ClassLoader> class_loader_2(soa.Decode<ClassLoader*>(LoadDex("MyClass")));
   Class* MyClass_1 = class_linker_->FindClass("LMyClass;", class_loader_1.get());
   Class* MyClass_2 = class_linker_->FindClass("LMyClass;", class_loader_2.get());
   EXPECT_TRUE(MyClass_1 != NULL);
@@ -834,7 +836,8 @@
 }
 
 TEST_F(ClassLinkerTest, StaticFields) {
-  SirtRef<ClassLoader> class_loader(LoadDex("Statics"));
+  ScopedObjectAccess soa(Thread::Current());
+  SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(LoadDex("Statics")));
   Class* statics = class_linker_->FindClass("LStatics;", class_loader.get());
   class_linker_->EnsureInitialized(statics, true, true);
 
@@ -915,7 +918,8 @@
 }
 
 TEST_F(ClassLinkerTest, Interfaces) {
-  SirtRef<ClassLoader> class_loader(LoadDex("Interfaces"));
+  ScopedObjectAccess soa(Thread::Current());
+  SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(LoadDex("Interfaces")));
   Class* I = class_linker_->FindClass("LInterfaces$I;", class_loader.get());
   Class* J = class_linker_->FindClass("LInterfaces$J;", class_loader.get());
   Class* K = class_linker_->FindClass("LInterfaces$K;", class_loader.get());
@@ -973,8 +977,10 @@
   // case 1, get the uninitialized storage from StaticsFromCode.<clinit>
   // case 2, get the initialized storage from StaticsFromCode.getS0
 
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticsFromCode"));
-  const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(class_loader.get())[0];
+  ScopedObjectAccess soa(Thread::Current());
+  jobject jclass_loader = LoadDex("StaticsFromCode");
+  SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(jclass_loader));
+  const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(jclass_loader)[0];
   CHECK(dex_file != NULL);
 
   Class* klass = class_linker_->FindClass("LStaticsFromCode;", class_loader.get());
@@ -995,6 +1001,7 @@
 }
 
 TEST_F(ClassLinkerTest, FinalizableBit) {
+  ScopedObjectAccess soa(Thread::Current());
   Class* c;
 
   // Object has a finalize method, but we know it's empty.
@@ -1028,6 +1035,7 @@
 }
 
 TEST_F(ClassLinkerTest, ClassRootDescriptors) {
+  ScopedObjectAccess soa(Thread::Current());
   ClassHelper kh;
   for (int i = 0; i < ClassLinker::kClassRootsMax; i++) {
     Class* klass = class_linker_->GetClassRoot(ClassLinker::ClassRoot(i));
diff --git a/src/class_loader.cc b/src/class_loader.cc
deleted file mode 100644
index 3adb4ec..0000000
--- a/src/class_loader.cc
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "class_loader.h"
-
-#include "class_linker.h"
-#include "runtime.h"
-
-namespace art {
-
-// TODO: get global references for these
-Class* PathClassLoader::dalvik_system_PathClassLoader_ = NULL;
-
-PathClassLoader* PathClassLoader::AllocCompileTime(std::vector<const DexFile*>& dex_files) {
-  CHECK(!Runtime::Current()->IsStarted());
-  DCHECK(dalvik_system_PathClassLoader_ != NULL);
-  SirtRef<PathClassLoader> p(down_cast<PathClassLoader*>(dalvik_system_PathClassLoader_->AllocObject()));
-  Runtime::Current()->SetCompileTimeClassPath(p.get(), dex_files);
-  return p.get();
-}
-
-void PathClassLoader::SetClass(Class* dalvik_system_PathClassLoader) {
-  CHECK(dalvik_system_PathClassLoader_ == NULL);
-  CHECK(dalvik_system_PathClassLoader != NULL);
-  dalvik_system_PathClassLoader_ = dalvik_system_PathClassLoader;
-}
-
-void PathClassLoader::ResetClass() {
-  CHECK(dalvik_system_PathClassLoader_ != NULL);
-  dalvik_system_PathClassLoader_ = NULL;
-}
-
-}  // namespace art
diff --git a/src/class_loader.h b/src/class_loader.h
index 0e1148d..029c4a2 100644
--- a/src/class_loader.h
+++ b/src/class_loader.h
@@ -36,30 +36,6 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(ClassLoader);
 };
 
-// C++ mirror of dalvik.system.BaseDexClassLoader
-class MANAGED BaseDexClassLoader : public ClassLoader {
- private:
-  // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  String* original_library_path_;
-  String* original_path_;
-  Object* path_list_;
-
-  friend struct BaseDexClassLoaderOffsets;  // for verifying offset information
-  DISALLOW_IMPLICIT_CONSTRUCTORS(BaseDexClassLoader);
-};
-
-// C++ mirror of dalvik.system.PathClassLoader
-class MANAGED PathClassLoader : public BaseDexClassLoader {
- public:
-  static PathClassLoader* AllocCompileTime(std::vector<const DexFile*>& dex_files);
-  static void SetClass(Class* dalvik_system_PathClassLoader);
-  static void ResetClass();
- private:
-  static Class* dalvik_system_PathClassLoader_;
-  friend struct PathClassLoaderOffsets;  // for verifying offset information
-  DISALLOW_IMPLICIT_CONSTRUCTORS(PathClassLoader);
-};
-
 }  // namespace art
 
 #endif  // ART_SRC_CLASS_LOADER_H_
diff --git a/src/common_test.h b/src/common_test.h
index 58d0219..41dc76c 100644
--- a/src/common_test.h
+++ b/src/common_test.h
@@ -33,12 +33,15 @@
 #include "object_utils.h"
 #include "os.h"
 #include "runtime.h"
+#include "ScopedLocalRef.h"
+#include "scoped_thread_state_change.h"
 #include "stl_util.h"
 #include "stringprintf.h"
 #include "thread.h"
 #include "unicode/uclean.h"
 #include "unicode/uvernum.h"
 #include "UniquePtr.h"
+#include "well_known_classes.h"
 
 namespace art {
 
@@ -206,7 +209,7 @@
                                 );
   }
 
-  void MakeExecutable(Method* method) {
+  void MakeExecutable(Method* method) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK(method != NULL);
 
     MethodHelper mh(method);
@@ -328,7 +331,17 @@
     options.push_back(std::make_pair("-Xcheck:jni", reinterpret_cast<void*>(NULL)));
     options.push_back(std::make_pair(min_heap_string.c_str(), reinterpret_cast<void*>(NULL)));
     options.push_back(std::make_pair(max_heap_string.c_str(), reinterpret_cast<void*>(NULL)));
-    runtime_.reset(Runtime::Create(options, false));
+    if (!Runtime::Create(options, false)) {
+      LOG(FATAL) << "Failed to create runtime";
+      return;
+    }
+    runtime_.reset(Runtime::Current());
+    // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
+    // give it away now and then switch to a more manageable ScopedObjectAccess.
+    Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+    // Whilst we're in native take the opportunity to initialize well known classes.
+    WellKnownClasses::InitClasses(Thread::Current()->GetJniEnv());
+    ScopedObjectAccess soa(Thread::Current());
     ASSERT_TRUE(runtime_.get() != NULL);
     class_linker_ = runtime_->GetClassLinker();
 
@@ -362,7 +375,7 @@
     compiler_.reset(new Compiler(instruction_set, true, 2, false, image_classes_.get(),
                                  true, true));
 
-    Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
+    runtime_->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
   }
 
   virtual void TearDown() {
@@ -436,16 +449,20 @@
     return dex_file;
   }
 
-  ClassLoader* LoadDex(const char* dex_name) {
+  jobject LoadDex(const char* dex_name)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile* dex_file = OpenTestDexFile(dex_name);
     CHECK(dex_file != NULL);
     class_linker_->RegisterDexFile(*dex_file);
     std::vector<const DexFile*> class_path;
     class_path.push_back(dex_file);
-    SirtRef<ClassLoader> class_loader(PathClassLoader::AllocCompileTime(class_path));
-    CHECK(class_loader.get() != NULL);
-    Thread::Current()->SetClassLoaderOverride(class_loader.get());
-    return class_loader.get();
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    ScopedLocalRef<jobject> class_loader_local(soa.Env(),
+        soa.Env()->AllocObject(WellKnownClasses::dalvik_system_PathClassLoader));
+    jobject class_loader = soa.Env()->NewGlobalRef(class_loader_local.get());
+    soa.Self()->SetClassLoaderOverride(soa.Decode<ClassLoader*>(class_loader_local.get()));
+    Runtime::Current()->SetCompileTimeClassPath(class_loader, class_path);
+    return class_loader;
   }
 
   void CompileClass(ClassLoader* class_loader, const char* class_name) {
@@ -460,7 +477,7 @@
     }
   }
 
-  void CompileMethod(Method* method) {
+  void CompileMethod(Method* method) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK(method != NULL);
     compiler_->CompileOne(method);
     MakeExecutable(method);
@@ -471,7 +488,8 @@
   void CompileDirectMethod(ClassLoader* class_loader,
                            const char* class_name,
                            const char* method_name,
-                           const char* signature) {
+                           const char* signature)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
     Class* klass = class_linker_->FindClass(class_descriptor.c_str(), class_loader);
     CHECK(klass != NULL) << "Class not found " << class_name;
@@ -484,7 +502,8 @@
   void CompileVirtualMethod(ClassLoader* class_loader,
                             const char* class_name,
                             const char* method_name,
-                            const char* signature) {
+                            const char* signature)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     std::string class_descriptor(DotToDescriptor(class_name));
     Class* klass = class_linker_->FindClass(class_descriptor.c_str(), class_loader);
     CHECK(klass != NULL) << "Class not found " << class_name;
diff --git a/src/compiler.cc b/src/compiler.cc
index ceb9d11..bcbb77d 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -27,9 +27,12 @@
 #include "jni_internal.h"
 #include "oat_compilation_unit.h"
 #include "oat_file.h"
+#include "oat/runtime/stub.h"
 #include "object_utils.h"
 #include "runtime.h"
 #include "space.h"
+#include "scoped_thread_state_change.h"
+#include "ScopedLocalRef.h"
 #include "stl_util.h"
 #include "timing_logger.h"
 #include "verifier/method_verifier.h"
@@ -40,17 +43,6 @@
 
 namespace art {
 
-namespace arm {
-  ByteArray* CreateAbstractMethodErrorStub();
-  ByteArray* ArmCreateResolutionTrampoline(Runtime::TrampolineType type);
-  ByteArray* CreateJniDlsymLookupStub();
-}
-namespace x86 {
-  ByteArray* CreateAbstractMethodErrorStub();
-  ByteArray* X86CreateResolutionTrampoline(Runtime::TrampolineType type);
-  ByteArray* CreateJniDlsymLookupStub();
-}
-
 static double Percentage(size_t x, size_t y) {
   return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y));
 }
@@ -311,6 +303,7 @@
       image_(image),
       thread_count_(thread_count),
       support_debugging_(support_debugging),
+      start_ns_(0),
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_timings_(dump_timings),
@@ -435,7 +428,7 @@
   }
 }
 
-void Compiler::CompileAll(ClassLoader* class_loader,
+void Compiler::CompileAll(jobject class_loader,
                           const std::vector<const DexFile*>& dex_files) {
   DCHECK(!Runtime::Current()->IsStarted());
 
@@ -464,27 +457,41 @@
 
 void Compiler::CompileOne(const Method* method) {
   DCHECK(!Runtime::Current()->IsStarted());
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  const DexCache* dex_cache;
+  const DexFile* dex_file;
+  {
+    ScopedObjectAccessUnchecked soa(self);
+    ScopedLocalRef<jobject>
+      local_class_loader(soa.Env(),
+                    soa.AddLocalReference<jobject>(method->GetDeclaringClass()->GetClassLoader()));
+    class_loader = soa.Env()->NewGlobalRef(local_class_loader.get());
+    // Find the dex_file
+    dex_cache = method->GetDeclaringClass()->GetDexCache();
+    dex_file = &Runtime::Current()->GetClassLinker()->FindDexFile(dex_cache);
+  }
+  self->TransitionFromRunnableToSuspended(kNative);
 
-  ClassLoader* class_loader = method->GetDeclaringClass()->GetClassLoader();
-
-  // Find the dex_file
-  const DexCache* dex_cache = method->GetDeclaringClass()->GetDexCache();
-  const DexFile& dex_file = Runtime::Current()->GetClassLinker()->FindDexFile(dex_cache);
   std::vector<const DexFile*> dex_files;
-  dex_files.push_back(&dex_file);
+  dex_files.push_back(dex_file);
 
   TimingLogger timings("CompileOne");
   PreCompile(class_loader, dex_files, timings);
 
   uint32_t method_idx = method->GetDexMethodIndex();
-  const DexFile::CodeItem* code_item = dex_file.GetCodeItem(method->GetCodeItemOffset());
-  CompileMethod(code_item, method->GetAccessFlags(), method_idx, class_loader, dex_file);
+  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
+  CompileMethod(code_item, method->GetAccessFlags(), method_idx, class_loader, *dex_file);
 
   PostCompile(class_loader, dex_files);
+
+  self->GetJniEnv()->DeleteGlobalRef(class_loader);
+
+  self->TransitionFromSuspendedToRunnable();
 }
 
-void Compiler::Resolve(ClassLoader* class_loader,
-                       const std::vector<const DexFile*>& dex_files, TimingLogger& timings) {
+void Compiler::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
+                       TimingLogger& timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -492,8 +499,8 @@
   }
 }
 
-void Compiler::PreCompile(ClassLoader* class_loader,
-                          const std::vector<const DexFile*>& dex_files, TimingLogger& timings) {
+void Compiler::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
+                          TimingLogger& timings) {
   Resolve(class_loader, dex_files, timings);
 
   Verify(class_loader, dex_files);
@@ -503,8 +510,7 @@
   timings.AddSplit("PreCompile.InitializeClassesWithoutClinit");
 }
 
-void Compiler::PostCompile(ClassLoader* class_loader,
-                           const std::vector<const DexFile*>& dex_files) {
+void Compiler::PostCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files) {
   SetGcMaps(class_loader, dex_files);
 }
 
@@ -515,8 +521,10 @@
   return image_classes_->find(descriptor) != image_classes_->end();
 }
 
-bool Compiler::CanAssumeTypeIsPresentInDexCache(const DexCache* dex_cache,
+bool Compiler::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file,
                                                 uint32_t type_idx) {
+  ScopedObjectAccess soa(Thread::Current());
+  DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
   if (!IsImage()) {
     stats_->TypeNotInDexCache();
     return false;
@@ -535,14 +543,19 @@
   return result;
 }
 
-bool Compiler::CanAssumeStringIsPresentInDexCache(const DexCache* dex_cache,
+bool Compiler::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file,
                                                   uint32_t string_idx) {
   // TODO: Add support for loading strings referenced by image_classes_
   // See also Compiler::ResolveDexFile
 
   // The following is a test saying that if we're building the image without a restricted set of
   // image classes then we can assume the string is present in the dex cache if it is there now
-  bool result = IsImage() && image_classes_ == NULL && dex_cache->GetResolvedString(string_idx) != NULL;
+  bool result = IsImage() && image_classes_ == NULL;
+  if (result) {
+    ScopedObjectAccess soa(Thread::Current());
+    DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+    result = dex_cache->GetResolvedString(string_idx) != NULL;
+  }
   if (result) {
     stats_->StringInDexCache();
   } else {
@@ -551,8 +564,10 @@
   return result;
 }
 
-bool Compiler::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexCache* dex_cache,
-                                          const DexFile& dex_file, uint32_t type_idx) {
+bool Compiler::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
+                                          uint32_t type_idx) {
+  ScopedObjectAccess soa(Thread::Current());
+  DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
   // Get type from dex cache assuming it was populated by the verifier
   Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == NULL) {
@@ -577,9 +592,10 @@
 }
 
 bool Compiler::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
-                                                      const DexCache* dex_cache,
                                                       const DexFile& dex_file,
                                                       uint32_t type_idx) {
+  ScopedObjectAccess soa(Thread::Current());
+  DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
   // Get type from dex cache assuming it was populated by the verifier.
   Class* resolved_class = dex_cache->GetResolvedType(type_idx);
   if (resolved_class == NULL) {
@@ -603,36 +619,44 @@
   return result;
 }
 
-static Class* ComputeReferrerClass(OatCompilationUnit* mUnit) {
-  const DexFile::MethodId& referrer_method_id =
-    mUnit->dex_file_->GetMethodId(mUnit->method_idx_);
-
-  return mUnit->class_linker_->ResolveType(
-    *mUnit->dex_file_, referrer_method_id.class_idx_,
-    mUnit->dex_cache_, mUnit->class_loader_);
+static Class* ComputeReferrerClass(ScopedObjectAccess& soa,
+                                   OatCompilationUnit* mUnit)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  DexCache* dex_cache = mUnit->class_linker_->FindDexCache(*mUnit->dex_file_);
+  ClassLoader* class_loader = soa.Decode<ClassLoader*>(mUnit->class_loader_);
+  const DexFile::MethodId& referrer_method_id = mUnit->dex_file_->GetMethodId(mUnit->method_idx_);
+  return mUnit->class_linker_->ResolveType(*mUnit->dex_file_, referrer_method_id.class_idx_,
+                                           dex_cache, class_loader);
 }
 
-static Field* ComputeReferrerField(OatCompilationUnit* mUnit, uint32_t field_idx) {
-  return mUnit->class_linker_->ResolveField(
-    *mUnit->dex_file_, field_idx, mUnit->dex_cache_,
-    mUnit->class_loader_, false);
+static Field* ComputeReferrerField(ScopedObjectAccess& soa,
+                                   OatCompilationUnit* mUnit, uint32_t field_idx)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  DexCache* dex_cache = mUnit->class_linker_->FindDexCache(*mUnit->dex_file_);
+  ClassLoader* class_loader = soa.Decode<ClassLoader*>(mUnit->class_loader_);
+  return mUnit->class_linker_->ResolveField(*mUnit->dex_file_, field_idx, dex_cache,
+                                            class_loader, false);
 }
 
-static Method* ComputeReferrerMethod(OatCompilationUnit* mUnit, uint32_t method_idx) {
-  return mUnit->class_linker_->ResolveMethod(
-    *mUnit->dex_file_, method_idx, mUnit->dex_cache_,
-    mUnit->class_loader_, true);
+static Method* ComputeReferrerMethod(ScopedObjectAccess& soa,
+                                     OatCompilationUnit* mUnit, uint32_t method_idx)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  DexCache* dex_cache = mUnit->class_linker_->FindDexCache(*mUnit->dex_file_);
+  ClassLoader* class_loader = soa.Decode<ClassLoader*>(mUnit->class_loader_);
+  return mUnit->class_linker_->ResolveMethod(*mUnit->dex_file_, method_idx, dex_cache,
+                                             class_loader, true);
 }
 
 bool Compiler::ComputeInstanceFieldInfo(uint32_t field_idx, OatCompilationUnit* mUnit,
                                         int& field_offset, bool& is_volatile, bool is_put) {
+  ScopedObjectAccess soa(Thread::Current());
   // Conservative defaults
   field_offset = -1;
   is_volatile = true;
   // Try to resolve field
-  Field* resolved_field = ComputeReferrerField(mUnit, field_idx);
+  Field* resolved_field = ComputeReferrerField(soa, mUnit, field_idx);
   if (resolved_field != NULL) {
-    Class* referrer_class = ComputeReferrerClass(mUnit);
+    Class* referrer_class = ComputeReferrerClass(soa, mUnit);
     if (referrer_class != NULL) {
       Class* fields_class = resolved_field->GetDeclaringClass();
       bool access_ok = referrer_class->CanAccess(fields_class) &&
@@ -661,9 +685,8 @@
     }
   }
   // Clean up any exception left by field/type resolution
-  Thread* thread = Thread::Current();
-  if (thread->IsExceptionPending()) {
-      thread->ClearException();
+  if (soa.Self()->IsExceptionPending()) {
+    soa.Self()->ClearException();
   }
   stats_->UnresolvedInstanceField();
   return false;  // Incomplete knowledge needs slow path.
@@ -672,16 +695,17 @@
 bool Compiler::ComputeStaticFieldInfo(uint32_t field_idx, OatCompilationUnit* mUnit,
                                       int& field_offset, int& ssb_index,
                                       bool& is_referrers_class, bool& is_volatile, bool is_put) {
+  ScopedObjectAccess soa(Thread::Current());
   // Conservative defaults
   field_offset = -1;
   ssb_index = -1;
   is_referrers_class = false;
   is_volatile = true;
   // Try to resolve field
-  Field* resolved_field = ComputeReferrerField(mUnit, field_idx);
+  Field* resolved_field = ComputeReferrerField(soa, mUnit, field_idx);
   if (resolved_field != NULL) {
     DCHECK(resolved_field->IsStatic());
-    Class* referrer_class = ComputeReferrerClass(mUnit);
+    Class* referrer_class = ComputeReferrerClass(soa, mUnit);
     if (referrer_class != NULL) {
       Class* fields_class = resolved_field->GetDeclaringClass();
       if (fields_class == referrer_class) {
@@ -714,7 +738,8 @@
           // in its static storage base (which may fail if it doesn't have a slot for it)
           // TODO: for images we can elide the static storage base null check
           // if we know there's a non-null entry in the image
-          if (fields_class->GetDexCache() == mUnit->dex_cache_) {
+          DexCache* dex_cache = mUnit->class_linker_->FindDexCache(*mUnit->dex_file_);
+          if (fields_class->GetDexCache() == dex_cache) {
             // common case where the dex cache of both the referrer and the field are the same,
             // no need to search the dex file
             ssb_index = fields_class->GetDexTypeIndex();
@@ -745,9 +770,8 @@
     }
   }
   // Clean up any exception left by field/type resolution
-  Thread* thread = Thread::Current();
-  if (thread->IsExceptionPending()) {
-      thread->ClearException();
+  if (soa.Self()->IsExceptionPending()) {
+    soa.Self()->ClearException();
   }
   stats_->UnresolvedStaticField();
   return false;  // Incomplete knowledge needs slow path.
@@ -793,12 +817,13 @@
 bool Compiler::ComputeInvokeInfo(uint32_t method_idx, OatCompilationUnit* mUnit, InvokeType& type,
                                  int& vtable_idx, uintptr_t& direct_code,
                                  uintptr_t& direct_method) {
+  ScopedObjectAccess soa(Thread::Current());
   vtable_idx = -1;
   direct_code = 0;
   direct_method = 0;
-  Method* resolved_method = ComputeReferrerMethod(mUnit, method_idx);
+  Method* resolved_method = ComputeReferrerMethod(soa, mUnit, method_idx);
   if (resolved_method != NULL) {
-    Class* referrer_class = ComputeReferrerClass(mUnit);
+    Class* referrer_class = ComputeReferrerClass(soa, mUnit);
     if (referrer_class != NULL) {
       Class* methods_class = resolved_method->GetDeclaringClass();
       if (!referrer_class->CanAccess(methods_class) ||
@@ -847,40 +872,35 @@
     }
   }
   // Clean up any exception left by method/type resolution
-  Thread* thread = Thread::Current();
-  if (thread->IsExceptionPending()) {
-      thread->ClearException();
+  if (soa.Self()->IsExceptionPending()) {
+      soa.Self()->ClearException();
   }
   stats_->UnresolvedMethod(type);
   return false;  // Incomplete knowledge needs slow path.
 }
 
-void Compiler::AddCodePatch(DexCache* dex_cache,
-                            const DexFile* dex_file,
+void Compiler::AddCodePatch(const DexFile* dex_file,
                             uint32_t referrer_method_idx,
                             uint32_t referrer_access_flags,
                             uint32_t target_method_idx,
                             bool target_is_direct,
                             size_t literal_offset) {
   MutexLock mu(compiled_methods_lock_);
-  code_to_patch_.push_back(new PatchInformation(dex_cache,
-                                                dex_file,
+  code_to_patch_.push_back(new PatchInformation(dex_file,
                                                 referrer_method_idx,
                                                 referrer_access_flags,
                                                 target_method_idx,
                                                 target_is_direct,
                                                 literal_offset));
 }
-void Compiler::AddMethodPatch(DexCache* dex_cache,
-                              const DexFile* dex_file,
+void Compiler::AddMethodPatch(const DexFile* dex_file,
                               uint32_t referrer_method_idx,
                               uint32_t referrer_access_flags,
                               uint32_t target_method_idx,
                               bool target_is_direct,
                               size_t literal_offset) {
   MutexLock mu(compiled_methods_lock_);
-  methods_to_patch_.push_back(new PatchInformation(dex_cache,
-                                                   dex_file,
+  methods_to_patch_.push_back(new PatchInformation(dex_file,
                                                    referrer_method_idx,
                                                    referrer_access_flags,
                                                    target_method_idx,
@@ -888,73 +908,47 @@
                                                    literal_offset));
 }
 
-// Return true if the class should be skipped during compilation. We
-// never skip classes in the boot class loader. However, if we have a
-// non-boot class loader and we can resolve the class in the boot
-// class loader, we do skip the class. This happens if an app bundles
-// classes found in the boot classpath. Since at runtime we will
-// select the class from the boot classpath, do not attempt to resolve
-// or compile it now.
-static bool SkipClass(ClassLoader* class_loader,
-                      const DexFile& dex_file,
-                      const DexFile::ClassDef& class_def) {
-  if (class_loader == NULL) {
-    return false;
-  }
-  const char* descriptor = dex_file.GetClassDescriptor(class_def);
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  Class* klass = class_linker->FindClass(descriptor, NULL);
-  if (klass == NULL) {
-    Thread* self = Thread::Current();
-    CHECK(self->IsExceptionPending());
-    self->ClearException();
-    return false;
-  }
-  return true;
-}
-
 class CompilationContext {
  public:
   CompilationContext(ClassLinker* class_linker,
-          ClassLoader* class_loader,
+          jobject class_loader,
           Compiler* compiler,
-          DexCache* dex_cache,
           const DexFile* dex_file)
     : class_linker_(class_linker),
       class_loader_(class_loader),
       compiler_(compiler),
-      dex_cache_(dex_cache),
       dex_file_(dex_file) {}
 
-  ClassLinker* GetClassLinker() {
+  ClassLinker* GetClassLinker() const {
     CHECK(class_linker_ != NULL);
     return class_linker_;
   }
-  ClassLoader* GetClassLoader() {
+
+  jobject GetClassLoader() const {
     return class_loader_;
   }
-  Compiler* GetCompiler() {
+
+  Compiler* GetCompiler() const {
     CHECK(compiler_ != NULL);
     return compiler_;
   }
-  DexCache* GetDexCache() {
-    CHECK(dex_cache_ != NULL);
-    return dex_cache_;
-  }
-  const DexFile* GetDexFile() {
+
+  const DexFile* GetDexFile() const {
     CHECK(dex_file_ != NULL);
     return dex_file_;
   }
 
  private:
-  ClassLinker* class_linker_;
-  ClassLoader* class_loader_;
-  Compiler* compiler_;
-  DexCache* dex_cache_;
-  const DexFile* dex_file_;
+  ClassLinker* const class_linker_;
+  const jobject class_loader_;
+  Compiler* const compiler_;
+  const DexFile* const dex_file_;
 };
 
-typedef void Callback(CompilationContext* context, size_t index);
+typedef void Callback(const CompilationContext* context, size_t index);
+
+static void ForAll(CompilationContext* context, size_t begin, size_t end, Callback callback,
+                   size_t thread_count);
 
 class WorkerThread {
  public:
@@ -977,48 +971,49 @@
   }
 
  private:
-  static void* Go(void* arg) {
+  static void* Go(void* arg) LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
     WorkerThread* worker = reinterpret_cast<WorkerThread*>(arg);
     Runtime* runtime = Runtime::Current();
     if (worker->spawn_) {
       runtime->AttachCurrentThread("Compiler Worker", true, NULL);
     }
-    Thread::Current()->SetState(kRunnable);
     worker->Run();
     if (worker->spawn_) {
-      Thread::Current()->SetState(kNative);
       runtime->DetachCurrentThread();
     }
     return NULL;
   }
 
-  void Go() {
+  void Go() LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
     Go(this);
   }
 
-  void Run() {
+  void Run() LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
     Thread* self = Thread::Current();
     for (size_t i = begin_; i < end_; i += stripe_) {
       callback_(context_, i);
-      CHECK(!self->IsExceptionPending()) << PrettyTypeOf(self->GetException()) << " " << i;
+      self->AssertNoPendingException();
     }
   }
 
   pthread_t pthread_;
-  bool spawn_;
+  // Was this thread spawned or is it the main thread?
+  const bool spawn_;
 
-  CompilationContext* context_;
-  size_t begin_;
-  size_t end_;
-  Callback* callback_;
-  size_t stripe_;
+  const CompilationContext* const context_;
+  const size_t begin_;
+  const size_t end_;
+  const Callback* callback_;
+  const size_t stripe_;
 
   friend void ForAll(CompilationContext*, size_t, size_t, Callback, size_t);
 };
 
-void ForAll(CompilationContext* context, size_t begin, size_t end, Callback callback, size_t thread_count) {
+static void ForAll(CompilationContext* context, size_t begin, size_t end, Callback callback,
+                   size_t thread_count)
+    LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
   Thread* self = Thread::Current();
-  CHECK(!self->IsExceptionPending()) << PrettyTypeOf(self->GetException());
+  self->AssertNoPendingException();
   CHECK_GT(thread_count, 0U);
 
   std::vector<WorkerThread*> threads;
@@ -1027,12 +1022,45 @@
   }
   threads[0]->Go();
 
-  // Switch to kVmWait while we're blocked waiting for the other threads to finish.
-  ScopedThreadStateChange tsc(self, kVmWait);
+  // Ensure we're suspended while we're blocked waiting for the other threads to finish (the
+  // worker thread destructors, invoked below, perform the join).
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_NE(self->GetState(), kRunnable);
+  }
   STLDeleteElements(&threads);
 }
 
-static void ResolveClassFieldsAndMethods(CompilationContext* context, size_t class_def_index) {
+// Return true if the class should be skipped during compilation. We
+// never skip classes in the boot class loader. However, if we have a
+// non-boot class loader and we can resolve the class in the boot
+// class loader, we do skip the class. This happens if an app bundles
+// classes found in the boot classpath. Since at runtime we will
+// select the class from the boot classpath, do not attempt to resolve
+// or compile it now.
+static bool SkipClass(ClassLoader* class_loader,
+                      const DexFile& dex_file,
+                      const DexFile::ClassDef& class_def)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  if (class_loader == NULL) {
+    return false;
+  }
+  const char* descriptor = dex_file.GetClassDescriptor(class_def);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Class* klass = class_linker->FindClass(descriptor, NULL);
+  if (klass == NULL) {
+    Thread* self = Thread::Current();
+    CHECK(self->IsExceptionPending());
+    self->ClearException();
+    return false;
+  }
+  return true;
+}
+
+static void ResolveClassFieldsAndMethods(const CompilationContext* context, size_t class_def_index)
+    LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
+  ScopedObjectAccess soa(Thread::Current());
+  ClassLoader* class_loader = soa.Decode<ClassLoader*>(context->GetClassLoader());
   const DexFile& dex_file = *context->GetDexFile();
 
   // Method and Field are the worst. We can't resolve without either
@@ -1043,7 +1071,7 @@
   // definitions, since many of them many never be referenced by
   // generated code.
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
-  if (SkipClass(context->GetClassLoader(), dex_file, class_def)) {
+  if (SkipClass(class_loader, dex_file, class_def)) {
     return;
   }
 
@@ -1061,7 +1089,7 @@
   ClassDataItemIterator it(dex_file, class_data);
   while (it.HasNextStaticField()) {
     Field* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(), dex_cache,
-                                              context->GetClassLoader(), true);
+                                              class_loader, true);
     if (field == NULL) {
       CHECK(self->IsExceptionPending());
       self->ClearException();
@@ -1070,7 +1098,7 @@
   }
   while (it.HasNextInstanceField()) {
     Field* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(), dex_cache,
-                                              context->GetClassLoader(), false);
+                                              class_loader, false);
     if (field == NULL) {
       CHECK(self->IsExceptionPending());
       self->ClearException();
@@ -1079,7 +1107,7 @@
   }
   while (it.HasNextDirectMethod()) {
     Method* method = class_linker->ResolveMethod(dex_file, it.GetMemberIndex(), dex_cache,
-                                                 context->GetClassLoader(), true);
+                                                 class_loader, true);
     if (method == NULL) {
       CHECK(self->IsExceptionPending());
       self->ClearException();
@@ -1088,7 +1116,7 @@
   }
   while (it.HasNextVirtualMethod()) {
     Method* method = class_linker->ResolveMethod(dex_file, it.GetMemberIndex(), dex_cache,
-                                                 context->GetClassLoader(), false);
+                                                 class_loader, false);
     if (method == NULL) {
       CHECK(self->IsExceptionPending());
       self->ClearException();
@@ -1098,43 +1126,38 @@
   DCHECK(!it.HasNext());
 }
 
-static void ResolveType(CompilationContext* context, size_t type_idx) {
+static void ResolveType(const CompilationContext* context, size_t type_idx)
+    LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
   // Class derived values are more complicated, they require the linker and loader.
-  Thread* self = Thread::Current();
-  Class* klass = context->GetClassLinker()->ResolveType(*context->GetDexFile(),
-                                                        type_idx,
-                                                        context->GetDexCache(),
-                                                        context->GetClassLoader());
+  ScopedObjectAccess soa(Thread::Current());
+  ClassLinker* class_linker = context->GetClassLinker();
+  const DexFile& dex_file = *context->GetDexFile();
+  DexCache* dex_cache = class_linker->FindDexCache(dex_file);
+  ClassLoader* class_loader = soa.Decode<ClassLoader*>(context->GetClassLoader());
+  Class* klass = class_linker->ResolveType(dex_file, type_idx, dex_cache, class_loader);
+
   if (klass == NULL) {
-    CHECK(self->IsExceptionPending());
+    CHECK(soa.Self()->IsExceptionPending());
     Thread::Current()->ClearException();
   }
 }
 
-void Compiler::ResolveDexFile(ClassLoader* class_loader, const DexFile& dex_file, TimingLogger& timings) {
+void Compiler::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+                              TimingLogger& timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  DexCache* dex_cache = class_linker->FindDexCache(dex_file);
 
-  // Strings are easy in that they always are simply resolved to literals in the same file
-  if (image_ && image_classes_ == NULL) {
-    // TODO: Add support for loading strings referenced by image_classes_
-    // See also Compiler::CanAssumeTypeIsPresentInDexCache.
-    for (size_t string_idx = 0; string_idx < dex_cache->NumStrings(); string_idx++) {
-      class_linker->ResolveString(dex_file, string_idx, dex_cache);
-    }
-    timings.AddSplit("Resolve " + dex_file.GetLocation() + " Strings");
-  }
+  // TODO: we could resolve strings here, although the string table is largely filled with class
+  //       and method names.
 
-  CompilationContext context(class_linker, class_loader, this, dex_cache, &dex_file);
-  ForAll(&context, 0, dex_cache->NumResolvedTypes(), ResolveType, thread_count_);
+  CompilationContext context(class_linker, class_loader, this, &dex_file);
+  ForAll(&context, 0, dex_file.NumTypeIds(), ResolveType, thread_count_);
   timings.AddSplit("Resolve " + dex_file.GetLocation() + " Types");
 
   ForAll(&context, 0, dex_file.NumClassDefs(), ResolveClassFieldsAndMethods, thread_count_);
   timings.AddSplit("Resolve " + dex_file.GetLocation() + " MethodsAndFields");
 }
 
-void Compiler::Verify(ClassLoader* class_loader,
-                      const std::vector<const DexFile*>& dex_files) {
+void Compiler::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -1142,10 +1165,14 @@
   }
 }
 
-static void VerifyClass(CompilationContext* context, size_t class_def_index) {
+static void VerifyClass(const CompilationContext* context, size_t class_def_index)
+    LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
+  ScopedObjectAccess soa(Thread::Current());
   const DexFile::ClassDef& class_def = context->GetDexFile()->GetClassDef(class_def_index);
   const char* descriptor = context->GetDexFile()->GetClassDescriptor(class_def);
-  Class* klass = context->GetClassLinker()->FindClass(descriptor, context->GetClassLoader());
+  Class* klass =
+      context->GetClassLinker()->FindClass(descriptor,
+                                           soa.Decode<ClassLoader*>(context->GetClassLoader()));
   if (klass == NULL) {
     Thread* self = Thread::Current();
     CHECK(self->IsExceptionPending());
@@ -1156,9 +1183,13 @@
      * This is to ensure the class is structurally sound for compilation. An unsound class
      * will be rejected by the verifier and later skipped during compilation in the compiler.
      */
+    DexCache* dex_cache =  context->GetClassLinker()->FindDexCache(*context->GetDexFile());
     std::string error_msg;
-    if (verifier::MethodVerifier::VerifyClass(context->GetDexFile(), context->GetDexCache(),
-        context->GetClassLoader(), class_def_index, error_msg) == verifier::MethodVerifier::kHardFailure) {
+    if (verifier::MethodVerifier::VerifyClass(context->GetDexFile(),
+                                              dex_cache,
+                                              soa.Decode<ClassLoader*>(context->GetClassLoader()),
+                                              class_def_index, error_msg) ==
+                                                  verifier::MethodVerifier::kHardFailure) {
       const DexFile::ClassDef& class_def = context->GetDexFile()->GetClassDef(class_def_index);
       LOG(ERROR) << "Verification failed on class "
                  << PrettyDescriptor(context->GetDexFile()->GetClassDescriptor(class_def))
@@ -1173,24 +1204,32 @@
     // ClassLinker::VerifyClass throws, which isn't useful in the compiler.
     CHECK(Thread::Current()->IsExceptionPending());
     Thread::Current()->ClearException();
-    art::Compiler::ClassReference ref(context->GetDexFile(), class_def_index);
   }
 
   CHECK(klass->IsCompileTimeVerified() || klass->IsErroneous()) << PrettyClass(klass);
   CHECK(!Thread::Current()->IsExceptionPending()) << PrettyTypeOf(Thread::Current()->GetException());
 }
 
-void Compiler::VerifyDexFile(ClassLoader* class_loader, const DexFile& dex_file) {
+void Compiler::VerifyDexFile(jobject class_loader, const DexFile& dex_file) {
   dex_file.ChangePermissions(PROT_READ | PROT_WRITE);
 
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  CompilationContext context(class_linker, class_loader, this, class_linker->FindDexCache(dex_file), &dex_file);
+  jobject dex_cache;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    ScopedLocalRef<jobject>
+        dex_cache_local(soa.Env(),
+                        soa.AddLocalReference<jobject>(class_linker->FindDexCache(dex_file)));
+    dex_cache = soa.Env()->NewGlobalRef(dex_cache_local.get());
+  }
+  CompilationContext context(class_linker, class_loader, this, &dex_file);
   ForAll(&context, 0, dex_file.NumClassDefs(), VerifyClass, thread_count_);
 
+  Thread::Current()->GetJniEnv()->DeleteGlobalRef(dex_cache);
   dex_file.ChangePermissions(PROT_READ);
 }
 
-void Compiler::InitializeClassesWithoutClinit(ClassLoader* class_loader,
+void Compiler::InitializeClassesWithoutClinit(jobject class_loader,
                                               const std::vector<const DexFile*>& dex_files) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
@@ -1199,7 +1238,9 @@
   }
 }
 
-void Compiler::InitializeClassesWithoutClinit(ClassLoader* class_loader, const DexFile& dex_file) {
+void Compiler::InitializeClassesWithoutClinit(jobject jni_class_loader, const DexFile& dex_file) {
+  ScopedObjectAccess soa(Thread::Current());
+  ClassLoader* class_loader = soa.Decode<ClassLoader*>(jni_class_loader);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   for (size_t class_def_index = 0; class_def_index < dex_file.NumClassDefs(); class_def_index++) {
     const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
@@ -1216,9 +1257,9 @@
       // record the final class status if necessary
       Class::Status status = klass->GetStatus();
       ClassReference ref(&dex_file, class_def_index);
-      MutexLock mu(compiled_classes_lock_);
       CompiledClass* compiled_class = GetCompiledClass(ref);
       if (compiled_class == NULL) {
+        MutexLock mu(compiled_classes_lock_);
         compiled_class = new CompiledClass(status);
         compiled_classes_.Put(ref, compiled_class);
       } else {
@@ -1274,10 +1315,11 @@
     if (worker->spawn_) {
       runtime->AttachCurrentThread("Compiler Worker", true, NULL);
     }
-    Thread::Current()->SetState(kRunnable);
-    worker->Run();
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      worker->Run();
+    }
     if (worker->spawn_) {
-      Thread::Current()->SetState(kNative);
       runtime->DetachCurrentThread();
     }
     return NULL;
@@ -1296,12 +1338,12 @@
     // Destroy the old context
     delete context_;
 
-    // TODO: Add a callback to let the client specify the class_linker and
-    //       dex_cache in the context for the current working dex file.
+    // TODO: Add a callback to let the client specify the class_linker in the context for the
+    //       current working dex file.
     context_ = new CompilationContext(/* class_linker */NULL,
                                       worker_context_->GetClassLoader(),
                                       worker_context_->GetCompiler(),
-                                      /* dex_cache */NULL, dex_file);
+                                      dex_file);
 
     CHECK(context_ != NULL);
   }
@@ -1314,8 +1356,7 @@
     SwitchToDexFile(0);
 
     while (true) {
-      size_t class_index =
-          static_cast<size_t>(android_atomic_inc(shared_class_index_));
+      size_t class_index = static_cast<size_t>(android_atomic_inc(shared_class_index_));
 
       const DexFile* dex_file;
       do {
@@ -1339,7 +1380,7 @@
 
       class_index -= class_index_base;
       class_callback_(context_, class_index);
-      CHECK(!self->IsExceptionPending()) << PrettyTypeOf(self->GetException());
+      self->AssertNoPendingException();
     }
   }
 
@@ -1362,7 +1403,7 @@
                              const std::vector<const DexFile*>& dex_files,
                              Callback class_callback, size_t thread_count) {
   Thread* self = Thread::Current();
-  CHECK(!self->IsExceptionPending()) << PrettyTypeOf(self->GetException());
+  self->AssertNoPendingException();
   CHECK_GT(thread_count, 0U);
 
   std::vector<DexFilesWorkerThread*> threads;
@@ -1375,13 +1416,16 @@
   }
   threads[0]->Go();
 
-  // Switch to kVmWait while we're blocked waiting for the other threads to finish.
-  ScopedThreadStateChange tsc(self, kVmWait);
+  // Ensure we're suspended while we're blocked waiting for the other threads to finish (the
+  // worker thread destructors, invoked below, perform the join).
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_NE(self->GetState(), kRunnable);
+  }
   STLDeleteElements(&threads);
 }
 
-void Compiler::Compile(ClassLoader* class_loader,
-                       const std::vector<const DexFile*>& dex_files) {
+void Compiler::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files) {
 #if defined(ART_USE_LLVM_COMPILER)
   if (dex_files.size() <= 0) {
     return;  // No dex file
@@ -1397,12 +1441,16 @@
 #endif
 }
 
-void Compiler::CompileClass(CompilationContext* context, size_t class_def_index) {
-  ClassLoader* class_loader = context->GetClassLoader();
+void Compiler::CompileClass(const CompilationContext* context, size_t class_def_index) {
+  jobject class_loader = context->GetClassLoader();
   const DexFile& dex_file = *context->GetDexFile();
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
-  if (SkipClass(class_loader, dex_file, class_def)) {
-    return;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    ClassLoader* class_loader = soa.Decode<ClassLoader*>(context->GetClassLoader());
+    if (SkipClass(class_loader, dex_file, class_def)) {
+      return;
+    }
   }
   ClassReference ref(&dex_file, class_def_index);
   // Skip compiling classes with generic verifier failures since they will still fail at runtime
@@ -1455,8 +1503,8 @@
   DCHECK(!it.HasNext());
 }
 
-void Compiler::CompileDexFile(ClassLoader* class_loader, const DexFile& dex_file) {
-  CompilationContext context(NULL, class_loader, this, NULL, &dex_file);
+void Compiler::CompileDexFile(jobject class_loader, const DexFile& dex_file) {
+  CompilationContext context(NULL, class_loader, this, &dex_file);
   ForAll(&context, 0, dex_file.NumClassDefs(), Compiler::CompileClass, thread_count_);
 }
 
@@ -1469,7 +1517,7 @@
 }
 
 void Compiler::CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
-                             uint32_t method_idx, ClassLoader* class_loader,
+                             uint32_t method_idx, jobject class_loader,
                              const DexFile& dex_file) {
   CompiledMethod* compiled_method = NULL;
   uint64_t start_ns = NanoTime();
@@ -1492,8 +1540,10 @@
   if (compiled_method != NULL) {
     MethodReference ref(&dex_file, method_idx);
     CHECK(GetCompiledMethod(ref) == NULL) << PrettyMethod(method_idx, dex_file);
-    MutexLock mu(compiled_methods_lock_);
-    compiled_methods_.Put(ref, compiled_method);
+    {
+      MutexLock mu(compiled_methods_lock_);
+      compiled_methods_.Put(ref, compiled_method);
+    }
     DCHECK(GetCompiledMethod(ref) != NULL) << PrettyMethod(method_idx, dex_file);
   }
 
@@ -1519,7 +1569,11 @@
   }
 #endif
 
-  CHECK(!Thread::Current()->IsExceptionPending()) << PrettyMethod(method_idx, dex_file);
+  if (Thread::Current()->IsExceptionPending()) {
+    ScopedObjectAccess soa(Thread::Current());
+    LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n"
+        << Thread::Current()->GetException()->Dump();
+  }
 }
 
 const CompiledInvokeStub* Compiler::FindInvokeStub(bool is_static, const char* shorty) const {
@@ -1595,7 +1649,7 @@
   return it->second;
 }
 
-void Compiler::SetGcMaps(ClassLoader* class_loader, const std::vector<const DexFile*>& dex_files) {
+void Compiler::SetGcMaps(jobject class_loader, const std::vector<const DexFile*>& dex_files) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
@@ -1603,7 +1657,9 @@
   }
 }
 
-void Compiler::SetGcMapsDexFile(ClassLoader* class_loader, const DexFile& dex_file) {
+void Compiler::SetGcMapsDexFile(jobject jni_class_loader, const DexFile& dex_file) {
+  ScopedObjectAccess soa(Thread::Current());
+  ClassLoader* class_loader = soa.Decode<ClassLoader*>(jni_class_loader);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   DexCache* dex_cache = class_linker->FindDexCache(dex_file);
   for (size_t class_def_index = 0; class_def_index < dex_file.NumClassDefs(); class_def_index++) {
diff --git a/src/compiler.h b/src/compiler.h
index 85f3ead..01ef037 100644
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -52,11 +52,12 @@
 
   ~Compiler();
 
-  void CompileAll(ClassLoader* class_loader,
-                  const std::vector<const DexFile*>& dex_files);
+  void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
   // Compile a single Method
-  void CompileOne(const Method* method);
+  void CompileOne(const Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool IsDebuggingSupported() {
     return support_debugging_;
@@ -71,75 +72,89 @@
   }
 
   // Stub to throw AbstractMethodError
-  static ByteArray* CreateAbstractMethodErrorStub(InstructionSet instruction_set);
+  static ByteArray* CreateAbstractMethodErrorStub(InstructionSet instruction_set)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 
   // Generate the trampoline that's invoked by unresolved direct methods
   static ByteArray* CreateResolutionStub(InstructionSet instruction_set,
-                                         Runtime::TrampolineType type);
+                                         Runtime::TrampolineType type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static ByteArray* CreateJniDlsymLookupStub(InstructionSet instruction_set);
+  static ByteArray* CreateJniDlsymLookupStub(InstructionSet instruction_set)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // A class is uniquely located by its DexFile and the class_defs_ table index into that DexFile
   typedef std::pair<const DexFile*, uint32_t> ClassReference;
 
-  CompiledClass* GetCompiledClass(ClassReference ref) const;
+  CompiledClass* GetCompiledClass(ClassReference ref) const
+      LOCKS_EXCLUDED(compiled_classes_lock_);
 
   // A method is uniquely located by its DexFile and the method_ids_ table index into that DexFile
   typedef std::pair<const DexFile*, uint32_t> MethodReference;
 
-  CompiledMethod* GetCompiledMethod(MethodReference ref) const;
+  CompiledMethod* GetCompiledMethod(MethodReference ref) const
+      LOCKS_EXCLUDED(compiled_methods_lock_);
 
   const CompiledInvokeStub* FindInvokeStub(bool is_static, const char* shorty) const;
-  const CompiledInvokeStub* FindInvokeStub(const std::string& key) const;
+  const CompiledInvokeStub* FindInvokeStub(const std::string& key) const
+      LOCKS_EXCLUDED(compiled_invoke_stubs_lock_);
 
 #if defined(ART_USE_LLVM_COMPILER)
   const CompiledInvokeStub* FindProxyStub(const char* shorty) const;
 #endif
 
-  // Callbacks from OAT/ART compiler to see what runtime checks must be generated
+  // Callbacks from compiler to see what runtime checks must be generated.
 
-  bool CanAssumeTypeIsPresentInDexCache(const DexCache* dex_cache, uint32_t type_idx);
+  bool CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
-  bool CanAssumeStringIsPresentInDexCache(const DexCache* dex_cache, uint32_t string_idx);
+  bool CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, uint32_t string_idx)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
   // Are runtime access checks necessary in the compiled code?
-  bool CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexCache* dex_cache,
-                                  const DexFile& dex_file, uint32_t type_idx);
+  bool CanAccessTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
+                                  uint32_t type_idx)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
   // Are runtime access and instantiable checks necessary in the code?
-  bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, const DexCache* dex_cache,
-                                              const DexFile& dex_file, uint32_t type_idx);
+  bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, const DexFile& dex_file,
+                                              uint32_t type_idx)
+     LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
-  // Can we fast path instance field access? Computes field's offset and volatility
+  // Can we fast path instance field access? Computes field's offset and volatility.
   bool ComputeInstanceFieldInfo(uint32_t field_idx, OatCompilationUnit* mUnit,
-                                int& field_offset, bool& is_volatile, bool is_put);
+                                int& field_offset, bool& is_volatile, bool is_put)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
   // Can we fastpath static field access? Computes field's offset, volatility and whether the
-  // field is within the referrer (which can avoid checking class initialization)
+  // field is within the referrer (which can avoid checking class initialization).
   bool ComputeStaticFieldInfo(uint32_t field_idx, OatCompilationUnit* mUnit,
                               int& field_offset, int& ssb_index,
-                              bool& is_referrers_class, bool& is_volatile, bool is_put);
+                              bool& is_referrers_class, bool& is_volatile, bool is_put)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
-  // Can we fastpath a interface, super class or virtual method call? Computes method's vtable index
+  // Can we fastpath an interface, super class or virtual method call? Computes method's vtable
+  // index.
   bool ComputeInvokeInfo(uint32_t method_idx, OatCompilationUnit* mUnit, InvokeType& type,
-                         int& vtable_idx, uintptr_t& direct_code, uintptr_t& direct_method);
+                         int& vtable_idx, uintptr_t& direct_code, uintptr_t& direct_method)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
-  // Record patch information for later fix up
-  void AddCodePatch(DexCache* dex_cache,
-                    const DexFile* dex_file,
+  // Record patch information for later fix up.
+  void AddCodePatch(const DexFile* dex_file,
                     uint32_t referrer_method_idx,
                     uint32_t referrer_access_flags,
                     uint32_t target_method_idx,
                     bool target_is_direct,
-                    size_t literal_offset);
-  void AddMethodPatch(DexCache* dex_cache,
-                      const DexFile* dex_file,
+                    size_t literal_offset)
+      LOCKS_EXCLUDED(compiled_methods_lock_);
+  void AddMethodPatch(const DexFile* dex_file,
                       uint32_t referrer_method_idx,
                       uint32_t referrer_access_flags,
                       uint32_t target_method_idx,
                       bool target_is_direct,
-                      size_t literal_offset);
+                      size_t literal_offset)
+      LOCKS_EXCLUDED(compiled_methods_lock_);
 
 #if defined(ART_USE_LLVM_COMPILER) || defined(ART_USE_QUICK_COMPILER)
   void SetBitcodeFileName(std::string const& filename);
@@ -159,9 +174,6 @@
 
   class PatchInformation {
    public:
-    DexCache* GetDexCache() const {
-      return dex_cache_;
-    }
     const DexFile& GetDexFile() const {
       return *dex_file_;
     }
@@ -182,15 +194,13 @@
     }
 
    private:
-    PatchInformation(DexCache* dex_cache,
-                     const DexFile* dex_file,
+    PatchInformation(const DexFile* dex_file,
                      uint32_t referrer_method_idx,
                      uint32_t referrer_access_flags,
                      uint32_t target_method_idx,
                      uint32_t target_is_direct,
                      size_t literal_offset)
-      : dex_cache_(dex_cache),
-        dex_file_(dex_file),
+      : dex_file_(dex_file),
         referrer_method_idx_(referrer_method_idx),
         referrer_is_direct_(Method::IsDirect(referrer_access_flags)),
         target_method_idx_(target_method_idx),
@@ -199,7 +209,6 @@
       CHECK(dex_file_ != NULL);
     }
 
-    DexCache* dex_cache_;
     const DexFile* dex_file_;
     uint32_t referrer_method_idx_;
     bool referrer_is_direct_;
@@ -221,39 +230,55 @@
  private:
   // Compute constant code and method pointers when possible
   void GetCodeAndMethodForDirectCall(InvokeType type, InvokeType sharp_type, Method* method,
-                                     uintptr_t& direct_code, uintptr_t& direct_method);
+                                     uintptr_t& direct_code, uintptr_t& direct_method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Checks if class specified by type_idx is one of the image_classes_
   bool IsImageClass(const std::string& descriptor) const;
 
-  void PreCompile(ClassLoader* class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger& timings);
-  void PostCompile(ClassLoader* class_loader, const std::vector<const DexFile*>& dex_files);
+  void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
+                  TimingLogger& timings)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
+  void PostCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files);
 
   // Attempt to resolve all type, methods, fields, and strings
   // referenced from code in the dex file following PathClassLoader
   // ordering semantics.
-  void Resolve(ClassLoader* class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger& timings);
-  void ResolveDexFile(ClassLoader* class_loader, const DexFile& dex_file, TimingLogger& timings);
+  void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
+               TimingLogger& timings)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
+  void ResolveDexFile(jobject class_loader, const DexFile& dex_file, TimingLogger& timings)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
-  void Verify(ClassLoader* class_loader, const std::vector<const DexFile*>& dex_files);
-  void VerifyDexFile(ClassLoader* class_loader, const DexFile& dex_file);
+  void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files);
+  void VerifyDexFile(jobject class_loader, const DexFile& dex_file)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
-  void InitializeClassesWithoutClinit(ClassLoader* class_loader, const std::vector<const DexFile*>& dex_files);
-  void InitializeClassesWithoutClinit(ClassLoader* class_loader, const DexFile& dex_file);
+  void InitializeClassesWithoutClinit(jobject class_loader,
+                                      const std::vector<const DexFile*>& dex_files)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
+  void InitializeClassesWithoutClinit(jobject class_loader, const DexFile& dex_file)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_, compiled_classes_lock_);
 
-  void Compile(ClassLoader* class_loader,
-               const std::vector<const DexFile*>& dex_files);
-  void CompileDexFile(ClassLoader* class_loader, const DexFile& dex_file);
+  void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files);
+  void CompileDexFile(jobject class_loader, const DexFile& dex_file)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
   void CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags, uint32_t method_idx,
-                     ClassLoader* class_loader, const DexFile& dex_file);
+                     jobject class_loader, const DexFile& dex_file)
+      LOCKS_EXCLUDED(compiled_methods_lock_);
 
-  static void CompileClass(CompilationContext* context, size_t class_def_index);
+  static void CompileClass(const CompilationContext* context, size_t class_def_index)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
-  void SetGcMaps(ClassLoader* class_loader, const std::vector<const DexFile*>& dex_files);
-  void SetGcMapsDexFile(ClassLoader* class_loader, const DexFile& dex_file);
-  void SetGcMapsMethod(const DexFile& dex_file, Method* method);
+  void SetGcMaps(jobject class_loader, const std::vector<const DexFile*>& dex_files)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
+  void SetGcMapsDexFile(jobject class_loader, const DexFile& dex_file)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
+  void SetGcMapsMethod(const DexFile& dex_file, Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void InsertInvokeStub(const std::string& key, const CompiledInvokeStub* compiled_invoke_stub);
+  void InsertInvokeStub(const std::string& key, const CompiledInvokeStub* compiled_invoke_stub)
+      LOCKS_EXCLUDED(compiled_invoke_stubs_lock_);
 
 #if defined(ART_USE_LLVM_COMPILER)
   void InsertProxyStub(const char* shorty, const CompiledInvokeStub* compiled_proxy_stub);
@@ -266,17 +291,17 @@
 
   typedef SafeMap<const ClassReference, CompiledClass*> ClassTable;
   // All class references that this compiler has compiled
-  mutable Mutex compiled_classes_lock_;
+  mutable Mutex compiled_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ClassTable compiled_classes_ GUARDED_BY(compiled_classes_lock_);
 
   typedef SafeMap<const MethodReference, CompiledMethod*> MethodTable;
   // All method references that this compiler has compiled
-  mutable Mutex compiled_methods_lock_;
+  mutable Mutex compiled_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   MethodTable compiled_methods_ GUARDED_BY(compiled_methods_lock_);
 
   typedef SafeMap<std::string, const CompiledInvokeStub*> InvokeStubTable;
   // Invocation stubs created to allow invocation of the compiled methods
-  mutable Mutex compiled_invoke_stubs_lock_;
+  mutable Mutex compiled_invoke_stubs_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   InvokeStubTable compiled_invoke_stubs_ GUARDED_BY(compiled_invoke_stubs_lock_);
 
 #if defined(ART_USE_LLVM_COMPILER)
@@ -308,7 +333,7 @@
   typedef CompiledMethod* (*CompilerFn)(Compiler& compiler,
                                         const DexFile::CodeItem* code_item,
                                         uint32_t access_flags, uint32_t method_idx,
-                                        ClassLoader* class_loader,
+                                        jobject class_loader,
                                         const DexFile& dex_file);
   CompilerFn compiler_;
 
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
index 088768b..00ab796 100644
--- a/src/compiler/Compiler.h
+++ b/src/compiler/Compiler.h
@@ -204,7 +204,7 @@
 extern "C" art::CompiledMethod* ArtCompileMethod(art::Compiler& compiler,
                                                  const art::DexFile::CodeItem* code_item,
                                                  uint32_t access_flags, uint32_t method_idx,
-                                                 const art::ClassLoader* class_loader,
+                                                 jobject class_loader,
                                                  const art::DexFile& dex_file);
 
 #endif // ART_SRC_COMPILER_COMPILER_H_
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 1ecf61a..453ccdf 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -326,7 +326,6 @@
       compiler(NULL),
       class_linker(NULL),
       dex_file(NULL),
-      dex_cache(NULL),
       class_loader(NULL),
       method_idx(0),
       code_item(NULL),
@@ -436,8 +435,7 @@
   Compiler* compiler;            // Compiler driving this compiler
   ClassLinker* class_linker;     // Linker to resolve fields and methods
   const DexFile* dex_file;       // DexFile containing the method being compiled
-  DexCache* dex_cache;           // DexFile's corresponding cache
-  ClassLoader* class_loader;     // compiling method's class loader
+  jobject class_loader;          // compiling method's class loader
   uint32_t method_idx;                // compiling method's index into method_ids of DexFile
   const DexFile::CodeItem* code_item;  // compiling method's DexFile code_item
   uint32_t access_flags;              // compiling method's access flags
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc
index 33ef0ad..fc1d262 100644
--- a/src/compiler/Dataflow.cc
+++ b/src/compiler/Dataflow.cc
@@ -2274,7 +2274,7 @@
       return false;
   }
   OatCompilationUnit mUnit(cUnit->class_loader, cUnit->class_linker,
-                           *cUnit->dex_file, *cUnit->dex_cache,
+                           *cUnit->dex_file,
                            cUnit->code_item, cUnit->method_idx,
                            cUnit->access_flags);
   // TODO: add a flag so we don't counts the stats for this twice
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 7585b77..6cdfebc 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -733,7 +733,7 @@
 CompiledMethod* oatCompileMethod(Compiler& compiler,
                                  const DexFile::CodeItem* code_item,
                                  uint32_t access_flags, uint32_t method_idx,
-                                 const ClassLoader* class_loader,
+                                 jobject class_loader,
                                  const DexFile& dex_file)
 {
   VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "...";
@@ -751,7 +751,6 @@
   cUnit->compiler = &compiler;
   cUnit->class_linker = class_linker;
   cUnit->dex_file = &dex_file;
-  cUnit->dex_cache = class_linker->FindDexCache(dex_file);
   cUnit->method_idx = method_idx;
   cUnit->code_item = code_item;
   cUnit->access_flags = access_flags;
@@ -1161,7 +1160,7 @@
     ArtCompileMethod(art::Compiler& compiler,
                      const art::DexFile::CodeItem* code_item,
                      uint32_t access_flags, uint32_t method_idx,
-                     const art::ClassLoader* class_loader,
+                     jobject class_loader,
                      const art::DexFile& dex_file)
 {
   CHECK_EQ(compiler.GetInstructionSet(), art::oatInstructionSet());
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index a33a162..dd7daba 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -644,7 +644,7 @@
   if (dataLIR != NULL) {
     while (dataLIR != NULL) {
       uint32_t target = dataLIR->operands[0];
-      cUnit->compiler->AddCodePatch(cUnit->dex_cache, cUnit->dex_file,
+      cUnit->compiler->AddCodePatch(cUnit->dex_file,
                                     cUnit->method_idx,
                                     cUnit->access_flags,
                                     target,
@@ -659,7 +659,7 @@
     dataLIR = cUnit->methodLiteralList;
     while (dataLIR != NULL) {
       uint32_t target = dataLIR->operands[0];
-      cUnit->compiler->AddMethodPatch(cUnit->dex_cache, cUnit->dex_file,
+      cUnit->compiler->AddMethodPatch(cUnit->dex_file,
                                       cUnit->method_idx,
                                       cUnit->access_flags,
                                       target,
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index baa4b48..6cb701f 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -502,7 +502,6 @@
   oatFlushAllRegs(cUnit);  /* Everything to home location */
   int funcOffset;
   if (cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx,
-                                                  cUnit->dex_cache,
                                                   *cUnit->dex_file,
                                                   type_idx)) {
     funcOffset = ENTRYPOINT_OFFSET(pAllocArrayFromCode);
@@ -527,7 +526,6 @@
   oatFlushAllRegs(cUnit);  /* Everything to home location */
   int funcOffset;
   if (cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx,
-                                                  cUnit->dex_cache,
                                                   *cUnit->dex_file,
                                                   typeIdx)) {
     funcOffset = ENTRYPOINT_OFFSET(pCheckAndAllocArrayFromCode);
@@ -637,10 +635,8 @@
   bool isVolatile;
   bool isReferrersClass;
 
-  OatCompilationUnit mUnit(cUnit->class_loader, cUnit->class_linker,
-                           *cUnit->dex_file, *cUnit->dex_cache,
-                           cUnit->code_item, cUnit->method_idx,
-                           cUnit->access_flags);
+  OatCompilationUnit mUnit(cUnit->class_loader, cUnit->class_linker, *cUnit->dex_file,
+                           cUnit->code_item, cUnit->method_idx, cUnit->access_flags);
 
   bool fastPath =
       cUnit->compiler->ComputeStaticFieldInfo(fieldIdx, &mUnit,
@@ -735,7 +731,7 @@
   bool isReferrersClass;
 
   OatCompilationUnit mUnit(cUnit->class_loader, cUnit->class_linker,
-                           *cUnit->dex_file, *cUnit->dex_cache,
+                           *cUnit->dex_file,
                            cUnit->code_item, cUnit->method_idx,
                            cUnit->access_flags);
 
@@ -978,7 +974,7 @@
                   int& fieldOffset, bool& isVolatile, bool isPut)
 {
   OatCompilationUnit mUnit(cUnit->class_loader, cUnit->class_linker,
-               *cUnit->dex_file, *cUnit->dex_cache,
+               *cUnit->dex_file,
                cUnit->code_item, cUnit->method_idx,
                cUnit->access_flags);
   return cUnit->compiler->ComputeInstanceFieldInfo(fieldIdx, &mUnit,
@@ -1102,7 +1098,6 @@
   int resReg = oatAllocTemp(cUnit);
   RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
   if (!cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx,
-                                                   cUnit->dex_cache,
                                                    *cUnit->dex_file,
                                                    type_idx)) {
     // Call out to helper which resolves type and verifies access.
@@ -1121,7 +1116,7 @@
         Array::DataOffset(sizeof(Class*)).Int32Value() + (sizeof(Class*)
                           * type_idx);
     loadWordDisp(cUnit, resReg, offset_of_type, rlResult.lowReg);
-    if (!cUnit->compiler->CanAssumeTypeIsPresentInDexCache(cUnit->dex_cache,
+    if (!cUnit->compiler->CanAssumeTypeIsPresentInDexCache(*cUnit->dex_file,
         type_idx) || SLOW_TYPE_PATH) {
       // Slow path, at runtime test if type is null and if so initialize
       oatFlushAllRegs(cUnit);
@@ -1164,7 +1159,7 @@
   int32_t offset_of_string = Array::DataOffset(sizeof(String*)).Int32Value() +
                  (sizeof(String*) * string_idx);
   if (!cUnit->compiler->CanAssumeStringIsPresentInDexCache(
-      cUnit->dex_cache, string_idx) || SLOW_STRING_PATH) {
+      *cUnit->dex_file, string_idx) || SLOW_STRING_PATH) {
     // slow path, resolve string if not in dex cache
     oatFlushAllRegs(cUnit);
     oatLockCallTemps(cUnit); // Using explicit registers
@@ -1222,7 +1217,7 @@
   // access because the verifier was unable to?
   int funcOffset;
   if (cUnit->compiler->CanAccessInstantiableTypeWithoutChecks(
-      cUnit->method_idx, cUnit->dex_cache, *cUnit->dex_file, type_idx)) {
+      cUnit->method_idx, *cUnit->dex_file, type_idx)) {
     funcOffset = ENTRYPOINT_OFFSET(pAllocObjectFromCode);
   } else {
     funcOffset = ENTRYPOINT_OFFSET(pAllocObjectFromCodeWithAccessCheck);
@@ -1248,7 +1243,6 @@
   loadCurrMethodDirect(cUnit, rARG1);  // rARG1 <= current Method*
   int classReg = rARG2;  // rARG2 will hold the Class*
   if (!cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx,
-                                                   cUnit->dex_cache,
                                                    *cUnit->dex_file,
                                                    type_idx)) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
@@ -1268,7 +1262,7 @@
         * type_idx);
     loadWordDisp(cUnit, classReg, offset_of_type, classReg);
     if (!cUnit->compiler->CanAssumeTypeIsPresentInDexCache(
-        cUnit->dex_cache, type_idx)) {
+        *cUnit->dex_file, type_idx)) {
       // Need to test presence of type in dex cache at runtime
       LIR* hopBranch = opCmpImmBranch(cUnit, kCondNe, classReg, 0, NULL);
       // Not resolved
@@ -1333,7 +1327,6 @@
   loadCurrMethodDirect(cUnit, rARG1);  // rARG1 <= current Method*
   int classReg = rARG2;  // rARG2 will hold the Class*
   if (!cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx,
-                                                   cUnit->dex_cache,
                                                    *cUnit->dex_file,
                                                    type_idx)) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
@@ -1352,7 +1345,7 @@
         (sizeof(Class*) * type_idx);
     loadWordDisp(cUnit, classReg, offset_of_type, classReg);
     if (!cUnit->compiler->CanAssumeTypeIsPresentInDexCache(
-        cUnit->dex_cache, type_idx)) {
+        *cUnit->dex_file, type_idx)) {
       // Need to test presence of type in dex cache at runtime
       LIR* hopBranch = opCmpImmBranch(cUnit, kCondNe, classReg, 0, NULL);
       // Not resolved
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index b93cbd9..e2a5f97 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -68,7 +68,7 @@
   oatLockCallTemps(cUnit);
 
   OatCompilationUnit mUnit(cUnit->class_loader, cUnit->class_linker,
-                           *cUnit->dex_file, *cUnit->dex_cache,
+                           *cUnit->dex_file,
                            cUnit->code_item, cUnit->method_idx,
                            cUnit->access_flags);
 
diff --git a/src/compiler_llvm/runtime_support_llvm.cc b/src/compiler_llvm/runtime_support_llvm.cc
index a42696c..2eb9c65 100644
--- a/src/compiler_llvm/runtime_support_llvm.cc
+++ b/src/compiler_llvm/runtime_support_llvm.cc
@@ -28,7 +28,7 @@
 #include "reflection.h"
 #include "runtime_support.h"
 #include "runtime_support_func_list.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils_llvm.h"
@@ -656,7 +656,7 @@
 
   // Start new JNI local reference state
   JNIEnvExt* env = thread->GetJniEnv();
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   ScopedJniEnvLocalRefState env_state(env);
 
   // Create local ref. copies of the receiver
diff --git a/src/compiler_test.cc b/src/compiler_test.cc
index 088726f..0c53bb9 100644
--- a/src/compiler_test.cc
+++ b/src/compiler_test.cc
@@ -31,14 +31,16 @@
 
 class CompilerTest : public CommonTest {
  protected:
-  void CompileAll(ClassLoader* class_loader) {
+  void CompileAll(jobject class_loader) LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
     compiler_->CompileAll(class_loader, Runtime::Current()->GetCompileTimeClassPath(class_loader));
     MakeAllExecutable(class_loader);
   }
 
-  void EnsureCompiled(ClassLoader* class_loader, const char* class_name, const char* method,
-                      const char* signature, bool is_virtual) {
+  void EnsureCompiled(jobject class_loader, const char* class_name, const char* method,
+                      const char* signature, bool is_virtual)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
     CompileAll(class_loader);
+    Thread::Current()->TransitionFromSuspendedToRunnable();
     runtime_->Start();
     env_ = Thread::Current()->GetJniEnv();
     class_ = env_->FindClass(class_name);
@@ -51,7 +53,7 @@
     CHECK(mid_ != NULL) << "Method not found: " << class_name << "." << method << signature;
   }
 
-  void MakeAllExecutable(ClassLoader* class_loader) {
+  void MakeAllExecutable(jobject class_loader) {
     const std::vector<const DexFile*>& class_path
         = Runtime::Current()->GetCompileTimeClassPath(class_loader);
     for (size_t i = 0; i != class_path.size(); ++i) {
@@ -61,12 +63,13 @@
     }
   }
 
-  void MakeDexFileExecutable(ClassLoader* class_loader, const DexFile& dex_file) {
+  void MakeDexFileExecutable(jobject class_loader, const DexFile& dex_file) {
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     for (size_t i = 0; i < dex_file.NumClassDefs(); i++) {
       const DexFile::ClassDef& class_def = dex_file.GetClassDef(i);
       const char* descriptor = dex_file.GetClassDescriptor(class_def);
-      Class* c = class_linker->FindClass(descriptor, class_loader);
+      ScopedObjectAccess soa(Thread::Current());
+      Class* c = class_linker->FindClass(descriptor, soa.Decode<ClassLoader*>(class_loader));
       CHECK(c != NULL);
       for (size_t i = 0; i < c->NumDirectMethods(); i++) {
         MakeExecutable(c->GetDirectMethod(i));
@@ -87,6 +90,7 @@
   CompileAll(NULL);
 
   // All libcore references should resolve
+  ScopedObjectAccess soa(Thread::Current());
   const DexFile* dex = java_lang_dex_file_;
   DexCache* dex_cache = class_linker_->FindDexCache(*dex);
   EXPECT_EQ(dex->NumStringIds(), dex_cache->NumStrings());
@@ -125,12 +129,15 @@
 }
 
 TEST_F(CompilerTest, AbstractMethodErrorStub) {
-  CompileVirtualMethod(NULL, "java.lang.Class", "isFinalizable", "()Z");
-  CompileDirectMethod(NULL, "java.lang.Object", "<init>", "()V");
-
-  SirtRef<ClassLoader> class_loader(LoadDex("AbstractMethod"));
-  ASSERT_TRUE(class_loader.get() != NULL);
-  EnsureCompiled(class_loader.get(), "AbstractClass", "foo", "()V", true);
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    CompileVirtualMethod(NULL, "java.lang.Class", "isFinalizable", "()Z");
+    CompileDirectMethod(NULL, "java.lang.Object", "<init>", "()V");
+    class_loader = LoadDex("AbstractMethod");
+  }
+  ASSERT_TRUE(class_loader != NULL);
+  EnsureCompiled(class_loader, "AbstractClass", "foo", "()V", true);
 
   // Create a jobj_ of ConcreteClass, NOT AbstractClass.
   jclass c_class = env_->FindClass("ConcreteClass");
@@ -138,11 +145,13 @@
   jobject jobj_ = env_->NewObject(c_class, constructor);
   ASSERT_TRUE(jobj_ != NULL);
 
-  Class* jlame = class_linker_->FindClass("Ljava/lang/AbstractMethodError;", class_loader.get());
   // Force non-virtual call to AbstractClass foo, will throw AbstractMethodError exception.
   env_->CallNonvirtualVoidMethod(jobj_, class_, mid_);
-  EXPECT_TRUE(Thread::Current()->IsExceptionPending());
-  EXPECT_TRUE(Thread::Current()->GetException()->InstanceOf(jlame));
+  EXPECT_EQ(env_->ExceptionCheck(), JNI_TRUE);
+  jthrowable exception = env_->ExceptionOccurred();
+  env_->ExceptionClear();
+  jclass jlame = env_->FindClass("java/lang/AbstractMethodError");
+  EXPECT_TRUE(env_->IsInstanceOf(exception, jlame));
   Thread::Current()->ClearException();
 }
 
diff --git a/src/debugger.cc b/src/debugger.cc
index cd52f82..edb6e7f 100644
--- a/src/debugger.cc
+++ b/src/debugger.cc
@@ -28,10 +28,9 @@
 #endif
 #include "object_utils.h"
 #include "safe_map.h"
-#include "scoped_jni_thread_state.h"
-#include "scoped_thread_list_lock.h"
 #include "ScopedLocalRef.h"
 #include "ScopedPrimitiveArray.h"
+#include "scoped_thread_state_change.h"
 #include "space.h"
 #include "stack_indirect_reference_table.h"
 #include "thread_list.h"
@@ -91,7 +90,7 @@
   }
 
  private:
-  Mutex lock_;
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   SafeMap<JDWP::ObjectId, Object*> map_;
 };
 
@@ -99,7 +98,7 @@
   Method* method;
   uint32_t dex_pc;
 
-  int32_t LineNumber() const {
+  int32_t LineNumber() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return MethodHelper(method).GetLineNumFromDexPC(dex_pc);
   }
 };
@@ -125,7 +124,8 @@
   Breakpoint(Method* method, uint32_t dex_pc) : method(method), dex_pc(dex_pc) {}
 };
 
-static std::ostream& operator<<(std::ostream& os, const Breakpoint& rhs) {
+static std::ostream& operator<<(std::ostream& os, const Breakpoint& rhs)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   os << StringPrintf("Breakpoint[%s @%#x]", PrettyMethod(rhs.method).c_str(), rhs.dex_pc);
   return os;
 }
@@ -171,17 +171,18 @@
 static ObjectRegistry* gRegistry = NULL;
 
 // Recent allocation tracking.
-static Mutex gAllocTrackerLock("AllocTracker lock");
+static Mutex gAllocTrackerLock DEFAULT_MUTEX_ACQUIRED_AFTER ("AllocTracker lock");
 AllocRecord* Dbg::recent_allocation_records_ PT_GUARDED_BY(gAllocTrackerLock) = NULL; // TODO: CircularBuffer<AllocRecord>
 static size_t gAllocRecordHead GUARDED_BY(gAllocTrackerLock) = 0;
 static size_t gAllocRecordCount GUARDED_BY(gAllocTrackerLock) = 0;
 
 // Breakpoints and single-stepping.
-static Mutex gBreakpointsLock("breakpoints lock");
+static Mutex gBreakpointsLock DEFAULT_MUTEX_ACQUIRED_AFTER ("breakpoints lock");
 static std::vector<Breakpoint> gBreakpoints GUARDED_BY(gBreakpointsLock);
 static SingleStepControl gSingleStepControl GUARDED_BY(gBreakpointsLock);
 
-static bool IsBreakpoint(Method* m, uint32_t dex_pc) {
+static bool IsBreakpoint(Method* m, uint32_t dex_pc)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   MutexLock mu(gBreakpointsLock);
   for (size_t i = 0; i < gBreakpoints.size(); ++i) {
     if (gBreakpoints[i].method == m && gBreakpoints[i].dex_pc == dex_pc) {
@@ -192,7 +193,8 @@
   return false;
 }
 
-static Array* DecodeArray(JDWP::RefTypeId id, JDWP::JdwpError& status) {
+static Array* DecodeArray(JDWP::RefTypeId id, JDWP::JdwpError& status)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Object* o = gRegistry->Get<Object*>(id);
   if (o == NULL || o == kInvalidObject) {
     status = JDWP::ERR_INVALID_OBJECT;
@@ -206,7 +208,8 @@
   return o->AsArray();
 }
 
-static Class* DecodeClass(JDWP::RefTypeId id, JDWP::JdwpError& status) {
+static Class* DecodeClass(JDWP::RefTypeId id, JDWP::JdwpError& status)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Object* o = gRegistry->Get<Object*>(id);
   if (o == NULL || o == kInvalidObject) {
     status = JDWP::ERR_INVALID_OBJECT;
@@ -220,13 +223,15 @@
   return o->AsClass();
 }
 
-static Thread* DecodeThread(JDWP::ObjectId threadId) {
-  ScopedJniThreadState ts(Thread::Current());
+static Thread* DecodeThread(ScopedObjectAccessUnchecked& soa, JDWP::ObjectId threadId)
+    LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Object* thread_peer = gRegistry->Get<Object*>(threadId);
   if (thread_peer == NULL || thread_peer == kInvalidObject) {
     return NULL;
   }
-  return Thread::FromManagedThread(ts, thread_peer);
+  Thread* thread = Thread::FromManagedThread(soa, thread_peer);
+  return thread;
 }
 
 static JDWP::JdwpTag BasicTagFromDescriptor(const char* descriptor) {
@@ -235,7 +240,8 @@
   return static_cast<JDWP::JdwpTag>(descriptor[0]);
 }
 
-static JDWP::JdwpTag TagFromClass(Class* c) {
+static JDWP::JdwpTag TagFromClass(Class* c)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   CHECK(c != NULL);
   if (c->IsArrayClass()) {
     return JDWP::JT_ARRAY;
@@ -265,7 +271,8 @@
  *
  * Null objects are tagged JT_OBJECT.
  */
-static JDWP::JdwpTag TagFromObject(const Object* o) {
+static JDWP::JdwpTag TagFromObject(const Object* o)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return (o == NULL) ? JDWP::JT_OBJECT : TagFromClass(o->GetClass());
 }
 
@@ -417,7 +424,7 @@
   // If a debugger has already attached, send the "welcome" message.
   // This may cause us to suspend all threads.
   if (gJdwpState->IsActive()) {
-    //ScopedThreadStateChange tsc(Thread::Current(), kRunnable);
+    ScopedObjectAccess soa(Thread::Current());
     if (!gJdwpState->PostVMStart()) {
       LOG(WARNING) << "Failed to post 'start' message to debugger";
     }
@@ -432,14 +439,17 @@
 
 void Dbg::GcDidFinish() {
   if (gDdmHpifWhen != HPIF_WHEN_NEVER) {
+    ScopedObjectAccess soa(Thread::Current());
     LOG(DEBUG) << "Sending heap info to DDM";
     DdmSendHeapInfo(gDdmHpifWhen);
   }
   if (gDdmHpsgWhen != HPSG_WHEN_NEVER) {
+    ScopedObjectAccess soa(Thread::Current());
     LOG(DEBUG) << "Dumping heap to DDM";
     DdmSendHeapSegments(false);
   }
   if (gDdmNhsgWhen != HPSG_WHEN_NEVER) {
+    ScopedObjectAccess soa(Thread::Current());
     LOG(DEBUG) << "Dumping native heap to DDM";
     DdmSendHeapSegments(true);
   }
@@ -481,6 +491,7 @@
 }
 
 static void SetDebuggerUpdatesEnabled(bool enabled) {
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
   Runtime::Current()->GetThreadList()->ForEach(SetDebuggerUpdatesEnabledCallback, &enabled);
 }
 
@@ -528,18 +539,6 @@
   return gJdwpState->LastDebuggerActivity();
 }
 
-int Dbg::ThreadRunning() {
-  return static_cast<int>(Thread::Current()->SetState(kRunnable));
-}
-
-int Dbg::ThreadWaiting() {
-  return static_cast<int>(Thread::Current()->SetState(kVmWait));
-}
-
-int Dbg::ThreadContinuing(int new_state) {
-  return static_cast<int>(Thread::Current()->SetState(static_cast<ThreadState>(new_state)));
-}
-
 void Dbg::UndoDebuggerSuspensions() {
   Runtime::Current()->GetThreadList()->UndoDebuggerSuspensions();
 }
@@ -829,7 +828,9 @@
   return JDWP::ERR_NONE;
 }
 
-JDWP::JdwpError Dbg::SetArrayElements(JDWP::ObjectId arrayId, int offset, int count, const uint8_t* src) {
+JDWP::JdwpError Dbg::SetArrayElements(JDWP::ObjectId arrayId, int offset, int count,
+                                      const uint8_t* src)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   JDWP::JdwpError status;
   Array* a = DecodeArray(arrayId, status);
   if (a == NULL) {
@@ -898,7 +899,8 @@
 /*
  * Used by Eclipse's "Display" view to evaluate "new byte[5]" to get "(byte[]) [0, 0, 0, 0, 0]".
  */
-JDWP::JdwpError Dbg::CreateArrayObject(JDWP::RefTypeId arrayClassId, uint32_t length, JDWP::ObjectId& new_array) {
+JDWP::JdwpError Dbg::CreateArrayObject(JDWP::RefTypeId arrayClassId, uint32_t length,
+                                       JDWP::ObjectId& new_array) {
   JDWP::JdwpError status;
   Class* c = DecodeClass(arrayClassId, status);
   if (c == NULL) {
@@ -917,7 +919,8 @@
   return c1->IsAssignableFrom(c2);
 }
 
-static JDWP::FieldId ToFieldId(const Field* f) {
+static JDWP::FieldId ToFieldId(const Field* f)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
 #ifdef MOVING_GARBAGE_COLLECTOR
   UNIMPLEMENTED(FATAL);
 #else
@@ -925,7 +928,8 @@
 #endif
 }
 
-static JDWP::MethodId ToMethodId(const Method* m) {
+static JDWP::MethodId ToMethodId(const Method* m)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
 #ifdef MOVING_GARBAGE_COLLECTOR
   UNIMPLEMENTED(FATAL);
 #else
@@ -933,7 +937,8 @@
 #endif
 }
 
-static Field* FromFieldId(JDWP::FieldId fid) {
+static Field* FromFieldId(JDWP::FieldId fid)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
 #ifdef MOVING_GARBAGE_COLLECTOR
   UNIMPLEMENTED(FATAL);
 #else
@@ -941,7 +946,8 @@
 #endif
 }
 
-static Method* FromMethodId(JDWP::MethodId mid) {
+static Method* FromMethodId(JDWP::MethodId mid)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
 #ifdef MOVING_GARBAGE_COLLECTOR
   UNIMPLEMENTED(FATAL);
 #else
@@ -949,7 +955,8 @@
 #endif
 }
 
-static void SetLocation(JDWP::JdwpLocation& location, Method* m, uint32_t dex_pc) {
+static void SetLocation(JDWP::JdwpLocation& location, Method* m, uint32_t dex_pc)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (m == NULL) {
     memset(&location, 0, sizeof(location));
   } else {
@@ -961,7 +968,8 @@
   }
 }
 
-std::string Dbg::GetMethodName(JDWP::RefTypeId, JDWP::MethodId methodId) {
+std::string Dbg::GetMethodName(JDWP::RefTypeId, JDWP::MethodId methodId)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Method* m = FromMethodId(methodId);
   return MethodHelper(m).GetName();
 }
@@ -1004,7 +1012,8 @@
   return newSlot;
 }
 
-static uint16_t DemangleSlot(uint16_t slot, Method* m) {
+static uint16_t DemangleSlot(uint16_t slot, Method* m)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (slot == kEclipseWorkaroundSlot) {
     return 0;
   } else if (slot == 0) {
@@ -1042,7 +1051,8 @@
   return JDWP::ERR_NONE;
 }
 
-JDWP::JdwpError Dbg::OutputDeclaredMethods(JDWP::RefTypeId classId, bool with_generic, JDWP::ExpandBuf* pReply) {
+JDWP::JdwpError Dbg::OutputDeclaredMethods(JDWP::RefTypeId classId, bool with_generic,
+                                           JDWP::ExpandBuf* pReply) {
   JDWP::JdwpError status;
   Class* c = DecodeClass(classId, status);
   if (c == NULL) {
@@ -1085,7 +1095,8 @@
   return JDWP::ERR_NONE;
 }
 
-void Dbg::OutputLineTable(JDWP::RefTypeId, JDWP::MethodId methodId, JDWP::ExpandBuf* pReply) {
+void Dbg::OutputLineTable(JDWP::RefTypeId, JDWP::MethodId methodId, JDWP::ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   struct DebugCallbackContext {
     int numItems;
     JDWP::ExpandBuf* pReply;
@@ -1098,7 +1109,6 @@
       return true;
     }
   };
-
   Method* m = FromMethodId(methodId);
   MethodHelper mh(m);
   uint64_t start, end;
@@ -1153,7 +1163,6 @@
       ++pContext->variable_count;
     }
   };
-
   Method* m = FromMethodId(methodId);
   MethodHelper mh(m);
   const DexFile::CodeItem* code_item = mh.GetCodeItem();
@@ -1186,7 +1195,10 @@
   return BasicTagFromDescriptor(FieldHelper(FromFieldId(fieldId)).GetTypeDescriptor());
 }
 
-static JDWP::JdwpError GetFieldValueImpl(JDWP::RefTypeId refTypeId, JDWP::ObjectId objectId, JDWP::FieldId fieldId, JDWP::ExpandBuf* pReply, bool is_static) {
+static JDWP::JdwpError GetFieldValueImpl(JDWP::RefTypeId refTypeId, JDWP::ObjectId objectId,
+                                         JDWP::FieldId fieldId, JDWP::ExpandBuf* pReply,
+                                         bool is_static)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   JDWP::JdwpError status;
   Class* c = DecodeClass(refTypeId, status);
   if (refTypeId != 0 && c == NULL) {
@@ -1245,7 +1257,8 @@
   return JDWP::ERR_NONE;
 }
 
-JDWP::JdwpError Dbg::GetFieldValue(JDWP::ObjectId objectId, JDWP::FieldId fieldId, JDWP::ExpandBuf* pReply) {
+JDWP::JdwpError Dbg::GetFieldValue(JDWP::ObjectId objectId, JDWP::FieldId fieldId,
+                                   JDWP::ExpandBuf* pReply) {
   return GetFieldValueImpl(0, objectId, fieldId, pReply, false);
 }
 
@@ -1253,7 +1266,9 @@
   return GetFieldValueImpl(refTypeId, 0, fieldId, pReply, true);
 }
 
-static JDWP::JdwpError SetFieldValueImpl(JDWP::ObjectId objectId, JDWP::FieldId fieldId, uint64_t value, int width, bool is_static) {
+static JDWP::JdwpError SetFieldValueImpl(JDWP::ObjectId objectId, JDWP::FieldId fieldId,
+                                         uint64_t value, int width, bool is_static)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Object* o = gRegistry->Get<Object*>(objectId);
   if ((!is_static && o == NULL) || o == kInvalidObject) {
     return JDWP::ERR_INVALID_OBJECT;
@@ -1300,7 +1315,8 @@
   return JDWP::ERR_NONE;
 }
 
-JDWP::JdwpError Dbg::SetFieldValue(JDWP::ObjectId objectId, JDWP::FieldId fieldId, uint64_t value, int width) {
+JDWP::JdwpError Dbg::SetFieldValue(JDWP::ObjectId objectId, JDWP::FieldId fieldId, uint64_t value,
+                                   int width) {
   return SetFieldValueImpl(objectId, fieldId, value, width, false);
 }
 
@@ -1314,8 +1330,9 @@
 }
 
 bool Dbg::GetThreadName(JDWP::ObjectId threadId, std::string& name) {
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = DecodeThread(threadId);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  Thread* thread = DecodeThread(soa, threadId);
   if (thread == NULL) {
     return false;
   }
@@ -1324,13 +1341,15 @@
 }
 
 JDWP::JdwpError Dbg::GetThreadGroup(JDWP::ObjectId threadId, JDWP::ExpandBuf* pReply) {
+  ScopedObjectAccess soa(Thread::Current());
   Object* thread = gRegistry->Get<Object*>(threadId);
   if (thread == kInvalidObject) {
     return JDWP::ERR_INVALID_OBJECT;
   }
 
   // Okay, so it's an object, but is it actually a thread?
-  if (DecodeThread(threadId) == NULL) {
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  if (DecodeThread(soa, threadId) == NULL) {
     return JDWP::ERR_INVALID_THREAD;
   }
 
@@ -1347,6 +1366,7 @@
 }
 
 std::string Dbg::GetThreadGroupName(JDWP::ObjectId threadGroupId) {
+  ScopedObjectAccess soa(Thread::Current());
   Object* thread_group = gRegistry->Get<Object*>(threadGroupId);
   CHECK(thread_group != NULL);
 
@@ -1371,27 +1391,30 @@
 }
 
 JDWP::ObjectId Dbg::GetSystemThreadGroupId() {
-  ScopedJniThreadState ts(Thread::Current());
+  ScopedObjectAccessUnchecked soa(Thread::Current());
   Object* group =
-      ts.DecodeField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup)->GetObject(NULL);
+      soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup)->GetObject(NULL);
   return gRegistry->Add(group);
 }
 
 JDWP::ObjectId Dbg::GetMainThreadGroupId() {
-  ScopedJniThreadState ts(Thread::Current());
+  ScopedObjectAccess soa(Thread::Current());
   Object* group =
-      ts.DecodeField(WellKnownClasses::java_lang_ThreadGroup_mainThreadGroup)->GetObject(NULL);
+      soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_mainThreadGroup)->GetObject(NULL);
   return gRegistry->Add(group);
 }
 
 bool Dbg::GetThreadStatus(JDWP::ObjectId threadId, JDWP::JdwpThreadStatus* pThreadStatus, JDWP::JdwpSuspendStatus* pSuspendStatus) {
-  ScopedThreadListLock thread_list_lock;
+  ScopedObjectAccess soa(Thread::Current());
 
-  Thread* thread = DecodeThread(threadId);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = DecodeThread(soa, threadId);
   if (thread == NULL) {
     return false;
   }
 
+  MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+
   // TODO: if we're in Thread.sleep(long), we should return TS_SLEEPING,
   // even if it's implemented using Object.wait(long).
   switch (thread->GetState()) {
@@ -1402,7 +1425,16 @@
     case kWaiting:      *pThreadStatus = JDWP::TS_WAIT;     break;
     case kStarting:     *pThreadStatus = JDWP::TS_ZOMBIE;   break;
     case kNative:       *pThreadStatus = JDWP::TS_RUNNING;  break;
-    case kVmWait:       *pThreadStatus = JDWP::TS_WAIT;     break;
+    case kWaitingForGcToComplete:  // Fall-through.
+    case kWaitingPerformingGc:  // Fall-through.
+    case kWaitingForDebuggerSend:  // Fall-through.
+    case kWaitingForDebuggerToAttach:  // Fall-through.
+    case kWaitingInMainDebuggerLoop:  // Fall-through.
+    case kWaitingForDebuggerSuspension:  // Fall-through.
+    case kWaitingForJniOnLoad:  // Fall-through.
+    case kWaitingForSignalCatcherOutput:  // Fall-through.
+    case kWaitingInMainSignalCatcherLoop:
+                        *pThreadStatus = JDWP::TS_WAIT;     break;
     case kSuspended:    *pThreadStatus = JDWP::TS_RUNNING;  break;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
   }
@@ -1412,34 +1444,49 @@
   return true;
 }
 
-JDWP::JdwpError Dbg::GetThreadSuspendCount(JDWP::ObjectId threadId, JDWP::ExpandBuf* pReply) {
-  Thread* thread = DecodeThread(threadId);
+JDWP::JdwpError Dbg::GetThreadDebugSuspendCount(JDWP::ObjectId threadId, JDWP::ExpandBuf* pReply) {
+  ScopedObjectAccess soa(Thread::Current());
+
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = DecodeThread(soa, threadId);
   if (thread == NULL) {
     return JDWP::ERR_INVALID_THREAD;
   }
-  expandBufAdd4BE(pReply, thread->GetSuspendCount());
+  MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+  expandBufAdd4BE(pReply, thread->GetDebugSuspendCount());
   return JDWP::ERR_NONE;
 }
 
 bool Dbg::ThreadExists(JDWP::ObjectId threadId) {
-  return DecodeThread(threadId) != NULL;
+  ScopedObjectAccess soa(Thread::Current());
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  return DecodeThread(soa, threadId) != NULL;
 }
 
 bool Dbg::IsSuspended(JDWP::ObjectId threadId) {
-  return DecodeThread(threadId)->IsSuspended();
+  ScopedObjectAccess soa(Thread::Current());
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = DecodeThread(soa, threadId);
+  CHECK(thread != NULL);
+  MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+  return thread->IsSuspended();
 }
 
 void Dbg::GetThreads(JDWP::ObjectId thread_group_id, std::vector<JDWP::ObjectId>& thread_ids) {
   class ThreadListVisitor {
    public:
-    ThreadListVisitor(const ScopedJniThreadState& ts, Object* thread_group, std::vector<JDWP::ObjectId>& thread_ids)
+    ThreadListVisitor(const ScopedObjectAccessUnchecked& ts, Object* thread_group,
+                      std::vector<JDWP::ObjectId>& thread_ids)
+        SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
         : ts_(ts), thread_group_(thread_group), thread_ids_(thread_ids) {}
 
     static void Visit(Thread* t, void* arg) {
       reinterpret_cast<ThreadListVisitor*>(arg)->Visit(t);
     }
 
-    void Visit(Thread* t) {
+    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
+    // annotalysis.
+    void Visit(Thread* t) NO_THREAD_SAFETY_ANALYSIS {
       if (t == Dbg::GetDebugThread()) {
         // Skip the JDWP thread. Some debuggers get bent out of shape when they can't suspend and
         // query all threads, so it's easier if we just don't tell them about this thread.
@@ -1451,19 +1498,20 @@
     }
 
    private:
-    const ScopedJniThreadState& ts_;
+    const ScopedObjectAccessUnchecked& ts_;
     Object* const thread_group_;
     std::vector<JDWP::ObjectId>& thread_ids_;
   };
 
-  ScopedJniThreadState ts(Thread::Current());
+  ScopedObjectAccessUnchecked soa(Thread::Current());
   Object* thread_group = gRegistry->Get<Object*>(thread_group_id);
-  ThreadListVisitor tlv(ts, thread_group, thread_ids);
+  ThreadListVisitor tlv(soa, thread_group, thread_ids);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
   Runtime::Current()->GetThreadList()->ForEach(ThreadListVisitor::Visit, &tlv);
 }
 
 void Dbg::GetChildThreadGroups(JDWP::ObjectId thread_group_id, std::vector<JDWP::ObjectId>& child_thread_group_ids) {
-  ScopedJniThreadState ts(Thread::Current());
+  ScopedObjectAccess soa(Thread::Current());
   Object* thread_group = gRegistry->Get<Object*>(thread_group_id);
 
   // Get the ArrayList<ThreadGroup> "groups" out of this thread group...
@@ -1482,7 +1530,8 @@
   }
 }
 
-static int GetStackDepth(Thread* thread) {
+static int GetStackDepth(Thread* thread)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   struct CountStackDepthVisitor : public StackVisitor {
     CountStackDepthVisitor(const ManagedStack* stack,
                            const std::vector<TraceStackFrame>* trace_stack)
@@ -1497,28 +1546,34 @@
     size_t depth;
   };
 
+  if (kIsDebugBuild) {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK(thread->IsSuspended());
+  }
   CountStackDepthVisitor visitor(thread->GetManagedStack(), thread->GetTraceStack());
   visitor.WalkStack();
   return visitor.depth;
 }
 
 int Dbg::GetThreadFrameCount(JDWP::ObjectId threadId) {
-  ScopedThreadListLock thread_list_lock;
-  return GetStackDepth(DecodeThread(threadId));
+  ScopedObjectAccess soa(Thread::Current());
+  return GetStackDepth(DecodeThread(soa, threadId));
 }
 
 JDWP::JdwpError Dbg::GetThreadFrames(JDWP::ObjectId thread_id, size_t start_frame, size_t frame_count, JDWP::ExpandBuf* buf) {
-  ScopedThreadListLock thread_list_lock;
   class GetFrameVisitor : public StackVisitor {
    public:
     GetFrameVisitor(const ManagedStack* stack, const std::vector<TraceStackFrame>* trace_stack,
                     size_t start_frame, size_t frame_count, JDWP::ExpandBuf* buf)
+        SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
         : StackVisitor(stack, trace_stack, NULL), depth_(0),
           start_frame_(start_frame), frame_count_(frame_count), buf_(buf) {
       expandBufAdd4BE(buf_, frame_count_);
     }
 
-    bool VisitFrame() {
+    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
+    // annotalysis.
+    virtual bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
       if (GetMethod()->IsRuntimeMethod()) {
         return true; // The debugger can't do anything useful with a frame that has no Method*.
       }
@@ -1543,7 +1598,9 @@
     const size_t frame_count_;
     JDWP::ExpandBuf* buf_;
   };
-  Thread* thread = DecodeThread(thread_id);
+
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  Thread* thread = DecodeThread(soa, thread_id);  // Caller already checked thread is suspended.
   GetFrameVisitor visitor(thread->GetManagedStack(), thread->GetTraceStack(), start_frame, frame_count, buf);
   visitor.WalkStack();
   return JDWP::ERR_NONE;
@@ -1554,36 +1611,51 @@
 }
 
 void Dbg::SuspendVM() {
-  ScopedThreadStateChange tsc(Thread::Current(), kRunnable); // TODO: do we really want to change back? should the JDWP thread be Runnable usually?
-  Runtime::Current()->GetThreadList()->SuspendAll(true);
+  Runtime::Current()->GetThreadList()->SuspendAllForDebugger();
 }
 
 void Dbg::ResumeVM() {
   Runtime::Current()->GetThreadList()->UndoDebuggerSuspensions();
 }
 
-void Dbg::SuspendThread(JDWP::ObjectId threadId) {
-  ScopedJniThreadState ts(Thread::Current());
-  Object* peer = gRegistry->Get<Object*>(threadId);
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = Thread::FromManagedThread(ts, peer);
-  if (thread == NULL) {
-    LOG(WARNING) << "No such thread for suspend: " << peer;
-    return;
+JDWP::JdwpError Dbg::SuspendThread(JDWP::ObjectId threadId, bool request_suspension) {
+
+  bool timeout;
+  ScopedLocalRef<jobject> peer(Thread::Current()->GetJniEnv(), NULL);
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    peer.reset(soa.AddLocalReference<jobject>(gRegistry->Get<Object*>(threadId)));
   }
-  Runtime::Current()->GetThreadList()->Suspend(thread, true);
+  if (peer.get() == NULL) {
+    LOG(WARNING) << "No such thread for suspend: " << threadId;
+    return JDWP::ERR_THREAD_NOT_ALIVE;
+  }
+  // Suspend thread to build stack trace.
+  Thread* thread = Thread::SuspendForDebugger(peer.get(), request_suspension, &timeout);
+  if (thread != NULL) {
+    return JDWP::ERR_NONE;
+  } else if (timeout) {
+    return JDWP::ERR_INTERNAL;
+  } else {
+    return JDWP::ERR_THREAD_NOT_ALIVE;
+  }
 }
 
 void Dbg::ResumeThread(JDWP::ObjectId threadId) {
-  ScopedJniThreadState ts(Thread::Current());
+  ScopedObjectAccessUnchecked soa(Thread::Current());
   Object* peer = gRegistry->Get<Object*>(threadId);
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = Thread::FromManagedThread(ts, peer);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = Thread::FromManagedThread(soa, peer);
   if (thread == NULL) {
     LOG(WARNING) << "No such thread for resume: " << peer;
     return;
   }
-  if (thread->GetSuspendCount() > 0) {
+  bool needs_resume;
+  {
+    MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+    needs_resume = thread->GetSuspendCount() > 0;
+  }
+  if (needs_resume) {
     Runtime::Current()->GetThreadList()->Resume(thread, true);
   }
 }
@@ -1595,9 +1667,12 @@
 struct GetThisVisitor : public StackVisitor {
   GetThisVisitor(const ManagedStack* stack, const std::vector<TraceStackFrame>* trace_stack,
                  Context* context, JDWP::FrameId frameId)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : StackVisitor(stack, trace_stack, context), this_object(NULL), frame_id(frameId) {}
 
-  virtual bool VisitFrame() {
+  // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
+  // annotalysis.
+  virtual bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
     if (frame_id != GetFrameId()) {
       return true;  // continue
     }
@@ -1615,7 +1690,8 @@
   JDWP::FrameId frame_id;
 };
 
-static Object* GetThis(Thread* self, Method* m, size_t frame_id) {
+static Object* GetThis(Thread* self, Method* m, size_t frame_id)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // TODO: should we return the 'this' we passed through to non-static native methods?
   if (m->IsNative() || m->IsStatic()) {
     return NULL;
@@ -1627,12 +1703,21 @@
   return visitor.this_object;
 }
 
-JDWP::JdwpError Dbg::GetThisObject(JDWP::ObjectId thread_id, JDWP::FrameId frame_id, JDWP::ObjectId* result) {
-  Thread* thread = DecodeThread(thread_id);
-  if (thread == NULL) {
-    return JDWP::ERR_INVALID_THREAD;
+JDWP::JdwpError Dbg::GetThisObject(JDWP::ObjectId thread_id, JDWP::FrameId frame_id,
+                                   JDWP::ObjectId* result) {
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  Thread* thread;
+  {
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    thread = DecodeThread(soa, thread_id);
+    if (thread == NULL) {
+      return JDWP::ERR_INVALID_THREAD;
+    }
+    MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+    if (!thread->IsSuspended()) {
+      return JDWP::ERR_THREAD_NOT_SUSPENDED;
+    }
   }
-
   UniquePtr<Context> context(Context::Create());
   GetThisVisitor visitor(thread->GetManagedStack(), thread->GetTraceStack(), context.get(), frame_id);
   visitor.WalkStack();
@@ -1640,15 +1725,19 @@
   return JDWP::ERR_NONE;
 }
 
-void Dbg::GetLocalValue(JDWP::ObjectId threadId, JDWP::FrameId frameId, int slot, JDWP::JdwpTag tag, uint8_t* buf, size_t width) {
+void Dbg::GetLocalValue(JDWP::ObjectId threadId, JDWP::FrameId frameId, int slot, JDWP::JdwpTag tag,
+                        uint8_t* buf, size_t width) {
   struct GetLocalVisitor : public StackVisitor {
     GetLocalVisitor(const ManagedStack* stack, const std::vector<TraceStackFrame>* trace_stack,
                     Context* context, JDWP::FrameId frameId, int slot, JDWP::JdwpTag tag,
                     uint8_t* buf, size_t width)
+        SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
         : StackVisitor(stack, trace_stack, context), frame_id_(frameId), slot_(slot), tag_(tag),
           buf_(buf), width_(width) {}
 
-    bool VisitFrame() {
+    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
+    // annotalysis.
+    bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
       if (GetFrameId() != frame_id_) {
         return true;  // Not our frame, carry on.
       }
@@ -1746,7 +1835,9 @@
     uint8_t* const buf_;
     const size_t width_;
   };
-  Thread* thread = DecodeThread(threadId);
+
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  Thread* thread = DecodeThread(soa, threadId);
   UniquePtr<Context> context(Context::Create());
   GetLocalVisitor visitor(thread->GetManagedStack(), thread->GetTraceStack(), context.get(),
                           frameId, slot, tag, buf, width);
@@ -1759,10 +1850,13 @@
     SetLocalVisitor(const ManagedStack* stack, const std::vector<TraceStackFrame>* trace_stack, Context* context,
                     JDWP::FrameId frame_id, int slot, JDWP::JdwpTag tag, uint64_t value,
                     size_t width)
+        SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
         : StackVisitor(stack, trace_stack, context),
           frame_id_(frame_id), slot_(slot), tag_(tag), value_(value), width_(width) {}
 
-    bool VisitFrame() {
+    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
+    // annotalysis.
+    bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
       if (GetFrameId() != frame_id_) {
         return true;  // Not our frame, carry on.
       }
@@ -1817,7 +1911,9 @@
     const uint64_t value_;
     const size_t width_;
   };
-  Thread* thread = DecodeThread(threadId);
+
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  Thread* thread = DecodeThread(soa, threadId);
   UniquePtr<Context> context(Context::Create());
   SetLocalVisitor visitor(thread->GetManagedStack(), thread->GetTraceStack(), context.get(),
                           frameId, slot, tag, value, width);
@@ -2018,14 +2114,15 @@
   }
 }
 
-JDWP::JdwpError Dbg::ConfigureStep(JDWP::ObjectId threadId, JDWP::JdwpStepSize step_size, JDWP::JdwpStepDepth step_depth) {
-  Thread* thread = DecodeThread(threadId);
+JDWP::JdwpError Dbg::ConfigureStep(JDWP::ObjectId threadId, JDWP::JdwpStepSize step_size,
+                                   JDWP::JdwpStepDepth step_depth) {
+  ScopedObjectAccessUnchecked soa(Thread::Current());
+  Thread* thread = DecodeThread(soa, threadId);
   if (thread == NULL) {
     return JDWP::ERR_INVALID_THREAD;
   }
 
   MutexLock mu(gBreakpointsLock);
-
   // TODO: there's no theoretical reason why we couldn't support single-stepping
   // of multiple threads at once, but we never did so historically.
   if (gSingleStepControl.thread != NULL && thread != gSingleStepControl.thread) {
@@ -2041,14 +2138,18 @@
   struct SingleStepStackVisitor : public StackVisitor {
     SingleStepStackVisitor(const ManagedStack* stack,
                            const std::vector<TraceStackFrame>* trace_stack)
+        EXCLUSIVE_LOCKS_REQUIRED(gBreakpointsLock)
+        SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
         : StackVisitor(stack, trace_stack, NULL) {
-      MutexLock mu(gBreakpointsLock); // Keep GCC happy.
+      gBreakpointsLock.AssertHeld();
       gSingleStepControl.method = NULL;
       gSingleStepControl.stack_depth = 0;
     }
 
-    bool VisitFrame() {
-      MutexLock mu(gBreakpointsLock); // Keep GCC happy.
+    // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
+    // annotalysis.
+    bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
+      gBreakpointsLock.AssertHeld();
       const Method* m = GetMethod();
       if (!m->IsRuntimeMethod()) {
         ++gSingleStepControl.stack_depth;
@@ -2185,14 +2286,21 @@
   }
 }
 
-JDWP::JdwpError Dbg::InvokeMethod(JDWP::ObjectId threadId, JDWP::ObjectId objectId, JDWP::RefTypeId classId, JDWP::MethodId methodId, uint32_t arg_count, uint64_t* arg_values, JDWP::JdwpTag* arg_types, uint32_t options, JDWP::JdwpTag* pResultTag, uint64_t* pResultValue, JDWP::ObjectId* pExceptionId) {
+JDWP::JdwpError Dbg::InvokeMethod(JDWP::ObjectId threadId, JDWP::ObjectId objectId,
+                                  JDWP::RefTypeId classId, JDWP::MethodId methodId,
+                                  uint32_t arg_count, uint64_t* arg_values,
+                                  JDWP::JdwpTag* arg_types, uint32_t options,
+                                  JDWP::JdwpTag* pResultTag, uint64_t* pResultValue,
+                                  JDWP::ObjectId* pExceptionId) {
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
 
   Thread* targetThread = NULL;
   DebugInvokeReq* req = NULL;
+  Thread* self = Thread::Current();
   {
-    ScopedThreadListLock thread_list_lock;
-    targetThread = DecodeThread(threadId);
+    ScopedObjectAccessUnchecked soa(self);
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    targetThread = DecodeThread(soa, threadId);
     if (targetThread == NULL) {
       LOG(ERROR) << "InvokeMethod request for non-existent thread " << threadId;
       return JDWP::ERR_INVALID_THREAD;
@@ -2217,7 +2325,11 @@
      * by rejecting the method invocation request.  Without this, we will
      * be stuck waiting on a suspended thread.
      */
-    int suspend_count = targetThread->GetSuspendCount();
+    int suspend_count;
+    {
+      MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+      suspend_count = targetThread->GetSuspendCount();
+    }
     if (suspend_count > 1) {
       LOG(ERROR) << *targetThread << " suspend count too deep for method invocation: " << suspend_count;
       return JDWP::ERR_THREAD_SUSPENDED; // Probably not expected here.
@@ -2287,7 +2399,7 @@
      * run out of memory.  It's also a good idea to change it before locking
      * the invokeReq mutex, although that should never be held for long.
      */
-    ScopedThreadStateChange tsc(Thread::Current(), kVmWait);
+    self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSend);
 
     VLOG(jdwp) << "    Transferring control to event thread";
     {
@@ -2295,7 +2407,7 @@
 
       if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
         VLOG(jdwp) << "      Resuming all threads";
-        thread_list->ResumeAll(true);
+        thread_list->UndoDebuggerSuspensions();
       } else {
         VLOG(jdwp) << "      Resuming event thread only";
         thread_list->Resume(targetThread, true);
@@ -2309,8 +2421,8 @@
     VLOG(jdwp) << "    Control has returned from event thread";
 
     /* wait for thread to re-suspend itself */
-    targetThread->WaitUntilSuspended();
-    //dvmWaitForSuspend(targetThread);
+    SuspendThread(threadId, false /* request_suspension */ );
+    self->TransitionFromSuspendedToRunnable();
   }
 
   /*
@@ -2321,8 +2433,10 @@
    * so we want to resume the target thread once to keep the books straight.
    */
   if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) {
+    self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension);
     VLOG(jdwp) << "      Suspending all threads";
-    thread_list->SuspendAll(true);
+    thread_list->SuspendAllForDebugger();
+    self->TransitionFromSuspendedToRunnable();
     VLOG(jdwp) << "      Resuming event thread to balance the count";
     thread_list->Resume(targetThread, true);
   }
@@ -2339,12 +2453,12 @@
 }
 
 void Dbg::ExecuteMethod(DebugInvokeReq* pReq) {
-  ScopedJniThreadState ts(Thread::Current());
+  ScopedObjectAccess soa(Thread::Current());
 
   // We can be called while an exception is pending. We need
   // to preserve that across the method invocation.
-  SirtRef<Throwable> old_exception(ts.Self()->GetException());
-  ts.Self()->ClearException();
+  SirtRef<Throwable> old_exception(soa.Self()->GetException());
+  soa.Self()->ClearException();
 
   // Translate the method through the vtable, unless the debugger wants to suppress it.
   Method* m = pReq->method_;
@@ -2360,15 +2474,17 @@
 
   CHECK_EQ(sizeof(jvalue), sizeof(uint64_t));
 
-  LOG(INFO) << "self=" << ts.Self() << " pReq->receiver_=" << pReq->receiver_ << " m=" << m << " #" << pReq->arg_count_ << " " << pReq->arg_values_;
-  pReq->result_value = InvokeWithJValues(ts, pReq->receiver_, m, reinterpret_cast<JValue*>(pReq->arg_values_));
+  LOG(INFO) << "self=" << soa.Self() << " pReq->receiver_=" << pReq->receiver_ << " m=" << m
+      << " #" << pReq->arg_count_ << " " << pReq->arg_values_;
+  pReq->result_value = InvokeWithJValues(soa, pReq->receiver_, m,
+                                         reinterpret_cast<JValue*>(pReq->arg_values_));
 
-  pReq->exception = gRegistry->Add(ts.Self()->GetException());
+  pReq->exception = gRegistry->Add(soa.Self()->GetException());
   pReq->result_tag = BasicTagFromDescriptor(MethodHelper(m).GetShorty());
   if (pReq->exception != 0) {
-    Object* exc = ts.Self()->GetException();
+    Object* exc = soa.Self()->GetException();
     VLOG(jdwp) << "  JDWP invocation returning with exception=" << exc << " " << PrettyTypeOf(exc);
-    ts.Self()->ClearException();
+    soa.Self()->ClearException();
     pReq->result_value.SetJ(0);
   } else if (pReq->result_tag == JDWP::JT_OBJECT) {
     /* if no exception thrown, examine object result more closely */
@@ -2391,7 +2507,7 @@
   }
 
   if (old_exception.get() != NULL) {
-    ts.Self()->SetException(old_exception.get());
+    soa.Self()->SetException(old_exception.get());
   }
 }
 
@@ -2507,9 +2623,12 @@
   VLOG(jdwp) << "Broadcasting DDM " << (connect ? "connect" : "disconnect") << "...";
 
   Thread* self = Thread::Current();
-  if (self->GetState() != kRunnable) {
-    LOG(ERROR) << "DDM broadcast in thread state " << self->GetState();
-    /* try anyway? */
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    if (self->GetState() != kRunnable) {
+      LOG(ERROR) << "DDM broadcast in thread state " << self->GetState();
+      /* try anyway? */
+    }
   }
 
   JNIEnv* env = self->GetJniEnv();
@@ -2550,8 +2669,8 @@
     Dbg::DdmSendChunk(CHUNK_TYPE("THDE"), 4, buf);
   } else {
     CHECK(type == CHUNK_TYPE("THCR") || type == CHUNK_TYPE("THNM")) << type;
-    ScopedJniThreadState ts(Thread::Current());
-    SirtRef<String> name(t->GetThreadName(ts));
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    SirtRef<String> name(t->GetThreadName(soa));
     size_t char_count = (name.get() != NULL) ? name->GetLength() : 0;
     const jchar* chars = name->GetCharArray()->GetData();
 
@@ -2563,19 +2682,27 @@
   }
 }
 
-static void DdmSendThreadStartCallback(Thread* t, void*) {
-  Dbg::DdmSendThreadNotification(t, CHUNK_TYPE("THCR"));
-}
-
 void Dbg::DdmSetThreadNotification(bool enable) {
-  // We lock the thread list to avoid sending duplicate events or missing
-  // a thread change. We should be okay holding this lock while sending
-  // the messages out. (We have to hold it while accessing a live thread.)
-  ScopedThreadListLock thread_list_lock;
-
+  // Enable/disable thread notifications.
   gDdmThreadNotification = enable;
   if (enable) {
-    Runtime::Current()->GetThreadList()->ForEach(DdmSendThreadStartCallback, NULL);
+    // Suspend the VM then post thread start notifications for all threads. Threads attaching will
+    // see a suspension in progress and block until that ends. They then post their own start
+    // notification.
+    SuspendVM();
+    std::list<Thread*> threads;
+    {
+      MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+      threads = Runtime::Current()->GetThreadList()->GetList();
+    }
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      typedef std::list<Thread*>::const_iterator It; // TODO: C++0x auto
+      for (It it = threads.begin(), end = threads.end(); it != end; ++it) {
+        Dbg::DdmSendThreadNotification(*it, CHUNK_TYPE("THCR"));
+      }
+    }
+    ResumeVM();
   }
 }
 
@@ -2758,7 +2885,7 @@
     needHeader_ = false;
   }
 
-  void Flush() {
+  void Flush() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     // Patch the "length of piece" field.
     CHECK_LE(&buf_[0], pieceLenField_);
     CHECK_LE(pieceLenField_, p_);
@@ -2768,7 +2895,8 @@
     Reset();
   }
 
-  static void HeapChunkCallback(void* start, void* end, size_t used_bytes, void* arg) {
+  static void HeapChunkCallback(void* start, void* end, size_t used_bytes, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     reinterpret_cast<HeapChunkContext*>(arg)->HeapChunkCallback(start, end, used_bytes);
   }
 
@@ -2782,7 +2910,8 @@
     pieceLenField_ = NULL;
   }
 
-  void HeapChunkCallback(void* start, void* /*end*/, size_t used_bytes) {
+  void HeapChunkCallback(void* start, void* /*end*/, size_t used_bytes)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     // Note: heap call backs cannot manipulate the heap upon which they are crawling, care is taken
     // in the following code not to allocate memory, by ensuring buf_ is of the correct size
 
@@ -2834,10 +2963,17 @@
 
     // If we're looking at the native heap, we'll just return
     // (SOLIDITY_HARD, KIND_NATIVE) for all allocated chunks.
-    if (is_native_heap || !Runtime::Current()->GetHeap()->IsLiveObjectLocked(o)) {
+    if (is_native_heap) {
       return HPSG_STATE(SOLIDITY_HARD, KIND_NATIVE);
     }
 
+    {
+      ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      if (!Runtime::Current()->GetHeap()->IsLiveObjectLocked(o)) {
+        return HPSG_STATE(SOLIDITY_HARD, KIND_NATIVE);
+      }
+    }
+
     Class* c = o->GetClass();
     if (c == NULL) {
       // The object was probably just created but hasn't been initialized yet.
@@ -2942,9 +3078,12 @@
 struct AllocRecordStackVisitor : public StackVisitor {
   AllocRecordStackVisitor(const ManagedStack* stack,
                           const std::vector<TraceStackFrame>* trace_stack, AllocRecord* record)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : StackVisitor(stack, trace_stack, NULL), record(record), depth(0) {}
 
-  bool VisitFrame() {
+  // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses
+  // annotalysis.
+  bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS {
     if (depth >= kMaxAllocRecordStackDepth) {
       return false;
     }
@@ -3011,6 +3150,7 @@
 }
 
 void Dbg::DumpRecentAllocations() {
+  ScopedObjectAccess soa(Thread::Current());
   MutexLock mu(gAllocTrackerLock);
   if (recent_allocation_records_ == NULL) {
     LOG(INFO) << "Not recording tracked allocations";
diff --git a/src/debugger.h b/src/debugger.h
index 4c0c1b6..4db9bd7 100644
--- a/src/debugger.h
+++ b/src/debugger.h
@@ -38,7 +38,10 @@
  */
 struct DebugInvokeReq {
   DebugInvokeReq()
-      : invoke_needed_(false),
+      : ready(false), invoke_needed_(false),
+        receiver_(NULL), thread_(NULL), class_(NULL), method_(NULL),
+        arg_count_(0), arg_values_(NULL), options_(0), error(JDWP::ERR_NONE),
+        result_tag(JDWP::JT_VOID), exception(0),
         lock_("a DebugInvokeReq lock"),
         cond_("a DebugInvokeReq condition variable") {
   }
@@ -78,7 +81,7 @@
   static void StopJdwp();
 
   // Invoked by the GC in case we need to keep DDMS informed.
-  static void GcDidFinish();
+  static void GcDidFinish() LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
   // Return the DebugInvokeReq for the current thread.
   static DebugInvokeReq* GetInvokeReq();
@@ -111,15 +114,6 @@
    */
   static int64_t LastDebuggerActivity();
 
-  /*
-   * Block/allow GC depending on what we're doing.  These return the old
-   * status, which can be fed to ThreadContinuing() to restore the previous
-   * mode.
-   */
-  static int ThreadRunning();
-  static int ThreadWaiting();
-  static int ThreadContinuing(int status);
-
   static void UndoDebuggerSuspensions();
 
   static void Exit(int status);
@@ -129,84 +123,154 @@
   /*
    * Class, Object, Array
    */
-  static std::string GetClassName(JDWP::RefTypeId id);
-  static JDWP::JdwpError GetClassObject(JDWP::RefTypeId id, JDWP::ObjectId& classObjectId);
-  static JDWP::JdwpError GetSuperclass(JDWP::RefTypeId id, JDWP::RefTypeId& superclassId);
-  static JDWP::JdwpError GetClassLoader(JDWP::RefTypeId id, JDWP::ExpandBuf* pReply);
-  static JDWP::JdwpError GetModifiers(JDWP::RefTypeId id, JDWP::ExpandBuf* pReply);
-  static JDWP::JdwpError GetReflectedType(JDWP::RefTypeId classId, JDWP::ExpandBuf* pReply);
-  static void GetClassList(std::vector<JDWP::RefTypeId>& classes);
-  static JDWP::JdwpError GetClassInfo(JDWP::RefTypeId classId, JDWP::JdwpTypeTag* pTypeTag, uint32_t* pStatus, std::string* pDescriptor);
-  static void FindLoadedClassBySignature(const char* descriptor, std::vector<JDWP::RefTypeId>& ids);
+  static std::string GetClassName(JDWP::RefTypeId id)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetClassObject(JDWP::RefTypeId id, JDWP::ObjectId& classObjectId)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetSuperclass(JDWP::RefTypeId id, JDWP::RefTypeId& superclassId)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetClassLoader(JDWP::RefTypeId id, JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetModifiers(JDWP::RefTypeId id, JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetReflectedType(JDWP::RefTypeId classId, JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void GetClassList(std::vector<JDWP::RefTypeId>& classes)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetClassInfo(JDWP::RefTypeId classId, JDWP::JdwpTypeTag* pTypeTag,
+                                      uint32_t* pStatus, std::string* pDescriptor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void FindLoadedClassBySignature(const char* descriptor, std::vector<JDWP::RefTypeId>& ids)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static JDWP::JdwpError GetReferenceType(JDWP::ObjectId objectId, JDWP::ExpandBuf* pReply);
-  static JDWP::JdwpError GetSignature(JDWP::RefTypeId refTypeId, std::string& signature);
-  static JDWP::JdwpError GetSourceFile(JDWP::RefTypeId refTypeId, std::string& source_file);
-  static JDWP::JdwpError GetObjectTag(JDWP::ObjectId objectId, uint8_t& tag);
+  static JDWP::JdwpError GetSignature(JDWP::RefTypeId refTypeId, std::string& signature)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetSourceFile(JDWP::RefTypeId refTypeId, std::string& source_file)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetObjectTag(JDWP::ObjectId objectId, uint8_t& tag)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static size_t GetTagWidth(JDWP::JdwpTag tag);
 
-  static JDWP::JdwpError GetArrayLength(JDWP::ObjectId arrayId, int& length);
-  static JDWP::JdwpError OutputArray(JDWP::ObjectId arrayId, int firstIndex, int count, JDWP::ExpandBuf* pReply);
-  static JDWP::JdwpError SetArrayElements(JDWP::ObjectId arrayId, int firstIndex, int count, const uint8_t* buf);
+  static JDWP::JdwpError GetArrayLength(JDWP::ObjectId arrayId, int& length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError OutputArray(JDWP::ObjectId arrayId, int firstIndex, int count,
+                                     JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError SetArrayElements(JDWP::ObjectId arrayId, int firstIndex, int count,
+                                          const uint8_t* buf)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static JDWP::ObjectId CreateString(const std::string& str);
-  static JDWP::JdwpError CreateObject(JDWP::RefTypeId classId, JDWP::ObjectId& new_object);
-  static JDWP::JdwpError CreateArrayObject(JDWP::RefTypeId arrayTypeId, uint32_t length, JDWP::ObjectId& new_array);
+  static JDWP::ObjectId CreateString(const std::string& str)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError CreateObject(JDWP::RefTypeId classId, JDWP::ObjectId& new_object)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError CreateArrayObject(JDWP::RefTypeId arrayTypeId, uint32_t length,
+                                           JDWP::ObjectId& new_array)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static bool MatchType(JDWP::RefTypeId instClassId, JDWP::RefTypeId classId);
+  static bool MatchType(JDWP::RefTypeId instClassId, JDWP::RefTypeId classId)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Method and Field
    */
-  static std::string GetMethodName(JDWP::RefTypeId refTypeId, JDWP::MethodId id);
-  static JDWP::JdwpError OutputDeclaredFields(JDWP::RefTypeId refTypeId, bool withGeneric, JDWP::ExpandBuf* pReply);
-  static JDWP::JdwpError OutputDeclaredMethods(JDWP::RefTypeId refTypeId, bool withGeneric, JDWP::ExpandBuf* pReply);
-  static JDWP::JdwpError OutputDeclaredInterfaces(JDWP::RefTypeId refTypeId, JDWP::ExpandBuf* pReply);
-  static void OutputLineTable(JDWP::RefTypeId refTypeId, JDWP::MethodId methodId, JDWP::ExpandBuf* pReply);
-  static void OutputVariableTable(JDWP::RefTypeId refTypeId, JDWP::MethodId id, bool withGeneric, JDWP::ExpandBuf* pReply);
+  static std::string GetMethodName(JDWP::RefTypeId refTypeId, JDWP::MethodId id)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError OutputDeclaredFields(JDWP::RefTypeId refTypeId, bool withGeneric,
+                                              JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError OutputDeclaredMethods(JDWP::RefTypeId refTypeId, bool withGeneric,
+                                               JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError OutputDeclaredInterfaces(JDWP::RefTypeId refTypeId,
+                                                  JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void OutputLineTable(JDWP::RefTypeId refTypeId, JDWP::MethodId methodId,
+                              JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void OutputVariableTable(JDWP::RefTypeId refTypeId, JDWP::MethodId id, bool withGeneric,
+                                  JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static JDWP::JdwpTag GetFieldBasicTag(JDWP::FieldId fieldId);
-  static JDWP::JdwpTag GetStaticFieldBasicTag(JDWP::FieldId fieldId);
-  static JDWP::JdwpError GetFieldValue(JDWP::ObjectId objectId, JDWP::FieldId fieldId, JDWP::ExpandBuf* pReply);
-  static JDWP::JdwpError SetFieldValue(JDWP::ObjectId objectId, JDWP::FieldId fieldId, uint64_t value, int width);
-  static JDWP::JdwpError GetStaticFieldValue(JDWP::RefTypeId refTypeId, JDWP::FieldId fieldId, JDWP::ExpandBuf* pReply);
-  static JDWP::JdwpError SetStaticFieldValue(JDWP::FieldId fieldId, uint64_t value, int width);
+  static JDWP::JdwpTag GetFieldBasicTag(JDWP::FieldId fieldId)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpTag GetStaticFieldBasicTag(JDWP::FieldId fieldId)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetFieldValue(JDWP::ObjectId objectId, JDWP::FieldId fieldId,
+                                       JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError SetFieldValue(JDWP::ObjectId objectId, JDWP::FieldId fieldId,
+                                       uint64_t value, int width)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError GetStaticFieldValue(JDWP::RefTypeId refTypeId, JDWP::FieldId fieldId,
+                                             JDWP::ExpandBuf* pReply)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError SetStaticFieldValue(JDWP::FieldId fieldId, uint64_t value, int width)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static std::string StringToUtf8(JDWP::ObjectId strId);
+  static std::string StringToUtf8(JDWP::ObjectId strId)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Thread, ThreadGroup, Frame
    */
-  static bool GetThreadName(JDWP::ObjectId threadId, std::string& name);
+  static bool GetThreadName(JDWP::ObjectId threadId, std::string& name)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_);
   static JDWP::JdwpError GetThreadGroup(JDWP::ObjectId threadId, JDWP::ExpandBuf* pReply);
   static std::string GetThreadGroupName(JDWP::ObjectId threadGroupId);
-  static JDWP::ObjectId GetThreadGroupParent(JDWP::ObjectId threadGroupId);
-  static JDWP::ObjectId GetSystemThreadGroupId();
+  static JDWP::ObjectId GetThreadGroupParent(JDWP::ObjectId threadGroupId)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::ObjectId GetSystemThreadGroupId()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static JDWP::ObjectId GetMainThreadGroupId();
 
   static bool GetThreadStatus(JDWP::ObjectId threadId, JDWP::JdwpThreadStatus* pThreadStatus, JDWP::JdwpSuspendStatus* pSuspendStatus);
-  static JDWP::JdwpError GetThreadSuspendCount(JDWP::ObjectId threadId, JDWP::ExpandBuf* pReply);
+  static JDWP::JdwpError GetThreadDebugSuspendCount(JDWP::ObjectId threadId, JDWP::ExpandBuf* pReply);
   static bool ThreadExists(JDWP::ObjectId threadId);
   static bool IsSuspended(JDWP::ObjectId threadId);
   //static void WaitForSuspend(JDWP::ObjectId threadId);
 
   // Fills 'thread_ids' with the threads in the given thread group. If thread_group_id == 0,
   // returns all threads.
-  static void GetThreads(JDWP::ObjectId thread_group_id, std::vector<JDWP::ObjectId>& thread_ids);
+  static void GetThreads(JDWP::ObjectId thread_group_id, std::vector<JDWP::ObjectId>& thread_ids)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static void GetChildThreadGroups(JDWP::ObjectId thread_group_id, std::vector<JDWP::ObjectId>& child_thread_group_ids);
 
   static int GetThreadFrameCount(JDWP::ObjectId threadId);
-  static JDWP::JdwpError GetThreadFrames(JDWP::ObjectId thread_id, size_t start_frame, size_t frame_count, JDWP::ExpandBuf* buf);
+  static JDWP::JdwpError GetThreadFrames(JDWP::ObjectId thread_id, size_t start_frame,
+                                         size_t frame_count, JDWP::ExpandBuf* buf)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static JDWP::ObjectId GetThreadSelfId();
-  static void SuspendVM();
+  static JDWP::ObjectId GetThreadSelfId()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void SuspendVM()
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
   static void ResumeVM();
-  static void SuspendThread(JDWP::ObjectId threadId);
-  static void ResumeThread(JDWP::ObjectId threadId);
+  static JDWP::JdwpError SuspendThread(JDWP::ObjectId threadId, bool request_suspension = true)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_,
+                     GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
+
+  static void ResumeThread(JDWP::ObjectId threadId)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static void SuspendSelf();
 
-  static JDWP::JdwpError GetThisObject(JDWP::ObjectId thread_id, JDWP::FrameId frame_id, JDWP::ObjectId* result);
-  static void GetLocalValue(JDWP::ObjectId threadId, JDWP::FrameId frameId, int slot, JDWP::JdwpTag tag, uint8_t* buf, size_t expectedLen);
-  static void SetLocalValue(JDWP::ObjectId threadId, JDWP::FrameId frameId, int slot, JDWP::JdwpTag tag, uint64_t value, size_t width);
+  static JDWP::JdwpError GetThisObject(JDWP::ObjectId thread_id, JDWP::FrameId frame_id,
+                                       JDWP::ObjectId* result)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void GetLocalValue(JDWP::ObjectId threadId, JDWP::FrameId frameId, int slot,
+                            JDWP::JdwpTag tag, uint8_t* buf, size_t expectedLen)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void SetLocalValue(JDWP::ObjectId threadId, JDWP::FrameId frameId, int slot,
+                            JDWP::JdwpTag tag, uint64_t value, size_t width)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Debugger notification
@@ -217,20 +281,41 @@
     kMethodEntry    = 0x04,
     kMethodExit     = 0x08,
   };
-  static void PostLocationEvent(const Method* method, int pcOffset, Object* thisPtr, int eventFlags);
-  static void PostException(Thread* thread, JDWP::FrameId throw_frame_id, Method* throw_method, uint32_t throw_dex_pc, Method* catch_method, uint32_t catch_dex_pc, Throwable* exception);
-  static void PostThreadStart(Thread* t);
-  static void PostThreadDeath(Thread* t);
-  static void PostClassPrepare(Class* c);
+  static void PostLocationEvent(const Method* method, int pcOffset, Object* thisPtr, int eventFlags)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void PostException(Thread* thread, JDWP::FrameId throw_frame_id, Method* throw_method,
+                            uint32_t throw_dex_pc, Method* catch_method, uint32_t catch_dex_pc,
+                            Throwable* exception)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void PostThreadStart(Thread* t)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void PostThreadDeath(Thread* t)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void PostClassPrepare(Class* c)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static void UpdateDebugger(int32_t dex_pc, Thread* self);
+  static void UpdateDebugger(int32_t dex_pc, Thread* self)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static void WatchLocation(const JDWP::JdwpLocation* pLoc);
-  static void UnwatchLocation(const JDWP::JdwpLocation* pLoc);
-  static JDWP::JdwpError ConfigureStep(JDWP::ObjectId threadId, JDWP::JdwpStepSize size, JDWP::JdwpStepDepth depth);
+  static void WatchLocation(const JDWP::JdwpLocation* pLoc)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void UnwatchLocation(const JDWP::JdwpLocation* pLoc)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static JDWP::JdwpError ConfigureStep(JDWP::ObjectId threadId, JDWP::JdwpStepSize size,
+                                       JDWP::JdwpStepDepth depth)
+      LOCKS_EXCLUDED(gBreakpointsLock)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static void UnconfigureStep(JDWP::ObjectId threadId);
 
-  static JDWP::JdwpError InvokeMethod(JDWP::ObjectId threadId, JDWP::ObjectId objectId, JDWP::RefTypeId classId, JDWP::MethodId methodId, uint32_t arg_count, uint64_t* arg_values, JDWP::JdwpTag* arg_types, uint32_t options, JDWP::JdwpTag* pResultTag, uint64_t* pResultValue, JDWP::ObjectId* pExceptObj);
+  static JDWP::JdwpError InvokeMethod(JDWP::ObjectId threadId, JDWP::ObjectId objectId,
+                                      JDWP::RefTypeId classId, JDWP::MethodId methodId,
+                                      uint32_t arg_count, uint64_t* arg_values,
+                                      JDWP::JdwpTag* arg_types, uint32_t options,
+                                      JDWP::JdwpTag* pResultTag, uint64_t* pResultValue,
+                                      JDWP::ObjectId* pExceptObj)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static void ExecuteMethod(DebugInvokeReq* pReq);
 
   /* perform "late registration" of an object ID */
@@ -239,22 +324,28 @@
   /*
    * DDM support.
    */
-  static void DdmSendThreadNotification(Thread* t, uint32_t type);
+  static void DdmSendThreadNotification(Thread* t, uint32_t type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static void DdmSetThreadNotification(bool enable);
   static bool DdmHandlePacket(const uint8_t* buf, int dataLen, uint8_t** pReplyBuf, int* pReplyLen);
-  static void DdmConnected();
-  static void DdmDisconnected();
-  static void DdmSendChunk(uint32_t type, const std::vector<uint8_t>& bytes);
-  static void DdmSendChunk(uint32_t type, size_t len, const uint8_t* buf);
-  static void DdmSendChunkV(uint32_t type, const iovec* iov, int iov_count);
+  static void DdmConnected() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void DdmDisconnected() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void DdmSendChunk(uint32_t type, const std::vector<uint8_t>& bytes)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void DdmSendChunk(uint32_t type, size_t len, const uint8_t* buf)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void DdmSendChunkV(uint32_t type, const iovec* iov, int iov_count)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Recent allocation tracking support.
    */
-  static void RecordAllocation(Class* type, size_t byte_count);
+  static void RecordAllocation(Class* type, size_t byte_count)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static void SetAllocTrackingEnabled(bool enabled);
   static inline bool IsAllocTrackingEnabled() { return recent_allocation_records_ != NULL; }
-  static jbyteArray GetRecentAllocations();
+  static jbyteArray GetRecentAllocations()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static void DumpRecentAllocations();
 
   enum HpifWhen {
@@ -263,7 +354,8 @@
     HPIF_WHEN_NEXT_GC = 2,
     HPIF_WHEN_EVERY_GC = 3
   };
-  static int DdmHandleHpifChunk(HpifWhen when);
+  static int DdmHandleHpifChunk(HpifWhen when)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   enum HpsgWhen {
     HPSG_WHEN_NEVER = 0,
@@ -275,12 +367,15 @@
   };
   static bool DdmHandleHpsgNhsgChunk(HpsgWhen when, HpsgWhat what, bool native);
 
-  static void DdmSendHeapInfo(HpifWhen reason);
-  static void DdmSendHeapSegments(bool native);
+  static void DdmSendHeapInfo(HpifWhen reason)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void DdmSendHeapSegments(bool native)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  private:
-  static void DdmBroadcast(bool);
-  static void PostThreadStartOrStop(Thread*, uint32_t);
+  static void DdmBroadcast(bool) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void PostThreadStartOrStop(Thread*, uint32_t)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static AllocRecord* recent_allocation_records_;
 };
diff --git a/src/dex2oat.cc b/src/dex2oat.cc
index 4587b30..b68a75b 100644
--- a/src/dex2oat.cc
+++ b/src/dex2oat.cc
@@ -33,9 +33,12 @@
 #include "object_utils.h"
 #include "os.h"
 #include "runtime.h"
+#include "ScopedLocalRef.h"
+#include "scoped_thread_state_change.h"
 #include "stl_util.h"
 #include "stringpiece.h"
 #include "timing_logger.h"
+#include "well_known_classes.h"
 #include "zip_archive.h"
 
 namespace art {
@@ -118,13 +121,15 @@
 
 class Dex2Oat {
  public:
-  static Dex2Oat* Create(Runtime::Options& options, InstructionSet instruction_set,
-                         size_t thread_count, bool support_debugging) {
-    UniquePtr<Runtime> runtime(CreateRuntime(options, instruction_set));
-    if (runtime.get() == NULL) {
-      return NULL;
+  static bool Create(Dex2Oat** p_dex2oat, Runtime::Options& options, InstructionSet instruction_set,
+                     size_t thread_count, bool support_debugging)
+      SHARED_TRYLOCK_FUNCTION(true, GlobalSynchronization::mutator_lock_) {
+    if (!CreateRuntime(options, instruction_set)) {
+      *p_dex2oat = NULL;
+      return false;
     }
-    return new Dex2Oat(runtime.release(), instruction_set, thread_count, support_debugging);
+    *p_dex2oat = new Dex2Oat(Runtime::Current(), instruction_set, thread_count, support_debugging);
+    return true;
   }
 
   ~Dex2Oat() {
@@ -133,7 +138,8 @@
   }
 
   // Make a list of descriptors for classes to include in the image
-  const std::set<std::string>* GetImageClassDescriptors(const char* image_classes_filename) {
+  const std::set<std::string>* GetImageClassDescriptors(const char* image_classes_filename)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     UniquePtr<std::ifstream> image_classes_file(new std::ifstream(image_classes_filename, std::ifstream::in));
     if (image_classes_file.get() == NULL) {
       LOG(ERROR) << "Failed to open image classes file " << image_classes_filename;
@@ -206,14 +212,10 @@
                                 bool image,
                                 const std::set<std::string>* image_classes,
                                 bool dump_stats,
-                                bool dump_timings) {
+                                bool dump_timings)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     // SirtRef and ClassLoader creation needs to come after Runtime::Create
-    UniquePtr<SirtRef<ClassLoader> > class_loader(new SirtRef<ClassLoader>(NULL));
-    if (class_loader.get() == NULL) {
-      LOG(ERROR) << "Failed to create SirtRef for class loader";
-      return NULL;
-    }
-
+    jobject class_loader = NULL;
     if (!boot_image_option.empty()) {
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
       std::vector<const DexFile*> class_path_files(dex_files);
@@ -221,7 +223,11 @@
      for (size_t i = 0; i < class_path_files.size(); i++) {
        class_linker->RegisterDexFile(*class_path_files[i]);
      }
+      ScopedObjectAccessUnchecked soa(Thread::Current());
+      ScopedLocalRef<jobject> class_loader_local(soa.Env(),
+          soa.Env()->AllocObject(WellKnownClasses::dalvik_system_PathClassLoader));
+      class_loader = soa.Env()->NewGlobalRef(class_loader_local.get());
+      Runtime::Current()->SetCompileTimeClassPath(class_loader, class_path_files);
     }
 
     UniquePtr<Compiler> compiler(new Compiler(instruction_set_,
@@ -236,7 +243,11 @@
     compiler->SetBitcodeFileName(bitcode_filename);
 #endif
 
-    compiler->CompileAll(class_loader->get(), dex_files);
+    Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+
+    compiler->CompileAll(class_loader, dex_files);
+
+    Thread::Current()->TransitionFromSuspendedToRunnable();
 
     std::string image_file_location;
     uint32_t image_file_location_checksum = 0;
@@ -251,7 +262,7 @@
     }
 
     if (!OatWriter::Create(oat_file,
-                           class_loader->get(),
+                           class_loader,
                            dex_files,
                            image_file_location_checksum,
                            image_file_location,
@@ -267,7 +278,8 @@
                        const std::set<std::string>* image_classes,
                        const std::string& oat_filename,
                        const std::string& oat_location,
-                       const Compiler& compiler) {
+                       const Compiler& compiler)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
     ImageWriter image_writer(image_classes);
     if (!image_writer.Write(image_filename, image_base, oat_filename, oat_location, compiler)) {
       LOG(ERROR) << "Failed to create image file " << image_filename;
@@ -286,13 +298,13 @@
         start_ns_(NanoTime()) {
   }
 
-  static Runtime* CreateRuntime(Runtime::Options& options, InstructionSet instruction_set) {
-    Runtime* runtime = Runtime::Create(options, false);
-    if (runtime == NULL) {
+  static bool CreateRuntime(Runtime::Options& options, InstructionSet instruction_set)
+      SHARED_TRYLOCK_FUNCTION(true, GlobalSynchronization::mutator_lock_) {
+    if (!Runtime::Create(options, false)) {
       LOG(ERROR) << "Failed to create runtime";
-      return NULL;
+      return false;
     }
-
+    Runtime* runtime = Runtime::Current();
     // if we loaded an existing image, we will reuse values from the image roots.
     if (!runtime->HasJniDlsymLookupStub()) {
       runtime->SetJniDlsymLookupStub(Compiler::CreateJniDlsymLookupStub(instruction_set));
@@ -316,11 +328,12 @@
       }
     }
     runtime->GetClassLinker()->FixupDexCaches(runtime->GetResolutionMethod());
-    return runtime;
+    return true;
   }
 
   static void ResolveExceptionsForMethod(MethodHelper* mh,
-                           std::set<std::pair<uint16_t, const DexFile*> >& exceptions_to_resolve) {
+                           std::set<std::pair<uint16_t, const DexFile*> >& exceptions_to_resolve)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile::CodeItem* code_item = mh->GetCodeItem();
     if (code_item == NULL) {
       return;  // native or abstract method
@@ -355,7 +368,9 @@
       }
     }
   }
-  static bool ResolveCatchBlockExceptionsClassVisitor(Class* c, void* arg) {
+
+  static bool ResolveCatchBlockExceptionsClassVisitor(Class* c, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     std::set<std::pair<uint16_t, const DexFile*> >* exceptions_to_resolve =
         reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*> >*>(arg);
     MethodHelper mh;
@@ -371,7 +386,9 @@
     }
     return true;
   }
-  static bool RecordImageClassesVisitor(Class* klass, void* arg) {
+
+  static bool RecordImageClassesVisitor(Class* klass, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     std::set<std::string>* image_classes = reinterpret_cast<std::set<std::string>*>(arg);
     if (klass->IsArrayClass() || klass->IsPrimitive()) {
       return true;
@@ -684,8 +701,18 @@
     options.push_back(std::make_pair(runtime_args[i], reinterpret_cast<void*>(NULL)));
   }
 
-  UniquePtr<Dex2Oat> dex2oat(Dex2Oat::Create(options, instruction_set, thread_count,
-                                             support_debugging));
+  Dex2Oat* p_dex2oat;
+  if (!Dex2Oat::Create(&p_dex2oat, options, instruction_set, thread_count, support_debugging)) {
+    LOG(ERROR) << "Failed to create dex2oat";
+    return EXIT_FAILURE;
+  }
+  UniquePtr<Dex2Oat> dex2oat(p_dex2oat);
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
+  // give it away now and then switch to a more manageable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  // Whilst we're in native take the opportunity to initialize well known classes.
+  WellKnownClasses::InitClasses(Thread::Current()->GetJniEnv());
+  ScopedObjectAccess soa(Thread::Current());
 
   // If --image-classes was specified, calculate the full list of classes to include in the image
   UniquePtr<const std::set<std::string> > image_classes(NULL);
@@ -744,12 +771,15 @@
     return EXIT_SUCCESS;
   }
 
-  if (!dex2oat->CreateImageFile(image_filename,
-                                image_base,
-                                image_classes.get(),
-                                oat_filename,
-                                oat_location,
-                                *compiler.get())) {
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  bool image_creation_success = dex2oat->CreateImageFile(image_filename,
+                                                         image_base,
+                                                         image_classes.get(),
+                                                         oat_filename,
+                                                         oat_location,
+                                                         *compiler.get());
+  Thread::Current()->TransitionFromSuspendedToRunnable();
+  if (!image_creation_success) {
     return EXIT_FAILURE;
   }
 
diff --git a/src/dex_cache.h b/src/dex_cache.h
index f604701..b3c5ce6 100644
--- a/src/dex_cache.h
+++ b/src/dex_cache.h
@@ -38,11 +38,12 @@
             ObjectArray<Class>* types,
             ObjectArray<Method>* methods,
             ObjectArray<Field>* fields,
-            ObjectArray<StaticStorageBase>* initialized_static_storage);
+            ObjectArray<StaticStorageBase>* initialized_static_storage)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void Fixup(Method* trampoline);
+  void Fixup(Method* trampoline) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  String* GetLocation() const {
+  String* GetLocation() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return Get(kLocation)->AsString();
   }
 
@@ -61,43 +62,49 @@
                         kResolvedMethods * sizeof(Object*));
   }
 
-  size_t NumStrings() const {
+  size_t NumStrings() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetStrings()->GetLength();
   }
 
-  size_t NumResolvedTypes() const {
+  size_t NumResolvedTypes() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetResolvedTypes()->GetLength();
   }
 
-  size_t NumResolvedMethods() const {
+  size_t NumResolvedMethods() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetResolvedMethods()->GetLength();
   }
 
-  size_t NumResolvedFields() const {
+  size_t NumResolvedFields() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetResolvedFields()->GetLength();
   }
 
-  size_t NumInitializedStaticStorage() const {
+  size_t NumInitializedStaticStorage() const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetInitializedStaticStorage()->GetLength();
   }
 
-  String* GetResolvedString(uint32_t string_idx) const {
+  String* GetResolvedString(uint32_t string_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetStrings()->Get(string_idx);
   }
 
-  void SetResolvedString(uint32_t string_idx, String* resolved) {
+  void SetResolvedString(uint32_t string_idx, String* resolved)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     GetStrings()->Set(string_idx, resolved);
   }
 
-  Class* GetResolvedType(uint32_t type_idx) const {
+  Class* GetResolvedType(uint32_t type_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetResolvedTypes()->Get(type_idx);
   }
 
-  void SetResolvedType(uint32_t type_idx, Class* resolved) {
+  void SetResolvedType(uint32_t type_idx, Class* resolved)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     GetResolvedTypes()->Set(type_idx, resolved);
   }
 
-  Method* GetResolvedMethod(uint32_t method_idx) const {
+  Method* GetResolvedMethod(uint32_t method_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method = GetResolvedMethods()->Get(method_idx);
     // Hide resolution trampoline methods from the caller
     if (method != NULL && method->GetDexMethodIndex() == DexFile::kDexNoIndex16) {
@@ -108,31 +115,39 @@
     }
   }
 
-  void SetResolvedMethod(uint32_t method_idx, Method* resolved) {
+  void SetResolvedMethod(uint32_t method_idx, Method* resolved)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     GetResolvedMethods()->Set(method_idx, resolved);
   }
 
-  Field* GetResolvedField(uint32_t field_idx) const {
+  Field* GetResolvedField(uint32_t field_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetResolvedFields()->Get(field_idx);
   }
 
-  void SetResolvedField(uint32_t field_idx, Field* resolved) {
+  void SetResolvedField(uint32_t field_idx, Field* resolved)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     GetResolvedFields()->Set(field_idx, resolved);
   }
 
-  ObjectArray<String>* GetStrings() const {
+  ObjectArray<String>* GetStrings() const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return static_cast<ObjectArray<String>*>(GetNonNull(kStrings));
   }
-  ObjectArray<Class>* GetResolvedTypes() const {
+  ObjectArray<Class>* GetResolvedTypes() const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return static_cast<ObjectArray<Class>*>(GetNonNull(kResolvedTypes));
   }
-  ObjectArray<Method>* GetResolvedMethods() const {
+  ObjectArray<Method>* GetResolvedMethods() const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return static_cast<ObjectArray<Method>*>(GetNonNull(kResolvedMethods));
   }
-  ObjectArray<Field>* GetResolvedFields() const {
+  ObjectArray<Field>* GetResolvedFields() const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return static_cast<ObjectArray<Field>*>(GetNonNull(kResolvedFields));
   }
-  ObjectArray<StaticStorageBase>* GetInitializedStaticStorage() const {
+  ObjectArray<StaticStorageBase>* GetInitializedStaticStorage() const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return static_cast<ObjectArray<StaticStorageBase>*>(GetNonNull(kInitializedStaticStorage));
   }
 
@@ -151,7 +166,8 @@
     kMax                      = 6,
   };
 
-  Object* GetNonNull(ArrayIndex array_index) const {
+  Object* GetNonNull(ArrayIndex array_index) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Object* obj = Get(array_index);
     DCHECK(obj != NULL);
     return obj;
diff --git a/src/dex_cache_test.cc b/src/dex_cache_test.cc
index 6340d79..da02125 100644
--- a/src/dex_cache_test.cc
+++ b/src/dex_cache_test.cc
@@ -27,6 +27,7 @@
 class DexCacheTest : public CommonTest {};
 
 TEST_F(DexCacheTest, Open) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<DexCache> dex_cache(class_linker_->AllocDexCache(*java_lang_dex_file_));
   ASSERT_TRUE(dex_cache.get() != NULL);
 
diff --git a/src/dex_file.cc b/src/dex_file.cc
index 2b81e72..e763e67 100644
--- a/src/dex_file.cc
+++ b/src/dex_file.cc
@@ -220,18 +220,37 @@
   // the global reference table is otherwise empty!
 }
 
-jobject DexFile::GetDexObject(JNIEnv* env) const {
-  MutexLock mu(dex_object_lock_);
-  if (dex_object_ != NULL) {
-    return dex_object_;
+class ScopedJniMonitorLock {
+ public:
+  ScopedJniMonitorLock(JNIEnv* env, jobject locked) : env_(env), locked_(locked) {
+    env->MonitorEnter(locked_);
   }
+  ~ScopedJniMonitorLock() {
+    env_->MonitorExit(locked_);
+  }
+ private:
+  JNIEnv* const env_;
+  const jobject locked_;
+};
 
+jobject DexFile::GetDexObject(JNIEnv* env) const {
+  {
+    ScopedJniMonitorLock lock(env, WellKnownClasses::com_android_dex_Dex);
+    if (dex_object_ != NULL) {
+      return dex_object_;
+    }
+  }
   void* address = const_cast<void*>(reinterpret_cast<const void*>(begin_));
   jobject byte_buffer = env->NewDirectByteBuffer(address, size_);
   if (byte_buffer == NULL) {
     return NULL;
   }
 
+  ScopedJniMonitorLock lock(env, WellKnownClasses::com_android_dex_Dex);
+  // Re-test to see if someone beat us to the creation when we had the lock released.
+  if (dex_object_ != NULL) {
+    return dex_object_;
+  }
   jvalue args[1];
   args[0].l = byte_buffer;
   jobject local = env->CallStaticObjectMethodA(WellKnownClasses::com_android_dex_Dex,
diff --git a/src/dex_file.h b/src/dex_file.h
index f1f76a8..5f33ef8 100644
--- a/src/dex_file.h
+++ b/src/dex_file.h
@@ -773,7 +773,8 @@
   // Returns -2 for native methods (as expected in exception traces).
   //
   // This is used by runtime; therefore use art::Method not art::DexFile::Method.
-  int32_t GetLineNumFromPC(const Method* method, uint32_t rel_pc) const;
+  int32_t GetLineNumFromPC(const Method* method, uint32_t rel_pc) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void DecodeDebugInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
                        DexDebugNewPositionCb position_cb, DexDebugNewLocalCb local_cb,
@@ -833,7 +834,6 @@
         location_(location),
         location_checksum_(location_checksum),
         mem_map_(mem_map),
-        dex_object_lock_("a dex_object_lock_"),
         dex_object_(NULL),
         header_(0),
         string_ids_(0),
@@ -892,8 +892,7 @@
   UniquePtr<MemMap> mem_map_;
 
   // A cached com.android.dex.Dex instance, possibly NULL. Use GetDexObject.
-  mutable Mutex dex_object_lock_;
-  mutable jobject dex_object_ GUARDED_BY(dex_object_lock_);
+  mutable jobject dex_object_;
 
   // Points to the header section.
   const Header* header_;
@@ -1105,9 +1104,11 @@
 class EncodedStaticFieldValueIterator {
  public:
   EncodedStaticFieldValueIterator(const DexFile& dex_file, DexCache* dex_cache,
-                                  ClassLinker* linker, const DexFile::ClassDef& class_def);
+                                  ClassLinker* linker, const DexFile::ClassDef& class_def)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void ReadValueToField(Field* field) const;
+  void ReadValueToField(Field* field) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool HasNext() { return pos_ < array_size_; }
 
diff --git a/src/exception_test.cc b/src/exception_test.cc
index 5c7e1a6..06ae7fb 100644
--- a/src/exception_test.cc
+++ b/src/exception_test.cc
@@ -19,7 +19,7 @@
 #include "dex_file.h"
 #include "gtest/gtest.h"
 #include "runtime.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "UniquePtr.h"
 
@@ -30,7 +30,8 @@
   virtual void SetUp() {
     CommonTest::SetUp();
 
-    SirtRef<ClassLoader> class_loader(LoadDex("ExceptionHandle"));
+    ScopedObjectAccess soa(Thread::Current());
+    SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(LoadDex("ExceptionHandle")));
     my_klass_ = class_linker_->FindClass("LExceptionHandle;", class_loader.get());
     ASSERT_TRUE(my_klass_ != NULL);
     class_linker_->EnsureInitialized(my_klass_, false, true);
@@ -116,7 +117,11 @@
 }
 
 TEST_F(ExceptionTest, StackTraceElement) {
+  Thread::Current()->TransitionFromSuspendedToRunnable();
   runtime_->Start();
+  Thread* thread = Thread::Current();
+  JNIEnv* env = thread->GetJniEnv();
+  ScopedObjectAccess soa(env);
 
   std::vector<uintptr_t> fake_stack;
   ASSERT_EQ(kStackAlignment, 16);
@@ -149,7 +154,6 @@
   fake_stack.push_back(0);
 
   // Set up thread to appear as if we called out of method_g_ at pc dex 3
-  Thread* thread = Thread::Current();
   thread->SetTopOfStack(&fake_stack[0], method_g_->ToNativePC(dex_pc) + 2);  // return pc
 #else
   // Create/push fake 20-byte shadow frame for method g
@@ -171,14 +175,12 @@
   thread->PushShadowFrame(reinterpret_cast<ShadowFrame*>(&fake_stack[0]));
 #endif
 
-  JNIEnv* env = thread->GetJniEnv();
-  ScopedJniThreadState ts(env);
-  jobject internal = thread->CreateInternalStackTrace(ts);
+  jobject internal = thread->CreateInternalStackTrace(soa);
   ASSERT_TRUE(internal != NULL);
   jobjectArray ste_array = Thread::InternalStackTraceToStackTraceElementArray(env, internal);
   ASSERT_TRUE(ste_array != NULL);
   ObjectArray<StackTraceElement>* trace_array =
-      ts.Decode<ObjectArray<StackTraceElement>*>(ste_array);
+      soa.Decode<ObjectArray<StackTraceElement>*>(ste_array);
 
   ASSERT_TRUE(trace_array != NULL);
   ASSERT_TRUE(trace_array->Get(0) != NULL);
diff --git a/src/heap.cc b/src/heap.cc
index 626adf9..658755e 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -31,10 +31,8 @@
 #include "object.h"
 #include "object_utils.h"
 #include "os.h"
-#include "scoped_heap_lock.h"
-#include "scoped_jni_thread_state.h"
-#include "scoped_thread_list_lock_releaser.h"
 #include "ScopedLocalRef.h"
+#include "scoped_thread_state_change.h"
 #include "space.h"
 #include "stl_util.h"
 #include "thread_list.h"
@@ -136,26 +134,26 @@
 }
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t capacity,
-           const std::string& original_image_file_name)
-    : lock_(NULL),
-      alloc_space_(NULL),
+           const std::string& original_image_file_name, bool concurrent_gc)
+    : alloc_space_(NULL),
       card_table_(NULL),
+      concurrent_gc_(concurrent_gc),
+      have_zygote_space_(false),
       card_marking_disabled_(false),
       is_gc_running_(false),
       concurrent_start_bytes_(std::numeric_limits<size_t>::max()),
       concurrent_start_size_(128 * KB),
       concurrent_min_free_(256 * KB),
-      try_running_gc_(false),
-      requesting_gc_(false),
       num_bytes_allocated_(0),
       num_objects_allocated_(0),
       last_trim_time_(0),
+      try_running_gc_(false),
+      requesting_gc_(false),
       reference_referent_offset_(0),
       reference_queue_offset_(0),
       reference_queueNext_offset_(0),
       reference_pendingNext_offset_(0),
       finalizer_reference_zombie_offset_(0),
-      have_zygote_space_(false),
       target_utilization_(0.5),
       verify_objects_(false) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
@@ -246,8 +244,9 @@
   // It's still too early to take a lock because there are no threads yet,
   // but we can create the heap lock now. We don't create it earlier to
   // make it clear that you can't use locks during heap initialization.
-  lock_.reset(new Mutex("Heap lock", kHeapLock));
-  condition_.reset(new ConditionVariable("Heap condition variable"));
+  statistics_lock_ = new Mutex("statistics lock");
+  gc_complete_lock_ = new Mutex("GC complete lock");
+  gc_complete_cond_.reset(new ConditionVariable("GC complete condition variable"));
 
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() exiting";
@@ -263,6 +262,7 @@
 };
 
 void Heap::AddSpace(Space* space) {
+  WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
   DCHECK(space != NULL);
   DCHECK(space->GetLiveBitmap() != NULL);
   live_bitmap_->AddSpaceBitmap(space->GetLiveBitmap());
@@ -280,6 +280,9 @@
   // all daemon threads are suspended, and we also know that the threads list have been deleted, so
   // those threads can't resume. We're the only running thread, and we can do whatever we like...
   STLDeleteElements(&spaces_);
+  delete statistics_lock_;
+  delete gc_complete_lock_;
+
 }
 
 Space* Heap::FindSpaceFromObject(const Object* obj) const {
@@ -326,38 +329,39 @@
   int64_t total_bytes_free;
   size_t max_contiguous_allocation;
 
-  {
-    ScopedHeapLock heap_lock;
-    DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(Class)) ||
-           (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
-           strlen(ClassHelper(c).GetDescriptor()) == 0);
-    DCHECK_GE(byte_count, sizeof(Object));
-    Object* obj = AllocateLocked(byte_count);
-    if (obj != NULL) {
-      obj->SetClass(c);
-      if (Dbg::IsAllocTrackingEnabled()) {
-        Dbg::RecordAllocation(c, byte_count);
-      }
-
-      if (!is_gc_running_ && num_bytes_allocated_ >= concurrent_start_bytes_) {
-        // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
-        SirtRef<Object> ref(obj);
-        RequestConcurrentGC();
-      }
-      VerifyObject(obj);
-
-      // Additional verification to ensure that we did not allocate into a zygote space.
-      DCHECK(!have_zygote_space_ || !FindSpaceFromObject(obj)->IsZygoteSpace());
-
-      return obj;
+  DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(Class)) ||
+         (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
+         strlen(ClassHelper(c).GetDescriptor()) == 0);
+  DCHECK_GE(byte_count, sizeof(Object));
+  Object* obj = Allocate(byte_count);
+  if (obj != NULL) {
+    obj->SetClass(c);
+    if (Dbg::IsAllocTrackingEnabled()) {
+      Dbg::RecordAllocation(c, byte_count);
     }
-    total_bytes_free = GetFreeMemory();
-    max_contiguous_allocation = 0;
-    // TODO: C++0x auto
-    for (Spaces::const_iterator cur = spaces_.begin(); cur != spaces_.end(); ++cur) {
-      if ((*cur)->IsAllocSpace()) {
-        (*cur)->AsAllocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
-      }
+    bool request_concurrent_gc;
+    {
+      MutexLock mu(*statistics_lock_);
+      request_concurrent_gc = num_bytes_allocated_ >= concurrent_start_bytes_;
+    }
+    if (request_concurrent_gc) {
+      // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
+      SirtRef<Object> ref(obj);
+      RequestConcurrentGC();
+    }
+    VerifyObject(obj);
+
+    // Additional verification to ensure that we did not allocate into a zygote space.
+    DCHECK(!have_zygote_space_ || !FindSpaceFromObject(obj)->IsZygoteSpace());
+
+    return obj;
+  }
+  total_bytes_free = GetFreeMemory();
+  max_contiguous_allocation = 0;
+  // TODO: C++0x auto
+  for (Spaces::const_iterator cur = spaces_.begin(); cur != spaces_.end(); ++cur) {
+    if ((*cur)->IsAllocSpace()) {
+      (*cur)->AsAllocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
     }
   }
 
@@ -387,7 +391,7 @@
 }
 
 bool Heap::IsLiveObjectLocked(const Object* obj) {
-  lock_->AssertHeld();
+  GlobalSynchronization::heap_bitmap_lock_->AssertReaderHeld();
   return IsHeapAddress(obj) && GetLiveBitmap()->Test(obj);
 }
 
@@ -399,7 +403,7 @@
     return;
   }
   {
-    ScopedHeapLock heap_lock;
+    ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
     Heap::VerifyObjectLocked(obj);
   }
 }
@@ -413,7 +417,7 @@
 }
 
 void Heap::VerifyObjectLocked(const Object* obj) {
-  lock_->AssertHeld();
+  GlobalSynchronization::heap_bitmap_lock_->AssertReaderHeld();
   if (!IsAligned<kObjectAlignment>(obj)) {
     LOG(FATAL) << "Object isn't aligned: " << obj;
   } else if (!GetLiveBitmap()->Test(obj)) {
@@ -455,35 +459,35 @@
 }
 
 void Heap::VerifyHeap() {
-  ScopedHeapLock heap_lock;
+  ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-void Heap::RecordAllocationLocked(AllocSpace* space, const Object* obj) {
-#ifndef NDEBUG
-  if (Runtime::Current()->IsStarted()) {
-    lock_->AssertHeld();
-  }
-#endif
-  size_t size = space->AllocationSize(obj);
-  DCHECK_GT(size, 0u);
-  num_bytes_allocated_ += size;
-  num_objects_allocated_ += 1;
+void Heap::RecordAllocation(AllocSpace* space, const Object* obj) {
+  {
+    MutexLock mu(*statistics_lock_);
+    size_t size = space->AllocationSize(obj);
+    DCHECK_GT(size, 0u);
+    num_bytes_allocated_ += size;
+    num_objects_allocated_ += 1;
 
-  if (Runtime::Current()->HasStatsEnabled()) {
-    RuntimeStats* global_stats = Runtime::Current()->GetStats();
-    RuntimeStats* thread_stats = Thread::Current()->GetStats();
-    ++global_stats->allocated_objects;
-    ++thread_stats->allocated_objects;
-    global_stats->allocated_bytes += size;
-    thread_stats->allocated_bytes += size;
+    if (Runtime::Current()->HasStatsEnabled()) {
+      RuntimeStats* global_stats = Runtime::Current()->GetStats();
+      RuntimeStats* thread_stats = Thread::Current()->GetStats();
+      ++global_stats->allocated_objects;
+      ++thread_stats->allocated_objects;
+      global_stats->allocated_bytes += size;
+      thread_stats->allocated_bytes += size;
+    }
   }
-
-  live_bitmap_->Set(obj);
+  {
+    WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+    live_bitmap_->Set(obj);
+  }
 }
 
-void Heap::RecordFreeLocked(size_t freed_objects, size_t freed_bytes) {
-  lock_->AssertHeld();
+void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
+  MutexLock mu(*statistics_lock_);
 
   if (freed_objects < num_objects_allocated_) {
     num_objects_allocated_ -= freed_objects;
@@ -506,32 +510,39 @@
   }
 }
 
-Object* Heap::AllocateLocked(size_t size) {
-  lock_->AssertHeld();
-
-  // Try the default alloc space first.
-  Object* obj = AllocateLocked(alloc_space_, size);
+Object* Heap::Allocate(size_t size) {
+  Object* obj = Allocate(alloc_space_, size);
   if (obj != NULL) {
-    RecordAllocationLocked(alloc_space_, obj);
+    RecordAllocation(alloc_space_, obj);
     return obj;
   }
 
   return NULL;
 }
 
-Object* Heap::AllocateLocked(AllocSpace* space, size_t alloc_size) {
-  lock_->AssertHeld();
-
+Object* Heap::Allocate(AllocSpace* space, size_t alloc_size) {
+  Thread* self = Thread::Current();
   // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
   // done in the runnable state where suspension is expected.
-  DCHECK_EQ(Thread::Current()->GetState(), kRunnable);
-  Thread::Current()->AssertThreadSuspensionIsAllowable();
+#ifndef NDEBUG
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_EQ(self->GetState(), kRunnable);
+  }
+  self->AssertThreadSuspensionIsAllowable();
+#endif
 
   // Fail impossible allocations
   if (alloc_size > space->Capacity()) {
     // On failure collect soft references
     WaitForConcurrentGcToComplete();
-    CollectGarbageInternal(false, false, true);
+    if (Runtime::Current()->HasStatsEnabled()) {
+      ++Runtime::Current()->GetStats()->gc_for_alloc_count;
+      ++Thread::Current()->GetStats()->gc_for_alloc_count;
+    }
+    self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
+    CollectGarbageInternal(false, true);
+    self->TransitionFromSuspendedToRunnable();
     return NULL;
   }
 
@@ -540,43 +551,40 @@
     return ptr;
   }
 
-  // The allocation failed.  If the GC is running, block until it completes and retry.
-  if (is_gc_running_) {
-    // The GC is concurrently tracing the heap.  Release the heap lock, wait for the GC to
-    // complete, and retrying allocating.
-    WaitForConcurrentGcToComplete();
-    ptr = space->AllocWithoutGrowth(alloc_size);
-    if (ptr != NULL) {
-      return ptr;
+  // The allocation failed.  If the GC is running, block until it completes else request a
+  // foreground partial collection.
+  if (!WaitForConcurrentGcToComplete()) {
+    // No concurrent GC so perform a foreground collection.
+    if (Runtime::Current()->HasStatsEnabled()) {
+      ++Runtime::Current()->GetStats()->gc_for_alloc_count;
+      ++Thread::Current()->GetStats()->gc_for_alloc_count;
     }
+    self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
+    CollectGarbageInternal(have_zygote_space_, false);
+    self->TransitionFromSuspendedToRunnable();
   }
 
-  // Another failure.  Our thread was starved or there may be too many
-  // live objects.  Try a foreground GC.  This will have no effect if
-  // the concurrent GC is already running.
-  if (Runtime::Current()->HasStatsEnabled()) {
-    ++Runtime::Current()->GetStats()->gc_for_alloc_count;
-    ++Thread::Current()->GetStats()->gc_for_alloc_count;
-  }
-
-  if (have_zygote_space_) {
-    // We don't need a WaitForConcurrentGcToComplete here since we checked is_gc_running_ earlier
-    // and we are in a heap lock. Try partial GC first.
-    CollectGarbageInternal(true, false, false);
-    ptr = space->AllocWithoutGrowth(alloc_size);
-    if (ptr != NULL) {
-      return ptr;
-    }
-  }
-
-  // Partial GC didn't free enough memory, try a full GC.
-  CollectGarbageInternal(false, false, false);
   ptr = space->AllocWithoutGrowth(alloc_size);
   if (ptr != NULL) {
     return ptr;
   }
 
-  // Even that didn't work;  this is an exceptional state.
+  if (!have_zygote_space_) {
+    // Partial GC didn't free enough memory, try a full GC.
+    if (Runtime::Current()->HasStatsEnabled()) {
+      ++Runtime::Current()->GetStats()->gc_for_alloc_count;
+      ++Thread::Current()->GetStats()->gc_for_alloc_count;
+    }
+    self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
+    CollectGarbageInternal(false, false);
+    self->TransitionFromSuspendedToRunnable();
+    ptr = space->AllocWithoutGrowth(alloc_size);
+    if (ptr != NULL) {
+      return ptr;
+    }
+  }
+
+  // Allocations have failed after GCs;  this is an exceptional state.
   // Try harder, growing the heap if necessary.
   ptr = space->AllocWithGrowth(alloc_size);
   if (ptr != NULL) {
@@ -595,13 +603,20 @@
 
   // OLD-TODO: wait for the finalizers from the previous GC to finish
   VLOG(gc) << "Forcing collection of SoftReferences for " << PrettySize(alloc_size) << " allocation";
+
+  if (Runtime::Current()->HasStatsEnabled()) {
+    ++Runtime::Current()->GetStats()->gc_for_alloc_count;
+    ++Thread::Current()->GetStats()->gc_for_alloc_count;
+  }
   // We don't need a WaitForConcurrentGcToComplete here either.
-  CollectGarbageInternal(false, false, true);
+  self->TransitionFromRunnableToSuspended(kWaitingPerformingGc);
+  CollectGarbageInternal(false, true);
+  self->TransitionFromSuspendedToRunnable();
   ptr = space->AllocWithGrowth(alloc_size);
   if (ptr != NULL) {
     return ptr;
   }
-
+  // Allocation failed.
   return NULL;
 }
 
@@ -621,12 +636,14 @@
 }
 
 int64_t Heap::GetFreeMemory() {
+  MutexLock mu(*statistics_lock_);
   return GetMaxMemory() - num_bytes_allocated_;
 }
 
 class InstanceCounter {
  public:
   InstanceCounter(Class* c, bool count_assignable)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : class_(c), count_assignable_(count_assignable), count_(0) {
   }
 
@@ -634,12 +651,13 @@
     return count_;
   }
 
-  static void Callback(Object* o, void* arg) {
+  static void Callback(Object* o, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     reinterpret_cast<InstanceCounter*>(arg)->VisitInstance(o);
   }
 
  private:
-  void VisitInstance(Object* o) {
+  void VisitInstance(Object* o) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Class* instance_class = o->GetClass();
     if (count_assignable_) {
       if (instance_class == class_) {
@@ -658,23 +676,24 @@
 };
 
 int64_t Heap::CountInstances(Class* c, bool count_assignable) {
-  ScopedHeapLock heap_lock;
+  ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
   InstanceCounter counter(c, count_assignable);
   GetLiveBitmap()->Walk(InstanceCounter::Callback, &counter);
   return counter.GetCount();
 }
 
 void Heap::CollectGarbage(bool clear_soft_references) {
-  ScopedHeapLock heap_lock;
   // If we just waited for a GC to complete then we do not need to do another
   // GC unless we clear soft references.
   if (!WaitForConcurrentGcToComplete() || clear_soft_references) {
-    CollectGarbageInternal(have_zygote_space_, true, clear_soft_references);
+    ScopedThreadStateChange tsc(Thread::Current(), kWaitingPerformingGc);
+    CollectGarbageInternal(have_zygote_space_, clear_soft_references);
   }
 }
 
 void Heap::PreZygoteFork() {
-  ScopedHeapLock heap_lock;
+  static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
+  MutexLock mu(zygote_creation_lock_);
 
   // Try to see if we have any Zygote spaces.
   if (have_zygote_space_) {
@@ -702,20 +721,59 @@
   }
 }
 
-void Heap::CollectGarbageInternal(bool partial_gc, bool concurrent, bool clear_soft_references) {
-  lock_->AssertHeld();
+void Heap::CollectGarbageInternal(bool partial_gc, bool clear_soft_references) {
+  GlobalSynchronization::mutator_lock_->AssertNotHeld();
+#ifndef NDEBUG
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_EQ(Thread::Current()->GetState(), kWaitingPerformingGc);
+  }
+#endif
 
-  CHECK(!is_gc_running_) << "Attempted recursive GC";
-  is_gc_running_ = true;
+  // Ensure there is only one GC at a time.
+  bool start_collect = false;
+  while (!start_collect) {
+    {
+      MutexLock mu(*gc_complete_lock_);
+      if (!is_gc_running_) {
+        is_gc_running_ = true;
+        start_collect = true;
+      }
+    }
+    if (!start_collect) {
+      WaitForConcurrentGcToComplete();
+      // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
+      //       Not doing so at the moment to ensure soft references are cleared.
+    }
+  }
+  gc_complete_lock_->AssertNotHeld();
+  if (concurrent_gc_) {
+    CollectGarbageConcurrentMarkSweepPlan(partial_gc, clear_soft_references);
+  } else {
+    CollectGarbageMarkSweepPlan(partial_gc, clear_soft_references);
+  }
+  gc_complete_lock_->AssertNotHeld();
+  MutexLock mu(*gc_complete_lock_);
+  is_gc_running_ = false;
+  // Wake anyone who may have been waiting for the GC to complete.
+  gc_complete_cond_->Broadcast();
+}
 
+void Heap::CollectGarbageMarkSweepPlan(bool partial_gc, bool clear_soft_references) {
   TimingLogger timings("CollectGarbageInternal");
-  uint64_t t0 = NanoTime(), root_end = 0, dirty_begin = 0, dirty_end = 0;
+  uint64_t t0 = NanoTime(), dirty_end = 0;
 
+  // Suspend all threads and get exclusive access to the heap.
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
   thread_list->SuspendAll();
   timings.AddSplit("SuspendAll");
+  GlobalSynchronization::mutator_lock_->AssertExclusiveHeld();
 
-  size_t initial_size = num_bytes_allocated_;
+  size_t initial_size;
+  {
+    MutexLock mu(*statistics_lock_);
+    initial_size = num_bytes_allocated_;
+  }
   Object* cleared_references = NULL;
   {
     MarkSweep mark_sweep(mark_stack_.get());
@@ -735,8 +793,6 @@
         mod_union_table_->ClearCards(*it);
       } else if (space->GetGcRetentionPolicy() == GCRP_FULL_COLLECT) {
         zygote_mod_union_table_->ClearCards(space);
-      } else if (concurrent) {
-        card_table_->ClearSpaceCards(space);
       }
     }
     timings.AddSplit("ClearCards");
@@ -746,6 +802,7 @@
     zygote_mod_union_table_->Verify();
 #endif
 
+    WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
     if (partial_gc) {
       // Copy the mark bits over from the live bits, do this as early as possible or else we can
       // accidentally un-mark roots.
@@ -760,15 +817,6 @@
     // Roots are marked on the bitmap and the mark_stack is empty.
     DCHECK(mark_sweep.IsMarkStackEmpty());
 
-    if (concurrent) {
-      // We need to resume before unlocking or else a thread waiting for the
-      // heap lock would re-suspend since we have not yet called ResumeAll.
-      thread_list->ResumeAll();
-      Unlock();
-      root_end = NanoTime();
-      timings.AddSplit("RootEnd");
-    }
-
     // Update zygote mod union table.
     if (partial_gc) {
       zygote_mod_union_table_->Update();
@@ -790,21 +838,6 @@
     mark_sweep.RecursiveMark(partial_gc);
     timings.AddSplit(partial_gc ? "PartialMark" : "RecursiveMark");
 
-    if (concurrent) {
-      dirty_begin = NanoTime();
-      Lock();
-      thread_list->SuspendAll();
-      timings.AddSplit("ReSuspend");
-
-      // Re-mark root set.
-      mark_sweep.ReMarkRoots();
-      timings.AddSplit("ReMarkRoots");
-
-      // Scan dirty objects, this is only required if we are not doing concurrent GC.
-      mark_sweep.RecursiveMarkDirtyObjects();
-      timings.AddSplit("RecursiveMarkDirtyObjects");
-    }
-
     mark_sweep.ProcessReferences(clear_soft_references);
     timings.AddSplit("ProcessReferences");
 
@@ -826,31 +859,17 @@
     mark_sweep.VerifyImageRoots();
     timings.AddSplit("VerifyImageRoots");
 
-    if (concurrent) {
-      thread_list->ResumeAll();
-      dirty_end = NanoTime();
-      Unlock();
-    }
-
     mark_sweep.Sweep(partial_gc);
     timings.AddSplit("Sweep");
 
     cleared_references = mark_sweep.GetClearedReferences();
   }
 
-  if (concurrent) {
-    // Relock since we unlocked earlier.
-    // TODO: We probably don't need to have the heap locked for all remainder of the function, except for GrowForUtilization.
-    Lock();
-  }
-
   GrowForUtilization();
   timings.AddSplit("GrowForUtilization");
 
-  if (!concurrent) {
-    thread_list->ResumeAll();
-    dirty_end = NanoTime();
-  }
+  thread_list->ResumeAll();
+  dirty_end = NanoTime();
 
   EnqueueClearedReferences(&cleared_references);
   RequestHeapTrim();
@@ -859,6 +878,7 @@
   if (VLOG_IS_ON(gc)) {
     uint64_t t1 = NanoTime();
 
+    MutexLock mu(*statistics_lock_);
     // TODO: somehow make the specific GC implementation (here MarkSweep) responsible for logging.
     // Reason: For CMS sometimes initial_size < num_bytes_allocated_ results in overflow (3GB freed message).
     size_t bytes_freed = initial_size - num_bytes_allocated_;
@@ -866,61 +886,241 @@
     duration_ns -= duration_ns % 1000;
 
     // If the GC was slow, then print timings in the log.
-    if (concurrent) {
-      uint64_t pause_roots = (root_end - t0) / 1000 * 1000;
-      uint64_t pause_dirty = (dirty_end - dirty_begin) / 1000 * 1000;
-      if (pause_roots > MsToNs(5) || pause_dirty > MsToNs(5)) {
-        LOG(INFO) << (partial_gc ? "Partial " : "")
-                  << "GC freed " << PrettySize(bytes_freed) << ", " << GetPercentFree() << "% free, "
-                  << PrettySize(num_bytes_allocated_) << "/" << PrettySize(GetTotalMemory()) << ", "
-                  << "paused " << PrettyDuration(pause_roots) << "+" << PrettyDuration(pause_dirty)
-                  << ", total " << PrettyDuration(duration_ns);
-      }
-    } else {
-      if (duration_ns > MsToNs(50)) {
-        uint64_t markSweepTime = (dirty_end - t0) / 1000 * 1000;
-        LOG(INFO) << (partial_gc ? "Partial " : "")
-                  << "GC freed " << PrettySize(bytes_freed) << ", " << GetPercentFree() << "% free, "
-                  << PrettySize(num_bytes_allocated_) << "/" << PrettySize(GetTotalMemory()) << ", "
-                  << "paused " << PrettyDuration(markSweepTime)
-                  << ", total " << PrettyDuration(duration_ns);
-      }
+    if (duration_ns > MsToNs(50)) {
+      uint64_t markSweepTime = (dirty_end - t0) / 1000 * 1000;
+      LOG(INFO) << (partial_gc ? "Partial " : "")
+                      << "GC freed " << PrettySize(bytes_freed) << ", " << GetPercentFree() << "% free, "
+                      << PrettySize(num_bytes_allocated_) << "/" << PrettySize(GetTotalMemory()) << ", "
+                      << "paused " << PrettyDuration(markSweepTime)
+                      << ", total " << PrettyDuration(duration_ns);
     }
   }
   Dbg::GcDidFinish();
   if (VLOG_IS_ON(heap)) {
     timings.Dump();
   }
+}
 
-  is_gc_running_ = false;
+void Heap::CollectGarbageConcurrentMarkSweepPlan(bool partial_gc, bool clear_soft_references) {
+  TimingLogger timings("CollectGarbageInternal");
+  uint64_t t0 = NanoTime(), root_end = 0, dirty_begin = 0, dirty_end = 0;
 
-  // Wake anyone who may have been waiting for the GC to complete.
-  condition_->Broadcast();
+  // Suspend all threads and get exclusive access to the heap.
+  ThreadList* thread_list = Runtime::Current()->GetThreadList();
+  thread_list->SuspendAll();
+  timings.AddSplit("SuspendAll");
+  GlobalSynchronization::mutator_lock_->AssertExclusiveHeld();
+
+  size_t initial_size;
+  {
+    MutexLock mu(*statistics_lock_);
+    initial_size = num_bytes_allocated_;
+  }
+  Object* cleared_references = NULL;
+  {
+    MarkSweep mark_sweep(mark_stack_.get());
+    timings.AddSplit("ctor");
+
+    mark_sweep.Init();
+    timings.AddSplit("Init");
+
+    // Make sure that the tables have the correct pointer for the mark sweep.
+    mod_union_table_->Init(&mark_sweep);
+    zygote_mod_union_table_->Init(&mark_sweep);
+
+    // Clear image space cards and keep track of cards we cleared in the mod-union table.
+    for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
+      Space* space = *it;
+      if (space->IsImageSpace()) {
+        mod_union_table_->ClearCards(*it);
+      } else if (space->GetGcRetentionPolicy() == GCRP_FULL_COLLECT) {
+        zygote_mod_union_table_->ClearCards(space);
+      } else {
+        card_table_->ClearSpaceCards(space);
+      }
+    }
+    timings.AddSplit("ClearCards");
+
+#if VERIFY_MOD_UNION
+    mod_union_table_->Verify();
+    zygote_mod_union_table_->Verify();
+#endif
+
+    if (partial_gc) {
+      // Copy the mark bits over from the live bits, do this as early as possible or else we can
+      // accidentally un-mark roots.
+      // Needed for scanning dirty objects.
+      mark_sweep.CopyMarkBits();
+      timings.AddSplit("CopyMarkBits");
+    }
+
+    {
+      WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      mark_sweep.MarkRoots();
+      timings.AddSplit("MarkRoots");
+    }
+
+    // Roots are marked on the bitmap and the mark_stack is empty.
+    DCHECK(mark_sweep.IsMarkStackEmpty());
+
+    // Allow mutators to go again, acquire share on mutator_lock_ to continue.
+    thread_list->ResumeAll();
+    {
+      ReaderMutexLock reader_lock(*GlobalSynchronization::mutator_lock_);
+      root_end = NanoTime();
+      timings.AddSplit("RootEnd");
+
+      {
+        ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+        // Update zygote mod union table.
+        if (partial_gc) {
+          zygote_mod_union_table_->Update();
+          timings.AddSplit("UpdateZygoteModUnionTable");
+
+          zygote_mod_union_table_->MarkReferences();
+          timings.AddSplit("ZygoteMarkReferences");
+        }
+
+        // Processes the cards we cleared earlier and adds their objects into the mod-union table.
+        mod_union_table_->Update();
+        timings.AddSplit("UpdateModUnionTable");
+      }
+      {
+        WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+        // Scans all objects in the mod-union table.
+        mod_union_table_->MarkReferences();
+        timings.AddSplit("MarkImageToAllocSpaceReferences");
+
+        // Recursively mark all the non-image bits set in the mark bitmap.
+        mark_sweep.RecursiveMark(partial_gc);
+        timings.AddSplit(partial_gc ? "PartialMark" : "RecursiveMark");
+      }
+    }
+    // Release share on mutator_lock_ and then get exclusive access.
+    dirty_begin = NanoTime();
+    thread_list->SuspendAll();
+    timings.AddSplit("ReSuspend");
+    GlobalSynchronization::mutator_lock_->AssertExclusiveHeld();
+
+    {
+      WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      // Re-mark root set.
+      mark_sweep.ReMarkRoots();
+      timings.AddSplit("ReMarkRoots");
+
+      // Scan dirty objects, this is only required if we are not doing concurrent GC.
+      mark_sweep.RecursiveMarkDirtyObjects();
+      timings.AddSplit("RecursiveMarkDirtyObjects");
+    }
+    {
+      ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      mark_sweep.ProcessReferences(clear_soft_references);
+      timings.AddSplit("ProcessReferences");
+    }
+    // Swap the live and mark bitmaps for each alloc space. This is needed since sweep re-swaps
+    // these bitmaps. Doing this enables us to sweep with the heap unlocked since new allocations
+    // set the live bit, but since we have the bitmaps reversed at this point, this sets the mark
+    // bit instead, resulting in no new allocated objects being incorrectly freed by sweep.
+    {
+      WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      for (Spaces::iterator it = spaces_.begin(); it != spaces_.end(); ++it) {
+        Space* space = *it;
+        // We never allocate into zygote spaces.
+        if (space->GetGcRetentionPolicy() == GCRP_ALWAYS_COLLECT) {
+          live_bitmap_->ReplaceBitmap(space->GetLiveBitmap(), space->GetMarkBitmap());
+          mark_bitmap_->ReplaceBitmap(space->GetMarkBitmap(), space->GetLiveBitmap());
+          space->AsAllocSpace()->SwapBitmaps();
+        }
+      }
+    }
+
+    if (kIsDebugBuild) {
+      // Verify that we only reach marked objects from the image space.
+      ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      mark_sweep.VerifyImageRoots();
+      timings.AddSplit("VerifyImageRoots");
+    }
+    thread_list->ResumeAll();
+    dirty_end = NanoTime();
+    GlobalSynchronization::mutator_lock_->AssertNotHeld();
+
+    {
+      // TODO: this lock shouldn't be necessary (it's why we did the bitmap flip above).
+      WriterMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      mark_sweep.Sweep(partial_gc);
+      timings.AddSplit("Sweep");
+    }
+
+    cleared_references = mark_sweep.GetClearedReferences();
+  }
+
+  GrowForUtilization();
+  timings.AddSplit("GrowForUtilization");
+
+  EnqueueClearedReferences(&cleared_references);
+  RequestHeapTrim();
+  timings.AddSplit("Finish");
+
+  if (VLOG_IS_ON(gc)) {
+    uint64_t t1 = NanoTime();
+
+    MutexLock mu(*statistics_lock_);
+    // TODO: somehow make the specific GC implementation (here MarkSweep) responsible for logging.
+    // Reason: For CMS sometimes initial_size < num_bytes_allocated_ results in overflow (3GB freed message).
+    size_t bytes_freed = initial_size - num_bytes_allocated_;
+    uint64_t duration_ns = t1 - t0;
+    duration_ns -= duration_ns % 1000;
+
+    // If the GC was slow, then print timings in the log.
+    uint64_t pause_roots = (root_end - t0) / 1000 * 1000;
+    uint64_t pause_dirty = (dirty_end - dirty_begin) / 1000 * 1000;
+    if (pause_roots > MsToNs(5) || pause_dirty > MsToNs(5)) {
+      LOG(INFO) << (partial_gc ? "Partial " : "")
+                      << "GC freed " << PrettySize(bytes_freed) << ", " << GetPercentFree() << "% free, "
+                      << PrettySize(num_bytes_allocated_) << "/" << PrettySize(GetTotalMemory()) << ", "
+                      << "paused " << PrettyDuration(pause_roots) << "+" << PrettyDuration(pause_dirty)
+                      << ", total " << PrettyDuration(duration_ns);
+    }
+  }
+  Dbg::GcDidFinish();
+  if (VLOG_IS_ON(heap)) {
+    timings.Dump();
+  }
 }
 
 bool Heap::WaitForConcurrentGcToComplete() {
-  lock_->AssertHeld();
-
-  // Busy wait for GC to finish
-  if (is_gc_running_) {
-    uint64_t wait_start = NanoTime();
-
-    do {
-      ScopedThreadStateChange tsc(Thread::Current(), kVmWait);
-      ScopedThreadListLockReleaser list_lock_releaser;
-      condition_->Wait(*lock_);
-    } while (is_gc_running_);
-    uint64_t wait_time = NanoTime() - wait_start;
-    if (wait_time > MsToNs(5)) {
-      LOG(INFO) << "WaitForConcurrentGcToComplete blocked for " << PrettyDuration(wait_time);
+  if (concurrent_gc_) {
+    bool do_wait = false;
+    uint64_t wait_start;
+    {
+      // Check if GC is running holding gc_complete_lock_.
+      MutexLock mu(*gc_complete_lock_);
+      if (is_gc_running_) {
+        wait_start = NanoTime();
+        do_wait = true;
+      }
     }
-    DCHECK(!is_gc_running_);
-    return true;
+    if (do_wait) {
+      // We must wait, change thread state then sleep on gc_complete_cond_.
+      ScopedThreadStateChange tsc(Thread::Current(), kWaitingForGcToComplete);
+      {
+        MutexLock mu(*gc_complete_lock_);
+        while (is_gc_running_) {
+          gc_complete_cond_->Wait(*gc_complete_lock_);
+        }
+      }
+      uint64_t wait_time = NanoTime() - wait_start;
+      if (wait_time > MsToNs(5)) {
+        LOG(INFO) << "WaitForConcurrentGcToComplete blocked for " << PrettyDuration(wait_time);
+      }
+      return true;
+    }
   }
   return false;
 }
 
 void Heap::DumpForSigQuit(std::ostream& os) {
+  MutexLock mu(*statistics_lock_);
   os << "Heap: " << GetPercentFree() << "% free, "
      << PrettySize(num_bytes_allocated_) << "/" << PrettySize(GetTotalMemory())
      << "; " << num_objects_allocated_ << " objects\n";
@@ -950,56 +1150,42 @@
 static const size_t kHeapMinFree = kHeapIdealFree / 4;
 
 void Heap::GrowForUtilization() {
-  lock_->AssertHeld();
+  size_t target_size;
+  bool use_footprint_limit = false;
+  {
+    MutexLock mu(*statistics_lock_);
+    // We know what our utilization is at this moment.
+    // This doesn't actually resize any memory. It just lets the heap grow more when necessary.
+    target_size = num_bytes_allocated_ / Heap::GetTargetHeapUtilization();
 
-  // We know what our utilization is at this moment.
-  // This doesn't actually resize any memory. It just lets the heap grow more
-  // when necessary.
-  size_t target_size(num_bytes_allocated_ / Heap::GetTargetHeapUtilization());
+    if (target_size > num_bytes_allocated_ + kHeapIdealFree) {
+      target_size = num_bytes_allocated_ + kHeapIdealFree;
+    } else if (target_size < num_bytes_allocated_ + kHeapMinFree) {
+      target_size = num_bytes_allocated_ + kHeapMinFree;
+    }
 
-  if (target_size > num_bytes_allocated_ + kHeapIdealFree) {
-    target_size = num_bytes_allocated_ + kHeapIdealFree;
-  } else if (target_size < num_bytes_allocated_ + kHeapMinFree) {
-    target_size = num_bytes_allocated_ + kHeapMinFree;
+    // Calculate when to perform the next ConcurrentGC.
+    if (GetTotalMemory() - num_bytes_allocated_ < concurrent_min_free_) {
+      // Not enough free memory to perform concurrent GC.
+      concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
+    } else {
+      // Compute below to avoid holding both the statistics and the alloc space lock
+      use_footprint_limit = true;
+    }
   }
-
-  // Calculate when to perform the next ConcurrentGC.
-  if (GetTotalMemory() - num_bytes_allocated_ < concurrent_min_free_) {
-    // Not enough free memory to perform concurrent GC.
-    concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
-  } else {
-    concurrent_start_bytes_ = alloc_space_->GetFootprintLimit() - concurrent_start_size_;
+  if (use_footprint_limit) {
+    size_t foot_print_limit = alloc_space_->GetFootprintLimit();
+    MutexLock mu(*statistics_lock_);
+    concurrent_start_bytes_ = foot_print_limit - concurrent_start_size_;
   }
-
   SetIdealFootprint(target_size);
 }
 
 void Heap::ClearGrowthLimit() {
-  ScopedHeapLock heap_lock;
   WaitForConcurrentGcToComplete();
   alloc_space_->ClearGrowthLimit();
 }
 
-pid_t Heap::GetLockOwner() {
-  return lock_->GetOwner();
-}
-
-void Heap::Lock() {
-  // Grab the lock, but put ourselves into kVmWait if it looks
-  // like we're going to have to wait on the mutex. This prevents
-  // deadlock if another thread is calling CollectGarbageInternal,
-  // since they will have the heap lock and be waiting for mutators to
-  // suspend.
-  if (!lock_->TryLock()) {
-    ScopedThreadStateChange tsc(Thread::Current(), kVmWait);
-    lock_->Lock();
-  }
-}
-
-void Heap::Unlock() {
-  lock_->Unlock();
-}
-
 void Heap::SetReferenceOffsets(MemberOffset reference_referent_offset,
     MemberOffset reference_queue_offset,
     MemberOffset reference_queueNext_offset,
@@ -1076,19 +1262,41 @@
 }
 
 void Heap::AddFinalizerReference(Thread* self, Object* object) {
-  ScopedJniThreadState ts(self);
+  ScopedObjectAccess soa(self);
   JValue args[1];
   args[0].SetL(object);
-  ts.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self, NULL, args, NULL);
+  soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self,
+                                                                                  NULL, args, NULL);
+}
+
+size_t Heap::GetBytesAllocated() const {
+  MutexLock mu(*statistics_lock_);
+  return num_bytes_allocated_;
+}
+
+size_t Heap::GetObjectsAllocated() const {
+  MutexLock mu(*statistics_lock_);
+  return num_objects_allocated_;
+}
+
+size_t Heap::GetConcurrentStartSize() const {
+  MutexLock mu(*statistics_lock_);
+  return concurrent_start_size_;
+}
+
+size_t Heap::GetConcurrentMinFree() const {
+  MutexLock mu(*statistics_lock_);
+  return concurrent_min_free_;
 }
 
 void Heap::EnqueueClearedReferences(Object** cleared) {
   DCHECK(cleared != NULL);
   if (*cleared != NULL) {
-    ScopedJniThreadState ts(Thread::Current());
+    ScopedObjectAccess soa(Thread::Current());
     JValue args[1];
     args[0].SetL(*cleared);
-    ts.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(ts.Self(), NULL, args, NULL);
+    soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
+                                                                                 NULL, args, NULL);
     *cleared = NULL;
   }
 }
@@ -1106,29 +1314,27 @@
   JNIEnv* env = Thread::Current()->GetJniEnv();
   DCHECK(WellKnownClasses::java_lang_Daemons != NULL);
   DCHECK(WellKnownClasses::java_lang_Daemons_requestGC != NULL);
-  env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, WellKnownClasses::java_lang_Daemons_requestGC);
+  env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
+                            WellKnownClasses::java_lang_Daemons_requestGC);
   CHECK(!env->ExceptionCheck());
   requesting_gc_ = false;
 }
 
 void Heap::ConcurrentGC() {
-  if (Runtime::Current()->IsShuttingDown()) {
+  if (Runtime::Current()->IsShuttingDown() || !concurrent_gc_) {
     return;
   }
-  ScopedHeapLock heap_lock;
-  // We shouldn't need a WaitForConcurrentGcToComplete here since only
-  // concurrent GC resumes threads before the GC is completed and this function
-  // is only called within the GC daemon thread.
-  CHECK(!is_gc_running_);
-  // Current thread needs to be runnable or else we can't suspend all threads.
-  ScopedThreadStateChange tsc(Thread::Current(), kRunnable);
+  // TODO: We shouldn't need a WaitForConcurrentGcToComplete here since only
+  //       concurrent GC resumes threads before the GC is completed and this function
+  //       is only called within the GC daemon thread.
   if (!WaitForConcurrentGcToComplete()) {
-    CollectGarbageInternal(have_zygote_space_, true, false);
+    // Start a concurrent GC as one wasn't in progress
+    ScopedThreadStateChange tsc(Thread::Current(), kWaitingPerformingGc);
+    CollectGarbageInternal(have_zygote_space_, false);
   }
 }
 
 void Heap::Trim(AllocSpace* alloc_space) {
-  lock_->AssertHeld();
   WaitForConcurrentGcToComplete();
   alloc_space->Trim();
 }
@@ -1140,12 +1346,15 @@
   // to utilization (which is probably inversely proportional to how much benefit we can expect).
   // We could try mincore(2) but that's only a measure of how many pages we haven't given away,
   // not how much use we're making of those pages.
-  float utilization = static_cast<float>(num_bytes_allocated_) / alloc_space_->Size();
   uint64_t ms_time = NsToMs(NanoTime());
-  if (utilization > 0.75f || ms_time - last_trim_time_ < 2 * 1000) {
-    // Don't bother trimming the heap if it's more than 75% utilized, or if a
-    // heap trim occurred in the last two seconds.
-    return;
+  {
+    MutexLock mu(*statistics_lock_);
+    float utilization = static_cast<float>(num_bytes_allocated_) / alloc_space_->Size();
+    if ((utilization > 0.75f) || ((ms_time - last_trim_time_) < 2 * 1000)) {
+      // Don't bother trimming the heap if it's more than 75% utilized, or if a
+      // heap trim occurred in the last two seconds.
+      return;
+    }
   }
   if (!Runtime::Current()->IsFinishedStarting() || Runtime::Current()->IsShuttingDown()) {
     // Heap trimming isn't supported without a Java runtime or Daemons (such as at dex2oat time)
@@ -1156,7 +1365,8 @@
   JNIEnv* env = Thread::Current()->GetJniEnv();
   DCHECK(WellKnownClasses::java_lang_Daemons != NULL);
   DCHECK(WellKnownClasses::java_lang_Daemons_requestHeapTrim != NULL);
-  env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, WellKnownClasses::java_lang_Daemons_requestHeapTrim);
+  env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons,
+                            WellKnownClasses::java_lang_Daemons_requestHeapTrim);
   CHECK(!env->ExceptionCheck());
 }
 
diff --git a/src/heap.h b/src/heap.h
index e908248..89b6ac4 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -62,12 +62,14 @@
   // image_file_names names specify Spaces to load based on
   // ImageWriter output.
   explicit Heap(size_t starting_size, size_t growth_limit, size_t capacity,
-                const std::string& image_file_name);
+                const std::string& image_file_name, bool concurrent_gc);
 
   ~Heap();
 
   // Allocates and initializes storage for an object instance.
-  Object* AllocObject(Class* klass, size_t num_bytes);
+  Object* AllocObject(Class* klass, size_t num_bytes)
+      LOCKS_EXCLUDED(statistics_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Check sanity of given reference. Requires the heap lock.
 #if VERIFY_OBJECT_ENABLED
@@ -86,10 +88,12 @@
 
   // Returns true if 'obj' is a live heap object, false otherwise (including for invalid addresses).
   // Requires the heap lock to be held.
-  bool IsLiveObjectLocked(const Object* obj);
+  bool IsLiveObjectLocked(const Object* obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   // Initiates an explicit garbage collection.
-  void CollectGarbage(bool clear_soft_references);
+  void CollectGarbage(bool clear_soft_references)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
   // Does a concurrent GC, should only be called by the GC daemon thread
   // through runtime.
@@ -100,10 +104,12 @@
   // Implements java.lang.Runtime.totalMemory.
   int64_t GetTotalMemory();
   // Implements java.lang.Runtime.freeMemory.
-  int64_t GetFreeMemory();
+  int64_t GetFreeMemory() LOCKS_EXCLUDED(statistics_lock_);
 
   // Implements VMDebug.countInstancesOfClass.
-  int64_t CountInstances(Class* c, bool count_assignable);
+  int64_t CountInstances(Class* c, bool count_assignable)
+      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Removes the growth limit on the alloc space so it may grow to its maximum capacity. Used to
   // implement dalvik.system.VMRuntime.clearGrowthLimit.
@@ -130,14 +136,6 @@
   // true if we waited for the GC to complete.
   bool WaitForConcurrentGcToComplete();
 
-  pid_t GetLockOwner(); // For SignalCatcher.
-  void AssertLockHeld() {
-    lock_->AssertHeld();
-  }
-  void AssertLockNotHeld() {
-    lock_->AssertNotHeld();
-  }
-
   const Spaces& GetSpaces() {
     return spaces_;
   }
@@ -178,8 +176,7 @@
     verify_objects_ = false;
   }
 
-  // Callers must hold the heap lock.
-  void RecordFreeLocked(size_t freed_objects, size_t freed_bytes);
+  void RecordFree(size_t freed_objects, size_t freed_bytes) LOCKS_EXCLUDED(statistics_lock_);
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if NULL is stored in the field.
@@ -190,7 +187,8 @@
   }
 
   // Write barrier for array operations that update many field positions
-  void WriteBarrierArray(const Object* dst, int /*start_offset*/, size_t /*length TODO: element_count or byte_count?*/) {
+  void WriteBarrierArray(const Object* dst, int /*start_offset*/,
+                         size_t /*length TODO: element_count or byte_count?*/) {
     if (UNLIKELY(!card_marking_disabled_)) {
       card_table_->MarkCard(dst);
     }
@@ -207,34 +205,24 @@
 
   void AddFinalizerReference(Thread* self, Object* object);
 
-  size_t GetBytesAllocated() { return num_bytes_allocated_; }
-  size_t GetObjectsAllocated() { return num_objects_allocated_; }
-
-  size_t GetConcurrentStartSize() const { return concurrent_start_size_; }
-
-  void SetConcurrentStartSize(size_t size) {
-    concurrent_start_size_ = size;
-  }
-
-  size_t GetConcurrentMinFree() const { return concurrent_min_free_; }
-
-  void SetConcurrentMinFree(size_t size) {
-    concurrent_min_free_ = size;
-  }
+  size_t GetBytesAllocated() const LOCKS_EXCLUDED(statistics_lock_);
+  size_t GetObjectsAllocated() const LOCKS_EXCLUDED(statistics_lock_);
+  size_t GetConcurrentStartSize() const LOCKS_EXCLUDED(statistics_lock_);
+  size_t GetConcurrentMinFree() const LOCKS_EXCLUDED(statistics_lock_);
 
   // Functions for getting the bitmap which corresponds to an object's address.
   // This is probably slow, TODO: use better data structure like binary tree .
   Space* FindSpaceFromObject(const Object*) const;
 
-  void DumpForSigQuit(std::ostream& os);
+  void DumpForSigQuit(std::ostream& os) LOCKS_EXCLUDED(statistics_lock_);
 
   void Trim(AllocSpace* alloc_space);
 
-  HeapBitmap* GetLiveBitmap() {
+  HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
     return live_bitmap_.get();
   }
 
-  HeapBitmap* GetMarkBitmap() {
+  HeapBitmap* GetMarkBitmap() SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
     return mark_bitmap_.get();
   }
 
@@ -248,11 +236,11 @@
 
  private:
   // Allocates uninitialized storage.
-  Object* AllocateLocked(size_t num_bytes);
-  Object* AllocateLocked(AllocSpace* space, size_t num_bytes);
-
-  void Lock() EXCLUSIVE_LOCK_FUNCTION();
-  void Unlock() UNLOCK_FUNCTION();
+  Object* Allocate(size_t num_bytes)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  Object* Allocate(AllocSpace* space, size_t num_bytes)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Pushes a list of cleared references out to the managed heap.
   void EnqueueClearedReferences(Object** cleared_references);
@@ -260,35 +248,42 @@
   void RequestHeapTrim();
   void RequestConcurrentGC();
 
-  void RecordAllocationLocked(AllocSpace* space, const Object* object);
+  void RecordAllocation(AllocSpace* space, const Object* object)
+      LOCKS_EXCLUDED(statistics_lock_, GlobalSynchronization::heap_bitmap_lock_);
 
-  // TODO: can we teach GCC to understand the weird locking in here?
-  void CollectGarbageInternal(bool partial_gc, bool concurrent, bool clear_soft_references) NO_THREAD_SAFETY_ANALYSIS;
+  void CollectGarbageInternal(bool partial_gc, bool clear_soft_references)
+      LOCKS_EXCLUDED(gc_complete_lock_,
+                     GlobalSynchronization::heap_bitmap_lock_,
+                     GlobalSynchronization::mutator_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
+  void CollectGarbageMarkSweepPlan(bool partial_gc, bool clear_soft_references)
+      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_,
+                     GlobalSynchronization::mutator_lock_);
+  void CollectGarbageConcurrentMarkSweepPlan(bool partial_gc, bool clear_soft_references)
+      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_,
+                     GlobalSynchronization::mutator_lock_);
 
   // Given the current contents of the alloc space, increase the allowed heap footprint to match
   // the target utilization ratio.  This should only be called immediately after a full garbage
   // collection.
   void GrowForUtilization();
 
-  size_t GetPercentFree();
+  size_t GetPercentFree() EXCLUSIVE_LOCKS_REQUIRED(statistics_lock_);
 
-  void AddSpace(Space* space);
+  void AddSpace(Space* space) LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_);
 
-  void VerifyObjectLocked(const Object *obj);
+  void VerifyObjectLocked(const Object *obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
-  void VerifyHeapLocked();
-
-  static void VerificationCallback(Object* obj, void* arg);
-
-  UniquePtr<Mutex> lock_;
-  UniquePtr<ConditionVariable> condition_;
+  static void VerificationCallback(Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   Spaces spaces_;
 
   // The alloc space which we are currently allocating into.
   AllocSpace* alloc_space_;
 
-  // The mod-union table remembers all of the referneces from the image space to the alloc /
+  // The mod-union table remembers all of the references from the image space to the alloc /
   // zygote spaces.
   UniquePtr<ModUnionTable> mod_union_table_;
 
@@ -297,20 +292,44 @@
 
   UniquePtr<CardTable> card_table_;
 
+  // True for concurrent mark sweep GC, false for mark sweep.
+  const bool concurrent_gc_;
+
+  // If we have a zygote space.
+  bool have_zygote_space_;
+
   // Used by the image writer to disable card marking on copied objects
   // TODO: remove
   bool card_marking_disabled_;
 
+  // Guards access to the state of GC, associated conditional variable is used to signal when a GC
+  // completes.
+  Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  UniquePtr<ConditionVariable> gc_complete_cond_ GUARDED_BY(gc_complete_lock_);
+
   // True while the garbage collector is running.
-  volatile bool is_gc_running_;
+  volatile bool is_gc_running_ GUARDED_BY(gc_complete_lock_);
+
+  // Guards access to heap statistics, some used to calculate when concurrent GC should occur.
+  // TODO: move bytes/objects allocated to thread-locals and remove need for lock?
+  Mutex* statistics_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   // Bytes until concurrent GC starts.
-  size_t concurrent_start_bytes_;
+  size_t concurrent_start_bytes_ GUARDED_BY(statistics_lock_);
   size_t concurrent_start_size_;
   size_t concurrent_min_free_;
 
-  UniquePtr<HeapBitmap> live_bitmap_;
-  UniquePtr<HeapBitmap> mark_bitmap_;
+  // Number of bytes allocated.  Adjusted after each allocation and free.
+  size_t num_bytes_allocated_ GUARDED_BY(statistics_lock_);
+
+  // Number of objects allocated.  Adjusted after each allocation and free.
+  size_t num_objects_allocated_ GUARDED_BY(statistics_lock_);
+
+  // Last trim time
+  uint64_t last_trim_time_;
+
+  UniquePtr<HeapBitmap> live_bitmap_ GUARDED_BY(GlobalSynchronization::heap_bitmap_lock_);
+  UniquePtr<HeapBitmap> mark_bitmap_ GUARDED_BY(GlobalSynchronization::heap_bitmap_lock_);
 
   // True while the garbage collector is trying to signal the GC daemon thread.
   // This flag is needed to prevent recursion from occurring when the JNI calls
@@ -318,20 +337,11 @@
   bool try_running_gc_;
 
   // Used to ensure that we don't ever recursively request GC.
-  bool requesting_gc_;
+  volatile bool requesting_gc_;
 
   // Mark stack that we reuse to avoid re-allocating the mark stack
   UniquePtr<MarkStack> mark_stack_;
 
-  // Number of bytes allocated.  Adjusted after each allocation and free.
-  size_t num_bytes_allocated_;
-
-  // Number of objects allocated.  Adjusted after each allocation and free.
-  size_t num_objects_allocated_;
-
-  // Last trim time
-  uint64_t last_trim_time_;
-
   // offset of java.lang.ref.Reference.referent
   MemberOffset reference_referent_offset_;
 
@@ -347,9 +357,6 @@
   // offset of java.lang.ref.FinalizerReference.zombie
   MemberOffset finalizer_reference_zombie_offset_;
 
-  // If we have a zygote space.
-  bool have_zygote_space_;
-
   // Target ideal heap utilization ratio
   float target_utilization_;
 
diff --git a/src/heap_bitmap.h b/src/heap_bitmap.h
index 4333199..98b42b3 100644
--- a/src/heap_bitmap.h
+++ b/src/heap_bitmap.h
@@ -25,13 +25,15 @@
 
   class HeapBitmap {
    public:
-    bool Test(const Object* obj) {
+    bool Test(const Object* obj)
+        SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
       SpaceBitmap* bitmap = GetSpaceBitmap(obj);
       DCHECK(bitmap != NULL);
       return bitmap->Test(obj);
     }
 
-    void Clear(const Object* obj) {
+    void Clear(const Object* obj)
+        EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
       SpaceBitmap* bitmap = GetSpaceBitmap(obj);
       DCHECK(bitmap != NULL)
         << "tried to clear object "
@@ -40,7 +42,8 @@
       return bitmap->Clear(obj);
     }
 
-    void Set(const Object* obj) {
+    void Set(const Object* obj)
+        EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
       SpaceBitmap* bitmap = GetSpaceBitmap(obj);
       DCHECK(bitmap != NULL)
         << "tried to mark object "
@@ -59,7 +62,8 @@
       return NULL;
     }
 
-    void Walk(SpaceBitmap::Callback* callback, void* arg) {
+    void Walk(SpaceBitmap::Callback* callback, void* arg)
+        SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
       // TODO: C++0x auto
       for (Bitmaps::iterator cur = bitmaps_.begin(); cur != bitmaps_.end(); ++cur) {
         (*cur)->Walk(callback, arg);
@@ -67,7 +71,8 @@
     }
 
     // Find and replace a bitmap pointer, this is used by for the bitmap swapping in the GC.
-    void ReplaceBitmap(SpaceBitmap* old_bitmap, SpaceBitmap* new_bitmap);
+    void ReplaceBitmap(SpaceBitmap* old_bitmap, SpaceBitmap* new_bitmap)
+        EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
     HeapBitmap(Heap* heap) : heap_(heap) {
 
diff --git a/src/heap_test.cc b/src/heap_test.cc
index 48aa425..d846db5 100644
--- a/src/heap_test.cc
+++ b/src/heap_test.cc
@@ -32,16 +32,18 @@
 }
 
 TEST_F(HeapTest, GarbageCollectClassLinkerInit) {
-  // garbage is created during ClassLinker::Init
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    // garbage is created during ClassLinker::Init
 
-  Class* c = class_linker_->FindSystemClass("[Ljava/lang/Object;");
-  for (size_t i = 0; i < 1024; ++i) {
-    SirtRef<ObjectArray<Object> > array(ObjectArray<Object>::Alloc(c, 2048));
-    for (size_t j = 0; j < 2048; ++j) {
-      array->Set(j, String::AllocFromModifiedUtf8("hello, world!"));
+    Class* c = class_linker_->FindSystemClass("[Ljava/lang/Object;");
+    for (size_t i = 0; i < 1024; ++i) {
+      SirtRef<ObjectArray<Object> > array(ObjectArray<Object>::Alloc(c, 2048));
+      for (size_t j = 0; j < 2048; ++j) {
+        array->Set(j, String::AllocFromModifiedUtf8("hello, world!"));
+      }
     }
   }
-
   Runtime::Current()->GetHeap()->CollectGarbage(false);
 }
 
diff --git a/src/hprof/hprof.cc b/src/hprof/hprof.cc
index d806d71..d0c87be 100644
--- a/src/hprof/hprof.cc
+++ b/src/hprof/hprof.cc
@@ -47,7 +47,7 @@
 #include "object_utils.h"
 #include "os.h"
 #include "safe_map.h"
-#include "scoped_heap_lock.h"
+#include "scoped_thread_state_change.h"
 #include "space.h"
 #include "stringprintf.h"
 #include "thread_list.h"
@@ -165,8 +165,8 @@
 typedef HprofId HprofStringId;
 typedef HprofId HprofObjectId;
 typedef HprofId HprofClassObjectId;
-typedef std::set<const Class*> ClassSet;
-typedef std::set<const Class*>::iterator ClassSetIterator;
+typedef std::set<Class*> ClassSet;
+typedef std::set<Class*>::iterator ClassSetIterator;
 typedef SafeMap<std::string, size_t> StringMap;
 typedef SafeMap<std::string, size_t>::iterator StringMapIterator;
 
@@ -401,11 +401,16 @@
     free(body_data_ptr_);
   }
 
-  void Dump() {
+  void Dump()
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_) {
     // Walk the roots and the heap.
     current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_SEGMENT, HPROF_TIME);
     Runtime::Current()->VisitRoots(RootVisitor, this);
-    Runtime::Current()->GetHeap()->GetLiveBitmap()->Walk(HeapBitmapCallback, this);
+    {
+      ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+      Runtime::Current()->GetHeap()->GetLiveBitmap()->Walk(HeapBitmapCallback, this);
+    }
     current_record_.StartNewRecord(body_fp_, HPROF_TAG_HEAP_DUMP_END, HPROF_TIME);
     current_record_.Flush();
     fflush(body_fp_);
@@ -464,27 +469,29 @@
   }
 
  private:
-  static void RootVisitor(const Object* obj, void* arg) {
+  static void RootVisitor(const Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK(arg != NULL);
     Hprof* hprof = reinterpret_cast<Hprof*>(arg);
     hprof->VisitRoot(obj);
   }
 
-  static void HeapBitmapCallback(Object* obj, void* arg) {
+  static void HeapBitmapCallback(Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK(obj != NULL);
     CHECK(arg != NULL);
     Hprof* hprof = reinterpret_cast<Hprof*>(arg);
     hprof->DumpHeapObject(obj);
   }
 
-  void VisitRoot(const Object* obj);
+  void VisitRoot(const Object* obj) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  int DumpHeapObject(const Object* obj);
+  int DumpHeapObject(Object* obj) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void Finish() {
   }
 
-  int WriteClassTable() {
+  int WriteClassTable() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     HprofRecord* rec = &current_record_;
     uint32_t nextSerialNumber = 1;
 
@@ -551,7 +558,8 @@
 
   int MarkRootObject(const Object* obj, jobject jniObj);
 
-  HprofClassObjectId LookupClassId(const Class* c) {
+  HprofClassObjectId LookupClassId(Class* c)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (c == NULL) {
       // c is the superclass of java.lang.Object or a primitive
       return (HprofClassObjectId)0;
@@ -585,7 +593,8 @@
     return id;
   }
 
-  HprofStringId LookupClassNameId(const Class* c) {
+  HprofStringId LookupClassNameId(const Class* c)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return LookupStringId(PrettyDescriptor(c));
   }
 
@@ -807,7 +816,7 @@
   return HPROF_NULL_STACK_TRACE;
 }
 
-int Hprof::DumpHeapObject(const Object* obj) {
+int Hprof::DumpHeapObject(Object* obj) {
   HprofRecord* rec = &current_record_;
   HprofHeapId desiredHeap = false ? HPROF_HEAP_ZYGOTE : HPROF_HEAP_APP; // TODO: zygote objects?
 
@@ -847,7 +856,7 @@
     // allocated which hasn't been initialized yet.
   } else {
     if (obj->IsClass()) {
-      const Class* thisClass = obj->AsClass();
+      Class* thisClass = obj->AsClass();
       // obj is a ClassObject.
       size_t sFieldCount = thisClass->NumStaticFields();
       if (sFieldCount != 0) {
@@ -1053,15 +1062,11 @@
 // Otherwise, "filename" is used to create an output file.
 void DumpHeap(const char* filename, int fd, bool direct_to_ddms) {
   CHECK(filename != NULL);
-  ScopedHeapLock heap_lock;
-  ScopedThreadStateChange tsc(Thread::Current(), kRunnable);
 
-  ThreadList* thread_list = Runtime::Current()->GetThreadList();
-  thread_list->SuspendAll();
-
+  Runtime::Current()->GetThreadList()->SuspendAll();
   Hprof hprof(filename, fd, direct_to_ddms);
   hprof.Dump();
-  thread_list->ResumeAll();
+  Runtime::Current()->GetThreadList()->ResumeAll();
 }
 
 }  // namespace hprof
diff --git a/src/image.h b/src/image.h
index 6286411..253b762 100644
--- a/src/image.h
+++ b/src/image.h
@@ -94,7 +94,8 @@
     kImageRootsMax,
   };
 
-  Object* GetImageRoot(ImageRoot image_root) const {
+  Object* GetImageRoot(ImageRoot image_root) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetImageRoots()->Get(image_root);
   }
 
diff --git a/src/image_test.cc b/src/image_test.cc
index f9c2d1c..9c947c1 100644
--- a/src/image_test.cc
+++ b/src/image_test.cc
@@ -32,19 +32,21 @@
 
 TEST_F(ImageTest, WriteRead) {
   ScratchFile tmp_oat;
-  std::vector<const DexFile*> dex_files;
-  dex_files.push_back(java_lang_dex_file_);
-  bool success_oat = OatWriter::Create(tmp_oat.GetFile(), NULL, dex_files, 0, "", *compiler_.get());
-  ASSERT_TRUE(success_oat);
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    std::vector<const DexFile*> dex_files;
+    dex_files.push_back(java_lang_dex_file_);
+    bool success_oat = OatWriter::Create(tmp_oat.GetFile(), NULL, dex_files, 0, "", *compiler_.get());
+    ASSERT_TRUE(success_oat);
 
-  // Force all system classes into memory
-  for (size_t i = 0; i < java_lang_dex_file_->NumClassDefs(); ++i) {
-    const DexFile::ClassDef& class_def = java_lang_dex_file_->GetClassDef(i);
-    const char* descriptor = java_lang_dex_file_->GetClassDescriptor(class_def);
-    Class* klass = class_linker_->FindSystemClass(descriptor);
-    EXPECT_TRUE(klass != NULL) << descriptor;
+    // Force all system classes into memory
+    for (size_t i = 0; i < java_lang_dex_file_->NumClassDefs(); ++i) {
+      const DexFile::ClassDef& class_def = java_lang_dex_file_->GetClassDef(i);
+      const char* descriptor = java_lang_dex_file_->GetClassDescriptor(class_def);
+      Class* klass = class_linker_->FindSystemClass(descriptor);
+      EXPECT_TRUE(klass != NULL) << descriptor;
+    }
   }
-
   ImageWriter writer(NULL);
   ScratchFile tmp_image;
   const uintptr_t requested_image_base = 0x60000000;
@@ -81,7 +83,15 @@
   image.append(tmp_image.GetFilename());
   options.push_back(std::make_pair(image.c_str(), reinterpret_cast<void*>(NULL)));
 
-  runtime_.reset(Runtime::Create(options, false));
+  if (!Runtime::Create(options, false)) {
+    LOG(FATAL) << "Failed to create runtime";
+    return;
+  }
+  runtime_.reset(Runtime::Current());
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
+  // give it away now and then switch to a more manageable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  ScopedObjectAccess soa(Thread::Current());
   ASSERT_TRUE(runtime_.get() != NULL);
   class_linker_ = runtime_->GetClassLinker();
 
diff --git a/src/image_writer.cc b/src/image_writer.cc
index 59b7e80..7c88c95 100644
--- a/src/image_writer.cc
+++ b/src/image_writer.cc
@@ -35,6 +35,7 @@
 #include "object.h"
 #include "object_utils.h"
 #include "runtime.h"
+#include "scoped_thread_state_change.h"
 #include "space.h"
 #include "UniquePtr.h"
 #include "utils.h"
@@ -71,9 +72,13 @@
   }
   class_linker->RegisterOatFile(*oat_file_);
 
-  PruneNonImageClasses();  // Remove junk
-  ComputeLazyFieldsForImageClasses();  // Add useful information
-  ComputeEagerResolvedStrings();
+  {
+    Thread::Current()->TransitionFromSuspendedToRunnable();
+    PruneNonImageClasses();  // Remove junk
+    ComputeLazyFieldsForImageClasses();  // Add useful information
+    ComputeEagerResolvedStrings();
+    Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  }
   heap->CollectGarbage(false);  // Remove garbage
   // Trim size of alloc spaces
   // TODO: C++0x auto
@@ -90,9 +95,13 @@
   CheckNonImageClassesRemoved();
 #endif
   heap->DisableCardMarking();
-  CalculateNewObjectOffsets();
-  CopyAndFixupObjects();
-  PatchOatCodeAndMethods(compiler);
+  {
+    Thread::Current()->TransitionFromSuspendedToRunnable();
+    CalculateNewObjectOffsets();
+    CopyAndFixupObjects();
+    PatchOatCodeAndMethods(compiler);
+    Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  }
 
   UniquePtr<File> file(OS::OpenFile(image_filename.c_str(), true));
   if (file.get() == NULL) {
@@ -145,7 +154,7 @@
 void ImageWriter::ComputeLazyFieldsForImageClasses() {
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
-  class_linker->VisitClasses(ComputeLazyFieldsForClassesVisitor, NULL);
+  class_linker->VisitClassesWithoutClassesLock(ComputeLazyFieldsForClassesVisitor, NULL);
 }
 
 bool ImageWriter::ComputeLazyFieldsForClassesVisitor(Class* c, void* /*arg*/) {
@@ -178,6 +187,7 @@
 
 void ImageWriter::ComputeEagerResolvedStrings() {
   // TODO: Check image spaces only?
+  ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
   Runtime::Current()->GetHeap()->GetLiveBitmap()->Walk(ComputeEagerResolvedStringsCallback, this);
 }
 
@@ -258,6 +268,7 @@
     return;
   }
 
+  ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
   Runtime::Current()->GetHeap()->GetLiveBitmap()->Walk(CheckNonImageClassesRemovedCallback, this);
 }
 
@@ -392,6 +403,7 @@
   // TODO: heap validation can't handle this fix up pass
   heap->DisableObjectValidation();
   // TODO: Image spaces only?
+  ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
   heap->GetLiveBitmap()->Walk(CopyAndFixupObjectsCallback, this);
 }
 
@@ -568,38 +580,43 @@
   }
 }
 
-static Method* GetReferrerMethod(const Compiler::PatchInformation* patch) {
+static Method* GetReferrerMethod(const Compiler::PatchInformation* patch)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  ScopedObjectAccessUnchecked soa(Thread::Current());
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  DexCache* dex_cache = class_linker->FindDexCache(patch->GetDexFile());
   Method* method = class_linker->ResolveMethod(patch->GetDexFile(),
                                                patch->GetReferrerMethodIdx(),
-                                               patch->GetDexCache(),
+                                               dex_cache,
                                                NULL,
                                                patch->GetReferrerIsDirect());
   CHECK(method != NULL)
     << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx();
   CHECK(!method->IsRuntimeMethod())
     << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx();
-  CHECK(patch->GetDexCache()->GetResolvedMethods()->Get(patch->GetReferrerMethodIdx()) == method)
+  CHECK(dex_cache->GetResolvedMethods()->Get(patch->GetReferrerMethodIdx()) == method)
     << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
-    << PrettyMethod(patch->GetDexCache()->GetResolvedMethods()->Get(patch->GetReferrerMethodIdx())) << " "
+    << PrettyMethod(dex_cache->GetResolvedMethods()->Get(patch->GetReferrerMethodIdx())) << " "
     << PrettyMethod(method);
   return method;
 }
 
-static Method* GetTargetMethod(const Compiler::PatchInformation* patch) {
+static Method* GetTargetMethod(const Compiler::PatchInformation* patch)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  DexCache* dex_cache = class_linker->FindDexCache(patch->GetDexFile());
   Method* method = class_linker->ResolveMethod(patch->GetDexFile(),
                                                patch->GetTargetMethodIdx(),
-                                               patch->GetDexCache(),
+                                               dex_cache,
                                                NULL,
                                                patch->GetTargetIsDirect());
   CHECK(method != NULL)
     << patch->GetDexFile().GetLocation() << " " << patch->GetTargetMethodIdx();
   CHECK(!method->IsRuntimeMethod())
     << patch->GetDexFile().GetLocation() << " " << patch->GetTargetMethodIdx();
-  CHECK(patch->GetDexCache()->GetResolvedMethods()->Get(patch->GetTargetMethodIdx()) == method)
+  CHECK(dex_cache->GetResolvedMethods()->Get(patch->GetTargetMethodIdx()) == method)
     << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
-    << PrettyMethod(patch->GetDexCache()->GetResolvedMethods()->Get(patch->GetTargetMethodIdx())) << " "
+    << PrettyMethod(dex_cache->GetResolvedMethods()->Get(patch->GetTargetMethodIdx())) << " "
     << PrettyMethod(method);
   return method;
 }
diff --git a/src/image_writer.h b/src/image_writer.h
index 07d55dc..f768d87 100644
--- a/src/image_writer.h
+++ b/src/image_writer.h
@@ -39,7 +39,8 @@
 class ImageWriter {
  public:
   explicit ImageWriter(const std::set<std::string>* image_classes)
-      : image_end_(0), image_begin_(NULL), image_classes_(image_classes), oat_begin_(NULL) {}
+      : oat_file_(NULL), image_end_(0), image_begin_(NULL), image_classes_(image_classes),
+        oat_begin_(NULL) {}
 
   ~ImageWriter() {}
 
@@ -47,13 +48,15 @@
              uintptr_t image_begin,
              const std::string& oat_filename,
              const std::string& oat_location,
-             const Compiler& compiler);
+             const Compiler& compiler)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
  private:
   bool AllocMemory();
 
   // we use the lock word to store the offset of the object in the image
-  void AssignImageOffset(Object* object) {
+  void AssignImageOffset(Object* object)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(object != NULL);
     SetImageOffset(object, image_end_);
     image_end_ += RoundUp(object->SizeOf(), 8);  // 64-bit alignment
@@ -105,38 +108,55 @@
     return oat_begin_ + offset;
   }
 
-  bool IsImageClass(const Class* klass);
+  bool IsImageClass(const Class* klass) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   void DumpImageClasses();
 
-  void ComputeLazyFieldsForImageClasses();
-  static bool ComputeLazyFieldsForClassesVisitor(Class* klass, void* arg);
+  void ComputeLazyFieldsForImageClasses()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static bool ComputeLazyFieldsForClassesVisitor(Class* klass, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Wire dex cache resolved strings to strings in the image to avoid runtime resolution
   void ComputeEagerResolvedStrings();
-  static void ComputeEagerResolvedStringsCallback(Object* obj, void* arg);
+  static void ComputeEagerResolvedStringsCallback(Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void PruneNonImageClasses();
-  static bool NonImageClassesVisitor(Class* c, void* arg);
+  void PruneNonImageClasses() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static bool NonImageClassesVisitor(Class* c, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void CheckNonImageClassesRemoved();
-  static void CheckNonImageClassesRemovedCallback(Object* obj, void* arg);
+  static void CheckNonImageClassesRemovedCallback(Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void CalculateNewObjectOffsets();
-  ObjectArray<Object>* CreateImageRoots() const;
-  static void CalculateNewObjectOffsetsCallback(Object* obj, void* arg);
+  void CalculateNewObjectOffsets() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  ObjectArray<Object>* CreateImageRoots() const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void CalculateNewObjectOffsetsCallback(Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void CopyAndFixupObjects();
-  static void CopyAndFixupObjectsCallback(Object* obj, void* arg);
-  void FixupClass(const Class* orig, Class* copy);
-  void FixupMethod(const Method* orig, Method* copy);
-  void FixupObject(const Object* orig, Object* copy);
-  void FixupObjectArray(const ObjectArray<Object>* orig, ObjectArray<Object>* copy);
-  void FixupInstanceFields(const Object* orig, Object* copy);
-  void FixupStaticFields(const Class* orig, Class* copy);
-  void FixupFields(const Object* orig, Object* copy, uint32_t ref_offsets, bool is_static);
+  static void CopyAndFixupObjectsCallback(Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void FixupClass(const Class* orig, Class* copy)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void FixupMethod(const Method* orig, Method* copy)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void FixupObject(const Object* orig, Object* copy)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void FixupObjectArray(const ObjectArray<Object>* orig, ObjectArray<Object>* copy)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void FixupInstanceFields(const Object* orig, Object* copy)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void FixupStaticFields(const Class* orig, Class* copy)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void FixupFields(const Object* orig, Object* copy, uint32_t ref_offsets, bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void PatchOatCodeAndMethods(const Compiler& compiler);
-  void SetPatchLocation(const Compiler::PatchInformation* patch, uint32_t value);
+  void PatchOatCodeAndMethods(const Compiler& compiler)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetPatchLocation(const Compiler::PatchInformation* patch, uint32_t value)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   SafeMap<const Object*, size_t> offsets_;
 
diff --git a/src/indirect_reference_table.cc b/src/indirect_reference_table.cc
index 81b87ef..958531d 100644
--- a/src/indirect_reference_table.cc
+++ b/src/indirect_reference_table.cc
@@ -18,6 +18,7 @@
 #include "jni_internal.h"
 #include "reference_table.h"
 #include "runtime.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "utils.h"
 
@@ -91,7 +92,7 @@
     if (topIndex == max_entries_) {
       LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow "
                  << "(max=" << max_entries_ << ")\n"
-                 << Dumpable<IndirectReferenceTable>(*this);
+                 << MutatorLockedDumpable<IndirectReferenceTable>(*this);
     }
 
     size_t newSize = alloc_entries_ * 2;
@@ -101,13 +102,14 @@
     DCHECK_GT(newSize, alloc_entries_);
 
     table_ = reinterpret_cast<const Object**>(realloc(table_, newSize * sizeof(const Object*)));
-    slot_data_ = reinterpret_cast<IndirectRefSlot*>(realloc(slot_data_, newSize * sizeof(IndirectRefSlot)));
+    slot_data_ = reinterpret_cast<IndirectRefSlot*>(realloc(slot_data_,
+                                                            newSize * sizeof(IndirectRefSlot)));
     if (table_ == NULL || slot_data_ == NULL) {
       LOG(FATAL) << "JNI ERROR (app bug): unable to expand "
                  << kind_ << " table (from "
                  << alloc_entries_ << " to " << newSize
                  << ", max=" << max_entries_ << ")\n"
-                 << Dumpable<IndirectReferenceTable>(*this);
+                 << MutatorLockedDumpable<IndirectReferenceTable>(*this);
     }
 
     // Clear the newly-allocated slot_data_ elements.
@@ -150,9 +152,10 @@
 }
 
 void IndirectReferenceTable::AssertEmpty() {
-  if (begin() != end()) {
+  if (UNLIKELY(begin() != end())) {
+    ScopedObjectAccess soa(Thread::Current());
     LOG(FATAL) << "Internal Error: non-empty local reference table\n"
-               << Dumpable<IndirectReferenceTable>(*this);
+               << MutatorLockedDumpable<IndirectReferenceTable>(*this);
   }
 }
 
diff --git a/src/indirect_reference_table.h b/src/indirect_reference_table.h
index 710e43f..c3e17b0 100644
--- a/src/indirect_reference_table.h
+++ b/src/indirect_reference_table.h
@@ -257,7 +257,8 @@
    * Returns NULL if the table is full (max entries reached, or alloc
    * failed during expansion).
    */
-  IndirectRef Add(uint32_t cookie, const Object* obj);
+  IndirectRef Add(uint32_t cookie, const Object* obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Given an IndirectRef in the table, return the Object it refers to.
@@ -287,7 +288,7 @@
 
   void AssertEmpty();
 
-  void Dump(std::ostream& os) const;
+  void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Return the #of entries in the entire table.  This includes holes, and
diff --git a/src/indirect_reference_table_test.cc b/src/indirect_reference_table_test.cc
index 387a2cd..1698f18 100644
--- a/src/indirect_reference_table_test.cc
+++ b/src/indirect_reference_table_test.cc
@@ -24,6 +24,7 @@
 };
 
 TEST_F(IndirectReferenceTableTest, BasicTest) {
+  ScopedObjectAccess soa(Thread::Current());
   static const size_t kTableInitial = 10;
   static const size_t kTableMax = 20;
   IndirectReferenceTable irt(kTableInitial, kTableMax, kGlobal);
diff --git a/src/intern_table.h b/src/intern_table.h
index 04c75d0..0d9e097 100644
--- a/src/intern_table.h
+++ b/src/intern_table.h
@@ -41,24 +41,27 @@
   InternTable();
 
   // Interns a potentially new string in the 'strong' table. (See above.)
-  String* InternStrong(int32_t utf16_length, const char* utf8_data);
+  String* InternStrong(int32_t utf16_length, const char* utf8_data)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Interns a potentially new string in the 'strong' table. (See above.)
-  String* InternStrong(const char* utf8_data);
+  String* InternStrong(const char* utf8_data)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Interns a potentially new string in the 'strong' table. (See above.)
-  String* InternStrong(String* s);
+  String* InternStrong(String* s) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Interns a potentially new string in the 'weak' table. (See above.)
-  String* InternWeak(String* s);
+  String* InternWeak(String* s) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Register a String trusting that it is safe to intern.
   // Used when reinitializing InternTable from an image.
-  void RegisterStrong(String* s);
+  void RegisterStrong(String* s) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void SweepInternTableWeaks(Heap::IsMarkedTester is_marked, void* arg);
+  void SweepInternTableWeaks(Heap::IsMarkedTester is_marked, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
-  bool ContainsWeak(String* s);
+  bool ContainsWeak(String* s) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   size_t Size() const;
 
@@ -69,9 +72,11 @@
  private:
   typedef std::multimap<int32_t, String*> Table;
 
-  String* Insert(String* s, bool is_strong);
+  String* Insert(String* s, bool is_strong)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  String* Lookup(Table& table, String* s, uint32_t hash_code);
+  String* Lookup(Table& table, String* s, uint32_t hash_code)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   String* Insert(Table& table, String* s, uint32_t hash_code);
   void Remove(Table& table, const String* s, uint32_t hash_code);
 
diff --git a/src/intern_table_test.cc b/src/intern_table_test.cc
index 52531ce..0d46076 100644
--- a/src/intern_table_test.cc
+++ b/src/intern_table_test.cc
@@ -24,6 +24,7 @@
 class InternTableTest : public CommonTest {};
 
 TEST_F(InternTableTest, Intern) {
+  ScopedObjectAccess soa(Thread::Current());
   InternTable intern_table;
   SirtRef<String> foo_1(intern_table.InternStrong(3, "foo"));
   SirtRef<String> foo_2(intern_table.InternStrong(3, "foo"));
@@ -41,6 +42,7 @@
 }
 
 TEST_F(InternTableTest, Size) {
+  ScopedObjectAccess soa(Thread::Current());
   InternTable t;
   EXPECT_EQ(0U, t.Size());
   t.InternStrong(3, "foo");
@@ -84,6 +86,7 @@
 }
 
 TEST_F(InternTableTest, SweepInternTableWeaks) {
+  ScopedObjectAccess soa(Thread::Current());
   InternTable t;
   t.InternStrong(3, "foo");
   t.InternStrong(3, "bar");
@@ -98,7 +101,10 @@
   TestPredicate p;
   p.Expect(s0.get());
   p.Expect(s1.get());
-  t.SweepInternTableWeaks(IsMarked, &p);
+  {
+    ReaderMutexLock mu(*GlobalSynchronization::heap_bitmap_lock_);
+    t.SweepInternTableWeaks(IsMarked, &p);
+  }
 
   EXPECT_EQ(2U, t.Size());
 
@@ -109,6 +115,7 @@
 }
 
 TEST_F(InternTableTest, ContainsWeak) {
+  ScopedObjectAccess soa(Thread::Current());
   {
     // Strongs are never weak.
     InternTable t;
diff --git a/src/jdwp/jdwp.h b/src/jdwp/jdwp.h
index 8534c8e..b80033c 100644
--- a/src/jdwp/jdwp.h
+++ b/src/jdwp/jdwp.h
@@ -78,7 +78,8 @@
   MethodId method_id;
   uint64_t dex_pc;
 };
-std::ostream& operator<<(std::ostream& os, const JdwpLocation& rhs);
+std::ostream& operator<<(std::ostream& os, const JdwpLocation& rhs)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 bool operator==(const JdwpLocation& lhs, const JdwpLocation& rhs);
 bool operator!=(const JdwpLocation& lhs, const JdwpLocation& rhs);
 
@@ -118,7 +119,8 @@
    *
    * Returns a newly-allocated JdwpState struct on success, or NULL on failure.
    */
-  static JdwpState* Create(const JdwpOptions* options) NO_THREAD_SAFETY_ANALYSIS; // TODO: make GCC understand.
+  static JdwpState* Create(const JdwpOptions* options)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
   ~JdwpState();
 
@@ -178,7 +180,7 @@
    * The VM has finished initializing.  Only called when the debugger is
    * connected at the time initialization completes.
    */
-  bool PostVMStart();
+  bool PostVMStart() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * A location of interest has been reached.  This is used for breakpoints,
@@ -190,24 +192,30 @@
    *
    * "eventFlags" indicates the types of events that have occurred.
    */
-  bool PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags);
+  bool PostLocationEvent(const JdwpLocation* pLoc, ObjectId thisPtr, int eventFlags)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * An exception has been thrown.
    *
    * Pass in a zeroed-out "*pCatchLoc" if the exception wasn't caught.
    */
-  bool PostException(const JdwpLocation* pThrowLoc, ObjectId excepId, RefTypeId excepClassId, const JdwpLocation* pCatchLoc, ObjectId thisPtr);
+  bool PostException(const JdwpLocation* pThrowLoc, ObjectId excepId, RefTypeId excepClassId,
+                     const JdwpLocation* pCatchLoc, ObjectId thisPtr)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * A thread has started or stopped.
    */
-  bool PostThreadChange(ObjectId threadId, bool start);
+  bool PostThreadChange(ObjectId threadId, bool start)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Class has been prepared.
    */
-  bool PostClassPrepare(JdwpTypeTag tag, RefTypeId refTypeId, const std::string& signature, int status);
+  bool PostClassPrepare(JdwpTypeTag tag, RefTypeId refTypeId, const std::string& signature,
+                        int status)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * The VM is about to stop.
@@ -215,12 +223,13 @@
   bool PostVMDeath();
 
   // Called if/when we realize we're talking to DDMS.
-  void NotifyDdmsActive();
+  void NotifyDdmsActive() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Send up a chunk of DDM data.
    */
-  void DdmSendChunkV(uint32_t type, const iovec* iov, int iov_count);
+  void DdmSendChunkV(uint32_t type, const iovec* iov, int iov_count)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Process a request from the debugger.
@@ -237,13 +246,17 @@
    */
   bool SendRequest(ExpandBuf* pReq);
 
-  void ResetState();
+  void ResetState()
+      LOCKS_EXCLUDED(event_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /* atomic ops to get next serial number */
   uint32_t NextRequestSerial();
   uint32_t NextEventSerial();
 
-  void Run();
+  void Run()
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
 
   /*
    * Register an event by adding it to the event list.
@@ -251,44 +264,60 @@
    * "*pEvent" must be storage allocated with jdwpEventAlloc().  The caller
    * may discard its pointer after calling this.
    */
-  JdwpError RegisterEvent(JdwpEvent* pEvent);
+  JdwpError RegisterEvent(JdwpEvent* pEvent)
+      LOCKS_EXCLUDED(event_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Unregister an event, given the requestId.
    */
-  void UnregisterEventById(uint32_t requestId);
+  void UnregisterEventById(uint32_t requestId)
+      LOCKS_EXCLUDED(event_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Unregister all events.
    */
-  void UnregisterAll();
+  void UnregisterAll()
+      LOCKS_EXCLUDED(event_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  private:
   explicit JdwpState(const JdwpOptions* options);
   bool InvokeInProgress();
   bool IsConnected();
-  void SuspendByPolicy(JdwpSuspendPolicy suspend_policy);
+  void SuspendByPolicy(JdwpSuspendPolicy suspend_policy, JDWP::ObjectId thread_self_id)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
+  void SendRequestAndPossiblySuspend(ExpandBuf* pReq, JdwpSuspendPolicy suspend_policy,
+                                     ObjectId threadId)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   void CleanupMatchList(JdwpEvent** match_list,
-                        int match_count) EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_);
+                        int match_count)
+      EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   void EventFinish(ExpandBuf* pReq);
   void FindMatchingEvents(JdwpEventKind eventKind,
                           ModBasket* basket,
                           JdwpEvent** match_list,
-                          int* pMatchCount) EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_);
-  void UnregisterEvent(JdwpEvent* pEvent) EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_);
+                          int* pMatchCount)
+      EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void UnregisterEvent(JdwpEvent* pEvent)
+      EXCLUSIVE_LOCKS_REQUIRED(event_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  public: // TODO: fix privacy
   const JdwpOptions* options_;
 
  private:
   /* wait for creation of the JDWP thread */
-  Mutex thread_start_lock_;
-  ConditionVariable thread_start_cond_;
+  Mutex thread_start_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable thread_start_cond_ GUARDED_BY(thread_start_lock_);
 
   pthread_t pthread_;
   Thread* thread_;
 
-  volatile int32_t debug_thread_started_;
+  volatile int32_t debug_thread_started_ GUARDED_BY(thread_start_lock_);
   ObjectId debug_thread_id_;
 
  private:
@@ -300,14 +329,14 @@
 
  private:
   // For wait-for-debugger.
-  Mutex attach_lock_;
-  ConditionVariable attach_cond_;
+  Mutex attach_lock_ ACQUIRED_AFTER(thread_start_lock_);
+  ConditionVariable attach_cond_ GUARDED_BY(attach_lock_);
 
   // Time of last debugger activity, in milliseconds.
   int64_t last_activity_time_ms_;
 
   // Global counters and a mutex to protect them.
-  Mutex serial_lock_;
+  Mutex serial_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   uint32_t request_serial_ GUARDED_BY(serial_lock_);
   uint32_t event_serial_ GUARDED_BY(serial_lock_);
 
@@ -318,8 +347,8 @@
 
   // Used to synchronize suspension of the event thread (to avoid receiving "resume"
   // events before the thread has finished suspending itself).
-  Mutex event_thread_lock_;
-  ConditionVariable event_thread_cond_;
+  Mutex event_thread_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable event_thread_cond_ GUARDED_BY(event_thread_lock_);
   ObjectId event_thread_id_;
 
   bool ddm_is_active_;
diff --git a/src/jdwp/jdwp_event.cc b/src/jdwp/jdwp_event.cc
index 891ba53..0e7bb99 100644
--- a/src/jdwp/jdwp_event.cc
+++ b/src/jdwp/jdwp_event.cc
@@ -374,7 +374,8 @@
  * If we find a Count mod before rejecting an event, we decrement it.  We
  * need to do this even if later mods cause us to ignore the event.
  */
-static bool ModsMatch(JdwpEvent* pEvent, ModBasket* basket) {
+static bool ModsMatch(JdwpEvent* pEvent, ModBasket* basket)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   JdwpEventMod* pMod = pEvent->mods;
 
   for (int i = pEvent->modCount; i > 0; i--, pMod++) {
@@ -452,7 +453,8 @@
  * DO NOT call this multiple times for the same eventKind, as Count mods are
  * decremented during the scan.
  */
-void JdwpState::FindMatchingEvents(JdwpEventKind eventKind, ModBasket* basket, JdwpEvent** match_list, int* pMatchCount) {
+void JdwpState::FindMatchingEvents(JdwpEventKind eventKind, ModBasket* basket,
+                                   JdwpEvent** match_list, int* pMatchCount) {
   /* start after the existing entries */
   match_list += *pMatchCount;
 
@@ -490,7 +492,7 @@
  *  SP_EVENT_THREAD - suspend ourselves
  *  SP_ALL - suspend everybody except JDWP support thread
  */
-void JdwpState::SuspendByPolicy(JdwpSuspendPolicy suspend_policy) {
+void JdwpState::SuspendByPolicy(JdwpSuspendPolicy suspend_policy, JDWP::ObjectId thread_self_id) {
   VLOG(jdwp) << "SuspendByPolicy(" << suspend_policy << ")";
   if (suspend_policy == SP_NONE) {
     return;
@@ -503,7 +505,7 @@
   }
 
   /* this is rare but possible -- see CLASS_PREPARE handling */
-  if (Dbg::GetThreadSelfId() == debug_thread_id_) {
+  if (thread_self_id == debug_thread_id_) {
     LOG(INFO) << "NOTE: SuspendByPolicy not suspending JDWP thread";
     return;
   }
@@ -524,7 +526,7 @@
     }
 
     /* grab this before posting/suspending again */
-    SetWaitForEventThread(Dbg::GetThreadSelfId());
+    SetWaitForEventThread(thread_self_id);
 
     /* leave pReq->invoke_needed_ raised so we can check reentrancy */
     Dbg::ExecuteMethod(pReq);
@@ -540,6 +542,23 @@
   }
 }
 
+void JdwpState::SendRequestAndPossiblySuspend(ExpandBuf* pReq, JdwpSuspendPolicy suspend_policy,
+                                              ObjectId threadId) {
+  Thread* self = Thread::Current();
+  self->AssertThreadSuspensionIsAllowable();
+  /* send request and possibly suspend ourselves */
+  if (pReq != NULL) {
+    JDWP::ObjectId thread_self_id = Dbg::GetThreadSelfId();
+    self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSend);
+    if (suspend_policy != SP_NONE) {
+      SetWaitForEventThread(threadId);
+    }
+    EventFinish(pReq);
+    SuspendByPolicy(suspend_policy, thread_self_id);
+    self->TransitionFromSuspendedToRunnable();
+  }
+}
+
 /*
  * Determine if there is a method invocation in progress in the current
  * thread.
@@ -670,17 +689,7 @@
   }
 
   /* send request and possibly suspend ourselves */
-  if (pReq != NULL) {
-    int old_state = Dbg::ThreadWaiting();
-    if (suspend_policy != SP_NONE) {
-      SetWaitForEventThread(threadId);
-    }
-
-    EventFinish(pReq);
-
-    SuspendByPolicy(suspend_policy);
-    Dbg::ThreadContinuing(old_state);
-  }
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, threadId);
 
   return true;
 }
@@ -787,18 +796,7 @@
     CleanupMatchList(match_list, match_count);
   }
 
-  /* send request and possibly suspend ourselves */
-  if (pReq != NULL) {
-    int old_state = Dbg::ThreadWaiting();
-    if (suspend_policy != SP_NONE) {
-      SetWaitForEventThread(basket.threadId);
-    }
-
-    EventFinish(pReq);
-
-    SuspendByPolicy(suspend_policy);
-    Dbg::ThreadContinuing(old_state);
-  }
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
 
   return match_count != 0;
 }
@@ -859,17 +857,7 @@
     CleanupMatchList(match_list, match_count);
   }
 
-  /* send request and possibly suspend ourselves */
-  if (pReq != NULL) {
-    int old_state = Dbg::ThreadWaiting();
-    if (suspend_policy != SP_NONE) {
-      SetWaitForEventThread(basket.threadId);
-    }
-    EventFinish(pReq);
-
-    SuspendByPolicy(suspend_policy);
-    Dbg::ThreadContinuing(old_state);
-  }
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
 
   return match_count != 0;
 }
@@ -968,18 +956,7 @@
     CleanupMatchList(match_list, match_count);
   }
 
-  /* send request and possibly suspend ourselves */
-  if (pReq != NULL) {
-    int old_state = Dbg::ThreadWaiting();
-    if (suspend_policy != SP_NONE) {
-      SetWaitForEventThread(basket.threadId);
-    }
-
-    EventFinish(pReq);
-
-    SuspendByPolicy(suspend_policy);
-    Dbg::ThreadContinuing(old_state);
-  }
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
 
   return match_count != 0;
 }
@@ -990,7 +967,8 @@
  * Valid mods:
  *  Count, ThreadOnly, ClassOnly, ClassMatch, ClassExclude
  */
-bool JdwpState::PostClassPrepare(JdwpTypeTag tag, RefTypeId refTypeId, const std::string& signature, int status) {
+bool JdwpState::PostClassPrepare(JdwpTypeTag tag, RefTypeId refTypeId, const std::string& signature,
+                                 int status) {
   ModBasket basket;
 
   memset(&basket, 0, sizeof(basket));
@@ -1049,17 +1027,7 @@
     CleanupMatchList(match_list, match_count);
   }
 
-  /* send request and possibly suspend ourselves */
-  if (pReq != NULL) {
-    int old_state = Dbg::ThreadWaiting();
-    if (suspend_policy != SP_NONE) {
-      SetWaitForEventThread(basket.threadId);
-    }
-    EventFinish(pReq);
-
-    SuspendByPolicy(suspend_policy);
-    Dbg::ThreadContinuing(old_state);
-  }
+  SendRequestAndPossiblySuspend(pReq, suspend_policy, basket.threadId);
 
   return match_count != 0;
 }
@@ -1105,9 +1073,10 @@
   /*
    * Make sure we're in VMWAIT in case the write blocks.
    */
-  int old_state = Dbg::ThreadWaiting();
+  Thread* self = Thread::Current();
+  self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSend);
   (*transport_->sendBufferedRequest)(this, wrapiov, iov_count + 1);
-  Dbg::ThreadContinuing(old_state);
+  self->TransitionFromSuspendedToRunnable();
 }
 
 }  // namespace JDWP
diff --git a/src/jdwp/jdwp_handler.cc b/src/jdwp/jdwp_handler.cc
index 36fbaf1..fbe9192 100644
--- a/src/jdwp/jdwp_handler.cc
+++ b/src/jdwp/jdwp_handler.cc
@@ -91,13 +91,16 @@
  */
 static JdwpError FinishInvoke(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply,
                               ObjectId thread_id, ObjectId object_id,
-                              RefTypeId class_id, MethodId method_id, bool is_constructor) {
+                              RefTypeId class_id, MethodId method_id, bool is_constructor)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   CHECK(!is_constructor || object_id != 0);
 
   uint32_t arg_count = Read4BE(&buf);
 
   VLOG(jdwp) << StringPrintf("    --> thread_id=%#llx object_id=%#llx", thread_id, object_id);
-  VLOG(jdwp) << StringPrintf("        class_id=%#llx method_id=%x %s.%s", class_id, method_id, Dbg::GetClassName(class_id).c_str(), Dbg::GetMethodName(class_id, method_id).c_str());
+  VLOG(jdwp) << StringPrintf("        class_id=%#llx method_id=%x %s.%s", class_id,
+                             method_id, Dbg::GetClassName(class_id).c_str(),
+                             Dbg::GetMethodName(class_id, method_id).c_str());
   VLOG(jdwp) << StringPrintf("        %d args:", arg_count);
 
   UniquePtr<JdwpTag[]> argTypes(arg_count > 0 ? new JdwpTag[arg_count] : NULL);
@@ -110,7 +113,9 @@
   }
 
   uint32_t options = Read4BE(&buf);  /* enum InvokeOptions bit flags */
-  VLOG(jdwp) << StringPrintf("        options=0x%04x%s%s", options, (options & INVOKE_SINGLE_THREADED) ? " (SINGLE_THREADED)" : "", (options & INVOKE_NONVIRTUAL) ? " (NONVIRTUAL)" : "");
+  VLOG(jdwp) << StringPrintf("        options=0x%04x%s%s", options,
+                             (options & INVOKE_SINGLE_THREADED) ? " (SINGLE_THREADED)" : "",
+                             (options & INVOKE_NONVIRTUAL) ? " (NONVIRTUAL)" : "");
 
   JdwpTag resultTag;
   uint64_t resultValue;
@@ -155,7 +160,8 @@
 /*
  * Request for version info.
  */
-static JdwpError VM_Version(JdwpState*, const uint8_t*, int, ExpandBuf* pReply) {
+static JdwpError VM_Version(JdwpState*, const uint8_t*, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   /* text information on runtime version */
   std::string version(StringPrintf("Android Runtime %s", Runtime::Current()->GetVersion()));
   expandBufAddUtf8String(pReply, version);
@@ -175,7 +181,8 @@
  * referenceTypeID.  We need to send back more than one if the class has
  * been loaded by multiple class loaders.
  */
-static JdwpError VM_ClassesBySignature(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError VM_ClassesBySignature(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   std::string classDescriptor(ReadNewUtf8String(&buf));
   VLOG(jdwp) << "  Req for class by signature '" << classDescriptor << "'";
 
@@ -207,7 +214,8 @@
  * We exclude ourselves from the list, because we don't allow ourselves
  * to be suspended, and that violates some JDWP expectations.
  */
-static JdwpError VM_AllThreads(JdwpState*, const uint8_t*, int, ExpandBuf* pReply) {
+static JdwpError VM_AllThreads(JdwpState*, const uint8_t*, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   std::vector<ObjectId> thread_ids;
   Dbg::GetThreads(0, thread_ids);
 
@@ -222,7 +230,8 @@
 /*
  * List all thread groups that do not have a parent.
  */
-static JdwpError VM_TopLevelThreadGroups(JdwpState*, const uint8_t*, int, ExpandBuf* pReply) {
+static JdwpError VM_TopLevelThreadGroups(JdwpState*, const uint8_t*, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   /*
    * TODO: maintain a list of parentless thread groups in the VM.
    *
@@ -244,7 +253,8 @@
  *
  * All IDs are 8 bytes.
  */
-static JdwpError VM_IDSizes(JdwpState*, const uint8_t*, int, ExpandBuf* pReply) {
+static JdwpError VM_IDSizes(JdwpState*, const uint8_t*, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   expandBufAdd4BE(pReply, sizeof(FieldId));
   expandBufAdd4BE(pReply, sizeof(MethodId));
   expandBufAdd4BE(pReply, sizeof(ObjectId));
@@ -253,7 +263,8 @@
   return ERR_NONE;
 }
 
-static JdwpError VM_Dispose(JdwpState*, const uint8_t*, int, ExpandBuf*) {
+static JdwpError VM_Dispose(JdwpState*, const uint8_t*, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Dbg::Disposed();
   return ERR_NONE;
 }
@@ -264,7 +275,8 @@
  *
  * This needs to increment the "suspend count" on all threads.
  */
-static JdwpError VM_Suspend(JdwpState*, const uint8_t*, int, ExpandBuf*) {
+static JdwpError VM_Suspend(JdwpState*, const uint8_t*, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Dbg::SuspendVM();
   return ERR_NONE;
 }
@@ -272,7 +284,8 @@
 /*
  * Resume execution.  Decrements the "suspend count" of all threads.
  */
-static JdwpError VM_Resume(JdwpState*, const uint8_t*, int, ExpandBuf*) {
+static JdwpError VM_Resume(JdwpState*, const uint8_t*, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Dbg::ResumeVM();
   return ERR_NONE;
 }
@@ -280,7 +293,8 @@
 /*
  * The debugger wants the entire VM to exit.
  */
-static JdwpError VM_Exit(JdwpState*, const uint8_t* buf, int, ExpandBuf*) {
+static JdwpError VM_Exit(JdwpState*, const uint8_t* buf, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   uint32_t exitCode = Get4BE(buf);
 
   LOG(WARNING) << "Debugger is telling the VM to exit with code=" << exitCode;
@@ -295,7 +309,8 @@
  * (Ctrl-Shift-I in Eclipse on an array of objects causes it to create the
  * string "java.util.Arrays".)
  */
-static JdwpError VM_CreateString(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError VM_CreateString(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   std::string str(ReadNewUtf8String(&buf));
   VLOG(jdwp) << "  Req to create string '" << str << "'";
   ObjectId stringId = Dbg::CreateString(str);
@@ -309,7 +324,8 @@
 /*
  * Tell the debugger what we are capable of.
  */
-static JdwpError VM_Capabilities(JdwpState*, const uint8_t*, int, ExpandBuf* pReply) {
+static JdwpError VM_Capabilities(JdwpState*, const uint8_t*, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   expandBufAdd1(pReply, false);   /* canWatchFieldModification */
   expandBufAdd1(pReply, false);   /* canWatchFieldAccess */
   expandBufAdd1(pReply, false);   /* canGetBytecodes */
@@ -320,7 +336,8 @@
   return ERR_NONE;
 }
 
-static JdwpError VM_ClassPaths(JdwpState*, const uint8_t*, int, ExpandBuf* pReply) {
+static JdwpError VM_ClassPaths(JdwpState*, const uint8_t*, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   expandBufAddUtf8String(pReply, "/");
 
   std::vector<std::string> class_path;
@@ -345,14 +362,16 @@
  *
  * Currently does nothing.
  */
-static JdwpError VM_DisposeObjects(JdwpState*, const uint8_t*, int, ExpandBuf*) {
+static JdwpError VM_DisposeObjects(JdwpState*, const uint8_t*, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return ERR_NONE;
 }
 
 /*
  * Tell the debugger what we are capable of.
  */
-static JdwpError VM_CapabilitiesNew(JdwpState*, const uint8_t*, int, ExpandBuf* pReply) {
+static JdwpError VM_CapabilitiesNew(JdwpState*, const uint8_t*, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   expandBufAdd1(pReply, false);   /* canWatchFieldModification */
   expandBufAdd1(pReply, false);   /* canWatchFieldAccess */
   expandBufAdd1(pReply, false);   /* canGetBytecodes */
@@ -382,7 +401,8 @@
   return ERR_NONE;
 }
 
-static JdwpError VM_AllClassesImpl(ExpandBuf* pReply, bool descriptor_and_status, bool generic) {
+static JdwpError VM_AllClassesImpl(ExpandBuf* pReply, bool descriptor_and_status, bool generic)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   std::vector<JDWP::RefTypeId> classes;
   Dbg::GetClassList(classes);
 
@@ -412,15 +432,18 @@
   return ERR_NONE;
 }
 
-static JdwpError VM_AllClasses(JdwpState*, const uint8_t*, int, ExpandBuf* pReply) {
+static JdwpError VM_AllClasses(JdwpState*, const uint8_t*, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return VM_AllClassesImpl(pReply, true, false);
 }
 
-static JdwpError VM_AllClassesWithGeneric(JdwpState*, const uint8_t*, int, ExpandBuf* pReply) {
+static JdwpError VM_AllClassesWithGeneric(JdwpState*, const uint8_t*, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return VM_AllClassesImpl(pReply, true, true);
 }
 
-static JdwpError RT_Modifiers(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_Modifiers(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   return Dbg::GetModifiers(refTypeId, pReply);
 }
@@ -428,7 +451,8 @@
 /*
  * Get values from static fields in a reference type.
  */
-static JdwpError RT_GetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_GetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   uint32_t field_count = Read4BE(&buf);
   expandBufAdd4BE(pReply, field_count);
@@ -445,7 +469,8 @@
 /*
  * Get the name of the source file in which a reference type was declared.
  */
-static JdwpError RT_SourceFile(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_SourceFile(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   std::string source_file;
   JdwpError status = Dbg::GetSourceFile(refTypeId, source_file);
@@ -459,7 +484,8 @@
 /*
  * Return the current status of the reference type.
  */
-static JdwpError RT_Status(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_Status(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   JDWP::JdwpTypeTag type_tag;
   uint32_t class_status;
@@ -474,7 +500,8 @@
 /*
  * Return interfaces implemented directly by this class.
  */
-static JdwpError RT_Interfaces(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_Interfaces(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   VLOG(jdwp) << StringPrintf("  Req for interfaces in %#llx (%s)", refTypeId, Dbg::GetClassName(refTypeId).c_str());
   return Dbg::OutputDeclaredInterfaces(refTypeId, pReply);
@@ -483,7 +510,8 @@
 /*
  * Return the class object corresponding to this type.
  */
-static JdwpError RT_ClassObject(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_ClassObject(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   ObjectId classObjectId;
   JdwpError status = Dbg::GetClassObject(refTypeId, classObjectId);
@@ -500,12 +528,15 @@
  *
  * JDB seems interested, but DEX files don't currently support this.
  */
-static JdwpError RT_SourceDebugExtension(JdwpState*, const uint8_t*, int, ExpandBuf*) {
+static JdwpError RT_SourceDebugExtension(JdwpState*, const uint8_t*, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   /* referenceTypeId in, string out */
   return ERR_ABSENT_INFORMATION;
 }
 
-static JdwpError RT_Signature(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply, bool with_generic) {
+static JdwpError RT_Signature(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply,
+                              bool with_generic)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
 
   VLOG(jdwp) << StringPrintf("  Req for signature of refTypeId=%#llx", refTypeId);
@@ -522,11 +553,14 @@
   return ERR_NONE;
 }
 
-static JdwpError RT_Signature(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply) {
+static JdwpError RT_Signature(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return RT_Signature(state, buf, dataLen, pReply, false);
 }
 
-static JdwpError RT_SignatureWithGeneric(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply) {
+static JdwpError RT_SignatureWithGeneric(JdwpState* state, const uint8_t* buf, int dataLen,
+                                         ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return RT_Signature(state, buf, dataLen, pReply, true);
 }
 
@@ -534,12 +568,14 @@
  * Return the instance of java.lang.ClassLoader that loaded the specified
  * reference type, or null if it was loaded by the system loader.
  */
-static JdwpError RT_ClassLoader(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_ClassLoader(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   return Dbg::GetClassLoader(refTypeId, pReply);
 }
 
-static std::string Describe(const RefTypeId& refTypeId) {
+static std::string Describe(const RefTypeId& refTypeId)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   std::string signature("unknown");
   Dbg::GetSignature(refTypeId, signature);
   return StringPrintf("refTypeId=%#llx (%s)", refTypeId, signature.c_str());
@@ -549,14 +585,16 @@
  * Given a referenceTypeId, return a block of stuff that describes the
  * fields declared by a class.
  */
-static JdwpError RT_FieldsWithGeneric(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_FieldsWithGeneric(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   VLOG(jdwp) << "  Req for fields in " << Describe(refTypeId);
   return Dbg::OutputDeclaredFields(refTypeId, true, pReply);
 }
 
 // Obsolete equivalent of FieldsWithGeneric, without the generic type information.
-static JdwpError RT_Fields(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_Fields(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   VLOG(jdwp) << "  Req for fields in " << Describe(refTypeId);
   return Dbg::OutputDeclaredFields(refTypeId, false, pReply);
@@ -566,14 +604,16 @@
  * Given a referenceTypeID, return a block of goodies describing the
  * methods declared by a class.
  */
-static JdwpError RT_MethodsWithGeneric(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_MethodsWithGeneric(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   VLOG(jdwp) << "  Req for methods in " << Describe(refTypeId);
   return Dbg::OutputDeclaredMethods(refTypeId, true, pReply);
 }
 
 // Obsolete equivalent of MethodsWithGeneric, without the generic type information.
-static JdwpError RT_Methods(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError RT_Methods(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   VLOG(jdwp) << "  Req for methods in " << Describe(refTypeId);
   return Dbg::OutputDeclaredMethods(refTypeId, false, pReply);
@@ -582,7 +622,8 @@
 /*
  * Return the immediate superclass of a class.
  */
-static JdwpError CT_Superclass(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError CT_Superclass(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId class_id = ReadRefTypeId(&buf);
   RefTypeId superClassId;
   JdwpError status = Dbg::GetSuperclass(class_id, superClassId);
@@ -596,7 +637,8 @@
 /*
  * Set static class values.
  */
-static JdwpError CT_SetValues(JdwpState* , const uint8_t* buf, int, ExpandBuf*) {
+static JdwpError CT_SetValues(JdwpState* , const uint8_t* buf, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId class_id = ReadRefTypeId(&buf);
   uint32_t values = Read4BE(&buf);
 
@@ -624,7 +666,9 @@
  * Example: Eclipse sometimes uses java/lang/Class.forName(String s) on
  * values in the "variables" display.
  */
-static JdwpError CT_InvokeMethod(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply) {
+static JdwpError CT_InvokeMethod(JdwpState* state, const uint8_t* buf, int dataLen,
+                                 ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId class_id = ReadRefTypeId(&buf);
   ObjectId thread_id = ReadObjectId(&buf);
   MethodId method_id = ReadMethodId(&buf);
@@ -639,7 +683,9 @@
  * Example: in IntelliJ, create a watch on "new String(myByteArray)" to
  * see the contents of a byte[] as a string.
  */
-static JdwpError CT_NewInstance(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply) {
+static JdwpError CT_NewInstance(JdwpState* state, const uint8_t* buf, int dataLen,
+                                ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId class_id = ReadRefTypeId(&buf);
   ObjectId thread_id = ReadObjectId(&buf);
   MethodId method_id = ReadMethodId(&buf);
@@ -659,7 +705,8 @@
 /*
  * Create a new array object of the requested type and length.
  */
-static JdwpError AT_newInstance(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError AT_newInstance(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId arrayTypeId = ReadRefTypeId(&buf);
   uint32_t length = Read4BE(&buf);
 
@@ -680,7 +727,8 @@
 /*
  * Return line number information for the method, if present.
  */
-static JdwpError M_LineTable(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError M_LineTable(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId refTypeId = ReadRefTypeId(&buf);
   MethodId method_id = ReadMethodId(&buf);
 
@@ -691,11 +739,15 @@
   return ERR_NONE;
 }
 
-static JdwpError M_VariableTable(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply, bool generic) {
+static JdwpError M_VariableTable(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply,
+                                 bool generic)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId class_id = ReadRefTypeId(&buf);
   MethodId method_id = ReadMethodId(&buf);
 
-  VLOG(jdwp) << StringPrintf("  Req for LocalVarTab in class=%s method=%s", Dbg::GetClassName(class_id).c_str(), Dbg::GetMethodName(class_id, method_id).c_str());
+  VLOG(jdwp) << StringPrintf("  Req for LocalVarTab in class=%s method=%s",
+                             Dbg::GetClassName(class_id).c_str(),
+                             Dbg::GetMethodName(class_id, method_id).c_str());
 
   // We could return ERR_ABSENT_INFORMATION here if the DEX file was built without local variable
   // information. That will cause Eclipse to make a best-effort attempt at displaying local
@@ -705,11 +757,15 @@
   return ERR_NONE;
 }
 
-static JdwpError M_VariableTable(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply) {
+static JdwpError M_VariableTable(JdwpState* state, const uint8_t* buf, int dataLen,
+                                 ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return M_VariableTable(state, buf, dataLen, pReply, false);
 }
 
-static JdwpError M_VariableTableWithGeneric(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply) {
+static JdwpError M_VariableTableWithGeneric(JdwpState* state, const uint8_t* buf, int dataLen,
+                                            ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return M_VariableTable(state, buf, dataLen, pReply, true);
 }
 
@@ -720,7 +776,8 @@
  * This can get called on different things, e.g. thread_id gets
  * passed in here.
  */
-static JdwpError OR_ReferenceType(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError OR_ReferenceType(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId object_id = ReadObjectId(&buf);
   VLOG(jdwp) << StringPrintf("  Req for type of object_id=%#llx", object_id);
   return Dbg::GetReferenceType(object_id, pReply);
@@ -729,7 +786,8 @@
 /*
  * Get values from the fields of an object.
  */
-static JdwpError OR_GetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError OR_GetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId object_id = ReadObjectId(&buf);
   uint32_t field_count = Read4BE(&buf);
 
@@ -751,7 +809,8 @@
 /*
  * Set values in the fields of an object.
  */
-static JdwpError OR_SetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf*) {
+static JdwpError OR_SetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId object_id = ReadObjectId(&buf);
   uint32_t field_count = Read4BE(&buf);
 
@@ -785,7 +844,9 @@
  * object), it will try to invoke the object's toString() function.  This
  * feature becomes crucial when examining ArrayLists with Eclipse.
  */
-static JdwpError OR_InvokeMethod(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply) {
+static JdwpError OR_InvokeMethod(JdwpState* state, const uint8_t* buf, int dataLen,
+                                 ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId object_id = ReadObjectId(&buf);
   ObjectId thread_id = ReadObjectId(&buf);
   RefTypeId class_id = ReadRefTypeId(&buf);
@@ -797,7 +858,8 @@
 /*
  * Disable garbage collection of the specified object.
  */
-static JdwpError OR_DisableCollection(JdwpState*, const uint8_t*, int, ExpandBuf*) {
+static JdwpError OR_DisableCollection(JdwpState*, const uint8_t*, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // this is currently a no-op
   return ERR_NONE;
 }
@@ -805,7 +867,8 @@
 /*
  * Enable garbage collection of the specified object.
  */
-static JdwpError OR_EnableCollection(JdwpState*, const uint8_t*, int, ExpandBuf*) {
+static JdwpError OR_EnableCollection(JdwpState*, const uint8_t*, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // this is currently a no-op
   return ERR_NONE;
 }
@@ -813,7 +876,8 @@
 /*
  * Determine whether an object has been garbage collected.
  */
-static JdwpError OR_IsCollected(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError OR_IsCollected(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId object_id;
 
   object_id = ReadObjectId(&buf);
@@ -828,7 +892,8 @@
 /*
  * Return the string value in a string object.
  */
-static JdwpError SR_Value(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError SR_Value(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId stringObject = ReadObjectId(&buf);
   std::string str(Dbg::StringToUtf8(stringObject));
 
@@ -842,7 +907,8 @@
 /*
  * Return a thread's name.
  */
-static JdwpError TR_Name(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError TR_Name(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
 
   VLOG(jdwp) << StringPrintf("  Req for name of thread %#llx", thread_id);
@@ -862,7 +928,8 @@
  * It's supposed to remain suspended even if interpreted code wants to
  * resume it; only the JDI is allowed to resume it.
  */
-static JdwpError TR_Suspend(JdwpState*, const uint8_t* buf, int, ExpandBuf*) {
+static JdwpError TR_Suspend(JdwpState*, const uint8_t* buf, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
 
   if (thread_id == Dbg::GetThreadSelfId()) {
@@ -870,14 +937,18 @@
     return ERR_THREAD_NOT_SUSPENDED;
   }
   VLOG(jdwp) << StringPrintf("  Req to suspend thread %#llx", thread_id);
-  Dbg::SuspendThread(thread_id);
-  return ERR_NONE;
+  Thread* self = Thread::Current();
+  self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSend);
+  JdwpError result = Dbg::SuspendThread(thread_id);
+  self->TransitionFromSuspendedToRunnable();
+  return result;
 }
 
 /*
  * Resume the specified thread.
  */
-static JdwpError TR_Resume(JdwpState*, const uint8_t* buf, int, ExpandBuf*) {
+static JdwpError TR_Resume(JdwpState*, const uint8_t* buf, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
 
   if (thread_id == Dbg::GetThreadSelfId()) {
@@ -892,7 +963,8 @@
 /*
  * Return status of specified thread.
  */
-static JdwpError TR_Status(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError TR_Status(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
 
   VLOG(jdwp) << StringPrintf("  Req for status of thread %#llx", thread_id);
@@ -914,7 +986,8 @@
 /*
  * Return the thread group that the specified thread is a member of.
  */
-static JdwpError TR_ThreadGroup(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError TR_ThreadGroup(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
   return Dbg::GetThreadGroup(thread_id, pReply);
 }
@@ -925,7 +998,8 @@
  * If the thread isn't suspended, the error code isn't defined, but should
  * be THREAD_NOT_SUSPENDED.
  */
-static JdwpError TR_Frames(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError TR_Frames(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
   uint32_t start_frame = Read4BE(&buf);
   uint32_t length = Read4BE(&buf);
@@ -961,7 +1035,8 @@
 /*
  * Returns the #of frames on the specified thread, which must be suspended.
  */
-static JdwpError TR_FrameCount(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError TR_FrameCount(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
 
   if (!Dbg::ThreadExists(thread_id)) {
@@ -984,7 +1059,8 @@
 /*
  * Get the monitor that the thread is waiting on.
  */
-static JdwpError TR_CurrentContendedMonitor(JdwpState*, const uint8_t* buf, int, ExpandBuf*) {
+static JdwpError TR_CurrentContendedMonitor(JdwpState*, const uint8_t* buf, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ReadObjectId(&buf);  // thread_id
 
   // TODO: create an Object to represent the monitor (we're currently
@@ -994,14 +1070,15 @@
 }
 
 /*
- * Return the suspend count for the specified thread.
+ * Return the debug suspend count for the specified thread.
  *
  * (The thread *might* still be running -- it might not have examined
  * its suspend count recently.)
  */
-static JdwpError TR_SuspendCount(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError TR_DebugSuspendCount(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
-  return Dbg::GetThreadSuspendCount(thread_id, pReply);
+  return Dbg::GetThreadDebugSuspendCount(thread_id, pReply);
 }
 
 /*
@@ -1009,7 +1086,8 @@
  *
  * The Eclipse debugger recognizes "main" and "system" as special.
  */
-static JdwpError TGR_Name(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError TGR_Name(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_group_id = ReadObjectId(&buf);
   VLOG(jdwp) << StringPrintf("  Req for name of thread_group_id=%#llx", thread_group_id);
 
@@ -1022,7 +1100,8 @@
  * Returns the thread group -- if any -- that contains the specified
  * thread group.
  */
-static JdwpError TGR_Parent(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError TGR_Parent(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_group_id = ReadObjectId(&buf);
 
   ObjectId parentGroup = Dbg::GetThreadGroupParent(thread_group_id);
@@ -1035,7 +1114,8 @@
  * Return the active threads and thread groups that are part of the
  * specified thread group.
  */
-static JdwpError TGR_Children(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError TGR_Children(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_group_id = ReadObjectId(&buf);
   VLOG(jdwp) << StringPrintf("  Req for threads in thread_group_id=%#llx", thread_group_id);
 
@@ -1059,7 +1139,8 @@
 /*
  * Return the #of components in the array.
  */
-static JdwpError AR_Length(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError AR_Length(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId arrayId = ReadObjectId(&buf);
   VLOG(jdwp) << StringPrintf("  Req for length of array %#llx", arrayId);
 
@@ -1078,7 +1159,8 @@
 /*
  * Return the values from an array.
  */
-static JdwpError AR_GetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError AR_GetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId arrayId = ReadObjectId(&buf);
   uint32_t firstIndex = Read4BE(&buf);
   uint32_t length = Read4BE(&buf);
@@ -1090,17 +1172,20 @@
 /*
  * Set values in an array.
  */
-static JdwpError AR_SetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf*) {
+static JdwpError AR_SetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId arrayId = ReadObjectId(&buf);
   uint32_t firstIndex = Read4BE(&buf);
   uint32_t values = Read4BE(&buf);
 
-  VLOG(jdwp) << StringPrintf("  Req to set array values %#llx first=%d count=%d", arrayId, firstIndex, values);
+  VLOG(jdwp) << StringPrintf("  Req to set array values %#llx first=%d count=%d", arrayId,
+                             firstIndex, values);
 
   return Dbg::SetArrayElements(arrayId, firstIndex, values, buf);
 }
 
-static JdwpError CLR_VisibleClasses(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError CLR_VisibleClasses(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ReadObjectId(&buf);  // classLoaderObject
   // TODO: we should only return classes which have the given class loader as a defining or
   // initiating loader. The former would be easy; the latter is hard, because we don't have
@@ -1113,7 +1198,8 @@
  *
  * Reply with a requestID.
  */
-static JdwpError ER_Set(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply) {
+static JdwpError ER_Set(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   const uint8_t* origBuf = buf;
 
   uint8_t eventKind = Read1(&buf);
@@ -1282,7 +1368,8 @@
  * Clear an event.  Failure to find an event with a matching ID is a no-op
  * and does not return an error.
  */
-static JdwpError ER_Clear(JdwpState* state, const uint8_t* buf, int, ExpandBuf*) {
+static JdwpError ER_Clear(JdwpState* state, const uint8_t* buf, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   uint8_t eventKind;
   eventKind = Read1(&buf);
   uint32_t requestId = Read4BE(&buf);
@@ -1297,7 +1384,8 @@
 /*
  * Return the values of arguments and local variables.
  */
-static JdwpError SF_GetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError SF_GetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
   FrameId frame_id = ReadFrameId(&buf);
   uint32_t slots = Read4BE(&buf);
@@ -1322,7 +1410,8 @@
 /*
  * Set the values of arguments and local variables.
  */
-static JdwpError SF_SetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf*) {
+static JdwpError SF_SetValues(JdwpState*, const uint8_t* buf, int, ExpandBuf*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
   FrameId frame_id = ReadFrameId(&buf);
   uint32_t slots = Read4BE(&buf);
@@ -1345,7 +1434,8 @@
 /*
  * Returns the value of "this" for the specified frame.
  */
-static JdwpError SF_ThisObject(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError SF_ThisObject(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ObjectId thread_id = ReadObjectId(&buf);
   FrameId frame_id = ReadFrameId(&buf);
 
@@ -1361,7 +1451,8 @@
     return rc;
   }
 
-  VLOG(jdwp) << StringPrintf("  Req for 'this' in thread_id=%#llx frame=%lld --> %#llx '%c'", thread_id, frame_id, id, static_cast<char>(tag));
+  VLOG(jdwp) << StringPrintf("  Req for 'this' in thread_id=%#llx frame=%lld --> %#llx '%c'",
+                             thread_id, frame_id, id, static_cast<char>(tag));
   expandBufAdd1(pReply, tag);
   expandBufAddObjectId(pReply, id);
 
@@ -1375,16 +1466,19 @@
  * reused, whereas ClassIds can be recycled like any other object.  (Either
  * that, or I have no idea what this is for.)
  */
-static JdwpError COR_ReflectedType(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply) {
+static JdwpError COR_ReflectedType(JdwpState*, const uint8_t* buf, int, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   RefTypeId classObjectId = ReadRefTypeId(&buf);
-  VLOG(jdwp) << StringPrintf("  Req for refTypeId for class=%#llx (%s)", classObjectId, Dbg::GetClassName(classObjectId).c_str());
+  VLOG(jdwp) << StringPrintf("  Req for refTypeId for class=%#llx (%s)", classObjectId,
+                             Dbg::GetClassName(classObjectId).c_str());
   return Dbg::GetReflectedType(classObjectId, pReply);
 }
 
 /*
  * Handle a DDM packet with a single chunk in it.
  */
-static JdwpError DDM_Chunk(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply) {
+static JdwpError DDM_Chunk(JdwpState* state, const uint8_t* buf, int dataLen, ExpandBuf* pReply)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   uint8_t* replyBuf = NULL;
   int replyLen = -1;
 
@@ -1518,7 +1612,7 @@
   { 11,   9,  TR_CurrentContendedMonitor, "ThreadReference.CurrentContendedMonitor" },
   { 11,   10, NULL, "ThreadReference.Stop" },
   { 11,   11, NULL, "ThreadReference.Interrupt" },
-  { 11,   12, TR_SuspendCount,  "ThreadReference.SuspendCount" },
+  { 11,   12, TR_DebugSuspendCount, "ThreadReference.SuspendCount" },
   { 11,   13, NULL, "ThreadReference.OwnedMonitorsStackDepthInfo" },
   { 11,   14, NULL, "ThreadReference.ForceEarlyReturn" },
 
@@ -1612,7 +1706,8 @@
    * Tell the VM that we're running and shouldn't be interrupted by GC.
    * Do this after anything that can stall indefinitely.
    */
-  Dbg::ThreadRunning();
+  Thread* self = Thread::Current();
+  ThreadState old_state = self->TransitionFromSuspendedToRunnable();
 
   expandBufAddSpace(pReply, kJDWPHeaderLen);
 
@@ -1660,7 +1755,8 @@
   }
 
   /* tell the VM that GC is okay again */
-  Dbg::ThreadWaiting();
+  self->TransitionFromRunnableToSuspended(old_state);
+
 }
 
 }  // namespace JDWP
diff --git a/src/jdwp/jdwp_main.cc b/src/jdwp/jdwp_main.cc
index dfe83ff..beec7af 100644
--- a/src/jdwp/jdwp_main.cc
+++ b/src/jdwp/jdwp_main.cc
@@ -22,6 +22,7 @@
 #include "debugger.h"
 #include "jdwp/jdwp_priv.h"
 #include "logging.h"
+#include "scoped_thread_state_change.h"
 
 #include <stdlib.h>
 #include <unistd.h>
@@ -88,6 +89,8 @@
     : options_(options),
       thread_start_lock_("JDWP thread start lock"),
       thread_start_cond_("JDWP thread start condition variable"),
+      pthread_(0),
+      thread_(NULL),
       debug_thread_started_(false),
       debug_thread_id_(0),
       run(false),
@@ -115,6 +118,7 @@
  * the thread is accepting network connections.
  */
 JdwpState* JdwpState::Create(const JdwpOptions* options) {
+  GlobalSynchronization::mutator_lock_->AssertNotHeld();
   UniquePtr<JdwpState> state(new JdwpState(options));
   switch (options->transport) {
   case kJdwpTransportSocket:
@@ -139,53 +143,62 @@
    * Grab a mutex or two before starting the thread.  This ensures they
    * won't signal the cond var before we're waiting.
    */
-  state->thread_start_lock_.Lock();
-  const bool should_suspend = options->suspend;
-  if (should_suspend) {
-    state->attach_lock_.Lock();
-  }
+  {
+    MutexLock thread_start_locker(state->thread_start_lock_);
+    const bool should_suspend = options->suspend;
+    if (!should_suspend) {
+      /*
+       * We have bound to a port, or are trying to connect outbound to a
+       * debugger.  Create the JDWP thread and let it continue the mission.
+       */
+      CHECK_PTHREAD_CALL(pthread_create, (&state->pthread_, NULL, StartJdwpThread, state.get()), "JDWP thread");
 
-  /*
-   * We have bound to a port, or are trying to connect outbound to a
-   * debugger.  Create the JDWP thread and let it continue the mission.
-   */
-  CHECK_PTHREAD_CALL(pthread_create, (&state->pthread_, NULL, StartJdwpThread, state.get()), "JDWP thread");
+      /*
+       * Wait until the thread finishes basic initialization.
+       * TODO: cond vars should be waited upon in a loop
+       */
+      state->thread_start_cond_.Wait(state->thread_start_lock_);
+    } else {
+      {
+        MutexLock attach_locker(state->attach_lock_);
+        /*
+         * We have bound to a port, or are trying to connect outbound to a
+         * debugger.  Create the JDWP thread and let it continue the mission.
+         */
+        CHECK_PTHREAD_CALL(pthread_create, (&state->pthread_, NULL, StartJdwpThread, state.get()), "JDWP thread");
 
-  /*
-   * Wait until the thread finishes basic initialization.
-   * TODO: cond vars should be waited upon in a loop
-   */
-  state->thread_start_cond_.Wait(state->thread_start_lock_);
-  state->thread_start_lock_.Unlock();
+        /*
+         * Wait until the thread finishes basic initialization.
+         * TODO: cond vars should be waited upon in a loop
+         */
+        state->thread_start_cond_.Wait(state->thread_start_lock_);
 
-  /*
-   * For suspend=y, wait for the debugger to connect to us or for us to
-   * connect to the debugger.
-   *
-   * The JDWP thread will signal us when it connects successfully or
-   * times out (for timeout=xxx), so we have to check to see what happened
-   * when we wake up.
-   */
-  if (should_suspend) {
-    {
-      ScopedThreadStateChange tsc(Thread::Current(), kVmWait);
+        /*
+         * For suspend=y, wait for the debugger to connect to us or for us to
+         * connect to the debugger.
+         *
+         * The JDWP thread will signal us when it connects successfully or
+         * times out (for timeout=xxx), so we have to check to see what happened
+         * when we wake up.
+         */
+        {
+          ScopedThreadStateChange tsc(Thread::Current(), kWaitingForDebuggerToAttach);
+          state->attach_cond_.Wait(state->attach_lock_);
+        }
+      }
+      if (!state->IsActive()) {
+        LOG(ERROR) << "JDWP connection failed";
+        return NULL;
+      }
 
-      state->attach_cond_.Wait(state->attach_lock_);
-      state->attach_lock_.Unlock();
+      LOG(INFO) << "JDWP connected";
+
+      /*
+       * Ordinarily we would pause briefly to allow the debugger to set
+       * breakpoints and so on, but for "suspend=y" the VM init code will
+       * pause the VM when it sends the VM_START message.
+       */
     }
-
-    if (!state->IsActive()) {
-      LOG(ERROR) << "JDWP connection failed";
-      return NULL;
-    }
-
-    LOG(INFO) << "JDWP connected";
-
-    /*
-     * Ordinarily we would pause briefly to allow the debugger to set
-     * breakpoints and so on, but for "suspend=y" the VM init code will
-     * pause the VM when it sends the VM_START message.
-     */
   }
 
   return state.release();
@@ -280,14 +293,18 @@
    */
   thread_ = Thread::Current();
   run = true;
-  android_atomic_release_store(true, &debug_thread_started_);
 
   thread_start_lock_.Lock();
+  debug_thread_started_ = true;
   thread_start_cond_.Broadcast();
   thread_start_lock_.Unlock();
 
-  /* set the thread state to VMWAIT so GCs don't wait for us */
-  Dbg::ThreadWaiting();
+  /* set the thread state to kWaitingInMainDebuggerLoop so GCs don't wait for us */
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_EQ(thread_->GetState(), kNative);
+    thread_->SetState(kWaitingInMainDebuggerLoop);
+  }
 
   /*
    * Loop forever if we're in server mode, processing connections.  In
@@ -327,10 +344,10 @@
     /* process requests until the debugger drops */
     bool first = true;
     while (!Dbg::IsDisposed()) {
-      // sanity check -- shouldn't happen?
-      if (Thread::Current()->GetState() != kVmWait) {
-        LOG(ERROR) << "JDWP thread no longer in VMWAIT (now " << Thread::Current()->GetState() << "); resetting";
-        Dbg::ThreadWaiting();
+      {
+        // sanity check -- shouldn't happen?
+        MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+        CHECK_EQ(thread_->GetState(), kWaitingInMainDebuggerLoop);
       }
 
       if (!(*transport_->processIncoming)(this)) {
@@ -343,7 +360,10 @@
         first = false;
 
         /* set thread ID; requires object registry to be active */
-        debug_thread_id_ = Dbg::GetThreadSelfId();
+        {
+          ScopedObjectAccess soa(thread_);
+          debug_thread_id_ = Dbg::GetThreadSelfId();
+        }
 
         /* wake anybody who's waiting for us */
         MutexLock mu(attach_lock_);
@@ -357,14 +377,16 @@
       ddm_is_active_ = false;
 
       /* broadcast the disconnect; must be in RUNNING state */
-      Dbg::ThreadRunning();
+      thread_->TransitionFromSuspendedToRunnable();
       Dbg::DdmDisconnected();
-      Dbg::ThreadWaiting();
+      thread_->TransitionFromRunnableToSuspended(kWaitingInMainDebuggerLoop);
     }
 
     /* release session state, e.g. remove breakpoint instructions */
-    ResetState();
-
+    {
+      ScopedObjectAccess soa(thread_);
+      ResetState();
+    }
     /* tell the interpreter that the debugger is no longer around */
     Dbg::Disconnected();
 
@@ -377,8 +399,12 @@
     }
   }
 
-  /* back to running, for thread shutdown */
-  Dbg::ThreadRunning();
+  /* back to native, for thread shutdown */
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_EQ(thread_->GetState(), kWaitingInMainDebuggerLoop);
+    thread_->SetState(kNative);
+  }
 
   VLOG(jdwp) << "JDWP: thread detaching and exiting...";
   runtime->DetachCurrentThread();
diff --git a/src/jni_compiler_test.cc b/src/jni_compiler_test.cc
index 22b4b2c..e1332d3 100644
--- a/src/jni_compiler_test.cc
+++ b/src/jni_compiler_test.cc
@@ -22,7 +22,8 @@
 #include "jni_internal.h"
 #include "mem_map.h"
 #include "runtime.h"
-#include "scoped_jni_thread_state.h"
+#include "ScopedLocalRef.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "UniquePtr.h"
 
@@ -38,10 +39,11 @@
 
 class JniCompilerTest : public CommonTest {
  protected:
-  void CompileForTest(ClassLoader* class_loader, bool direct,
+  void CompileForTest(jobject class_loader, bool direct,
                       const char* method_name, const char* method_sig) {
+    ScopedObjectAccess soa(Thread::Current());
     // Compile the native method before starting the runtime
-    Class* c = class_linker_->FindClass("LMyClassNatives;", class_loader);
+    Class* c = class_linker_->FindClass("LMyClassNatives;", soa.Decode<ClassLoader*>(class_loader));
     Method* method;
     if (direct) {
       method = c->FindDirectMethod(method_name, method_sig);
@@ -56,15 +58,20 @@
     ASSERT_TRUE(method->GetCode() != NULL);
   }
 
-  void SetUpForTest(ClassLoader* class_loader, bool direct,
-                    const char* method_name, const char* method_sig,
+  void SetUpForTest(bool direct, const char* method_name, const char* method_sig,
                     void* native_fnptr) {
-    CompileForTest(class_loader, direct, method_name, method_sig);
-    if (!runtime_->IsStarted()) {
+    // Initialize class loader and compile method when runtime not started.
+    if (!runtime_->IsStarted()){
+      {
+        ScopedObjectAccess soa(Thread::Current());
+        class_loader_ = LoadDex("MyClassNatives");
+      }
+      CompileForTest(class_loader_, direct, method_name, method_sig);
+      // Start runtime.
+      Thread::Current()->TransitionFromSuspendedToRunnable();
       runtime_->Start();
     }
-
-    // JNI operations after runtime start
+    // JNI operations after runtime start.
     env_ = Thread::Current()->GetJniEnv();
     jklass_ = env_->FindClass("MyClassNatives");
     ASSERT_TRUE(jklass_ != NULL);
@@ -91,6 +98,8 @@
  public:
   static jclass jklass_;
   static jobject jobj_;
+  static jobject class_loader_;
+
 
  protected:
   JNIEnv* env_;
@@ -99,12 +108,17 @@
 
 jclass JniCompilerTest::jklass_;
 jobject JniCompilerTest::jobj_;
+jobject JniCompilerTest::class_loader_;
 
 int gJava_MyClassNatives_foo_calls = 0;
 void Java_MyClassNatives_foo(JNIEnv* env, jobject thisObj) {
-  // 2 = SirtRef<ClassLoader> + thisObj
-  EXPECT_EQ(2U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 1 = thisObj
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+    GlobalSynchronization::mutator_lock_->AssertNotHeld();
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
@@ -112,8 +126,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunNoArgMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "foo", "()V",
+  SetUpForTest(false, "foo", "()V",
                reinterpret_cast<void*>(&Java_MyClassNatives_foo));
 
   EXPECT_EQ(0, gJava_MyClassNatives_foo_calls);
@@ -124,26 +137,28 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunIntMethodThroughStub) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "bar", "(I)I",
+  SetUpForTest(false, "bar", "(I)I",
                NULL /* calling through stub will link with &Java_MyClassNatives_bar */);
 
+  ScopedObjectAccess soa(Thread::Current());
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", class_loader.get(), reason))
-      << reason;
+  ASSERT_TRUE(
+      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", soa.Decode<ClassLoader*>(class_loader_),
+                                                         reason)) << reason;
 
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24);
   EXPECT_EQ(25, result);
 }
 
 TEST_F(JniCompilerTest, CompileAndRunStaticIntMethodThroughStub) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), true, "sbar", "(I)I",
+  SetUpForTest(true, "sbar", "(I)I",
                NULL /* calling through stub will link with &Java_MyClassNatives_sbar */);
 
+  ScopedObjectAccess soa(Thread::Current());
   std::string reason;
-  ASSERT_TRUE(Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", class_loader.get(), reason))
-      << reason;
+  ASSERT_TRUE(
+      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", soa.Decode<ClassLoader*>(class_loader_),
+                                                         reason)) << reason;
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42);
   EXPECT_EQ(43, result);
@@ -151,9 +166,12 @@
 
 int gJava_MyClassNatives_fooI_calls = 0;
 jint Java_MyClassNatives_fooI(JNIEnv* env, jobject thisObj, jint x) {
-  // 2 = SirtRef<ClassLoader> + thisObj
-  EXPECT_EQ(2U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 1 = thisObj
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
@@ -162,8 +180,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunIntMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "fooI", "(I)I",
+  SetUpForTest(false, "fooI", "(I)I",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooI));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooI_calls);
@@ -177,9 +194,12 @@
 
 int gJava_MyClassNatives_fooII_calls = 0;
 jint Java_MyClassNatives_fooII(JNIEnv* env, jobject thisObj, jint x, jint y) {
-  // 2 = SirtRef<ClassLoader> + thisObj
-  EXPECT_EQ(2U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 1 = thisObj
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
@@ -188,8 +208,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunIntIntMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "fooII", "(II)I",
+  SetUpForTest(false, "fooII", "(II)I",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooII));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooII_calls);
@@ -204,9 +223,12 @@
 
 int gJava_MyClassNatives_fooJJ_calls = 0;
 jlong Java_MyClassNatives_fooJJ(JNIEnv* env, jobject thisObj, jlong x, jlong y) {
-  // 2 = SirtRef<ClassLoader> + thisObj
-  EXPECT_EQ(2U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 1 = thisObj
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
@@ -215,8 +237,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunLongLongMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "fooJJ", "(JJ)J",
+  SetUpForTest(false, "fooJJ", "(JJ)J",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooJJ));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooJJ_calls);
@@ -232,9 +253,12 @@
 
 int gJava_MyClassNatives_fooDD_calls = 0;
 jdouble Java_MyClassNatives_fooDD(JNIEnv* env, jobject thisObj, jdouble x, jdouble y) {
-  // 2 = SirtRef<ClassLoader> + thisObj
-  EXPECT_EQ(2U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 1 = thisObj
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
@@ -243,8 +267,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunDoubleDoubleMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "fooDD", "(DD)D",
+  SetUpForTest(false, "fooDD", "(DD)D",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooDD));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooDD_calls);
@@ -261,9 +284,12 @@
 
 int gJava_MyClassNatives_fooJJ_synchronized_calls = 0;
 jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv* env, jobject thisObj, jlong x, jlong y) {
-  // 2 = SirtRef<ClassLoader> + thisObj
-  EXPECT_EQ(2U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 1 = thisObj
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
@@ -272,8 +298,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRun_fooJJ_synchronized) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "fooJJ_synchronized", "(JJ)J",
+  SetUpForTest(false, "fooJJ_synchronized", "(JJ)J",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooJJ_synchronized));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooJJ_synchronized_calls);
@@ -287,9 +312,12 @@
 int gJava_MyClassNatives_fooIOO_calls = 0;
 jobject Java_MyClassNatives_fooIOO(JNIEnv* env, jobject thisObj, jint x, jobject y,
                             jobject z) {
-  // 4 = SirtRef<ClassLoader> + this + y + z
-  EXPECT_EQ(4U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 3 = this + y + z
+  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
@@ -305,8 +333,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunIntObjectObjectMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "fooIOO",
+  SetUpForTest(false, "fooIOO",
                "(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooIOO));
 
@@ -338,9 +365,12 @@
 
 int gJava_MyClassNatives_fooSII_calls = 0;
 jint Java_MyClassNatives_fooSII(JNIEnv* env, jclass klass, jint x, jint y) {
-  // 2 = SirtRef<ClassLoader> + klass
-  EXPECT_EQ(2U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 1 = klass
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
@@ -349,8 +379,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunStaticIntIntMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), true, "fooSII", "(II)I",
+  SetUpForTest(true, "fooSII", "(II)I",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooSII));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooSII_calls);
@@ -361,9 +390,12 @@
 
 int gJava_MyClassNatives_fooSDD_calls = 0;
 jdouble Java_MyClassNatives_fooSDD(JNIEnv* env, jclass klass, jdouble x, jdouble y) {
-  // 2 = SirtRef<ClassLoader> + klass
-  EXPECT_EQ(2U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 1 = klass
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
@@ -372,8 +404,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunStaticDoubleDoubleMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), true, "fooSDD", "(DD)D",
+  SetUpForTest(true, "fooSDD", "(DD)D",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooSDD));
 
   EXPECT_EQ(0, gJava_MyClassNatives_fooSDD_calls);
@@ -390,9 +421,12 @@
 int gJava_MyClassNatives_fooSIOO_calls = 0;
 jobject Java_MyClassNatives_fooSIOO(JNIEnv* env, jclass klass, jint x, jobject y,
                              jobject z) {
-  // 4 = SirtRef<ClassLoader> + klass + y + z
-  EXPECT_EQ(4U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+  // 3 = klass + y + z
+  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
@@ -409,8 +443,7 @@
 
 
 TEST_F(JniCompilerTest, CompileAndRunStaticIntObjectObjectMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), true, "fooSIOO",
+  SetUpForTest(true, "fooSIOO",
                "(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooSIOO));
 
@@ -441,11 +474,13 @@
 }
 
 int gJava_MyClassNatives_fooSSIOO_calls = 0;
-jobject Java_MyClassNatives_fooSSIOO(JNIEnv* env, jclass klass, jint x, jobject y,
-                             jobject z) {
-  // 4 = SirtRef<ClassLoader> + klass + y + z
-  EXPECT_EQ(4U, Thread::Current()->NumStackReferences());
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
+jobject Java_MyClassNatives_fooSSIOO(JNIEnv* env, jclass klass, jint x, jobject y, jobject z) {
+  // 3 = klass + y + z
+  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  }
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
@@ -461,8 +496,7 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunStaticSynchronizedIntObjectObjectMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), true, "fooSSIOO",
+  SetUpForTest(true, "fooSSIOO",
                "(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooSSIOO));
 
@@ -498,34 +532,42 @@
 }
 
 TEST_F(JniCompilerTest, ExceptionHandling) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
+  {
+    ASSERT_FALSE(runtime_->IsStarted());
+    ScopedObjectAccess soa(Thread::Current());
+    class_loader_ = LoadDex("MyClassNatives");
 
-  // all compilation needs to happen before SetUpForTest calls Runtime::Start
-  CompileForTest(class_loader.get(), false, "foo", "()V");
-  CompileForTest(class_loader.get(), false, "throwException", "()V");
-  CompileForTest(class_loader.get(), false, "foo", "()V");
+    // all compilation needs to happen before Runtime::Start
+    CompileForTest(class_loader_, false, "foo", "()V");
+    CompileForTest(class_loader_, false, "throwException", "()V");
+    CompileForTest(class_loader_, false, "foo", "()V");
+  }
+  // Start runtime to avoid re-initialization in SetUpForTest.
+  Thread::Current()->TransitionFromSuspendedToRunnable();
+  runtime_->Start();
 
   gJava_MyClassNatives_foo_calls = 0;
 
   // Check a single call of a JNI method is ok
-  SetUpForTest(class_loader.get(), false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo));
+  SetUpForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo));
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
   EXPECT_EQ(1, gJava_MyClassNatives_foo_calls);
   EXPECT_FALSE(Thread::Current()->IsExceptionPending());
 
   // Get class for exception we expect to be thrown
-  Class* jlre = class_linker_->FindClass("Ljava/lang/RuntimeException;", class_loader.get());
-  SetUpForTest(class_loader.get(), false, "throwException", "()V",
+  ScopedLocalRef<jclass> jlre(env_, env_->FindClass("java/lang/RuntimeException"));
+  SetUpForTest(false, "throwException", "()V",
                reinterpret_cast<void*>(&Java_MyClassNatives_throwException));
   // Call Java_MyClassNatives_throwException (JNI method that throws exception)
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
   EXPECT_EQ(1, gJava_MyClassNatives_foo_calls);
-  EXPECT_TRUE(Thread::Current()->IsExceptionPending());
-  EXPECT_TRUE(Thread::Current()->GetException()->InstanceOf(jlre));
-  Thread::Current()->ClearException();
+  EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE);
+  ScopedLocalRef<jthrowable> exception(env_, env_->ExceptionOccurred());
+  env_->ExceptionClear();
+  EXPECT_TRUE(env_->IsInstanceOf(exception.get(), jlre.get()));
 
   // Check a single call of a JNI method is ok
-  SetUpForTest(class_loader.get(), false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo));
+  SetUpForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo));
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
   EXPECT_EQ(2, gJava_MyClassNatives_foo_calls);
 }
@@ -533,13 +575,13 @@
 jint Java_MyClassNatives_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) {
   if (i <= 0) {
     // We want to check raw Object*/Array* below
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
 
     // Build stack trace
-    jobject internal = Thread::Current()->CreateInternalStackTrace(ts);
+    jobject internal = Thread::Current()->CreateInternalStackTrace(soa);
     jobjectArray ste_array = Thread::InternalStackTraceToStackTraceElementArray(env, internal);
     ObjectArray<StackTraceElement>* trace_array =
-        ts.Decode<ObjectArray<StackTraceElement>*>(ste_array);
+        soa.Decode<ObjectArray<StackTraceElement>*>(ste_array);
     EXPECT_TRUE(trace_array != NULL);
     EXPECT_EQ(11, trace_array->GetLength());
 
@@ -569,8 +611,7 @@
 }
 
 TEST_F(JniCompilerTest, NativeStackTraceElement) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "fooI", "(I)I",
+  SetUpForTest(false, "fooI", "(I)I",
                reinterpret_cast<void*>(&Java_MyClassNatives_nativeUpCall));
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 10);
   EXPECT_EQ(10+9+8+7+6+5+4+3+2+1, result);
@@ -581,8 +622,7 @@
 }
 
 TEST_F(JniCompilerTest, ReturnGlobalRef) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "fooO", "(Ljava/lang/Object;)Ljava/lang/Object;",
+  SetUpForTest(false, "fooO", "(Ljava/lang/Object;)Ljava/lang/Object;",
                reinterpret_cast<void*>(&Java_MyClassNatives_fooO));
   jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, jobj_);
   EXPECT_EQ(JNILocalRefType, env_->GetObjectRefType(result));
@@ -591,16 +631,15 @@
 
 jint local_ref_test(JNIEnv* env, jobject thisObj, jint x) {
   // Add 10 local references
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   for (int i = 0; i < 10; i++) {
-    ts.AddLocalReference<jobject>(ts.Decode<Object*>(thisObj));
+    soa.AddLocalReference<jobject>(soa.Decode<Object*>(thisObj));
   }
   return x+1;
 }
 
 TEST_F(JniCompilerTest, LocalReferenceTableClearingTest) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "fooI", "(I)I", reinterpret_cast<void*>(&local_ref_test));
+  SetUpForTest(false, "fooI", "(I)I", reinterpret_cast<void*>(&local_ref_test));
   // 1000 invocations of a method that adds 10 local references
   for (int i = 0; i < 1000; i++) {
     jint result = env_->CallIntMethod(jobj_, jmethod_, i);
@@ -618,8 +657,7 @@
 }
 
 TEST_F(JniCompilerTest, JavaLangSystemArrayCopy) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), true, "arraycopy", "(Ljava/lang/Object;ILjava/lang/Object;II)V",
+  SetUpForTest(true, "arraycopy", "(Ljava/lang/Object;ILjava/lang/Object;II)V",
                reinterpret_cast<void*>(&my_arraycopy));
   env_->CallStaticVoidMethod(jklass_, jmethod_, jobj_, 1234, jklass_, 5678, 9876);
 }
@@ -634,8 +672,7 @@
 }
 
 TEST_F(JniCompilerTest, CompareAndSwapInt) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "compareAndSwapInt", "(Ljava/lang/Object;JII)Z",
+  SetUpForTest(false, "compareAndSwapInt", "(Ljava/lang/Object;JII)Z",
                reinterpret_cast<void*>(&my_casi));
   jboolean result = env_->CallBooleanMethod(jobj_, jmethod_, jobj_, 0x12345678ABCDEF88ll, 0xCAFEF00D, 0xEBADF00D);
   EXPECT_EQ(result, JNI_TRUE);
@@ -651,8 +688,7 @@
 }
 
 TEST_F(JniCompilerTest, GetText) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), true, "getText", "(JLjava/lang/Object;JLjava/lang/Object;)I",
+  SetUpForTest(true, "getText", "(JLjava/lang/Object;JLjava/lang/Object;)I",
                reinterpret_cast<void*>(&my_gettext));
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 0x12345678ABCDEF88ll, jobj_,
                                           0x7FEDCBA987654321ll, jobj_);
@@ -670,8 +706,7 @@
 }
 
 TEST_F(JniCompilerTest, UpcallReturnTypeChecking_Instance) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "instanceMethodThatShouldReturnClass", "()Ljava/lang/Class;",
+  SetUpForTest(false, "instanceMethodThatShouldReturnClass", "()Ljava/lang/Class;",
                reinterpret_cast<void*>(&Java_MyClassNatives_instanceMethodThatShouldReturnClass));
 
   CheckJniAbortCatcher check_jni_abort_catcher;
@@ -688,8 +723,7 @@
 }
 
 TEST_F(JniCompilerTest, UpcallReturnTypeChecking_Static) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), true, "staticMethodThatShouldReturnClass", "()Ljava/lang/Class;",
+  SetUpForTest(true, "staticMethodThatShouldReturnClass", "()Ljava/lang/Class;",
                reinterpret_cast<void*>(&Java_MyClassNatives_staticMethodThatShouldReturnClass));
 
   CheckJniAbortCatcher check_jni_abort_catcher;
@@ -714,8 +748,7 @@
 }
 
 TEST_F(JniCompilerTest, UpcallArgumentTypeChecking_Instance) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), false, "instanceMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
+  SetUpForTest(false, "instanceMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
                reinterpret_cast<void*>(&Java_MyClassNatives_instanceMethodThatShouldTakeClass));
 
   CheckJniAbortCatcher check_jni_abort_catcher;
@@ -725,8 +758,7 @@
 }
 
 TEST_F(JniCompilerTest, UpcallArgumentTypeChecking_Static) {
-  SirtRef<ClassLoader> class_loader(LoadDex("MyClassNatives"));
-  SetUpForTest(class_loader.get(), true, "staticMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
+  SetUpForTest(true, "staticMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
                reinterpret_cast<void*>(&Java_MyClassNatives_staticMethodThatShouldTakeClass));
 
   CheckJniAbortCatcher check_jni_abort_catcher;
diff --git a/src/jni_internal.cc b/src/jni_internal.cc
index 74b740a..dbdc149 100644
--- a/src/jni_internal.cc
+++ b/src/jni_internal.cc
@@ -26,11 +26,12 @@
 #include "class_loader.h"
 #include "jni.h"
 #include "logging.h"
+#include "mutex.h"
 #include "object.h"
 #include "object_utils.h"
 #include "runtime.h"
 #include "safe_map.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "stl_util.h"
 #include "stringpiece.h"
@@ -55,7 +56,8 @@
 static const size_t kWeakGlobalsInitial = 16; // Arbitrary.
 static const size_t kWeakGlobalsMax = 51200; // Arbitrary sanity check.
 
-void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods, size_t method_count) {
+void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods,
+                           size_t method_count) {
   ScopedLocalRef<jclass> c(env, env->FindClass(jni_class_name));
   if (c.get() == NULL) {
     LOG(FATAL) << "Couldn't find class: " << jni_class_name;
@@ -91,7 +93,7 @@
 
 class ArgArray {
  public:
-  explicit ArgArray(Method* method) {
+  explicit ArgArray(Method* method) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     MethodHelper mh(method);
     shorty_ = mh.GetShorty();
     shorty_len_ = mh.GetShortyLength();
@@ -107,7 +109,8 @@
     return arg_array_;
   }
 
-  void BuildArgArray(const ScopedJniThreadState& ts, va_list ap) {
+  void BuildArgArray(const ScopedObjectAccess& soa, va_list ap)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     for (size_t i = 1, offset = 0; i < shorty_len_; ++i, ++offset) {
       switch (shorty_[i]) {
         case 'Z':
@@ -129,7 +132,7 @@
           arg_array_[offset].SetF(va_arg(ap, jdouble));
           break;
         case 'L':
-          arg_array_[offset].SetL(ts.Decode<Object*>(va_arg(ap, jobject)));
+          arg_array_[offset].SetL(soa.Decode<Object*>(va_arg(ap, jobject)));
           break;
         case 'D':
           arg_array_[offset].SetD(va_arg(ap, jdouble));
@@ -141,7 +144,8 @@
     }
   }
 
-  void BuildArgArray(const ScopedJniThreadState& ts, jvalue* args) {
+  void BuildArgArray(const ScopedObjectAccess& soa, jvalue* args)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     for (size_t i = 1, offset = 0; i < shorty_len_; ++i, ++offset) {
       switch (shorty_[i]) {
         case 'Z':
@@ -163,7 +167,7 @@
           arg_array_[offset].SetF(args[offset].f);
           break;
         case 'L':
-          arg_array_[offset].SetL(ts.Decode<Object*>(args[offset].l));
+          arg_array_[offset].SetL(soa.Decode<Object*>(args[offset].l));
           break;
         case 'D':
           arg_array_[offset].SetD(args[offset].d);
@@ -184,18 +188,20 @@
   UniquePtr<JValue[]> large_arg_array_;
 };
 
-static jweak AddWeakGlobalReference(ScopedJniThreadState& ts, Object* obj) {
+static jweak AddWeakGlobalReference(ScopedObjectAccess& soa, Object* obj)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (obj == NULL) {
     return NULL;
   }
-  JavaVMExt* vm = ts.Vm();
+  JavaVMExt* vm = soa.Vm();
   IndirectReferenceTable& weak_globals = vm->weak_globals;
   MutexLock mu(vm->weak_globals_lock);
   IndirectRef ref = weak_globals.Add(IRT_FIRST_SEGMENT, obj);
   return reinterpret_cast<jweak>(ref);
 }
 
-static void CheckMethodArguments(Method* m, JValue* args) {
+static void CheckMethodArguments(Method* m, JValue* args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   MethodHelper mh(m);
   ObjectArray<Class>* parameter_types = mh.GetParameterTypes();
   CHECK(parameter_types != NULL);
@@ -219,45 +225,50 @@
   }
 }
 
-static JValue InvokeWithArgArray(const ScopedJniThreadState& ts, Object* receiver, Method* method,
-                                 JValue* args) {
-  if (UNLIKELY(ts.Env()->check_jni)) {
+static JValue InvokeWithArgArray(const ScopedObjectAccess& soa, Object* receiver,
+                                 Method* method, JValue* args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  if (UNLIKELY(soa.Env()->check_jni)) {
     CheckMethodArguments(method, args);
   }
   JValue result;
-  method->Invoke(ts.Self(), receiver, args, &result);
+  method->Invoke(soa.Self(), receiver, args, &result);
   return result;
 }
 
-static JValue InvokeWithVarArgs(const ScopedJniThreadState& ts, jobject obj, jmethodID mid,
-                                va_list args) {
-  Object* receiver = ts.Decode<Object*>(obj);
-  Method* method = ts.DecodeMethod(mid);
+static JValue InvokeWithVarArgs(const ScopedObjectAccess& soa, jobject obj,
+                                jmethodID mid, va_list args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  Object* receiver = soa.Decode<Object*>(obj);
+  Method* method = soa.DecodeMethod(mid);
   ArgArray arg_array(method);
-  arg_array.BuildArgArray(ts, args);
-  return InvokeWithArgArray(ts, receiver, method, arg_array.get());
+  arg_array.BuildArgArray(soa, args);
+  return InvokeWithArgArray(soa, receiver, method, arg_array.get());
 }
 
-static Method* FindVirtualMethod(Object* receiver, Method* method) {
+static Method* FindVirtualMethod(Object* receiver, Method* method)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(method);
 }
 
-static JValue InvokeVirtualOrInterfaceWithJValues(const ScopedJniThreadState& ts, jobject obj,
-                                                  jmethodID mid, jvalue* args) {
-  Object* receiver = ts.Decode<Object*>(obj);
-  Method* method = FindVirtualMethod(receiver, ts.DecodeMethod(mid));
+static JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccess& soa,
+                                                  jobject obj, jmethodID mid, jvalue* args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  Object* receiver = soa.Decode<Object*>(obj);
+  Method* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   ArgArray arg_array(method);
-  arg_array.BuildArgArray(ts, args);
-  return InvokeWithArgArray(ts, receiver, method, arg_array.get());
+  arg_array.BuildArgArray(soa, args);
+  return InvokeWithArgArray(soa, receiver, method, arg_array.get());
 }
 
-static JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedJniThreadState& ts, jobject obj,
-                                                  jmethodID mid, va_list args) {
-  Object* receiver = ts.Decode<Object*>(obj);
-  Method* method = FindVirtualMethod(receiver, ts.DecodeMethod(mid));
+static JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccess& soa,
+                                                  jobject obj, jmethodID mid, va_list args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  Object* receiver = soa.Decode<Object*>(obj);
+  Method* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   ArgArray arg_array(method);
-  arg_array.BuildArgArray(ts, args);
-  return InvokeWithArgArray(ts, receiver, method, arg_array.get());
+  arg_array.BuildArgArray(soa, args);
+  return InvokeWithArgArray(soa, receiver, method, arg_array.get());
 }
 
 // Section 12.3.2 of the JNI spec describes JNI class descriptors. They're
@@ -284,13 +295,17 @@
   return result;
 }
 
-static void ThrowNoSuchMethodError(ScopedJniThreadState& ts, Class* c, const char* name, const char* sig, const char* kind) {
-  ts.Self()->ThrowNewExceptionF("Ljava/lang/NoSuchMethodError;",
+static void ThrowNoSuchMethodError(ScopedObjectAccess& soa, Class* c,
+                                   const char* name, const char* sig, const char* kind)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  soa.Self()->ThrowNewExceptionF("Ljava/lang/NoSuchMethodError;",
       "no %s method \"%s.%s%s\"", kind, ClassHelper(c).GetDescriptor(), name, sig);
 }
 
-static jmethodID FindMethodID(ScopedJniThreadState& ts, jclass jni_class, const char* name, const char* sig, bool is_static) {
-  Class* c = ts.Decode<Class*>(jni_class);
+static jmethodID FindMethodID(ScopedObjectAccess& soa, jclass jni_class,
+                              const char* name, const char* sig, bool is_static)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  Class* c = soa.Decode<Class*>(jni_class);
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(c, true, true)) {
     return NULL;
   }
@@ -308,14 +323,15 @@
   }
 
   if (method == NULL || method->IsStatic() != is_static) {
-    ThrowNoSuchMethodError(ts, c, name, sig, is_static ? "static" : "non-static");
+    ThrowNoSuchMethodError(soa, c, name, sig, is_static ? "static" : "non-static");
     return NULL;
   }
 
-  return ts.EncodeMethod(method);
+  return soa.EncodeMethod(method);
 }
 
-static ClassLoader* GetClassLoader(Thread* self) {
+static ClassLoader* GetClassLoader(Thread* self)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Method* method = self->GetCurrentMethod();
   if (method == NULL || PrettyMethod(method, false) == "java.lang.Runtime.nativeLoad") {
     return self->GetClassLoaderOverride();
@@ -323,9 +339,10 @@
   return method->GetDeclaringClass()->GetClassLoader();
 }
 
-static jfieldID FindFieldID(const ScopedJniThreadState& ts, jclass jni_class, const char* name,
-                            const char* sig, bool is_static) {
-  Class* c = ts.Decode<Class*>(jni_class);
+static jfieldID FindFieldID(const ScopedObjectAccess& soa, jclass jni_class, const char* name,
+                            const char* sig, bool is_static)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  Class* c = soa.Decode<Class*>(jni_class);
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(c, true, true)) {
     return NULL;
   }
@@ -334,16 +351,16 @@
   Class* field_type;
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   if (sig[1] != '\0') {
-    ClassLoader* cl = GetClassLoader(ts.Self());
+    ClassLoader* cl = GetClassLoader(soa.Self());
     field_type = class_linker->FindClass(sig, cl);
   } else {
     field_type = class_linker->FindPrimitiveClass(*sig);
   }
   if (field_type == NULL) {
     // Failed to find type from the signature of the field.
-    DCHECK(ts.Self()->IsExceptionPending());
-    ts.Self()->ClearException();
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/NoSuchFieldError;",
+    DCHECK(soa.Self()->IsExceptionPending());
+    soa.Self()->ClearException();
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/NoSuchFieldError;",
         "no type \"%s\" found and so no field \"%s\" could be found in class "
         "\"%s\" or its superclasses", sig, name, ClassHelper(c).GetDescriptor());
     return NULL;
@@ -354,125 +371,85 @@
     field = c->FindInstanceField(name, ClassHelper(field_type).GetDescriptor());
   }
   if (field == NULL) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/NoSuchFieldError;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/NoSuchFieldError;",
         "no \"%s\" field \"%s\" in class \"%s\" or its superclasses", sig,
         name, ClassHelper(c).GetDescriptor());
     return NULL;
   }
-  return ts.EncodeField(field);
+  return soa.EncodeField(field);
 }
 
-static void PinPrimitiveArray(const ScopedJniThreadState& ts, const Array* array) {
-  JavaVMExt* vm = ts.Vm();
+static void PinPrimitiveArray(const ScopedObjectAccess& soa, const Array* array)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  JavaVMExt* vm = soa.Vm();
   MutexLock mu(vm->pins_lock);
   vm->pin_table.Add(array);
 }
 
-static void UnpinPrimitiveArray(const ScopedJniThreadState& ts, const Array* array) {
-  JavaVMExt* vm = ts.Vm();
+static void UnpinPrimitiveArray(const ScopedObjectAccess& soa, const Array* array)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  JavaVMExt* vm = soa.Vm();
   MutexLock mu(vm->pins_lock);
   vm->pin_table.Remove(array);
 }
 
-template<typename JniT, typename ArtT>
-static JniT NewPrimitiveArray(const ScopedJniThreadState& ts, jsize length) {
-  CHECK_GE(length, 0); // TODO: ReportJniError
-  ArtT* result = ArtT::Alloc(length);
-  return ts.AddLocalReference<JniT>(result);
-}
-
-template <typename ArrayT, typename CArrayT, typename ArtArrayT>
-static CArrayT GetPrimitiveArray(ScopedJniThreadState& ts, ArrayT java_array, jboolean* is_copy) {
-  ArtArrayT* array = ts.Decode<ArtArrayT*>(java_array);
-  PinPrimitiveArray(ts, array);
-  if (is_copy != NULL) {
-    *is_copy = JNI_FALSE;
-  }
-  return array->GetData();
-}
-
-template <typename ArrayT>
-static void ReleasePrimitiveArray(ScopedJniThreadState& ts, ArrayT java_array, jint mode) {
-  if (mode != JNI_COMMIT) {
-    Array* array = ts.Decode<Array*>(java_array);
-    UnpinPrimitiveArray(ts, array);
-  }
-}
-
-static void ThrowAIOOBE(ScopedJniThreadState& ts, Array* array, jsize start, jsize length, const char* identifier) {
+static void ThrowAIOOBE(ScopedObjectAccess& soa, Array* array, jsize start,
+                        jsize length, const char* identifier)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   std::string type(PrettyTypeOf(array));
-  ts.Self()->ThrowNewExceptionF("Ljava/lang/ArrayIndexOutOfBoundsException;",
+  soa.Self()->ThrowNewExceptionF("Ljava/lang/ArrayIndexOutOfBoundsException;",
       "%s offset=%d length=%d %s.length=%d",
       type.c_str(), start, length, identifier, array->GetLength());
 }
 
-static void ThrowSIOOBE(ScopedJniThreadState& ts, jsize start, jsize length, jsize array_length) {
-  ts.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
+static void ThrowSIOOBE(ScopedObjectAccess& soa, jsize start, jsize length,
+                        jsize array_length)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
       "offset=%d length=%d string.length()=%d", start, length, array_length);
 }
 
-template <typename JavaArrayT, typename JavaT, typename ArrayT>
-static void GetPrimitiveArrayRegion(ScopedJniThreadState& ts, JavaArrayT java_array, jsize start, jsize length, JavaT* buf) {
-  ArrayT* array = ts.Decode<ArrayT*>(java_array);
-  if (start < 0 || length < 0 || start + length > array->GetLength()) {
-    ThrowAIOOBE(ts, array, start, length, "src");
-  } else {
-    JavaT* data = array->GetData();
-    memcpy(buf, data + start, length * sizeof(JavaT));
+int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause)
+    LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
+  ScopedObjectAccess soa(env);
+
+  // Turn the const char* into a java.lang.String.
+  ScopedLocalRef<jstring> s(env, env->NewStringUTF(msg));
+  if (msg != NULL && s.get() == NULL) {
+    return JNI_ERR;
   }
-}
 
-template <typename JavaArrayT, typename JavaT, typename ArrayT>
-static void SetPrimitiveArrayRegion(ScopedJniThreadState& ts, JavaArrayT java_array, jsize start, jsize length, const JavaT* buf) {
-  ArrayT* array = ts.Decode<ArrayT*>(java_array);
-  if (start < 0 || length < 0 || start + length > array->GetLength()) {
-    ThrowAIOOBE(ts, array, start, length, "dst");
+  // Choose an appropriate constructor and set up the arguments.
+  jvalue args[2];
+  const char* signature;
+  if (msg == NULL && cause == NULL) {
+    signature = "()V";
+  } else if (msg != NULL && cause == NULL) {
+    signature = "(Ljava/lang/String;)V";
+    args[0].l = s.get();
+  } else if (msg == NULL && cause != NULL) {
+    signature = "(Ljava/lang/Throwable;)V";
+    args[0].l = cause;
   } else {
-    JavaT* data = array->GetData();
-    memcpy(data + start, buf, length * sizeof(JavaT));
+    signature = "(Ljava/lang/String;Ljava/lang/Throwable;)V";
+    args[0].l = s.get();
+    args[1].l = cause;
   }
-}
+  jmethodID mid = env->GetMethodID(exception_class, "<init>", signature);
+  if (mid == NULL) {
+    LOG(ERROR) << "No <init>" << signature << " in "
+        << PrettyClass(soa.Decode<Class*>(exception_class));
+    return JNI_ERR;
+  }
 
-int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause) {
-    ScopedJniThreadState ts(env);
+  ScopedLocalRef<jthrowable> exception(env, reinterpret_cast<jthrowable>(env->NewObjectA(exception_class, mid, args)));
+  if (exception.get() == NULL) {
+    return JNI_ERR;
+  }
 
-    // Turn the const char* into a java.lang.String.
-    ScopedLocalRef<jstring> s(env, env->NewStringUTF(msg));
-    if (msg != NULL && s.get() == NULL) {
-      return JNI_ERR;
-    }
+  soa.Self()->SetException(soa.Decode<Throwable*>(exception.get()));
 
-    // Choose an appropriate constructor and set up the arguments.
-    jvalue args[2];
-    const char* signature;
-    if (msg == NULL && cause == NULL) {
-      signature = "()V";
-    } else if (msg != NULL && cause == NULL) {
-      signature = "(Ljava/lang/String;)V";
-      args[0].l = s.get();
-    } else if (msg == NULL && cause != NULL) {
-      signature = "(Ljava/lang/Throwable;)V";
-      args[0].l = cause;
-    } else {
-      signature = "(Ljava/lang/String;Ljava/lang/Throwable;)V";
-      args[0].l = s.get();
-      args[1].l = cause;
-    }
-    jmethodID mid = env->GetMethodID(exception_class, "<init>", signature);
-    if (mid == NULL) {
-      LOG(ERROR) << "No <init>" << signature << " in "
-          << PrettyClass(ts.Decode<Class*>(exception_class));
-      return JNI_ERR;
-    }
-
-    ScopedLocalRef<jthrowable> exception(env, reinterpret_cast<jthrowable>(env->NewObjectA(exception_class, mid, args)));
-    if (exception.get() == NULL) {
-      return JNI_ERR;
-    }
-
-    ts.Self()->SetException(ts.Decode<Throwable*>(exception.get()));
-
-    return JNI_OK;
+  return JNI_OK;
 }
 
 static jint JII_AttachCurrentThread(JavaVM* vm, JNIEnv** p_env, void* raw_args, bool as_daemon) {
@@ -533,32 +510,36 @@
    * Check the result of an earlier call to JNI_OnLoad on this library.
    * If the call has not yet finished in another thread, wait for it.
    */
-  bool CheckOnLoadResult() {
-    MutexLock mu(jni_on_load_lock_);
-
+  bool CheckOnLoadResult()
+      LOCKS_EXCLUDED(jni_on_load_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Thread* self = Thread::Current();
-    if (jni_on_load_thread_id_ == self->GetThinLockId()) {
-      // Check this so we don't end up waiting for ourselves.  We need
-      // to return "true" so the caller can continue.
-      LOG(INFO) << *self << " recursive attempt to load library "
-                << "\"" << path_ << "\"";
-      return true;
-    }
+    self->TransitionFromRunnableToSuspended(kWaitingForJniOnLoad);
+    bool okay;
+    {
+      MutexLock mu(jni_on_load_lock_);
 
-    while (jni_on_load_result_ == kPending) {
-      VLOG(jni) << "[" << *self << " waiting for \"" << path_ << "\" "
-                << "JNI_OnLoad...]";
-      ScopedThreadStateChange tsc(self, kVmWait);
-      jni_on_load_cond_.Wait(jni_on_load_lock_);
-    }
+      if (jni_on_load_thread_id_ == self->GetThinLockId()) {
+        // Check this so we don't end up waiting for ourselves.  We need to return "true" so the
+        // caller can continue.
+        LOG(INFO) << *self << " recursive attempt to load library " << "\"" << path_ << "\"";
+        okay = true;
+      } else {
+        while (jni_on_load_result_ == kPending) {
+          VLOG(jni) << "[" << *self << " waiting for \"" << path_ << "\" " << "JNI_OnLoad...]";
+          jni_on_load_cond_.Wait(jni_on_load_lock_);
+        }
 
-    bool okay = (jni_on_load_result_ == kOkay);
-    VLOG(jni) << "[Earlier JNI_OnLoad for \"" << path_ << "\" "
-              << (okay ? "succeeded" : "failed") << "]";
+        okay = (jni_on_load_result_ == kOkay);
+        VLOG(jni) << "[Earlier JNI_OnLoad for \"" << path_ << "\" "
+            << (okay ? "succeeded" : "failed") << "]";
+      }
+    }
+    self->TransitionFromSuspendedToRunnable();
     return okay;
   }
 
-  void SetResult(bool result) {
+  void SetResult(bool result) LOCKS_EXCLUDED(jni_on_load_lock_) {
     MutexLock mu(jni_on_load_lock_);
 
     jni_on_load_result_ = result ? kOkay : kFailed;
@@ -589,7 +570,7 @@
   Object* class_loader_;
 
   // Guards remaining items.
-  Mutex jni_on_load_lock_;
+  Mutex jni_on_load_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // Wait for JNI_OnLoad in other thread.
   ConditionVariable jni_on_load_cond_;
   // Recursive invocation guard.
@@ -633,7 +614,8 @@
   }
 
   // See section 11.3 "Linking Native Methods" of the JNI spec.
-  void* FindNativeMethod(const Method* m, std::string& detail) {
+  void* FindNativeMethod(const Method* m, std::string& detail)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     std::string jni_short_name(JniShortName(m));
     std::string jni_long_name(JniLongName(m));
     const ClassLoader* declaring_class_loader = m->GetDeclaringClass()->GetClassLoader();
@@ -667,109 +649,112 @@
   SafeMap<std::string, SharedLibrary*> libraries_;
 };
 
-JValue InvokeWithJValues(const ScopedJniThreadState& ts, jobject obj, jmethodID mid, jvalue* args) {
-  Object* receiver = ts.Decode<Object*>(obj);
-  Method* method = ts.DecodeMethod(mid);
+JValue InvokeWithJValues(const ScopedObjectAccess& soa, jobject obj, jmethodID mid,
+                         jvalue* args) {
+  Object* receiver = soa.Decode<Object*>(obj);
+  Method* method = soa.DecodeMethod(mid);
   ArgArray arg_array(method);
-  arg_array.BuildArgArray(ts, args);
-  return InvokeWithArgArray(ts, receiver, method, arg_array.get());
+  arg_array.BuildArgArray(soa, args);
+  return InvokeWithArgArray(soa, receiver, method, arg_array.get());
 }
 
-JValue InvokeWithJValues(const ScopedJniThreadState& ts, Object* receiver, Method* m, JValue* args) {
-  return InvokeWithArgArray(ts, receiver, m, args);
+JValue InvokeWithJValues(const ScopedObjectAccess& soa, Object* receiver, Method* m,
+                         JValue* args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  return InvokeWithArgArray(soa, receiver, m, args);
 }
 
 class JNI {
  public:
   static jint GetVersion(JNIEnv* env) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     return JNI_VERSION_1_6;
   }
 
   static jclass DefineClass(JNIEnv* env, const char*, jobject, const jbyte*, jsize) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     LOG(WARNING) << "JNI DefineClass is not supported";
     return NULL;
   }
 
   static jclass FindClass(JNIEnv* env, const char* name) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     Runtime* runtime = Runtime::Current();
     ClassLinker* class_linker = runtime->GetClassLinker();
     std::string descriptor(NormalizeJniClassDescriptor(name));
     Class* c = NULL;
     if (runtime->IsStarted()) {
-      ClassLoader* cl = GetClassLoader(ts.Self());
+      ClassLoader* cl = GetClassLoader(soa.Self());
       c = class_linker->FindClass(descriptor.c_str(), cl);
     } else {
       c = class_linker->FindSystemClass(descriptor.c_str());
     }
-    return ts.AddLocalReference<jclass>(c);
+    return soa.AddLocalReference<jclass>(c);
   }
 
   static jmethodID FromReflectedMethod(JNIEnv* env, jobject java_method) {
-    ScopedJniThreadState ts(env);
-    Method* method = ts.Decode<Method*>(java_method);
-    return ts.EncodeMethod(method);
+    ScopedObjectAccess soa(env);
+    Method* method = soa.Decode<Method*>(java_method);
+    return soa.EncodeMethod(method);
   }
 
   static jfieldID FromReflectedField(JNIEnv* env, jobject java_field) {
-    ScopedJniThreadState ts(env);
-    Field* field = ts.Decode<Field*>(java_field);
-    return ts.EncodeField(field);
+    ScopedObjectAccess soa(env);
+    Field* field = soa.Decode<Field*>(java_field);
+    return soa.EncodeField(field);
   }
 
   static jobject ToReflectedMethod(JNIEnv* env, jclass, jmethodID mid, jboolean) {
-    ScopedJniThreadState ts(env);
-    Method* method = ts.DecodeMethod(mid);
-    return ts.AddLocalReference<jobject>(method);
+    ScopedObjectAccess soa(env);
+    Method* method = soa.DecodeMethod(mid);
+    return soa.AddLocalReference<jobject>(method);
   }
 
   static jobject ToReflectedField(JNIEnv* env, jclass, jfieldID fid, jboolean) {
-    ScopedJniThreadState ts(env);
-    Field* field = ts.DecodeField(fid);
-    return ts.AddLocalReference<jobject>(field);
+    ScopedObjectAccess soa(env);
+    Field* field = soa.DecodeField(fid);
+    return soa.AddLocalReference<jobject>(field);
   }
 
   static jclass GetObjectClass(JNIEnv* env, jobject java_object) {
-    ScopedJniThreadState ts(env);
-    Object* o = ts.Decode<Object*>(java_object);
-    return ts.AddLocalReference<jclass>(o->GetClass());
+    ScopedObjectAccess soa(env);
+    Object* o = soa.Decode<Object*>(java_object);
+    return soa.AddLocalReference<jclass>(o->GetClass());
   }
 
   static jclass GetSuperclass(JNIEnv* env, jclass java_class) {
-    ScopedJniThreadState ts(env);
-    Class* c = ts.Decode<Class*>(java_class);
-    return ts.AddLocalReference<jclass>(c->GetSuperClass());
+    ScopedObjectAccess soa(env);
+    Class* c = soa.Decode<Class*>(java_class);
+    return soa.AddLocalReference<jclass>(c->GetSuperClass());
   }
 
   static jboolean IsAssignableFrom(JNIEnv* env, jclass java_class1, jclass java_class2) {
-    ScopedJniThreadState ts(env);
-    Class* c1 = ts.Decode<Class*>(java_class1);
-    Class* c2 = ts.Decode<Class*>(java_class2);
+    ScopedObjectAccess soa(env);
+    Class* c1 = soa.Decode<Class*>(java_class1);
+    Class* c2 = soa.Decode<Class*>(java_class2);
     return c1->IsAssignableFrom(c2) ? JNI_TRUE : JNI_FALSE;
   }
 
   static jboolean IsInstanceOf(JNIEnv* env, jobject jobj, jclass java_class) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     CHECK_NE(static_cast<jclass>(NULL), java_class); // TODO: ReportJniError
     if (jobj == NULL) {
       // Note: JNI is different from regular Java instanceof in this respect
       return JNI_TRUE;
     } else {
-      Object* obj = ts.Decode<Object*>(jobj);
-      Class* c = ts.Decode<Class*>(java_class);
+      Object* obj = soa.Decode<Object*>(jobj);
+      Class* c = soa.Decode<Class*>(java_class);
       return obj->InstanceOf(c) ? JNI_TRUE : JNI_FALSE;
     }
   }
 
   static jint Throw(JNIEnv* env, jthrowable java_exception) {
-    ScopedJniThreadState ts(env);
-    Throwable* exception = ts.Decode<Throwable*>(java_exception);
+    ScopedObjectAccess soa(env);
+    Throwable* exception = soa.Decode<Throwable*>(java_exception);
     if (exception == NULL) {
       return JNI_ERR;
     }
-    ts.Self()->SetException(exception);
+    soa.Self()->SetException(exception);
     return JNI_OK;
   }
 
@@ -778,23 +763,23 @@
   }
 
   static jboolean ExceptionCheck(JNIEnv* env) {
-    ScopedJniThreadState ts(env);
-    return ts.Self()->IsExceptionPending() ? JNI_TRUE : JNI_FALSE;
+    ScopedObjectAccess soa(env);
+    return soa.Self()->IsExceptionPending() ? JNI_TRUE : JNI_FALSE;
   }
 
   static void ExceptionClear(JNIEnv* env) {
-    ScopedJniThreadState ts(env);
-    ts.Self()->ClearException();
+    ScopedObjectAccess soa(env);
+    soa.Self()->ClearException();
   }
 
   static void ExceptionDescribe(JNIEnv* env) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
 
-    Thread* self = ts.Self();
+    Thread* self = soa.Self();
     Throwable* original_exception = self->GetException();
     self->ClearException();
 
-    ScopedLocalRef<jthrowable> exception(env, ts.AddLocalReference<jthrowable>(original_exception));
+    ScopedLocalRef<jthrowable> exception(env, soa.AddLocalReference<jthrowable>(original_exception));
     ScopedLocalRef<jclass> exception_class(env, env->GetObjectClass(exception.get()));
     jmethodID mid = env->GetMethodID(exception_class.get(), "printStackTrace", "()V");
     if (mid == NULL) {
@@ -813,72 +798,58 @@
   }
 
   static jthrowable ExceptionOccurred(JNIEnv* env) {
-    ScopedJniThreadState ts(env);
-    Object* exception = ts.Self()->GetException();
-    return ts.AddLocalReference<jthrowable>(exception);
+    ScopedObjectAccess soa(env);
+    Object* exception = soa.Self()->GetException();
+    return soa.AddLocalReference<jthrowable>(exception);
   }
 
   static void FatalError(JNIEnv* env, const char* msg) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     LOG(FATAL) << "JNI FatalError called: " << msg;
   }
 
   static jint PushLocalFrame(JNIEnv* env, jint capacity) {
-    ScopedJniThreadState ts(env);
-    if (EnsureLocalCapacity(ts, capacity, "PushLocalFrame") != JNI_OK) {
+    ScopedObjectAccess soa(env);
+    if (EnsureLocalCapacity(soa, capacity, "PushLocalFrame") != JNI_OK) {
       return JNI_ERR;
     }
-    ts.Env()->PushFrame(capacity);
+    soa.Env()->PushFrame(capacity);
     return JNI_OK;
   }
 
   static jobject PopLocalFrame(JNIEnv* env, jobject java_survivor) {
-    ScopedJniThreadState ts(env);
-    Object* survivor = ts.Decode<Object*>(java_survivor);
-    ts.Env()->PopFrame();
-    return ts.AddLocalReference<jobject>(survivor);
+    ScopedObjectAccess soa(env);
+    Object* survivor = soa.Decode<Object*>(java_survivor);
+    soa.Env()->PopFrame();
+    return soa.AddLocalReference<jobject>(survivor);
   }
 
   static jint EnsureLocalCapacity(JNIEnv* env, jint desired_capacity) {
-    ScopedJniThreadState ts(env);
-    return EnsureLocalCapacity(ts, desired_capacity, "EnsureLocalCapacity");
-  }
-
-  static jint EnsureLocalCapacity(const ScopedJniThreadState& ts, jint desired_capacity, const char* caller) {
-    // TODO: we should try to expand the table if necessary.
-    if (desired_capacity < 1 || desired_capacity > static_cast<jint>(kLocalsMax)) {
-      LOG(ERROR) << "Invalid capacity given to " << caller << ": " << desired_capacity;
-      return JNI_ERR;
-    }
-    // TODO: this isn't quite right, since "capacity" includes holes.
-    size_t capacity = ts.Env()->locals.Capacity();
-    bool okay = (static_cast<jint>(kLocalsMax - capacity) >= desired_capacity);
-    if (!okay) {
-      ts.Self()->ThrowOutOfMemoryError(caller);
-    }
-    return okay ? JNI_OK : JNI_ERR;
+    ScopedObjectAccess soa(env);
+    return EnsureLocalCapacity(soa, desired_capacity, "EnsureLocalCapacity");
   }
 
   static jobject NewGlobalRef(JNIEnv* env, jobject obj) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     if (obj == NULL) {
       return NULL;
     }
 
-    JavaVMExt* vm = ts.Vm();
+    JavaVMExt* vm = soa.Vm();
     IndirectReferenceTable& globals = vm->globals;
+    Object* decoded_obj = soa.Decode<Object*>(obj);
     MutexLock mu(vm->globals_lock);
-    IndirectRef ref = globals.Add(IRT_FIRST_SEGMENT, ts.Decode<Object*>(obj));
+    IndirectRef ref = globals.Add(IRT_FIRST_SEGMENT, decoded_obj);
     return reinterpret_cast<jobject>(ref);
   }
 
   static void DeleteGlobalRef(JNIEnv* env, jobject obj) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     if (obj == NULL) {
       return;
     }
 
-    JavaVMExt* vm = ts.Vm();
+    JavaVMExt* vm = soa.Vm();
     IndirectReferenceTable& globals = vm->globals;
     MutexLock mu(vm->globals_lock);
 
@@ -889,17 +860,17 @@
   }
 
   static jweak NewWeakGlobalRef(JNIEnv* env, jobject obj) {
-    ScopedJniThreadState ts(env);
-    return AddWeakGlobalReference(ts, ts.Decode<Object*>(obj));
+    ScopedObjectAccess soa(env);
+    return AddWeakGlobalReference(soa, soa.Decode<Object*>(obj));
   }
 
   static void DeleteWeakGlobalRef(JNIEnv* env, jweak obj) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     if (obj == NULL) {
       return;
     }
 
-    JavaVMExt* vm = ts.Vm();
+    JavaVMExt* vm = soa.Vm();
     IndirectReferenceTable& weak_globals = vm->weak_globals;
     MutexLock mu(vm->weak_globals_lock);
 
@@ -910,27 +881,27 @@
   }
 
   static jobject NewLocalRef(JNIEnv* env, jobject obj) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     if (obj == NULL) {
       return NULL;
     }
 
-    IndirectReferenceTable& locals = ts.Env()->locals;
+    IndirectReferenceTable& locals = soa.Env()->locals;
 
-    uint32_t cookie = ts.Env()->local_ref_cookie;
-    IndirectRef ref = locals.Add(cookie, ts.Decode<Object*>(obj));
+    uint32_t cookie = soa.Env()->local_ref_cookie;
+    IndirectRef ref = locals.Add(cookie, soa.Decode<Object*>(obj));
     return reinterpret_cast<jobject>(ref);
   }
 
   static void DeleteLocalRef(JNIEnv* env, jobject obj) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     if (obj == NULL) {
       return;
     }
 
-    IndirectReferenceTable& locals = ts.Env()->locals;
+    IndirectReferenceTable& locals = soa.Env()->locals;
 
-    uint32_t cookie = ts.Env()->local_ref_cookie;
+    uint32_t cookie = soa.Env()->local_ref_cookie;
     if (!locals.Remove(cookie, obj)) {
       // Attempting to delete a local reference that is not in the
       // topmost local reference frame is a no-op.  DeleteLocalRef returns
@@ -943,22 +914,22 @@
   }
 
   static jboolean IsSameObject(JNIEnv* env, jobject obj1, jobject obj2) {
-    ScopedJniThreadState ts(env);
-    return (ts.Decode<Object*>(obj1) == ts.Decode<Object*>(obj2))
+    ScopedObjectAccess soa(env);
+    return (soa.Decode<Object*>(obj1) == soa.Decode<Object*>(obj2))
         ? JNI_TRUE : JNI_FALSE;
   }
 
   static jobject AllocObject(JNIEnv* env, jclass java_class) {
-    ScopedJniThreadState ts(env);
-    Class* c = ts.Decode<Class*>(java_class);
+    ScopedObjectAccess soa(env);
+    Class* c = soa.Decode<Class*>(java_class);
     if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(c, true, true)) {
       return NULL;
     }
-    return ts.AddLocalReference<jobject>(c->AllocObject());
+    return soa.AddLocalReference<jobject>(c->AllocObject());
   }
 
   static jobject NewObject(JNIEnv* env, jclass c, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list args;
     va_start(args, mid);
     jobject result = NewObjectV(env, c, mid, args);
@@ -967,8 +938,8 @@
   }
 
   static jobject NewObjectV(JNIEnv* env, jclass java_class, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    Class* c = ts.Decode<Class*>(java_class);
+    ScopedObjectAccess soa(env);
+    Class* c = soa.Decode<Class*>(java_class);
     if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(c, true, true)) {
       return NULL;
     }
@@ -976,9 +947,9 @@
     if (result == NULL) {
       return NULL;
     }
-    jobject local_result = ts.AddLocalReference<jobject>(result);
+    jobject local_result = soa.AddLocalReference<jobject>(result);
     CallNonvirtualVoidMethodV(env, local_result, java_class, mid, args);
-    if (!ts.Self()->IsExceptionPending()) {
+    if (!soa.Self()->IsExceptionPending()) {
       return local_result;
     } else {
       return NULL;
@@ -986,8 +957,8 @@
   }
 
   static jobject NewObjectA(JNIEnv* env, jclass java_class, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    Class* c = ts.Decode<Class*>(java_class);
+    ScopedObjectAccess soa(env);
+    Class* c = soa.Decode<Class*>(java_class);
     if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(c, true, true)) {
       return NULL;
     }
@@ -995,9 +966,9 @@
     if (result == NULL) {
       return NULL;
     }
-    jobject local_result = ts.AddLocalReference<jobjectArray>(result);
+    jobject local_result = soa.AddLocalReference<jobjectArray>(result);
     CallNonvirtualVoidMethodA(env, local_result, java_class, mid, args);
-    if (!ts.Self()->IsExceptionPending()) {
+    if (!soa.Self()->IsExceptionPending()) {
       return local_result;
     } else {
       return NULL;
@@ -1005,468 +976,468 @@
   }
 
   static jmethodID GetMethodID(JNIEnv* env, jclass c, const char* name, const char* sig) {
-    ScopedJniThreadState ts(env);
-    return FindMethodID(ts, c, name, sig, false);
+    ScopedObjectAccess soa(env);
+    return FindMethodID(soa, c, name, sig, false);
   }
 
   static jmethodID GetStaticMethodID(JNIEnv* env, jclass c, const char* name, const char* sig) {
-    ScopedJniThreadState ts(env);
-    return FindMethodID(ts, c, name, sig, true);
+    ScopedObjectAccess soa(env);
+    return FindMethodID(soa, c, name, sig, true);
   }
 
   static jobject CallObjectMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
-    return ts.AddLocalReference<jobject>(result.GetL());
+    return soa.AddLocalReference<jobject>(result.GetL());
   }
 
   static jobject CallObjectMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args));
-    return ts.AddLocalReference<jobject>(result.GetL());
+    ScopedObjectAccess soa(env);
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args));
+    return soa.AddLocalReference<jobject>(result.GetL());
   }
 
   static jobject CallObjectMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    JValue result(InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args));
-    return ts.AddLocalReference<jobject>(result.GetL());
+    ScopedObjectAccess soa(env);
+    JValue result(InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args));
+    return soa.AddLocalReference<jobject>(result.GetL());
   }
 
   static jboolean CallBooleanMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetZ();
   }
 
   static jboolean CallBooleanMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args).GetZ();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetZ();
   }
 
   static jboolean CallBooleanMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args).GetZ();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args).GetZ();
   }
 
   static jbyte CallByteMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetB();
   }
 
   static jbyte CallByteMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args).GetB();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetB();
   }
 
   static jbyte CallByteMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args).GetB();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args).GetB();
   }
 
   static jchar CallCharMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetC();
   }
 
   static jchar CallCharMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args).GetC();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetC();
   }
 
   static jchar CallCharMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args).GetC();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args).GetC();
   }
 
   static jdouble CallDoubleMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetD();
   }
 
   static jdouble CallDoubleMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args).GetD();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetD();
   }
 
   static jdouble CallDoubleMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args).GetD();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args).GetD();
   }
 
   static jfloat CallFloatMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetF();
   }
 
   static jfloat CallFloatMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args).GetF();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetF();
   }
 
   static jfloat CallFloatMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args).GetF();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args).GetF();
   }
 
   static jint CallIntMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetI();
   }
 
   static jint CallIntMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args).GetI();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetI();
   }
 
   static jint CallIntMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args).GetI();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args).GetI();
   }
 
   static jlong CallLongMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetJ();
   }
 
   static jlong CallLongMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args).GetJ();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetJ();
   }
 
   static jlong CallLongMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args).GetJ();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args).GetJ();
   }
 
   static jshort CallShortMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetS();
   }
 
   static jshort CallShortMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args).GetS();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args).GetS();
   }
 
   static jshort CallShortMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args).GetS();
+    ScopedObjectAccess soa(env);
+    return InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args).GetS();
   }
 
   static void CallVoidMethod(JNIEnv* env, jobject obj, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
   }
 
   static void CallVoidMethodV(JNIEnv* env, jobject obj, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    InvokeVirtualOrInterfaceWithVarArgs(ts, obj, mid, args);
+    ScopedObjectAccess soa(env);
+    InvokeVirtualOrInterfaceWithVarArgs(soa, obj, mid, args);
   }
 
   static void CallVoidMethodA(JNIEnv* env, jobject obj, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    InvokeVirtualOrInterfaceWithJValues(ts, obj, mid, args);
+    ScopedObjectAccess soa(env);
+    InvokeVirtualOrInterfaceWithJValues(soa, obj, mid, args);
   }
 
   static jobject CallNonvirtualObjectMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, ap));
-    jobject local_result = ts.AddLocalReference<jobject>(result.GetL());
+    JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
+    jobject local_result = soa.AddLocalReference<jobject>(result.GetL());
     va_end(ap);
     return local_result;
   }
 
   static jobject CallNonvirtualObjectMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, args));
-    return ts.AddLocalReference<jobject>(result.GetL());
+    ScopedObjectAccess soa(env);
+    JValue result(InvokeWithVarArgs(soa, obj, mid, args));
+    return soa.AddLocalReference<jobject>(result.GetL());
   }
 
   static jobject CallNonvirtualObjectMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    JValue result(InvokeWithJValues(ts, obj, mid, args));
-    return ts.AddLocalReference<jobject>(result.GetL());
+    ScopedObjectAccess soa(env);
+    JValue result(InvokeWithJValues(soa, obj, mid, args));
+    return soa.AddLocalReference<jobject>(result.GetL());
   }
 
   static jboolean CallNonvirtualBooleanMethod(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetZ();
   }
 
   static jboolean CallNonvirtualBooleanMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, obj, mid, args).GetZ();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, obj, mid, args).GetZ();
   }
 
   static jboolean CallNonvirtualBooleanMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, obj, mid, args).GetZ();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, obj, mid, args).GetZ();
   }
 
   static jbyte CallNonvirtualByteMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetB();
   }
 
   static jbyte CallNonvirtualByteMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, obj, mid, args).GetB();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, obj, mid, args).GetB();
   }
 
   static jbyte CallNonvirtualByteMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, obj, mid, args).GetB();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, obj, mid, args).GetB();
   }
 
   static jchar CallNonvirtualCharMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetC();
   }
 
   static jchar CallNonvirtualCharMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, obj, mid, args).GetC();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, obj, mid, args).GetC();
   }
 
   static jchar CallNonvirtualCharMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, obj, mid, args).GetC();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, obj, mid, args).GetC();
   }
 
   static jshort CallNonvirtualShortMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetS();
   }
 
   static jshort CallNonvirtualShortMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, obj, mid, args).GetS();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, obj, mid, args).GetS();
   }
 
   static jshort CallNonvirtualShortMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, obj, mid, args).GetS();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, obj, mid, args).GetS();
   }
 
   static jint CallNonvirtualIntMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetI();
   }
 
   static jint CallNonvirtualIntMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, obj, mid, args).GetI();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, obj, mid, args).GetI();
   }
 
   static jint CallNonvirtualIntMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, obj, mid, args).GetI();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, obj, mid, args).GetI();
   }
 
   static jlong CallNonvirtualLongMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetJ();
   }
 
   static jlong CallNonvirtualLongMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, obj, mid, args).GetJ();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, obj, mid, args).GetJ();
   }
 
   static jlong CallNonvirtualLongMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, obj, mid, args).GetJ();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, obj, mid, args).GetJ();
   }
 
   static jfloat CallNonvirtualFloatMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetF();
   }
 
   static jfloat CallNonvirtualFloatMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, obj, mid, args).GetF();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, obj, mid, args).GetF();
   }
 
   static jfloat CallNonvirtualFloatMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, obj, mid, args).GetF();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, obj, mid, args).GetF();
   }
 
   static jdouble CallNonvirtualDoubleMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, obj, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, obj, mid, ap));
     va_end(ap);
     return result.GetD();
   }
 
   static jdouble CallNonvirtualDoubleMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, obj, mid, args).GetD();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, obj, mid, args).GetD();
   }
 
   static jdouble CallNonvirtualDoubleMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, obj, mid, args).GetD();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, obj, mid, args).GetD();
   }
 
   static void CallNonvirtualVoidMethod(JNIEnv* env, jobject obj, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    InvokeWithVarArgs(ts, obj, mid, ap);
+    InvokeWithVarArgs(soa, obj, mid, ap);
     va_end(ap);
   }
 
   static void CallNonvirtualVoidMethodV(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    InvokeWithVarArgs(ts, obj, mid, args);
+    ScopedObjectAccess soa(env);
+    InvokeWithVarArgs(soa, obj, mid, args);
   }
 
   static void CallNonvirtualVoidMethodA(JNIEnv* env,
       jobject obj, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    InvokeWithJValues(ts, obj, mid, args);
+    ScopedObjectAccess soa(env);
+    InvokeWithJValues(soa, obj, mid, args);
   }
 
   static jfieldID GetFieldID(JNIEnv* env, jclass c, const char* name, const char* sig) {
-    ScopedJniThreadState ts(env);
-    return FindFieldID(ts, c, name, sig, false);
+    ScopedObjectAccess soa(env);
+    return FindFieldID(soa, c, name, sig, false);
   }
 
 
   static jfieldID GetStaticFieldID(JNIEnv* env, jclass c, const char* name, const char* sig) {
-    ScopedJniThreadState ts(env);
-    return FindFieldID(ts, c, name, sig, true);
+    ScopedObjectAccess soa(env);
+    return FindFieldID(soa, c, name, sig, true);
   }
 
   static jobject GetObjectField(JNIEnv* env, jobject obj, jfieldID fid) {
-    ScopedJniThreadState ts(env);
-    Object* o = ts.Decode<Object*>(obj);
-    Field* f = ts.DecodeField(fid);
-    return ts.AddLocalReference<jobject>(f->GetObject(o));
+    ScopedObjectAccess soa(env);
+    Object* o = soa.Decode<Object*>(obj);
+    Field* f = soa.DecodeField(fid);
+    return soa.AddLocalReference<jobject>(f->GetObject(o));
   }
 
   static jobject GetStaticObjectField(JNIEnv* env, jclass, jfieldID fid) {
-    ScopedJniThreadState ts(env);
-    Field* f = ts.DecodeField(fid);
-    return ts.AddLocalReference<jobject>(f->GetObject(NULL));
+    ScopedObjectAccess soa(env);
+    Field* f = soa.DecodeField(fid);
+    return soa.AddLocalReference<jobject>(f->GetObject(NULL));
   }
 
   static void SetObjectField(JNIEnv* env, jobject java_object, jfieldID fid, jobject java_value) {
-    ScopedJniThreadState ts(env);
-    Object* o = ts.Decode<Object*>(java_object);
-    Object* v = ts.Decode<Object*>(java_value);
-    Field* f = ts.DecodeField(fid);
+    ScopedObjectAccess soa(env);
+    Object* o = soa.Decode<Object*>(java_object);
+    Object* v = soa.Decode<Object*>(java_value);
+    Field* f = soa.DecodeField(fid);
     f->SetObject(o, v);
   }
 
   static void SetStaticObjectField(JNIEnv* env, jclass, jfieldID fid, jobject java_value) {
-    ScopedJniThreadState ts(env);
-    Object* v = ts.Decode<Object*>(java_value);
-    Field* f = ts.DecodeField(fid);
+    ScopedObjectAccess soa(env);
+    Object* v = soa.Decode<Object*>(java_value);
+    Field* f = soa.DecodeField(fid);
     f->SetObject(NULL, v);
   }
 
 #define GET_PRIMITIVE_FIELD(fn, instance) \
-  ScopedJniThreadState ts(env); \
-  Object* o = ts.Decode<Object*>(instance); \
-  Field* f = ts.DecodeField(fid); \
+  ScopedObjectAccess soa(env); \
+  Object* o = soa.Decode<Object*>(instance); \
+  Field* f = soa.DecodeField(fid); \
   return f->fn(o)
 
 #define SET_PRIMITIVE_FIELD(fn, instance, value) \
-  ScopedJniThreadState ts(env); \
-  Object* o = ts.Decode<Object*>(instance); \
-  Field* f = ts.DecodeField(fid); \
+  ScopedObjectAccess soa(env); \
+  Object* o = soa.Decode<Object*>(instance); \
+  Field* f = soa.DecodeField(fid); \
   f->fn(o, value)
 
   static jboolean GetBooleanField(JNIEnv* env, jobject obj, jfieldID fid) {
@@ -1598,227 +1569,227 @@
   }
 
   static jobject CallStaticObjectMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, ap));
-    jobject local_result = ts.AddLocalReference<jobject>(result.GetL());
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, ap));
+    jobject local_result = soa.AddLocalReference<jobject>(result.GetL());
     va_end(ap);
     return local_result;
   }
 
   static jobject CallStaticObjectMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, args));
-    return ts.AddLocalReference<jobject>(result.GetL());
+    ScopedObjectAccess soa(env);
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, args));
+    return soa.AddLocalReference<jobject>(result.GetL());
   }
 
   static jobject CallStaticObjectMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    JValue result(InvokeWithJValues(ts, NULL, mid, args));
-    return ts.AddLocalReference<jobject>(result.GetL());
+    ScopedObjectAccess soa(env);
+    JValue result(InvokeWithJValues(soa, NULL, mid, args));
+    return soa.AddLocalReference<jobject>(result.GetL());
   }
 
   static jboolean CallStaticBooleanMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, ap));
     va_end(ap);
     return result.GetZ();
   }
 
   static jboolean CallStaticBooleanMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, NULL, mid, args).GetZ();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, NULL, mid, args).GetZ();
   }
 
   static jboolean CallStaticBooleanMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, NULL, mid, args).GetZ();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, NULL, mid, args).GetZ();
   }
 
   static jbyte CallStaticByteMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, ap));
     va_end(ap);
     return result.GetB();
   }
 
   static jbyte CallStaticByteMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, NULL, mid, args).GetB();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, NULL, mid, args).GetB();
   }
 
   static jbyte CallStaticByteMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, NULL, mid, args).GetB();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, NULL, mid, args).GetB();
   }
 
   static jchar CallStaticCharMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, ap));
     va_end(ap);
     return result.GetC();
   }
 
   static jchar CallStaticCharMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, NULL, mid, args).GetC();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, NULL, mid, args).GetC();
   }
 
   static jchar CallStaticCharMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, NULL, mid, args).GetC();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, NULL, mid, args).GetC();
   }
 
   static jshort CallStaticShortMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, ap));
     va_end(ap);
     return result.GetS();
   }
 
   static jshort CallStaticShortMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, NULL, mid, args).GetS();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, NULL, mid, args).GetS();
   }
 
   static jshort CallStaticShortMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, NULL, mid, args).GetS();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, NULL, mid, args).GetS();
   }
 
   static jint CallStaticIntMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, ap));
     va_end(ap);
     return result.GetI();
   }
 
   static jint CallStaticIntMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, NULL, mid, args).GetI();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, NULL, mid, args).GetI();
   }
 
   static jint CallStaticIntMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, NULL, mid, args).GetI();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, NULL, mid, args).GetI();
   }
 
   static jlong CallStaticLongMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, ap));
     va_end(ap);
     return result.GetJ();
   }
 
   static jlong CallStaticLongMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, NULL, mid, args).GetJ();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, NULL, mid, args).GetJ();
   }
 
   static jlong CallStaticLongMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, NULL, mid, args).GetJ();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, NULL, mid, args).GetJ();
   }
 
   static jfloat CallStaticFloatMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, ap));
     va_end(ap);
     return result.GetF();
   }
 
   static jfloat CallStaticFloatMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, NULL, mid, args).GetF();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, NULL, mid, args).GetF();
   }
 
   static jfloat CallStaticFloatMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, NULL, mid, args).GetF();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, NULL, mid, args).GetF();
   }
 
   static jdouble CallStaticDoubleMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    JValue result(InvokeWithVarArgs(ts, NULL, mid, ap));
+    JValue result(InvokeWithVarArgs(soa, NULL, mid, ap));
     va_end(ap);
     return result.GetD();
   }
 
   static jdouble CallStaticDoubleMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithVarArgs(ts, NULL, mid, args).GetD();
+    ScopedObjectAccess soa(env);
+    return InvokeWithVarArgs(soa, NULL, mid, args).GetD();
   }
 
   static jdouble CallStaticDoubleMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    return InvokeWithJValues(ts, NULL, mid, args).GetD();
+    ScopedObjectAccess soa(env);
+    return InvokeWithJValues(soa, NULL, mid, args).GetD();
   }
 
   static void CallStaticVoidMethod(JNIEnv* env, jclass, jmethodID mid, ...) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     va_list ap;
     va_start(ap, mid);
-    InvokeWithVarArgs(ts, NULL, mid, ap);
+    InvokeWithVarArgs(soa, NULL, mid, ap);
     va_end(ap);
   }
 
   static void CallStaticVoidMethodV(JNIEnv* env, jclass, jmethodID mid, va_list args) {
-    ScopedJniThreadState ts(env);
-    InvokeWithVarArgs(ts, NULL, mid, args);
+    ScopedObjectAccess soa(env);
+    InvokeWithVarArgs(soa, NULL, mid, args);
   }
 
   static void CallStaticVoidMethodA(JNIEnv* env, jclass, jmethodID mid, jvalue* args) {
-    ScopedJniThreadState ts(env);
-    InvokeWithJValues(ts, NULL, mid, args);
+    ScopedObjectAccess soa(env);
+    InvokeWithJValues(soa, NULL, mid, args);
   }
 
   static jstring NewString(JNIEnv* env, const jchar* chars, jsize char_count) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     String* result = String::AllocFromUtf16(char_count, chars);
-    return ts.AddLocalReference<jstring>(result);
+    return soa.AddLocalReference<jstring>(result);
   }
 
   static jstring NewStringUTF(JNIEnv* env, const char* utf) {
     if (utf == NULL) {
       return NULL;
     }
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     String* result = String::AllocFromModifiedUtf8(utf);
-    return ts.AddLocalReference<jstring>(result);
+    return soa.AddLocalReference<jstring>(result);
   }
 
   static jsize GetStringLength(JNIEnv* env, jstring java_string) {
-    ScopedJniThreadState ts(env);
-    return ts.Decode<String*>(java_string)->GetLength();
+    ScopedObjectAccess soa(env);
+    return soa.Decode<String*>(java_string)->GetLength();
   }
 
   static jsize GetStringUTFLength(JNIEnv* env, jstring java_string) {
-    ScopedJniThreadState ts(env);
-    return ts.Decode<String*>(java_string)->GetUtfLength();
+    ScopedObjectAccess soa(env);
+    return soa.Decode<String*>(java_string)->GetUtfLength();
   }
 
   static void GetStringRegion(JNIEnv* env, jstring java_string, jsize start, jsize length, jchar* buf) {
-    ScopedJniThreadState ts(env);
-    String* s = ts.Decode<String*>(java_string);
+    ScopedObjectAccess soa(env);
+    String* s = soa.Decode<String*>(java_string);
     if (start < 0 || length < 0 || start + length > s->GetLength()) {
-      ThrowSIOOBE(ts, start, length, s->GetLength());
+      ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       const jchar* chars = s->GetCharArray()->GetData() + s->GetOffset();
       memcpy(buf, chars + start, length * sizeof(jchar));
@@ -1826,10 +1797,10 @@
   }
 
   static void GetStringUTFRegion(JNIEnv* env, jstring java_string, jsize start, jsize length, char* buf) {
-    ScopedJniThreadState ts(env);
-    String* s = ts.Decode<String*>(java_string);
+    ScopedObjectAccess soa(env);
+    String* s = soa.Decode<String*>(java_string);
     if (start < 0 || length < 0 || start + length > s->GetLength()) {
-      ThrowSIOOBE(ts, start, length, s->GetLength());
+      ThrowSIOOBE(soa, start, length, s->GetLength());
     } else {
       const jchar* chars = s->GetCharArray()->GetData() + s->GetOffset();
       ConvertUtf16ToModifiedUtf8(buf, chars + start, length);
@@ -1837,10 +1808,10 @@
   }
 
   static const jchar* GetStringChars(JNIEnv* env, jstring java_string, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    String* s = ts.Decode<String*>(java_string);
+    ScopedObjectAccess soa(env);
+    String* s = soa.Decode<String*>(java_string);
     const CharArray* chars = s->GetCharArray();
-    PinPrimitiveArray(ts, chars);
+    PinPrimitiveArray(soa, chars);
     if (is_copy != NULL) {
       *is_copy = JNI_FALSE;
     }
@@ -1848,29 +1819,29 @@
   }
 
   static void ReleaseStringChars(JNIEnv* env, jstring java_string, const jchar*) {
-    ScopedJniThreadState ts(env);
-    UnpinPrimitiveArray(ts, ts.Decode<String*>(java_string)->GetCharArray());
+    ScopedObjectAccess soa(env);
+    UnpinPrimitiveArray(soa, soa.Decode<String*>(java_string)->GetCharArray());
   }
 
   static const jchar* GetStringCritical(JNIEnv* env, jstring java_string, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     return GetStringChars(env, java_string, is_copy);
   }
 
   static void ReleaseStringCritical(JNIEnv* env, jstring java_string, const jchar* chars) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     return ReleaseStringChars(env, java_string, chars);
   }
 
   static const char* GetStringUTFChars(JNIEnv* env, jstring java_string, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     if (java_string == NULL) {
       return NULL;
     }
     if (is_copy != NULL) {
       *is_copy = JNI_TRUE;
     }
-    String* s = ts.Decode<String*>(java_string);
+    String* s = soa.Decode<String*>(java_string);
     size_t byte_count = s->GetUtfLength();
     char* bytes = new char[byte_count + 1];
     CHECK(bytes != NULL); // bionic aborts anyway.
@@ -1881,73 +1852,73 @@
   }
 
   static void ReleaseStringUTFChars(JNIEnv* env, jstring, const char* chars) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     delete[] chars;
   }
 
   static jsize GetArrayLength(JNIEnv* env, jarray java_array) {
-    ScopedJniThreadState ts(env);
-    Object* obj = ts.Decode<Object*>(java_array);
+    ScopedObjectAccess soa(env);
+    Object* obj = soa.Decode<Object*>(java_array);
     CHECK(obj->IsArrayInstance()); // TODO: ReportJniError
     Array* array = obj->AsArray();
     return array->GetLength();
   }
 
   static jobject GetObjectArrayElement(JNIEnv* env, jobjectArray java_array, jsize index) {
-    ScopedJniThreadState ts(env);
-    ObjectArray<Object>* array = ts.Decode<ObjectArray<Object>*>(java_array);
-    return ts.AddLocalReference<jobject>(array->Get(index));
+    ScopedObjectAccess soa(env);
+    ObjectArray<Object>* array = soa.Decode<ObjectArray<Object>*>(java_array);
+    return soa.AddLocalReference<jobject>(array->Get(index));
   }
 
   static void SetObjectArrayElement(JNIEnv* env,
       jobjectArray java_array, jsize index, jobject java_value) {
-    ScopedJniThreadState ts(env);
-    ObjectArray<Object>* array = ts.Decode<ObjectArray<Object>*>(java_array);
-    Object* value = ts.Decode<Object*>(java_value);
+    ScopedObjectAccess soa(env);
+    ObjectArray<Object>* array = soa.Decode<ObjectArray<Object>*>(java_array);
+    Object* value = soa.Decode<Object*>(java_value);
     array->Set(index, value);
   }
 
   static jbooleanArray NewBooleanArray(JNIEnv* env, jsize length) {
-    ScopedJniThreadState ts(env);
-    return NewPrimitiveArray<jbooleanArray, BooleanArray>(ts, length);
+    ScopedObjectAccess soa(env);
+    return NewPrimitiveArray<jbooleanArray, BooleanArray>(soa, length);
   }
 
   static jbyteArray NewByteArray(JNIEnv* env, jsize length) {
-    ScopedJniThreadState ts(env);
-    return NewPrimitiveArray<jbyteArray, ByteArray>(ts, length);
+    ScopedObjectAccess soa(env);
+    return NewPrimitiveArray<jbyteArray, ByteArray>(soa, length);
   }
 
   static jcharArray NewCharArray(JNIEnv* env, jsize length) {
-    ScopedJniThreadState ts(env);
-    return NewPrimitiveArray<jcharArray, CharArray>(ts, length);
+    ScopedObjectAccess soa(env);
+    return NewPrimitiveArray<jcharArray, CharArray>(soa, length);
   }
 
   static jdoubleArray NewDoubleArray(JNIEnv* env, jsize length) {
-    ScopedJniThreadState ts(env);
-    return NewPrimitiveArray<jdoubleArray, DoubleArray>(ts, length);
+    ScopedObjectAccess soa(env);
+    return NewPrimitiveArray<jdoubleArray, DoubleArray>(soa, length);
   }
 
   static jfloatArray NewFloatArray(JNIEnv* env, jsize length) {
-    ScopedJniThreadState ts(env);
-    return NewPrimitiveArray<jfloatArray, FloatArray>(ts, length);
+    ScopedObjectAccess soa(env);
+    return NewPrimitiveArray<jfloatArray, FloatArray>(soa, length);
   }
 
   static jintArray NewIntArray(JNIEnv* env, jsize length) {
-    ScopedJniThreadState ts(env);
-    return NewPrimitiveArray<jintArray, IntArray>(ts, length);
+    ScopedObjectAccess soa(env);
+    return NewPrimitiveArray<jintArray, IntArray>(soa, length);
   }
 
   static jlongArray NewLongArray(JNIEnv* env, jsize length) {
-    ScopedJniThreadState ts(env);
-    return NewPrimitiveArray<jlongArray, LongArray>(ts, length);
+    ScopedObjectAccess soa(env);
+    return NewPrimitiveArray<jlongArray, LongArray>(soa, length);
   }
 
   static jobjectArray NewObjectArray(JNIEnv* env, jsize length, jclass element_jclass, jobject initial_element) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     CHECK_GE(length, 0); // TODO: ReportJniError
 
     // Compute the array class corresponding to the given element class.
-    Class* element_class = ts.Decode<Class*>(element_jclass);
+    Class* element_class = soa.Decode<Class*>(element_jclass);
     std::string descriptor;
     descriptor += "[";
     descriptor += ClassHelper(element_class).GetDescriptor();
@@ -1959,26 +1930,26 @@
     }
 
     // Allocate and initialize if necessary.
-    Class* array_class = ts.Decode<Class*>(java_array_class.get());
+    Class* array_class = soa.Decode<Class*>(java_array_class.get());
     ObjectArray<Object>* result = ObjectArray<Object>::Alloc(array_class, length);
     if (initial_element != NULL) {
-      Object* initial_object = ts.Decode<Object*>(initial_element);
+      Object* initial_object = soa.Decode<Object*>(initial_element);
       for (jsize i = 0; i < length; ++i) {
         result->Set(i, initial_object);
       }
     }
-    return ts.AddLocalReference<jobjectArray>(result);
+    return soa.AddLocalReference<jobjectArray>(result);
   }
 
   static jshortArray NewShortArray(JNIEnv* env, jsize length) {
-    ScopedJniThreadState ts(env);
-    return NewPrimitiveArray<jshortArray, ShortArray>(ts, length);
+    ScopedObjectAccess soa(env);
+    return NewPrimitiveArray<jshortArray, ShortArray>(soa, length);
   }
 
   static void* GetPrimitiveArrayCritical(JNIEnv* env, jarray java_array, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    Array* array = ts.Decode<Array*>(java_array);
-    PinPrimitiveArray(ts, array);
+    ScopedObjectAccess soa(env);
+    Array* array = soa.Decode<Array*>(java_array);
+    PinPrimitiveArray(soa, array);
     if (is_copy != NULL) {
       *is_copy = JNI_FALSE;
     }
@@ -1986,173 +1957,173 @@
   }
 
   static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray array, void*, jint mode) {
-    ScopedJniThreadState ts(env);
-    ReleasePrimitiveArray(ts, array, mode);
+    ScopedObjectAccess soa(env);
+    ReleasePrimitiveArray(soa, array, mode);
   }
 
   static jboolean* GetBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    return GetPrimitiveArray<jbooleanArray, jboolean*, BooleanArray>(ts, array, is_copy);
+    ScopedObjectAccess soa(env);
+    return GetPrimitiveArray<jbooleanArray, jboolean*, BooleanArray>(soa, array, is_copy);
   }
 
   static jbyte* GetByteArrayElements(JNIEnv* env, jbyteArray array, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    return GetPrimitiveArray<jbyteArray, jbyte*, ByteArray>(ts, array, is_copy);
+    ScopedObjectAccess soa(env);
+    return GetPrimitiveArray<jbyteArray, jbyte*, ByteArray>(soa, array, is_copy);
   }
 
   static jchar* GetCharArrayElements(JNIEnv* env, jcharArray array, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    return GetPrimitiveArray<jcharArray, jchar*, CharArray>(ts, array, is_copy);
+    ScopedObjectAccess soa(env);
+    return GetPrimitiveArray<jcharArray, jchar*, CharArray>(soa, array, is_copy);
   }
 
   static jdouble* GetDoubleArrayElements(JNIEnv* env, jdoubleArray array, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    return GetPrimitiveArray<jdoubleArray, jdouble*, DoubleArray>(ts, array, is_copy);
+    ScopedObjectAccess soa(env);
+    return GetPrimitiveArray<jdoubleArray, jdouble*, DoubleArray>(soa, array, is_copy);
   }
 
   static jfloat* GetFloatArrayElements(JNIEnv* env, jfloatArray array, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    return GetPrimitiveArray<jfloatArray, jfloat*, FloatArray>(ts, array, is_copy);
+    ScopedObjectAccess soa(env);
+    return GetPrimitiveArray<jfloatArray, jfloat*, FloatArray>(soa, array, is_copy);
   }
 
   static jint* GetIntArrayElements(JNIEnv* env, jintArray array, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    return GetPrimitiveArray<jintArray, jint*, IntArray>(ts, array, is_copy);
+    ScopedObjectAccess soa(env);
+    return GetPrimitiveArray<jintArray, jint*, IntArray>(soa, array, is_copy);
   }
 
   static jlong* GetLongArrayElements(JNIEnv* env, jlongArray array, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    return GetPrimitiveArray<jlongArray, jlong*, LongArray>(ts, array, is_copy);
+    ScopedObjectAccess soa(env);
+    return GetPrimitiveArray<jlongArray, jlong*, LongArray>(soa, array, is_copy);
   }
 
   static jshort* GetShortArrayElements(JNIEnv* env, jshortArray array, jboolean* is_copy) {
-    ScopedJniThreadState ts(env);
-    return GetPrimitiveArray<jshortArray, jshort*, ShortArray>(ts, array, is_copy);
+    ScopedObjectAccess soa(env);
+    return GetPrimitiveArray<jshortArray, jshort*, ShortArray>(soa, array, is_copy);
   }
 
   static void ReleaseBooleanArrayElements(JNIEnv* env, jbooleanArray array, jboolean*, jint mode) {
-    ScopedJniThreadState ts(env);
-    ReleasePrimitiveArray(ts, array, mode);
+    ScopedObjectAccess soa(env);
+    ReleasePrimitiveArray(soa, array, mode);
   }
 
   static void ReleaseByteArrayElements(JNIEnv* env, jbyteArray array, jbyte*, jint mode) {
-    ScopedJniThreadState ts(env);
-    ReleasePrimitiveArray(ts, array, mode);
+    ScopedObjectAccess soa(env);
+    ReleasePrimitiveArray(soa, array, mode);
   }
 
   static void ReleaseCharArrayElements(JNIEnv* env, jcharArray array, jchar*, jint mode) {
-    ScopedJniThreadState ts(env);
-    ReleasePrimitiveArray(ts, array, mode);
+    ScopedObjectAccess soa(env);
+    ReleasePrimitiveArray(soa, array, mode);
   }
 
   static void ReleaseDoubleArrayElements(JNIEnv* env, jdoubleArray array, jdouble*, jint mode) {
-    ScopedJniThreadState ts(env);
-    ReleasePrimitiveArray(ts, array, mode);
+    ScopedObjectAccess soa(env);
+    ReleasePrimitiveArray(soa, array, mode);
   }
 
   static void ReleaseFloatArrayElements(JNIEnv* env, jfloatArray array, jfloat*, jint mode) {
-    ScopedJniThreadState ts(env);
-    ReleasePrimitiveArray(ts, array, mode);
+    ScopedObjectAccess soa(env);
+    ReleasePrimitiveArray(soa, array, mode);
   }
 
   static void ReleaseIntArrayElements(JNIEnv* env, jintArray array, jint*, jint mode) {
-    ScopedJniThreadState ts(env);
-    ReleasePrimitiveArray(ts, array, mode);
+    ScopedObjectAccess soa(env);
+    ReleasePrimitiveArray(soa, array, mode);
   }
 
   static void ReleaseLongArrayElements(JNIEnv* env, jlongArray array, jlong*, jint mode) {
-    ScopedJniThreadState ts(env);
-    ReleasePrimitiveArray(ts, array, mode);
+    ScopedObjectAccess soa(env);
+    ReleasePrimitiveArray(soa, array, mode);
   }
 
   static void ReleaseShortArrayElements(JNIEnv* env, jshortArray array, jshort*, jint mode) {
-    ScopedJniThreadState ts(env);
-    ReleasePrimitiveArray(ts, array, mode);
+    ScopedObjectAccess soa(env);
+    ReleasePrimitiveArray(soa, array, mode);
   }
 
   static void GetBooleanArrayRegion(JNIEnv* env, jbooleanArray array, jsize start, jsize length, jboolean* buf) {
-    ScopedJniThreadState ts(env);
-    GetPrimitiveArrayRegion<jbooleanArray, jboolean, BooleanArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    GetPrimitiveArrayRegion<jbooleanArray, jboolean, BooleanArray>(soa, array, start, length, buf);
   }
 
   static void GetByteArrayRegion(JNIEnv* env, jbyteArray array, jsize start, jsize length, jbyte* buf) {
-    ScopedJniThreadState ts(env);
-    GetPrimitiveArrayRegion<jbyteArray, jbyte, ByteArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    GetPrimitiveArrayRegion<jbyteArray, jbyte, ByteArray>(soa, array, start, length, buf);
   }
 
   static void GetCharArrayRegion(JNIEnv* env, jcharArray array, jsize start, jsize length, jchar* buf) {
-    ScopedJniThreadState ts(env);
-    GetPrimitiveArrayRegion<jcharArray, jchar, CharArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    GetPrimitiveArrayRegion<jcharArray, jchar, CharArray>(soa, array, start, length, buf);
   }
 
   static void GetDoubleArrayRegion(JNIEnv* env, jdoubleArray array, jsize start, jsize length, jdouble* buf) {
-    ScopedJniThreadState ts(env);
-    GetPrimitiveArrayRegion<jdoubleArray, jdouble, DoubleArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    GetPrimitiveArrayRegion<jdoubleArray, jdouble, DoubleArray>(soa, array, start, length, buf);
   }
 
   static void GetFloatArrayRegion(JNIEnv* env, jfloatArray array, jsize start, jsize length, jfloat* buf) {
-    ScopedJniThreadState ts(env);
-    GetPrimitiveArrayRegion<jfloatArray, jfloat, FloatArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    GetPrimitiveArrayRegion<jfloatArray, jfloat, FloatArray>(soa, array, start, length, buf);
   }
 
   static void GetIntArrayRegion(JNIEnv* env, jintArray array, jsize start, jsize length, jint* buf) {
-    ScopedJniThreadState ts(env);
-    GetPrimitiveArrayRegion<jintArray, jint, IntArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    GetPrimitiveArrayRegion<jintArray, jint, IntArray>(soa, array, start, length, buf);
   }
 
   static void GetLongArrayRegion(JNIEnv* env, jlongArray array, jsize start, jsize length, jlong* buf) {
-    ScopedJniThreadState ts(env);
-    GetPrimitiveArrayRegion<jlongArray, jlong, LongArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    GetPrimitiveArrayRegion<jlongArray, jlong, LongArray>(soa, array, start, length, buf);
   }
 
   static void GetShortArrayRegion(JNIEnv* env, jshortArray array, jsize start, jsize length, jshort* buf) {
-    ScopedJniThreadState ts(env);
-    GetPrimitiveArrayRegion<jshortArray, jshort, ShortArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    GetPrimitiveArrayRegion<jshortArray, jshort, ShortArray>(soa, array, start, length, buf);
   }
 
   static void SetBooleanArrayRegion(JNIEnv* env, jbooleanArray array, jsize start, jsize length, const jboolean* buf) {
-    ScopedJniThreadState ts(env);
-    SetPrimitiveArrayRegion<jbooleanArray, jboolean, BooleanArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    SetPrimitiveArrayRegion<jbooleanArray, jboolean, BooleanArray>(soa, array, start, length, buf);
   }
 
   static void SetByteArrayRegion(JNIEnv* env, jbyteArray array, jsize start, jsize length, const jbyte* buf) {
-    ScopedJniThreadState ts(env);
-    SetPrimitiveArrayRegion<jbyteArray, jbyte, ByteArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    SetPrimitiveArrayRegion<jbyteArray, jbyte, ByteArray>(soa, array, start, length, buf);
   }
 
   static void SetCharArrayRegion(JNIEnv* env, jcharArray array, jsize start, jsize length, const jchar* buf) {
-    ScopedJniThreadState ts(env);
-    SetPrimitiveArrayRegion<jcharArray, jchar, CharArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    SetPrimitiveArrayRegion<jcharArray, jchar, CharArray>(soa, array, start, length, buf);
   }
 
   static void SetDoubleArrayRegion(JNIEnv* env, jdoubleArray array, jsize start, jsize length, const jdouble* buf) {
-    ScopedJniThreadState ts(env);
-    SetPrimitiveArrayRegion<jdoubleArray, jdouble, DoubleArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    SetPrimitiveArrayRegion<jdoubleArray, jdouble, DoubleArray>(soa, array, start, length, buf);
   }
 
   static void SetFloatArrayRegion(JNIEnv* env, jfloatArray array, jsize start, jsize length, const jfloat* buf) {
-    ScopedJniThreadState ts(env);
-    SetPrimitiveArrayRegion<jfloatArray, jfloat, FloatArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    SetPrimitiveArrayRegion<jfloatArray, jfloat, FloatArray>(soa, array, start, length, buf);
   }
 
   static void SetIntArrayRegion(JNIEnv* env, jintArray array, jsize start, jsize length, const jint* buf) {
-    ScopedJniThreadState ts(env);
-    SetPrimitiveArrayRegion<jintArray, jint, IntArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    SetPrimitiveArrayRegion<jintArray, jint, IntArray>(soa, array, start, length, buf);
   }
 
   static void SetLongArrayRegion(JNIEnv* env, jlongArray array, jsize start, jsize length, const jlong* buf) {
-    ScopedJniThreadState ts(env);
-    SetPrimitiveArrayRegion<jlongArray, jlong, LongArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    SetPrimitiveArrayRegion<jlongArray, jlong, LongArray>(soa, array, start, length, buf);
   }
 
   static void SetShortArrayRegion(JNIEnv* env, jshortArray array, jsize start, jsize length, const jshort* buf) {
-    ScopedJniThreadState ts(env);
-    SetPrimitiveArrayRegion<jshortArray, jshort, ShortArray>(ts, array, start, length, buf);
+    ScopedObjectAccess soa(env);
+    SetPrimitiveArrayRegion<jshortArray, jshort, ShortArray>(soa, array, start, length, buf);
   }
 
   static jint RegisterNatives(JNIEnv* env, jclass java_class, const JNINativeMethod* methods, jint method_count) {
-    ScopedJniThreadState ts(env);
-    Class* c = ts.Decode<Class*>(java_class);
+    ScopedObjectAccess soa(env);
+    Class* c = soa.Decode<Class*>(java_class);
 
     for (int i = 0; i < method_count; i++) {
       const char* name = methods[i].name;
@@ -2169,67 +2140,69 @@
       }
       if (m == NULL) {
         LOG(INFO) << "Failed to register native method " << name << sig;
-        ThrowNoSuchMethodError(ts, c, name, sig, "static or non-static");
+        ThrowNoSuchMethodError(soa, c, name, sig, "static or non-static");
         return JNI_ERR;
       } else if (!m->IsNative()) {
         LOG(INFO) << "Failed to register non-native method " << name << sig << " as native";
-        ThrowNoSuchMethodError(ts, c, name, sig, "native");
+        ThrowNoSuchMethodError(soa, c, name, sig, "native");
         return JNI_ERR;
       }
 
       VLOG(jni) << "[Registering JNI native method " << PrettyMethod(m) << "]";
 
-      m->RegisterNative(ts.Self(), methods[i].fnPtr);
+      m->RegisterNative(soa.Self(), methods[i].fnPtr);
     }
     return JNI_OK;
   }
 
   static jint UnregisterNatives(JNIEnv* env, jclass java_class) {
-    ScopedJniThreadState ts(env);
-    Class* c = ts.Decode<Class*>(java_class);
+    ScopedObjectAccess soa(env);
+    Class* c = soa.Decode<Class*>(java_class);
 
     VLOG(jni) << "[Unregistering JNI native methods for " << PrettyClass(c) << "]";
 
     for (size_t i = 0; i < c->NumDirectMethods(); ++i) {
       Method* m = c->GetDirectMethod(i);
       if (m->IsNative()) {
-        m->UnregisterNative(ts.Self());
+        m->UnregisterNative(soa.Self());
       }
     }
     for (size_t i = 0; i < c->NumVirtualMethods(); ++i) {
       Method* m = c->GetVirtualMethod(i);
       if (m->IsNative()) {
-        m->UnregisterNative(ts.Self());
+        m->UnregisterNative(soa.Self());
       }
     }
 
     return JNI_OK;
   }
 
-  static jint MonitorEnter(JNIEnv* env, jobject java_object) {
-    ScopedJniThreadState ts(env);
-    Object* o = ts.Decode<Object*>(java_object);
-    o->MonitorEnter(ts.Self());
-    if (ts.Self()->IsExceptionPending()) {
+  static jint MonitorEnter(JNIEnv* env, jobject java_object)
+      EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
+    ScopedObjectAccess soa(env);
+    Object* o = soa.Decode<Object*>(java_object);
+    o->MonitorEnter(soa.Self());
+    if (soa.Self()->IsExceptionPending()) {
       return JNI_ERR;
     }
-    ts.Env()->monitors.Add(o);
+    soa.Env()->monitors.Add(o);
     return JNI_OK;
   }
 
-  static jint MonitorExit(JNIEnv* env, jobject java_object) {
-    ScopedJniThreadState ts(env);
-    Object* o = ts.Decode<Object*>(java_object);
-    o->MonitorExit(ts.Self());
-    if (ts.Self()->IsExceptionPending()) {
+  static jint MonitorExit(JNIEnv* env, jobject java_object)
+      UNLOCK_FUNCTION(monitor_lock_) {
+    ScopedObjectAccess soa(env);
+    Object* o = soa.Decode<Object*>(java_object);
+    o->MonitorExit(soa.Self());
+    if (soa.Self()->IsExceptionPending()) {
       return JNI_ERR;
     }
-    ts.Env()->monitors.Remove(o);
+    soa.Env()->monitors.Remove(o);
     return JNI_OK;
   }
 
   static jint GetJavaVM(JNIEnv* env, JavaVM** vm) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     Runtime* runtime = Runtime::Current();
     if (runtime != NULL) {
       *vm = runtime->GetJavaVM();
@@ -2240,13 +2213,13 @@
   }
 
   static jobject NewDirectByteBuffer(JNIEnv* env, void* address, jlong capacity) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
 
     // The address may not be NULL, and the capacity must be > 0.
     CHECK(address != NULL); // TODO: ReportJniError
     CHECK_GT(capacity, 0); // TODO: ReportJniError
 
-    // At the moment, the Java side is limited to 32 bits.
+    // At the moment, the Java side is limited to 32 bits.
     CHECK_LE(reinterpret_cast<uintptr_t>(address), 0xffffffff);
     CHECK_LE(capacity, 0xffffffff);
     jint address_arg = reinterpret_cast<jint>(address);
@@ -2255,21 +2228,21 @@
     jobject result = env->NewObject(WellKnownClasses::java_nio_ReadWriteDirectByteBuffer,
                                     WellKnownClasses::java_nio_ReadWriteDirectByteBuffer_init,
                                     address_arg, capacity_arg);
-    return ts.Self()->IsExceptionPending() ? NULL : result;
+    return soa.Self()->IsExceptionPending() ? NULL : result;
   }
 
   static void* GetDirectBufferAddress(JNIEnv* env, jobject java_buffer) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     return reinterpret_cast<void*>(env->GetIntField(java_buffer, WellKnownClasses::java_nio_ReadWriteDirectByteBuffer_effectiveDirectAddress));
   }
 
   static jlong GetDirectBufferCapacity(JNIEnv* env, jobject java_buffer) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
     return static_cast<jlong>(env->GetIntField(java_buffer, WellKnownClasses::java_nio_ReadWriteDirectByteBuffer_capacity));
   }
 
   static jobjectRefType GetObjectRefType(JNIEnv* env, jobject java_object) {
-    ScopedJniThreadState ts(env);
+    ScopedObjectAccess soa(env);
 
     CHECK(java_object != NULL); // TODO: ReportJniError
 
@@ -2278,7 +2251,7 @@
     IndirectRefKind kind = GetIndirectRefKind(ref);
     switch (kind) {
     case kLocal:
-      if (ts.Env()->locals.Get(ref) != kInvalidIndirectRefObject) {
+      if (soa.Env()->locals.Get(ref) != kInvalidIndirectRefObject) {
         return JNILocalRefType;
       }
       return JNIInvalidRefType;
@@ -2288,18 +2261,18 @@
       return JNIWeakGlobalRefType;
     case kSirtOrInvalid:
       // Is it in a stack IRT?
-      if (ts.Self()->SirtContains(java_object)) {
+      if (soa.Self()->SirtContains(java_object)) {
         return JNILocalRefType;
       }
 
-      if (!ts.Vm()->work_around_app_jni_bugs) {
+      if (!soa.Vm()->work_around_app_jni_bugs) {
         return JNIInvalidRefType;
       }
 
       // If we're handing out direct pointers, check whether it's a direct pointer
       // to a local reference.
-      if (ts.Decode<Object*>(java_object) == reinterpret_cast<Object*>(java_object)) {
-        if (ts.Env()->locals.ContainsDirectPointer(reinterpret_cast<Object*>(java_object))) {
+      if (soa.Decode<Object*>(java_object) == reinterpret_cast<Object*>(java_object)) {
+        if (soa.Env()->locals.ContainsDirectPointer(reinterpret_cast<Object*>(java_object))) {
           return JNILocalRefType;
         }
       }
@@ -2309,6 +2282,80 @@
     LOG(FATAL) << "IndirectRefKind[" << kind << "]";
     return JNIInvalidRefType;
   }
+
+ private:
+  static jint EnsureLocalCapacity(const ScopedObjectAccess& soa, jint desired_capacity,
+                                  const char* caller)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    // TODO: we should try to expand the table if necessary.
+    if (desired_capacity < 1 || desired_capacity > static_cast<jint>(kLocalsMax)) {
+      LOG(ERROR) << "Invalid capacity given to " << caller << ": " << desired_capacity;
+      return JNI_ERR;
+    }
+    // TODO: this isn't quite right, since "capacity" includes holes.
+    size_t capacity = soa.Env()->locals.Capacity();
+    bool okay = (static_cast<jint>(kLocalsMax - capacity) >= desired_capacity);
+    if (!okay) {
+      soa.Self()->ThrowOutOfMemoryError(caller);
+    }
+    return okay ? JNI_OK : JNI_ERR;
+  }
+
+  template<typename JniT, typename ArtT>
+  static JniT NewPrimitiveArray(const ScopedObjectAccess& soa, jsize length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    CHECK_GE(length, 0); // TODO: ReportJniError
+    ArtT* result = ArtT::Alloc(length);
+    return soa.AddLocalReference<JniT>(result);
+  }
+
+  template <typename ArrayT, typename CArrayT, typename ArtArrayT>
+  static CArrayT GetPrimitiveArray(ScopedObjectAccess& soa, ArrayT java_array,
+                                   jboolean* is_copy)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    ArtArrayT* array = soa.Decode<ArtArrayT*>(java_array);
+    PinPrimitiveArray(soa, array);
+    if (is_copy != NULL) {
+      *is_copy = JNI_FALSE;
+    }
+    return array->GetData();
+  }
+
+  template <typename ArrayT>
+  static void ReleasePrimitiveArray(ScopedObjectAccess& soa, ArrayT java_array,
+                                    jint mode)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    if (mode != JNI_COMMIT) {
+      Array* array = soa.Decode<Array*>(java_array);
+      UnpinPrimitiveArray(soa, array);
+    }
+  }
+
+  template <typename JavaArrayT, typename JavaT, typename ArrayT>
+  static void GetPrimitiveArrayRegion(ScopedObjectAccess& soa, JavaArrayT java_array,
+                                      jsize start, jsize length, JavaT* buf)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    ArrayT* array = soa.Decode<ArrayT*>(java_array);
+    if (start < 0 || length < 0 || start + length > array->GetLength()) {
+      ThrowAIOOBE(soa, array, start, length, "src");
+    } else {
+      JavaT* data = array->GetData();
+      memcpy(buf, data + start, length * sizeof(JavaT));
+    }
+  }
+
+  template <typename JavaArrayT, typename JavaT, typename ArrayT>
+  static void SetPrimitiveArrayRegion(ScopedObjectAccess& soa, JavaArrayT java_array,
+                                      jsize start, jsize length, const JavaT* buf)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    ArrayT* array = soa.Decode<ArrayT*>(java_array);
+    if (start < 0 || length < 0 || start + length > array->GetLength()) {
+      ThrowAIOOBE(soa, array, start, length, "dst");
+    } else {
+      JavaT* data = array->GetData();
+      memcpy(data + start, buf, length * sizeof(JavaT));
+    }
+  }
 };
 
 const JNINativeInterface gJniNativeInterface = {
@@ -2603,10 +2650,10 @@
     options.push_back(std::make_pair(std::string(option->optionString), option->extraInfo));
   }
   bool ignore_unrecognized = args->ignoreUnrecognized;
-  Runtime* runtime = Runtime::Create(options, ignore_unrecognized);
-  if (runtime == NULL) {
+  if (!Runtime::Create(options, ignore_unrecognized)) {
     return JNI_ERR;
   }
+  Runtime* runtime = Runtime::Current();
   runtime->Start();
   *p_env = Thread::Current()->GetJniEnv();
   *p_vm = runtime->GetJavaVM();
@@ -2700,7 +2747,7 @@
       globals(gGlobalsInitial, gGlobalsMax, kGlobal),
       weak_globals_lock("JNI weak global reference table lock"),
       weak_globals(kWeakGlobalsInitial, kWeakGlobalsMax, kWeakGlobal),
-      libraries_lock("JNI shared libraries map lock"),
+      libraries_lock("JNI shared libraries map lock", kLoadLibraryLock),
       libraries(new Libraries) {
   functions = unchecked_functions = &gJniInvokeInterface;
   if (options->check_jni_) {
@@ -2760,7 +2807,8 @@
   }
 }
 
-bool JavaVMExt::LoadNativeLibrary(const std::string& path, ClassLoader* class_loader, std::string& detail) {
+bool JavaVMExt::LoadNativeLibrary(const std::string& path, ClassLoader* class_loader,
+                                  std::string& detail) {
   detail.clear();
 
   // See if we've already loaded this library.  If we have, and the class loader
@@ -2815,18 +2863,18 @@
   //   - write a trivial app that calls sleep() then dlopen(), attach
   //     to it with "strace -p <pid>" while it sleeps, and watch for
   //     attempts to open nonexistent dependent shared libs
-
   // TODO: automate some of these checks!
 
+  // Below we dlopen but there is no paired dlclose; this would be necessary if we supported
+  // class unloading. Libraries will only be unloaded when the reference count (incremented by
+  // dlopen) becomes zero from dlclose.
+
   // This can execute slowly for a large library on a busy system, so we
-  // want to switch from kRunnable to kVmWait while it executes.  This allows
-  // the GC to ignore us.
+  // want to switch out of kRunnable while it executes.  This allows the GC to ignore us.
   Thread* self = Thread::Current();
-  void* handle = NULL;
-  {
-    ScopedThreadStateChange tsc(self, kVmWait);
-    handle = dlopen(path.empty() ? NULL : path.c_str(), RTLD_LAZY);
-  }
+  self->TransitionFromRunnableToSuspended(kWaitingForJniOnLoad);
+  void* handle = dlopen(path.empty() ? NULL : path.c_str(), RTLD_LAZY);
+  self->TransitionFromSuspendedToRunnable();
 
   VLOG(jni) << "[Call to dlopen(\"" << path << "\") returned " << handle << "]";
 
@@ -2836,17 +2884,21 @@
   }
 
   // Create a new entry.
+  // TODO: move the locking (and more of this logic) into Libraries.
+  bool created_library = false;
   {
-    // TODO: move the locking (and more of this logic) into Libraries.
     MutexLock mu(libraries_lock);
     library = libraries->Get(path);
-    if (library != NULL) {
-      LOG(INFO) << "WOW: we lost a race to add shared library: "
-                << "\"" << path << "\" ClassLoader=" << class_loader;
-      return library->CheckOnLoadResult();
+    if (library == NULL) {  // We won the race to create the library entry
+      library = new SharedLibrary(path, handle, class_loader);
+      libraries->Put(path, library);
+      created_library = true;
     }
-    library = new SharedLibrary(path, handle, class_loader);
-    libraries->Put(path, library);
+  }
+  if (!created_library) {
+    LOG(INFO) << "WOW: we lost a race to add shared library: "
+        << "\"" << path << "\" ClassLoader=" << class_loader;
+    return library->CheckOnLoadResult();
   }
 
   VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader << "]";
diff --git a/src/jni_internal.h b/src/jni_internal.h
index b96a4d7..fad06e1 100644
--- a/src/jni_internal.h
+++ b/src/jni_internal.h
@@ -43,17 +43,20 @@
 union JValue;
 class Libraries;
 class Method;
-class ScopedJniThreadState;
+class ScopedObjectAccess;
 class Thread;
 
 void SetJniGlobalsMax(size_t max);
 void JniAbortF(const char* jni_function_name, const char* fmt, ...);
 void* FindNativeMethod(Thread* thread);
-void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods, size_t method_count);
+void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods,
+                           size_t method_count);
 
 size_t NumArgArrayBytes(const char* shorty, uint32_t shorty_len);
-JValue InvokeWithJValues(const ScopedJniThreadState&, jobject obj, jmethodID mid, jvalue* args);
-JValue InvokeWithJValues(const ScopedJniThreadState&, Object* receiver, Method* m, JValue* args);
+JValue InvokeWithJValues(const ScopedObjectAccess&, jobject obj, jmethodID mid, jvalue* args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+JValue InvokeWithJValues(const ScopedObjectAccess&, Object* receiver, Method* m, JValue* args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
 
@@ -67,17 +70,20 @@
    * Returns 'true' on success. On failure, sets 'detail' to a
    * human-readable description of the error.
    */
-  bool LoadNativeLibrary(const std::string& path, ClassLoader* class_loader, std::string& detail);
+  bool LoadNativeLibrary(const std::string& path, ClassLoader* class_loader, std::string& detail)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /**
    * Returns a pointer to the code for the native method 'm', found
    * using dlsym(3) on every native library that's been loaded so far.
    */
-  void* FindCodeForNativeMethod(Method* m);
+  void* FindCodeForNativeMethod(Method* m)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void DumpForSigQuit(std::ostream& os);
 
-  void DumpReferenceTables(std::ostream& os);
+  void DumpReferenceTables(std::ostream& os)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void SetCheckJniEnabled(bool enabled);
 
@@ -100,18 +106,18 @@
   bool work_around_app_jni_bugs;
 
   // Used to hold references to pinned primitive arrays.
-  Mutex pins_lock;
+  Mutex pins_lock DEFAULT_MUTEX_ACQUIRED_AFTER;
   ReferenceTable pin_table GUARDED_BY(pins_lock);
 
   // JNI global references.
-  Mutex globals_lock;
+  Mutex globals_lock DEFAULT_MUTEX_ACQUIRED_AFTER;
   IndirectReferenceTable globals GUARDED_BY(globals_lock);
 
   // JNI weak global references.
-  Mutex weak_globals_lock;
+  Mutex weak_globals_lock DEFAULT_MUTEX_ACQUIRED_AFTER;
   IndirectReferenceTable weak_globals GUARDED_BY(weak_globals_lock);
 
-  Mutex libraries_lock;
+  Mutex libraries_lock DEFAULT_MUTEX_ACQUIRED_AFTER;
   Libraries* libraries GUARDED_BY(libraries_lock);
 
   // Used by -Xcheck:jni.
@@ -122,7 +128,8 @@
   JNIEnvExt(Thread* self, JavaVMExt* vm);
   ~JNIEnvExt();
 
-  void DumpReferenceTables(std::ostream& os);
+  void DumpReferenceTables(std::ostream& os)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void SetCheckJniEnabled(bool enabled);
 
diff --git a/src/jni_internal_test.cc b/src/jni_internal_test.cc
index daca1b5..64461b0 100644
--- a/src/jni_internal_test.cc
+++ b/src/jni_internal_test.cc
@@ -20,7 +20,6 @@
 
 #include "common_test.h"
 #include "ScopedLocalRef.h"
-#include "scoped_jni_thread_state.h"
 
 namespace art {
 
@@ -69,9 +68,12 @@
     CommonTest::TearDown();
   }
 
-  Method::InvokeStub* DoCompile(Method*& method, Object*& receiver, bool is_static, const char* method_name, const char* method_signature) {
+  Method::InvokeStub* DoCompile(Method*& method, Object*& receiver, bool is_static,
+                                const char* method_name, const char* method_signature)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const char* class_name = is_static ? "StaticLeafMethods" : "NonStaticLeafMethods";
-    SirtRef<ClassLoader> class_loader(LoadDex(class_name));
+    jobject jclass_loader(LoadDex(class_name));
+    SirtRef<ClassLoader> class_loader(ScopedObjectAccessUnchecked(Thread::Current()).Decode<ClassLoader*>(jclass_loader));
     if (is_static) {
       CompileDirectMethod(class_loader.get(), class_name, method_name, method_signature);
     } else {
@@ -83,7 +85,8 @@
     Class* c = class_linker_->FindClass(DotToDescriptor(class_name).c_str(), class_loader.get());
     CHECK(c != NULL);
 
-    method = is_static ? c->FindDirectMethod(method_name, method_signature) : c->FindVirtualMethod(method_name, method_signature);
+    method = is_static ? c->FindDirectMethod(method_name, method_signature)
+                       : c->FindVirtualMethod(method_name, method_signature);
     CHECK(method != NULL);
 
     receiver = (is_static ? NULL : c->AllocObject());
@@ -94,14 +97,15 @@
     return stub;
   }
 
-  void InvokeNopMethod(bool is_static) {
+  void InvokeNopMethod(bool is_static) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "nop", "()V");
     (*stub)(method, receiver, Thread::Current(), NULL, NULL);
   }
 
-  void InvokeIdentityByteMethod(bool is_static) {
+  void InvokeIdentityByteMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "identity", "(B)B");
@@ -130,7 +134,8 @@
     EXPECT_EQ(SCHAR_MIN, result.GetB());
   }
 
-  void InvokeIdentityIntMethod(bool is_static) {
+  void InvokeIdentityIntMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "identity", "(I)I");
@@ -159,7 +164,8 @@
     EXPECT_EQ(INT_MIN, result.GetI());
   }
 
-  void InvokeIdentityDoubleMethod(bool is_static) {
+  void InvokeIdentityDoubleMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "identity", "(D)D");
@@ -188,7 +194,8 @@
     EXPECT_EQ(DBL_MIN, result.GetD());
   }
 
-  void InvokeSumIntIntMethod(bool is_static) {
+  void InvokeSumIntIntMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(II)I");
@@ -226,7 +233,8 @@
     EXPECT_EQ(-2, result.GetI());
   }
 
-  void InvokeSumIntIntIntMethod(bool is_static) {
+  void InvokeSumIntIntIntMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(III)I");
@@ -269,7 +277,8 @@
     EXPECT_EQ(2147483645, result.GetI());
   }
 
-  void InvokeSumIntIntIntIntMethod(bool is_static) {
+  void InvokeSumIntIntIntIntMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(IIII)I");
@@ -317,7 +326,8 @@
     EXPECT_EQ(-4, result.GetI());
   }
 
-  void InvokeSumIntIntIntIntIntMethod(bool is_static) {
+  void InvokeSumIntIntIntIntIntMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(IIIII)I");
@@ -370,7 +380,8 @@
     EXPECT_EQ(2147483643, result.GetI());
   }
 
-  void InvokeSumDoubleDoubleMethod(bool is_static) {
+  void InvokeSumDoubleDoubleMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(DD)D");
@@ -409,7 +420,8 @@
     EXPECT_EQ(INFINITY, result.GetD());
   }
 
-  void InvokeSumDoubleDoubleDoubleMethod(bool is_static) {
+  void InvokeSumDoubleDoubleDoubleMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(DDD)D");
@@ -439,7 +451,8 @@
     EXPECT_EQ(2.0, result.GetD());
   }
 
-  void InvokeSumDoubleDoubleDoubleDoubleMethod(bool is_static) {
+  void InvokeSumDoubleDoubleDoubleDoubleMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(DDDD)D");
@@ -472,7 +485,8 @@
     EXPECT_EQ(-2.0, result.GetD());
   }
 
-  void InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(bool is_static) {
+  void InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method;
     Object* receiver;
     Method::InvokeStub* stub = DoCompile(method, receiver, is_static, "sum", "(DDDDD)D");
@@ -1143,7 +1157,8 @@
 
 #if !defined(ART_USE_LLVM_COMPILER)
 TEST_F(JniInternalTest, GetPrimitiveField_SetPrimitiveField) {
-  SirtRef<ClassLoader> class_loader(LoadDex("AllFields"));
+  Thread::Current()->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
   runtime_->Start();
 
   jclass c = env_->FindClass("AllFields");
@@ -1171,7 +1186,8 @@
 }
 
 TEST_F(JniInternalTest, GetObjectField_SetObjectField) {
-  SirtRef<ClassLoader> class_loader(LoadDex("AllFields"));
+  Thread::Current()->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
   runtime_->Start();
 
   jclass c = env_->FindClass("AllFields");
@@ -1228,7 +1244,7 @@
   {
     CheckJniAbortCatcher check_jni_abort_catcher;
     env_->DeleteLocalRef(s);
-    check_jni_abort_catcher.Check("native code passing in reference to invalid local reference: 0x200001");
+    check_jni_abort_catcher.Check("native code passing in reference to invalid local reference: 0x1400001");
   }
 
   s = env_->NewStringUTF("");
@@ -1246,7 +1262,7 @@
 
   jobject outer;
   jobject inner1, inner2;
-  ScopedJniThreadState ts(env_);
+  ScopedObjectAccess soa(env_);
   Object* inner2_direct_pointer;
   {
     env_->PushLocalFrame(4);
@@ -1256,7 +1272,7 @@
       env_->PushLocalFrame(4);
       inner1 = env_->NewLocalRef(outer);
       inner2 = env_->NewStringUTF("survivor");
-      inner2_direct_pointer = ts.Decode<Object*>(inner2);
+      inner2_direct_pointer = soa.Decode<Object*>(inner2);
       env_->PopLocalFrame(inner2);
     }
 
@@ -1309,7 +1325,7 @@
   {
     CheckJniAbortCatcher check_jni_abort_catcher;
     env_->DeleteGlobalRef(o);
-    check_jni_abort_catcher.Check("native code passing in reference to invalid global reference: 0x10000e");
+    check_jni_abort_catcher.Check("native code passing in reference to invalid global reference: 0x100056");
   }
 
   jobject o1 = env_->NewGlobalRef(s);
@@ -1364,7 +1380,9 @@
 }
 
 TEST_F(JniInternalTest, StaticMainMethod) {
-  SirtRef<ClassLoader> class_loader(LoadDex("Main"));
+  ScopedObjectAccess soa(Thread::Current());
+  jobject jclass_loader = LoadDex("Main");
+  SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(jclass_loader));
   CompileDirectMethod(class_loader.get(), "Main", "main", "([Ljava/lang/String;)V");
 
   Class* klass = class_linker_->FindClass("LMain;", class_loader.get());
@@ -1382,98 +1400,122 @@
 }
 
 TEST_F(JniInternalTest, StaticNopMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeNopMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticNopMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeNopMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticIdentityByteMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeIdentityByteMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticIdentityByteMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeIdentityByteMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticIdentityIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeIdentityIntMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticIdentityIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeIdentityIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticIdentityDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeIdentityDoubleMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticIdentityDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeIdentityDoubleMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumIntIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumIntIntMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticSumIntIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumIntIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumIntIntIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumIntIntIntMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticSumIntIntIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumIntIntIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumIntIntIntIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumIntIntIntIntMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticSumIntIntIntIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumIntIntIntIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumIntIntIntIntIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumIntIntIntIntIntMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticSumIntIntIntIntIntMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumIntIntIntIntIntMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumDoubleDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumDoubleDoubleMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticSumDoubleDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumDoubleDoubleMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumDoubleDoubleDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumDoubleDoubleDoubleMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticSumDoubleDoubleDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumDoubleDoubleDoubleMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumDoubleDoubleDoubleDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumDoubleDoubleDoubleDoubleMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticSumDoubleDoubleDoubleDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumDoubleDoubleDoubleDoubleMethod(false);
 }
 
 TEST_F(JniInternalTest, StaticSumDoubleDoubleDoubleDoubleDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(true);
 }
 
 TEST_F(JniInternalTest, NonStaticSumDoubleDoubleDoubleDoubleDoubleMethod) {
+  ScopedObjectAccess soa(Thread::Current());
   InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(false);
 }
 
diff --git a/src/jobject_comparator.cc b/src/jobject_comparator.cc
new file mode 100644
index 0000000..edd0727
--- /dev/null
+++ b/src/jobject_comparator.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jobject_comparator.h"
+
+#include "object.h"
+#include "scoped_thread_state_change.h"
+
+namespace art {
+
+bool JobjectComparator::operator()(jobject jobj1, jobject jobj2) const {
+  // Ensure null references and cleared jweaks appear at the end (compare greater).
+  if (jobj1 == NULL) {
+    return false;
+  } else if (jobj2 == NULL) {
+    return true;
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  Object* obj1 = soa.Decode<Object*>(jobj1);
+  Object* obj2 = soa.Decode<Object*>(jobj2);
+  if (obj1 == NULL) {
+    return false;
+  } else if (obj2 == NULL) {
+    return true;
+  }
+  // Sort by class...
+  if (obj1->GetClass() != obj2->GetClass()) {
+    return obj1->GetClass()->IdentityHashCode() < obj2->GetClass()->IdentityHashCode();
+  } else {
+    // ...then by size...
+    size_t count1 = obj1->SizeOf();
+    size_t count2 = obj2->SizeOf();
+    if (count1 != count2) {
+      return count1 < count2;
+    } else {
+      // ...and finally by identity hash code.
+      return obj1->IdentityHashCode() < obj2->IdentityHashCode();
+    }
+  }
+}
+
+}  // namespace art
diff --git a/src/scoped_thread_list_lock.h b/src/jobject_comparator.h
similarity index 68%
rename from src/scoped_thread_list_lock.h
rename to src/jobject_comparator.h
index 8650c57..17098aa 100644
--- a/src/scoped_thread_list_lock.h
+++ b/src/jobject_comparator.h
@@ -14,22 +14,17 @@
  * limitations under the License.
  */
 
-#ifndef ART_SRC_SCOPED_THREAD_LIST_LOCK_H_
-#define ART_SRC_SCOPED_THREAD_LIST_LOCK_H_
+#ifndef ART_SRC_JOBJECT_COMPARATOR_H_
+#define ART_SRC_JOBJECT_COMPARATOR_H_
 
-#include "macros.h"
+#include <jni.h>
 
 namespace art {
 
-class ScopedThreadListLock {
- public:
-  ScopedThreadListLock();
-  ~ScopedThreadListLock();
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ScopedThreadListLock);
+struct JobjectComparator {
+  bool operator()(jobject jobj1, jobject jobj2) const;
 };
 
 }  // namespace art
 
-#endif  // ART_SRC_SCOPED_THREAD_LIST_LOCK_H_
+#endif  // ART_SRC_JOBJECT_COMPARATOR_H_
diff --git a/src/logging.cc b/src/logging.cc
index 30063a1..712c02b 100644
--- a/src/logging.cc
+++ b/src/logging.cc
@@ -29,11 +29,6 @@
 static std::string* gProgramInvocationName;
 static std::string* gProgramInvocationShortName;
 
-static Mutex& GetLoggingLock() {
-  static Mutex logging_lock("LogMessage lock");
-  return logging_lock;
-}
-
 const char* GetCmdLine() {
   return (gCmdLine != NULL) ? gCmdLine->c_str() : NULL;
 }
@@ -55,6 +50,9 @@
 // and a letter indicating the minimum priority level we're expected to log.
 // This can be used to reveal or conceal logs with specific tags.
 void InitLogging(char* argv[]) {
+  // TODO: Move this to a more obvious InitART...
+  GlobalSynchronization::Init();
+
   // Stash the command line for later use. We can use /proc/self/cmdline on Linux to recover this,
   // but we don't have that luxury on the Mac, and there are a couple of argv[0] variants that are
   // commonly used.
@@ -106,7 +104,7 @@
 
   // Do the actual logging with the lock held.
   {
-    MutexLock mu(GetLoggingLock());
+    MutexLock mu(*GlobalSynchronization::logging_lock_);
     if (msg.find('\n') == std::string::npos) {
       LogLine(msg.c_str());
     } else {
diff --git a/src/logging.h b/src/logging.h
index 94dd2c6..ce86a72 100644
--- a/src/logging.h
+++ b/src/logging.h
@@ -187,7 +187,7 @@
 class LogMessage {
  public:
   LogMessage(const char* file, int line, LogSeverity severity, int error);
-  ~LogMessage();
+  ~LogMessage() LOCKS_EXCLUDED(GlobalSynchronization::logging_lock_);
   std::ostream& stream();
 
  private:
@@ -249,6 +249,35 @@
   return os;
 }
 
+template<typename T>
+class MutatorLockedDumpable {
+ public:
+  explicit MutatorLockedDumpable(T& value)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) : value_(value) {
+  }
+
+  void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    value_.Dump(os);
+  }
+
+ private:
+  T& value_;
+
+// TODO: Remove the #if when Mac OS build server no longer uses GCC 4.2.*.
+#if GCC_VERSION >= 40300
+  DISALLOW_COPY_AND_ASSIGN(MutatorLockedDumpable);
+#endif
+};
+
+template<typename T>
+std::ostream& operator<<(std::ostream& os, const MutatorLockedDumpable<T>& rhs)
+// TODO: should be SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) however annotalysis
+//       currently fails for this.
+    NO_THREAD_SAFETY_ANALYSIS {
+  rhs.Dump(os);
+  return os;
+}
+
 // Helps you use operator<< in a const char*-like context such as our various 'F' methods with
 // format strings.
 template<typename T>
diff --git a/src/mark_sweep.cc b/src/mark_sweep.cc
index 7adc344..227614d 100644
--- a/src/mark_sweep.cc
+++ b/src/mark_sweep.cc
@@ -30,7 +30,6 @@
 #include "monitor.h"
 #include "object.h"
 #include "runtime.h"
-#include "scoped_heap_lock.h"
 #include "space.h"
 #include "timing_logger.h"
 #include "thread.h"
@@ -133,7 +132,9 @@
 
   }
 
-  void operator ()(const Object* obj, const Object* ref, MemberOffset offset, bool is_static) const {
+  void operator ()(const Object* obj, const Object* ref, MemberOffset offset, bool is_static) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     mark_sweep_->CheckReference(obj, ref, offset, is_static);
   }
 
@@ -171,10 +172,11 @@
 class ScanImageRootVisitor {
  public:
   ScanImageRootVisitor(MarkSweep* const mark_sweep) : mark_sweep_(mark_sweep) {
-
   }
 
-  void operator ()(const Object* root) const {
+  void operator ()(const Object* root) const
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(root != NULL);
     mark_sweep_->ScanObject(root);
   }
@@ -225,7 +227,9 @@
 
   }
 
-  void operator ()(const Object* obj) const {
+  void operator ()(const Object* obj) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     DCHECK(obj != NULL);
     mark_sweep_->CheckObject(obj);
   }
@@ -322,7 +326,7 @@
 };
 
 void MarkSweep::SweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
-  ScopedHeapLock lock;
+  GlobalSynchronization::heap_bitmap_lock_->AssertExclusiveHeld();
 
   size_t freed_objects = num_ptrs;
   size_t freed_bytes = 0;
@@ -348,11 +352,12 @@
       space->Free(obj);
     }
   }
-  heap->RecordFreeLocked(freed_objects, freed_bytes);
+  heap->RecordFree(freed_objects, freed_bytes);
 }
 
 void MarkSweep::ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg) {
-  ScopedHeapLock lock;
+  GlobalSynchronization::heap_bitmap_lock_->AssertExclusiveHeld();
+
   SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
   Heap* heap = context->heap;
   // We don't free any actual memory to avoid dirtying the shared zygote pages.
diff --git a/src/mark_sweep.h b/src/mark_sweep.h
index 189462f..bb48b7a 100644
--- a/src/mark_sweep.h
+++ b/src/mark_sweep.h
@@ -44,36 +44,46 @@
   void Init();
 
   // Marks the root set at the start of a garbage collection.
-  void MarkRoots();
+  void MarkRoots()
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Marks the roots in the image space on dirty cards.
-  void ScanDirtyImageRoots();
+  void ScanDirtyImageRoots() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   // Verify that image roots point to only marked objects within the alloc space.
-  void VerifyImageRoots();
+  void VerifyImageRoots() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   bool IsMarkStackEmpty() const {
     return mark_stack_->IsEmpty();
   }
 
   // Builds a mark stack and recursively mark until it empties.
-  void RecursiveMark(bool partial);
+  void RecursiveMark(bool partial)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Copies mark bits from live bitmap of zygote space to mark bitmap for partial GCs.
   void CopyMarkBits();
 
   // Builds a mark stack with objects on dirty cards and recursively mark
   // until it empties.
-  void RecursiveMarkDirtyObjects();
+  void RecursiveMarkDirtyObjects()
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Remarks the root set after completing the concurrent mark.
-  void ReMarkRoots();
+  void ReMarkRoots()
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   Heap* GetHeap() {
     return heap_;
   }
 
-  void ProcessReferences(bool clear_soft_references) {
+  void ProcessReferences(bool clear_soft_references)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ProcessReferences(&soft_reference_list_, clear_soft_references,
                       &weak_reference_list_,
                       &finalizer_reference_list_,
@@ -81,59 +91,83 @@
   }
 
   // Sweeps unmarked objects to complete the garbage collection.
-  void Sweep(bool partial);
+  void Sweep(bool partial) EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   Object* GetClearedReferences() {
     return cleared_reference_list_;
   }
 
   // Blackens an object.
-  void ScanObject(const Object* obj);
+  void ScanObject(const Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  private:
   // Returns true if the object has its bit set in the mark bitmap.
-  bool IsMarked(const Object* object) const {
+  bool IsMarked(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
     if (current_mark_bitmap_->HasAddress(object)) {
       return current_mark_bitmap_->Test(object);
     }
     return heap_->GetMarkBitmap()->Test(object);
   }
 
-  static bool IsMarkedCallback(const Object* object, void* arg) {
+  static bool IsMarkedCallback(const Object* object, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
     return reinterpret_cast<MarkSweep*>(arg)->IsMarked(object);
   }
 
-  static bool IsLiveCallback(const Object* object, void* arg) {
+  static bool IsLiveCallback(const Object* object, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
     return reinterpret_cast<MarkSweep*>(arg)->GetHeap()->GetLiveBitmap()->Test(object);
   }
 
-  static void MarkObjectVisitor(const Object* root, void* arg);
+  static void MarkObjectVisitor(const Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
-  static void ReMarkObjectVisitor(const Object* root, void* arg);
+  static void ReMarkObjectVisitor(const Object* root, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
-  static void VerifyImageRootVisitor(Object* root, void* arg);
+  static void VerifyImageRootVisitor(Object* root, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_);
 
-  static void ScanDirtyCardCallback(Object* obj, void* arg);
+  static void ScanDirtyCardCallback(Object* obj, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Marks an object.
-  void MarkObject(const Object* obj);
+  void MarkObject(const Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   // Yuck.
-  void MarkObject0(const Object* obj, bool check_finger);
+  void MarkObject0(const Object* obj, bool check_finger)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
-  static void ScanBitmapCallback(Object* obj, void* finger, void* arg);
+  static void ScanBitmapCallback(Object* obj, void* finger, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static void SweepCallback(size_t num_ptrs, Object** ptrs, void* arg);
+
+  static void SweepCallback(size_t num_ptrs, Object** ptrs, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   // Special sweep for zygote that just marks objects / dirties cards.
-  static void ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg);
+  static void ZygoteSweepCallback(size_t num_ptrs, Object** ptrs, void* arg)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
-  void CheckReference(const Object* obj, const Object* ref, MemberOffset offset, bool is_static);
+  void CheckReference(const Object* obj, const Object* ref, MemberOffset offset, bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_);
 
-  void CheckObject(const Object* obj);
+  void CheckObject(const Object* obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitObjectReferences(const Object* obj, const Visitor& visitor) {
+  void VisitObjectReferences(const Object* obj, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     DCHECK(obj != NULL);
     DCHECK(obj->GetClass() != NULL);
     if (obj->IsClass()) {
@@ -146,10 +180,14 @@
   }
 
   // Grays references in instance fields.
-  void ScanInstanceFields(const Object* obj);
+  void ScanInstanceFields(const Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitInstanceFieldsReferences(const Object* obj, const Visitor& visitor) {
+  void VisitInstanceFieldsReferences(const Object* obj, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(obj != NULL);
     Class* klass = obj->GetClass();
     DCHECK(klass != NULL);
@@ -157,28 +195,42 @@
   }
 
   // Blackens a class object.
-  void ScanClass(const Object* obj);
+  void ScanClass(const Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+
 
   template <typename Visitor>
-  void VisitClassReferences(const Object* obj, const Visitor& visitor) {
+  void VisitClassReferences(const Object* obj, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     VisitInstanceFieldsReferences(obj, visitor);
     VisitStaticFieldsReferences(obj->AsClass(), visitor);
   }
 
   // Grays references in static fields.
-  void ScanStaticFields(const Class* klass);
+  void ScanStaticFields(const Class* klass)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitStaticFieldsReferences(const Class* klass, const Visitor& visitor) {
+  void VisitStaticFieldsReferences(const Class* klass, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     DCHECK(klass != NULL);
     VisitFieldsReferences(klass, klass->GetReferenceStaticOffsets(), true, visitor);
   }
 
   // Used by ScanInstanceFields and ScanStaticFields
-  void ScanFields(const Object* obj, uint32_t ref_offsets, bool is_static);
+  void ScanFields(const Object* obj, uint32_t ref_offsets, bool is_static)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitFieldsReferences(const Object* obj, uint32_t ref_offsets, bool is_static, const Visitor& visitor) {
+  void VisitFieldsReferences(const Object* obj, uint32_t ref_offsets, bool is_static,
+                             const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     if (ref_offsets != CLASS_WALK_SUPER) {
       // Found a reference offset bitmap.  Mark the specified offsets.
       while (ref_offsets != 0) {
@@ -212,10 +264,14 @@
   }
 
   // Grays references in an array.
-  void ScanArray(const Object* obj);
+  void ScanArray(const Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitArrayReferences(const Object* obj, const Visitor& visitor) {
+  void VisitArrayReferences(const Object* obj, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     visitor(obj, obj->GetClass(), Object::ClassOffset(), false);
     if (obj->IsObjectArray()) {
       const ObjectArray<Object>* array = obj->AsObjectArray<Object>();
@@ -227,35 +283,51 @@
     }
   }
 
-  void ScanOther(const Object* obj);
+  void ScanOther(const Object* obj)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   template <typename Visitor>
-  void VisitOtherReferences(const Object* obj, const Visitor& visitor) {
+  void VisitOtherReferences(const Object* obj, const Visitor& visitor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     return VisitInstanceFieldsReferences(obj, visitor);
   }
 
   // Blackens objects grayed during a garbage collection.
-  void ScanGrayObjects();
+  void ScanGrayObjects() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   // Schedules an unmarked object for reference processing.
-  void DelayReferenceReferent(Object* reference);
+  void DelayReferenceReferent(Object* reference)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   // Recursively blackens objects on the mark stack.
-  void ProcessMarkStack();
+  void ProcessMarkStack()
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void EnqueueFinalizerReferences(Object** ref);
+  void EnqueueFinalizerReferences(Object** ref)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void PreserveSomeSoftReferences(Object** ref);
+  void PreserveSomeSoftReferences(Object** ref)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void ClearWhiteReferences(Object** list);
+  void ClearWhiteReferences(Object** list)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   void ProcessReferences(Object** soft_references, bool clear_soft_references,
                          Object** weak_references,
                          Object** finalizer_references,
-                         Object** phantom_references);
+                         Object** phantom_references)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void SweepSystemWeaks(bool swap_bitmaps);
-  void SweepJniWeakGlobals(HeapBitmap* bitmap);
+  void SweepSystemWeaks(bool swap_bitmaps)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
+  void SweepJniWeakGlobals(HeapBitmap* bitmap)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   // Current space, we check this space first to avoid searching for the appropriate space for an object.
   SpaceBitmap* current_mark_bitmap_;
diff --git a/src/mod_union_table.cc b/src/mod_union_table.cc
index 410bf62..3b928e3 100644
--- a/src/mod_union_table.cc
+++ b/src/mod_union_table.cc
@@ -56,7 +56,9 @@
       bitmap_(bitmap) {
   }
 
-  void operator ()(Object* obj) const {
+  void operator ()(Object* obj) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     DCHECK(obj != NULL);
     // We don't have an early exit since we use the visitor pattern, an early
     // exit should significantly speed this up.
@@ -136,10 +138,11 @@
 class ModUnionScanImageRootVisitor {
  public:
   ModUnionScanImageRootVisitor(MarkSweep* const mark_sweep) : mark_sweep_(mark_sweep) {
-
   }
 
-  void operator ()(const Object* root) const {
+  void operator ()(const Object* root) const
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(root != NULL);
     mark_sweep_->ScanObject(root);
   }
@@ -208,7 +211,9 @@
       references_(references) {
   }
 
-  void operator ()(Object* obj) const {
+  void operator ()(Object* obj) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_,
+                            GlobalSynchronization::mutator_lock_) {
     DCHECK(obj != NULL);
     // We don't have an early exit since we use the visitor pattern, an early
     // exit should significantly speed this up.
diff --git a/src/mod_union_table.h b/src/mod_union_table.h
index f44004b..424f2f3 100644
--- a/src/mod_union_table.h
+++ b/src/mod_union_table.h
@@ -84,10 +84,12 @@
   void ClearCards(Space* space);
 
   // Update table based on cleared cards.
-  void Update();
+  void Update()
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Mark all references to the alloc space(s).
-  void MarkReferences();
+  void MarkReferences() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
  protected:
   // Cleared card array, used to update the mod-union table.
@@ -111,10 +113,12 @@
   void ClearCards(Space* space);
 
   // Update table based on cleared cards.
-  void Update();
+  void Update()
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Mark all references to the alloc space(s).
-  void MarkReferences();
+  void MarkReferences() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
   // Verify the mod-union table.
   void Verify();
@@ -146,7 +150,9 @@
   void Update() {}
 
   // Mark all references to the alloc space(s).
-  void MarkReferences();
+  void MarkReferences()
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Nothing to verify.
   void Verify() {}
diff --git a/src/monitor.cc b/src/monitor.cc
index 149babf..0e6735d 100644
--- a/src/monitor.cc
+++ b/src/monitor.cc
@@ -31,8 +31,7 @@
 #include "mutex.h"
 #include "object.h"
 #include "object_utils.h"
-#include "scoped_jni_thread_state.h"
-#include "scoped_thread_list_lock.h"
+#include "scoped_thread_state_change.h"
 #include "stl_util.h"
 #include "thread.h"
 #include "thread_list.h"
@@ -117,14 +116,26 @@
   is_sensitive_thread_hook_ = is_sensitive_thread_hook;
 }
 
-Monitor::Monitor(Object* obj)
-    : owner_(NULL),
+Monitor::Monitor(Thread* owner, Object* obj)
+    : monitor_lock_("a monitor lock", kMonitorLock),
+      owner_(owner),
       lock_count_(0),
       obj_(obj),
       wait_set_(NULL),
-      lock_("a monitor lock"),
       locking_method_(NULL),
       locking_dex_pc_(0) {
+  monitor_lock_.Lock();
+  // Propagate the lock state.
+  uint32_t thin = *obj->GetRawLockWordAddress();
+  lock_count_ = LW_LOCK_COUNT(thin);
+  thin &= LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT;
+  thin |= reinterpret_cast<uint32_t>(this) | LW_SHAPE_FAT;
+  // Publish the updated lock word.
+  android_atomic_release_store(thin, obj->GetRawLockWordAddress());
+  // Lock profiling.
+  if (lock_profiling_threshold_ != 0) {
+    locking_method_ = owner->GetCurrentMethod(&locking_dex_pc_);
+  }
 }
 
 Monitor::~Monitor() {
@@ -190,7 +201,7 @@
     return;
   }
 
-  if (!lock_.TryLock()) {
+  if (!monitor_lock_.TryLock()) {
     uint64_t waitStart = 0;
     uint64_t waitEnd = 0;
     uint32_t wait_threshold = lock_profiling_threshold_;
@@ -204,7 +215,7 @@
       current_locking_method = locking_method_;
       current_locking_dex_pc = locking_dex_pc_;
 
-      lock_.Lock();
+      monitor_lock_.Lock();
       if (wait_threshold != 0) {
         waitEnd = NanoTime() / 1000;
       }
@@ -240,7 +251,8 @@
 static void ThrowIllegalMonitorStateExceptionF(const char* fmt, ...)
                                               __attribute__((format(printf, 1, 2)));
 
-static void ThrowIllegalMonitorStateExceptionF(const char* fmt, ...) {
+static void ThrowIllegalMonitorStateExceptionF(const char* fmt, ...)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   va_list args;
   va_start(args, fmt);
   Thread::Current()->ThrowNewExceptionV("Ljava/lang/IllegalMonitorStateException;", fmt, args);
@@ -272,7 +284,7 @@
   {
     // TODO: isn't this too late to prevent threads from disappearing?
     // Acquire thread list lock so threads won't disappear from under us.
-    ScopedThreadListLock thread_list_lock;
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
     // Re-read owner now that we hold lock.
     current_owner = (monitor != NULL) ? monitor->owner_ : NULL;
     // Get short descriptions of the threads involved.
@@ -322,7 +334,7 @@
   }
 }
 
-bool Monitor::Unlock(Thread* self) {
+bool Monitor::Unlock(Thread* self, bool for_wait) {
   DCHECK(self != NULL);
   Thread* owner = owner_;
   if (owner == self) {
@@ -331,10 +343,17 @@
       owner_ = NULL;
       locking_method_ = NULL;
       locking_dex_pc_ = 0;
-      lock_.Unlock();
+      monitor_lock_.Unlock();
     } else {
       --lock_count_;
     }
+  } else if (for_wait) {
+    // Wait should have already cleared the fields.
+    DCHECK_EQ(lock_count_, 0);
+    DCHECK(owner == NULL);
+    DCHECK(locking_method_ == NULL);
+    DCHECK_EQ(locking_dex_pc_, 0u);
+    monitor_lock_.Unlock();
   } else {
     // We don't own this, so we're not allowed to unlock it.
     // The JNI spec says that we should throw IllegalMonitorStateException
@@ -346,7 +365,8 @@
 }
 
 // Converts the given waiting time (relative to "now") into an absolute time in 'ts'.
-static void ToAbsoluteTime(int64_t ms, int32_t ns, timespec* ts) {
+static void ToAbsoluteTime(int64_t ms, int32_t ns, timespec* ts)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   int64_t endSec;
 
 #ifdef HAVE_TIMEDWAIT_MONOTONIC
@@ -407,7 +427,11 @@
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before wait()");
     return;
   }
+  monitor_lock_.AssertHeld();
+  WaitWithLock(self, ms, ns, interruptShouldThrow);
+}
 
+void Monitor::WaitWithLock(Thread* self, int64_t ms, int32_t ns, bool interruptShouldThrow) {
   // Enforce the timeout range.
   if (ms < 0 || ns < 0 || ns > 999999) {
     Thread::Current()->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
@@ -447,57 +471,52 @@
    * that we won't touch any references in this state, and we'll check
    * our suspend mode before we transition out.
    */
-  if (timed) {
-    self->SetState(kTimedWaiting);
-  } else {
-    self->SetState(kWaiting);
-  }
+  self->TransitionFromRunnableToSuspended(timed ? kTimedWaiting : kWaiting);
 
-  self->wait_mutex_->Lock();
-
-  /*
-   * Set wait_monitor_ to the monitor object we will be waiting on.
-   * When wait_monitor_ is non-NULL a notifying or interrupting thread
-   * must signal the thread's wait_cond_ to wake it up.
-   */
-  DCHECK(self->wait_monitor_ == NULL);
-  self->wait_monitor_ = this;
-
-  /*
-   * Handle the case where the thread was interrupted before we called
-   * wait().
-   */
   bool wasInterrupted = false;
-  if (self->interrupted_) {
-    wasInterrupted = true;
+  {
+    // Pseudo-atomically wait on self's wait_cond_ and release the monitor lock.
+    MutexLock mu(*self->wait_mutex_);
+
+    // Set wait_monitor_ to the monitor object we will be waiting on. When wait_monitor_ is
+    // non-NULL a notifying or interrupting thread must signal the thread's wait_cond_ to wake it
+    // up.
+    DCHECK(self->wait_monitor_ == NULL);
+    self->wait_monitor_ = this;
+
+    // Release the monitor lock.
+    Unlock(self, true);
+
+    /*
+     * Handle the case where the thread was interrupted before we called
+     * wait().
+     */
+    if (self->interrupted_) {
+      wasInterrupted = true;
+    } else {
+      // Wait for a notification or a timeout to occur.
+      if (!timed) {
+        self->wait_cond_->Wait(*self->wait_mutex_);
+      } else {
+        self->wait_cond_->TimedWait(*self->wait_mutex_, ts);
+      }
+      if (self->interrupted_) {
+        wasInterrupted = true;
+      }
+      self->interrupted_ = false;
+    }
     self->wait_monitor_ = NULL;
-    self->wait_mutex_->Unlock();
-    goto done;
   }
 
-  /*
-   * Release the monitor lock and wait for a notification or
-   * a timeout to occur.
-   */
-  lock_.Unlock();
+  // Set self->status back to kRunnable, and self-suspend if needed.
+  self->TransitionFromSuspendedToRunnable();
 
-  if (!timed) {
-    self->wait_cond_->Wait(*self->wait_mutex_);
-  } else {
-    self->wait_cond_->TimedWait(*self->wait_mutex_, ts);
-  }
-  if (self->interrupted_) {
-    wasInterrupted = true;
-  }
-
-  self->interrupted_ = false;
-  self->wait_monitor_ = NULL;
-  self->wait_mutex_->Unlock();
-
-  // Reacquire the monitor lock.
+  // Re-acquire the monitor lock.
   Lock(self);
 
- done:
+
+  self->wait_mutex_->AssertNotHeld();
+
   /*
    * We remove our thread from wait set after restoring the count
    * and owner fields so the subroutine can check that the calling
@@ -510,9 +529,6 @@
   locking_dex_pc_ = saved_dex_pc;
   RemoveFromWaitSet(self);
 
-  /* set self->status back to kRunnable, and self-suspend if needed */
-  self->SetState(kRunnable);
-
   if (wasInterrupted) {
     /*
      * We were interrupted while waiting, or somebody interrupted an
@@ -521,7 +537,10 @@
      * The doc sayeth: "The interrupted status of the current thread is
      * cleared when this exception is thrown."
      */
-    self->interrupted_ = false;
+    {
+      MutexLock mu(*self->wait_mutex_);
+      self->interrupted_ = false;
+    }
     if (interruptShouldThrow) {
       Thread::Current()->ThrowNewException("Ljava/lang/InterruptedException;", NULL);
     }
@@ -530,12 +549,16 @@
 
 void Monitor::Notify(Thread* self) {
   DCHECK(self != NULL);
-
   // Make sure that we hold the lock.
   if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notify()");
     return;
   }
+  monitor_lock_.AssertHeld();
+  NotifyWithLock();
+}
+
+void Monitor::NotifyWithLock() {
   // Signal the first waiting thread in the wait set.
   while (wait_set_ != NULL) {
     Thread* thread = wait_set_;
@@ -553,12 +576,16 @@
 
 void Monitor::NotifyAll(Thread* self) {
   DCHECK(self != NULL);
-
   // Make sure that we hold the lock.
   if (owner_ != self) {
     ThrowIllegalMonitorStateExceptionF("object not locked by thread before notifyAll()");
     return;
   }
+  monitor_lock_.AssertHeld();
+  NotifyAllWithLock();
+}
+
+void Monitor::NotifyAllWithLock() {
   // Signal all threads in the wait set.
   while (wait_set_ != NULL) {
     Thread* thread = wait_set_;
@@ -579,18 +606,10 @@
   DCHECK_EQ(LW_LOCK_OWNER(*obj->GetRawLockWordAddress()), static_cast<int32_t>(self->GetThinLockId()));
 
   // Allocate and acquire a new monitor.
-  Monitor* m = new Monitor(obj);
+  Monitor* m = new Monitor(self, obj);
   VLOG(monitor) << "monitor: thread " << self->GetThinLockId()
                 << " created monitor " << m << " for object " << obj;
   Runtime::Current()->GetMonitorList()->Add(m);
-  m->Lock(self);
-  // Propagate the lock state.
-  uint32_t thin = *obj->GetRawLockWordAddress();
-  m->lock_count_ = LW_LOCK_COUNT(thin);
-  thin &= LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT;
-  thin |= reinterpret_cast<uint32_t>(m) | LW_SHAPE_FAT;
-  // Publish the updated lock word.
-  android_atomic_release_store(thin, obj->GetRawLockWordAddress());
 }
 
 void Monitor::MonitorEnter(Thread* self, Object* obj) {
@@ -639,7 +658,7 @@
                                     threadId, thinp, PrettyTypeOf(obj).c_str(), LW_LOCK_OWNER(thin));
       // The lock is owned by another thread. Notify the runtime that we are about to wait.
       self->monitor_enter_object_ = obj;
-      ThreadState oldStatus = self->SetState(kBlocked);
+      self->TransitionFromRunnableToSuspended(kBlocked);
       // Spin until the thin lock is released or inflated.
       sleepDelayNs = 0;
       for (;;) {
@@ -677,14 +696,14 @@
           // waiting and try again.
           VLOG(monitor) << StringPrintf("monitor: thread %d found lock %p surprise-fattened by another thread", threadId, thinp);
           self->monitor_enter_object_ = NULL;
-          self->SetState(oldStatus);
+          self->TransitionFromSuspendedToRunnable();
           goto retry;
         }
       }
       VLOG(monitor) << StringPrintf("monitor: thread %d spin on lock %p done", threadId, thinp);
       // We have acquired the thin lock. Let the runtime know that we are no longer waiting.
       self->monitor_enter_object_ = NULL;
-      self->SetState(oldStatus);
+      self->TransitionFromSuspendedToRunnable();
       // Fatten the lock.
       Inflate(self, obj);
       VLOG(monitor) << StringPrintf("monitor: thread %d fattened lock %p", threadId, thinp);
@@ -750,7 +769,7 @@
      * raised any exceptions before continuing.
      */
     DCHECK(LW_MONITOR(*thinp) != NULL);
-    if (!LW_MONITOR(*thinp)->Unlock(self)) {
+    if (!LW_MONITOR(*thinp)->Unlock(self, false)) {
       // An exception has been raised.  Do not fall through.
       return false;
     }
@@ -796,6 +815,7 @@
       return;
     }
     // no-op;  there are no waiters to notify.
+    Inflate(self, obj);
   } else {
     // It's a fat lock.
     LW_MONITOR(thin)->Notify(self);
@@ -814,6 +834,7 @@
       return;
     }
     // no-op;  there are no waiters to notify.
+    Inflate(self, obj);
   } else {
     // It's a fat lock.
     LW_MONITOR(thin)->NotifyAll(self);
@@ -830,17 +851,17 @@
 }
 
 static uint32_t LockOwnerFromThreadLock(Object* thread_lock) {
-  ScopedJniThreadState ts(Thread::Current());
+  ScopedObjectAccess soa(Thread::Current());
   if (thread_lock == NULL ||
-      thread_lock->GetClass() != ts.Decode<Class*>(WellKnownClasses::java_lang_ThreadLock)) {
+      thread_lock->GetClass() != soa.Decode<Class*>(WellKnownClasses::java_lang_ThreadLock)) {
     return ThreadList::kInvalidId;
   }
-  Field* thread_field = ts.DecodeField(WellKnownClasses::java_lang_ThreadLock_thread);
+  Field* thread_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadLock_thread);
   Object* managed_thread = thread_field->GetObject(thread_lock);
   if (managed_thread == NULL) {
     return ThreadList::kInvalidId;
   }
-  Field* vmData_field = ts.DecodeField(WellKnownClasses::java_lang_Thread_vmData);
+  Field* vmData_field = soa.DecodeField(WellKnownClasses::java_lang_Thread_vmData);
   uintptr_t vmData = static_cast<uintptr_t>(vmData_field->GetInt(managed_thread));
   Thread* thread = reinterpret_cast<Thread*>(vmData);
   if (thread == NULL) {
@@ -850,13 +871,21 @@
 }
 
 void Monitor::DescribeWait(std::ostream& os, const Thread* thread) {
-  ThreadState state = thread->GetState();
+  ThreadState state;
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    state = thread->GetState();
+  }
 
   Object* object = NULL;
   uint32_t lock_owner = ThreadList::kInvalidId;
   if (state == kWaiting || state == kTimedWaiting) {
     os << "  - waiting on ";
-    Monitor* monitor = thread->wait_monitor_;
+    Monitor* monitor;
+    {
+      MutexLock mu(*thread->wait_mutex_);
+      monitor = thread->wait_monitor_;
+    }
     if (monitor != NULL) {
       object = monitor->obj_;
     }
@@ -883,7 +912,8 @@
   os << "\n";
 }
 
-static void DumpLockedObject(std::ostream& os, Object* o) {
+static void DumpLockedObject(std::ostream& os, Object* o)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   os << "  - locked <" << o << "> (a " << PrettyTypeOf(o) << ")\n";
 }
 
@@ -968,21 +998,21 @@
   line_number = mh.GetLineNumFromDexPC(dex_pc);
 }
 
-MonitorList::MonitorList() : lock_("MonitorList lock") {
+MonitorList::MonitorList() : monitor_list_lock_("MonitorList lock") {
 }
 
 MonitorList::~MonitorList() {
-  MutexLock mu(lock_);
+  MutexLock mu(monitor_list_lock_);
   STLDeleteElements(&list_);
 }
 
 void MonitorList::Add(Monitor* m) {
-  MutexLock mu(lock_);
+  MutexLock mu(monitor_list_lock_);
   list_.push_front(m);
 }
 
 void MonitorList::SweepMonitorList(Heap::IsMarkedTester is_marked, void* arg) {
-  MutexLock mu(lock_);
+  MutexLock mu(monitor_list_lock_);
   typedef std::list<Monitor*>::iterator It; // TODO: C++0x auto
   It it = list_.begin();
   while (it != list_.end()) {
diff --git a/src/monitor.h b/src/monitor.h
index d72ff73..b506b39 100644
--- a/src/monitor.h
+++ b/src/monitor.h
@@ -25,6 +25,7 @@
 
 #include "heap.h"
 #include "mutex.h"
+#include "thread.h"
 
 namespace art {
 
@@ -67,66 +68,96 @@
   static bool IsSensitiveThread();
   static void Init(uint32_t lock_profiling_threshold, bool (*is_sensitive_thread_hook)());
 
-  static uint32_t GetThinLockId(uint32_t raw_lock_word);
+  static uint32_t GetThinLockId(uint32_t raw_lock_word)
+      NO_THREAD_SAFETY_ANALYSIS;  // Reading lock owner without holding lock is racy.
 
-  static void MonitorEnter(Thread* thread, Object* obj);
-  static bool MonitorExit(Thread* thread, Object* obj);
+  static void MonitorEnter(Thread* thread, Object* obj)
+      EXCLUSIVE_LOCK_FUNCTION(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static bool MonitorExit(Thread* thread, Object* obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+      UNLOCK_FUNCTION(monitor_lock_);
 
-  static void Notify(Thread* self, Object* obj);
-  static void NotifyAll(Thread* self, Object* obj);
-  static void Wait(Thread* self, Object* obj, int64_t ms, int32_t ns, bool interruptShouldThrow);
+  static void Notify(Thread* self, Object* obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void NotifyAll(Thread* self, Object* obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void Wait(Thread* self, Object* obj, int64_t ms, int32_t ns, bool interruptShouldThrow)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static void DescribeWait(std::ostream& os, const Thread* thread);
-  static void DescribeLocks(std::ostream& os, StackVisitor* stack_visitor);
+  static void DescribeWait(std::ostream& os, const Thread* thread)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void DescribeLocks(std::ostream& os, StackVisitor* stack_visitor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   Object* GetObject();
 
  private:
-  explicit Monitor(Object* obj);
+  explicit Monitor(Thread* owner, Object* obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void AppendToWaitSet(Thread* thread);
-  void RemoveFromWaitSet(Thread* thread);
+  void AppendToWaitSet(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_);
+  void RemoveFromWaitSet(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_);
 
-  static void Inflate(Thread* self, Object* obj);
+  static void Inflate(Thread* self, Object* obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent, const char* owner_filename, uint32_t owner_line_number);
+  void LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
+                          const char* owner_filename, uint32_t owner_line_number)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static void FailedUnlock(Object* obj, Thread* expected_owner, Thread* found_owner, Monitor* mon);
+  static void FailedUnlock(Object* obj, Thread* expected_owner, Thread* found_owner, Monitor* mon)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void Lock(Thread* self) NO_THREAD_SAFETY_ANALYSIS; // TODO: mark Object LOCKABLE.
-  bool Unlock(Thread* thread) NO_THREAD_SAFETY_ANALYSIS; // TODO: mark Object LOCKABLE.
+  void Lock(Thread* self) EXCLUSIVE_LOCK_FUNCTION(monitor_lock_);
+  bool Unlock(Thread* thread, bool for_wait) UNLOCK_FUNCTION(monitor_lock_);
 
-  void Notify(Thread* self);
-  void NotifyAll(Thread* self);
+  void Notify(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+  void NotifyWithLock()
+      EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void Wait(Thread* self, int64_t msec, int32_t nsec, bool interruptShouldThrow) NO_THREAD_SAFETY_ANALYSIS; // TODO: mark Object LOCKABLE.
+  void NotifyAll(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+  void NotifyAllWithLock()
+      EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+
+
+  void Wait(Thread* self, int64_t msec, int32_t nsec, bool interruptShouldThrow)
+      NO_THREAD_SAFETY_ANALYSIS;
+  void WaitWithLock(Thread* self, int64_t ms, int32_t ns, bool interruptShouldThrow)
+      EXCLUSIVE_LOCKS_REQUIRED(monitor_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Translates the provided method and pc into its declaring class' source file and line number.
   void TranslateLocation(const Method* method, uint32_t pc,
-                         const char*& source_file, uint32_t& line_number) const;
+                         const char*& source_file, uint32_t& line_number) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static bool (*is_sensitive_thread_hook_)();
   static uint32_t lock_profiling_threshold_;
 
+  Mutex monitor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
   // Which thread currently owns the lock?
   Thread* volatile owner_;
 
   // Owner's recursive lock depth.
-  int lock_count_;
+  int lock_count_ GUARDED_BY(monitor_lock_);
 
   // What object are we part of (for debugging).
   Object* const obj_;
 
   // Threads currently waiting on this monitor.
-  Thread* wait_set_;
-
-  Mutex lock_;
+  Thread* wait_set_ GUARDED_BY(monitor_lock_);
 
   // Method and dex pc where the lock owner acquired the lock, used when lock
   // sampling is enabled. locking_method_ may be null if the lock is currently
   // unlocked, or if the lock is acquired by the system when the stack is empty.
-  const Method* locking_method_;
-  uint32_t locking_dex_pc_;
+  const Method* locking_method_ GUARDED_BY(monitor_lock_);
+  uint32_t locking_dex_pc_ GUARDED_BY(monitor_lock_);
 
   friend class MonitorList;
   friend class Object;
@@ -140,11 +171,12 @@
 
   void Add(Monitor* m);
 
-  void SweepMonitorList(Heap::IsMarkedTester is_marked, void* arg);
+  void SweepMonitorList(Heap::IsMarkedTester is_marked, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
 
  private:
-  Mutex lock_;
-  std::list<Monitor*> list_;
+  Mutex monitor_list_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  std::list<Monitor*> list_ GUARDED_BY(monitor_list_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(MonitorList);
 };
diff --git a/src/monitor_android.cc b/src/monitor_android.cc
index 94f86e8..ce42ee7 100644
--- a/src/monitor_android.cc
+++ b/src/monitor_android.cc
@@ -50,7 +50,8 @@
   return dst + len;
 }
 
-void Monitor::LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent, const char* owner_filename, uint32_t owner_line_number) {
+void Monitor::LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
+                                 const char* owner_filename, uint32_t owner_line_number) {
   // Emit the event list length, 1 byte.
   char eventBuffer[174];
   char* cp = eventBuffer;
@@ -103,7 +104,8 @@
   cp = EventLogWriteInt(cp, sample_percent);
 
   CHECK_LE((size_t)(cp - eventBuffer), sizeof(eventBuffer));
-  android_btWriteLog(EVENT_LOG_TAG_dvm_lock_sample, EVENT_TYPE_LIST, eventBuffer, (size_t)(cp - eventBuffer));
+  android_btWriteLog(EVENT_LOG_TAG_dvm_lock_sample, EVENT_TYPE_LIST, eventBuffer,
+                     (size_t)(cp - eventBuffer));
 }
 
 }  // namespace art
diff --git a/src/mutex.cc b/src/mutex.cc
index c5551bd..182f6f0 100644
--- a/src/mutex.cc
+++ b/src/mutex.cc
@@ -37,44 +37,158 @@
 
 // This works on Mac OS 10.7, but hasn't been tested on older releases.
 struct __attribute__((__may_alias__)) darwin_pthread_mutex_t {
-  uint32_t padding0[2];
-  uint32_t value;
-  uint32_t padding1[5];
-  uint64_t owner_tid;
+  uint32_t padding0[4];
+  intptr_t padding1;
+  uintptr_t owner_tid;
+  // ...other stuff we don't care about.
+};
+
// Mirror of the leading layout of Darwin's private pthread_rwlock_t, used to peek at the
// owning writer thread (read by ReaderWriterMutex::GetExclusiveOwnerTid).
// NOTE(review): this depends on libc internals -- re-verify the field offsets against the
// Mac OS release in use.
struct __attribute__((__may_alias__)) darwin_pthread_rwlock_t {
  int32_t padding0[4];
  intptr_t padding1[2];
  uintptr_t rw_owner_tid;
  // ...other stuff we don't care about.
};
 
 struct __attribute__((__may_alias__)) glibc_pthread_mutex_t {
-  int lock;
-  unsigned int count;
+  int32_t padding0[2];
   int owner;
   // ...other stuff we don't care about.
 };
 
-static inline void CheckSafeToLockOrUnlock(MutexRank rank, bool is_locking) {
-  if (!kIsDebugBuild) {
-    return;
// Mirror of the leading layout of glibc's pthread_rwlock_t, used to peek at the writer field
// (read by ReaderWriterMutex::GetExclusiveOwnerTid). The amount of leading padding differs
// between 32-bit and 64-bit builds.
// NOTE(review): this depends on glibc (nptl) internals -- re-verify the offsets against the
// glibc version in use.
struct __attribute__((__may_alias__)) glibc_pthread_rwlock_t {
#ifdef __LP64__
  int32_t padding0[6];
#else
  int32_t padding0[7];
#endif
  int writer;
  // ...other stuff we don't care about.
};
+
+ReaderWriterMutex* GlobalSynchronization::mutator_lock_ = NULL;
+Mutex* GlobalSynchronization::thread_list_lock_ = NULL;
+Mutex* GlobalSynchronization::classlinker_classes_lock_ = NULL;
+ReaderWriterMutex* GlobalSynchronization::heap_bitmap_lock_ = NULL;
+Mutex* GlobalSynchronization::abort_lock_ = NULL;
+Mutex* GlobalSynchronization::logging_lock_ = NULL;
+Mutex* GlobalSynchronization::unexpected_signal_lock_ = NULL;
+Mutex* GlobalSynchronization::thread_suspend_count_lock_ = NULL;
+
+void GlobalSynchronization::Init() {
+  if (logging_lock_ != NULL) {
+    // Already initialized.
+    DCHECK(mutator_lock_ != NULL);
+    DCHECK(thread_list_lock_ != NULL);
+    DCHECK(classlinker_classes_lock_ != NULL);
+    DCHECK(heap_bitmap_lock_ != NULL);
+    DCHECK(abort_lock_ != NULL);
+    DCHECK(logging_lock_ != NULL);
+    DCHECK(unexpected_signal_lock_ != NULL);
+    DCHECK(thread_suspend_count_lock_ != NULL);
+  } else {
+    logging_lock_ = new Mutex("logging lock", kLoggingLock, true);
+    abort_lock_ = new Mutex("abort lock", kAbortLock, true);
+    DCHECK(mutator_lock_ == NULL);
+    mutator_lock_ = new ReaderWriterMutex("mutator lock", kMutatorLock);
+    DCHECK(thread_list_lock_ == NULL);
+    thread_list_lock_ = new Mutex("thread list lock", kThreadListLock);
+    DCHECK(classlinker_classes_lock_ == NULL);
+    classlinker_classes_lock_ = new Mutex("ClassLinker classes lock", kClassLinkerClassesLock);
+    DCHECK(heap_bitmap_lock_ == NULL);
+    heap_bitmap_lock_ = new ReaderWriterMutex("heap bitmap lock", kHeapBitmapLock);
+    DCHECK(unexpected_signal_lock_ == NULL);
+    unexpected_signal_lock_ = new Mutex("unexpected signal lock", kUnexpectedSignalLock, true);
+    DCHECK(thread_suspend_count_lock_ == NULL);
+    thread_suspend_count_lock_ = new Mutex("thread suspend count lock", kThreadSuspendCountLock);
   }
-  if (rank == -1) {
-    return;
-  }
-  Thread::Current()->CheckSafeToLockOrUnlock(rank, is_locking);
 }
 
-static inline void CheckSafeToWait(MutexRank rank) {
-  if (!kIsDebugBuild) {
-    return;
-  }
-  Thread::Current()->CheckSafeToWait(rank);
+BaseMutex::BaseMutex(const char* name, MutexLevel level) : level_(level), name_(name) {}
+
+static void CheckUnattachedThread(MutexLevel level) {
+  // The check below enumerates the cases where we expect not to be able to sanity check locks
+  // on a thread. TODO: tighten this check.
+  Runtime* runtime = Runtime::Current();
+  CHECK(runtime == NULL || !runtime->IsStarted() || runtime->IsShuttingDown() ||
+        level == kDefaultMutexLevel  || level == kThreadListLock ||
+        level == kLoggingLock || level == kAbortLock);
 }
 
-Mutex::Mutex(const char* name, MutexRank rank) : name_(name), rank_(rank) {
-  // Like Java, we use recursive mutexes.
// Records that the calling thread has just acquired this mutex, first enforcing the lock
// hierarchy: acquiring while already holding a mutex of the same or lower level is a violation.
void BaseMutex::RegisterAsLockedWithCurrentThread() {
  Thread* self = Thread::Current();
  if (self == NULL) {
    // No per-thread book-keeping available; just check this is an expected situation.
    CheckUnattachedThread(level_);
    return;
  }
  // Check if a bad Mutex of this level or lower is held.
  bool bad_mutexes_held = false;
  for (int i = level_; i >= 0; --i) {
    BaseMutex* held_mutex = self->GetHeldMutex(static_cast<MutexLevel>(i));
    if (UNLIKELY(held_mutex != NULL)) {
      LOG(ERROR) << "Lock level violation: holding \"" << held_mutex->name_ << "\" (level " << i
          << ") while locking \"" << name_ << "\" (level " << static_cast<int>(level_) << ")";
      if (i > kAbortLock) {
        // Only abort in the check below if this is more than abort level lock.
        bad_mutexes_held = true;
      }
    }
  }
  CHECK(!bad_mutexes_held);
  // Don't record monitors as they are outside the scope of analysis. They may be inspected off of
  // the monitor list.
  if (level_ != kMonitorLock) {
    self->SetHeldMutex(level_, this);
  }
}
+
+void BaseMutex::RegisterAsUnlockedWithCurrentThread() {
+  Thread* self = Thread::Current();
+  if (self == NULL) {
+    CheckUnattachedThread(level_);
+    return;
+  }
+  if (level_ != kMonitorLock) {
+    CHECK(self->GetHeldMutex(level_) == this) << "Unlocking on unacquired mutex: " << name_;
+    self->SetHeldMutex(level_, NULL);
+  }
+}
+
// Sanity check that a blocking wait on this mutex is safe: the caller must hold it, and must
// not hold any other tracked mutex at any level, since those would stay held while we sleep.
void BaseMutex::CheckSafeToWait() {
  Thread* self = Thread::Current();
  if (self == NULL) {
    CheckUnattachedThread(level_);
    return;
  }
  CHECK(self->GetHeldMutex(level_) == this) << "Waiting on unacquired mutex: " << name_;
  bool bad_mutexes_held = false;
  // Scan every level other than our own; any other held mutex makes waiting unsafe.
  for (int i = kMaxMutexLevel; i >= 0; --i) {
    if (i != level_) {
      BaseMutex* held_mutex = self->GetHeldMutex(static_cast<MutexLevel>(i));
      if (held_mutex != NULL) {
        LOG(ERROR) << "Holding " << held_mutex->name_ << " (level " << i
            << ") while performing wait on: "
            << name_ << " (level " << static_cast<int>(level_) << ")";
        bad_mutexes_held = true;
      }
    }
  }
  CHECK(!bad_mutexes_held);
}
+
// Creates a Mutex at the given lock-hierarchy level. When 'recursive' is true the owner may
// re-acquire; recursion is tracked by recursion_count_ in ExclusiveLock rather than by the
// pthread mutex itself (except on Bionic, see below).
Mutex::Mutex(const char* name, MutexLevel level, bool recursive)
    : BaseMutex(name, level), recursive_(recursive), recursion_count_(0) {
#if defined(__BIONIC__)
  // Use recursive mutexes as Bionic's non-recursive mutexes don't have TIDs to check lock
  // ownership of.
  pthread_mutexattr_t attributes;
  CHECK_MUTEX_CALL(pthread_mutexattr_init, (&attributes));
  CHECK_MUTEX_CALL(pthread_mutexattr_settype, (&attributes, PTHREAD_MUTEX_RECURSIVE));
  CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, &attributes));
  CHECK_MUTEX_CALL(pthread_mutexattr_destroy, (&attributes));
#else
  // Non-Bionic: a plain (default) mutex suffices; recursion is handled above this layer.
  CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, NULL));
#endif
}
 
 Mutex::~Mutex() {
@@ -89,55 +203,69 @@
   }
 }
 
-void Mutex::Lock() {
-  CheckSafeToLockOrUnlock(rank_, true);
-  CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
// Acquires the mutex, blocking until it is free. A recursive mutex may be re-entered by its
// owner; re-locking a non-recursive mutex from its owner is a fatal error.
void Mutex::ExclusiveLock() {
  bool is_held = IsExclusiveHeld();
  CHECK(recursive_ || !is_held)
      << "Error attempt to recursively lock non-recursive lock \"" << name_ << "\"";
  if (!is_held) {
    // First acquisition by this thread: take the pthread mutex and record it for
    // lock-hierarchy checking. Recursive re-entry skips both and only bumps the count.
    CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
    RegisterAsLockedWithCurrentThread();
  }
  recursion_count_++;
  DCHECK(recursion_count_ == 1 || recursive_) << "Unexpected recursion count on mutex: "
      << name_ << " " << recursion_count_;
  AssertHeld();
}
 
-bool Mutex::TryLock() {
-  int result = pthread_mutex_trylock(&mutex_);
-  if (result == EBUSY) {
-    return false;
+bool Mutex::ExclusiveTryLock() {
+  bool is_held = IsExclusiveHeld();
+  CHECK(recursive_ || !is_held)
+      << "Error attempt to recursively lock non-recursive lock \"" << name_ << "\"";
+  if (!is_held) {
+    int result = pthread_mutex_trylock(&mutex_);
+    if (result == EBUSY) {
+      return false;
+    }
+    if (result != 0) {
+      errno = result;
+      PLOG(FATAL) << "pthread_mutex_trylock failed for " << name_;
+    }
+    RegisterAsLockedWithCurrentThread();
   }
-  if (result != 0) {
-    errno = result;
-    PLOG(FATAL) << "pthread_mutex_trylock failed for " << name_;
-  }
-  CheckSafeToLockOrUnlock(rank_, true);
+  recursion_count_++;
   AssertHeld();
   return true;
 }
 
-void Mutex::Unlock() {
+void Mutex::ExclusiveUnlock() {
   AssertHeld();
-  CheckSafeToLockOrUnlock(rank_, false);
-  CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_));
+  recursion_count_--;
+  if (!recursive_ || recursion_count_ == 0) {
+    DCHECK(recursion_count_ == 0 || recursive_) << "Unexpected recursion count on mutex: "
+        << name_ << " " << recursion_count_;
+    RegisterAsUnlockedWithCurrentThread();
+    CHECK_MUTEX_CALL(pthread_mutex_unlock, (&mutex_));
+  }
 }
 
-#if !defined(NDEBUG)
-#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED < 1060
-// Mac OS 10.5 didn't have anything we could implement GetTid() with. One thing we could try would
-// be using pthread_t instead of the actual tid; this would be acceptable in most places, and more
-// portable. 10.5 is already obsolete, though, so doing so would probably be all pain for no gain.
-void Mutex::AssertHeld() {}
-void Mutex::AssertNotHeld() {}
-#else
-void Mutex::AssertHeld() {
-  DCHECK_EQ(GetOwner(), static_cast<uint64_t>(GetTid()));
// Returns whether the calling thread currently owns this mutex.
bool Mutex::IsExclusiveHeld() const {
  Thread* self = Thread::Current();
  bool result;
  if (self == NULL || level_ == kMonitorLock) {  // Handle unattached threads and monitors.
    // No per-thread book-keeping available; ask the pthread implementation instead.
    result = (GetExclusiveOwnerTid() == static_cast<uint64_t>(GetTid()));
  } else {
    result = (self->GetHeldMutex(level_) == this);
    // Sanity debug check that if we think it is locked, so does the pthread.
    DCHECK(result == (GetExclusiveOwnerTid() == static_cast<uint64_t>(GetTid())));
  }
  return result;
}
 
-void Mutex::AssertNotHeld() {
-  DCHECK_NE(GetOwner(), static_cast<uint64_t>(GetTid()));
-}
-#endif
-#endif
-
-uint64_t Mutex::GetOwner() {
+uint64_t Mutex::GetExclusiveOwnerTid() const {
 #if defined(__BIONIC__)
   return static_cast<uint64_t>((mutex_.value >> 16) & 0xffff);
 #elif defined(__GLIBC__)
-  return reinterpret_cast<glibc_pthread_mutex_t*>(&mutex_)->owner;
+  return reinterpret_cast<const glibc_pthread_mutex_t*>(&mutex_)->owner;
 #elif defined(__APPLE__)
   return reinterpret_cast<darwin_pthread_mutex_t*>(&mutex_)->owner_tid;
 #else
@@ -145,24 +273,104 @@
 #endif
 }
 
-uint32_t Mutex::GetDepth() {
-  bool held = (GetOwner() == static_cast<uint64_t>(GetTid()));
-  if (!held) {
-    return 0;
// Creates a ReaderWriterMutex at the given lock-hierarchy level, backed by a pthread
// read-write lock with default attributes.
ReaderWriterMutex::ReaderWriterMutex(const char* name, MutexLevel level) : BaseMutex(name, level) {
  CHECK_MUTEX_CALL(pthread_rwlock_init, (&rwlock_, NULL));
}
+
+ReaderWriterMutex::~ReaderWriterMutex() {
+  // We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
+  // may still be using locks.
+  int rc = pthread_rwlock_destroy(&rwlock_);
+  if (rc != 0) {
+    errno = rc;
+    // TODO: should we just not log at all if shutting down? this could be the logging mutex!
+    bool shutting_down = Runtime::Current()->IsShuttingDown();
+    PLOG(shutting_down ? WARNING : FATAL) << "pthread_mutex_destroy failed for " << name_;
   }
-  uint32_t depth;
+}
+
// Blocks until no thread holds the lock in either mode, then takes exclusive (writer) access.
// Not reentrant: the current owner must not attempt to re-lock.
void ReaderWriterMutex::ExclusiveLock() {
  AssertNotExclusiveHeld();
  CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
  RegisterAsLockedWithCurrentThread();  // Record for lock-hierarchy checking.
  AssertExclusiveHeld();
}
+
// Releases exclusive (writer) access; the caller must be the exclusive holder.
void ReaderWriterMutex::ExclusiveUnlock() {
  AssertExclusiveHeld();
  RegisterAsUnlockedWithCurrentThread();  // Clear hierarchy book-keeping before releasing.
  CHECK_MUTEX_CALL(pthread_rwlock_unlock, (&rwlock_));
}
+
+bool ReaderWriterMutex::ExclusiveLockWithTimeout(const timespec& abs_timeout) {
+  int result = pthread_rwlock_timedwrlock(&rwlock_, &abs_timeout);
+  if (result == ETIMEDOUT) {
+    return false;
+  }
+  if (result != 0) {
+    errno = result;
+    PLOG(FATAL) << "pthread_mutex_trylock failed for " << name_;
+  }
+  RegisterAsLockedWithCurrentThread();
+  AssertSharedHeld();
+  return true;
+}
+
// Blocks until the lock is free or shared, then acquires a share (reader access).
void ReaderWriterMutex::SharedLock() {
  CHECK_MUTEX_CALL(pthread_rwlock_rdlock, (&rwlock_));
  RegisterAsLockedWithCurrentThread();  // Record for lock-hierarchy checking.
  AssertSharedHeld();
}
+
+bool ReaderWriterMutex::SharedTryLock() {
+  int result = pthread_rwlock_tryrdlock(&rwlock_);
+  if (result == EBUSY) {
+    return false;
+  }
+  if (result != 0) {
+    errno = result;
+    PLOG(FATAL) << "pthread_mutex_trylock failed for " << name_;
+  }
+  RegisterAsLockedWithCurrentThread();
+  AssertSharedHeld();
+  return true;
+}
+
// Releases a share (reader access); the caller must hold a share.
void ReaderWriterMutex::SharedUnlock() {
  AssertSharedHeld();
  RegisterAsUnlockedWithCurrentThread();  // Clear hierarchy book-keeping before releasing.
  CHECK_MUTEX_CALL(pthread_rwlock_unlock, (&rwlock_));
}
+
+bool ReaderWriterMutex::IsExclusiveHeld() const {
+  bool result = (GetExclusiveOwnerTid() == static_cast<uint64_t>(GetTid()));
+  // Sanity that if the pthread thinks we own the lock the Thread agrees.
+  Thread* self = Thread::Current();
+  DCHECK((self == NULL) || !result || (self->GetHeldMutex(level_) == this));
+  return result;
+}
+
+bool ReaderWriterMutex::IsSharedHeld() const {
+  Thread* self = Thread::Current();
+  bool result;
+  if (UNLIKELY(self == NULL)) {  // Handle unattached threads.
+    result = IsExclusiveHeld(); // TODO: a better best effort here.
+  } else {
+    result = (self->GetHeldMutex(level_) == this);
+  }
+  return result;
+}
+
+uint64_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
 #if defined(__BIONIC__)
-  depth = static_cast<uint32_t>((mutex_.value >> 2) & 0x7ff) + 1;
+  return rwlock_.writerThreadId;
 #elif defined(__GLIBC__)
-  depth = reinterpret_cast<glibc_pthread_mutex_t*>(&mutex_)->count;
+  return reinterpret_cast<const glibc_pthread_rwlock_t*>(&rwlock_)->writer;
 #elif defined(__APPLE__)
-  darwin_pthread_mutex_t* darwin_mutex = reinterpret_cast<darwin_pthread_mutex_t*>(&mutex_);
-  depth = ((darwin_mutex->value >> 16) & 0xffff);
+  return reinterpret_cast<const darwin_pthread_rwlock_t*>(&rwlock_)->rw_owner_tid;
 #else
 #error unsupported C library
 #endif
-  CHECK_NE(depth, 0U) << "owner=" << GetOwner() << " tid=" << GetTid();
-  return depth;
 }
 
 ConditionVariable::ConditionVariable(const std::string& name) : name_(name) {
@@ -189,10 +397,11 @@
 }
 
void ConditionVariable::Wait(Mutex& mutex) {
  mutex.CheckSafeToWait();
  // The pthread mutex is only ever locked once regardless of the recursion count (see
  // Mutex::ExclusiveLock), so stash the count while pthread_cond_wait releases and
  // re-acquires the lock, and restore it afterwards.
  unsigned int old_recursion_count = mutex.recursion_count_;
  mutex.recursion_count_ = 0;
  CHECK_MUTEX_CALL(pthread_cond_wait, (&cond_, &mutex.mutex_));
  mutex.recursion_count_ = old_recursion_count;
}
 
 void ConditionVariable::TimedWait(Mutex& mutex, const timespec& ts) {
@@ -201,32 +410,15 @@
 #else
 #define TIMEDWAIT pthread_cond_timedwait
 #endif
-  CheckSafeToWait(mutex.rank_);
-  uint unlock_depth = UnlockBeforeWait(mutex);
+  mutex.CheckSafeToWait();
+  unsigned int old_recursion_count = mutex.recursion_count_;
+  mutex.recursion_count_ = 0;
   int rc = TIMEDWAIT(&cond_, &mutex.mutex_, &ts);
-  RelockAfterWait(mutex, unlock_depth);
+  mutex.recursion_count_ = old_recursion_count;
   if (rc != 0 && rc != ETIMEDOUT) {
     errno = rc;
     PLOG(FATAL) << "TimedWait failed for " << name_;
   }
 }
 
-// Unlock a mutex down to depth == 1 so pthread conditional waiting can be used.
-// After waiting, use RelockAfterWait to restore the lock depth.
-uint32_t ConditionVariable::UnlockBeforeWait(Mutex& mutex) {
-  uint32_t unlock_count = 0;
-  CHECK_GT(mutex.GetDepth(), 0U);
-  while (mutex.GetDepth() != 1) {
-    mutex.Unlock();
-    unlock_count++;
-  }
-  return unlock_count;
-}
-
-void ConditionVariable::RelockAfterWait(Mutex& mutex, uint32_t unlock_count) {
-  for (uint32_t i = 0; i < unlock_count; i++) {
-    mutex.Lock();
-  }
-}
-
 }  // namespace art
diff --git a/src/mutex.h b/src/mutex.h
index 4c5d537..4899382 100644
--- a/src/mutex.h
+++ b/src/mutex.h
@@ -23,71 +23,316 @@
 #include <iosfwd>
 #include <string>
 
+#include "globals.h"
 #include "gtest/gtest.h"
 #include "logging.h"
 #include "macros.h"
 
 namespace art {
 
-enum MutexRank {
-  kNoMutexRank = -1,
-  kHeapLock = 0,
-  kThreadListLock = 1,
-  kThreadSuspendCountLock = 2,
-  kMaxMutexRank = kThreadSuspendCountLock,
-};
-std::ostream& operator<<(std::ostream& os, const MutexRank& rhs);
+class LOCKABLE Mutex;
+class LOCKABLE ReaderWriterMutex;
 
-class LOCKABLE Mutex {
+// MutexLevel is used to impose a lock hierarchy [1] where acquisition of a Mutex at a higher or
+// equal level to a lock a thread holds is invalid. The lock hierarchy achieves a cycle free
+// partial ordering and thereby causes deadlock situations to fail checks.
+//
+// [1] http://www.drdobbs.com/parallel/use-lock-hierarchies-to-avoid-deadlock/204801163
+enum MutexLevel {
+  kLoggingLock = 0,
+  kUnexpectedSignalLock = 1,
+  kThreadSuspendCountLock = 2,
+  kAbortLock = 3,
+  kDefaultMutexLevel = 4,
+  kLoadLibraryLock = 5,
+  kClassLinkerClassesLock = 6,
+  kThreadListLock = 7,
+  kHeapBitmapLock = 8,
+  kZygoteCreationLock = 9,
+  kMonitorLock = 10,
+  kMutatorLock = 11,
+  kMaxMutexLevel = kMutatorLock,
+};
+std::ostream& operator<<(std::ostream& os, const MutexLevel& rhs);
+
+// Global mutexes corresponding to the levels above.
+class GlobalSynchronization {
  public:
-  explicit Mutex(const char* name, MutexRank rank = kNoMutexRank);
+  static void Init();
+
+  // The mutator_lock_ is used to allow mutators to execute in a shared (reader) mode or to block
+  // mutators by having an exclusive (writer) owner. In normal execution each mutator thread holds
+  // a share on the mutator_lock_. The garbage collector may also execute with shared access but
+  // at times requires exclusive access to the heap (not to be confused with the heap meta-data
+  // guarded by the Heap's internal locks). When the garbage collector requires exclusive access it asks
+  // the mutators to suspend themselves which also involves usage of the thread_suspend_count_lock_
+  // to cover weaknesses in using ReaderWriterMutexes with ConditionVariables. We use a condition
+  // variable to wait upon in the suspension logic as releasing and then re-acquiring a share on
+  // the mutator lock doesn't necessarily allow the exclusive user (e.g the garbage collector)
+  // chance to acquire the lock.
+  //
+  // Thread suspension:
+  // Shared users                                  | Exclusive user
+  // (holding mutator lock and in kRunnable state) |   .. running ..
+  //   .. running ..                               | Request thread suspension by:
+  //   .. running ..                               |   - acquiring thread_suspend_count_lock_
+  //   .. running ..                               |   - incrementing Thread::suspend_count_ on
+  //   .. running ..                               |     all mutator threads
+  //   .. running ..                               |   - releasing thread_suspend_count_lock_
+  //   .. running ..                               | Block trying to acquire exclusive mutator lock
+  // Poll Thread::suspend_count_ and enter full    |   .. blocked ..
+  // suspend code.                                 |   .. blocked ..
+  // Change state to kSuspended                    |   .. blocked ..
+  // x: Release share on mutator_lock_             | Carry out exclusive access
+  // Acquire thread_suspend_count_lock_            |   .. exclusive ..
+  // while Thread::suspend_count_ > 0              |   .. exclusive ..
+  //   - wait on Thread::resume_cond_              |   .. exclusive ..
+  //     (releases thread_suspend_count_lock_)     |   .. exclusive ..
+  //   .. waiting ..                               | Release mutator_lock_
+  //   .. waiting ..                               | Request thread resumption by:
+  //   .. waiting ..                               |   - acquiring thread_suspend_count_lock_
+  //   .. waiting ..                               |   - decrementing Thread::suspend_count_ on
+  //   .. waiting ..                               |     all mutator threads
+  //   .. waiting ..                               |   - notifying on Thread::resume_cond_
+  //    - re-acquire thread_suspend_count_lock_    |   - releasing thread_suspend_count_lock_
+  // Release thread_suspend_count_lock_            |  .. running ..
+  // Acquire share on mutator_lock_                |  .. running ..
+  //  - This could block but the thread still      |  .. running ..
+  //    has a state of kSuspended and so this      |  .. running ..
+  //    isn't an issue.                            |  .. running ..
+  // Acquire thread_suspend_count_lock_            |  .. running ..
+  //  - we poll here as we're transitioning into   |  .. running ..
+  //    kRunnable and an individual thread suspend |  .. running ..
+  //    request (e.g for debugging) won't try      |  .. running ..
+  //    to acquire the mutator lock (which would   |  .. running ..
+  //    block as we hold the mutator lock). This   |  .. running ..
+  //    poll ensures that if the suspender thought |  .. running ..
+  //    we were suspended by incrementing our      |  .. running ..
+  //    Thread::suspend_count_ and then reading    |  .. running ..
+  //    our state we go back to waiting on         |  .. running ..
+  //    Thread::resume_cond_.                      |  .. running ..
+  // can_go_runnable = Thread::suspend_count_ == 0 |  .. running ..
+  // Release thread_suspend_count_lock_            |  .. running ..
+  // if can_go_runnable                            |  .. running ..
+  //   Change state to kRunnable                   |  .. running ..
+  // else                                          |  .. running ..
+  //   Goto x                                      |  .. running ..
+  //  .. running ..                                |  .. running ..
+  static ReaderWriterMutex* mutator_lock_;
+
+  // Allow reader-writer mutual exclusion on the mark and live bitmaps of the heap.
+  static ReaderWriterMutex* heap_bitmap_lock_ ACQUIRED_AFTER(mutator_lock_);
+
+  // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
+  // attaching and detaching.
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
+
+  // Guards lists of classes within the class linker.
+  static Mutex* classlinker_classes_lock_ ACQUIRED_AFTER(thread_list_lock_);
+
+  // When declaring any Mutex add DEFAULT_MUTEX_ACQUIRED_AFTER to use annotalysis to check the code
+  // doesn't try to hold a higher level Mutex.
+  #define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(classlinker_classes_lock_)
+
+  // Have an exclusive aborting thread.
+  static Mutex* abort_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+
+  // Allow mutual exclusion when manipulating Thread::suspend_count_.
+  // TODO: Does the trade-off of a per-thread lock make sense?
+  static Mutex* thread_suspend_count_lock_ ACQUIRED_AFTER(abort_lock_);
+
+  // One unexpected signal at a time lock.
+  static Mutex* unexpected_signal_lock_ ACQUIRED_AFTER(thread_suspend_count_lock_);
+
+  // Have an exclusive logging thread.
+  static Mutex* logging_lock_ ACQUIRED_AFTER(unexpected_signal_lock_);
+};
+
+// Base class for all Mutex implementations
+class BaseMutex {
+ public:
+  const std::string& GetName() const {
+    return name_;
+  }
+
+  virtual bool IsMutex() const { return false; }
+  virtual bool IsReaderWriterMutex() const { return false; }
+
+ protected:
+  friend class ConditionVariable;
+
+  BaseMutex(const char* name, MutexLevel level);
+  virtual ~BaseMutex() {}
+  void RegisterAsLockedWithCurrentThread();
+  void RegisterAsUnlockedWithCurrentThread();
+  void CheckSafeToWait();
+
+  const MutexLevel level_;  // Support for lock hierarchy.
+  const std::string name_;
+};
+
+// A Mutex is used to achieve mutual exclusion between threads. A Mutex can be used to gain
+// exclusive access to what it guards. A Mutex can be in one of two states:
+// - Free - not owned by any thread,
+// - Exclusive - owned by a single thread.
+//
+// The effect of locking and unlocking operations on the state is:
+// State     | ExclusiveLock | ExclusiveUnlock
+// -------------------------------------------
+// Free      | Exclusive     | error
+// Exclusive | Block*        | Free
+// * Mutex is not reentrant and so an attempt to ExclusiveLock on the same thread will result in
+//   an error. Being non-reentrant simplifies Waiting on ConditionVariables.
+class LOCKABLE Mutex : public BaseMutex {
+ public:
+  explicit Mutex(const char* name, MutexLevel level = kDefaultMutexLevel, bool recursive = false);
   ~Mutex();
 
-  void Lock() EXCLUSIVE_LOCK_FUNCTION();
+  virtual bool IsMutex() const { return true; }
 
-  bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true);
+  // Block until mutex is free then acquire exclusive access.
+  void ExclusiveLock() EXCLUSIVE_LOCK_FUNCTION();
+  void Lock() EXCLUSIVE_LOCK_FUNCTION() {  ExclusiveLock(); }
 
-  void Unlock() UNLOCK_FUNCTION();
+  // Returns true if acquires exclusive access, false otherwise.
+  bool ExclusiveTryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true);
+  bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) { return ExclusiveTryLock(); }
 
-#if !defined(NDEBUG)
-  void AssertHeld();
-  void AssertNotHeld();
-#else
-  void AssertHeld() {}
-  void AssertNotHeld() {}
-#endif
+  // Release exclusive access.
+  void ExclusiveUnlock() UNLOCK_FUNCTION();
+  void Unlock() UNLOCK_FUNCTION() {  ExclusiveUnlock(); }
 
-  uint64_t GetOwner();
+  // Is the current thread the exclusive holder of the Mutex.
+  bool IsExclusiveHeld() const;
+
+  // Assert that the Mutex is exclusively held by the current thread.
+  void AssertExclusiveHeld() {
+    if (kIsDebugBuild) {
+      CHECK(IsExclusiveHeld());
+    }
+  }
+  void AssertHeld() { AssertExclusiveHeld(); }
+
+  // Assert that the Mutex is not held by the current thread.
+  void AssertNotHeldExclusive() {
+    if (kIsDebugBuild) {
+      CHECK(!IsExclusiveHeld());
+    }
+  }
+  void AssertNotHeld() { AssertNotHeldExclusive(); }
+
+  // Id associated with exclusive owner.
+  uint64_t GetExclusiveOwnerTid() const;
+
+  // Returns how many times this Mutex has been locked, it is better to use AssertHeld/NotHeld.
+  unsigned int GetDepth() const {
+    return recursion_count_;
+  }
 
  private:
-  uint32_t GetDepth();
-
   pthread_mutex_t mutex_;
-  const std::string name_;
-  const MutexRank rank_;
-
+  const bool recursive_;  // Can the lock be recursively held?
+  unsigned int recursion_count_;
   friend class ConditionVariable;
   friend class MutexTester;
   DISALLOW_COPY_AND_ASSIGN(Mutex);
 };
 
-class SCOPED_LOCKABLE MutexLock {
+// A ReaderWriterMutex is used to achieve mutual exclusion between threads, similar to a Mutex.
+// Unlike a Mutex a ReaderWriterMutex can be used to gain exclusive (writer) or shared (reader)
+// access to what it guards. A flaw in relation to a Mutex is that it cannot be used with a
+// condition variable. A ReaderWriterMutex can be in one of three states:
+// - Free - not owned by any thread,
+// - Exclusive - owned by a single thread,
+// - Shared(n) - shared amongst n threads.
+//
+// The effect of locking and unlocking operations on the state is:
+//
+// State     | ExclusiveLock | ExclusiveUnlock | SharedLock       | SharedUnlock
+// ----------------------------------------------------------------------------
+// Free      | Exclusive     | error           | SharedLock(1)    | error
+// Exclusive | Block         | Free            | Block            | error
+// Shared(n) | Block         | error           | SharedLock(n+1)* | Shared(n-1) or Free
+// * for large values of n the SharedLock may block.
+class LOCKABLE ReaderWriterMutex : public BaseMutex {
  public:
-  explicit MutexLock(Mutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(mu) {
-    mu_.Lock();
+  explicit ReaderWriterMutex(const char* name, MutexLevel level = kDefaultMutexLevel);
+  ~ReaderWriterMutex();
+
+  virtual bool IsReaderWriterMutex() const { return true; }
+
+  // Block until ReaderWriterMutex is free then acquire exclusive access.
+  void ExclusiveLock() EXCLUSIVE_LOCK_FUNCTION();
+  void WriterLock() EXCLUSIVE_LOCK_FUNCTION() {  ExclusiveLock(); }
+
+  // Release exclusive access.
+  void ExclusiveUnlock() UNLOCK_FUNCTION();
+  void WriterUnlock() UNLOCK_FUNCTION() {  ExclusiveUnlock(); }
+
+  // Block until ReaderWriterMutex is free and acquire exclusive access. Returns true on success
+  // or false if timeout is reached.
+  bool ExclusiveLockWithTimeout(const timespec& abs_timeout) EXCLUSIVE_TRYLOCK_FUNCTION(true);
+
+  // Block until ReaderWriterMutex is shared or free then acquire a share on the access.
+  void SharedLock() SHARED_LOCK_FUNCTION();
+  void ReaderLock() SHARED_LOCK_FUNCTION() { SharedLock(); }
+
+  // Try to acquire share of ReaderWriterMutex.
+  bool SharedTryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true);
+
+  // Release a share of the access.
+  void SharedUnlock() UNLOCK_FUNCTION();
+  void ReaderUnlock() UNLOCK_FUNCTION() { SharedUnlock(); }
+
+  // Is the current thread the exclusive holder of the ReaderWriterMutex.
+  bool IsExclusiveHeld() const;
+
+  // Assert the current thread has exclusive access to the ReaderWriterMutex.
+  void AssertExclusiveHeld() {
+    if (kIsDebugBuild) {
+      CHECK(IsExclusiveHeld());
+    }
+  }
+  void AssertWriterHeld() { AssertExclusiveHeld(); }
+
+  // Assert the current thread doesn't have exclusive access to the ReaderWriterMutex.
+  void AssertNotExclusiveHeld() {
+    if (kIsDebugBuild) {
+      CHECK(!IsExclusiveHeld());
+    }
+  }
+  void AssertNotWriterHeld() { AssertNotExclusiveHeld(); }
+
+  // Is the current thread a shared holder of the ReaderWriterMutex.
+  bool IsSharedHeld() const;
+
+  // Assert the current thread has shared access to the ReaderWriterMutex.
+  void AssertSharedHeld() {
+    if (kIsDebugBuild) {
+      CHECK(IsSharedHeld());
+    }
+  }
+  void AssertReaderHeld() { AssertSharedHeld(); }
+
+  // Assert the current thread doesn't hold this ReaderWriterMutex either in shared or exclusive
+  // mode.
+  void AssertNotHeld() {
+    if (kIsDebugBuild) {
+      CHECK(!IsSharedHeld());
+    }
   }
 
-  ~MutexLock() UNLOCK_FUNCTION() {
-    mu_.Unlock();
-  }
-
+  // Id associated with exclusive owner.
+  uint64_t GetExclusiveOwnerTid() const;
  private:
-  Mutex& mu_;
-  DISALLOW_COPY_AND_ASSIGN(MutexLock);
-};
-// Catch bug where variable name is omitted. "MutexLock (lock);" instead of "MutexLock mu(lock)".
-#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_declaration_missing_variable_name)
+  pthread_rwlock_t rwlock_;
 
+  friend class MutexTester;
+  DISALLOW_COPY_AND_ASSIGN(ReaderWriterMutex);
+};
+
+// ConditionVariables allow threads to queue and sleep. Threads may then be resumed individually
+// (Signal) or all at once (Broadcast).
 class ConditionVariable {
  public:
   explicit ConditionVariable(const std::string& name);
@@ -99,14 +344,91 @@
   void TimedWait(Mutex& mutex, const timespec& ts);
 
  private:
-  uint32_t UnlockBeforeWait(Mutex& mutex) NO_THREAD_SAFETY_ANALYSIS;
-  void RelockAfterWait(Mutex& mutex, uint32_t unlock_count) NO_THREAD_SAFETY_ANALYSIS;
-
   pthread_cond_t cond_;
   std::string name_;
   DISALLOW_COPY_AND_ASSIGN(ConditionVariable);
 };
 
+// Scoped locker/unlocker for a regular Mutex that acquires mu upon construction and releases it
+// upon destruction.
+class SCOPED_LOCKABLE MutexLock {
+ public:
+  // Acquires exclusive ownership of |mu| for the lifetime of this object.
+  explicit MutexLock(Mutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(mu) {
+    mu_.ExclusiveLock();
+  }
+
+  // Releases the mutex at scope exit (RAII).
+  ~MutexLock() UNLOCK_FUNCTION() {
+    mu_.ExclusiveUnlock();
+  }
+
+ private:
+  Mutex& mu_;  // Borrowed reference; the Mutex must outlive this guard.
+  DISALLOW_COPY_AND_ASSIGN(MutexLock);
+};
+// Catch bug where variable name is omitted. "MutexLock (lock);" instead of "MutexLock mu(lock)".
+#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_declaration_missing_variable_name)
+
+// Scoped locker/unlocker for a ReaderWriterMutex that acquires read access to mu upon
+// construction and releases it upon destruction.
+class SCOPED_LOCKABLE ReaderMutexLock {
+ public:
+  // Acquires shared (reader) access to |mu| for this object's lifetime.
+  // Annotated SHARED_LOCK_FUNCTION to match the SharedLock() acquisition;
+  // EXCLUSIVE_LOCK_FUNCTION would tell annotalysis the caller gained write
+  // access it does not actually hold.
+  explicit ReaderMutexLock(ReaderWriterMutex& mu) SHARED_LOCK_FUNCTION(mu) : mu_(mu) {
+    mu_.SharedLock();
+  }
+
+  // Releases the shared access at scope exit (RAII).
+  ~ReaderMutexLock() UNLOCK_FUNCTION() {
+    mu_.SharedUnlock();
+  }
+
+ private:
+  ReaderWriterMutex& mu_;  // Borrowed; must outlive this guard.
+  DISALLOW_COPY_AND_ASSIGN(ReaderMutexLock);
+};
+// Catch bug where variable name is omitted. "ReaderMutexLock (lock);" instead of
+// "ReaderMutexLock mu(lock)".
+#define ReaderMutexLock(x) COMPILE_ASSERT(0, reader_mutex_lock_declaration_missing_variable_name)
+
+// Scoped locker/unlocker for a ReaderWriterMutex that acquires write access to mu upon
+// construction and releases it upon destruction.
+class SCOPED_LOCKABLE WriterMutexLock {
+ public:
+  // Acquires exclusive (writer) access to |mu| for this object's lifetime.
+  explicit WriterMutexLock(ReaderWriterMutex& mu) EXCLUSIVE_LOCK_FUNCTION(mu) : mu_(mu) {
+    mu_.ExclusiveLock();
+  }
+
+  // Releases the exclusive access at scope exit (RAII).
+  ~WriterMutexLock() UNLOCK_FUNCTION() {
+    mu_.ExclusiveUnlock();
+  }
+
+ private:
+  ReaderWriterMutex& mu_;  // Borrowed; must outlive this guard.
+  DISALLOW_COPY_AND_ASSIGN(WriterMutexLock);
+};
+// Catch bug where variable name is omitted. "WriterMutexLock (lock);" instead of
+// "WriterMutexLock mu(lock)".
+#define WriterMutexLock(x) COMPILE_ASSERT(0, writer_mutex_lock_declaration_missing_variable_name)
+
+// Scoped unlocker/locker for a ReaderWriterMutex that releases read access to mu upon
+// construction and acquires it again upon destruction.
+class ReaderMutexUnlock {
+ public:
+  // Inverted RAII guard: releases shared access in the constructor and
+  // re-acquires it in the destructor.
+  // NOTE(review): unlike the guards above, this class is not SCOPED_LOCKABLE --
+  // presumably deliberate, since annotalysis assumes a scoped lockable acquires
+  // in its constructor; confirm.
+  explicit ReaderMutexUnlock(ReaderWriterMutex& mu) UNLOCK_FUNCTION(mu) : mu_(mu) {
+    mu_.SharedUnlock();
+  }
+
+  ~ReaderMutexUnlock() SHARED_LOCK_FUNCTION(mu_) {
+    mu_.SharedLock();
+  }
+
+ private:
+  ReaderWriterMutex& mu_;  // Borrowed; must outlive this guard.
+  DISALLOW_COPY_AND_ASSIGN(ReaderMutexUnlock);
+};
+// Catch bug where variable name is omitted. "ReaderMutexUnlock (lock);" instead of
+// "ReaderMutexUnlock mu(lock)".
+#define ReaderMutexUnlock(x) \
+    COMPILE_ASSERT(0, reader_mutex_unlock_declaration_missing_variable_name)
+
 }  // namespace art
 
 #endif  // ART_SRC_MUTEX_H_
diff --git a/src/mutex_test.cc b/src/mutex_test.cc
index 69507d1..8a40cd6 100644
--- a/src/mutex_test.cc
+++ b/src/mutex_test.cc
@@ -60,7 +60,7 @@
 
 // GCC has trouble with our mutex tests, so we have to turn off thread safety analysis.
 static void RecursiveLockUnlockTest() NO_THREAD_SAFETY_ANALYSIS {
-  Mutex mu("test mutex");
+  Mutex mu("test mutex", kDefaultMutexLevel, true);
   MutexTester::AssertDepth(mu, 0U);
   mu.Lock();
   MutexTester::AssertDepth(mu, 1U);
@@ -78,7 +78,7 @@
 
 // GCC has trouble with our mutex tests, so we have to turn off thread safety analysis.
 static void RecursiveTryLockUnlockTest() NO_THREAD_SAFETY_ANALYSIS {
-  Mutex mu("test mutex");
+  Mutex mu("test mutex", kDefaultMutexLevel, true);
   MutexTester::AssertDepth(mu, 0U);
   ASSERT_TRUE(mu.TryLock());
   MutexTester::AssertDepth(mu, 1U);
@@ -96,7 +96,9 @@
 
 
 struct RecursiveLockWait {
-  explicit RecursiveLockWait() : mu("test mutex"), cv("test condition variable") {}
+  explicit RecursiveLockWait()
+      : mu("test mutex", kDefaultMutexLevel, true), cv("test condition variable") {
+  }
 
   static void* Callback(void* arg) {
     RecursiveLockWait* state = reinterpret_cast<RecursiveLockWait*>(arg);
@@ -132,4 +134,38 @@
   RecursiveLockWaitTest();
 }
 
+TEST_F(MutexTest, SharedLockUnlock) {
+  ReaderWriterMutex mu("test rwmutex");
+  // Round-trip a shared acquisition, checking the holder predicates each step.
+  mu.AssertNotHeld();
+  mu.SharedLock();
+  mu.AssertSharedHeld();
+  mu.AssertNotExclusiveHeld();
+  mu.SharedUnlock();
+  mu.AssertNotHeld();
+}
+
+TEST_F(MutexTest, ExclusiveLockUnlock) {
+  ReaderWriterMutex mu("test rwmutex");
+  mu.AssertNotHeld();
+  mu.ExclusiveLock();
+  // An exclusive holder is also reported as a shared holder.
+  mu.AssertSharedHeld();
+  mu.AssertExclusiveHeld();
+  mu.ExclusiveUnlock();
+  mu.AssertNotHeld();
+}
+
+// GCC has trouble with our mutex tests, so we have to turn off thread safety analysis.
+static void SharedTryLockUnlockTest() NO_THREAD_SAFETY_ANALYSIS {
+  ReaderWriterMutex mu("test rwmutex");
+  mu.AssertNotHeld();
+  // An uncontended SharedTryLock() is expected to succeed.
+  ASSERT_TRUE(mu.SharedTryLock());
+  mu.AssertSharedHeld();
+  mu.SharedUnlock();
+  mu.AssertNotHeld();
+}
+
+TEST_F(MutexTest, SharedTryLockUnlock) {
+  // Body lives in a helper so NO_THREAD_SAFETY_ANALYSIS can be applied to it.
+  SharedTryLockUnlockTest();
+}
+
 }  // namespace art
diff --git a/src/native/dalvik_system_DexFile.cc b/src/native/dalvik_system_DexFile.cc
index 3e749e5..30f411c 100644
--- a/src/native/dalvik_system_DexFile.cc
+++ b/src/native/dalvik_system_DexFile.cc
@@ -24,7 +24,7 @@
 #include "logging.h"
 #include "os.h"
 #include "runtime.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "space.h"
@@ -89,12 +89,14 @@
   if (env->ExceptionCheck()) {
     return 0;
   }
+  ScopedObjectAccess soa(env);
   const DexFile* dex_file;
   if (outputName.c_str() == NULL) {
     dex_file = Runtime::Current()->GetClassLinker()->FindDexFileInOatFileFromDexLocation(source);
   } else {
     std::string output(outputName.c_str());
-    dex_file = Runtime::Current()->GetClassLinker()->FindOrCreateOatFileForDexLocation(source, output);
+    dex_file =
+        Runtime::Current()->GetClassLinker()->FindOrCreateOatFileForDexLocation(source, output);
   }
   if (dex_file == NULL) {
     LOG(WARNING) << "Failed to open dex file: " << source;
@@ -105,7 +107,8 @@
   return static_cast<jint>(reinterpret_cast<uintptr_t>(dex_file));
 }
 
-static const DexFile* toDexFile(int dex_file_address) {
+static const DexFile* toDexFile(int dex_file_address)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   const DexFile* dex_file = reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(dex_file_address));
   if (dex_file == NULL) {
     Thread::Current()->ThrowNewExceptionF("Ljava/lang/NullPointerException;", "dex_file == null");
@@ -113,8 +116,12 @@
   return dex_file;
 }
 
-static void DexFile_closeDexFile(JNIEnv*, jclass, jint cookie) {
-  const DexFile* dex_file = toDexFile(cookie);
+static void DexFile_closeDexFile(JNIEnv* env, jclass, jint cookie) {
+  const DexFile* dex_file;
+  {
+    ScopedObjectAccess soa(env);
+    dex_file = toDexFile(cookie);
+  }
   if (dex_file == NULL) {
     return;
   }
@@ -126,7 +133,7 @@
 
 static jclass DexFile_defineClassNative(JNIEnv* env, jclass, jstring javaName, jobject javaLoader,
                                         jint cookie) {
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   const DexFile* dex_file = toDexFile(cookie);
   if (dex_file == NULL) {
     return NULL;
@@ -142,14 +149,18 @@
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   class_linker->RegisterDexFile(*dex_file);
-  Object* class_loader_object = ts.Decode<Object*>(javaLoader);
+  Object* class_loader_object = soa.Decode<Object*>(javaLoader);
   ClassLoader* class_loader = down_cast<ClassLoader*>(class_loader_object);
   Class* result = class_linker->DefineClass(descriptor, class_loader, *dex_file, *dex_class_def);
-  return ts.AddLocalReference<jclass>(result);
+  return soa.AddLocalReference<jclass>(result);
 }
 
 static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jint cookie) {
-  const DexFile* dex_file = toDexFile(cookie);
+  const DexFile* dex_file;
+  {
+    ScopedObjectAccess soa(env);
+    dex_file = toDexFile(cookie);
+  }
   if (dex_file == NULL) {
     return NULL;
   }
@@ -174,6 +185,7 @@
 
   if (!OS::FileExists(filename.c_str())) {
     LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename.c_str() << "' does not exist";
+    ScopedObjectAccess soa(env);
     Thread::Current()->ThrowNewExceptionF("Ljava/io/FileNotFoundException;", "%s", filename.c_str());
     return JNI_TRUE;
   }
@@ -205,6 +217,7 @@
       }
       return JNI_FALSE;
     }
+    ScopedObjectAccess soa(env);
     if (ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename.c_str(), location_checksum)) {
       if (debug_logging) {
         LOG(INFO) << "DexFile_isDexOptNeeded precompiled file " << oat_filename
@@ -232,6 +245,7 @@
       // TODO: Ensure this works with multiple image spaces.
       const ImageHeader& image_header = (*cur)->AsImageSpace()->GetImageHeader();
       if (oat_file->GetOatHeader().GetImageFileLocationChecksum() != image_header.GetOatChecksum()) {
+        ScopedObjectAccess soa(env);
         LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
                   << " has out-of-date checksum compared to "
                   << image_header.GetImageRoot(ImageHeader::kOatLocation)->AsString()->ToModifiedUtf8();
@@ -246,9 +260,10 @@
     return JNI_TRUE;
   }
 
+  ScopedObjectAccess soa(env);
   if (!ClassLinker::VerifyOatFileChecksums(oat_file.get(), filename.c_str(), location_checksum)) {
     LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
-              << " has out-of-date checksum compared to " << filename.c_str();
+        << " has out-of-date checksum compared to " << filename.c_str();
     return JNI_TRUE;
   }
 
diff --git a/src/native/dalvik_system_VMDebug.cc b/src/native/dalvik_system_VMDebug.cc
index 70067fe..3799bbe 100644
--- a/src/native/dalvik_system_VMDebug.cc
+++ b/src/native/dalvik_system_VMDebug.cc
@@ -22,7 +22,7 @@
 #include "hprof/hprof.h"
 #include "jni_internal.h"
 #include "ScopedUtfChars.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "toStringArray.h"
 #include "trace.h"
 
@@ -57,7 +57,8 @@
   Trace::Start("[DDMS]", -1, bufferSize, flags, true);
 }
 
-static void VMDebug_startMethodTracingFd(JNIEnv* env, jclass, jstring javaTraceFilename, jobject javaFd, jint bufferSize, jint flags) {
+static void VMDebug_startMethodTracingFd(JNIEnv* env, jclass, jstring javaTraceFilename,
+                                         jobject javaFd, jint bufferSize, jint flags) {
   int originalFd = jniGetFDFromFileDescriptor(env, javaFd);
   if (originalFd < 0) {
     return;
@@ -65,7 +66,9 @@
 
   int fd = dup(originalFd);
   if (fd < 0) {
-    Thread::Current()->ThrowNewExceptionF("Ljava/lang/RuntimeException;", "dup(%d) failed: %s", originalFd, strerror(errno));
+    ScopedObjectAccess soa(env);
+    Thread::Current()->ThrowNewExceptionF("Ljava/lang/RuntimeException;",
+                                          "dup(%d) failed: %s", originalFd, strerror(errno));
     return;
   }
 
@@ -76,7 +79,8 @@
   Trace::Start(traceFilename.c_str(), fd, bufferSize, flags, false);
 }
 
-static void VMDebug_startMethodTracingFilename(JNIEnv* env, jclass, jstring javaTraceFilename, jint bufferSize, jint flags) {
+static void VMDebug_startMethodTracingFilename(JNIEnv* env, jclass, jstring javaTraceFilename,
+                                               jint bufferSize, jint flags) {
   ScopedUtfChars traceFilename(env, javaTraceFilename);
   if (traceFilename.c_str() == NULL) {
     return;
@@ -114,23 +118,28 @@
   return Dbg::LastDebuggerActivity();
 }
 
-static void VMDebug_startInstructionCounting(JNIEnv*, jclass) {
+static void VMDebug_startInstructionCounting(JNIEnv* env, jclass) {
+  // Instruction counting is unsupported; ScopedObjectAccess is taken first,
+  // presumably because throwing allocates/touches managed objects -- confirm
+  // against scoped_thread_state_change.h.
+  ScopedObjectAccess soa(env);
   Thread::Current()->ThrowNewException("Ljava/lang/UnsupportedOperationException;", "");
 }
 
-static void VMDebug_stopInstructionCounting(JNIEnv*, jclass) {
+static void VMDebug_stopInstructionCounting(JNIEnv* env, jclass) {
+  // Unsupported; see startInstructionCounting.
+  ScopedObjectAccess soa(env);
   Thread::Current()->ThrowNewException("Ljava/lang/UnsupportedOperationException;", "");
 }
 
-static void VMDebug_getInstructionCount(JNIEnv*, jclass, jintArray /*javaCounts*/) {
+static void VMDebug_getInstructionCount(JNIEnv* env, jclass, jintArray /*javaCounts*/) {
+  // Unsupported; see startInstructionCounting.
+  ScopedObjectAccess soa(env);
   Thread::Current()->ThrowNewException("Ljava/lang/UnsupportedOperationException;", "");
 }
 
-static void VMDebug_resetInstructionCount(JNIEnv*, jclass) {
+static void VMDebug_resetInstructionCount(JNIEnv* env, jclass) {
+  // Unsupported; see startInstructionCounting.
+  ScopedObjectAccess soa(env);
   Thread::Current()->ThrowNewException("Ljava/lang/UnsupportedOperationException;", "");
 }
 
-static void VMDebug_printLoadedClasses(JNIEnv*, jclass, jint flags) {
+static void VMDebug_printLoadedClasses(JNIEnv* env, jclass, jint flags) {
+  // NOTE(review): 'return' of a void call is legal but unusual style.
+  ScopedObjectAccess soa(env);
   return Runtime::Current()->GetClassLinker()->DumpAllClasses(flags);
 }
 
@@ -155,7 +164,9 @@
 static void VMDebug_dumpHprofData(JNIEnv* env, jclass, jstring javaFilename, jobject javaFd) {
   // Only one of these may be NULL.
   if (javaFilename == NULL && javaFd == NULL) {
-    Thread::Current()->ThrowNewException("Ljava/lang/NullPointerException;", "fileName == null && fd == null");
+    ScopedObjectAccess soa(env);
+    Thread::Current()->ThrowNewException("Ljava/lang/NullPointerException;",
+                                         "fileName == null && fd == null");
     return;
   }
 
@@ -174,7 +185,9 @@
   if (javaFd != NULL) {
     fd = jniGetFDFromFileDescriptor(env, javaFd);
     if (fd < 0) {
-      Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;", "Invalid file descriptor");
+      ScopedObjectAccess soa(env);
+      Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;",
+                                           "Invalid file descriptor");
       return;
     }
   }
@@ -187,11 +200,11 @@
 }
 
 static void VMDebug_dumpReferenceTables(JNIEnv* env, jclass) {
+  // ScopedObjectAccess provides the checked soa.Env()/soa.Vm() accessors,
+  // replacing the previous raw reinterpret_cast of the JNIEnv.
+  ScopedObjectAccess soa(env);
   LOG(INFO) << "--- reference table dump ---";
 
-  JNIEnvExt* e = reinterpret_cast<JNIEnvExt*>(env);
-  e->DumpReferenceTables(LOG(INFO));
-  e->vm->DumpReferenceTables(LOG(INFO));
+  soa.Env()->DumpReferenceTables(LOG(INFO));
+  soa.Vm()->DumpReferenceTables(LOG(INFO));
 
   LOG(INFO) << "---";
 }
@@ -204,9 +217,10 @@
   LOG(INFO) << "VMDebug infopoint " << id << " hit";
 }
 
-static jlong VMDebug_countInstancesOfClass(JNIEnv* env, jclass, jclass javaClass, jboolean countAssignable) {
-  ScopedJniThreadState ts(env);
-  Class* c = ts.Decode<Class*>(javaClass);
+static jlong VMDebug_countInstancesOfClass(JNIEnv* env, jclass, jclass javaClass,
+                                           jboolean countAssignable) {
+  ScopedObjectAccess soa(env);
+  Class* c = soa.Decode<Class*>(javaClass);
   if (c == NULL) {
     return 0;
   }
diff --git a/src/native/dalvik_system_VMRuntime.cc b/src/native/dalvik_system_VMRuntime.cc
index 4ec1b92..8dbbc77 100644
--- a/src/native/dalvik_system_VMRuntime.cc
+++ b/src/native/dalvik_system_VMRuntime.cc
@@ -21,9 +21,7 @@
 #include "jni_internal.h"
 #include "object.h"
 #include "object_utils.h"
-#include "scoped_heap_lock.h"
-#include "scoped_jni_thread_state.h"
-#include "scoped_thread_list_lock.h"
+#include "scoped_thread_state_change.h"
 #include "space.h"
 #include "thread.h"
 #include "thread_list.h"
@@ -49,7 +47,7 @@
 }
 
 static jobject VMRuntime_newNonMovableArray(JNIEnv* env, jobject, jclass javaElementClass, jint length) {
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
 #ifdef MOVING_GARBAGE_COLLECTOR
   // TODO: right now, we don't have a copying collector, so there's no need
   // to do anything special here, but we ought to pass the non-movability
@@ -57,7 +55,7 @@
   UNIMPLEMENTED(FATAL);
 #endif
 
-  Class* element_class = ts.Decode<Class*>(javaElementClass);
+  Class* element_class = soa.Decode<Class*>(javaElementClass);
   if (element_class == NULL) {
     Thread::Current()->ThrowNewException("Ljava/lang/NullPointerException;", "element class == null");
     return NULL;
@@ -76,15 +74,15 @@
   if (result == NULL) {
     return NULL;
   }
-  return ts.AddLocalReference<jobject>(result);
+  return soa.AddLocalReference<jobject>(result);
 }
 
 static jlong VMRuntime_addressOf(JNIEnv* env, jobject, jobject javaArray) {
   if (javaArray == NULL) {  // Most likely allocation failed
     return 0;
   }
-  ScopedJniThreadState ts(env);
-  Array* array = ts.Decode<Array*>(javaArray);
+  ScopedObjectAccess soa(env);
+  Array* array = soa.Decode<Array*>(javaArray);
   if (!array->IsArrayInstance()) {
     Thread::Current()->ThrowNewException("Ljava/lang/IllegalArgumentException;", "not an array");
     return 0;
@@ -143,7 +141,7 @@
 #if !defined(ART_USE_LLVM_COMPILER)
     if (vm->check_jni) {
       LOG(WARNING) << "Turning off CheckJNI so we can turn on JNI app bug workarounds...";
-      ScopedThreadListLock thread_list_lock;
+      MutexLock mu(*GlobalSynchronization::thread_list_lock_);
       vm->SetCheckJniEnabled(false);
       runtime->GetThreadList()->ForEach(DisableCheckJniCallback, NULL);
     }
@@ -160,8 +158,6 @@
 }
 
 static void VMRuntime_trimHeap(JNIEnv*, jobject) {
-  ScopedHeapLock heap_lock;
-
   // Trim the managed heap.
   Heap* heap = Runtime::Current()->GetHeap();
   const Spaces& spaces = heap->GetSpaces();
diff --git a/src/native/dalvik_system_VMStack.cc b/src/native/dalvik_system_VMStack.cc
index 12fa8db..3284c97 100644
--- a/src/native/dalvik_system_VMStack.cc
+++ b/src/native/dalvik_system_VMStack.cc
@@ -18,19 +18,34 @@
 #include "jni_internal.h"
 #include "nth_caller_visitor.h"
 #include "object.h"
-#include "scoped_heap_lock.h"
-#include "scoped_jni_thread_state.h"
-#include "scoped_thread_list_lock.h"
+#include "scoped_thread_state_change.h"
 #include "thread_list.h"
 
 namespace art {
 
-static jobject GetThreadStack(JNIEnv* env, jobject javaThread) {
-  ScopedJniThreadState ts(env);
-  ScopedHeapLock heap_lock;
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = Thread::FromManagedThread(ts, javaThread);
-  return (thread != NULL) ? GetThreadStack(ts, thread) : NULL;
+static jobject GetThreadStack(JNIEnv* env, jobject peer) {
+  bool timeout;
+  {
+    // Fast path: the current thread can walk its own stack without suspension.
+    ScopedObjectAccess soa(env);
+    Thread* self = Thread::Current();
+    if (soa.Decode<Object*>(peer) == self->GetPeer()) {
+      return self->CreateInternalStackTrace(soa);
+    }
+  }
+  // Suspend thread to build stack trace.
+  // NOTE(review): |timeout| is passed as an out-parameter but never read here;
+  // if SuspendForDebugger distinguishes timeout from other NULL returns,
+  // consider reporting it to the caller.
+  Thread* thread = Thread::SuspendForDebugger(peer, true, &timeout);
+  if (thread != NULL) {
+    jobject trace;
+    {
+      // Re-enter managed code only for the duration of the stack walk.
+      ScopedObjectAccess soa(env);
+      trace = thread->CreateInternalStackTrace(soa);
+    }
+    // Restart suspended thread.
+    Runtime::Current()->GetThreadList()->Resume(thread, true);
+    return trace;
+  } else {
+    return NULL;
+  }
 }
 
 static jint VMStack_fillStackTraceElements(JNIEnv* env, jclass, jobject javaThread, jobjectArray javaSteArray) {
@@ -45,10 +60,10 @@
 
 // Returns the defining class loader of the caller's caller.
 static jobject VMStack_getCallingClassLoader(JNIEnv* env, jclass) {
-  ScopedJniThreadState ts(env);
-  NthCallerVisitor visitor(ts.Self()->GetManagedStack(), ts.Self()->GetTraceStack(), 2);
+  ScopedObjectAccess soa(env);
+  NthCallerVisitor visitor(soa.Self()->GetManagedStack(), soa.Self()->GetTraceStack(), 2);
   visitor.WalkStack();
-  return ts.AddLocalReference<jobject>(visitor.caller->GetDeclaringClass()->GetClassLoader());
+  return soa.AddLocalReference<jobject>(visitor.caller->GetDeclaringClass()->GetClassLoader());
 }
 
 static jobject VMStack_getClosestUserClassLoader(JNIEnv* env, jclass, jobject javaBootstrap, jobject javaSystem) {
@@ -74,21 +89,21 @@
     Object* system;
     Object* class_loader;
   };
-  ScopedJniThreadState ts(env);
-  Object* bootstrap = ts.Decode<Object*>(javaBootstrap);
-  Object* system = ts.Decode<Object*>(javaSystem);
-  ClosestUserClassLoaderVisitor visitor(ts.Self()->GetManagedStack(), ts.Self()->GetTraceStack(),
+  ScopedObjectAccess soa(env);
+  Object* bootstrap = soa.Decode<Object*>(javaBootstrap);
+  Object* system = soa.Decode<Object*>(javaSystem);
+  ClosestUserClassLoaderVisitor visitor(soa.Self()->GetManagedStack(), soa.Self()->GetTraceStack(),
                                         bootstrap, system);
   visitor.WalkStack();
-  return ts.AddLocalReference<jobject>(visitor.class_loader);
+  return soa.AddLocalReference<jobject>(visitor.class_loader);
 }
 
 // Returns the class of the caller's caller's caller.
 static jclass VMStack_getStackClass2(JNIEnv* env, jclass) {
-  ScopedJniThreadState ts(env);
-  NthCallerVisitor visitor(ts.Self()->GetManagedStack(), ts.Self()->GetTraceStack(), 3);
+  ScopedObjectAccess soa(env);
+  NthCallerVisitor visitor(soa.Self()->GetManagedStack(), soa.Self()->GetTraceStack(), 3);
   visitor.WalkStack();
-  return ts.AddLocalReference<jclass>(visitor.caller->GetDeclaringClass());
+  return soa.AddLocalReference<jclass>(visitor.caller->GetDeclaringClass());
 }
 
 static jobjectArray VMStack_getThreadStackTrace(JNIEnv* env, jclass, jobject javaThread) {
diff --git a/src/native/java_lang_Class.cc b/src/native/java_lang_Class.cc
index bc1d0de..e63cf1a 100644
--- a/src/native/java_lang_Class.cc
+++ b/src/native/java_lang_Class.cc
@@ -20,15 +20,16 @@
 #include "nth_caller_visitor.h"
 #include "object.h"
 #include "object_utils.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "well_known_classes.h"
 
 namespace art {
 
-static Class* DecodeClass(const ScopedJniThreadState& ts, jobject java_class) {
-  Class* c = ts.Decode<Class*>(java_class);
+static Class* DecodeClass(const ScopedObjectAccess& soa, jobject java_class)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  Class* c = soa.Decode<Class*>(java_class);
   DCHECK(c != NULL);
   DCHECK(c->IsClass());
   // TODO: we could EnsureInitialized here, rather than on every reflective get/set or invoke .
@@ -39,7 +40,7 @@
 
 // "name" is in "binary name" format, e.g. "dalvik.system.Debug$1".
 static jclass Class_classForName(JNIEnv* env, jclass, jstring javaName, jboolean initialize, jobject javaLoader) {
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   ScopedUtfChars name(env, javaName);
   if (name.c_str() == NULL) {
     return NULL;
@@ -55,7 +56,7 @@
   }
 
   std::string descriptor(DotToDescriptor(name.c_str()));
-  ClassLoader* class_loader = ts.Decode<ClassLoader*>(javaLoader);
+  ClassLoader* class_loader = soa.Decode<ClassLoader*>(javaLoader);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Class* c = class_linker->FindClass(descriptor.c_str(), class_loader);
   if (c == NULL) {
@@ -70,12 +71,12 @@
   if (initialize) {
     class_linker->EnsureInitialized(c, true, true);
   }
-  return ts.AddLocalReference<jclass>(c);
+  return soa.AddLocalReference<jclass>(c);
 }
 
 static jint Class_getAnnotationDirectoryOffset(JNIEnv* env, jclass javaClass) {
-  ScopedJniThreadState ts(env);
-  Class* c = DecodeClass(ts, javaClass);
+  ScopedObjectAccess soa(env);
+  Class* c = DecodeClass(soa, javaClass);
   if (c->IsPrimitive() || c->IsArrayClass() || c->IsProxyClass()) {
     return 0;  // primitive, array and proxy classes don't have class definitions
   }
@@ -87,17 +88,22 @@
   }
 }
 
+// TODO: Remove this redundant struct when GCC annotalysis works correctly on top-level functions.
+struct WorkAroundGccAnnotalysisBug {
 template<typename T>
-static jobjectArray ToArray(const ScopedJniThreadState& ts, const char* array_class_name,
-                            const std::vector<T*>& objects) {
-  ScopedLocalRef<jclass> array_class(ts.Env(), ts.Env()->FindClass(array_class_name));
-  jobjectArray result = ts.Env()->NewObjectArray(objects.size(), array_class.get(), NULL);
+static jobjectArray ToArray(const ScopedObjectAccessUnchecked& soa, const char* array_class_name,
+                            const std::vector<T*>& objects)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  ScopedLocalRef<jclass> array_class(soa.Env(), soa.Env()->FindClass(array_class_name));
+  jobjectArray result = soa.Env()->NewObjectArray(objects.size(), array_class.get(), NULL);
   for (size_t i = 0; i < objects.size(); ++i) {
-    ScopedLocalRef<jobject> object(ts.Env(), ts.AddLocalReference<jobject>(objects[i]));
-    ts.Env()->SetObjectArrayElement(result, i, object.get());
+    ScopedLocalRef<jobject> object(soa.Env(), soa.AddLocalReference<jobject>(objects[i]));
+    soa.Env()->SetObjectArrayElement(result, i, object.get());
   }
   return result;
 }
+};
+#define ToArray(a, b, c) WorkAroundGccAnnotalysisBug::ToArray(a, b, c)
 
 static bool IsVisibleConstructor(Method* m, bool public_only) {
   if (public_only && !m->IsPublic()) {
@@ -110,8 +116,8 @@
 }
 
 static jobjectArray Class_getDeclaredConstructors(JNIEnv* env, jclass javaClass, jboolean publicOnly) {
-  ScopedJniThreadState ts(env);
-  Class* c = DecodeClass(ts, javaClass);
+  ScopedObjectAccess soa(env);
+  Class* c = DecodeClass(soa, javaClass);
   std::vector<Method*> constructors;
   for (size_t i = 0; i < c->NumDirectMethods(); ++i) {
     Method* m = c->GetDirectMethod(i);
@@ -120,7 +126,7 @@
     }
   }
 
-  return ToArray(ts, "java/lang/reflect/Constructor", constructors);
+  return ToArray(soa, "java/lang/reflect/Constructor", constructors);
 }
 
 static bool IsVisibleField(Field* f, bool public_only) {
@@ -131,8 +137,8 @@
 }
 
 static jobjectArray Class_getDeclaredFields(JNIEnv* env, jclass javaClass, jboolean publicOnly) {
-  ScopedJniThreadState ts(env);
-  Class* c = DecodeClass(ts, javaClass);
+  ScopedObjectAccess soa(env);
+  Class* c = DecodeClass(soa, javaClass);
   std::vector<Field*> fields;
   FieldHelper fh;
   for (size_t i = 0; i < c->NumInstanceFields(); ++i) {
@@ -164,7 +170,7 @@
     }
   }
 
-  return ToArray(ts, "java/lang/reflect/Field", fields);
+  return ToArray(soa, "java/lang/reflect/Field", fields);
 }
 
 static bool IsVisibleMethod(Method* m, bool public_only) {
@@ -181,8 +187,8 @@
 }
 
 static jobjectArray Class_getDeclaredMethods(JNIEnv* env, jclass javaClass, jboolean publicOnly) {
-  ScopedJniThreadState ts(env);
-  Class* c = DecodeClass(ts, javaClass);
+  ScopedObjectAccess soa(env);
+  Class* c = DecodeClass(soa, javaClass);
   if (c == NULL) {
     return NULL;
   }
@@ -218,12 +224,12 @@
     }
   }
 
-  return ToArray(ts, "java/lang/reflect/Method", methods);
+  return ToArray(soa, "java/lang/reflect/Method", methods);
 }
 
 static jobject Class_getDex(JNIEnv* env, jobject javaClass) {
-  ScopedJniThreadState ts(env);
-  Class* c = DecodeClass(ts, javaClass);
+  ScopedObjectAccess soa(env);
+  Class* c = DecodeClass(soa, javaClass);
 
   DexCache* dex_cache = c->GetDexCache();
   if (dex_cache == NULL) {
@@ -233,7 +239,8 @@
   return Runtime::Current()->GetClassLinker()->FindDexFile(dex_cache).GetDexObject(env);
 }
 
-static bool MethodMatches(MethodHelper* mh, const std::string& name, ObjectArray<Class>* arg_array) {
+static bool MethodMatches(MethodHelper* mh, const std::string& name, ObjectArray<Class>* arg_array)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (name != mh->GetName()) {
     return false;
   }
@@ -254,7 +261,8 @@
 }
 
 static Method* FindConstructorOrMethodInArray(ObjectArray<Method>* methods, const std::string& name,
-                                              ObjectArray<Class>* arg_array) {
+                                              ObjectArray<Class>* arg_array)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (methods == NULL) {
     return NULL;
   }
@@ -282,10 +290,10 @@
 
 static jobject Class_getDeclaredConstructorOrMethod(JNIEnv* env, jclass javaClass, jstring javaName,
                                                     jobjectArray javaArgs) {
-  ScopedJniThreadState ts(env);
-  Class* c = DecodeClass(ts, javaClass);
-  std::string name(ts.Decode<String*>(javaName)->ToModifiedUtf8());
-  ObjectArray<Class>* arg_array = ts.Decode<ObjectArray<Class>*>(javaArgs);
+  ScopedObjectAccess soa(env);
+  Class* c = DecodeClass(soa, javaClass);
+  std::string name(soa.Decode<String*>(javaName)->ToModifiedUtf8());
+  ObjectArray<Class>* arg_array = soa.Decode<ObjectArray<Class>*>(javaArgs);
 
   Method* m = FindConstructorOrMethodInArray(c->GetDirectMethods(), name, arg_array);
   if (m == NULL) {
@@ -293,16 +301,16 @@
   }
 
   if (m != NULL) {
-    return ts.AddLocalReference<jobject>(m);
+    return soa.AddLocalReference<jobject>(m);
   } else {
     return NULL;
   }
 }
 
 static jobject Class_getDeclaredFieldNative(JNIEnv* env, jclass java_class, jobject jname) {
-  ScopedJniThreadState ts(env);
-  Class* c = DecodeClass(ts, java_class);
-  String* name = ts.Decode<String*>(jname);
+  ScopedObjectAccess soa(env);
+  Class* c = DecodeClass(soa, java_class);
+  String* name = soa.Decode<String*>(jname);
   DCHECK(name->GetClass()->IsStringClass());
 
   FieldHelper fh;
@@ -314,7 +322,7 @@
         DCHECK(env->ExceptionOccurred());
         return NULL;
       }
-      return ts.AddLocalReference<jclass>(f);
+      return soa.AddLocalReference<jclass>(f);
     }
   }
   for (size_t i = 0; i < c->NumStaticFields(); ++i) {
@@ -325,40 +333,40 @@
         DCHECK(env->ExceptionOccurred());
         return NULL;
       }
-      return ts.AddLocalReference<jclass>(f);
+      return soa.AddLocalReference<jclass>(f);
     }
   }
   return NULL;
 }
 
 static jstring Class_getNameNative(JNIEnv* env, jobject javaThis) {
-  ScopedJniThreadState ts(env);
-  Class* c = DecodeClass(ts, javaThis);
-  return ts.AddLocalReference<jstring>(c->ComputeName());
+  ScopedObjectAccess soa(env);
+  Class* c = DecodeClass(soa, javaThis);
+  return soa.AddLocalReference<jstring>(c->ComputeName());
 }
 
 static jobjectArray Class_getProxyInterfaces(JNIEnv* env, jobject javaThis) {
-  ScopedJniThreadState ts(env);
-  SynthesizedProxyClass* c = down_cast<SynthesizedProxyClass*>(DecodeClass(ts, javaThis));
-  return ts.AddLocalReference<jobjectArray>(c->GetInterfaces()->Clone());
+  ScopedObjectAccess soa(env);
+  SynthesizedProxyClass* c = down_cast<SynthesizedProxyClass*>(DecodeClass(soa, javaThis));
+  return soa.AddLocalReference<jobjectArray>(c->GetInterfaces()->Clone());
 }
 
 static jboolean Class_isAssignableFrom(JNIEnv* env, jobject javaLhs, jclass javaRhs) {
-  ScopedJniThreadState ts(env);
-  Class* lhs = DecodeClass(ts, javaLhs);
-  Class* rhs = ts.Decode<Class*>(javaRhs); // Can be null.
+  ScopedObjectAccess soa(env);
+  Class* lhs = DecodeClass(soa, javaLhs);
+  Class* rhs = soa.Decode<Class*>(javaRhs); // Can be null.
   if (rhs == NULL) {
-    ts.Self()->ThrowNewException("Ljava/lang/NullPointerException;", "class == null");
+    soa.Self()->ThrowNewException("Ljava/lang/NullPointerException;", "class == null");
     return JNI_FALSE;
   }
   return lhs->IsAssignableFrom(rhs) ? JNI_TRUE : JNI_FALSE;
 }
 
 static jobject Class_newInstanceImpl(JNIEnv* env, jobject javaThis) {
-  ScopedJniThreadState ts(env);
-  Class* c = DecodeClass(ts, javaThis);
+  ScopedObjectAccess soa(env);
+  Class* c = DecodeClass(soa, javaThis);
   if (c->IsPrimitive() || c->IsInterface() || c->IsArrayClass() || c->IsAbstract()) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
         "Class %s can not be instantiated", PrettyDescriptor(ClassHelper(c).GetDescriptor()).c_str());
     return NULL;
   }
@@ -369,7 +377,7 @@
 
   Method* init = c->FindDeclaredDirectMethod("<init>", "()V");
   if (init == NULL) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
         "Class %s has no default <init>()V constructor", PrettyDescriptor(ClassHelper(c).GetDescriptor()).c_str());
     return NULL;
   }
@@ -383,20 +391,20 @@
   // constructor must be public or, if the caller is in the same package,
   // have package scope.
 
-  NthCallerVisitor visitor(ts.Self()->GetManagedStack(), ts.Self()->GetTraceStack(), 2);
+  NthCallerVisitor visitor(soa.Self()->GetManagedStack(), soa.Self()->GetTraceStack(), 2);
   visitor.WalkStack();
   Class* caller_class = visitor.caller->GetDeclaringClass();
 
   ClassHelper caller_ch(caller_class);
   if (!caller_class->CanAccess(c)) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/IllegalAccessException;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/IllegalAccessException;",
         "Class %s is not accessible from class %s",
         PrettyDescriptor(ClassHelper(c).GetDescriptor()).c_str(),
         PrettyDescriptor(caller_ch.GetDescriptor()).c_str());
     return NULL;
   }
   if (!caller_class->CanAccessMember(init->GetDeclaringClass(), init->GetAccessFlags())) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/IllegalAccessException;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/IllegalAccessException;",
         "%s is not accessible from class %s",
         PrettyMethod(init).c_str(),
         PrettyDescriptor(caller_ch.GetDescriptor()).c_str());
@@ -405,13 +413,13 @@
 
   Object* new_obj = c->AllocObject();
   if (new_obj == NULL) {
-    DCHECK(ts.Self()->IsExceptionPending());
+    DCHECK(soa.Self()->IsExceptionPending());
     return NULL;
   }
 
   // invoke constructor; unlike reflection calls, we don't wrap exceptions
-  jclass java_class = ts.AddLocalReference<jclass>(c);
-  jmethodID mid = ts.EncodeMethod(init);
+  jclass java_class = soa.AddLocalReference<jclass>(c);
+  jmethodID mid = soa.EncodeMethod(init);
   return env->NewObject(java_class, mid);
 }
 
diff --git a/src/native/java_lang_Object.cc b/src/native/java_lang_Object.cc
index d6b1bd6..89019f7 100644
--- a/src/native/java_lang_Object.cc
+++ b/src/native/java_lang_Object.cc
@@ -16,31 +16,31 @@
 
 #include "jni_internal.h"
 #include "object.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
 static jobject Object_internalClone(JNIEnv* env, jobject javaThis) {
-  ScopedJniThreadState ts(env);
-  Object* o = ts.Decode<Object*>(javaThis);
-  return ts.AddLocalReference<jobject>(o->Clone());
+  ScopedObjectAccess soa(env);
+  Object* o = soa.Decode<Object*>(javaThis);
+  return soa.AddLocalReference<jobject>(o->Clone());
 }
 
 static void Object_notify(JNIEnv* env, jobject javaThis) {
-  ScopedJniThreadState ts(env);
-  Object* o = ts.Decode<Object*>(javaThis);
+  ScopedObjectAccess soa(env);
+  Object* o = soa.Decode<Object*>(javaThis);
   o->Notify();
 }
 
 static void Object_notifyAll(JNIEnv* env, jobject javaThis) {
-  ScopedJniThreadState ts(env);
-  Object* o = ts.Decode<Object*>(javaThis);
+  ScopedObjectAccess soa(env);
+  Object* o = soa.Decode<Object*>(javaThis);
   o->NotifyAll();
 }
 
 static void Object_wait(JNIEnv* env, jobject javaThis, jlong ms, jint ns) {
-  ScopedJniThreadState ts(env);
-  Object* o = ts.Decode<Object*>(javaThis);
+  ScopedObjectAccess soa(env);
+  Object* o = soa.Decode<Object*>(javaThis);
   o->Wait(ms, ns);
 }
 
diff --git a/src/native/java_lang_Runtime.cc b/src/native/java_lang_Runtime.cc
index 1b657b1..6dc850e 100644
--- a/src/native/java_lang_Runtime.cc
+++ b/src/native/java_lang_Runtime.cc
@@ -22,13 +22,12 @@
 #include "jni_internal.h"
 #include "object.h"
 #include "runtime.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedUtfChars.h"
 
 namespace art {
 
-static void Runtime_gc(JNIEnv* env, jclass) {
-  ScopedJniThreadState ts(env);
+static void Runtime_gc(JNIEnv*, jclass) {
   Runtime::Current()->GetHeap()->CollectGarbage(false);
 }
 
@@ -45,13 +44,13 @@
  * message on failure.
  */
 static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader) {
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   ScopedUtfChars filename(env, javaFilename);
   if (filename.c_str() == NULL) {
     return NULL;
   }
 
-  ClassLoader* classLoader = ts.Decode<ClassLoader*>(javaLoader);
+  ClassLoader* classLoader = soa.Decode<ClassLoader*>(javaLoader);
   std::string detail;
   JavaVMExt* vm = Runtime::Current()->GetJavaVM();
   bool success = vm->LoadNativeLibrary(filename.c_str(), classLoader, detail);
diff --git a/src/native/java_lang_String.cc b/src/native/java_lang_String.cc
index 96fcf96..bfdc31a 100644
--- a/src/native/java_lang_String.cc
+++ b/src/native/java_lang_String.cc
@@ -16,7 +16,7 @@
 
 #include "jni_internal.h"
 #include "object.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 #ifdef HAVE__MEMCMP16
 // "count" is in 16-bit units.
@@ -36,9 +36,9 @@
 namespace art {
 
 static jint String_compareTo(JNIEnv* env, jobject javaThis, jobject javaRhs) {
-  ScopedJniThreadState ts(env);
-  String* lhs = ts.Decode<String*>(javaThis);
-  String* rhs = ts.Decode<String*>(javaRhs);
+  ScopedObjectAccess soa(env);
+  String* lhs = soa.Decode<String*>(javaThis);
+  String* rhs = soa.Decode<String*>(javaRhs);
 
   if (rhs == NULL) {
     Thread::Current()->ThrowNewException("Ljava/lang/NullPointerException;", "rhs == null");
@@ -70,11 +70,11 @@
 }
 
 static jint String_fastIndexOf(JNIEnv* env, jobject java_this, jint ch, jint start) {
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   // This method does not handle supplementary characters. They're dealt with in managed code.
   DCHECK_LE(ch, 0xffff);
 
-  String* s = ts.Decode<String*>(java_this);
+  String* s = soa.Decode<String*>(java_this);
 
   jint count = s->GetLength();
   if (start < 0) {
@@ -96,10 +96,10 @@
 }
 
 static jstring String_intern(JNIEnv* env, jobject javaThis) {
-  ScopedJniThreadState ts(env);
-  String* s = ts.Decode<String*>(javaThis);
+  ScopedObjectAccess soa(env);
+  String* s = soa.Decode<String*>(javaThis);
   String* result = s->Intern();
-  return ts.AddLocalReference<jstring>(result);
+  return soa.AddLocalReference<jstring>(result);
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/src/native/java_lang_System.cc b/src/native/java_lang_System.cc
index 76ac670..f4fe6ca 100644
--- a/src/native/java_lang_System.cc
+++ b/src/native/java_lang_System.cc
@@ -16,7 +16,7 @@
 
 #include "jni_internal.h"
 #include "object.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 /*
  * We make guarantees about the atomicity of accesses to primitive
@@ -101,28 +101,29 @@
 
 namespace art {
 
-static void ThrowArrayStoreException_NotAnArray(const char* identifier, Object* array) {
+static void ThrowArrayStoreException_NotAnArray(const char* identifier, Object* array)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   std::string actualType(PrettyTypeOf(array));
   Thread::Current()->ThrowNewExceptionF("Ljava/lang/ArrayStoreException;",
       "%s of type %s is not an array", identifier, actualType.c_str());
 }
 
 static void System_arraycopy(JNIEnv* env, jclass, jobject javaSrc, jint srcPos, jobject javaDst, jint dstPos, jint length) {
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
 
   // Null pointer checks.
   if (javaSrc == NULL) {
-    ts.Self()->ThrowNewException("Ljava/lang/NullPointerException;", "src == null");
+    soa.Self()->ThrowNewException("Ljava/lang/NullPointerException;", "src == null");
     return;
   }
   if (javaDst == NULL) {
-    ts.Self()->ThrowNewException("Ljava/lang/NullPointerException;", "dst == null");
+    soa.Self()->ThrowNewException("Ljava/lang/NullPointerException;", "dst == null");
     return;
   }
 
   // Make sure source and destination are both arrays.
-  Object* srcObject = ts.Decode<Object*>(javaSrc);
-  Object* dstObject = ts.Decode<Object*>(javaDst);
+  Object* srcObject = soa.Decode<Object*>(javaSrc);
+  Object* dstObject = soa.Decode<Object*>(javaDst);
   if (!srcObject->IsArrayInstance()) {
     ThrowArrayStoreException_NotAnArray("source", srcObject);
     return;
@@ -138,7 +139,7 @@
 
   // Bounds checking.
   if (srcPos < 0 || dstPos < 0 || length < 0 || srcPos > srcArray->GetLength() - length || dstPos > dstArray->GetLength() - length) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/ArrayIndexOutOfBoundsException;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/ArrayIndexOutOfBoundsException;",
         "src.length=%d srcPos=%d dst.length=%d dstPos=%d length=%d",
         srcArray->GetLength(), srcPos, dstArray->GetLength(), dstPos, length);
     return;
@@ -150,7 +151,7 @@
     if (srcComponentType->IsPrimitive() != dstComponentType->IsPrimitive() || srcComponentType != dstComponentType) {
       std::string srcType(PrettyTypeOf(srcArray));
       std::string dstType(PrettyTypeOf(dstArray));
-      ts.Self()->ThrowNewExceptionF("Ljava/lang/ArrayStoreException;",
+      soa.Self()->ThrowNewExceptionF("Ljava/lang/ArrayStoreException;",
           "Incompatible types: src=%s, dst=%s", srcType.c_str(), dstType.c_str());
       return;
     }
@@ -233,7 +234,7 @@
   if (i != length) {
     std::string actualSrcType(PrettyTypeOf(o));
     std::string dstType(PrettyTypeOf(dstArray));
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/ArrayStoreException;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/ArrayStoreException;",
         "source[%d] of type %s cannot be stored in destination array of type %s",
         srcPos + i, actualSrcType.c_str(), dstType.c_str());
     return;
@@ -241,9 +242,9 @@
 }
 
 static jint System_identityHashCode(JNIEnv* env, jclass, jobject javaObject) {
-  ScopedJniThreadState ts(env);
-  Object* o = ts.Decode<Object*>(javaObject);
-  return static_cast<jint>(reinterpret_cast<uintptr_t>(o));
+  ScopedObjectAccess soa(env);
+  Object* o = soa.Decode<Object*>(javaObject);
+  return static_cast<jint>(o->IdentityHashCode());
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/src/native/java_lang_Thread.cc b/src/native/java_lang_Thread.cc
index 626255e..65042e4 100644
--- a/src/native/java_lang_Thread.cc
+++ b/src/native/java_lang_Thread.cc
@@ -17,8 +17,7 @@
 #include "debugger.h"
 #include "jni_internal.h"
 #include "object.h"
-#include "scoped_jni_thread_state.h"
-#include "scoped_thread_list_lock.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedUtfChars.h"
 #include "thread.h"
 #include "thread_list.h"
@@ -26,19 +25,18 @@
 namespace art {
 
 static jobject Thread_currentThread(JNIEnv* env, jclass) {
-  ScopedJniThreadState ts(env);
-  return ts.AddLocalReference<jobject>(ts.Self()->GetPeer());
+  ScopedObjectAccess soa(env);
+  return soa.AddLocalReference<jobject>(soa.Self()->GetPeer());
 }
 
 static jboolean Thread_interrupted(JNIEnv* env, jclass) {
-  ScopedJniThreadState ts(env, kNative);  // Doesn't touch objects, so keep in native state.
-  return ts.Self()->Interrupted();
+  return reinterpret_cast<JNIEnvExt*>(env)->self->Interrupted() ? JNI_TRUE : JNI_FALSE;
 }
 
 static jboolean Thread_isInterrupted(JNIEnv* env, jobject java_thread) {
-  ScopedJniThreadState ts(env);
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = Thread::FromManagedThread(ts, java_thread);
+  ScopedObjectAccess soa(env);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = Thread::FromManagedThread(soa, java_thread);
   return (thread != NULL) ? thread->IsInterrupted() : JNI_FALSE;
 }
 
@@ -56,53 +54,62 @@
   const jint kJavaTimedWaiting = 4;
   const jint kJavaTerminated = 5;
 
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   ThreadState internal_thread_state = (has_been_started ? kTerminated : kStarting);
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = Thread::FromManagedThread(ts, java_thread);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = Thread::FromManagedThread(soa, java_thread);
   if (thread != NULL) {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
     internal_thread_state = thread->GetState();
   }
   switch (internal_thread_state) {
-    case kTerminated:   return kJavaTerminated;
-    case kRunnable:     return kJavaRunnable;
-    case kTimedWaiting: return kJavaTimedWaiting;
-    case kBlocked:      return kJavaBlocked;
-    case kWaiting:      return kJavaWaiting;
-    case kStarting:     return kJavaNew;
-    case kNative:       return kJavaRunnable;
-    case kVmWait:       return kJavaWaiting;
-    case kSuspended:    return kJavaRunnable;
+    case kTerminated:                     return kJavaTerminated;
+    case kRunnable:                       return kJavaRunnable;
+    case kTimedWaiting:                   return kJavaTimedWaiting;
+    case kBlocked:                        return kJavaBlocked;
+    case kWaiting:                        return kJavaWaiting;
+    case kStarting:                       return kJavaNew;
+    case kNative:                         return kJavaRunnable;
+    case kWaitingForGcToComplete:         return kJavaWaiting;
+    case kWaitingPerformingGc:            return kJavaWaiting;
+    case kWaitingForDebuggerSend:         return kJavaWaiting;
+    case kWaitingForDebuggerToAttach:     return kJavaWaiting;
+    case kWaitingInMainDebuggerLoop:      return kJavaWaiting;
+    case kWaitingForDebuggerSuspension:   return kJavaWaiting;
+    case kWaitingForJniOnLoad:            return kJavaWaiting;
+    case kWaitingForSignalCatcherOutput:  return kJavaWaiting;
+    case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
+    case kSuspended:                      return kJavaRunnable;
     // Don't add a 'default' here so the compiler can spot incompatible enum changes.
   }
   return -1; // Unreachable.
 }
 
 static jboolean Thread_nativeHoldsLock(JNIEnv* env, jobject java_thread, jobject java_object) {
-  ScopedJniThreadState ts(env);
-  Object* object = ts.Decode<Object*>(java_object);
+  ScopedObjectAccess soa(env);
+  Object* object = soa.Decode<Object*>(java_object);
   if (object == NULL) {
     Thread::Current()->ThrowNewException("Ljava/lang/NullPointerException;", "object == null");
     return JNI_FALSE;
   }
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = Thread::FromManagedThread(ts, java_thread);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = Thread::FromManagedThread(soa, java_thread);
   return thread->HoldsLock(object);
 }
 
 static void Thread_nativeInterrupt(JNIEnv* env, jobject java_thread) {
-  ScopedJniThreadState ts(env);
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = Thread::FromManagedThread(ts, java_thread);
+  ScopedObjectAccess soa(env);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = Thread::FromManagedThread(soa, java_thread);
   if (thread != NULL) {
     thread->Interrupt();
   }
 }
 
 static void Thread_nativeSetName(JNIEnv* env, jobject java_thread, jstring java_name) {
-  ScopedJniThreadState ts(env);
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = Thread::FromManagedThread(ts, java_thread);
+  ScopedObjectAccess soa(env);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = Thread::FromManagedThread(soa, java_thread);
   if (thread == NULL) {
     return;
   }
@@ -119,9 +126,9 @@
  * threads at Thread.NORM_PRIORITY (5).
  */
 static void Thread_nativeSetPriority(JNIEnv* env, jobject java_thread, jint new_priority) {
-  ScopedJniThreadState ts(env);
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = Thread::FromManagedThread(ts, java_thread);
+  ScopedObjectAccess soa(env);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  Thread* thread = Thread::FromManagedThread(soa, java_thread);
   if (thread != NULL) {
     thread->SetNativePriority(new_priority);
   }
diff --git a/src/native/java_lang_Throwable.cc b/src/native/java_lang_Throwable.cc
index 1c59a34..332a130 100644
--- a/src/native/java_lang_Throwable.cc
+++ b/src/native/java_lang_Throwable.cc
@@ -15,14 +15,14 @@
  */
 
 #include "jni_internal.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 
 namespace art {
 
 static jobject Throwable_nativeFillInStackTrace(JNIEnv* env, jclass) {
-  ScopedJniThreadState ts(env);
-  return ts.Self()->CreateInternalStackTrace(ts);
+  ScopedObjectAccess soa(env);
+  return soa.Self()->CreateInternalStackTrace(soa);
 }
 
 static jobjectArray Throwable_nativeGetStackTrace(JNIEnv* env, jclass, jobject javaStackState) {
diff --git a/src/native/java_lang_VMClassLoader.cc b/src/native/java_lang_VMClassLoader.cc
index 0689f74..4b5c31c 100644
--- a/src/native/java_lang_VMClassLoader.cc
+++ b/src/native/java_lang_VMClassLoader.cc
@@ -17,15 +17,15 @@
 #include "class_linker.h"
 #include "class_loader.h"
 #include "jni_internal.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedUtfChars.h"
 #include "zip_archive.h"
 
 namespace art {
 
 static jclass VMClassLoader_findLoadedClass(JNIEnv* env, jclass, jobject javaLoader, jstring javaName) {
-  ScopedJniThreadState ts(env);
-  ClassLoader* loader = ts.Decode<ClassLoader*>(javaLoader);
+  ScopedObjectAccess soa(env);
+  ClassLoader* loader = soa.Decode<ClassLoader*>(javaLoader);
   ScopedUtfChars name(env, javaName);
   if (name.c_str() == NULL) {
     return NULL;
@@ -34,7 +34,7 @@
   std::string descriptor(DotToDescriptor(name.c_str()));
   Class* c = Runtime::Current()->GetClassLinker()->LookupClass(descriptor.c_str(), loader);
   if (c != NULL && c->IsResolved()) {
-    return ts.AddLocalReference<jclass>(c);
+    return soa.AddLocalReference<jclass>(c);
   } else {
     // Class wasn't resolved so it may be erroneous or not yet ready, force the caller to go into
     // the regular loadClass code.
diff --git a/src/native/java_lang_reflect_Array.cc b/src/native/java_lang_reflect_Array.cc
index 729312e..fa59750 100644
--- a/src/native/java_lang_reflect_Array.cc
+++ b/src/native/java_lang_reflect_Array.cc
@@ -18,13 +18,14 @@
 #include "jni_internal.h"
 #include "object.h"
 #include "object_utils.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
 // Recursively create an array with multiple dimensions.  Elements may be
 // Objects or primitive types.
-static Array* CreateMultiArray(Class* array_class, int current_dimension, IntArray* dimensions) {
+static Array* CreateMultiArray(Class* array_class, int current_dimension, IntArray* dimensions)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   int32_t array_length = dimensions->Get(current_dimension++);
   SirtRef<Array> new_array(Array::Alloc(array_class, array_length));
   if (new_array.get() == NULL) {
@@ -69,12 +70,12 @@
 // subtract pieces off.  Besides, we want to start with the outermost
 // piece and work our way in.
 static jobject Array_createMultiArray(JNIEnv* env, jclass, jclass javaElementClass, jobject javaDimArray) {
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   DCHECK(javaElementClass != NULL);
-  Class* element_class = ts.Decode<Class*>(javaElementClass);
+  Class* element_class = soa.Decode<Class*>(javaElementClass);
   DCHECK(element_class->IsClass());
   DCHECK(javaDimArray != NULL);
-  Object* dimensions_obj = ts.Decode<Object*>(javaDimArray);
+  Object* dimensions_obj = soa.Decode<Object*>(javaDimArray);
   DCHECK(dimensions_obj->IsArrayInstance());
   DCHECK_STREQ(ClassHelper(dimensions_obj->GetClass()).GetDescriptor(), "[I");
   IntArray* dimensions_array = down_cast<IntArray*>(dimensions_obj);
@@ -90,7 +91,7 @@
   for (int i = 0; i < num_dimensions; i++) {
     int dimension = dimensions_array->Get(i);
     if (dimension < 0) {
-      ts.Self()->ThrowNewExceptionF("Ljava/lang/NegativeArraySizeException;",
+      soa.Self()->ThrowNewExceptionF("Ljava/lang/NegativeArraySizeException;",
           "Dimension %d: %d", i, dimension);
       return NULL;
     }
@@ -113,15 +114,15 @@
     CHECK(Thread::Current()->IsExceptionPending());
     return NULL;
   }
-  return ts.AddLocalReference<jobject>(new_array);
+  return soa.AddLocalReference<jobject>(new_array);
 }
 
 static jobject Array_createObjectArray(JNIEnv* env, jclass, jclass javaElementClass, jint length) {
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   DCHECK(javaElementClass != NULL);
-  Class* element_class = ts.Decode<Class*>(javaElementClass);
+  Class* element_class = soa.Decode<Class*>(javaElementClass);
   if (length < 0) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/NegativeArraySizeException;", "%d", length);
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/NegativeArraySizeException;", "%d", length);
     return NULL;
   }
   std::string descriptor;
@@ -131,16 +132,16 @@
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Class* array_class = class_linker->FindClass(descriptor.c_str(), element_class->GetClassLoader());
   if (array_class == NULL) {
-    CHECK(ts.Self()->IsExceptionPending());
+    CHECK(soa.Self()->IsExceptionPending());
     return NULL;
   }
   DCHECK(array_class->IsArrayClass());
   Array* new_array = Array::Alloc(array_class, length);
   if (new_array == NULL) {
-    CHECK(ts.Self()->IsExceptionPending());
+    CHECK(soa.Self()->IsExceptionPending());
     return NULL;
   }
-  return ts.AddLocalReference<jobject>(new_array);
+  return soa.AddLocalReference<jobject>(new_array);
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/src/native/java_lang_reflect_Constructor.cc b/src/native/java_lang_reflect_Constructor.cc
index 564d6db..a6bd450 100644
--- a/src/native/java_lang_reflect_Constructor.cc
+++ b/src/native/java_lang_reflect_Constructor.cc
@@ -19,7 +19,7 @@
 #include "object.h"
 #include "object_utils.h"
 #include "reflection.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
@@ -31,17 +31,17 @@
  * with an interface, array, or primitive class.
  */
 static jobject Constructor_newInstance(JNIEnv* env, jobject javaMethod, jobjectArray javaArgs) {
-  ScopedJniThreadState ts(env);
-  Method* m = ts.Decode<Object*>(javaMethod)->AsMethod();
+  ScopedObjectAccess soa(env);
+  Method* m = soa.Decode<Object*>(javaMethod)->AsMethod();
   Class* c = m->GetDeclaringClass();
   if (c->IsAbstract()) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
         "Can't instantiate abstract class %s", PrettyDescriptor(c).c_str());
     return NULL;
   }
 
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(c, true, true)) {
-    DCHECK(ts.Self()->IsExceptionPending());
+    DCHECK(soa.Self()->IsExceptionPending());
     return NULL;
   }
 
@@ -50,8 +50,8 @@
     return NULL;
   }
 
-  jobject javaReceiver = ts.AddLocalReference<jobject>(receiver);
-  InvokeMethod(ts, javaMethod, javaReceiver, javaArgs);
+  jobject javaReceiver = soa.AddLocalReference<jobject>(receiver);
+  InvokeMethod(soa, javaMethod, javaReceiver, javaArgs);
 
   // Constructors are ()V methods, so we shouldn't touch the result of InvokeMethod.
   return javaReceiver;
diff --git a/src/native/java_lang_reflect_Field.cc b/src/native/java_lang_reflect_Field.cc
index b2ede63..e764b25 100644
--- a/src/native/java_lang_reflect_Field.cc
+++ b/src/native/java_lang_reflect_Field.cc
@@ -19,14 +19,16 @@
 #include "object.h"
 #include "object_utils.h"
 #include "reflection.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
-static bool GetFieldValue(const ScopedJniThreadState& ts, Object* o, Field* f, JValue& value,
-                          bool allow_references) {
+static bool GetFieldValue(const ScopedObjectAccess& soa, Object* o, Field* f,
+                          JValue& value, bool allow_references)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   DCHECK_EQ(value.GetJ(), 0LL);
-  if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(f->GetDeclaringClass(), true, true)) {
+  if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(f->GetDeclaringClass(),
+                                                               true, true)) {
     return false;
   }
   switch (FieldHelper(f).GetTypeAsPrimitiveType()) {
@@ -65,18 +67,20 @@
     // Never okay.
     break;
   }
-  ts.Self()->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
+  soa.Self()->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
       "Not a primitive field: %s", PrettyField(f).c_str());
   return false;
 }
 
-static bool CheckReceiver(const ScopedJniThreadState& ts, jobject javaObj, Field* f, Object*& o) {
+static bool CheckReceiver(const ScopedObjectAccess& soa, jobject javaObj, Field* f,
+                          Object*& o)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (f->IsStatic()) {
     o = NULL;
     return true;
   }
 
-  o = ts.Decode<Object*>(javaObj);
+  o = soa.Decode<Object*>(javaObj);
   Class* declaringClass = f->GetDeclaringClass();
   if (!VerifyObjectInClass(o, declaringClass)) {
     return false;
@@ -85,34 +89,34 @@
 }
 
 static jobject Field_get(JNIEnv* env, jobject javaField, jobject javaObj) {
-  ScopedJniThreadState ts(env);
-  Field* f = ts.DecodeField(env->FromReflectedField(javaField));
+  ScopedObjectAccess soa(env);
+  Field* f = soa.DecodeField(env->FromReflectedField(javaField));
   Object* o = NULL;
-  if (!CheckReceiver(ts, javaObj, f, o)) {
+  if (!CheckReceiver(soa, javaObj, f, o)) {
     return NULL;
   }
 
   // Get the field's value, boxing if necessary.
   JValue value;
-  if (!GetFieldValue(ts, o, f, value, true)) {
+  if (!GetFieldValue(soa, o, f, value, true)) {
     return NULL;
   }
   BoxPrimitive(FieldHelper(f).GetTypeAsPrimitiveType(), value);
 
-  return ts.AddLocalReference<jobject>(value.GetL());
+  return soa.AddLocalReference<jobject>(value.GetL());
 }
 
 static JValue GetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, char dst_descriptor) {
-  ScopedJniThreadState ts(env);
-  Field* f = ts.DecodeField(env->FromReflectedField(javaField));
+  ScopedObjectAccess soa(env);
+  Field* f = soa.DecodeField(env->FromReflectedField(javaField));
   Object* o = NULL;
-  if (!CheckReceiver(ts, javaObj, f, o)) {
+  if (!CheckReceiver(soa, javaObj, f, o)) {
     return JValue();
   }
 
   // Read the value.
   JValue field_value;
-  if (!GetFieldValue(ts, o, f, field_value, false)) {
+  if (!GetFieldValue(soa, o, f, field_value, false)) {
     return JValue();
   }
 
@@ -158,8 +162,10 @@
   return GetPrimitiveField(env, javaField, javaObj, 'S').GetS();
 }
 
-static void SetFieldValue(Object* o, Field* f, const JValue& new_value, bool allow_references) {
-  if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(f->GetDeclaringClass(), true, true)) {
+static void SetFieldValue(Object* o, Field* f, const JValue& new_value, bool allow_references)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(f->GetDeclaringClass(),
+                                                               true, true)) {
     return;
   }
   switch (FieldHelper(f).GetTypeAsPrimitiveType()) {
@@ -208,11 +214,11 @@
 }
 
 static void Field_set(JNIEnv* env, jobject javaField, jobject javaObj, jobject javaValue) {
-  ScopedJniThreadState ts(env);
-  Field* f = ts.DecodeField(env->FromReflectedField(javaField));
+  ScopedObjectAccess soa(env);
+  Field* f = soa.DecodeField(env->FromReflectedField(javaField));
 
   // Unbox the value, if necessary.
-  Object* boxed_value = ts.Decode<Object*>(javaValue);
+  Object* boxed_value = soa.Decode<Object*>(javaValue);
   JValue unboxed_value;
   if (!UnboxPrimitiveForField(boxed_value, FieldHelper(f).GetType(), unboxed_value, f)) {
     return;
@@ -220,7 +226,7 @@
 
   // Check that the receiver is non-null and an instance of the field's declaring class.
   Object* o = NULL;
-  if (!CheckReceiver(ts, javaObj, f, o)) {
+  if (!CheckReceiver(soa, javaObj, f, o)) {
     return;
   }
 
@@ -229,15 +235,15 @@
 
 static void SetPrimitiveField(JNIEnv* env, jobject javaField, jobject javaObj, char src_descriptor,
                               const JValue& new_value) {
-  ScopedJniThreadState ts(env);
-  Field* f = ts.DecodeField(env->FromReflectedField(javaField));
+  ScopedObjectAccess soa(env);
+  Field* f = soa.DecodeField(env->FromReflectedField(javaField));
   Object* o = NULL;
-  if (!CheckReceiver(ts, javaObj, f, o)) {
+  if (!CheckReceiver(soa, javaObj, f, o)) {
     return;
   }
   FieldHelper fh(f);
   if (!fh.IsPrimitiveType()) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
         "Not a primitive field: %s", PrettyField(f).c_str());
     return;
   }
diff --git a/src/native/java_lang_reflect_Method.cc b/src/native/java_lang_reflect_Method.cc
index 2695822..2a6ee50 100644
--- a/src/native/java_lang_reflect_Method.cc
+++ b/src/native/java_lang_reflect_Method.cc
@@ -19,18 +19,18 @@
 #include "object.h"
 #include "object_utils.h"
 #include "reflection.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
 static jobject Method_invoke(JNIEnv* env, jobject javaMethod, jobject javaReceiver, jobject javaArgs) {
-  ScopedJniThreadState ts(env);
-  return InvokeMethod(ts, javaMethod, javaReceiver, javaArgs);
+  ScopedObjectAccess soa(env);
+  return InvokeMethod(soa, javaMethod, javaReceiver, javaArgs);
 }
 
 static jobject Method_getExceptionTypesNative(JNIEnv* env, jobject javaMethod) {
-  ScopedJniThreadState ts(env);
-  Method* proxy_method = ts.Decode<Object*>(javaMethod)->AsMethod();
+  ScopedObjectAccess soa(env);
+  Method* proxy_method = soa.Decode<Object*>(javaMethod)->AsMethod();
   CHECK(proxy_method->GetDeclaringClass()->IsProxyClass());
   SynthesizedProxyClass* proxy_class =
       down_cast<SynthesizedProxyClass*>(proxy_method->GetDeclaringClass());
@@ -44,13 +44,13 @@
   }
   CHECK_NE(throws_index, -1);
   ObjectArray<Class>* declared_exceptions = proxy_class->GetThrows()->Get(throws_index);
-  return ts.AddLocalReference<jobject>(declared_exceptions->Clone());
+  return soa.AddLocalReference<jobject>(declared_exceptions->Clone());
 }
 
 static jobject Method_findOverriddenMethodNative(JNIEnv* env, jobject javaMethod) {
-  ScopedJniThreadState ts(env);
-  Method* method = ts.Decode<Object*>(javaMethod)->AsMethod();
-  return ts.AddLocalReference<jobject>(method->FindOverriddenMethod());
+  ScopedObjectAccess soa(env);
+  Method* method = soa.Decode<Object*>(javaMethod)->AsMethod();
+  return soa.AddLocalReference<jobject>(method->FindOverriddenMethod());
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/src/native/java_lang_reflect_Proxy.cc b/src/native/java_lang_reflect_Proxy.cc
index a1337a6..81e3f169 100644
--- a/src/native/java_lang_reflect_Proxy.cc
+++ b/src/native/java_lang_reflect_Proxy.cc
@@ -18,20 +18,20 @@
 #include "class_loader.h"
 #include "jni_internal.h"
 #include "object.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
 static jclass Proxy_generateProxy(JNIEnv* env, jclass, jstring javaName, jobjectArray javaInterfaces, jobject javaLoader, jobjectArray javaMethods, jobjectArray javaThrows) {
-  ScopedJniThreadState ts(env);
-  String* name = ts.Decode<String*>(javaName);
-  ObjectArray<Class>* interfaces = ts.Decode<ObjectArray<Class>*>(javaInterfaces);
-  ClassLoader* loader = ts.Decode<ClassLoader*>(javaLoader);
-  ObjectArray<Method>* methods = ts.Decode<ObjectArray<Method>*>(javaMethods);
-  ObjectArray<ObjectArray<Class> >* throws = ts.Decode<ObjectArray<ObjectArray<Class> >*>(javaThrows);
+  ScopedObjectAccess soa(env);
+  String* name = soa.Decode<String*>(javaName);
+  ObjectArray<Class>* interfaces = soa.Decode<ObjectArray<Class>*>(javaInterfaces);
+  ClassLoader* loader = soa.Decode<ClassLoader*>(javaLoader);
+  ObjectArray<Method>* methods = soa.Decode<ObjectArray<Method>*>(javaMethods);
+  ObjectArray<ObjectArray<Class> >* throws = soa.Decode<ObjectArray<ObjectArray<Class> >*>(javaThrows);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Class* result = class_linker->CreateProxyClass(name, interfaces, loader, methods, throws);
-  return ts.AddLocalReference<jclass>(result);
+  return soa.AddLocalReference<jclass>(result);
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/src/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc b/src/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
index e3c31b0..fe95746 100644
--- a/src/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
+++ b/src/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
@@ -16,12 +16,14 @@
 
 #include "debugger.h"
 #include "logging.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedPrimitiveArray.h"
 
 namespace art {
 
 static void DdmServer_nativeSendChunk(JNIEnv* env, jclass, jint type,
                                       jbyteArray javaData, jint offset, jint length) {
+  ScopedObjectAccess soa(env);
   ScopedByteArrayRO data(env, javaData);
   DCHECK_LE(offset + length, static_cast<int32_t>(data.size()));
   Dbg::DdmSendChunk(type, length, reinterpret_cast<const uint8_t*>(&data[offset]));
diff --git a/src/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/src/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 87d2b22..b14d6ff 100644
--- a/src/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/src/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -17,9 +17,9 @@
 #include "debugger.h"
 #include "jni_internal.h"
 #include "logging.h"
-#include "scoped_heap_lock.h"
-#include "scoped_jni_thread_state.h"
-#include "scoped_thread_list_lock.h"
+#include "../mutex.h"  // Avoid pulling in icu4c's mutex.h
+#include "scoped_thread_state_change.h"
+#include "ScopedLocalRef.h"
 #include "ScopedPrimitiveArray.h"
 #include "stack.h"
 #include "thread_list.h"
@@ -30,7 +30,8 @@
   Dbg::SetAllocTrackingEnabled(enable);
 }
 
-static jbyteArray DdmVmInternal_getRecentAllocations(JNIEnv*, jclass) {
+static jbyteArray DdmVmInternal_getRecentAllocations(JNIEnv* env, jclass) {
+  ScopedObjectAccess soa(env);
   return Dbg::GetRecentAllocations();
 }
 
@@ -38,7 +39,7 @@
   return Dbg::IsAllocTrackingEnabled();
 }
 
-static Thread* FindThreadByThinLockId(uint32_t thin_lock_id) {
+static jobject FindThreadByThinLockId(JNIEnv* env, uint32_t thin_lock_id) {
   struct ThreadFinder {
     explicit ThreadFinder(uint32_t thin_lock_id) : thin_lock_id(thin_lock_id), thread(NULL) {
     }
@@ -54,8 +55,16 @@
     Thread* thread;
   };
   ThreadFinder finder(thin_lock_id);
-  Runtime::Current()->GetThreadList()->ForEach(ThreadFinder::Callback, &finder);
-  return finder.thread;
+  {
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    Runtime::Current()->GetThreadList()->ForEach(ThreadFinder::Callback, &finder);
+  }
+  if (finder.thread != NULL) {
+    ScopedObjectAccess soa(env);
+    return soa.AddLocalReference<jobject>(finder.thread->GetPeer());
+  } else {
+    return NULL;
+  }
 }
 
 /*
@@ -63,15 +72,26 @@
  * NULL on failure, e.g. if the threadId couldn't be found.
  */
 static jobjectArray DdmVmInternal_getStackTraceById(JNIEnv* env, jclass, jint thin_lock_id) {
-  ScopedHeapLock heap_lock;
-  ScopedThreadListLock thread_list_lock;
-  Thread* thread = FindThreadByThinLockId(static_cast<uint32_t>(thin_lock_id));
-  if (thread == NULL) {
+  ScopedLocalRef<jobject> peer(env,
+                               FindThreadByThinLockId(env, static_cast<uint32_t>(thin_lock_id)));
+  if (peer.get() == NULL) {
     return NULL;
   }
-  ScopedJniThreadState ts(env);
-  jobject stack = GetThreadStack(ts, thread);
-  return (stack != NULL) ? Thread::InternalStackTraceToStackTraceElementArray(env, stack) : NULL;
+  bool timeout;
+  // Suspend thread to build stack trace.
+  Thread* thread = Thread::SuspendForDebugger(peer.get(), true, &timeout);
+  if (thread != NULL) {
+    jobject trace;
+    {
+      ScopedObjectAccess soa(env);
+      trace = thread->CreateInternalStackTrace(soa);
+    }
+    // Restart suspended thread.
+    Runtime::Current()->GetThreadList()->Resume(thread, true);
+    return Thread::InternalStackTraceToStackTraceElementArray(env, trace);
+  } else {
+    return NULL;
+  }
 }
 
 static void ThreadCountCallback(Thread*, void* context) {
@@ -109,7 +129,10 @@
 
   std::vector<uint8_t>& bytes = *reinterpret_cast<std::vector<uint8_t>*>(context);
   JDWP::Append4BE(bytes, t->GetThinLockId());
-  JDWP::Append1BE(bytes, t->GetState());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    JDWP::Append1BE(bytes, t->GetState());
+  }
   JDWP::Append4BE(bytes, t->GetTid());
   JDWP::Append4BE(bytes, utime);
   JDWP::Append4BE(bytes, stime);
@@ -119,7 +142,7 @@
 static jbyteArray DdmVmInternal_getThreadStats(JNIEnv* env, jclass) {
   std::vector<uint8_t> bytes;
   {
-    ScopedThreadListLock thread_list_lock;
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
     ThreadList* thread_list = Runtime::Current()->GetThreadList();
 
     uint16_t thread_count = 0;
@@ -139,7 +162,8 @@
   return result;
 }
 
-static jint DdmVmInternal_heapInfoNotify(JNIEnv*, jclass, jint when) {
+static jint DdmVmInternal_heapInfoNotify(JNIEnv* env, jclass, jint when) {
+  ScopedObjectAccess soa(env);
   return Dbg::DdmHandleHpifChunk(static_cast<Dbg::HpifWhen>(when));
 }
 
diff --git a/src/native/sun_misc_Unsafe.cc b/src/native/sun_misc_Unsafe.cc
index dfddd86..282731d 100644
--- a/src/native/sun_misc_Unsafe.cc
+++ b/src/native/sun_misc_Unsafe.cc
@@ -16,34 +16,34 @@
 
 #include "jni_internal.h"
 #include "object.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 namespace art {
 
 static jlong Unsafe_objectFieldOffset0(JNIEnv* env, jclass, jobject javaField) {
   // TODO: move to Java code
   jfieldID fid = env->FromReflectedField(javaField);
-  ScopedJniThreadState ts(env);
-  Field* field = ts.DecodeField(fid);
+  ScopedObjectAccess soa(env);
+  Field* field = soa.DecodeField(fid);
   return field->GetOffset().Int32Value();
 }
 
 static jint Unsafe_arrayBaseOffset0(JNIEnv* env, jclass, jclass javaArrayClass) {
   // TODO: move to Java code
-  ScopedJniThreadState ts(env);
-  Class* array_class = ts.Decode<Class*>(javaArrayClass);
+  ScopedObjectAccess soa(env);
+  Class* array_class = soa.Decode<Class*>(javaArrayClass);
   return Array::DataOffset(array_class->GetComponentSize()).Int32Value();
 }
 
 static jint Unsafe_arrayIndexScale0(JNIEnv* env, jclass, jclass javaClass) {
-  ScopedJniThreadState ts(env);
-  Class* c = ts.Decode<Class*>(javaClass);
+  ScopedObjectAccess soa(env);
+  Class* c = soa.Decode<Class*>(javaClass);
   return c->GetComponentSize();
 }
 
 static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint expectedValue, jint newValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
   volatile int32_t* address = reinterpret_cast<volatile int32_t*>(raw_addr);
   // Note: android_atomic_release_cas() returns 0 on success, not failure.
@@ -52,8 +52,8 @@
 }
 
 static jboolean Unsafe_compareAndSwapLong(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong expectedValue, jlong newValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
   volatile int64_t* address = reinterpret_cast<volatile int64_t*>(raw_addr);
   // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
@@ -62,10 +62,10 @@
 }
 
 static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaExpectedValue, jobject javaNewValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
-  Object* expectedValue = ts.Decode<Object*>(javaExpectedValue);
-  Object* newValue = ts.Decode<Object*>(javaNewValue);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
+  Object* expectedValue = soa.Decode<Object*>(javaExpectedValue);
+  Object* newValue = soa.Decode<Object*>(javaNewValue);
   byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
   int32_t* address = reinterpret_cast<int32_t*>(raw_addr);
   // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
@@ -78,105 +78,105 @@
 }
 
 static jint Unsafe_getInt(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   return obj->GetField32(MemberOffset(offset), false);
 }
 
 static jint Unsafe_getIntVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
   volatile int32_t* address = reinterpret_cast<volatile int32_t*>(raw_addr);
   return android_atomic_acquire_load(address);
 }
 
 static void Unsafe_putInt(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint newValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   obj->SetField32(MemberOffset(offset), newValue, false);
 }
 
 static void Unsafe_putIntVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint newValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
   volatile int32_t* address = reinterpret_cast<volatile int32_t*>(raw_addr);
   android_atomic_release_store(newValue, address);
 }
 
 static void Unsafe_putOrderedInt(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint newValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   ANDROID_MEMBAR_STORE();
   obj->SetField32(MemberOffset(offset), newValue, false);
 }
 
 static jlong Unsafe_getLong(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
   int64_t* address = reinterpret_cast<int64_t*>(raw_addr);
   return *address;
 }
 
 static jlong Unsafe_getLongVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   return obj->GetField64(MemberOffset(offset), true);
 }
 
 static void Unsafe_putLong(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong newValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   obj->SetField64(MemberOffset(offset), newValue, false);
 }
 
 static void Unsafe_putLongVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong newValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   obj->SetField64(MemberOffset(offset), newValue, true);
 }
 
 static void Unsafe_putOrderedLong(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong newValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   ANDROID_MEMBAR_STORE();
   obj->SetField64(MemberOffset(offset), newValue, false);
 }
 
 static jobject Unsafe_getObjectVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   Object* value = obj->GetFieldObject<Object*>(MemberOffset(offset), true);
-  return ts.AddLocalReference<jobject>(value);
+  return soa.AddLocalReference<jobject>(value);
 }
 
 static jobject Unsafe_getObject(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
   Object* value = obj->GetFieldObject<Object*>(MemberOffset(offset), false);
-  return ts.AddLocalReference<jobject>(value);
+  return soa.AddLocalReference<jobject>(value);
 }
 
 static void Unsafe_putObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaNewValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
-  Object* newValue = ts.Decode<Object*>(javaNewValue);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
+  Object* newValue = soa.Decode<Object*>(javaNewValue);
   obj->SetFieldObject(MemberOffset(offset), newValue, false);
 }
 
 static void Unsafe_putObjectVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaNewValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
-  Object* newValue = ts.Decode<Object*>(javaNewValue);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
+  Object* newValue = soa.Decode<Object*>(javaNewValue);
   obj->SetFieldObject(MemberOffset(offset), newValue, true);
 }
 
 static void Unsafe_putOrderedObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaNewValue) {
-  ScopedJniThreadState ts(env);
-  Object* obj = ts.Decode<Object*>(javaObj);
-  Object* newValue = ts.Decode<Object*>(javaNewValue);
+  ScopedObjectAccess soa(env);
+  Object* obj = soa.Decode<Object*>(javaObj);
+  Object* newValue = soa.Decode<Object*>(javaNewValue);
   ANDROID_MEMBAR_STORE();
   obj->SetFieldObject(MemberOffset(offset), newValue, false);
 }
diff --git a/src/oat/jni/arm/calling_convention_arm.cc b/src/oat/jni/arm/calling_convention_arm.cc
index 75c0380..e06a583 100644
--- a/src/oat/jni/arm/calling_convention_arm.cc
+++ b/src/oat/jni/arm/calling_convention_arm.cc
@@ -53,48 +53,27 @@
   return ReturnRegisterForShorty(GetShorty());
 }
 
-// Managed runtime calling convention
+ManagedRegister ArmJniCallingConvention::IntReturnRegister() {
+  return ArmManagedRegister::FromCoreRegister(R0);
+}
 
-std::vector<ManagedRegister> ArmManagedRuntimeCallingConvention::entry_spills_;
+// Managed runtime calling convention
 
 ManagedRegister ArmManagedRuntimeCallingConvention::MethodRegister() {
   return ArmManagedRegister::FromCoreRegister(R0);
 }
 
 bool ArmManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
-  return itr_slots_ < 3;
+  return false;  // Everything moved to stack on entry.
 }
 
 bool ArmManagedRuntimeCallingConvention::IsCurrentParamOnStack() {
-  if (itr_slots_ < 2) {
-    return false;
-  } else if (itr_slots_ > 2) {
-    return true;
-  } else {
-    // handle funny case of a long/double straddling registers and the stack
-    return IsParamALongOrDouble(itr_args_);
-  }
+  return true;
 }
 
-static const Register kManagedArgumentRegisters[] = {
-  R1, R2, R3
-};
 ManagedRegister ArmManagedRuntimeCallingConvention::CurrentParamRegister() {
-  CHECK(IsCurrentParamInRegister());
-  if (IsParamALongOrDouble(itr_args_)) {
-    if (itr_slots_ == 0) {
-      return ArmManagedRegister::FromRegisterPair(R1_R2);
-    } else if (itr_slots_ == 1) {
-      return ArmManagedRegister::FromRegisterPair(R2_R3);
-    } else {
-      // This is a long/double split between registers and the stack
-      return ArmManagedRegister::FromCoreRegister(
-        kManagedArgumentRegisters[itr_slots_]);
-    }
-  } else {
-    return
-      ArmManagedRegister::FromCoreRegister(kManagedArgumentRegisters[itr_slots_]);
-  }
+  LOG(FATAL) << "Should not reach here";
+  return ManagedRegister::NoRegister();
 }
 
 FrameOffset ArmManagedRuntimeCallingConvention::CurrentParamStackOffset() {
@@ -103,15 +82,26 @@
       FrameOffset(displacement_.Int32Value() +   // displacement
                   kPointerSize +                 // Method*
                   (itr_slots_ * kPointerSize));  // offset into in args
-  if (itr_slots_ == 2) {
-    // the odd spanning case, bump the offset to skip the first half of the
-    // input which is in a register
-    CHECK(IsCurrentParamInRegister());
-    result = FrameOffset(result.Int32Value() + 4);
-  }
   return result;
 }
 
+const std::vector<ManagedRegister>& ArmManagedRuntimeCallingConvention::EntrySpills() {
+  // We spill the argument registers on ARM to free them up for scratch use, we then assume
+  // all arguments are on the stack.
+  if (entry_spills_.size() == 0) {
+    size_t num_spills = NumArgs() + NumLongOrDoubleArgs();
+    if (num_spills > 0) {
+      entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R1));
+      if (num_spills > 1) {
+        entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R2));
+        if (num_spills > 2) {
+          entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R3));
+        }
+      }
+    }
+  }
+  return entry_spills_;
+}
 // JNI calling convention
 
 ArmJniCallingConvention::ArmJniCallingConvention(bool is_static, bool is_synchronized,
@@ -165,11 +155,6 @@
                  kStackAlignment);
 }
 
-// Will reg be crushed by an outgoing argument?
-bool ArmJniCallingConvention::IsMethodRegisterClobberedPreCall() {
-  return true;  // The method register R0 is always clobbered by the JNIEnv
-}
-
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void ArmJniCallingConvention::Next() {
diff --git a/src/oat/jni/arm/calling_convention_arm.h b/src/oat/jni/arm/calling_convention_arm.h
index 527ffa1..b536b6b 100644
--- a/src/oat/jni/arm/calling_convention_arm.h
+++ b/src/oat/jni/arm/calling_convention_arm.h
@@ -36,12 +36,10 @@
   virtual bool IsCurrentParamOnStack();
   virtual ManagedRegister CurrentParamRegister();
   virtual FrameOffset CurrentParamStackOffset();
-  virtual const std::vector<ManagedRegister>& EntrySpills() {
-    DCHECK(entry_spills_.empty());
-    return entry_spills_;
-  }
+  virtual const std::vector<ManagedRegister>& EntrySpills();
+
  private:
-  static std::vector<ManagedRegister> entry_spills_;
+  std::vector<ManagedRegister> entry_spills_;
 
   DISALLOW_COPY_AND_ASSIGN(ArmManagedRuntimeCallingConvention);
 };
@@ -52,6 +50,7 @@
   virtual ~ArmJniCallingConvention() {}
   // Calling convention
   virtual ManagedRegister ReturnRegister();
+  virtual ManagedRegister IntReturnRegister();
   virtual ManagedRegister InterproceduralScratchRegister();
   // JNI calling convention
   virtual void Next();  // Override default behavior for AAPCS
@@ -65,7 +64,6 @@
   virtual uint32_t FpSpillMask() const {
     return 0;  // Floats aren't spilled in JNI down call
   }
-  virtual bool IsMethodRegisterClobberedPreCall();
   virtual bool IsCurrentParamInRegister();
   virtual bool IsCurrentParamOnStack();
   virtual ManagedRegister CurrentParamRegister();
diff --git a/src/oat/jni/calling_convention.h b/src/oat/jni/calling_convention.h
index ae6c7ed..121d1f8 100644
--- a/src/oat/jni/calling_convention.h
+++ b/src/oat/jni/calling_convention.h
@@ -41,9 +41,9 @@
     return result;
   }
 
-  // Register that holds result of this method
+  // Register that holds result of this method invocation.
   virtual ManagedRegister ReturnRegister() = 0;
-  // Register reserved for scratch usage during procedure calls
+  // Register reserved for scratch usage during procedure calls.
   virtual ManagedRegister InterproceduralScratchRegister() = 0;
 
   // Offset of Method within the frame
@@ -224,6 +224,8 @@
   // Location where the return value of a call can be squirreled if another
   // call is made following the native call
   FrameOffset ReturnValueSaveLocation() const;
+  // Register that holds result if it is integer.
+  virtual ManagedRegister IntReturnRegister() = 0;
 
   // Callee save registers to spill prior to native code (which may clobber)
   virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const = 0;
@@ -232,10 +234,6 @@
   virtual uint32_t CoreSpillMask() const = 0;
   virtual uint32_t FpSpillMask() const = 0;
 
-  // Returns true if the method register will have been clobbered during argument
-  // set up
-  virtual bool IsMethodRegisterClobberedPreCall() = 0;
-
   // An extra scratch register live after the call
   virtual ManagedRegister ReturnScratchRegister() const = 0;
 
diff --git a/src/oat/jni/jni_compiler.cc b/src/oat/jni/jni_compiler.cc
index 4916072..a9a1bca 100644
--- a/src/oat/jni/jni_compiler.cc
+++ b/src/oat/jni/jni_compiler.cc
@@ -36,31 +36,336 @@
 
 namespace art {
 
-static void ChangeThreadState(Assembler* jni_asm, ThreadState new_state,
-                              ManagedRegister scratch, ManagedRegister return_reg,
-                              FrameOffset return_save_location,
-                              size_t return_size) {
-  /*
-   * This code mirrors that of Thread::SetState where detail is given on why
-   * barriers occur when they do.
-   */
-  if (new_state == kRunnable) {
-    /*
-     * Change our status to kRunnable.  The transition requires
-     * that we check for pending suspension, because the runtime considers
-     * us to be "asleep" in all other states, and another thread could
-     * be performing a GC now.
-     */
-    __ StoreImmediateToThread(Thread::StateOffset(), kRunnable, scratch);
-    __ MemoryBarrier(scratch);
-    __ SuspendPoll(scratch, return_reg, return_save_location, return_size);
-  } else {
-    /*
-     * Not changing to kRunnable. No additional work required.
-     */
-    __ MemoryBarrier(scratch);
-    __ StoreImmediateToThread(Thread::StateOffset(), new_state, scratch);
+static void CopyParameter(Assembler* jni_asm,
+                          ManagedRuntimeCallingConvention* mr_conv,
+                          JniCallingConvention* jni_conv,
+                          size_t frame_size, size_t out_arg_size);
+static void SetNativeParameter(Assembler* jni_asm,
+                               JniCallingConvention* jni_conv,
+                               ManagedRegister in_reg);
+
+// Generate the JNI bridge for the given method, general contract:
+// - Arguments are in the managed runtime format, either on stack or in
+//   registers, a reference to the method object is supplied as part of this
+//   convention.
+//
+CompiledMethod* ArtJniCompileMethodInternal(Compiler& compiler,
+                                            uint32_t access_flags, uint32_t method_idx,
+                                            const DexFile& dex_file) {
+  const bool is_native = (access_flags & kAccNative) != 0;
+  CHECK(is_native);
+  const bool is_static = (access_flags & kAccStatic) != 0;
+  const bool is_synchronized = (access_flags & kAccSynchronized) != 0;
+  const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
+  InstructionSet instruction_set = compiler.GetInstructionSet();
+  if (instruction_set == kThumb2) {
+    instruction_set = kArm;
   }
+  // Calling conventions used to iterate over parameters to method
+  UniquePtr<JniCallingConvention> jni_conv(
+      JniCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
+  UniquePtr<ManagedRuntimeCallingConvention> mr_conv(
+      ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
+
+  // Assembler that holds generated instructions
+  UniquePtr<Assembler> jni_asm(Assembler::Create(instruction_set));
+  bool should_disassemble = false;
+
+  // Offsets into data structures
+  // TODO: if cross compiling these offsets are for the host not the target
+  const Offset functions(OFFSETOF_MEMBER(JNIEnvExt, functions));
+  const Offset monitor_enter(OFFSETOF_MEMBER(JNINativeInterface, MonitorEnter));
+  const Offset monitor_exit(OFFSETOF_MEMBER(JNINativeInterface, MonitorExit));
+
+  // 1. Build the frame saving all callee saves
+  const size_t frame_size(jni_conv->FrameSize());
+  const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
+  __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
+
+  // 2. Set up the StackIndirectReferenceTable
+  mr_conv->ResetIterator(FrameOffset(frame_size));
+  jni_conv->ResetIterator(FrameOffset(0));
+  __ StoreImmediateToFrame(jni_conv->SirtNumRefsOffset(),
+                           jni_conv->ReferenceCount(),
+                           mr_conv->InterproceduralScratchRegister());
+  __ CopyRawPtrFromThread(jni_conv->SirtLinkOffset(),
+                          Thread::TopSirtOffset(),
+                          mr_conv->InterproceduralScratchRegister());
+  __ StoreStackOffsetToThread(Thread::TopSirtOffset(),
+                              jni_conv->SirtOffset(),
+                              mr_conv->InterproceduralScratchRegister());
+
+  // 3. Place incoming reference arguments into SIRT
+  jni_conv->Next();  // Skip JNIEnv*
+  // 3.5. Create Class argument for static methods out of passed method
+  if (is_static) {
+    FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
+    // Check sirt offset is within frame
+    CHECK_LT(sirt_offset.Uint32Value(), frame_size);
+    __ LoadRef(jni_conv->InterproceduralScratchRegister(),
+               mr_conv->MethodRegister(), Method::DeclaringClassOffset());
+    __ VerifyObject(jni_conv->InterproceduralScratchRegister(), false);
+    __ StoreRef(sirt_offset, jni_conv->InterproceduralScratchRegister());
+    jni_conv->Next();  // in SIRT so move to next argument
+  }
+  while (mr_conv->HasNext()) {
+    CHECK(jni_conv->HasNext());
+    bool ref_param = jni_conv->IsCurrentParamAReference();
+    CHECK(!ref_param || mr_conv->IsCurrentParamAReference());
+    // References need placing in SIRT and the entry value passing
+    if (ref_param) {
+      // Compute SIRT entry, note null is placed in the SIRT but its boxed value
+      // must be NULL
+      FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
+      // Check SIRT offset is within frame and doesn't run into the saved segment state
+      CHECK_LT(sirt_offset.Uint32Value(), frame_size);
+      CHECK_NE(sirt_offset.Uint32Value(),
+               jni_conv->SavedLocalReferenceCookieOffset().Uint32Value());
+      bool input_in_reg = mr_conv->IsCurrentParamInRegister();
+      bool input_on_stack = mr_conv->IsCurrentParamOnStack();
+      CHECK(input_in_reg || input_on_stack);
+
+      if (input_in_reg) {
+        ManagedRegister in_reg  =  mr_conv->CurrentParamRegister();
+        __ VerifyObject(in_reg, mr_conv->IsCurrentArgPossiblyNull());
+        __ StoreRef(sirt_offset, in_reg);
+      } else if (input_on_stack) {
+        FrameOffset in_off  = mr_conv->CurrentParamStackOffset();
+        __ VerifyObject(in_off, mr_conv->IsCurrentArgPossiblyNull());
+        __ CopyRef(sirt_offset, in_off,
+                   mr_conv->InterproceduralScratchRegister());
+      }
+    }
+    mr_conv->Next();
+    jni_conv->Next();
+  }
+
+  // 4. Write out the end of the quick frames.
+  __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset());
+  __ StoreImmediateToThread(Thread::TopOfManagedStackPcOffset(), 0,
+                            mr_conv->InterproceduralScratchRegister());
+
+  // 5. Move frame down to allow space for outgoing args.
+  const size_t out_arg_size = jni_conv->OutArgSize();
+  __ IncreaseFrameSize(out_arg_size);
+
+
+  // 6. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable
+  //    can occur. The result is the saved JNI local state that is restored by the exit call. We
+  //    abuse the JNI calling convention here, which is guaranteed to support passing 2 pointer
+  //    arguments.
+  uintptr_t jni_start = is_synchronized ? ENTRYPOINT_OFFSET(pJniMethodStartSynchronized)
+                                        : ENTRYPOINT_OFFSET(pJniMethodStart);
+  jni_conv->ResetIterator(FrameOffset(out_arg_size));
+  FrameOffset locked_object_sirt_offset(0);
+  if (is_synchronized) {
+    // Pass object for locking.
+    jni_conv->Next();  // Skip JNIEnv.
+    locked_object_sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
+    jni_conv->ResetIterator(FrameOffset(out_arg_size));
+    if (jni_conv->IsCurrentParamOnStack()) {
+      FrameOffset out_off = jni_conv->CurrentParamStackOffset();
+      __ CreateSirtEntry(out_off, locked_object_sirt_offset,
+                         mr_conv->InterproceduralScratchRegister(),
+                         false);
+    } else {
+      ManagedRegister out_reg = jni_conv->CurrentParamRegister();
+      __ CreateSirtEntry(out_reg, locked_object_sirt_offset,
+                         ManagedRegister::NoRegister(), false);
+    }
+    jni_conv->Next();
+  }
+  if (jni_conv->IsCurrentParamInRegister()) {
+    __ GetCurrentThread(jni_conv->CurrentParamRegister());
+    __ Call(jni_conv->CurrentParamRegister(), Offset(jni_start),
+            jni_conv->InterproceduralScratchRegister());
+  } else {
+    __ GetCurrentThread(jni_conv->CurrentParamStackOffset(),
+                        jni_conv->InterproceduralScratchRegister());
+    __ Call(ThreadOffset(jni_start), jni_conv->InterproceduralScratchRegister());
+  }
+  if (is_synchronized) {  // Check for exceptions from monitor enter.
+    __ ExceptionPoll(jni_conv->InterproceduralScratchRegister(), out_arg_size);
+  }
+  FrameOffset saved_cookie_offset = jni_conv->SavedLocalReferenceCookieOffset();
+  __ Store(saved_cookie_offset, jni_conv->IntReturnRegister(), 4);
+
+  // 7. Iterate over arguments placing values from the managed calling convention
+  //    into the convention required for a native call (shuffling). For references
+  //    place an index/pointer to the reference after checking whether it is
+  //    NULL (which must be encoded as NULL).
+  //    Note: we do this prior to materializing the JNIEnv* and static's jclass to
+  //    give as many free registers for the shuffle as possible
+  mr_conv->ResetIterator(FrameOffset(frame_size+out_arg_size));
+  uint32_t args_count = 0;
+  while (mr_conv->HasNext()) {
+    args_count++;
+    mr_conv->Next();
+  }
+
+  // Do a backward pass over arguments, so that the generated code will be "mov
+  // R2, R3; mov R1, R2" instead of "mov R1, R2; mov R2, R3."
+  // TODO: A reverse iterator to improve readability.
+  for (uint32_t i = 0; i < args_count; ++i) {
+    mr_conv->ResetIterator(FrameOffset(frame_size + out_arg_size));
+    jni_conv->ResetIterator(FrameOffset(out_arg_size));
+    jni_conv->Next();  // Skip JNIEnv*.
+    if (is_static) {
+      jni_conv->Next();  // Skip Class for now.
+    }
+    // Skip to the argument we're interested in.
+    for (uint32_t j = 0; j < args_count - i - 1; ++j) {
+      mr_conv->Next();
+      jni_conv->Next();
+    }
+    CopyParameter(jni_asm.get(), mr_conv.get(), jni_conv.get(), frame_size, out_arg_size);
+  }
+  if (is_static) {
+    // Create argument for Class
+    mr_conv->ResetIterator(FrameOffset(frame_size+out_arg_size));
+    jni_conv->ResetIterator(FrameOffset(out_arg_size));
+    jni_conv->Next();  // Skip JNIEnv*
+    FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
+    if (jni_conv->IsCurrentParamOnStack()) {
+      FrameOffset out_off = jni_conv->CurrentParamStackOffset();
+      __ CreateSirtEntry(out_off, sirt_offset,
+                         mr_conv->InterproceduralScratchRegister(),
+                         false);
+    } else {
+      ManagedRegister out_reg = jni_conv->CurrentParamRegister();
+      __ CreateSirtEntry(out_reg, sirt_offset,
+                         ManagedRegister::NoRegister(), false);
+    }
+  }
+
+  // 8. Create 1st argument, the JNI environment ptr.
+  jni_conv->ResetIterator(FrameOffset(out_arg_size));
+  // Register that will hold local indirect reference table
+  if (jni_conv->IsCurrentParamInRegister()) {
+    ManagedRegister jni_env = jni_conv->CurrentParamRegister();
+    DCHECK(!jni_env.Equals(jni_conv->InterproceduralScratchRegister()));
+    __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset());
+  } else {
+    FrameOffset jni_env = jni_conv->CurrentParamStackOffset();
+    __ CopyRawPtrFromThread(jni_env, Thread::JniEnvOffset(),
+                            jni_conv->InterproceduralScratchRegister());
+  }
+
+  // 9. Plant call to native code associated with method.
+  __ Call(jni_conv->MethodStackOffset(), Method::NativeMethodOffset(),
+          mr_conv->InterproceduralScratchRegister());
+
+  // 10. Fix differences in result widths.
+  if (instruction_set == kX86) {
+    if (jni_conv->GetReturnType() == Primitive::kPrimByte ||
+        jni_conv->GetReturnType() == Primitive::kPrimShort) {
+      __ SignExtend(jni_conv->ReturnRegister(),
+                    Primitive::ComponentSize(jni_conv->GetReturnType()));
+    } else if (jni_conv->GetReturnType() == Primitive::kPrimBoolean ||
+               jni_conv->GetReturnType() == Primitive::kPrimChar) {
+      __ ZeroExtend(jni_conv->ReturnRegister(),
+                    Primitive::ComponentSize(jni_conv->GetReturnType()));
+    }
+  }
+
+  // 11. Save return value
+  bool reference_return = jni_conv->IsReturnAReference();
+  FrameOffset return_save_location = jni_conv->ReturnValueSaveLocation();
+  if (jni_conv->SizeOfReturnValue() != 0 && !reference_return) {
+    CHECK_LT(return_save_location.Uint32Value(), frame_size+out_arg_size);
+    __ Store(return_save_location, jni_conv->ReturnRegister(), jni_conv->SizeOfReturnValue());
+  }
+
+  // 12. Call into JNI method end possibly passing a returned reference, the method and the current
+  //     thread.
+  {
+    // Modify iterator for call, important offsets were saved above.
+    size_t jni_end_arg_count = 0;
+    if (reference_return) { jni_end_arg_count++; }
+    if (is_synchronized) { jni_end_arg_count++; }
+    const char* jni_end_shorty = jni_end_arg_count == 0 ? "I"
+                                                        : (jni_end_arg_count == 1 ? "II" : "III");
+    jni_conv.reset(JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty,
+                                                instruction_set));
+    // Ensure out arguments will fit in space taken before (we expect this due to stack alignment).
+    size_t jni_end_out_arg_size = jni_conv->OutArgSize();
+    CHECK_LE(jni_end_out_arg_size, out_arg_size);
+    jni_conv->ResetIterator(FrameOffset(jni_end_out_arg_size));
+  }
+  uintptr_t jni_end;
+  if (reference_return) {
+    // Pass result.
+    jni_end = is_synchronized ? ENTRYPOINT_OFFSET(pJniMethodEndWithReferenceSynchronized)
+                              : ENTRYPOINT_OFFSET(pJniMethodEndWithReference);
+    SetNativeParameter(jni_asm.get(), jni_conv.get(), jni_conv->ReturnRegister());
+    jni_conv->Next();
+  } else {
+    jni_end = is_synchronized ? ENTRYPOINT_OFFSET(pJniMethodEndSynchronized)
+                              : ENTRYPOINT_OFFSET(pJniMethodEnd);
+  }
+  // Pass saved local reference state.
+  if (jni_conv->IsCurrentParamOnStack()) {
+    FrameOffset out_off = jni_conv->CurrentParamStackOffset();
+    __ Copy(out_off, saved_cookie_offset, jni_conv->InterproceduralScratchRegister(), 4);
+  } else {
+    ManagedRegister out_reg = jni_conv->CurrentParamRegister();
+    __ Load(out_reg, saved_cookie_offset, 4);
+  }
+  jni_conv->Next();
+  if (is_synchronized) {
+    // Pass object for unlocking.
+    if (jni_conv->IsCurrentParamOnStack()) {
+      FrameOffset out_off = jni_conv->CurrentParamStackOffset();
+      __ CreateSirtEntry(out_off, locked_object_sirt_offset,
+                         jni_conv->InterproceduralScratchRegister(),
+                         false);
+    } else {
+      ManagedRegister out_reg = jni_conv->CurrentParamRegister();
+      __ CreateSirtEntry(out_reg, locked_object_sirt_offset,
+                         ManagedRegister::NoRegister(), false);
+    }
+    jni_conv->Next();
+  }
+  if (jni_conv->IsCurrentParamInRegister()) {
+    __ GetCurrentThread(jni_conv->CurrentParamRegister());
+    __ Call(jni_conv->CurrentParamRegister(), Offset(jni_end),
+            jni_conv->InterproceduralScratchRegister());
+  } else {
+    __ GetCurrentThread(jni_conv->CurrentParamStackOffset(),
+                        jni_conv->InterproceduralScratchRegister());
+    __ Call(ThreadOffset(jni_end), jni_conv->InterproceduralScratchRegister());
+  }
+
+  // 13. Reload return value
+  if (jni_conv->SizeOfReturnValue() != 0 && !reference_return) {
+    __ Load(mr_conv->ReturnRegister(), return_save_location, mr_conv->SizeOfReturnValue());
+  }
+
+  // 14. Move frame up now we're done with the out arg space.
+  __ DecreaseFrameSize(out_arg_size);
+
+  // 15. Process pending exceptions from JNI call or monitor exit.
+  __ ExceptionPoll(jni_conv->InterproceduralScratchRegister(), 0);
+
+  // 16. Remove activation - no need to restore callee save registers because we didn't clobber
+  //     them.
+  __ RemoveFrame(frame_size, std::vector<ManagedRegister>());
+
+  // 17. Finalize code generation
+  __ EmitSlowPaths();
+  size_t cs = __ CodeSize();
+  std::vector<uint8_t> managed_code(cs);
+  MemoryRegion code(&managed_code[0], managed_code.size());
+  __ FinalizeInstructions(code);
+  if (should_disassemble) {
+    UniquePtr<Disassembler> disassembler(Disassembler::Create(instruction_set));
+    disassembler->Dump(LOG(INFO), &managed_code[0], &managed_code[managed_code.size()]);
+  }
+  return new CompiledMethod(instruction_set,
+                            managed_code,
+                            frame_size,
+                            jni_conv->CoreSpillMask(),
+                            jni_conv->FpSpillMask());
 }
 
 // Copy a single parameter from the managed to the JNI calling convention
@@ -166,441 +471,6 @@
   }
 }
 
-static bool IsRegisterPair(InstructionSet instruction_set, ManagedRegister r) {
-  return ((instruction_set == kArm && r.AsArm().IsRegisterPair()) ||
-          (instruction_set == kX86 && r.AsX86().IsRegisterPair()));
-}
-
-// Generate the JNI bridge for the given method, general contract:
-// - Arguments are in the managed runtime format, either on stack or in
-//   registers, a reference to the method object is supplied as part of this
-//   convention.
-//
-CompiledMethod* ArtJniCompileMethodInternal(Compiler& compiler,
-                                            uint32_t access_flags, uint32_t method_idx,
-                                            const DexFile& dex_file) {
-  const bool is_native = (access_flags & kAccNative) != 0;
-  CHECK(is_native);
-  const bool is_static = (access_flags & kAccStatic) != 0;
-  const bool is_synchronized = (access_flags & kAccSynchronized) != 0;
-  const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx));
-  InstructionSet instruction_set = compiler.GetInstructionSet();
-  if (instruction_set == kThumb2) {
-    instruction_set = kArm;
-  }
-  // Calling conventions used to iterate over parameters to method
-  UniquePtr<JniCallingConvention> jni_conv(
-      JniCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
-  UniquePtr<ManagedRuntimeCallingConvention> mr_conv(
-      ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, instruction_set));
-
-  // Assembler that holds generated instructions
-  UniquePtr<Assembler> jni_asm(Assembler::Create(instruction_set));
-  bool should_disassemble = false;
-
-  // Offsets into data structures
-  // TODO: if cross compiling these offsets are for the host not the target
-  const Offset functions(OFFSETOF_MEMBER(JNIEnvExt, functions));
-  const Offset monitor_enter(OFFSETOF_MEMBER(JNINativeInterface, MonitorEnter));
-  const Offset monitor_exit(OFFSETOF_MEMBER(JNINativeInterface, MonitorExit));
-
-  // 1. Build the frame saving all callee saves
-  const size_t frame_size(jni_conv->FrameSize());
-  const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
-  __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
-
-  // 2. Set up the StackIndirectReferenceTable
-  mr_conv->ResetIterator(FrameOffset(frame_size));
-  jni_conv->ResetIterator(FrameOffset(0));
-  __ StoreImmediateToFrame(jni_conv->SirtNumRefsOffset(),
-                           jni_conv->ReferenceCount(),
-                           mr_conv->InterproceduralScratchRegister());
-  __ CopyRawPtrFromThread(jni_conv->SirtLinkOffset(),
-                          Thread::TopSirtOffset(),
-                          mr_conv->InterproceduralScratchRegister());
-  __ StoreStackOffsetToThread(Thread::TopSirtOffset(),
-                              jni_conv->SirtOffset(),
-                              mr_conv->InterproceduralScratchRegister());
-
-  // 3. Place incoming reference arguments into SIRT
-  jni_conv->Next();  // Skip JNIEnv*
-  // 3.5. Create Class argument for static methods out of passed method
-  if (is_static) {
-    FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
-    // Check sirt offset is within frame
-    CHECK_LT(sirt_offset.Uint32Value(), frame_size);
-    __ LoadRef(jni_conv->InterproceduralScratchRegister(),
-               mr_conv->MethodRegister(), Method::DeclaringClassOffset());
-    __ VerifyObject(jni_conv->InterproceduralScratchRegister(), false);
-    __ StoreRef(sirt_offset, jni_conv->InterproceduralScratchRegister());
-    jni_conv->Next();  // in SIRT so move to next argument
-  }
-  while (mr_conv->HasNext()) {
-    CHECK(jni_conv->HasNext());
-    bool ref_param = jni_conv->IsCurrentParamAReference();
-    CHECK(!ref_param || mr_conv->IsCurrentParamAReference());
-    // References need placing in SIRT and the entry value passing
-    if (ref_param) {
-      // Compute SIRT entry, note null is placed in the SIRT but its boxed value
-      // must be NULL
-      FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
-      // Check SIRT offset is within frame and doesn't run into the saved segment state
-      CHECK_LT(sirt_offset.Uint32Value(), frame_size);
-      CHECK_NE(sirt_offset.Uint32Value(),
-               jni_conv->SavedLocalReferenceCookieOffset().Uint32Value());
-      bool input_in_reg = mr_conv->IsCurrentParamInRegister();
-      bool input_on_stack = mr_conv->IsCurrentParamOnStack();
-      CHECK(input_in_reg || input_on_stack);
-
-      if (input_in_reg) {
-        ManagedRegister in_reg  =  mr_conv->CurrentParamRegister();
-        __ VerifyObject(in_reg, mr_conv->IsCurrentArgPossiblyNull());
-        __ StoreRef(sirt_offset, in_reg);
-      } else if (input_on_stack) {
-        FrameOffset in_off  = mr_conv->CurrentParamStackOffset();
-        __ VerifyObject(in_off, mr_conv->IsCurrentArgPossiblyNull());
-        __ CopyRef(sirt_offset, in_off,
-                   mr_conv->InterproceduralScratchRegister());
-      }
-    }
-    mr_conv->Next();
-    jni_conv->Next();
-  }
-
-  // 4. Transition from being in managed to native code. Save the top_of_managed_stack_
-  // so that the managed stack can be crawled while in native code. Clear the corresponding
-  // PC value that has no meaning for the this frame.
-  __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset());
-  __ StoreImmediateToThread(Thread::TopOfManagedStackPcOffset(), 0,
-                            mr_conv->InterproceduralScratchRegister());
-  ChangeThreadState(jni_asm.get(), kNative,
-                    mr_conv->InterproceduralScratchRegister(),
-                    ManagedRegister::NoRegister(), FrameOffset(0), 0);
-
-  // 5. Move frame down to allow space for out going args. Do for as short a
-  //    time as possible to aid profiling..
-  const size_t out_arg_size = jni_conv->OutArgSize();
-  __ IncreaseFrameSize(out_arg_size);
-
-  // 6. Acquire lock for synchronized methods.
-  if (is_synchronized) {
-    // Compute arguments in registers to preserve
-    mr_conv->ResetIterator(FrameOffset(frame_size + out_arg_size));
-    std::vector<ManagedRegister> live_argument_regs;
-    std::vector<size_t> live_argument_regs_size;
-    while (mr_conv->HasNext()) {
-      if (mr_conv->IsCurrentParamInRegister()) {
-        live_argument_regs.push_back(mr_conv->CurrentParamRegister());
-        live_argument_regs_size.push_back(mr_conv->CurrentParamSize());
-      }
-      mr_conv->Next();
-    }
-
-    // Copy arguments to preserve to callee save registers
-    CHECK_LE(live_argument_regs.size(), callee_save_regs.size());
-    for (size_t in = 0, out = 0; in < live_argument_regs.size(); ++in) {
-      size_t size = live_argument_regs_size.at(in);
-      if (IsRegisterPair(instruction_set, live_argument_regs.at(in))) {
-        CHECK_EQ(instruction_set, kArm);
-        arm::ArmManagedRegister pair(live_argument_regs.at(in).AsArm());
-        arm::Register lo(pair.AsRegisterPairLow());
-        arm::Register hi(pair.AsRegisterPairHigh());
-        __ Move(callee_save_regs.at(out++), arm::ArmManagedRegister::FromCoreRegister(lo), size / 2);
-        __ Move(callee_save_regs.at(out++), arm::ArmManagedRegister::FromCoreRegister(hi), size / 2);
-      } else {
-        __ Move(callee_save_regs.at(out++), live_argument_regs.at(in), size);
-      }
-    }
-
-    // Get SIRT entry for 1st argument (jclass or this) to be 1st argument to
-    // monitor enter
-    mr_conv->ResetIterator(FrameOffset(frame_size + out_arg_size));
-    jni_conv->ResetIterator(FrameOffset(out_arg_size));
-    jni_conv->Next();  // Skip JNIEnv*
-    if (is_static) {
-      FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
-      if (jni_conv->IsCurrentParamOnStack()) {
-        FrameOffset out_off = jni_conv->CurrentParamStackOffset();
-        __ CreateSirtEntry(out_off, sirt_offset,
-                           mr_conv->InterproceduralScratchRegister(),
-                           false);
-      } else {
-        ManagedRegister out_reg = jni_conv->CurrentParamRegister();
-        __ CreateSirtEntry(out_reg, sirt_offset,
-                           ManagedRegister::NoRegister(), false);
-      }
-    } else {
-      CopyParameter(jni_asm.get(), mr_conv.get(), jni_conv.get(), frame_size,
-                    out_arg_size);
-    }
-
-    // Generate JNIEnv* in place and leave a copy in jni_fns_register
-    jni_conv->ResetIterator(FrameOffset(out_arg_size));
-    ManagedRegister jni_fns_register =
-        jni_conv->InterproceduralScratchRegister();
-    __ LoadRawPtrFromThread(jni_fns_register, Thread::JniEnvOffset());
-    SetNativeParameter(jni_asm.get(), jni_conv.get(), jni_fns_register);
-
-    // Call JNIEnv->MonitorEnter(object)
-    __ LoadRawPtr(jni_fns_register, jni_fns_register, functions);
-    __ Call(jni_fns_register, monitor_enter,
-                  jni_conv->InterproceduralScratchRegister());
-
-    // Check for exceptions
-    __ ExceptionPoll(jni_conv->InterproceduralScratchRegister());
-
-    // Restore live arguments
-    for (size_t in = 0, out = 0; out < live_argument_regs.size(); ++out) {
-      size_t size = live_argument_regs_size.at(out);
-      if (IsRegisterPair(instruction_set, live_argument_regs.at(out))) {
-        CHECK_EQ(instruction_set, kArm);
-        arm::ArmManagedRegister pair(live_argument_regs.at(out).AsArm());
-        arm::Register lo(pair.AsRegisterPairLow());
-        arm::Register hi(pair.AsRegisterPairHigh());
-        __ Move(arm::ArmManagedRegister::FromCoreRegister(lo), callee_save_regs.at(in++), size / 2);
-        __ Move(arm::ArmManagedRegister::FromCoreRegister(hi), callee_save_regs.at(in++), size / 2);
-      } else {
-        __ Move(live_argument_regs.at(out), callee_save_regs.at(in++), size);
-      }
-    }
-  }
-
-  // 7. Iterate over arguments placing values from managed calling convention in
-  //    to the convention required for a native call (shuffling). For references
-  //    place an index/pointer to the reference after checking whether it is
-  //    NULL (which must be encoded as NULL).
-  //    Note: we do this prior to materializing the JNIEnv* and static's jclass to
-  //    give as many free registers for the shuffle as possible
-  mr_conv->ResetIterator(FrameOffset(frame_size+out_arg_size));
-  uint32_t args_count = 0;
-  while (mr_conv->HasNext()) {
-    args_count++;
-    mr_conv->Next();
-  }
-
-  // Do a backward pass over arguments, so that the generated code will be "mov
-  // R2, R3; mov R1, R2" instead of "mov R1, R2; mov R2, R3."
-  // TODO: A reverse iterator to improve readability.
-  for (uint32_t i = 0; i < args_count; ++i) {
-    mr_conv->ResetIterator(FrameOffset(frame_size + out_arg_size));
-    jni_conv->ResetIterator(FrameOffset(out_arg_size));
-    jni_conv->Next();  // Skip JNIEnv*
-    if (is_static) {
-      jni_conv->Next();  // Skip Class for now
-    }
-    for (uint32_t j = 0; j < args_count - i - 1; ++j) {
-      mr_conv->Next();
-      jni_conv->Next();
-    }
-    CopyParameter(jni_asm.get(), mr_conv.get(), jni_conv.get(), frame_size, out_arg_size);
-  }
-
-  if (is_static) {
-    // Create argument for Class
-    mr_conv->ResetIterator(FrameOffset(frame_size+out_arg_size));
-    jni_conv->ResetIterator(FrameOffset(out_arg_size));
-    jni_conv->Next();  // Skip JNIEnv*
-    FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
-    if (jni_conv->IsCurrentParamOnStack()) {
-      FrameOffset out_off = jni_conv->CurrentParamStackOffset();
-      __ CreateSirtEntry(out_off, sirt_offset,
-                         mr_conv->InterproceduralScratchRegister(),
-                         false);
-    } else {
-      ManagedRegister out_reg = jni_conv->CurrentParamRegister();
-      __ CreateSirtEntry(out_reg, sirt_offset,
-                         ManagedRegister::NoRegister(), false);
-    }
-  }
-  // 8. Create 1st argument, the JNI environment ptr and save the top of the local reference table
-  jni_conv->ResetIterator(FrameOffset(out_arg_size));
-  // Register that will hold local indirect reference table
-  if (jni_conv->IsCurrentParamInRegister()) {
-    ManagedRegister jni_env = jni_conv->CurrentParamRegister();
-    DCHECK(!jni_env.Equals(jni_conv->InterproceduralScratchRegister()));
-    __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset());
-    // Frame[saved_local_ref_cookie_offset] = env->local_ref_cookie
-    __ Copy(jni_conv->SavedLocalReferenceCookieOffset(),
-            jni_env, JNIEnvExt::LocalRefCookieOffset(),
-            jni_conv->InterproceduralScratchRegister(), 4);
-    // env->local_ref_cookie = env->locals.segment_state
-    __ Copy(jni_env, JNIEnvExt::LocalRefCookieOffset(),
-            jni_env, JNIEnvExt::SegmentStateOffset(),
-            jni_conv->InterproceduralScratchRegister(), 4);
-  } else {
-    FrameOffset jni_env = jni_conv->CurrentParamStackOffset();
-    __ CopyRawPtrFromThread(jni_env, Thread::JniEnvOffset(),
-                            jni_conv->InterproceduralScratchRegister());
-    // Frame[saved_local_ref_cookie_offset] = env->local_ref_cookie
-    __ Copy(jni_conv->SavedLocalReferenceCookieOffset(),
-            jni_env, JNIEnvExt::LocalRefCookieOffset(),
-            jni_conv->InterproceduralScratchRegister(), 4);
-    // env->local_ref_cookie = env->locals.segment_state
-    __ Copy(jni_env, JNIEnvExt::LocalRefCookieOffset(),
-            jni_env, JNIEnvExt::SegmentStateOffset(),
-            jni_conv->InterproceduralScratchRegister(), 4);
-  }
-
-  // 9. Plant call to native code associated with method
-  if (!jni_conv->IsMethodRegisterClobberedPreCall()) {
-    // Method register shouldn't have been crushed by setting up outgoing
-    // arguments
-    __ Call(mr_conv->MethodRegister(), Method::NativeMethodOffset(),
-            mr_conv->InterproceduralScratchRegister());
-  } else {
-    __ Call(jni_conv->MethodStackOffset(), Method::NativeMethodOffset(),
-            mr_conv->InterproceduralScratchRegister());
-  }
-
-  // 10. Release lock for synchronized methods.
-  if (is_synchronized) {
-    mr_conv->ResetIterator(FrameOffset(frame_size+out_arg_size));
-    jni_conv->ResetIterator(FrameOffset(out_arg_size));
-    jni_conv->Next();  // Skip JNIEnv*
-    // Save return value
-    FrameOffset return_save_location = jni_conv->ReturnValueSaveLocation();
-    if (jni_conv->SizeOfReturnValue() != 0) {
-      FrameOffset return_save_location = jni_conv->ReturnValueSaveLocation();
-      CHECK_LT(return_save_location.Uint32Value(), frame_size+out_arg_size);
-      __ Store(return_save_location, jni_conv->ReturnRegister(),
-               jni_conv->SizeOfReturnValue());
-    }
-    // Get SIRT entry for 1st argument
-    if (is_static) {
-      FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
-      if (jni_conv->IsCurrentParamOnStack()) {
-        FrameOffset out_off = jni_conv->CurrentParamStackOffset();
-        __ CreateSirtEntry(out_off, sirt_offset,
-                           mr_conv->InterproceduralScratchRegister(),
-                           false);
-      } else {
-        ManagedRegister out_reg = jni_conv->CurrentParamRegister();
-        __ CreateSirtEntry(out_reg, sirt_offset,
-                           ManagedRegister::NoRegister(), false);
-      }
-    } else {
-      CopyParameter(jni_asm.get(), mr_conv.get(), jni_conv.get(), frame_size,
-                    out_arg_size);
-    }
-    // Generate JNIEnv* in place and leave a copy in jni_env_register
-    jni_conv->ResetIterator(FrameOffset(out_arg_size));
-    ManagedRegister jni_env_register =
-        jni_conv->InterproceduralScratchRegister();
-    __ LoadRawPtrFromThread(jni_env_register, Thread::JniEnvOffset());
-    SetNativeParameter(jni_asm.get(), jni_conv.get(), jni_env_register);
-    // Call JNIEnv->MonitorExit(object)
-    __ LoadRawPtr(jni_env_register, jni_env_register, functions);
-    __ Call(jni_env_register, monitor_exit,
-            jni_conv->InterproceduralScratchRegister());
-    // Reload return value
-    if (jni_conv->SizeOfReturnValue() != 0) {
-      __ Load(jni_conv->ReturnRegister(), return_save_location,
-              jni_conv->SizeOfReturnValue());
-    }
-  }
-
-  // 11. Release outgoing argument area
-  __ DecreaseFrameSize(out_arg_size);
-  mr_conv->ResetIterator(FrameOffset(frame_size));
-  jni_conv->ResetIterator(FrameOffset(0));
-
-  // 12. Transition from being in native to managed code, possibly entering a
-  //     safepoint
-  // Don't clobber result
-  CHECK(!jni_conv->InterproceduralScratchRegister().Equals(jni_conv->ReturnRegister()));
-  // Location to preserve result on slow path, ensuring its within the frame
-  FrameOffset return_save_location = jni_conv->ReturnValueSaveLocation();
-  CHECK(return_save_location.Uint32Value() < frame_size ||
-        jni_conv->SizeOfReturnValue() == 0);
-  ChangeThreadState(jni_asm.get(), kRunnable,
-                    jni_conv->InterproceduralScratchRegister(),
-                    jni_conv->ReturnRegister(), return_save_location,
-                    jni_conv->SizeOfReturnValue());
-
-  // 13. Place result in correct register possibly loading from indirect
-  //     reference table
-  if (jni_conv->IsReturnAReference()) {
-    __ IncreaseFrameSize(out_arg_size);
-    jni_conv->ResetIterator(FrameOffset(out_arg_size));
-
-    jni_conv->Next();  // Skip Thread* argument
-    // Pass result as arg2
-    SetNativeParameter(jni_asm.get(), jni_conv.get(),
-                       jni_conv->ReturnRegister());
-
-    // Pass Thread*
-    jni_conv->ResetIterator(FrameOffset(out_arg_size));
-    if (jni_conv->IsCurrentParamInRegister()) {
-      __ GetCurrentThread(jni_conv->CurrentParamRegister());
-      __ Call(jni_conv->CurrentParamRegister(),
-              Offset(ENTRYPOINT_OFFSET(pDecodeJObjectInThread)),
-              jni_conv->InterproceduralScratchRegister());
-    } else {
-      __ GetCurrentThread(jni_conv->CurrentParamStackOffset(),
-                          jni_conv->InterproceduralScratchRegister());
-      __ Call(ThreadOffset(ENTRYPOINT_OFFSET(pDecodeJObjectInThread)),
-              jni_conv->InterproceduralScratchRegister());
-    }
-
-    __ DecreaseFrameSize(out_arg_size);
-    jni_conv->ResetIterator(FrameOffset(0));
-  } else if (instruction_set == kX86) {
-    if (jni_conv->GetReturnType() == Primitive::kPrimByte ||
-        jni_conv->GetReturnType() == Primitive::kPrimShort) {
-      __ SignExtend(jni_conv->ReturnRegister(), Primitive::ComponentSize(jni_conv->GetReturnType()));
-    } else if (jni_conv->GetReturnType() == Primitive::kPrimBoolean ||
-               jni_conv->GetReturnType() == Primitive::kPrimChar) {
-      __ ZeroExtend(jni_conv->ReturnRegister(), Primitive::ComponentSize(jni_conv->GetReturnType()));
-    }
-  }
-  DCHECK_EQ(mr_conv->SizeOfReturnValue(), jni_conv->SizeOfReturnValue());
-  __ Move(mr_conv->ReturnRegister(), jni_conv->ReturnRegister(), mr_conv->SizeOfReturnValue());
-
-  // 14. Restore segment state and remove SIRT from thread
-  {
-    ManagedRegister jni_env = jni_conv->InterproceduralScratchRegister();
-    __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset());
-    // env->locals.segment_state = env->local_ref_cookie
-    __ Copy(jni_env, JNIEnvExt::SegmentStateOffset(),
-            jni_env, JNIEnvExt::LocalRefCookieOffset(),
-            jni_conv->ReturnScratchRegister(), 4);
-    // env->local_ref_cookie = Frame[saved_local_ref_cookie_offset]
-    __ Copy(jni_env, JNIEnvExt::LocalRefCookieOffset(),
-            jni_conv->SavedLocalReferenceCookieOffset(),
-            jni_conv->ReturnScratchRegister(), 4);
-  }
-  __ CopyRawPtrToThread(Thread::TopSirtOffset(), jni_conv->SirtLinkOffset(),
-                        jni_conv->InterproceduralScratchRegister());
-
-  // 15. Check for pending exception and forward if there
-  __ ExceptionPoll(jni_conv->InterproceduralScratchRegister());
-
-  // 16. Remove activation
-  if (is_synchronized) {
-    __ RemoveFrame(frame_size, callee_save_regs);
-  } else {
-    // no need to restore callee save registers because we didn't
-    // clobber them while locking the monitor.
-    __ RemoveFrame(frame_size, std::vector<ManagedRegister>());
-  }
-
-  // 17. Finalize code generation
-  __ EmitSlowPaths();
-  size_t cs = __ CodeSize();
-  std::vector<uint8_t> managed_code(cs);
-  MemoryRegion code(&managed_code[0], managed_code.size());
-  __ FinalizeInstructions(code);
-  if (should_disassemble) {
-    UniquePtr<Disassembler> disassembler(Disassembler::Create(instruction_set));
-    disassembler->Dump(LOG(INFO), &managed_code[0], &managed_code[managed_code.size()]);
-  }
-  return new CompiledMethod(instruction_set,
-                            managed_code,
-                            frame_size,
-                            jni_conv->CoreSpillMask(),
-                            jni_conv->FpSpillMask());
-}
-
 }  // namespace art
 
 extern "C" art::CompiledMethod* ArtJniCompileMethod(art::Compiler& compiler,
diff --git a/src/oat/jni/x86/calling_convention_x86.cc b/src/oat/jni/x86/calling_convention_x86.cc
index 1cd849c..90c050c 100644
--- a/src/oat/jni/x86/calling_convention_x86.cc
+++ b/src/oat/jni/x86/calling_convention_x86.cc
@@ -61,6 +61,10 @@
   return ReturnRegisterForShorty(GetShorty(), true);
 }
 
+ManagedRegister X86JniCallingConvention::IntReturnRegister() {
+  return X86ManagedRegister::FromCpuRegister(EAX);
+}
+
 // Managed runtime calling convention
 
 ManagedRegister X86ManagedRuntimeCallingConvention::MethodRegister() {
@@ -131,10 +135,6 @@
   return RoundUp(NumberOfOutgoingStackArgs() * kPointerSize, kStackAlignment);
 }
 
-bool X86JniCallingConvention::IsMethodRegisterClobberedPreCall() {
-  return IsSynchronized();  // Monitor enter crushes the method register
-}
-
 bool X86JniCallingConvention::IsCurrentParamInRegister() {
   return false;  // Everything is passed by stack.
 }
@@ -149,15 +149,17 @@
 }
 
 FrameOffset X86JniCallingConvention::CurrentParamStackOffset() {
-  return FrameOffset(displacement_.Int32Value() - OutArgSize() +
-                     (itr_slots_ * kPointerSize));
+  return FrameOffset(displacement_.Int32Value() - OutArgSize() + (itr_slots_ * kPointerSize));
 }
 
 size_t X86JniCallingConvention::NumberOfOutgoingStackArgs() {
   size_t static_args = IsStatic() ? 1 : 0;  // count jclass
   // regular argument parameters and this
   size_t param_args = NumArgs() + NumLongOrDoubleArgs();
-  return static_args + param_args + 2;  // count JNIEnv* and return pc (pushed after Method*)
+  // count JNIEnv* and return pc (pushed after Method*)
+  size_t total_args = static_args + param_args + 2;
+  return total_args;
+
 }
 
 }  // namespace x86
diff --git a/src/oat/jni/x86/calling_convention_x86.h b/src/oat/jni/x86/calling_convention_x86.h
index 959a37f..5116a46 100644
--- a/src/oat/jni/x86/calling_convention_x86.h
+++ b/src/oat/jni/x86/calling_convention_x86.h
@@ -49,6 +49,7 @@
   virtual ~X86JniCallingConvention() {}
   // Calling convention
   virtual ManagedRegister ReturnRegister();
+  virtual ManagedRegister IntReturnRegister();
   virtual ManagedRegister InterproceduralScratchRegister();
   // JNI calling convention
   virtual size_t FrameSize();
@@ -61,7 +62,6 @@
   virtual uint32_t FpSpillMask() const {
     return 0;
   }
-  virtual bool IsMethodRegisterClobberedPreCall();
   virtual bool IsCurrentParamInRegister();
   virtual bool IsCurrentParamOnStack();
   virtual ManagedRegister CurrentParamRegister();
diff --git a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
index 37454fd..1aa069e 100644
--- a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
+++ b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
@@ -60,8 +60,17 @@
 extern "C" void art_handle_fill_data_from_code(void*, void*);
 
 // JNI entrypoints.
-extern Object* DecodeJObjectInThread(Thread* thread, jobject obj);
 extern void* FindNativeMethod(Thread* thread);
+extern uint32_t JniMethodStart(Thread* self);
+extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self);
+extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self);
+extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
+                                     Thread* self);
+extern Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
+                                         Thread* self);
+extern Object* JniMethodEndWithReferenceSynchronized(jobject result,
+                                                     uint32_t saved_local_ref_cookie,
+                                                     jobject locked, Thread* self);
 
 // Lock entrypoints.
 extern "C" void art_lock_object_from_code(void*);
@@ -182,8 +191,13 @@
   points->pHandleFillArrayDataFromCode = art_handle_fill_data_from_code;
 
   // JNI
-  points->pDecodeJObjectInThread = DecodeJObjectInThread;
   points->pFindNativeMethod = FindNativeMethod;
+  points->pJniMethodStart = JniMethodStart;
+  points->pJniMethodStartSynchronized = JniMethodStartSynchronized;
+  points->pJniMethodEnd = JniMethodEnd;
+  points->pJniMethodEndSynchronized = JniMethodEndSynchronized;
+  points->pJniMethodEndWithReference = JniMethodEndWithReference;
+  points->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
 
   // Locks
   points->pLockObjectFromCode = art_lock_object_from_code;
diff --git a/src/oat/runtime/arm/stub_arm.cc b/src/oat/runtime/arm/stub_arm.cc
index 5a20748..d3c94a8 100644
--- a/src/oat/runtime/arm/stub_arm.cc
+++ b/src/oat/runtime/arm/stub_arm.cc
@@ -17,6 +17,7 @@
 #include "jni_internal.h"
 #include "oat/utils/arm/assembler_arm.h"
 #include "oat/runtime/oat_support_entrypoints.h"
+#include "oat/runtime/stub.h"
 #include "object.h"
 #include "stack_indirect_reference_table.h"
 
diff --git a/src/oat/runtime/callee_save_frame.h b/src/oat/runtime/callee_save_frame.h
index 1509553..14ba046 100644
--- a/src/oat/runtime/callee_save_frame.h
+++ b/src/oat/runtime/callee_save_frame.h
@@ -23,9 +23,11 @@
 
 class Method;
 
-// Place a special frame at the TOS that will save the callee saves for the given type
-static void  FinishCalleeSaveFrameSetup(Thread* self, Method** sp, Runtime::CalleeSaveType type) {
-  // Be aware the store below may well stomp on an incoming argument
+// Place a special frame at the TOS that will save the callee saves for the given type.
+static void FinishCalleeSaveFrameSetup(Thread* self, Method** sp, Runtime::CalleeSaveType type)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  // Be aware the store below may well stomp on an incoming argument.
+  GlobalSynchronization::mutator_lock_->AssertSharedHeld();
   *sp = Runtime::Current()->GetCalleeSaveMethod(type);
   self->SetTopOfStack(sp, 0);
   self->VerifyStack();
diff --git a/src/oat/runtime/oat_support_entrypoints.h b/src/oat/runtime/oat_support_entrypoints.h
index a235e4f..39d9eab 100644
--- a/src/oat/runtime/oat_support_entrypoints.h
+++ b/src/oat/runtime/oat_support_entrypoints.h
@@ -72,8 +72,14 @@
   void (*pHandleFillArrayDataFromCode)(void*, void*);
 
   // JNI
-  Object* (*pDecodeJObjectInThread)(Thread* thread, jobject obj);
   void* (*pFindNativeMethod)(Thread* thread);
+  uint32_t (*pJniMethodStart)(Thread*);
+  uint32_t (*pJniMethodStartSynchronized)(jobject to_lock, Thread* self);
+  void (*pJniMethodEnd)(uint32_t cookie, Thread* self);
+  void (*pJniMethodEndSynchronized)(uint32_t cookie, jobject locked, Thread* self);
+  Object* (*pJniMethodEndWithReference)(jobject result, uint32_t cookie, Thread* self);
+  Object* (*pJniMethodEndWithReferenceSynchronized)(jobject result, uint32_t cookie,
+                                                    jobject locked, Thread* self);
 
   // Locks
   void (*pLockObjectFromCode)(void*);
diff --git a/src/oat/runtime/stub.h b/src/oat/runtime/stub.h
new file mode 100644
index 0000000..5d8b37d
--- /dev/null
+++ b/src/oat/runtime/stub.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_SRC_OAT_RUNTIME_OAT_RUNTIME_STUB_H_
+#define ART_SRC_OAT_RUNTIME_OAT_RUNTIME_STUB_H_
+
+#include "runtime.h"
+
+namespace art {
+
+namespace arm {
+  ByteArray* CreateAbstractMethodErrorStub()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  ByteArray* ArmCreateResolutionTrampoline(Runtime::TrampolineType type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  ByteArray* CreateJniDlsymLookupStub()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+}
+
+namespace x86 {
+  ByteArray* CreateAbstractMethodErrorStub()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  ByteArray* X86CreateResolutionTrampoline(Runtime::TrampolineType type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  ByteArray* CreateJniDlsymLookupStub()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+}
+
+}  // namespace art
+
+#endif  // ART_SRC_OAT_RUNTIME_OAT_RUNTIME_STUB_H_
diff --git a/src/oat/runtime/support_alloc.cc b/src/oat/runtime/support_alloc.cc
index d9394d2..4a03f98 100644
--- a/src/oat/runtime/support_alloc.cc
+++ b/src/oat/runtime/support_alloc.cc
@@ -20,39 +20,45 @@
 namespace art {
 
 extern "C" Object* artAllocObjectFromCode(uint32_t type_idx, Method* method,
-                                          Thread* self, Method** sp) {
+                                          Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   return AllocObjectFromCode(type_idx, method, self, false);
 }
 
 extern "C" Object* artAllocObjectFromCodeWithAccessCheck(uint32_t type_idx, Method* method,
-                                                         Thread* self, Method** sp) {
+                                                         Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   return AllocObjectFromCode(type_idx, method, self, true);
 }
 
 extern "C" Array* artAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t component_count,
-                                        Thread* self, Method** sp) {
+                                        Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   return AllocArrayFromCode(type_idx, method, component_count, self, false);
 }
 
 extern "C" Array* artAllocArrayFromCodeWithAccessCheck(uint32_t type_idx, Method* method,
                                                        int32_t component_count,
-                                                       Thread* self, Method** sp) {
+                                                       Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   return AllocArrayFromCode(type_idx, method, component_count, self, true);
 }
 
 extern "C" Array* artCheckAndAllocArrayFromCode(uint32_t type_idx, Method* method,
-                                               int32_t component_count, Thread* self, Method** sp) {
+                                               int32_t component_count, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, false);
 }
 
 extern "C" Array* artCheckAndAllocArrayFromCodeWithAccessCheck(uint32_t type_idx, Method* method,
                                                                int32_t component_count,
-                                                               Thread* self, Method** sp) {
+                                                               Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, true);
 }
diff --git a/src/oat/runtime/support_cast.cc b/src/oat/runtime/support_cast.cc
index 139239f..ea083f1 100644
--- a/src/oat/runtime/support_cast.cc
+++ b/src/oat/runtime/support_cast.cc
@@ -20,14 +20,16 @@
 namespace art {
 
 // Assignable test for code, won't throw.  Null and equality tests already performed
-extern "C" uint32_t artIsAssignableFromCode(const Class* klass, const Class* ref_class) {
+extern "C" uint32_t artIsAssignableFromCode(const Class* klass, const Class* ref_class)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   DCHECK(klass != NULL);
   DCHECK(ref_class != NULL);
   return klass->IsAssignableFrom(ref_class) ? 1 : 0;
 }
 
 // Check whether it is safe to cast one class to the other, throw exception and return -1 on failure
-extern "C" int artCheckCastFromCode(const Class* a, const Class* b, Thread* self, Method** sp) {
+extern "C" int artCheckCastFromCode(const Class* a, const Class* b, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   DCHECK(a->IsClass()) << PrettyClass(a);
   DCHECK(b->IsClass()) << PrettyClass(b);
   if (LIKELY(b->IsAssignableFrom(a))) {
@@ -45,7 +47,8 @@
 // Tests whether 'element' can be assigned into an array of type 'array_class'.
 // Returns 0 on success and -1 if an exception is pending.
 extern "C" int artCanPutArrayElementFromCode(const Object* element, const Class* array_class,
-                                             Thread* self, Method** sp) {
+                                             Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   DCHECK(array_class != NULL);
   // element can't be NULL as we catch this is screened in runtime_support
   Class* element_class = element->GetClass();
diff --git a/src/oat/runtime/support_debug.cc b/src/oat/runtime/support_debug.cc
index ef6e0b1..9968043 100644
--- a/src/oat/runtime/support_debug.cc
+++ b/src/oat/runtime/support_debug.cc
@@ -25,13 +25,15 @@
  * method entry and offset 0 within the method, we'll use an offset of -1
  * to denote method entry.
  */
-extern "C" void artUpdateDebuggerFromCode(int32_t dex_pc, Thread* self, Method** sp) {
+extern "C" void artUpdateDebuggerFromCode(int32_t dex_pc, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp,  Runtime::kRefsAndArgs);
   Dbg::UpdateDebugger(dex_pc, self);
 }
 
 // Temporary debugging hook for compiler.
-extern void DebugMe(Method* method, uint32_t info) {
+extern void DebugMe(Method* method, uint32_t info)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   LOG(INFO) << "DebugMe";
   if (method != NULL) {
     LOG(INFO) << PrettyMethod(method);
diff --git a/src/oat/runtime/support_dexcache.cc b/src/oat/runtime/support_dexcache.cc
index 49e038d..8e7c2ad 100644
--- a/src/oat/runtime/support_dexcache.cc
+++ b/src/oat/runtime/support_dexcache.cc
@@ -20,7 +20,8 @@
 namespace art {
 
 extern "C" Class* artInitializeStaticStorageFromCode(uint32_t type_idx, const Method* referrer,
-                                                     Thread* self, Method** sp) {
+                                                     Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // Called to ensure static storage base is initialized for direct static field reads and writes.
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
@@ -29,7 +30,8 @@
 }
 
 extern "C" Class* artInitializeTypeFromCode(uint32_t type_idx, const Method* referrer, Thread* self,
-                                            Method** sp) {
+                                            Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   return ResolveVerifyAndClinit(type_idx, referrer, self, false, false);
@@ -37,7 +39,8 @@
 
 extern "C" Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx,
                                                            const Method* referrer, Thread* self,
-                                                           Method** sp) {
+                                                           Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
@@ -45,7 +48,8 @@
 }
 
 extern "C" String* artResolveStringFromCode(Method* referrer, int32_t string_idx,
-                                            Thread* self, Method** sp) {
+                                            Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   return ResolveStringFromCode(referrer, string_idx);
 }
diff --git a/src/oat/runtime/support_field.cc b/src/oat/runtime/support_field.cc
index 77fe618..99e3a94 100644
--- a/src/oat/runtime/support_field.cc
+++ b/src/oat/runtime/support_field.cc
@@ -22,7 +22,8 @@
 namespace art {
 
 extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx, const Method* referrer,
-                                           Thread* self, Method** sp) {
+                                           Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, true, false, sizeof(int32_t));
   if (LIKELY(field != NULL)) {
     return field->Get32(NULL);
@@ -36,7 +37,8 @@
 }
 
 extern "C" uint64_t artGet64StaticFromCode(uint32_t field_idx, const Method* referrer,
-                                           Thread* self, Method** sp) {
+                                           Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, true, false, sizeof(int64_t));
   if (LIKELY(field != NULL)) {
     return field->Get64(NULL);
@@ -50,7 +52,8 @@
 }
 
 extern "C" Object* artGetObjStaticFromCode(uint32_t field_idx, const Method* referrer,
-                                           Thread* self, Method** sp) {
+                                           Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, false, false, sizeof(Object*));
   if (LIKELY(field != NULL)) {
     return field->GetObj(NULL);
@@ -64,7 +67,8 @@
 }
 
 extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx, Object* obj,
-                                             const Method* referrer, Thread* self, Method** sp) {
+                                             const Method* referrer, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, true, false, sizeof(int32_t));
   if (LIKELY(field != NULL && obj != NULL)) {
     return field->Get32(obj);
@@ -82,7 +86,8 @@
 }
 
 extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx, Object* obj,
-                                             const Method* referrer, Thread* self, Method** sp) {
+                                             const Method* referrer, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, true, false, sizeof(int64_t));
   if (LIKELY(field != NULL && obj != NULL)) {
     return field->Get64(obj);
@@ -100,7 +105,8 @@
 }
 
 extern "C" Object* artGetObjInstanceFromCode(uint32_t field_idx, Object* obj,
-                                              const Method* referrer, Thread* self, Method** sp) {
+                                              const Method* referrer, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, false, false, sizeof(Object*));
   if (LIKELY(field != NULL && obj != NULL)) {
     return field->GetObj(obj);
@@ -118,7 +124,8 @@
 }
 
 extern "C" int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                      const Method* referrer, Thread* self, Method** sp) {
+                                      const Method* referrer, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, true, true, sizeof(int32_t));
   if (LIKELY(field != NULL)) {
     field->Set32(NULL, new_value);
@@ -134,7 +141,8 @@
 }
 
 extern "C" int artSet64StaticFromCode(uint32_t field_idx, const Method* referrer,
-                                      uint64_t new_value, Thread* self, Method** sp) {
+                                      uint64_t new_value, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, true, true, sizeof(int64_t));
   if (LIKELY(field != NULL)) {
     field->Set64(NULL, new_value);
@@ -150,7 +158,8 @@
 }
 
 extern "C" int artSetObjStaticFromCode(uint32_t field_idx, Object* new_value,
-                                       const Method* referrer, Thread* self, Method** sp) {
+                                       const Method* referrer, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, false, true, sizeof(Object*));
   if (LIKELY(field != NULL)) {
     if (LIKELY(!FieldHelper(field).IsPrimitiveType())) {
@@ -168,7 +177,8 @@
 }
 
 extern "C" int artSet32InstanceFromCode(uint32_t field_idx, Object* obj, uint32_t new_value,
-                                        const Method* referrer, Thread* self, Method** sp) {
+                                        const Method* referrer, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, true, true, sizeof(int32_t));
   if (LIKELY(field != NULL && obj != NULL)) {
     field->Set32(obj, new_value);
@@ -188,7 +198,8 @@
 }
 
 extern "C" int artSet64InstanceFromCode(uint32_t field_idx, Object* obj, uint64_t new_value,
-                                        Thread* self, Method** sp) {
+                                        Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Method* callee_save = Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsOnly);
   Method* referrer = sp[callee_save->GetFrameSizeInBytes() / sizeof(Method*)];
   Field* field = FindFieldFast(field_idx, referrer, true, true, sizeof(int64_t));
@@ -211,7 +222,8 @@
 }
 
 extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, Object* obj, Object* new_value,
-                                         const Method* referrer, Thread* self, Method** sp) {
+                                         const Method* referrer, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* field = FindFieldFast(field_idx, referrer, false, true, sizeof(Object*));
   if (LIKELY(field != NULL && obj != NULL)) {
     field->SetObj(obj, new_value);
diff --git a/src/oat/runtime/support_fillarray.cc b/src/oat/runtime/support_fillarray.cc
index 7227f6b..8561bd8 100644
--- a/src/oat/runtime/support_fillarray.cc
+++ b/src/oat/runtime/support_fillarray.cc
@@ -37,7 +37,8 @@
  */
 extern "C" int artHandleFillArrayDataFromCode(Array* array,
                                               const Instruction::ArrayDataPayload* payload,
-                                              Thread* self, Method** sp) {
+                                              Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   DCHECK_EQ(payload->ident, static_cast<uint16_t>(Instruction::kArrayDataSignature));
   if (UNLIKELY(array == NULL)) {
diff --git a/src/oat/runtime/support_invoke.cc b/src/oat/runtime/support_invoke.cc
index 4669688..9c7b3a2 100644
--- a/src/oat/runtime/support_invoke.cc
+++ b/src/oat/runtime/support_invoke.cc
@@ -20,7 +20,8 @@
 namespace art {
 
 static uint64_t artInvokeCommon(uint32_t method_idx, Object* this_object, Method* caller_method,
-                                Thread* self, Method** sp, bool access_check, InvokeType type) {
+                                Thread* self, Method** sp, bool access_check, InvokeType type)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Method* method = FindMethodFast(method_idx, this_object, caller_method, access_check, type);
   if (UNLIKELY(method == NULL)) {
     FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs);
@@ -54,14 +55,16 @@
 // See comments in runtime_support_asm.S
 extern "C" uint64_t artInvokeInterfaceTrampoline(uint32_t method_idx, Object* this_object,
                                                  Method* caller_method, Thread* self,
-                                                 Method** sp) {
+                                                 Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return artInvokeCommon(method_idx, this_object, caller_method, self, sp, false, kInterface);
 }
 
 extern "C" uint64_t artInvokeInterfaceTrampolineWithAccessCheck(uint32_t method_idx,
                                                                 Object* this_object,
                                                                 Method* caller_method, Thread* self,
-                                                                Method** sp) {
+                                                                Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return artInvokeCommon(method_idx, this_object, caller_method, self, sp, true, kInterface);
 }
 
@@ -69,28 +72,32 @@
 extern "C" uint64_t artInvokeDirectTrampolineWithAccessCheck(uint32_t method_idx,
                                                              Object* this_object,
                                                              Method* caller_method, Thread* self,
-                                                             Method** sp) {
+                                                             Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return artInvokeCommon(method_idx, this_object, caller_method, self, sp, true, kDirect);
 }
 
 extern "C" uint64_t artInvokeStaticTrampolineWithAccessCheck(uint32_t method_idx,
                                                             Object* this_object,
                                                             Method* caller_method, Thread* self,
-                                                            Method** sp) {
+                                                            Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return artInvokeCommon(method_idx, this_object, caller_method, self, sp, true, kStatic);
 }
 
 extern "C" uint64_t artInvokeSuperTrampolineWithAccessCheck(uint32_t method_idx,
                                                             Object* this_object,
                                                             Method* caller_method, Thread* self,
-                                                            Method** sp) {
+                                                            Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return artInvokeCommon(method_idx, this_object, caller_method, self, sp, true, kSuper);
 }
 
 extern "C" uint64_t artInvokeVirtualTrampolineWithAccessCheck(uint32_t method_idx,
                                                               Object* this_object,
                                                               Method* caller_method, Thread* self,
-                                                              Method** sp) {
+                                                              Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   return artInvokeCommon(method_idx, this_object, caller_method, self, sp, true, kVirtual);
 }
 
diff --git a/src/oat/runtime/support_jni.cc b/src/oat/runtime/support_jni.cc
index cfa1a11..bbff673 100644
--- a/src/oat/runtime/support_jni.cc
+++ b/src/oat/runtime/support_jni.cc
@@ -16,20 +16,23 @@
 
 #include "object.h"
 #include "object_utils.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 
 namespace art {
 
 // Used by the JNI dlsym stub to find the native method to invoke if none is registered.
-extern void* FindNativeMethod(Thread* self) {
+extern void* FindNativeMethod(Thread* self) LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_) {
+  GlobalSynchronization::mutator_lock_->AssertNotHeld();  // We come here as Native.
   DCHECK(Thread::Current() == self);
+  ScopedObjectAccess soa(self);
 
-  Method* method = const_cast<Method*>(self->GetCurrentMethod());
+  Method* method = self->GetCurrentMethod();
   DCHECK(method != NULL);
 
   // Lookup symbol address for method, on failure we'll return NULL with an
   // exception set, otherwise we return the address of the method we found.
-  void* native_code = self->GetJniEnv()->vm->FindCodeForNativeMethod(method);
+  void* native_code = soa.Vm()->FindCodeForNativeMethod(method);
   if (native_code == NULL) {
     DCHECK(self->IsExceptionPending());
     return NULL;
@@ -40,23 +43,61 @@
   }
 }
 
-// Return value helper for jobject return types, used for JNI return values.
-extern Object* DecodeJObjectInThread(Thread* self, jobject java_object) {
-  if (self->IsExceptionPending()) {
-    return NULL;
-  }
-  Object* o = self->DecodeJObject(java_object);
-  if (o == NULL || !self->GetJniEnv()->check_jni) {
-    return o;
-  }
+// Called on entry to JNI, transition out of Runnable and release share of mutator_lock_.
+extern uint32_t JniMethodStart(Thread* self) UNLOCK_FUNCTION(GlobalSynchronization::mutator_lock_) {
+  JNIEnvExt* env = self->GetJniEnv();
+  uint32_t saved_local_ref_cookie = env->local_ref_cookie;
+  env->local_ref_cookie = env->locals.GetSegmentState();
+  self->TransitionFromRunnableToSuspended(kNative);
+  return saved_local_ref_cookie;
+}
 
+extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self)
+    UNLOCK_FUNCTION(GlobalSynchronization::mutator_lock_) {
+  self->DecodeJObject(to_lock)->MonitorEnter(self);
+  return JniMethodStart(self);
+}
+
+static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self) {
+  JNIEnvExt* env = self->GetJniEnv();
+  env->locals.SetSegmentState(env->local_ref_cookie);
+  env->local_ref_cookie = saved_local_ref_cookie;
+  self->PopSirt();
+}
+
+static void UnlockJniSynchronizedMethod(jobject locked, Thread* self)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+    UNLOCK_FUNCTION(monitor_lock_) {
+  // Save any pending exception over monitor exit call.
+  Throwable* saved_exception = NULL;
+  if (UNLIKELY(self->IsExceptionPending())) {
+    saved_exception = self->GetException();
+    self->ClearException();
+  }
+  // Decode locked object and unlock, before popping local references.
+  self->DecodeJObject(locked)->MonitorExit(self);
+  if (UNLIKELY(self->IsExceptionPending())) {
+    LOG(FATAL) << "Synchronized JNI code returning with an exception:\n"
+        << saved_exception->Dump()
+        << "\nEncountered second exception during implicit MonitorExit:\n"
+        << self->GetException()->Dump();
+  }
+  // Restore pending exception.
+  if (saved_exception != NULL) {
+    self->SetException(saved_exception);
+  }
+}
+
+static void CheckReferenceResult(Object* o, Thread* self)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  if (o == NULL) {
+    return;
+  }
   if (o == kInvalidIndirectRefObject) {
     JniAbortF(NULL, "invalid reference returned from %s",
               PrettyMethod(self->GetCurrentMethod()).c_str());
   }
-
-  // Make sure that the result is an instance of the type this
-  // method was expected to return.
+  // Make sure that the result is an instance of the type this method was expected to return.
   Method* m = self->GetCurrentMethod();
   MethodHelper mh(m);
   Class* return_type = mh.GetReturnType();
@@ -65,7 +106,53 @@
     JniAbortF(NULL, "attempt to return an instance of %s from %s",
               PrettyTypeOf(o).c_str(), PrettyMethod(m).c_str());
   }
+}
 
+extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self)
+    SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_) {
+  self->TransitionFromSuspendedToRunnable();
+  PopLocalReferences(saved_local_ref_cookie, self);
+}
+
+
+extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked, Thread* self)
+    SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_) {
+  self->TransitionFromSuspendedToRunnable();
+  UnlockJniSynchronizedMethod(locked, self);  // Must decode before pop.
+  PopLocalReferences(saved_local_ref_cookie, self);
+}
+
+extern Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
+                                         Thread* self)
+    SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_) {
+  self->TransitionFromSuspendedToRunnable();
+  Object* o = self->DecodeJObject(result);  // Must decode before pop.
+  PopLocalReferences(saved_local_ref_cookie, self);
+  // Process result.
+  if (UNLIKELY(self->GetJniEnv()->check_jni)) {
+    if (self->IsExceptionPending()) {
+      return NULL;
+    }
+    CheckReferenceResult(o, self);
+  }
+  return o;
+}
+
+extern Object* JniMethodEndWithReferenceSynchronized(jobject result,
+                                                     uint32_t saved_local_ref_cookie,
+                                                     jobject locked, Thread* self)
+    SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_) {
+  self->TransitionFromSuspendedToRunnable();
+  UnlockJniSynchronizedMethod(locked, self);  // Must decode before pop.
+  Object* o = self->DecodeJObject(result);
+  PopLocalReferences(saved_local_ref_cookie, self);
+  // Process result.
+  if (UNLIKELY(self->GetJniEnv()->check_jni)) {
+    if (self->IsExceptionPending()) {
+      return NULL;
+    }
+    CheckReferenceResult(o, self);
+  }
   return o;
 }
 
@@ -77,7 +164,8 @@
   *arg_ptr = reinterpret_cast<intptr_t>(value_as_work_around_rep);
 }
 
-extern "C" const void* artWorkAroundAppJniBugs(Thread* self, intptr_t* sp) {
+extern "C" const void* artWorkAroundAppJniBugs(Thread* self, intptr_t* sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   DCHECK(Thread::Current() == self);
   // TODO: this code is specific to ARM
   // On entry the stack pointed by sp is:
diff --git a/src/oat/runtime/support_locks.cc b/src/oat/runtime/support_locks.cc
index 30fc567..9d44e55 100644
--- a/src/oat/runtime/support_locks.cc
+++ b/src/oat/runtime/support_locks.cc
@@ -19,14 +19,16 @@
 
 namespace art {
 
-extern "C" int artUnlockObjectFromCode(Object* obj, Thread* self, Method** sp) {
+extern "C" int artUnlockObjectFromCode(Object* obj, Thread* self, Method** sp)
+    UNLOCK_FUNCTION(monitor_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   DCHECK(obj != NULL);  // Assumed to have been checked before entry
   // MonitorExit may throw exception
   return obj->MonitorExit(self) ? 0 /* Success */ : -1 /* Failure */;
 }
 
-extern "C" void artLockObjectFromCode(Object* obj, Thread* thread, Method** sp) {
+extern "C" void artLockObjectFromCode(Object* obj, Thread* thread, Method** sp)
+    EXCLUSIVE_LOCK_FUNCTION(monitor_lock_) {
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kRefsOnly);
   DCHECK(obj != NULL);        // Assumed to have been checked before entry
   obj->MonitorEnter(thread);  // May block
diff --git a/src/oat/runtime/support_proxy.cc b/src/oat/runtime/support_proxy.cc
index 83d2265..972779d 100644
--- a/src/oat/runtime/support_proxy.cc
+++ b/src/oat/runtime/support_proxy.cc
@@ -18,7 +18,7 @@
 #include "object_utils.h"
 #include "reflection.h"
 #include "runtime_support.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "well_known_classes.h"
 
@@ -43,7 +43,8 @@
 // reference arguments (so they survive GC) and create a boxed argument array. Finally we invoke
 // the invocation handler which is a field within the proxy object receiver.
 extern "C" void artProxyInvokeHandler(Method* proxy_method, Object* receiver,
-                                      Thread* self, byte* stack_args) {
+                                      Thread* self, byte* stack_args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // Register the top of the managed stack
   Method** proxy_sp = reinterpret_cast<Method**>(stack_args - SP_OFFSET_IN_BYTES);
   DCHECK_EQ(*proxy_sp, proxy_method);
@@ -51,11 +52,11 @@
   DCHECK_EQ(proxy_method->GetFrameSizeInBytes(), FRAME_SIZE_IN_BYTES);
   // Start new JNI local reference state
   JNIEnvExt* env = self->GetJniEnv();
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccessUnchecked soa(env);
   ScopedJniEnvLocalRefState env_state(env);
   // Create local ref. copies of proxy method and the receiver
-  jobject rcvr_jobj = ts.AddLocalReference<jobject>(receiver);
-  jobject proxy_method_jobj = ts.AddLocalReference<jobject>(proxy_method);
+  jobject rcvr_jobj = soa.AddLocalReference<jobject>(receiver);
+  jobject proxy_method_jobj = soa.AddLocalReference<jobject>(proxy_method);
 
   // Placing into local references incoming arguments from the caller's register arguments,
   // replacing original Object* with jobject
@@ -74,7 +75,7 @@
   while (cur_arg < args_in_regs && param_index < num_params) {
     if (proxy_mh.IsParamAReference(param_index)) {
       Object* obj = *reinterpret_cast<Object**>(stack_args + (cur_arg * kPointerSize));
-      jobject jobj = ts.AddLocalReference<jobject>(obj);
+      jobject jobj = soa.AddLocalReference<jobject>(obj);
       *reinterpret_cast<jobject*>(stack_args + (cur_arg * kPointerSize)) = jobj;
     }
     cur_arg = cur_arg + (proxy_mh.IsParamALongOrDouble(param_index) ? 2 : 1);
@@ -85,7 +86,7 @@
   while (param_index < num_params) {
     if (proxy_mh.IsParamAReference(param_index)) {
       Object* obj = *reinterpret_cast<Object**>(stack_args + (cur_arg * kPointerSize));
-      jobject jobj = ts.AddLocalReference<jobject>(obj);
+      jobject jobj = soa.AddLocalReference<jobject>(obj);
       *reinterpret_cast<jobject*>(stack_args + (cur_arg * kPointerSize)) = jobj;
     }
     cur_arg = cur_arg + (proxy_mh.IsParamALongOrDouble(param_index) ? 2 : 1);
@@ -104,13 +105,13 @@
       CHECK(self->IsExceptionPending());
       return;
     }
-    args_jobj[2].l = ts.AddLocalReference<jobjectArray>(args);
+    args_jobj[2].l = soa.AddLocalReference<jobjectArray>(args);
   }
   // Convert proxy method into expected interface method
   Method* interface_method = proxy_method->FindOverriddenMethod();
   DCHECK(interface_method != NULL);
   DCHECK(!interface_method->IsProxyMethod()) << PrettyMethod(interface_method);
-  args_jobj[1].l = ts.AddLocalReference<jobject>(interface_method);
+  args_jobj[1].l = soa.AddLocalReference<jobject>(interface_method);
   // Box arguments
   cur_arg = 0;  // reset stack location to read to start
   // reset index, will index into param type array which doesn't include the receiver
diff --git a/src/oat/runtime/support_stubs.cc b/src/oat/runtime/support_stubs.cc
index 3f6bc8f..013f885 100644
--- a/src/oat/runtime/support_stubs.cc
+++ b/src/oat/runtime/support_stubs.cc
@@ -23,7 +23,7 @@
 #if defined(ART_USE_LLVM_COMPILER)
 #include "nth_caller_visitor.h"
 #endif
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 
 // Architecture specific assembler helper to deliver exception.
 extern "C" void art_deliver_exception_from_code(void*);
@@ -33,7 +33,8 @@
 #if !defined(ART_USE_LLVM_COMPILER)
 // Lazily resolve a method. Called by stub code.
 const void* UnresolvedDirectMethodTrampolineFromCode(Method* called, Method** sp, Thread* thread,
-                                                     Runtime::TrampolineType type) {
+                                                     Runtime::TrampolineType type)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
 #if defined(__arm__)
   // On entry the stack pointed by sp is:
   // | argN       |  |
@@ -82,7 +83,7 @@
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kRefsAndArgs);
   // Start new JNI local reference state
   JNIEnvExt* env = thread->GetJniEnv();
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccessUnchecked soa(env);
   ScopedJniEnvLocalRefState env_state(env);
 
   // Compute details about the called method (avoid GCs)
@@ -147,7 +148,7 @@
       // If we thought we had fewer than 3 arguments in registers, account for the receiver
       args_in_regs++;
     }
-    ts.AddLocalReference<jobject>(obj);
+    soa.AddLocalReference<jobject>(obj);
   }
   size_t shorty_index = 1;  // skip return value
   // Iterate while arguments and arguments in registers (less 1 from cur_arg which is offset to skip
@@ -157,7 +158,7 @@
     shorty_index++;
     if (c == 'L') {
       Object* obj = reinterpret_cast<Object*>(regs[cur_arg]);
-      ts.AddLocalReference<jobject>(obj);
+      soa.AddLocalReference<jobject>(obj);
     }
     cur_arg = cur_arg + (c == 'J' || c == 'D' ? 2 : 1);
   }
@@ -168,7 +169,7 @@
     shorty_index++;
     if (c == 'L') {
       Object* obj = reinterpret_cast<Object*>(regs[cur_arg]);
-      ts.AddLocalReference<jobject>(obj);
+      soa.AddLocalReference<jobject>(obj);
     }
     cur_arg = cur_arg + (c == 'J' || c == 'D' ? 2 : 1);
   }
@@ -308,7 +309,8 @@
 
 #if !defined(ART_USE_LLVM_COMPILER)
 // Called by the AbstractMethodError. Called by stub code.
-extern void ThrowAbstractMethodErrorFromCode(Method* method, Thread* thread, Method** sp) {
+extern void ThrowAbstractMethodErrorFromCode(Method* method, Thread* thread, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kSaveAll);
   thread->ThrowNewExceptionF("Ljava/lang/AbstractMethodError;",
                              "abstract method \"%s\"", PrettyMethod(method).c_str());
diff --git a/src/oat/runtime/support_thread.cc b/src/oat/runtime/support_thread.cc
index 6cd595b..32284bb 100644
--- a/src/oat/runtime/support_thread.cc
+++ b/src/oat/runtime/support_thread.cc
@@ -20,15 +20,18 @@
 
 namespace art {
 
-void CheckSuspendFromCode(Thread* thread) {
-  // Called when thread->suspend_count_ != 0
-  Runtime::Current()->GetThreadList()->FullSuspendCheck(thread);
+void CheckSuspendFromCode(Thread* thread)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  // Called when thread->suspend_count_ != 0 on JNI return. JNI method acts as callee-save frame.
+  thread->VerifyStack();
+  thread->FullSuspendCheck();
 }
 
-extern "C" void artTestSuspendFromCode(Thread* thread, Method** sp) {
+extern "C" void artTestSuspendFromCode(Thread* thread, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // Called when suspend count check value is 0 and thread->suspend_count_ != 0
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kRefsOnly);
-  Runtime::Current()->GetThreadList()->FullSuspendCheck(thread);
+  thread->FullSuspendCheck();
 }
 
 }  // namespace art
diff --git a/src/oat/runtime/support_throw.cc b/src/oat/runtime/support_throw.cc
index 31cf7d9..4fa2387 100644
--- a/src/oat/runtime/support_throw.cc
+++ b/src/oat/runtime/support_throw.cc
@@ -23,13 +23,15 @@
 namespace art {
 
 // Deliver an exception that's pending on thread helping set up a callee save frame on the way.
-extern "C" void artDeliverPendingExceptionFromCode(Thread* thread, Method** sp) {
+extern "C" void artDeliverPendingExceptionFromCode(Thread* thread, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kSaveAll);
   thread->DeliverException();
 }
 
 // Called by generated call to throw an exception.
-extern "C" void artDeliverExceptionFromCode(Throwable* exception, Thread* thread, Method** sp) {
+extern "C" void artDeliverExceptionFromCode(Throwable* exception, Thread* thread, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   /*
    * exception may be NULL, in which case this routine should
    * throw NPE.  NOTE: this is a convenience for generated code,
@@ -47,7 +49,8 @@
 }
 
 // Called by generated call to throw a NPE exception.
-extern "C" void artThrowNullPointerExceptionFromCode(Thread* self, Method** sp) {
+extern "C" void artThrowNullPointerExceptionFromCode(Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   uint32_t dex_pc;
   Method* throw_method = self->GetCurrentMethod(&dex_pc);
@@ -56,21 +59,24 @@
 }
 
 // Called by generated call to throw an arithmetic divide by zero exception.
-extern "C" void artThrowDivZeroFromCode(Thread* thread, Method** sp) {
+extern "C" void artThrowDivZeroFromCode(Thread* thread, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kSaveAll);
   thread->ThrowNewException("Ljava/lang/ArithmeticException;", "divide by zero");
   thread->DeliverException();
 }
 
 // Called by generated call to throw an array index out of bounds exception.
-extern "C" void artThrowArrayBoundsFromCode(int index, int limit, Thread* thread, Method** sp) {
+extern "C" void artThrowArrayBoundsFromCode(int index, int limit, Thread* thread, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kSaveAll);
   thread->ThrowNewExceptionF("Ljava/lang/ArrayIndexOutOfBoundsException;",
                              "length=%d; index=%d", limit, index);
   thread->DeliverException();
 }
 
-extern "C" void artThrowStackOverflowFromCode(Thread* thread, Method** sp) {
+extern "C" void artThrowStackOverflowFromCode(Thread* thread, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(thread, sp, Runtime::kSaveAll);
   // Remove extra entry pushed onto second stack during method tracing.
   if (Runtime::Current()->IsMethodTracingActive()) {
@@ -83,7 +89,8 @@
   thread->DeliverException();
 }
 
-extern "C" void artThrowNoSuchMethodFromCode(int32_t method_idx, Thread* self, Method** sp) {
+extern "C" void artThrowNoSuchMethodFromCode(int32_t method_idx, Thread* self, Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   Method* method = self->GetCurrentMethod();
   self->ThrowNewException("Ljava/lang/NoSuchMethodError;",
@@ -91,7 +98,9 @@
   self->DeliverException();
 }
 
-extern "C" void artThrowVerificationErrorFromCode(int32_t kind, int32_t ref, Thread* self, Method** sp) {
+extern "C" void artThrowVerificationErrorFromCode(int32_t kind, int32_t ref, Thread* self,
+                                                  Method** sp)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
   Method* method = self->GetCurrentMethod();
   ThrowVerificationError(self, method, kind, ref);
diff --git a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
index a28a898..e52569d 100644
--- a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
+++ b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
@@ -59,8 +59,17 @@
 extern "C" void art_handle_fill_data_from_code(void*, void*);
 
 // JNI entrypoints.
-extern Object* DecodeJObjectInThread(Thread* thread, jobject obj);
 extern void* FindNativeMethod(Thread* thread);
+extern uint32_t JniMethodStart(Thread* self);
+extern uint32_t JniMethodStartSynchronized(jobject to_lock, Thread* self);
+extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self);
+extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
+                                     Thread* self);
+extern Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
+                                         Thread* self);
+extern Object* JniMethodEndWithReferenceSynchronized(jobject result,
+                                                     uint32_t saved_local_ref_cookie,
+                                                     jobject locked, Thread* self);
 
 // Lock entrypoints.
 extern "C" void art_lock_object_from_code(void*);
@@ -153,8 +162,13 @@
   points->pHandleFillArrayDataFromCode = art_handle_fill_data_from_code;
 
   // JNI
-  points->pDecodeJObjectInThread = DecodeJObjectInThread;
   points->pFindNativeMethod = FindNativeMethod;
+  points->pJniMethodStart = JniMethodStart;
+  points->pJniMethodStartSynchronized = JniMethodStartSynchronized;
+  points->pJniMethodEnd = JniMethodEnd;
+  points->pJniMethodEndSynchronized = JniMethodEndSynchronized;
+  points->pJniMethodEndWithReference = JniMethodEndWithReference;
+  points->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
 
   // Locks
   points->pLockObjectFromCode = art_lock_object_from_code;
diff --git a/src/oat/runtime/x86/stub_x86.cc b/src/oat/runtime/x86/stub_x86.cc
index a9db314..74e0f39 100644
--- a/src/oat/runtime/x86/stub_x86.cc
+++ b/src/oat/runtime/x86/stub_x86.cc
@@ -16,6 +16,7 @@
 
 #include "jni_internal.h"
 #include "oat/runtime/oat_support_entrypoints.h"
+#include "oat/runtime/stub.h"
 #include "oat/utils/x86/assembler_x86.h"
 #include "object.h"
 #include "stack_indirect_reference_table.h"
diff --git a/src/oat/utils/arm/assembler_arm.cc b/src/oat/utils/arm/assembler_arm.cc
index 55b6187..de665dd 100644
--- a/src/oat/utils/arm/assembler_arm.cc
+++ b/src/oat/utils/arm/assembler_arm.cc
@@ -1440,10 +1440,9 @@
                               const std::vector<ManagedRegister>& callee_save_regs,
                               const std::vector<ManagedRegister>& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  DCHECK_EQ(entry_spills.size(), 0u);
   CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
 
-  // Push callee saves and link register
+  // Push callee saves and link register.
   RegList push_list = 1 << LR;
   size_t pushed_values = 1;
   for (size_t i = 0; i < callee_save_regs.size(); i++) {
@@ -1453,13 +1452,19 @@
   }
   PushList(push_list);
 
-  // Increase frame to required size
+  // Increase frame to required size.
   CHECK_GT(frame_size, pushed_values * kPointerSize);  // Must be at least space to push Method*
   size_t adjust = frame_size - (pushed_values * kPointerSize);
   IncreaseFrameSize(adjust);
 
-  // Write out Method*
+  // Write out Method*.
   StoreToOffset(kStoreWord, R0, SP, 0);
+
+  // Write out entry spills.
+  for (size_t i = 0; i < entry_spills.size(); ++i) {
+    Register reg = entry_spills.at(i).AsArm().AsCoreRegister();
+    StoreToOffset(kStoreWord, reg, SP, frame_size + kPointerSize + (i * kPointerSize));
+  }
 }
 
 void ArmAssembler::RemoveFrame(size_t frame_size,
@@ -1891,9 +1896,9 @@
 #undef __
 }
 
-void ArmAssembler::ExceptionPoll(ManagedRegister mscratch) {
+void ArmAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
   ArmManagedRegister scratch = mscratch.AsArm();
-  ArmExceptionSlowPath* slow = new ArmExceptionSlowPath(scratch);
+  ArmExceptionSlowPath* slow = new ArmExceptionSlowPath(scratch, stack_adjust);
   buffer_.EnqueueSlowPath(slow);
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  TR, Thread::ExceptionOffset().Int32Value());
@@ -1905,7 +1910,9 @@
   ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm);
 #define __ sp_asm->
   __ Bind(&entry_);
-
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    __ DecreaseFrameSize(stack_adjust_);
+  }
   // Pass exception object as argument
   // Don't care about preserving R0 as this call won't return
   __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
diff --git a/src/oat/utils/arm/assembler_arm.h b/src/oat/utils/arm/assembler_arm.h
index edfaf30..2410bac 100644
--- a/src/oat/utils/arm/assembler_arm.h
+++ b/src/oat/utils/arm/assembler_arm.h
@@ -564,7 +564,7 @@
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
-  virtual void ExceptionPoll(ManagedRegister scratch);
+  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust);
 
  private:
   void EmitType01(Condition cond,
@@ -650,10 +650,13 @@
 // Slowpath entered when Thread::Current()->_exception is non-null
 class ArmExceptionSlowPath : public SlowPath {
  public:
-  explicit ArmExceptionSlowPath(ArmManagedRegister scratch) : scratch_(scratch) {}
+  explicit ArmExceptionSlowPath(ArmManagedRegister scratch, size_t stack_adjust)
+      : scratch_(scratch), stack_adjust_(stack_adjust) {
+  }
   virtual void Emit(Assembler *sp_asm);
  private:
   const ArmManagedRegister scratch_;
+  const size_t stack_adjust_;
 };
 
 // Slowpath entered when Thread::Current()->_suspend_count is non-zero
diff --git a/src/oat/utils/assembler.h b/src/oat/utils/assembler.h
index dabd321..68108e7 100644
--- a/src/oat/utils/assembler.h
+++ b/src/oat/utils/assembler.h
@@ -446,7 +446,7 @@
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
-  virtual void ExceptionPoll(ManagedRegister scratch) = 0;
+  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
 
   virtual ~Assembler() {}
 
diff --git a/src/oat/utils/x86/assembler_x86.cc b/src/oat/utils/x86/assembler_x86.cc
index b7f0c1f..78f2b57 100644
--- a/src/oat/utils/x86/assembler_x86.cc
+++ b/src/oat/utils/x86/assembler_x86.cc
@@ -1862,8 +1862,8 @@
 #undef __
 }
 
-void X86Assembler::ExceptionPoll(ManagedRegister /*scratch*/) {
-  X86ExceptionSlowPath* slow = new X86ExceptionSlowPath();
+void X86Assembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+  X86ExceptionSlowPath* slow = new X86ExceptionSlowPath(stack_adjust);
   buffer_.EnqueueSlowPath(slow);
   fs()->cmpl(Address::Absolute(Thread::ExceptionOffset()), Immediate(0));
   j(kNotEqual, slow->Entry());
@@ -1874,6 +1874,9 @@
 #define __ sp_asm->
   __ Bind(&entry_);
   // Note: the return value is dead
+  if (stack_adjust_ != 0) {  // Fix up the frame.
+    __ DecreaseFrameSize(stack_adjust_);
+  }
   // Pass exception as argument in EAX
   __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset()));
   __ fs()->call(Address::Absolute(ENTRYPOINT_OFFSET(pDeliverException)));
diff --git a/src/oat/utils/x86/assembler_x86.h b/src/oat/utils/x86/assembler_x86.h
index c8edf44..7291211 100644
--- a/src/oat/utils/x86/assembler_x86.h
+++ b/src/oat/utils/x86/assembler_x86.h
@@ -598,7 +598,7 @@
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
-  virtual void ExceptionPoll(ManagedRegister scratch);
+  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust);
 
  private:
   inline void EmitUint8(uint8_t value);
@@ -650,8 +650,10 @@
 // Slowpath entered when Thread::Current()->_exception is non-null
 class X86ExceptionSlowPath : public SlowPath {
  public:
-  X86ExceptionSlowPath() {}
+  explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {}
   virtual void Emit(Assembler *sp_asm);
+ private:
+  const size_t stack_adjust_;
 };
 
 // Slowpath entered when Thread::Current()->_suspend_count is non-zero
diff --git a/src/oat_compilation_unit.h b/src/oat_compilation_unit.h
index 41c1847..97815ac 100644
--- a/src/oat_compilation_unit.h
+++ b/src/oat_compilation_unit.h
@@ -30,23 +30,19 @@
 
 class OatCompilationUnit {
  public:
-  OatCompilationUnit(ClassLoader* class_loader, ClassLinker* class_linker,
-                     const DexFile& dex_file, DexCache& dex_cache,
-                     const DexFile::CodeItem* code_item,
-                     uint32_t method_idx, uint32_t access_flags)
-      : class_loader_(class_loader), class_linker_(class_linker),
-        dex_file_(&dex_file), dex_cache_(&dex_cache), code_item_(code_item),
-        method_idx_(method_idx), access_flags_(access_flags) {
+  OatCompilationUnit(jobject class_loader, ClassLinker* class_linker, const DexFile& dex_file,
+                     const DexFile::CodeItem* code_item, uint32_t method_idx, uint32_t access_flags)
+      : class_loader_(class_loader), class_linker_(class_linker), dex_file_(&dex_file),
+        code_item_(code_item), method_idx_(method_idx), access_flags_(access_flags) {
   }
 
   OatCompilationUnit* GetCallee(uint32_t callee_method_idx,
                                 uint32_t callee_access_flags) {
-    return new OatCompilationUnit(class_loader_, class_linker_, *dex_file_,
-                                  *dex_cache_, NULL, callee_method_idx,
-                                  callee_access_flags);
+    return new OatCompilationUnit(class_loader_, class_linker_, *dex_file_, NULL,
+                                  callee_method_idx, callee_access_flags);
   }
 
-  ClassLoader* GetClassLoader() const {
+  jobject GetClassLoader() const {
     return class_loader_;
   }
 
@@ -58,10 +54,6 @@
     return dex_file_;
   }
 
-  DexCache* GetDexCache() const {
-    return dex_cache_;
-  }
-
   uint32_t GetDexMethodIndex() const {
     return method_idx_;
   }
@@ -85,15 +77,14 @@
   }
 
  public:
-  ClassLoader* class_loader_;
-  ClassLinker* class_linker_;
+  jobject class_loader_;
+  ClassLinker* const class_linker_;
 
-  const DexFile* dex_file_;
-  DexCache* dex_cache_;
+  const DexFile* const dex_file_;
 
-  const DexFile::CodeItem* code_item_;
-  uint32_t method_idx_;
-  uint32_t access_flags_;
+  const DexFile::CodeItem* const code_item_;
+  const uint32_t method_idx_;
+  const uint32_t access_flags_;
 };
 
 } // namespace art
diff --git a/src/oat_test.cc b/src/oat_test.cc
index dae61bb..288854b 100644
--- a/src/oat_test.cc
+++ b/src/oat_test.cc
@@ -25,7 +25,8 @@
  protected:
   void CheckMethod(Method* method,
                    const OatFile::OatMethod& oat_method,
-                   const DexFile* dex_file) {
+                   const DexFile* dex_file)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const CompiledMethod* compiled_method =
         compiler_->GetCompiledMethod(Compiler::MethodReference(dex_file,
                                                                method->GetDexMethodIndex()));
@@ -62,15 +63,16 @@
   const bool compile = false;  // DISABLED_ due to the time to compile libcore
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
-  SirtRef<ClassLoader> class_loader(NULL);
+  jobject class_loader = NULL;
   if (compile) {
     compiler_.reset(new Compiler(kThumb2, false, 2, false, NULL, true, true));
-    compiler_->CompileAll(class_loader.get(), class_linker->GetBootClassPath());
+    compiler_->CompileAll(class_loader, class_linker->GetBootClassPath());
   }
 
+  ScopedObjectAccess soa(Thread::Current());
   ScratchFile tmp;
   bool success = OatWriter::Create(tmp.GetFile(),
-                                   class_loader.get(),
+                                   class_loader,
                                    class_linker->GetBootClassPath(),
                                    42U,
                                    "lue.art",
@@ -78,7 +80,7 @@
   ASSERT_TRUE(success);
 
   if (compile) {  // OatWriter strips the code, regenerate to compare
-    compiler_->CompileAll(class_loader.get(), class_linker->GetBootClassPath());
+    compiler_->CompileAll(class_loader, class_linker->GetBootClassPath());
   }
   UniquePtr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(),
                                             tmp.GetFilename(),
@@ -105,7 +107,7 @@
 
     UniquePtr<const OatFile::OatClass> oat_class(oat_dex_file->GetOatClass(i));
 
-    Class* klass = class_linker->FindClass(descriptor, class_loader.get());
+    Class* klass = class_linker->FindClass(descriptor, NULL);
 
     size_t method_index = 0;
     for (size_t i = 0; i < klass->NumDirectMethods(); i++, method_index++) {
diff --git a/src/oat_writer.cc b/src/oat_writer.cc
index 013a561..0546f2b 100644
--- a/src/oat_writer.cc
+++ b/src/oat_writer.cc
@@ -23,13 +23,14 @@
 #include "file.h"
 #include "os.h"
 #include "safe_map.h"
+#include "scoped_thread_state_change.h"
 #include "space.h"
 #include "stl_util.h"
 
 namespace art {
 
 bool OatWriter::Create(File* file,
-                       ClassLoader* class_loader,
+                       jobject class_loader,
                        const std::vector<const DexFile*>& dex_files,
                        uint32_t image_file_location_checksum,
                        const std::string& image_file_location,
@@ -45,7 +46,7 @@
 OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
                      uint32_t image_file_location_checksum,
                      const std::string& image_file_location,
-                     ClassLoader* class_loader,
+                     jobject class_loader,
                      const Compiler& compiler) {
   compiler_ = &compiler;
   class_loader_ = class_loader;
@@ -380,8 +381,10 @@
   if (compiler_->IsImage()) {
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
     DexCache* dex_cache = linker->FindDexCache(*dex_file);
-    Method* method = linker->ResolveMethod(*dex_file, method_idx, dex_cache, class_loader_,
-                                           is_direct);
+    // Unchecked as we hold mutator_lock_ on entry.
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    Method* method = linker->ResolveMethod(*dex_file, method_idx, dex_cache,
+                                           soa.Decode<ClassLoader*>(class_loader_), is_direct);
     CHECK(method != NULL);
     method->SetFrameSizeInBytes(frame_size_in_bytes);
     method->SetCoreSpillMask(core_spill_mask);
diff --git a/src/oat_writer.h b/src/oat_writer.h
index b748dbc..60a79a2 100644
--- a/src/oat_writer.h
+++ b/src/oat_writer.h
@@ -64,18 +64,19 @@
  public:
   // Write an oat file. Returns true on success, false on failure.
   static bool Create(File* file,
-                     ClassLoader* class_loader,
+                     jobject class_loader,
                      const std::vector<const DexFile*>& dex_files,
                      uint32_t image_file_location_checksum,
                      const std::string& image_file_location,
-                     const Compiler& compiler);
+                     const Compiler& compiler)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  private:
   OatWriter(const std::vector<const DexFile*>& dex_files,
             uint32_t image_file_location_checksum,
             const std::string& image_file_location,
-            ClassLoader* class_loader,
-            const Compiler& compiler);
+            jobject class_loader,
+            const Compiler& compiler) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   ~OatWriter();
 
   size_t InitOatHeader();
@@ -83,17 +84,21 @@
   size_t InitDexFiles(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatCode(size_t offset);
-  size_t InitOatCodeDexFiles(size_t offset);
+  size_t InitOatCodeDexFiles(size_t offset)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   size_t InitOatCodeDexFile(size_t offset,
                             size_t& oat_class_index,
-                            const DexFile& dex_file);
+                            const DexFile& dex_file)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   size_t InitOatCodeClassDef(size_t offset,
                              size_t oat_class_index, size_t class_def_index,
                              const DexFile& dex_file,
-                             const DexFile::ClassDef& class_def);
+                             const DexFile::ClassDef& class_def)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   size_t InitOatCodeMethod(size_t offset, size_t oat_class_index, size_t class_def_index,
                            size_t class_def_method_index, bool is_native, bool is_static,
-                           bool is_direct, uint32_t method_idx, const DexFile*);
+                           bool is_direct, uint32_t method_idx, const DexFile*)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool Write(File* file);
   bool WriteTables(File* file);
@@ -146,7 +151,7 @@
   const Compiler* compiler_;
 
   // TODO: remove the ClassLoader when the code storage moves out of Method
-  ClassLoader* class_loader_;
+  jobject class_loader_;
 
   // note OatFile does not take ownership of the DexFiles
   const std::vector<const DexFile*>* dex_files_;
diff --git a/src/oatdump.cc b/src/oatdump.cc
index b1aa47e..2fc728e 100644
--- a/src/oatdump.cc
+++ b/src/oatdump.cc
@@ -32,6 +32,7 @@
 #include "os.h"
 #include "runtime.h"
 #include "safe_map.h"
+#include "scoped_thread_state_change.h"
 #include "space.h"
 #include "stringpiece.h"
 #include "verifier/gc_map.h"
@@ -155,7 +156,7 @@
     return oat_file_.GetOatHeader().GetInstructionSet();
   }
 
-  const void* GetOatCode(Method* m) {
+  const void* GetOatCode(Method* m) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     MethodHelper mh(m);
     for (size_t i = 0; i < oat_dex_files_.size(); i++) {
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
@@ -486,7 +487,7 @@
       : os_(os), image_filename_(image_filename), host_prefix_(host_prefix),
         image_space_(image_space), image_header_(image_header) {}
 
-  void Dump() {
+  void Dump() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     os_ << "MAGIC:\n";
     os_ << image_header_.GetMagic() << "\n\n";
 
@@ -578,7 +579,8 @@
   }
 
  private:
-  static void PrettyObjectValue(std::string& summary, Class* type, Object* value) {
+  static void PrettyObjectValue(std::string& summary, Class* type, Object* value)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK(type != NULL);
     if (value == NULL) {
       StringAppendF(&summary, "null   %s\n", PrettyDescriptor(type).c_str());
@@ -599,7 +601,8 @@
     }
   }
 
-  static void PrintField(std::string& summary, Field* field, Object* obj) {
+  static void PrintField(std::string& summary, Field* field, Object* obj)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     FieldHelper fh(field);
     Class* type = fh.GetType();
     StringAppendF(&summary, "\t%s: ", fh.GetName());
@@ -617,7 +620,8 @@
     }
   }
 
-  static void DumpFields(std::string& summary, Object* obj, Class* klass) {
+  static void DumpFields(std::string& summary, Object* obj, Class* klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Class* super = klass->GetSuperClass();
     if (super != NULL) {
       DumpFields(summary, obj, super);
@@ -635,7 +639,8 @@
     return image_space_.Contains(object);
   }
 
-  const void* GetOatCodeBegin(Method* m) {
+  const void* GetOatCodeBegin(Method* m)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Runtime* runtime = Runtime::Current();
     const void* code = m->GetCode();
     if (code == runtime->GetResolutionStubArray(Runtime::kStaticMethod)->GetData()) {
@@ -647,7 +652,8 @@
     return code;
   }
 
-  uint32_t GetOatCodeSize(Method* m) {
+  uint32_t GetOatCodeSize(Method* m)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const uint32_t* oat_code_begin = reinterpret_cast<const uint32_t*>(GetOatCodeBegin(m));
     if (oat_code_begin == NULL) {
       return 0;
@@ -655,7 +661,8 @@
     return oat_code_begin[-1];
   }
 
-  const void* GetOatCodeEnd(Method* m) {
+  const void* GetOatCodeEnd(Method* m)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const uint8_t* oat_code_begin = reinterpret_cast<const uint8_t*>(GetOatCodeBegin(m));
     if (oat_code_begin == NULL) {
       return NULL;
@@ -663,7 +670,8 @@
     return oat_code_begin + GetOatCodeSize(m);
   }
 
-  static void Callback(Object* obj, void* arg) {
+  static void Callback(Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(obj != NULL);
     DCHECK(arg != NULL);
     ImageDumper* state = reinterpret_cast<ImageDumper*>(arg);
@@ -921,7 +929,8 @@
       method_outlier.push_back(method);
     }
 
-    void DumpOutliers(std::ostream& os) {
+    void DumpOutliers(std::ostream& os)
+        SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
       size_t sum_of_sizes = 0;
       size_t sum_of_sizes_squared = 0;
       size_t sum_of_expansion = 0;
@@ -1021,7 +1030,7 @@
       os << "\n" << std::flush;
     }
 
-    void Dump(std::ostream& os) {
+    void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
       os << "\tart_file_bytes = " << PrettySize(file_bytes) << "\n\n"
          << "\tart_file_bytes = header_bytes + object_bytes + alignment_bytes\n"
          << StringPrintf("\theader_bytes    =  %8zd (%2.0f%% of art file bytes)\n"
@@ -1197,11 +1206,15 @@
     options.push_back(std::make_pair("host-prefix", host_prefix->c_str()));
   }
 
-  UniquePtr<Runtime> runtime(Runtime::Create(options, false));
-  if (runtime.get() == NULL) {
+  if (!Runtime::Create(options, false)) {
     fprintf(stderr, "Failed to create runtime\n");
     return EXIT_FAILURE;
   }
+  UniquePtr<Runtime> runtime(Runtime::Current());
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
+  // give it away now and then switch to a more manageable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  ScopedObjectAccess soa(Thread::Current());
 
   Heap* heap = Runtime::Current()->GetHeap();
   ImageSpace* image_space = heap->GetImageSpace();
diff --git a/src/object.cc b/src/object.cc
index ecaef3b..dd984fc 100644
--- a/src/object.cc
+++ b/src/object.cc
@@ -425,7 +425,8 @@
   return result;
 }
 
-static const void* GetOatCode(const Method* m) {
+static const void* GetOatCode(const Method* m)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Runtime* runtime = Runtime::Current();
   const void* code = m->GetCode();
   // Peel off any method tracing trampoline.
@@ -513,10 +514,13 @@
 }
 
 void Method::Invoke(Thread* self, Object* receiver, JValue* args, JValue* result) const {
-  // Push a transition back into managed code onto the linked list in thread.
-  CHECK_EQ(kRunnable, self->GetState());
-  self->AssertThreadSuspensionIsAllowable();
+  if (kIsDebugBuild) {
+    self->AssertThreadSuspensionIsAllowable();
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_EQ(kRunnable, self->GetState());
+  }
 
+  // Push a transition back into managed code onto the linked list in thread.
   ManagedStack fragment;
   self->PushManagedStackFragment(&fragment);
 
@@ -1219,6 +1223,7 @@
 }
 
 Array* Array::Alloc(Class* array_class, int32_t component_count) {
+  DCHECK(array_class->IsArrayClass());
   return Alloc(array_class, component_count, array_class->GetComponentSize());
 }
 
diff --git a/src/object.h b/src/object.h
index 0fc6acc..03ed132 100644
--- a/src/object.h
+++ b/src/object.h
@@ -209,35 +209,48 @@
 
   void SetClass(Class* new_klass);
 
-  bool InstanceOf(const Class* klass) const;
+  bool InstanceOf(const Class* klass) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  size_t SizeOf() const;
+  size_t SizeOf() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Object* Clone();
+  Object* Clone() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+
+  int32_t IdentityHashCode() const {
+  #ifdef MOVING_GARBAGE_COLLECTOR
+    // TODO: we'll need to use the Object's internal concept of identity
+      UNIMPLEMENTED(FATAL);
+  #endif
+    return reinterpret_cast<int32_t>(this);
+  }
 
   static MemberOffset MonitorOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
   }
 
   volatile int32_t* GetRawLockWordAddress() {
-    byte* raw_addr = reinterpret_cast<byte*>(this) + OFFSET_OF_OBJECT_MEMBER(Object, monitor_).Int32Value();
+    byte* raw_addr = reinterpret_cast<byte*>(this) +
+        OFFSET_OF_OBJECT_MEMBER(Object, monitor_).Int32Value();
     int32_t* word_addr = reinterpret_cast<int32_t*>(raw_addr);
     return const_cast<volatile int32_t*>(word_addr);
   }
 
   uint32_t GetThinLockId();
 
-  void MonitorEnter(Thread* thread);
+  void MonitorEnter(Thread* thread) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+      EXCLUSIVE_LOCK_FUNCTION(monitor_lock_);
 
-  bool MonitorExit(Thread* thread);
+  bool MonitorExit(Thread* thread) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+      UNLOCK_FUNCTION(monitor_lock_);
 
-  void Notify();
+  void Notify() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void NotifyAll();
+  void NotifyAll() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void Wait(int64_t timeout);
+  void Wait(int64_t timeout) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void Wait(int64_t timeout, int32_t nanos);
+  void Wait(int64_t timeout, int32_t nanos)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool IsClass() const;
 
@@ -285,14 +298,14 @@
     return down_cast<const Method*>(this);
   }
 
-  bool IsField() const;
+  bool IsField() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Field* AsField() {
+  Field* AsField() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(IsField());
     return down_cast<Field*>(this);
   }
 
-  const Field* AsField() const {
+  const Field* AsField() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(IsField());
     return down_cast<const Field*>(this);
   }
@@ -403,16 +416,6 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(Object);
 };
 
-struct ObjectIdentityHash {
-  size_t operator()(const Object* const& obj) const {
-#ifdef MOVING_GARBAGE_COLLECTOR
-  // TODO: we'll need to use the Object's internal concept of identity
-    UNIMPLEMENTED(FATAL);
-#endif
-    return reinterpret_cast<size_t>(obj);
-  }
-};
-
 // C++ mirror of java.lang.reflect.Field
 class MANAGED Field : public Object {
  public:
@@ -458,32 +461,56 @@
   void SetOffset(MemberOffset num_bytes);
 
   // field access, null object for static fields
-  bool GetBoolean(const Object* object) const;
-  void SetBoolean(Object* object, bool z) const;
-  int8_t GetByte(const Object* object) const;
-  void SetByte(Object* object, int8_t b) const;
-  uint16_t GetChar(const Object* object) const;
-  void SetChar(Object* object, uint16_t c) const;
-  int16_t GetShort(const Object* object) const;
-  void SetShort(Object* object, int16_t s) const;
-  int32_t GetInt(const Object* object) const;
-  void SetInt(Object* object, int32_t i) const;
-  int64_t GetLong(const Object* object) const;
-  void SetLong(Object* object, int64_t j) const;
-  float GetFloat(const Object* object) const;
-  void SetFloat(Object* object, float f) const;
-  double GetDouble(const Object* object) const;
-  void SetDouble(Object* object, double d) const;
-  Object* GetObject(const Object* object) const;
-  void SetObject(Object* object, const Object* l) const;
+  bool GetBoolean(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetBoolean(Object* object, bool z) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  int8_t GetByte(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetByte(Object* object, int8_t b) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  uint16_t GetChar(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetChar(Object* object, uint16_t c) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  int16_t GetShort(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetShort(Object* object, int16_t s) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  int32_t GetInt(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetInt(Object* object, int32_t i) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  int64_t GetLong(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetLong(Object* object, int64_t j) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  float GetFloat(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetFloat(Object* object, float f) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  double GetDouble(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetDouble(Object* object, double d) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  Object* GetObject(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetObject(Object* object, const Object* l) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // raw field accesses
-  uint32_t Get32(const Object* object) const;
-  void Set32(Object* object, uint32_t new_value) const;
-  uint64_t Get64(const Object* object) const;
-  void Set64(Object* object, uint64_t new_value) const;
-  Object* GetObj(const Object* object) const;
-  void SetObj(Object* object, const Object* new_value) const;
+  uint32_t Get32(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void Set32(Object* object, uint32_t new_value) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  uint64_t Get64(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void Set64(Object* object, uint64_t new_value) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  Object* GetObj(const Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void SetObj(Object* object, const Object* new_value) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static Class* GetJavaLangReflectField() {
     DCHECK(java_lang_reflect_Field_ != NULL);
@@ -658,9 +685,10 @@
   void SetDexCacheInitializedStaticStorage(ObjectArray<StaticStorageBase>* new_value);
 
   // Find the method that this method overrides
-  Method* FindOverriddenMethod() const;
+  Method* FindOverriddenMethod() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void Invoke(Thread* self, Object* receiver, JValue* args, JValue* result) const;
+  void Invoke(Thread* self, Object* receiver, JValue* args, JValue* result) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   const void* GetCode() const {
     return GetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(Method, code_), false);
@@ -670,7 +698,7 @@
     SetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(Method, code_), code, false);
   }
 
-  uint32_t GetCodeSize() const {
+  uint32_t GetCodeSize() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(!IsRuntimeMethod() && !IsProxyMethod()) << PrettyMethod(this);
     uintptr_t code = reinterpret_cast<uintptr_t>(GetCode());
     if (code == 0) {
@@ -681,7 +709,8 @@
     return reinterpret_cast<uint32_t*>(code)[-1];
   }
 
-  bool IsWithinCode(uintptr_t pc) const {
+  bool IsWithinCode(uintptr_t pc) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     uintptr_t code = reinterpret_cast<uintptr_t>(GetCode());
     if (code == 0) {
       return pc == 0;
@@ -689,7 +718,8 @@
     return (code <= pc && pc < code + GetCodeSize());
   }
 
-  void AssertPcIsWithinCode(uintptr_t pc) const;
+  void AssertPcIsWithinCode(uintptr_t pc) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   uint32_t GetOatCodeOffset() const {
     DCHECK(!Runtime::Current()->IsStarted());
@@ -813,9 +843,10 @@
 
   bool IsRegistered() const;
 
-  void RegisterNative(Thread* self, const void* native_method);
+  void RegisterNative(Thread* self, const void* native_method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void UnregisterNative(Thread* self);
+  void UnregisterNative(Thread* self) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static MemberOffset NativeMethodOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Method, native_method_);
@@ -915,14 +946,17 @@
 
   // Converts a native PC to a dex PC.  TODO: this is a no-op
   // until we associate a PC mapping table with each method.
-  uint32_t ToDexPC(const uintptr_t pc) const;
+  uint32_t ToDexPC(const uintptr_t pc) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Converts a dex PC to a native PC.  TODO: this is a no-op
   // until we associate a PC mapping table with each method.
-  uintptr_t ToNativePC(const uint32_t dex_pc) const;
+  uintptr_t ToNativePC(const uint32_t dex_pc) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Find the catch block for the given exception type and dex_pc
-  uint32_t FindCatchBlock(Class* exception_type, uint32_t dex_pc) const;
+  uint32_t FindCatchBlock(Class* exception_type, uint32_t dex_pc) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static void SetClasses(Class* java_lang_reflect_Constructor, Class* java_lang_reflect_Method);
 
@@ -1012,9 +1046,11 @@
  public:
   // A convenience for code that doesn't know the component size,
   // and doesn't want to have to work it out itself.
-  static Array* Alloc(Class* array_class, int32_t component_count);
+  static Array* Alloc(Class* array_class, int32_t component_count)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static Array* Alloc(Class* array_class, int32_t component_count, size_t component_size);
+  static Array* Alloc(Class* array_class, int32_t component_count, size_t component_size)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   size_t SizeOf() const;
 
@@ -1051,7 +1087,8 @@
   }
 
  protected:
-  bool IsValidIndex(int32_t index) const {
+  bool IsValidIndex(int32_t index) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (UNLIKELY(index < 0 || index >= length_)) {
       return ThrowArrayIndexOutOfBoundsException(index);
     }
@@ -1059,8 +1096,10 @@
   }
 
  protected:
-  bool ThrowArrayIndexOutOfBoundsException(int32_t index) const;
-  bool ThrowArrayStoreException(Object* object) const;
+  bool ThrowArrayIndexOutOfBoundsException(int32_t index) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  bool ThrowArrayStoreException(Object* object) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  private:
   // The number of array elements.
@@ -1074,23 +1113,27 @@
 template<class T>
 class MANAGED ObjectArray : public Array {
  public:
-  static ObjectArray<T>* Alloc(Class* object_array_class, int32_t length);
+  static ObjectArray<T>* Alloc(Class* object_array_class, int32_t length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  T* Get(int32_t i) const;
+  T* Get(int32_t i) const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void Set(int32_t i, T* object);
+  void Set(int32_t i, T* object) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Set element without bound and element type checks, to be used in limited
   // circumstances, such as during boot image writing
-  void SetWithoutChecks(int32_t i, T* object);
+  void SetWithoutChecks(int32_t i, T* object)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  T* GetWithoutChecks(int32_t i) const;
+  T* GetWithoutChecks(int32_t i) const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static void Copy(const ObjectArray<T>* src, int src_pos,
                    ObjectArray<T>* dst, int dst_pos,
-                   size_t length);
+                   size_t length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  ObjectArray<T>* CopyOf(int32_t new_length);
+  ObjectArray<T>* CopyOf(int32_t new_length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  private:
   DISALLOW_IMPLICIT_CONSTRUCTORS(ObjectArray);
@@ -1181,7 +1224,7 @@
     return static_cast<Status>(GetField32(OFFSET_OF_OBJECT_MEMBER(Class, status_), false));
   }
 
-  void SetStatus(Status new_status);
+  void SetStatus(Status new_status) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Returns true if the class has failed to link.
   bool IsErroneous() const {
@@ -1291,7 +1334,8 @@
 
   String* GetName() const; // Returns the cached name
   void SetName(String* name);  // Sets the cached name
-  String* ComputeName();  // Computes the name, then sets the cached value
+  String* ComputeName()  // Computes the name, then sets the cached value
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool IsProxyClass() const {
     // Read access flags without using getter as whether something is a proxy can be check in
@@ -1370,7 +1414,7 @@
 
   bool IsStringClass() const;
 
-  bool IsThrowableClass() const;
+  bool IsThrowableClass() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   Class* GetComponentType() const {
     return GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(Class, component_type_), false);
@@ -1394,7 +1438,7 @@
   }
 
   // Creates a raw object instance but does not invoke the default constructor.
-  Object* AllocObject();
+  Object* AllocObject() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool IsVariableSize() const {
     // Classes and arrays vary in size, and so the object_size_ field cannot
@@ -1412,9 +1456,10 @@
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), false);
   }
 
-  void SetClassSize(size_t new_class_size);
+  void SetClassSize(size_t new_class_size)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  size_t GetObjectSize() const {
+  size_t GetObjectSize() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK(!IsVariableSize()) << " class=" << PrettyTypeOf(this);
     DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     size_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_), false);
@@ -1429,19 +1474,21 @@
   }
 
   // Returns true if this class is in the same packages as that class.
-  bool IsInSamePackage(const Class* that) const;
+  bool IsInSamePackage(const Class* that) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static bool IsInSamePackage(const StringPiece& descriptor1, const StringPiece& descriptor2);
 
   // Returns true if this class can access that class.
-  bool CanAccess(Class* that) const {
+  bool CanAccess(Class* that) const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return that->IsPublic() || this->IsInSamePackage(that);
   }
 
   // Can this class access a member in the provided class with the provided member access flags?
   // Note that access to the class isn't checked in case the declaring class is protected and the
   // method has been exposed by a public sub-class
-  bool CanAccessMember(Class* access_to, uint32_t member_flags) const {
+  bool CanAccessMember(Class* access_to, uint32_t member_flags) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     // Classes can access all of their own members
     if (this == access_to) {
       return true;
@@ -1464,14 +1511,16 @@
     return this->IsInSamePackage(access_to);
   }
 
-  bool IsSubClass(const Class* klass) const;
+  bool IsSubClass(const Class* klass) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Can src be assigned to this class? For example, String can be assigned to Object (by an
   // upcast), however, an Object cannot be assigned to a String as a potentially exception throwing
   // downcast would be necessary. Similarly for interfaces, a class that implements (or an interface
   // that extends) another can be assigned to its parent, but not vice-versa. All Classes may assign
   // to themselves. Classes for primitive types may not assign to each other.
-  bool IsAssignableFrom(const Class* src) const {
+  bool IsAssignableFrom(const Class* src) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(src != NULL);
     if (this == src) {
       // Can always assign to things of the same type
@@ -1526,7 +1575,8 @@
     kDumpClassInitialized = (1 << 2),
   };
 
-  void DumpClass(std::ostream& os, int flags) const;
+  void DumpClass(std::ostream& os, int flags) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   DexCache* GetDexCache() const;
 
@@ -1546,11 +1596,13 @@
                    new_direct_methods, false);
   }
 
-  Method* GetDirectMethod(int32_t i) const {
+  Method* GetDirectMethod(int32_t i) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetDirectMethods()->Get(i);
   }
 
-  void SetDirectMethod(uint32_t i, Method* f) {  // TODO: uint16_t
+  void SetDirectMethod(uint32_t i, Method* f)  // TODO: uint16_t
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_){
     ObjectArray<Method>* direct_methods =
         GetFieldObject<ObjectArray<Method>*>(
             OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_), false);
@@ -1581,17 +1633,20 @@
     return (GetVirtualMethods() != NULL) ? GetVirtualMethods()->GetLength() : 0;
   }
 
-  Method* GetVirtualMethod(uint32_t i) const {
+  Method* GetVirtualMethod(uint32_t i) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(IsResolved() || IsErroneous());
     return GetVirtualMethods()->Get(i);
   }
 
-  Method* GetVirtualMethodDuringLinking(uint32_t i) const {
+  Method* GetVirtualMethodDuringLinking(uint32_t i) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(IsLoaded() || IsErroneous());
     return GetVirtualMethods()->Get(i);
   }
 
-  void SetVirtualMethod(uint32_t i, Method* f) {  // TODO: uint16_t
+  void SetVirtualMethod(uint32_t i, Method* f)  // TODO: uint16_t
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ObjectArray<Method>* virtual_methods =
         GetFieldObject<ObjectArray<Method>*>(
             OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_), false);
@@ -1619,7 +1674,8 @@
   // Given a method implemented by this class but potentially from a
   // super class, return the specific implementation
   // method for this class.
-  Method* FindVirtualMethodForVirtual(Method* method) {
+  Method* FindVirtualMethodForVirtual(Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(!method->GetDeclaringClass()->IsInterface());
     // The argument method may from a super class.
     // Use the index to a potentially overridden one for this instance's class.
@@ -1629,13 +1685,17 @@
   // Given a method implemented by this class, but potentially from a
   // super class or interface, return the specific implementation
   // method for this class.
-  Method* FindVirtualMethodForInterface(Method* method);
+  Method* FindVirtualMethodForInterface(Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindInterfaceMethod(const StringPiece& name, const StringPiece& descriptor) const;
+  Method* FindInterfaceMethod(const StringPiece& name, const StringPiece& descriptor) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const;
+  Method* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindVirtualMethodForVirtualOrInterface(Method* method) {
+  Method* FindVirtualMethodForVirtualOrInterface(Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (method->IsDirect()) {
       return method;
     }
@@ -1645,21 +1705,29 @@
     return FindVirtualMethodForVirtual(method);
   }
 
-  Method* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) const;
+  Method* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const;
+  Method* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindVirtualMethod(const StringPiece& name, const StringPiece& descriptor) const;
+  Method* FindVirtualMethod(const StringPiece& name, const StringPiece& descriptor) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const;
+  Method* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) const;
+  Method* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const;
+  Method* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindDirectMethod(const StringPiece& name, const StringPiece& signature) const;
+  Method* FindDirectMethod(const StringPiece& name, const StringPiece& signature) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const;
+  Method* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   int32_t GetIfTableCount() const {
     ObjectArray<InterfaceEntry>* iftable = GetIfTable();
@@ -1695,12 +1763,14 @@
     return (GetIFields() != NULL) ? GetIFields()->GetLength() : 0;
   }
 
-  Field* GetInstanceField(uint32_t i) const {  // TODO: uint16_t
+  Field* GetInstanceField(uint32_t i) const  // TODO: uint16_t
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_){
     DCHECK_NE(NumInstanceFields(), 0U);
     return GetIFields()->Get(i);
   }
 
-  void SetInstanceField(uint32_t i, Field* f) {  // TODO: uint16_t
+  void SetInstanceField(uint32_t i, Field* f)  // TODO: uint16_t
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_){
     ObjectArray<Field>* ifields= GetFieldObject<ObjectArray<Field>*>(
         OFFSET_OF_OBJECT_MEMBER(Class, ifields_), false);
     ifields->Set(i, f);
@@ -1770,11 +1840,13 @@
     return (GetSFields() != NULL) ? GetSFields()->GetLength() : 0;
   }
 
-  Field* GetStaticField(uint32_t i) const {  // TODO: uint16_t
+  Field* GetStaticField(uint32_t i) const  // TODO: uint16_t
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetSFields()->Get(i);
   }
 
-  void SetStaticField(uint32_t i, Field* f) {  // TODO: uint16_t
+  void SetStaticField(uint32_t i, Field* f)  // TODO: uint16_t
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ObjectArray<Field>* sfields= GetFieldObject<ObjectArray<Field>*>(
         OFFSET_OF_OBJECT_MEMBER(Class, sfields_), false);
     sfields->Set(i, f);
@@ -1787,29 +1859,38 @@
   void SetReferenceStaticOffsets(uint32_t new_reference_offsets);
 
   // Find a static or instance field using the JLS resolution order
-  Field* FindField(const StringPiece& name, const StringPiece& type);
+  Field* FindField(const StringPiece& name, const StringPiece& type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Finds the given instance field in this class or a superclass.
-  Field* FindInstanceField(const StringPiece& name, const StringPiece& type);
+  Field* FindInstanceField(const StringPiece& name, const StringPiece& type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Finds the given instance field in this class or a superclass, only searches classes that
   // have the same dex cache.
-  Field* FindInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx);
+  Field* FindInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Field* FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type);
+  Field* FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Field* FindDeclaredInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx);
+  Field* FindDeclaredInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Finds the given static field in this class or a superclass.
-  Field* FindStaticField(const StringPiece& name, const StringPiece& type);
+  Field* FindStaticField(const StringPiece& name, const StringPiece& type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Finds the given static field in this class or superclass, only searches classes that
   // have the same dex cache.
-  Field* FindStaticField(const DexCache* dex_cache, uint32_t dex_field_idx);
+  Field* FindStaticField(const DexCache* dex_cache, uint32_t dex_field_idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Field* FindDeclaredStaticField(const StringPiece& name, const StringPiece& type);
+  Field* FindDeclaredStaticField(const StringPiece& name, const StringPiece& type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Field* FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx);
+  Field* FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   pid_t GetClinitThreadId() const {
     DCHECK(IsIdxLoaded() || IsErroneous());
@@ -1834,14 +1915,18 @@
   }
 
  private:
-  void SetVerifyErrorClass(Class* klass) {
+  void SetVerifyErrorClass(Class* klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK(klass != NULL) << PrettyClass(this);
     SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), klass, false);
   }
 
-  bool Implements(const Class* klass) const;
-  bool IsArrayAssignableFromArray(const Class* klass) const;
-  bool IsAssignableFromArray(const Class* klass) const;
+  bool Implements(const Class* klass) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  bool IsArrayAssignableFromArray(const Class* klass) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  bool IsAssignableFromArray(const Class* klass) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // defining class loader, or NULL for the "bootstrap" system loader
   ClassLoader* class_loader_;
@@ -1995,8 +2080,7 @@
 
 inline bool Object::IsField() const {
   Class* java_lang_Class = klass_->klass_;
-  Class* java_lang_reflect_Field =
-      java_lang_Class->GetInstanceField(0)->GetClass();
+  Class* java_lang_reflect_Field = java_lang_Class->GetInstanceField(0)->GetClass();
   return GetClass() == java_lang_reflect_Field;
 }
 
@@ -2178,7 +2262,8 @@
  public:
   typedef T ElementType;
 
-  static PrimitiveArray<T>* Alloc(size_t length);
+  static PrimitiveArray<T>* Alloc(size_t length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   const T* GetData() const {
     intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(sizeof(T)).Int32Value();
@@ -2190,14 +2275,14 @@
     return reinterpret_cast<T*>(data);
   }
 
-  T Get(int32_t i) const {
+  T Get(int32_t i) const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (!IsValidIndex(i)) {
       return T(0);
     }
     return GetData()[i];
   }
 
-  void Set(int32_t i, T value) {
+  void Set(int32_t i, T value) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     // TODO: ArrayStoreException
     if (IsValidIndex(i)) {
       GetData()[i] = value;
@@ -2251,9 +2336,9 @@
 
   int32_t GetLength() const;
 
-  int32_t GetHashCode();
+  int32_t GetHashCode() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void ComputeHashCode() {
+  void ComputeHashCode() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     SetHashCode(ComputeUtf16Hash(GetCharArray(), GetOffset(), GetLength()));
   }
 
@@ -2261,36 +2346,44 @@
     return CountUtf8Bytes(GetCharArray()->GetData() + GetOffset(), GetLength());
   }
 
-  uint16_t CharAt(int32_t index) const;
+  uint16_t CharAt(int32_t index) const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  String* Intern();
+  String* Intern() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static String* AllocFromUtf16(int32_t utf16_length,
                                 const uint16_t* utf16_data_in,
-                                int32_t hash_code = 0);
+                                int32_t hash_code = 0)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static String* AllocFromModifiedUtf8(const char* utf);
+  static String* AllocFromModifiedUtf8(const char* utf)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static String* AllocFromModifiedUtf8(int32_t utf16_length,
-                                       const char* utf8_data_in);
+                                       const char* utf8_data_in)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static String* Alloc(Class* java_lang_String, int32_t utf16_length);
+  static String* Alloc(Class* java_lang_String, int32_t utf16_length)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static String* Alloc(Class* java_lang_String, CharArray* array);
+  static String* Alloc(Class* java_lang_String, CharArray* array)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  bool Equals(const char* modified_utf8) const;
+  bool Equals(const char* modified_utf8) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // TODO: do we need this overload? give it a more intention-revealing name.
-  bool Equals(const StringPiece& modified_utf8) const;
+  bool Equals(const StringPiece& modified_utf8) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  bool Equals(const String* that) const;
+  bool Equals(const String* that) const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Compare UTF-16 code point values not in a locale-sensitive manner
   int Compare(int32_t utf16_length, const char* utf8_data_in);
 
   // TODO: do we need this overload? give it a more intention-revealing name.
   bool Equals(const uint16_t* that_chars, int32_t that_offset,
-              int32_t that_length) const;
+              int32_t that_length) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Create a modified UTF-8 encoded std::string from a java/lang/String object.
   std::string ToModifiedUtf8() const;
@@ -2343,6 +2436,7 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(String);
 };
 
+// TODO: remove? only used in a unit test of itself.
 struct StringHashCode {
   int32_t operator()(String* string) const {
     return string->GetHashCode();
@@ -2425,13 +2519,13 @@
   String* GetDetailMessage() const {
     return GetFieldObject<String*>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_), false);
   }
-  std::string Dump() const;
+  std::string Dump() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // This is a runtime version of initCause, you shouldn't use it if initCause may have been
   // overridden. Also it asserts rather than throwing exceptions. Currently this is only used
   // in cases like the verifier where the checks cannot fail and initCause isn't overridden.
   void SetCause(Throwable* cause);
-  bool IsCheckedException() const;
+  bool IsCheckedException() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static Class* GetJavaLangThrowable() {
     DCHECK(java_lang_Throwable_ != NULL);
@@ -2485,7 +2579,8 @@
   static StackTraceElement* Alloc(String* declaring_class,
                                   String* method_name,
                                   String* file_name,
-                                  int32_t line_number);
+                                  int32_t line_number)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static void SetClass(Class* java_lang_StackTraceElement);
 
@@ -2511,20 +2606,20 @@
 
 class MANAGED InterfaceEntry : public ObjectArray<Object> {
  public:
-  Class* GetInterface() const {
+  Class* GetInterface() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Class* interface = Get(kInterface)->AsClass();
     DCHECK(interface != NULL);
     return interface;
   }
 
-  void SetInterface(Class* interface) {
+  void SetInterface(Class* interface) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(interface != NULL);
     DCHECK(interface->IsInterface());
     DCHECK(Get(kInterface) == NULL);
     Set(kInterface, interface);
   }
 
-  size_t GetMethodArrayCount() const {
+  size_t GetMethodArrayCount() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ObjectArray<Method>* method_array = down_cast<ObjectArray<Method>*>(Get(kMethodArray));
     if (method_array == NULL) {
       return 0;
@@ -2532,13 +2627,15 @@
     return method_array->GetLength();
   }
 
-  ObjectArray<Method>* GetMethodArray() const {
+  ObjectArray<Method>* GetMethodArray() const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ObjectArray<Method>* method_array = down_cast<ObjectArray<Method>*>(Get(kMethodArray));
     DCHECK(method_array != NULL);
     return method_array;
   }
 
-  void SetMethodArray(ObjectArray<Method>* new_ma) {
+  void SetMethodArray(ObjectArray<Method>* new_ma)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(new_ma != NULL);
     DCHECK(Get(kMethodArray) == NULL);
     Set(kMethodArray, new_ma);
diff --git a/src/object_test.cc b/src/object_test.cc
index c0049a3..bbb96c6 100644
--- a/src/object_test.cc
+++ b/src/object_test.cc
@@ -34,7 +34,8 @@
   void AssertString(int32_t length,
                     const char* utf8_in,
                     const char* utf16_expected_le,
-                    int32_t expected_hash) {
+                    int32_t expected_hash)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     UniquePtr<uint16_t[]> utf16_expected(new uint16_t[length]);
     for (int32_t i = 0; i < length; i++) {
       uint16_t ch = (((utf16_expected_le[i*2 + 0] & 0xff) << 8) |
@@ -74,6 +75,7 @@
 }
 
 TEST_F(ObjectTest, Clone) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<ObjectArray<Object> > a1(class_linker_->AllocObjectArray<Object>(256));
   size_t s1 = a1->SizeOf();
   Object* clone = a1->Clone();
@@ -82,6 +84,7 @@
 }
 
 TEST_F(ObjectTest, AllocObjectArray) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<ObjectArray<Object> > oa(class_linker_->AllocObjectArray<Object>(2));
   EXPECT_EQ(2, oa->GetLength());
   EXPECT_TRUE(oa->Get(0) == NULL);
@@ -114,6 +117,7 @@
 }
 
 TEST_F(ObjectTest, AllocArray) {
+  ScopedObjectAccess soa(Thread::Current());
   Class* c = class_linker_->FindSystemClass("[I");
   SirtRef<Array> a(Array::Alloc(c, 1));
   ASSERT_TRUE(c == a->GetClass());
@@ -129,6 +133,7 @@
 
 template<typename ArrayT>
 void TestPrimitiveArray(ClassLinker* cl) {
+  ScopedObjectAccess soa(Thread::Current());
   typedef typename ArrayT::ElementType T;
 
   ArrayT* a = ArrayT::Alloc(2);
@@ -183,6 +188,7 @@
 
 TEST_F(ObjectTest, CheckAndAllocArrayFromCode) {
   // pretend we are trying to call 'new char[3]' from String.toCharArray
+  ScopedObjectAccess soa(Thread::Current());
   Class* java_util_Arrays = class_linker_->FindSystemClass("Ljava/util/Arrays;");
   Method* sort = java_util_Arrays->FindDirectMethod("sort", "([I)V");
   const DexFile::StringId* string_id = java_lang_dex_file_->FindStringId("[I");
@@ -200,11 +206,13 @@
 
 TEST_F(ObjectTest, StaticFieldFromCode) {
   // pretend we are trying to access 'Static.s0' from StaticsFromCode.<clinit>
-  SirtRef<ClassLoader> class_loader(LoadDex("StaticsFromCode"));
-  const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(class_loader.get())[0];
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("StaticsFromCode");
+  const DexFile* dex_file = Runtime::Current()->GetCompileTimeClassPath(class_loader)[0];
   CHECK(dex_file != NULL);
 
-  Class* klass = class_linker_->FindClass("LStaticsFromCode;", class_loader.get());
+  Class* klass =
+      class_linker_->FindClass("LStaticsFromCode;", soa.Decode<ClassLoader*>(class_loader));
   Method* clinit = klass->FindDirectMethod("<clinit>", "()V");
   const DexFile::StringId* klass_string_id = dex_file->FindStringId("LStaticsFromCode;");
   ASSERT_TRUE(klass_string_id != NULL);
@@ -242,6 +250,7 @@
 }
 
 TEST_F(ObjectTest, String) {
+  ScopedObjectAccess soa(Thread::Current());
   // Test the empty string.
   AssertString(0, "",     "", 0);
 
@@ -265,6 +274,7 @@
 }
 
 TEST_F(ObjectTest, StringEqualsUtf8) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<String> string(String::AllocFromModifiedUtf8("android"));
   EXPECT_TRUE(string->Equals("android"));
   EXPECT_FALSE(string->Equals("Android"));
@@ -279,6 +289,7 @@
 }
 
 TEST_F(ObjectTest, StringEquals) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<String> string(String::AllocFromModifiedUtf8("android"));
   SirtRef<String> string_2(String::AllocFromModifiedUtf8("android"));
   EXPECT_TRUE(string->Equals(string_2.get()));
@@ -294,6 +305,7 @@
 }
 
 TEST_F(ObjectTest, StringLength) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<String> string(String::AllocFromModifiedUtf8("android"));
   EXPECT_EQ(string->GetLength(), 7);
   EXPECT_EQ(string->GetUtfLength(), 7);
@@ -306,10 +318,13 @@
 }
 
 TEST_F(ObjectTest, DescriptorCompare) {
+  ScopedObjectAccess soa(Thread::Current());
   ClassLinker* linker = class_linker_;
 
-  SirtRef<ClassLoader> class_loader_1(LoadDex("ProtoCompare"));
-  SirtRef<ClassLoader> class_loader_2(LoadDex("ProtoCompare2"));
+  jobject jclass_loader_1 = LoadDex("ProtoCompare");
+  jobject jclass_loader_2 = LoadDex("ProtoCompare2");
+  SirtRef<ClassLoader> class_loader_1(soa.Decode<ClassLoader*>(jclass_loader_1));
+  SirtRef<ClassLoader> class_loader_2(soa.Decode<ClassLoader*>(jclass_loader_2));
 
   Class* klass1 = linker->FindClass("LProtoCompare;", class_loader_1.get());
   ASSERT_TRUE(klass1 != NULL);
@@ -365,6 +380,7 @@
 
 
 TEST_F(ObjectTest, StringHashCode) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<String> empty(String::AllocFromModifiedUtf8(""));
   SirtRef<String> A(String::AllocFromModifiedUtf8("A"));
   SirtRef<String> ABC(String::AllocFromModifiedUtf8("ABC"));
@@ -375,7 +391,10 @@
 }
 
 TEST_F(ObjectTest, InstanceOf) {
-  SirtRef<ClassLoader> class_loader(LoadDex("XandY"));
+  ScopedObjectAccess soa(Thread::Current());
+  jobject jclass_loader = LoadDex("XandY");
+  SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(jclass_loader));
+
   Class* X = class_linker_->FindClass("LX;", class_loader.get());
   Class* Y = class_linker_->FindClass("LY;", class_loader.get());
   ASSERT_TRUE(X != NULL);
@@ -406,7 +425,9 @@
 }
 
 TEST_F(ObjectTest, IsAssignableFrom) {
-  SirtRef<ClassLoader> class_loader(LoadDex("XandY"));
+  ScopedObjectAccess soa(Thread::Current());
+  jobject jclass_loader = LoadDex("XandY");
+  SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(jclass_loader));
   Class* X = class_linker_->FindClass("LX;", class_loader.get());
   Class* Y = class_linker_->FindClass("LY;", class_loader.get());
 
@@ -441,7 +462,9 @@
 }
 
 TEST_F(ObjectTest, IsAssignableFromArray) {
-  SirtRef<ClassLoader> class_loader(LoadDex("XandY"));
+  ScopedObjectAccess soa(Thread::Current());
+  jobject jclass_loader = LoadDex("XandY");
+  SirtRef<ClassLoader> class_loader(soa.Decode<ClassLoader*>(jclass_loader));
   Class* X = class_linker_->FindClass("LX;", class_loader.get());
   Class* Y = class_linker_->FindClass("LY;", class_loader.get());
   ASSERT_TRUE(X != NULL);
@@ -492,6 +515,7 @@
 }
 
 TEST_F(ObjectTest, FindInstanceField) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<String> s(String::AllocFromModifiedUtf8("ABC"));
   ASSERT_TRUE(s.get() != NULL);
   Class* c = s->GetClass();
@@ -524,6 +548,7 @@
 }
 
 TEST_F(ObjectTest, FindStaticField) {
+  ScopedObjectAccess soa(Thread::Current());
   SirtRef<String> s(String::AllocFromModifiedUtf8("ABC"));
   ASSERT_TRUE(s.get() != NULL);
   Class* c = s->GetClass();
diff --git a/src/object_utils.h b/src/object_utils.h
index 8b2aab9..d523ecc 100644
--- a/src/object_utils.h
+++ b/src/object_utils.h
@@ -32,29 +32,30 @@
 
 class ObjectLock {
  public:
-  explicit ObjectLock(Object* object) : self_(Thread::Current()), obj_(object) {
+  explicit ObjectLock(Object* object) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+      : self_(Thread::Current()), obj_(object) {
     CHECK(object != NULL);
     obj_->MonitorEnter(self_);
   }
 
-  ~ObjectLock() {
+  ~ObjectLock() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     obj_->MonitorExit(self_);
   }
 
-  void Wait() {
+  void Wait() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return Monitor::Wait(self_, obj_, 0, 0, false);
   }
 
-  void Notify() {
+  void Notify() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     obj_->Notify();
   }
 
-  void NotifyAll() {
+  void NotifyAll() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     obj_->NotifyAll();
   }
 
  private:
-  Thread* self_;
+  Thread* const self_;
   Object* obj_;
   DISALLOW_COPY_AND_ASSIGN(ObjectLock);
 };
@@ -62,6 +63,7 @@
 class ClassHelper {
  public:
   ClassHelper(const Class* c = NULL, ClassLinker* l = NULL)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : class_def_(NULL),
         class_linker_(l),
         dex_cache_(NULL),
@@ -73,7 +75,8 @@
     }
   }
 
-  void ChangeClass(const Class* new_c) {
+  void ChangeClass(const Class* new_c)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK(new_c != NULL) << "klass_=" << klass_;  // Log what we were changing from if any
     CHECK(new_c->IsClass()) << "new_c=" << new_c;
     if (dex_cache_ != NULL) {
@@ -90,7 +93,7 @@
 
   // The returned const char* is only guaranteed to be valid for the lifetime of the ClassHelper.
   // If you need it longer, copy it into a std::string.
-  const char* GetDescriptor() {
+  const char* GetDescriptor() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK(klass_ != NULL);
     if (UNLIKELY(klass_->IsArrayClass())) {
       return GetArrayDescriptor();
@@ -106,7 +109,7 @@
     }
   }
 
-  const char* GetArrayDescriptor() {
+  const char* GetArrayDescriptor() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     std::string result("[");
     const Class* saved_klass = klass_;
     CHECK(saved_klass != NULL);
@@ -117,7 +120,8 @@
     return descriptor_.c_str();
   }
 
-  const DexFile::ClassDef* GetClassDef() {
+  const DexFile::ClassDef* GetClassDef()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile::ClassDef* result = class_def_;
     if (result == NULL) {
       result = GetDexFile().FindClassDef(GetDescriptor());
@@ -126,7 +130,7 @@
     return result;
   }
 
-  uint32_t NumDirectInterfaces() {
+  uint32_t NumDirectInterfaces() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(klass_ != NULL);
     if (klass_->IsPrimitive()) {
       return 0;
@@ -144,14 +148,16 @@
     }
   }
 
-  uint16_t GetDirectInterfaceTypeIdx(uint32_t idx) {
+  uint16_t GetDirectInterfaceTypeIdx(uint32_t idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(klass_ != NULL);
     DCHECK(!klass_->IsPrimitive());
     DCHECK(!klass_->IsArrayClass());
     return GetInterfaceTypeList()->GetTypeItem(idx).type_idx_;
   }
 
-  Class* GetDirectInterface(uint32_t idx) {
+  Class* GetDirectInterface(uint32_t idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(klass_ != NULL);
     DCHECK(!klass_->IsPrimitive());
     if (klass_->IsArrayClass()) {
@@ -174,7 +180,7 @@
     }
   }
 
-  const char* GetSourceFile() {
+  const char* GetSourceFile() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     std::string descriptor(GetDescriptor());
     const DexFile& dex_file = GetDexFile();
     const DexFile::ClassDef* dex_class_def = dex_file.FindClassDef(descriptor);
@@ -182,7 +188,7 @@
     return dex_file.GetSourceFile(*dex_class_def);
   }
 
-  std::string GetLocation() {
+  std::string GetLocation() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DexCache* dex_cache = GetDexCache();
     if (dex_cache != NULL && !klass_->IsProxyClass()) {
       return dex_cache->GetLocation()->ToModifiedUtf8();
@@ -192,7 +198,7 @@
     }
   }
 
-  const DexFile& GetDexFile() {
+  const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile* result = dex_file_;
     if (result == NULL) {
       const DexCache* dex_cache = GetDexCache();
@@ -202,7 +208,7 @@
     return *result;
   }
 
-  DexCache* GetDexCache() {
+  DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DexCache* result = dex_cache_;
     if (result == NULL) {
       DCHECK(klass_ != NULL);
@@ -213,7 +219,8 @@
   }
 
  private:
-  const DexFile::TypeList* GetInterfaceTypeList() {
+  const DexFile::TypeList* GetInterfaceTypeList()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile::TypeList* result = interface_type_list_;
     if (result == NULL) {
       const DexFile::ClassDef* class_def = GetClassDef();
@@ -263,7 +270,7 @@
     }
     field_ = new_f;
   }
-  const char* GetName() {
+  const char* GetName() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     uint32_t field_index = field_->GetDexFieldIndex();
     if (!field_->GetDeclaringClass()->IsProxyClass()) {
       const DexFile& dex_file = GetDexFile();
@@ -284,7 +291,7 @@
       return Runtime::Current()->GetInternTable()->InternStrong(GetName());
     }
   }
-  Class* GetType() {
+  Class* GetType() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     uint32_t field_index = field_->GetDexFieldIndex();
     if (!field_->GetDeclaringClass()->IsProxyClass()) {
       const DexFile& dex_file = GetDexFile();
@@ -299,7 +306,7 @@
       return GetClassLinker()->FindSystemClass(GetTypeDescriptor());
     }
   }
-  const char* GetTypeDescriptor() {
+  const char* GetTypeDescriptor() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     uint32_t field_index = field_->GetDexFieldIndex();
     if (!field_->GetDeclaringClass()->IsProxyClass()) {
       const DexFile& dex_file = GetDexFile();
@@ -312,27 +319,29 @@
       return field_index == 0 ? "[Ljava/lang/Class;" : "[[Ljava/lang/Class;";
     }
   }
-  Primitive::Type GetTypeAsPrimitiveType() {
+  Primitive::Type GetTypeAsPrimitiveType()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return Primitive::GetType(GetTypeDescriptor()[0]);
   }
-  bool IsPrimitiveType() {
+  bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Primitive::Type type = GetTypeAsPrimitiveType();
     return type != Primitive::kPrimNot;
   }
-  size_t FieldSize() {
+  size_t FieldSize() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Primitive::Type type = GetTypeAsPrimitiveType();
     return Primitive::FieldSize(type);
   }
 
   // The returned const char* is only guaranteed to be valid for the lifetime of the FieldHelper.
   // If you need it longer, copy it into a std::string.
-  const char* GetDeclaringClassDescriptor() {
+  const char* GetDeclaringClassDescriptor()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     uint16_t type_idx = field_->GetDeclaringClass()->GetDexTypeIndex();
     if (type_idx != DexFile::kDexNoIndex16) {
       const DexFile& dex_file = GetDexFile();
       return dex_file.GetTypeDescriptor(dex_file.GetTypeId(type_idx));
     } else {
-      // Most likely a proxy class
+      // Most likely a proxy class.
       ClassHelper kh(field_->GetDeclaringClass());
       declaring_class_descriptor_ = kh.GetDescriptor();
       return declaring_class_descriptor_.c_str();
@@ -340,7 +349,7 @@
   }
 
  private:
-  DexCache* GetDexCache() {
+  DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DexCache* result = dex_cache_;
     if (result == NULL) {
       result = field_->GetDeclaringClass()->GetDexCache();
@@ -356,7 +365,7 @@
     }
     return result;
   }
-  const DexFile& GetDexFile() {
+  const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile* result = dex_file_;
     if (result == NULL) {
       const DexCache* dex_cache = GetDexCache();
@@ -378,22 +387,24 @@
 class MethodHelper {
  public:
   MethodHelper()
-      : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
-        shorty_len_(0) {}
+     : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
+       shorty_len_(0) {}
 
   explicit MethodHelper(const Method* m)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : class_linker_(NULL), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
         shorty_len_(0) {
     SetMethod(m);
   }
 
   MethodHelper(const Method* m, ClassLinker* l)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : class_linker_(l), dex_cache_(NULL), dex_file_(NULL), method_(NULL), shorty_(NULL),
         shorty_len_(0) {
     SetMethod(m);
   }
 
-  void ChangeMethod(Method* new_m) {
+  void ChangeMethod(Method* new_m) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(new_m != NULL);
     if (dex_cache_ != NULL) {
       Class* klass = new_m->GetDeclaringClass();
@@ -412,7 +423,7 @@
     shorty_ = NULL;
   }
 
-  const char* GetName() {
+  const char* GetName() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
     if (dex_method_idx != DexFile::kDexNoIndex16) {
@@ -433,14 +444,15 @@
     }
   }
 
-  String* GetNameAsString() {
+  String* GetNameAsString() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
     const DexFile::MethodId& method_id = dex_file.GetMethodId(dex_method_idx);
     return GetClassLinker()->ResolveString(dex_file, method_id.name_idx_, GetDexCache());
   }
 
-  const char* GetShorty() {
+  const char* GetShorty()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const char* result = shorty_;
     if (result == NULL) {
       const DexFile& dex_file = GetDexFile();
@@ -451,14 +463,14 @@
     return result;
   }
 
-  uint32_t GetShortyLength() {
+  uint32_t GetShortyLength() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (shorty_ == NULL) {
       GetShorty();
     }
     return shorty_len_;
   }
 
-  const std::string GetSignature() {
+  const std::string GetSignature() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     uint32_t dex_method_idx = method_->GetDexMethodIndex();
     if (dex_method_idx != DexFile::kDexNoIndex16) {
@@ -468,17 +480,20 @@
     }
   }
 
-  const DexFile::ProtoId& GetPrototype() {
+  const DexFile::ProtoId& GetPrototype()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     return dex_file.GetMethodPrototype(dex_file.GetMethodId(method_->GetDexMethodIndex()));
   }
 
-  const DexFile::TypeList* GetParameterTypeList() {
+  const DexFile::TypeList* GetParameterTypeList()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile::ProtoId& proto = GetPrototype();
     return GetDexFile().GetProtoParameters(proto);
   }
 
-  ObjectArray<Class>* GetParameterTypes() {
+  ObjectArray<Class>* GetParameterTypes()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile::TypeList* params = GetParameterTypeList();
     Class* array_class = GetClassLinker()->FindSystemClass("[Ljava/lang/Class;");
     uint32_t num_params = params == NULL ? 0 : params->Size();
@@ -494,7 +509,7 @@
     return result;
   }
 
-  Class* GetReturnType() {
+  Class* GetReturnType() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     const DexFile::MethodId& method_id = dex_file.GetMethodId(method_->GetDexMethodIndex());
     const DexFile::ProtoId& proto_id = dex_file.GetMethodPrototype(method_id);
@@ -502,7 +517,8 @@
     return GetClassFromTypeIdx(return_type_idx);
   }
 
-  const char* GetReturnTypeDescriptor() {
+  const char* GetReturnTypeDescriptor()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     const DexFile::MethodId& method_id = dex_file.GetMethodId(method_->GetDexMethodIndex());
     const DexFile::ProtoId& proto_id = dex_file.GetMethodPrototype(method_id);
@@ -510,7 +526,8 @@
     return dex_file.GetTypeDescriptor(dex_file.GetTypeId(return_type_idx));
   }
 
-  int32_t GetLineNumFromDexPC(uint32_t dex_pc) {
+  int32_t GetLineNumFromDexPC(uint32_t dex_pc)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (dex_pc == DexFile::kDexNoIndex) {
       return method_->IsNative() ? -2 : -1;
     } else {
@@ -519,7 +536,8 @@
     }
   }
 
-  const char* GetDeclaringClassDescriptor() {
+  const char* GetDeclaringClassDescriptor()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Class* klass = method_->GetDeclaringClass();
     DCHECK(!klass->IsProxyClass());
     uint16_t type_idx = klass->GetDexTypeIndex();
@@ -527,7 +545,8 @@
     return dex_file.GetTypeDescriptor(dex_file.GetTypeId(type_idx));
   }
 
-  const char* GetDeclaringClassSourceFile() {
+  const char* GetDeclaringClassSourceFile()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const char* descriptor = GetDeclaringClassDescriptor();
     const DexFile& dex_file = GetDexFile();
     const DexFile::ClassDef* dex_class_def = dex_file.FindClassDef(descriptor);
@@ -535,7 +554,8 @@
     return dex_file.GetSourceFile(*dex_class_def);
   }
 
-  uint32_t GetClassDefIndex() {
+  uint32_t GetClassDefIndex()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const char* descriptor = GetDeclaringClassDescriptor();
     const DexFile& dex_file = GetDexFile();
     uint32_t index;
@@ -543,26 +563,29 @@
     return index;
   }
 
-  ClassLoader* GetClassLoader() {
+  ClassLoader* GetClassLoader()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return method_->GetDeclaringClass()->GetClassLoader();
   }
 
-  bool IsStatic() {
+  bool IsStatic()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return method_->IsStatic();
   }
 
-  bool IsClassInitializer() {
+  bool IsClassInitializer() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return IsStatic() && StringPiece(GetName()) == "<clinit>";
   }
 
-  size_t NumArgs() {
+  size_t NumArgs() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     // "1 +" because the first in Args is the receiver.
     // "- 1" because we don't count the return type.
     return (IsStatic() ? 0 : 1) + GetShortyLength() - 1;
   }
 
   // Is the specified parameter a long or double, where parameter 0 is 'this' for instance methods
-  bool IsParamALongOrDouble(size_t param) {
+  bool IsParamALongOrDouble(size_t param)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK_LT(param, NumArgs());
     if (IsStatic()) {
       param++;  // 0th argument must skip return value at start of the shorty
@@ -574,7 +597,7 @@
   }
 
   // Is the specified parameter a reference, where parameter 0 is 'this' for instance methods
-  bool IsParamAReference(size_t param) {
+  bool IsParamAReference(size_t param) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     CHECK_LT(param, NumArgs());
     if (IsStatic()) {
       param++;  // 0th argument must skip return value at start of the shorty
@@ -584,7 +607,8 @@
     return GetShorty()[param] == 'L';  // An array also has a shorty character of 'L' (not '[')
   }
 
-  bool HasSameNameAndSignature(MethodHelper* other) {
+  bool HasSameNameAndSignature(MethodHelper* other)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (GetDexCache() == other->GetDexCache()) {
       const DexFile& dex_file = GetDexFile();
       const DexFile::MethodId& mid = dex_file.GetMethodId(method_->GetDexMethodIndex());
@@ -597,15 +621,18 @@
     return name == other_name && GetSignature() == other->GetSignature();
   }
 
-  const DexFile::CodeItem* GetCodeItem() {
+  const DexFile::CodeItem* GetCodeItem()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetDexFile().GetCodeItem(method_->GetCodeItemOffset());
   }
 
-  bool IsResolvedTypeIdx(uint16_t type_idx) const {
+  bool IsResolvedTypeIdx(uint16_t type_idx) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return method_->GetDexCacheResolvedTypes()->Get(type_idx) != NULL;
   }
 
-  Class* GetClassFromTypeIdx(uint16_t type_idx) {
+  Class* GetClassFromTypeIdx(uint16_t type_idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Class* type = method_->GetDexCacheResolvedTypes()->Get(type_idx);
     if (type == NULL) {
       type = GetClassLinker()->ResolveType(type_idx, method_);
@@ -614,16 +641,18 @@
     return type;
   }
 
-  const char* GetTypeDescriptorFromTypeIdx(uint16_t type_idx) {
+  const char* GetTypeDescriptorFromTypeIdx(uint16_t type_idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     return dex_file.GetTypeDescriptor(dex_file.GetTypeId(type_idx));
   }
 
-  Class* GetDexCacheResolvedType(uint16_t type_idx) {
+  Class* GetDexCacheResolvedType(uint16_t type_idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetDexCache()->GetResolvedType(type_idx);
   }
 
-  const DexFile& GetDexFile() {
+  const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     const DexFile* result = dex_file_;
     if (result == NULL) {
       const DexCache* dex_cache = GetDexCache();
@@ -633,7 +662,7 @@
     return *result;
   }
 
-  DexCache* GetDexCache() {
+  DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DexCache* result = dex_cache_;
     if (result == NULL) {
       Class* klass = method_->GetDeclaringClass();
@@ -646,7 +675,8 @@
  private:
   // Set the method_ field, for proxy methods looking up the interface method via the resolved
   // methods table.
-  void SetMethod(const Method* method) {
+  void SetMethod(const Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (method != NULL) {
       Class* klass = method->GetDeclaringClass();
       if (klass->IsProxyClass()) {
diff --git a/src/reference_table.cc b/src/reference_table.cc
index ee1760b..a2b54d6 100644
--- a/src/reference_table.cc
+++ b/src/reference_table.cc
@@ -60,7 +60,10 @@
 }
 
 struct ObjectComparator {
-  bool operator()(const Object* obj1, const Object* obj2) {
+  bool operator()(const Object* obj1, const Object* obj2)
+    // TODO: enable analysis when analysis can work with the STL.
+      NO_THREAD_SAFETY_ANALYSIS {
+    GlobalSynchronization::mutator_lock_->AssertSharedHeld();
     // Ensure null references and cleared jweaks appear at the end.
     if (obj1 == NULL) {
       return true;
@@ -75,8 +78,7 @@
 
     // Sort by class...
     if (obj1->GetClass() != obj2->GetClass()) {
-      return reinterpret_cast<uintptr_t>(obj1->GetClass()) <
-          reinterpret_cast<uintptr_t>(obj2->GetClass());
+      return obj1->GetClass()->IdentityHashCode() < obj2->IdentityHashCode();
     } else {
       // ...then by size...
       size_t count1 = obj1->SizeOf();
@@ -84,9 +86,8 @@
       if (count1 != count2) {
         return count1 < count2;
       } else {
-        // ...and finally by address.
-        return reinterpret_cast<uintptr_t>(obj1) <
-            reinterpret_cast<uintptr_t>(obj2);
+        // ...and finally by identity hash code.
+        return obj1->IdentityHashCode() < obj2->IdentityHashCode();
       }
     }
   }
@@ -97,7 +98,9 @@
 // Pass in the number of elements in the array (or 0 if this is not an
 // array object), and the number of additional objects that are identical
 // or equivalent to the original.
-static void DumpSummaryLine(std::ostream& os, const Object* obj, size_t element_count, int identical, int equiv) {
+static void DumpSummaryLine(std::ostream& os, const Object* obj, size_t element_count,
+                            int identical, int equiv)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (obj == NULL) {
     os << "    NULL reference (count=" << equiv << ")\n";
     return;
diff --git a/src/reference_table.h b/src/reference_table.h
index 28af887..de9d45d 100644
--- a/src/reference_table.h
+++ b/src/reference_table.h
@@ -43,13 +43,14 @@
 
   size_t Size() const;
 
-  void Dump(std::ostream& os) const;
+  void Dump(std::ostream& os) const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void VisitRoots(Heap::RootVisitor* visitor, void* arg);
 
  private:
   typedef std::vector<const Object*> Table;
-  static void Dump(std::ostream& os, const Table& entries);
+  static void Dump(std::ostream& os, const Table& entries)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   friend class IndirectReferenceTable; // For Dump.
 
   std::string name_;
diff --git a/src/reference_table_test.cc b/src/reference_table_test.cc
index c7c1cc6..4bb5c97 100644
--- a/src/reference_table_test.cc
+++ b/src/reference_table_test.cc
@@ -24,6 +24,7 @@
 };
 
 TEST_F(ReferenceTableTest, Basics) {
+  ScopedObjectAccess soa(Thread::Current());
   Object* o1 = String::AllocFromModifiedUtf8("hello");
   Object* o2 = ShortArray::Alloc(0);
 
diff --git a/src/reflection.cc b/src/reflection.cc
index 7726998..542f1a2 100644
--- a/src/reflection.cc
+++ b/src/reflection.cc
@@ -20,35 +20,15 @@
 #include "jni_internal.h"
 #include "object.h"
 #include "object_utils.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
+#include "well_known_classes.h"
 
 namespace art {
 
-Method* gBoolean_valueOf;
-Method* gByte_valueOf;
-Method* gCharacter_valueOf;
-Method* gDouble_valueOf;
-Method* gFloat_valueOf;
-Method* gInteger_valueOf;
-Method* gLong_valueOf;
-Method* gShort_valueOf;
-
-void InitBoxingMethods() {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  gBoolean_valueOf = class_linker->FindSystemClass("Ljava/lang/Boolean;")->FindDeclaredDirectMethod("valueOf", "(Z)Ljava/lang/Boolean;");
-  gByte_valueOf = class_linker->FindSystemClass("Ljava/lang/Byte;")->FindDeclaredDirectMethod("valueOf", "(B)Ljava/lang/Byte;");
-  gCharacter_valueOf = class_linker->FindSystemClass("Ljava/lang/Character;")->FindDeclaredDirectMethod("valueOf", "(C)Ljava/lang/Character;");
-  gDouble_valueOf = class_linker->FindSystemClass("Ljava/lang/Double;")->FindDeclaredDirectMethod("valueOf", "(D)Ljava/lang/Double;");
-  gFloat_valueOf = class_linker->FindSystemClass("Ljava/lang/Float;")->FindDeclaredDirectMethod("valueOf", "(F)Ljava/lang/Float;");
-  gInteger_valueOf = class_linker->FindSystemClass("Ljava/lang/Integer;")->FindDeclaredDirectMethod("valueOf", "(I)Ljava/lang/Integer;");
-  gLong_valueOf = class_linker->FindSystemClass("Ljava/lang/Long;")->FindDeclaredDirectMethod("valueOf", "(J)Ljava/lang/Long;");
-  gShort_valueOf = class_linker->FindSystemClass("Ljava/lang/Short;")->FindDeclaredDirectMethod("valueOf", "(S)Ljava/lang/Short;");
-}
-
-jobject InvokeMethod(const ScopedJniThreadState& ts, jobject javaMethod, jobject javaReceiver,
+jobject InvokeMethod(const ScopedObjectAccess& soa, jobject javaMethod, jobject javaReceiver,
                      jobject javaArgs) {
-  jmethodID mid = ts.Env()->FromReflectedMethod(javaMethod);
-  Method* m = ts.DecodeMethod(mid);
+  jmethodID mid = soa.Env()->FromReflectedMethod(javaMethod);
+  Method* m = soa.DecodeMethod(mid);
 
   Class* declaring_class = m->GetDeclaringClass();
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(declaring_class, true, true)) {
@@ -58,24 +38,24 @@
   Object* receiver = NULL;
   if (!m->IsStatic()) {
     // Check that the receiver is non-null and an instance of the field's declaring class.
-    receiver = ts.Decode<Object*>(javaReceiver);
+    receiver = soa.Decode<Object*>(javaReceiver);
     if (!VerifyObjectInClass(receiver, declaring_class)) {
       return NULL;
     }
 
     // Find the actual implementation of the virtual method.
     m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(m);
-    mid = ts.EncodeMethod(m);
+    mid = soa.EncodeMethod(m);
   }
 
   // Get our arrays of arguments and their types, and check they're the same size.
-  ObjectArray<Object>* objects = ts.Decode<ObjectArray<Object>*>(javaArgs);
+  ObjectArray<Object>* objects = soa.Decode<ObjectArray<Object>*>(javaArgs);
   MethodHelper mh(m);
   const DexFile::TypeList* classes = mh.GetParameterTypeList();
   uint32_t classes_size = classes == NULL ? 0 : classes->Size();
   uint32_t arg_count = (objects != NULL) ? objects->GetLength() : 0;
   if (arg_count != classes_size) {
-    ts.Self()->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
+    soa.Self()->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
         "wrong number of arguments; expected %d, got %d",
         classes_size, arg_count);
     return NULL;
@@ -91,27 +71,27 @@
       return NULL;
     }
     if (!dst_class->IsPrimitive()) {
-      args[i].l = ts.AddLocalReference<jobject>(arg);
+      args[i].l = soa.AddLocalReference<jobject>(arg);
     }
   }
 
   // Invoke the method.
-  JValue value(InvokeWithJValues(ts, javaReceiver, mid, args.get()));
+  JValue value(InvokeWithJValues(soa, javaReceiver, mid, args.get()));
 
   // Wrap any exception with "Ljava/lang/reflect/InvocationTargetException;" and return early.
-  if (ts.Self()->IsExceptionPending()) {
-    jthrowable th = ts.Env()->ExceptionOccurred();
-    ts.Env()->ExceptionClear();
-    jclass exception_class = ts.Env()->FindClass("java/lang/reflect/InvocationTargetException");
-    jmethodID mid = ts.Env()->GetMethodID(exception_class, "<init>", "(Ljava/lang/Throwable;)V");
-    jobject exception_instance = ts.Env()->NewObject(exception_class, mid, th);
-    ts.Env()->Throw(reinterpret_cast<jthrowable>(exception_instance));
+  if (soa.Self()->IsExceptionPending()) {
+    jthrowable th = soa.Env()->ExceptionOccurred();
+    soa.Env()->ExceptionClear();
+    jclass exception_class = soa.Env()->FindClass("java/lang/reflect/InvocationTargetException");
+    jmethodID mid = soa.Env()->GetMethodID(exception_class, "<init>", "(Ljava/lang/Throwable;)V");
+    jobject exception_instance = soa.Env()->NewObject(exception_class, mid, th);
+    soa.Env()->Throw(reinterpret_cast<jthrowable>(exception_instance));
     return NULL;
   }
 
   // Box if necessary and return.
   BoxPrimitive(mh.GetReturnType()->GetPrimitiveType(), value);
-  return ts.AddLocalReference<jobject>(value.GetL());
+  return soa.AddLocalReference<jobject>(value.GetL());
 }
 
 bool VerifyObjectInClass(Object* o, Class* c) {
@@ -220,31 +200,31 @@
     return;
   }
 
-  Method* m = NULL;
+  jmethodID m = NULL;
   switch (src_class) {
   case Primitive::kPrimBoolean:
-    m = gBoolean_valueOf;
+    m = WellKnownClasses::java_lang_Boolean_valueOf;
     break;
   case Primitive::kPrimByte:
-    m = gByte_valueOf;
+    m = WellKnownClasses::java_lang_Byte_valueOf;
     break;
   case Primitive::kPrimChar:
-    m = gCharacter_valueOf;
+    m = WellKnownClasses::java_lang_Character_valueOf;
     break;
   case Primitive::kPrimDouble:
-    m = gDouble_valueOf;
+    m = WellKnownClasses::java_lang_Double_valueOf;
     break;
   case Primitive::kPrimFloat:
-    m = gFloat_valueOf;
+    m = WellKnownClasses::java_lang_Float_valueOf;
     break;
   case Primitive::kPrimInt:
-    m = gInteger_valueOf;
+    m = WellKnownClasses::java_lang_Integer_valueOf;
     break;
   case Primitive::kPrimLong:
-    m = gLong_valueOf;
+    m = WellKnownClasses::java_lang_Long_valueOf;
     break;
   case Primitive::kPrimShort:
-    m = gShort_valueOf;
+    m = WellKnownClasses::java_lang_Short_valueOf;
     break;
   case Primitive::kPrimVoid:
     // There's no such thing as a void field, and void methods invoked via reflection return null.
@@ -254,13 +234,17 @@
     LOG(FATAL) << static_cast<int>(src_class);
   }
 
-  Thread* self = Thread::Current();
-  ScopedThreadStateChange tsc(self, kRunnable);
+  if (kIsDebugBuild) {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_EQ(Thread::Current()->GetState(), kRunnable);
+  }
+  ScopedObjectAccessUnchecked soa(Thread::Current());
   JValue args[1] = { value };
-  m->Invoke(self, NULL, args, &value);
+  soa.DecodeMethod(m)->Invoke(soa.Self(), NULL, args, &value);
 }
 
-static std::string UnboxingFailureKind(Method* m, int index, Field* f) {
+static std::string UnboxingFailureKind(Method* m, int index, Field* f)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (m != NULL && index != -1) {
     ++index; // Humans count from 1.
     return StringPrintf("method %s argument %d", PrettyMethod(m, false).c_str(), index);
@@ -271,7 +255,9 @@
   return "result";
 }
 
-static bool UnboxPrimitive(Object* o, Class* dst_class, JValue& unboxed_value, Method* m, int index, Field* f) {
+static bool UnboxPrimitive(Object* o, Class* dst_class, JValue& unboxed_value, Method* m,
+                           int index, Field* f)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (!dst_class->IsPrimitive()) {
     if (o != NULL && !o->InstanceOf(dst_class)) {
       Thread::Current()->ThrowNewExceptionF("Ljava/lang/IllegalArgumentException;",
diff --git a/src/reflection.h b/src/reflection.h
index 03847f8..2979e5b 100644
--- a/src/reflection.h
+++ b/src/reflection.h
@@ -27,19 +27,27 @@
 union JValue;
 class Method;
 class Object;
-class ScopedJniThreadState;
+class ScopedObjectAccess;
 
-void InitBoxingMethods();
-void BoxPrimitive(Primitive::Type src_class, JValue& value);
-bool UnboxPrimitiveForArgument(Object* o, Class* dst_class, JValue& unboxed_value, Method* m, size_t index);
-bool UnboxPrimitiveForField(Object* o, Class* dst_class, JValue& unboxed_value, Field* f);
-bool UnboxPrimitiveForResult(Object* o, Class* dst_class, JValue& unboxed_value);
+void BoxPrimitive(Primitive::Type src_class, JValue& value)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+bool UnboxPrimitiveForArgument(Object* o, Class* dst_class, JValue& unboxed_value, Method* m,
+                               size_t index)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+bool UnboxPrimitiveForField(Object* o, Class* dst_class, JValue& unboxed_value, Field* f)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+bool UnboxPrimitiveForResult(Object* o, Class* dst_class, JValue& unboxed_value)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-bool ConvertPrimitiveValue(Primitive::Type src_class, Primitive::Type dst_class, const JValue& src, JValue& dst);
+bool ConvertPrimitiveValue(Primitive::Type src_class, Primitive::Type dst_class, const JValue& src,
+                           JValue& dst)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-jobject InvokeMethod(const ScopedJniThreadState& ts, jobject method, jobject receiver, jobject args);
+jobject InvokeMethod(const ScopedObjectAccess& soa, jobject method, jobject receiver, jobject args)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-bool VerifyObjectInClass(Object* o, Class* c);
+bool VerifyObjectInClass(Object* o, Class* c)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 }  // namespace art
 
diff --git a/src/runtime.cc b/src/runtime.cc
index 6447ede..5230b77 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -35,9 +35,8 @@
 #include "jni_internal.h"
 #include "monitor.h"
 #include "oat_file.h"
-#include "scoped_heap_lock.h"
-#include "scoped_jni_thread_state.h"
 #include "ScopedLocalRef.h"
+#include "scoped_thread_state_change.h"
 #include "signal_catcher.h"
 #include "signal_set.h"
 #include "space.h"
@@ -118,10 +117,7 @@
   }
 
   // Make sure to let the GC complete if it is running.
-  {
-    ScopedHeapLock heap_lock;
-    heap_->WaitForConcurrentGcToComplete();
-  }
+  heap_->WaitForConcurrentGcToComplete();
 
   // Make sure our internal threads are dead before we start tearing down things they're using.
   Dbg::StopJdwp();
@@ -162,6 +158,10 @@
     if (self == NULL) {
       os << "(Aborting thread was not attached to runtime!)\n";
     } else {
+      // TODO: we're aborting and the ScopedObjectAccess may attempt to acquire the mutator_lock_
+      //       which may block indefinitely if there's a misbehaving thread holding it exclusively.
+      //       The code below should be made robust to this.
+      ScopedObjectAccess soa(self);
       self->Dump(os);
       if (self->IsExceptionPending()) {
         os << "Pending " << PrettyTypeOf(self->GetException()) << " on thread:\n"
@@ -171,15 +171,10 @@
   }
 };
 
-static Mutex& GetAbortLock() {
-  static Mutex abort_lock("abort lock");
-  return abort_lock;
-}
-
 void Runtime::Abort() {
   // Ensure that we don't have multiple threads trying to abort at once,
   // which would result in significantly worse diagnostics.
-  MutexLock mu(GetAbortLock());
+  MutexLock mu(*GlobalSynchronization::abort_lock_);
 
   // Get any pending output out of the way.
   fflush(NULL);
@@ -315,15 +310,6 @@
   return result;
 }
 
-void LoadJniLibrary(JavaVMExt* vm, const char* name) {
-  std::string mapped_name(StringPrintf(OS_SHARED_LIB_FORMAT_STR, name));
-  std::string reason;
-  if (!vm->LoadNativeLibrary(mapped_name, NULL, reason)) {
-    LOG(FATAL) << "LoadNativeLibrary failed for \"" << mapped_name << "\": "
-               << reason;
-  }
-}
-
 Runtime::ParsedOptions* Runtime::ParsedOptions::Create(const Options& options, bool ignore_unrecognized) {
   UniquePtr<ParsedOptions> parsed(new ParsedOptions());
   const char* boot_class_path_string = getenv("BOOTCLASSPATH");
@@ -543,17 +529,19 @@
   return parsed.release();
 }
 
-Runtime* Runtime::Create(const Options& options, bool ignore_unrecognized) {
+bool Runtime::Create(const Options& options, bool ignore_unrecognized) {
   // TODO: acquire a static mutex on Runtime to avoid racing.
   if (Runtime::instance_ != NULL) {
-    return NULL;
+    return false;
   }
+  GlobalSynchronization::Init();
   instance_ = new Runtime;
   if (!instance_->Init(options, ignore_unrecognized)) {
     delete instance_;
     instance_ = NULL;
+    return false;
   }
-  return instance_;
+  return true;
 }
 
 static void CreateSystemClassLoader() {
@@ -561,28 +549,28 @@
     return;
   }
 
-  ScopedJniThreadState ts(Thread::Current());
+  ScopedObjectAccess soa(Thread::Current());
 
-  Class* class_loader_class = ts.Decode<Class*>(WellKnownClasses::java_lang_ClassLoader);
+  Class* class_loader_class = soa.Decode<Class*>(WellKnownClasses::java_lang_ClassLoader);
   CHECK(Runtime::Current()->GetClassLinker()->EnsureInitialized(class_loader_class, true, true));
 
   Method* getSystemClassLoader = class_loader_class->FindDirectMethod("getSystemClassLoader", "()Ljava/lang/ClassLoader;");
   CHECK(getSystemClassLoader != NULL);
 
   ClassLoader* class_loader =
-    down_cast<ClassLoader*>(InvokeWithJValues(ts, NULL, getSystemClassLoader, NULL).GetL());
+    down_cast<ClassLoader*>(InvokeWithJValues(soa, NULL, getSystemClassLoader, NULL).GetL());
   CHECK(class_loader != NULL);
 
-  ts.Self()->SetClassLoaderOverride(class_loader);
+  soa.Self()->SetClassLoaderOverride(class_loader);
 
-  Class* thread_class = ts.Decode<Class*>(WellKnownClasses::java_lang_Thread);
+  Class* thread_class = soa.Decode<Class*>(WellKnownClasses::java_lang_Thread);
   CHECK(Runtime::Current()->GetClassLinker()->EnsureInitialized(thread_class, true, true));
 
   Field* contextClassLoader = thread_class->FindDeclaredInstanceField("contextClassLoader",
                                                                       "Ljava/lang/ClassLoader;");
   CHECK(contextClassLoader != NULL);
 
-  contextClassLoader->SetObject(ts.Self()->GetPeer(), class_loader);
+  contextClassLoader->SetObject(soa.Self()->GetPeer(), class_loader);
 }
 
 void Runtime::Start() {
@@ -593,16 +581,16 @@
   // Relocate the OatFiles (ELF images).
   class_linker_->RelocateExecutable();
 
-  // Restore main thread state to kNative as expected by native code.
-  Thread* self = Thread::Current();
-  self->SetState(kNative);
-
   // Pre-allocate an OutOfMemoryError for the double-OOME case.
+  Thread* self = Thread::Current();
   self->ThrowNewException("Ljava/lang/OutOfMemoryError;",
                           "OutOfMemoryError thrown while trying to throw OutOfMemoryError; no stack available");
   pre_allocated_OutOfMemoryError_ = self->GetException();
   self->ClearException();
 
+  // Restore main thread state to kNative as expected by native code.
+  self->TransitionFromRunnableToSuspended(kNative);
+
   started_ = true;
 
   // InitNativeMethods needs to be after started_ so that the classes
@@ -651,7 +639,10 @@
   Thread* self = Thread::Current();
 
   // Must be in the kNative state for calling native methods.
-  CHECK_EQ(self->GetState(), kNative);
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_EQ(self->GetState(), kNative);
+  }
 
   JNIEnv* env = self->GetJniEnv();
   env->CallStaticVoidMethod(WellKnownClasses::java_lang_Daemons, WellKnownClasses::java_lang_Daemons_start);
@@ -700,7 +691,8 @@
   heap_ = new Heap(options->heap_initial_size_,
                    options->heap_growth_limit_,
                    options->heap_maximum_size_,
-                   options->image_);
+                   options->image_,
+                   options->is_concurrent_gc_enabled_);
 
   BlockSignals();
   InitPlatformSignalHandlers();
@@ -714,7 +706,7 @@
   Thread::Attach("main", false, NULL);
 
   // Set us to runnable so tools using a runtime can allocate and GC by default
-  Thread::Current()->SetState(kRunnable);
+  Thread::Current()->TransitionFromSuspendedToRunnable();
 
   // Now we're attached, we can take the heap lock and validate the heap.
   GetHeap()->EnableObjectValidation();
@@ -747,7 +739,10 @@
   JNIEnv* env = self->GetJniEnv();
 
   // Must be in the kNative state for calling native methods (JNI_OnLoad code).
-  CHECK_EQ(self->GetState(), kNative);
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_EQ(self->GetState(), kNative);
+  }
 
   // First set up JniConstants, which is used by both the runtime's built-in native
   // methods and libcore.
@@ -760,7 +755,15 @@
   // Then set up libcore, which is just a regular JNI library with a regular JNI_OnLoad.
   // Most JNI libraries can just use System.loadLibrary, but libcore can't because it's
   // the library that implements System.loadLibrary!
-  LoadJniLibrary(instance_->GetJavaVM(), "javacore");
+  {
+    std::string mapped_name(StringPrintf(OS_SHARED_LIB_FORMAT_STR, "javacore"));
+    std::string reason;
+    self->TransitionFromSuspendedToRunnable();
+    if (!instance_->java_vm_->LoadNativeLibrary(mapped_name, NULL, reason)) {
+      LOG(FATAL) << "LoadNativeLibrary failed for \"" << mapped_name << "\": " << reason;
+    }
+    self->TransitionFromRunnableToSuspended(kNative);
+  }
   VLOG(startup) << "Runtime::InitNativeMethods exiting";
 }
 
@@ -826,12 +829,12 @@
 }
 
 void Runtime::DumpLockHolders(std::ostream& os) {
-  pid_t heap_lock_owner = GetHeap()->GetLockOwner();
+  uint64_t mutator_lock_owner = GlobalSynchronization::mutator_lock_->GetExclusiveOwnerTid();
   pid_t thread_list_lock_owner = GetThreadList()->GetLockOwner();
   pid_t classes_lock_owner = GetClassLinker()->GetClassesLockOwner();
   pid_t dex_lock_owner = GetClassLinker()->GetDexLockOwner();
-  if ((heap_lock_owner | thread_list_lock_owner | classes_lock_owner | dex_lock_owner) != 0) {
-    os << "Heap lock owner tid: " << heap_lock_owner << "\n"
+  if ((thread_list_lock_owner | classes_lock_owner | dex_lock_owner) != 0) {
+    os << "Mutator lock exclusive owner tid: " << mutator_lock_owner << "\n"
        << "ThreadList lock owner tid: " << thread_list_lock_owner << "\n"
        << "ClassLinker classes lock owner tid: " << classes_lock_owner << "\n"
        << "ClassLinker dex lock owner tid: " << dex_lock_owner << "\n";
@@ -913,7 +916,7 @@
   if (self->HasManagedStack()) {
     LOG(FATAL) << *Thread::Current() << " attempting to detach while still running code";
   }
-  thread_list_->Unregister();
+  thread_list_->Unregister(self);
 }
 
 void Runtime::VisitRoots(Heap::RootVisitor* visitor, void* arg) const {
@@ -1031,7 +1034,7 @@
   tracer_ = NULL;
 }
 
-const std::vector<const DexFile*>& Runtime::GetCompileTimeClassPath(const ClassLoader* class_loader) {
+const std::vector<const DexFile*>& Runtime::GetCompileTimeClassPath(jobject class_loader) {
   if (class_loader == NULL) {
     return GetClassLinker()->GetBootClassPath();
   }
@@ -1041,7 +1044,7 @@
   return it->second;
 }
 
-void Runtime::SetCompileTimeClassPath(const ClassLoader* class_loader, std::vector<const DexFile*>& class_path) {
+void Runtime::SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path) {
   CHECK(!IsStarted());
   use_compile_time_class_path_ = true;
   compile_time_class_paths_.Put(class_loader, class_path);
diff --git a/src/runtime.h b/src/runtime.h
index 544dcf4..3b9919c 100644
--- a/src/runtime.h
+++ b/src/runtime.h
@@ -28,6 +28,7 @@
 #include "globals.h"
 #include "heap.h"
 #include "instruction_set.h"
+#include "jobject_comparator.h"
 #include "macros.h"
 #include "runtime_stats.h"
 #include "safe_map.h"
@@ -95,7 +96,8 @@
   };
 
   // Creates and initializes a new runtime.
-  static Runtime* Create(const Options& options, bool ignore_unrecognized);
+  static bool Create(const Options& options, bool ignore_unrecognized)
+      SHARED_TRYLOCK_FUNCTION(true, GlobalSynchronization::mutator_lock_);
 
   bool IsCompiler() const {
     return is_compiler_;
@@ -115,7 +117,7 @@
   }
 
   // Starts a runtime, which may cause threads to be started and code to run.
-  void Start();
+  void Start() UNLOCK_FUNCTION(GlobalSynchronization::mutator_lock_);
 
   bool IsShuttingDown() const {
     return shutting_down_;
@@ -138,7 +140,7 @@
   // This isn't marked ((noreturn)) because then gcc will merge multiple calls
   // in a single function together. This reduces code size slightly, but means
   // that the native stack trace we get may point at the wrong call site.
-  static void Abort();
+  static void Abort() LOCKS_EXCLUDED(GlobalSynchronization::abort_lock_);
 
   // Returns the "main" ThreadGroup, used when attaching user threads.
   jobject GetMainThreadGroup() const;
@@ -152,9 +154,10 @@
   void CallExitHook(jint status);
 
   // Detaches the current native thread from the runtime.
-  void DetachCurrentThread();
+  void DetachCurrentThread() LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
 
-  void DumpForSigQuit(std::ostream& os);
+  void DumpForSigQuit(std::ostream& os)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   void DumpLockHolders(std::ostream& os);
 
   ~Runtime();
@@ -207,7 +210,8 @@
     return "2.0.0";
   }
 
-  void VisitRoots(Heap::RootVisitor* visitor, void* arg) const;
+  void VisitRoots(Heap::RootVisitor* visitor, void* arg) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool HasJniDlsymLookupStub() const {
     return jni_stub_array_ != NULL;
@@ -263,7 +267,7 @@
     resolution_method_ = method;
   }
 
-  Method* CreateResolutionMethod();
+  Method* CreateResolutionMethod() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Returns a special method that describes all callee saves being spilled to the stack.
   enum CalleeSaveType {
@@ -284,10 +288,14 @@
 
   void SetCalleeSaveMethod(Method* method, CalleeSaveType type);
 
-  Method* CreateCalleeSaveMethod(InstructionSet instruction_set, CalleeSaveType type);
+  Method* CreateCalleeSaveMethod(InstructionSet instruction_set, CalleeSaveType type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Method* CreateRefOnlyCalleeSaveMethod(InstructionSet instruction_set);
-  Method* CreateRefAndArgsCalleeSaveMethod(InstructionSet instruction_set);
+  Method* CreateRefOnlyCalleeSaveMethod(InstructionSet instruction_set)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+
+  Method* CreateRefAndArgsCalleeSaveMethod(InstructionSet instruction_set)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   int32_t GetStat(int kind);
 
@@ -322,8 +330,8 @@
     return use_compile_time_class_path_;
   }
 
-  const std::vector<const DexFile*>& GetCompileTimeClassPath(const ClassLoader* class_loader);
-  void SetCompileTimeClassPath(const ClassLoader* class_loader, std::vector<const DexFile*>& class_path);
+  const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
+  void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
  private:
   static void InitPlatformSignalHandlers();
@@ -332,8 +340,9 @@
 
   void BlockSignals();
 
-  bool Init(const Options& options, bool ignore_unrecognized);
-  void InitNativeMethods();
+  bool Init(const Options& options, bool ignore_unrecognized)
+      SHARED_TRYLOCK_FUNCTION(true, GlobalSynchronization::mutator_lock_);
+  void InitNativeMethods() LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_);
   void InitThreadGroups(Thread* self);
   void RegisterRuntimeNativeMethods(JNIEnv* env);
 
@@ -417,7 +426,7 @@
   size_t method_trace_file_size_;
   Trace* tracer_;
 
-  typedef SafeMap<const ClassLoader*, std::vector<const DexFile*> > CompileTimeClassPaths;
+  typedef SafeMap<jobject, std::vector<const DexFile*>, JobjectComparator> CompileTimeClassPaths;
   CompileTimeClassPaths compile_time_class_paths_;
   bool use_compile_time_class_path_;
 
diff --git a/src/runtime_linux.cc b/src/runtime_linux.cc
index a84dfc9..8365079 100644
--- a/src/runtime_linux.cc
+++ b/src/runtime_linux.cc
@@ -226,8 +226,7 @@
 };
 
 static void HandleUnexpectedSignal(int signal_number, siginfo_t* info, void* raw_context) {
-  static Mutex unexpected_signal_lock("unexpected signal lock");
-  MutexLock mu(unexpected_signal_lock);
+  MutexLock mu(*GlobalSynchronization::unexpected_signal_lock_);
 
   bool has_address = (signal_number == SIGILL || signal_number == SIGBUS ||
                       signal_number == SIGFPE || signal_number == SIGSEGV);
diff --git a/src/runtime_support.cc b/src/runtime_support.cc
index c067765..0caccf6 100644
--- a/src/runtime_support.cc
+++ b/src/runtime_support.cc
@@ -293,7 +293,8 @@
 }
 
 static std::string ClassNameFromIndex(const Method* method, uint32_t ref,
-                                      verifier::VerifyErrorRefType ref_type, bool access) {
+                                      verifier::VerifyErrorRefType ref_type, bool access)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   const DexFile& dex_file = class_linker->FindDexFile(method->GetDeclaringClass()->GetDexCache());
 
diff --git a/src/runtime_support.h b/src/runtime_support.h
index d96cab9..bba9161 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -44,37 +44,50 @@
 class Object;
 
 // Helpers to give consistent descriptive exception messages
-void ThrowNewIllegalAccessErrorClass(Thread* self, Class* referrer, Class* accessed);
+void ThrowNewIllegalAccessErrorClass(Thread* self, Class* referrer, Class* accessed)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 void ThrowNewIllegalAccessErrorClassForMethodDispatch(Thread* self, Class* referrer,
                                                       Class* accessed,
                                                       const Method* caller,
                                                       const Method* called,
-                                                      InvokeType type);
+                                                      InvokeType type)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 void ThrowNewIncompatibleClassChangeErrorClassForInterfaceDispatch(Thread* self,
                                                                    const Method* referrer,
                                                                    const Method* interface_method,
-                                                                   Object* this_object);
-void ThrowNewIllegalAccessErrorField(Thread* self, Class* referrer, Field* accessed);
-void ThrowNewIllegalAccessErrorFinalField(Thread* self, const Method* referrer, Field* accessed);
+                                                                   Object* this_object)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+void ThrowNewIllegalAccessErrorField(Thread* self, Class* referrer, Field* accessed)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+void ThrowNewIllegalAccessErrorFinalField(Thread* self, const Method* referrer, Field* accessed)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-void ThrowNewIllegalAccessErrorMethod(Thread* self, Class* referrer, Method* accessed);
-void ThrowNullPointerExceptionForFieldAccess(Thread* self, Field* field, bool is_read);
+void ThrowNewIllegalAccessErrorMethod(Thread* self, Class* referrer, Method* accessed)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+void ThrowNullPointerExceptionForFieldAccess(Thread* self, Field* field, bool is_read)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 void ThrowNullPointerExceptionForMethodAccess(Thread* self, Method* caller, uint32_t method_idx,
-                                              InvokeType type);
-void ThrowNullPointerExceptionFromDexPC(Thread* self, Method* caller, uint32_t dex_pc);
-void ThrowVerificationError(Thread* self, const Method* method, int32_t kind, int32_t ref);
+                                              InvokeType type)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+void ThrowNullPointerExceptionFromDexPC(Thread* self, Method* caller, uint32_t dex_pc)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+void ThrowVerificationError(Thread* self, const Method* method, int32_t kind, int32_t ref)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 std::string FieldNameFromIndex(const Method* method, uint32_t ref,
-                               verifier::VerifyErrorRefType ref_type, bool access);
+                               verifier::VerifyErrorRefType ref_type, bool access)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 std::string MethodNameFromIndex(const Method* method, uint32_t ref,
-                                verifier::VerifyErrorRefType ref_type, bool access);
+                                verifier::VerifyErrorRefType ref_type, bool access)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
 // cannot be resolved, throw an error. If it can, use it to create an instance.
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
 static inline Object* AllocObjectFromCode(uint32_t type_idx, Method* method, Thread* self,
-                                          bool access_check) {
+                                          bool access_check)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Class* klass = method->GetDexCacheResolvedTypes()->Get(type_idx);
   Runtime* runtime = Runtime::Current();
   if (UNLIKELY(klass == NULL)) {
@@ -108,7 +121,8 @@
 // When verification/compiler hasn't been able to verify access, optionally perform an access
 // check.
 static inline Array* AllocArrayFromCode(uint32_t type_idx, Method* method, int32_t component_count,
-                                        Thread* self, bool access_check) {
+                                        Thread* self, bool access_check)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (UNLIKELY(component_count < 0)) {
     Thread::Current()->ThrowNewExceptionF("Ljava/lang/NegativeArraySizeException;", "%d",
                                           component_count);
@@ -134,15 +148,18 @@
 }
 
 extern Array* CheckAndAllocArrayFromCode(uint32_t type_idx, Method* method, int32_t component_count,
-                                         Thread* self, bool access_check);
+                                         Thread* self, bool access_check)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 extern Field* FindFieldFromCode(uint32_t field_idx, const Method* referrer, Thread* self,
                                 bool is_static, bool is_primitive, bool is_set,
-                                size_t expected_size);
+                                size_t expected_size)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 // Fast path field resolution that can't throw exceptions
 static inline Field* FindFieldFast(uint32_t field_idx, const Method* referrer, bool is_primitive,
-                                   size_t expected_size, bool is_set) {
+                                   size_t expected_size, bool is_set)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Field* resolved_field = referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
   if (UNLIKELY(resolved_field == NULL)) {
     return NULL;
@@ -170,7 +187,8 @@
 
 // Fast path method resolution that can't throw exceptions
 static inline Method* FindMethodFast(uint32_t method_idx, Object* this_object, const Method* referrer,
-                              bool access_check, InvokeType type) {
+                                     bool access_check, InvokeType type)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   bool is_direct = type == kStatic || type == kDirect;
   if (UNLIKELY(this_object == NULL && !is_direct)) {
     return NULL;
@@ -204,12 +222,15 @@
 }
 
 extern Method* FindMethodFromCode(uint32_t method_idx, Object* this_object, const Method* referrer,
-                                  Thread* self, bool access_check, InvokeType type);
+                                  Thread* self, bool access_check, InvokeType type)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 extern Class* ResolveVerifyAndClinit(uint32_t type_idx, const Method* referrer, Thread* self,
-                                     bool can_run_clinit, bool verify_access);
+                                     bool can_run_clinit, bool verify_access)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-static inline String* ResolveStringFromCode(const Method* referrer, uint32_t string_idx) {
+static inline String* ResolveStringFromCode(const Method* referrer, uint32_t string_idx)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   return class_linker->ResolveString(string_idx, referrer);
 }
diff --git a/src/scoped_heap_lock.h b/src/scoped_heap_lock.h
deleted file mode 100644
index 0dee589..0000000
--- a/src/scoped_heap_lock.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_SRC_SCOPED_HEAP_LOCK_H_
-#define ART_SRC_SCOPED_HEAP_LOCK_H_
-
-#include "heap.h"
-#include "macros.h"
-#include "runtime.h"
-
-namespace art {
-
-class ScopedHeapLock {
- public:
-  ScopedHeapLock() {
-    Runtime::Current()->GetHeap()->Lock();
-  }
-
-  ~ScopedHeapLock() {
-    Runtime::Current()->GetHeap()->Unlock();
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ScopedHeapLock);
-};
-
-}  // namespace art
-
-#endif  // ART_SRC_SCOPED_HEAP_LOCK_H_
diff --git a/src/scoped_jni_thread_state.h b/src/scoped_jni_thread_state.h
deleted file mode 100644
index 1c9ab2c..0000000
--- a/src/scoped_jni_thread_state.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "casts.h"
-#include "jni_internal.h"
-#include "thread.h"
-
-namespace art {
-
-// Entry/exit processing for transitions from Native to Runnable (ie within JNI functions).
-//
-// This class performs the necessary thread state switching to and from Runnable and lets us
-// amortize the cost of working out the current thread. Additionally it lets us check (and repair)
-// apps that are using a JNIEnv on the wrong thread. The class also decodes and encodes Objects
-// into jobjects via methods of this class. Performing this here enforces the Runnable thread state
-// for use of Object, thereby inhibiting the Object being modified by GC whilst native or VM code
-// is also manipulating the Object.
-//
-// The destructor transitions back to the previous thread state, typically Native. In this case
-// GC and thread suspension may occur.
-class ScopedJniThreadState {
- public:
-  explicit ScopedJniThreadState(JNIEnv* env, ThreadState new_state = kRunnable)
-      : env_(reinterpret_cast<JNIEnvExt*>(env)), vm_(env_->vm), self_(ThreadForEnv(env)),
-        old_thread_state_(self_->SetState(new_state)), thread_state_(new_state) {
-    self_->VerifyStack();
-  }
-
-  explicit ScopedJniThreadState(Thread* self, ThreadState new_state = kRunnable)
-      : env_(reinterpret_cast<JNIEnvExt*>(self->GetJniEnv())), vm_(env_->vm), self_(self),
-        old_thread_state_(self_->SetState(new_state)), thread_state_(new_state) {
-    if (!Vm()->work_around_app_jni_bugs && self != Thread::Current()) {
-      UnexpectedThreads(self, Thread::Current());
-    }
-    self_->VerifyStack();
-  }
-
-  // Used when we want a scoped JNI thread state but have no thread/JNIEnv.
-  explicit ScopedJniThreadState(JavaVM* vm)
-      : env_(NULL), vm_(reinterpret_cast<JavaVMExt*>(vm)), self_(NULL),
-        old_thread_state_(kTerminated), thread_state_(kTerminated) {}
-
-  ~ScopedJniThreadState() {
-    if (self_ != NULL) {
-      self_->SetState(old_thread_state_);
-    }
-  }
-
-  JNIEnvExt* Env() const {
-    return env_;
-  }
-
-  Thread* Self() const {
-    return self_;
-  }
-
-  JavaVMExt* Vm() const {
-    return vm_;
-  }
-
-  /*
-   * Add a local reference for an object to the indirect reference table associated with the
-   * current stack frame.  When the native function returns, the reference will be discarded.
-   * Part of the ScopedJniThreadState as native code shouldn't be working on raw Object* without
-   * having transitioned its state.
-   *
-   * We need to allow the same reference to be added multiple times.
-   *
-   * This will be called on otherwise unreferenced objects.  We cannot do GC allocations here, and
-   * it's best if we don't grab a mutex.
-   *
-   * Returns the local reference (currently just the same pointer that was
-   * passed in), or NULL on failure.
-   */
-  template<typename T>
-  T AddLocalReference(Object* obj) const {
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-    if (obj == NULL) {
-      return NULL;
-    }
-
-    DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
-
-    IndirectReferenceTable& locals = Env()->locals;
-
-    uint32_t cookie = Env()->local_ref_cookie;
-    IndirectRef ref = locals.Add(cookie, obj);
-
-  #if 0 // TODO: fix this to understand PushLocalFrame, so we can turn it on.
-    if (Env()->check_jni) {
-      size_t entry_count = locals.Capacity();
-      if (entry_count > 16) {
-        LOG(WARNING) << "Warning: more than 16 JNI local references: "
-                     << entry_count << " (most recent was a " << PrettyTypeOf(obj) << ")\n"
-                     << Dumpable<IndirectReferenceTable>(locals);
-        // TODO: LOG(FATAL) in a later release?
-      }
-    }
-  #endif
-
-    if (Vm()->work_around_app_jni_bugs) {
-      // Hand out direct pointers to support broken old apps.
-      return reinterpret_cast<T>(obj);
-    }
-
-    return reinterpret_cast<T>(ref);
-  }
-
-  template<typename T>
-  T Decode(jobject obj) const {
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-    return down_cast<T>(Self()->DecodeJObject(obj));
-  }
-
-  Field* DecodeField(jfieldID fid) const {
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-  #ifdef MOVING_GARBAGE_COLLECTOR
-    // TODO: we should make these unique weak globals if Field instances can ever move.
-    UNIMPLEMENTED(WARNING);
-  #endif
-    return reinterpret_cast<Field*>(fid);
-  }
-
-  jfieldID EncodeField(Field* field) const {
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-  #ifdef MOVING_GARBAGE_COLLECTOR
-    UNIMPLEMENTED(WARNING);
-  #endif
-    return reinterpret_cast<jfieldID>(field);
-  }
-
-  Method* DecodeMethod(jmethodID mid) const {
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-  #ifdef MOVING_GARBAGE_COLLECTOR
-    // TODO: we should make these unique weak globals if Method instances can ever move.
-    UNIMPLEMENTED(WARNING);
-  #endif
-    return reinterpret_cast<Method*>(mid);
-  }
-
-  jmethodID EncodeMethod(Method* method) const {
-    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
-  #ifdef MOVING_GARBAGE_COLLECTOR
-    UNIMPLEMENTED(WARNING);
-  #endif
-    return reinterpret_cast<jmethodID>(method);
-  }
-
- private:
-  static Thread* ThreadForEnv(JNIEnv* env) {
-    JNIEnvExt* full_env(reinterpret_cast<JNIEnvExt*>(env));
-    bool work_around_app_jni_bugs = full_env->vm->work_around_app_jni_bugs;
-    Thread* env_self = full_env->self;
-    Thread* self = work_around_app_jni_bugs ? Thread::Current() : env_self;
-    if (!work_around_app_jni_bugs && self != env_self) {
-      UnexpectedThreads(env_self, self);
-    }
-    return self;
-  }
-
-  static void UnexpectedThreads(Thread* found_self, Thread* expected_self) {
-    // TODO: pass through function name so we can use it here instead of NULL...
-    JniAbortF(NULL, "JNIEnv for %s used on %s",
-             found_self != NULL ? ToStr<Thread>(*found_self).c_str() : "NULL",
-             expected_self != NULL ? ToStr<Thread>(*expected_self).c_str() : "NULL");
-
-  }
-
-  // The full JNIEnv.
-  JNIEnvExt* const env_;
-  // The full JavaVM.
-  JavaVMExt* const vm_;
-  // Cached current thread derived from the JNIEnv.
-  Thread* const self_;
-  // Previous thread state, most likely kNative.
-  const ThreadState old_thread_state_;
-  // Local cache of thread state to enable quick sanity checks.
-  const ThreadState thread_state_;
-  DISALLOW_COPY_AND_ASSIGN(ScopedJniThreadState);
-};
-
-}  // namespace art
diff --git a/src/scoped_thread_list_lock.cc b/src/scoped_thread_list_lock.cc
deleted file mode 100644
index 269c97e..0000000
--- a/src/scoped_thread_list_lock.cc
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "scoped_thread_list_lock.h"
-
-#include "runtime.h"
-#include "thread_list.h"
-
-namespace art {
-
-ScopedThreadListLock::ScopedThreadListLock() {
-  // Avoid deadlock between two threads trying to SuspendAll
-  // simultaneously by going to kVmWait if the lock cannot be
-  // immediately acquired.
-  ThreadList* thread_list = Runtime::Current()->GetThreadList();
-  if (!thread_list->thread_list_lock_.TryLock()) {
-    Thread* self = Thread::Current();
-    if (self == NULL) {
-      // Self may be null during shutdown, but in that case there's no point going to kVmWait.
-      thread_list->thread_list_lock_.Lock();
-    } else {
-      ThreadState old_thread_state = self->SetState(kVmWait);
-      thread_list->thread_list_lock_.Lock();
-      // If we have the lock, by definition there's no GC in progress (though we
-      // might be taking the lock in order to start one). We avoid the suspend
-      // check here so we don't risk going to sleep on the thread suspend count lock
-      // while holding the thread list lock.
-      self->SetStateWithoutSuspendCheck(old_thread_state);
-    }
-  }
-}
-
-ScopedThreadListLock::~ScopedThreadListLock() {
-  Runtime::Current()->GetThreadList()->thread_list_lock_.Unlock();
-}
-
-}  // namespace art
diff --git a/src/scoped_thread_list_lock_releaser.cc b/src/scoped_thread_list_lock_releaser.cc
deleted file mode 100644
index d15eae5..0000000
--- a/src/scoped_thread_list_lock_releaser.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "scoped_thread_list_lock_releaser.h"
-
-#include "runtime.h"
-#include "thread_list.h"
-
-namespace art {
-
-ScopedThreadListLockReleaser::ScopedThreadListLockReleaser() : unlocked_(false) {
-  if (Thread::Current() == NULL) {
-    CHECK(Runtime::Current()->IsShuttingDown());
-    return;
-  }
-
-  if (Thread::Current()->held_mutexes_[kThreadListLock] > 0) {
-    Runtime::Current()->GetThreadList()->thread_list_lock_.Unlock();
-    unlocked_ = true;
-  }
-}
-
-ScopedThreadListLockReleaser::~ScopedThreadListLockReleaser() {
-  if (unlocked_) {
-    Runtime::Current()->GetThreadList()->thread_list_lock_.Lock();
-  }
-}
-
-}  // namespace art
diff --git a/src/scoped_thread_list_lock_releaser.h b/src/scoped_thread_list_lock_releaser.h
deleted file mode 100644
index af656d5..0000000
--- a/src/scoped_thread_list_lock_releaser.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_SRC_SCOPED_THREAD_LIST_LOCK_RELEASER_H_
-#define ART_SRC_SCOPED_THREAD_LIST_LOCK_RELEASER_H_
-
-#include "macros.h"
-
-namespace art {
-
-class ScopedThreadListLockReleaser {
- public:
-  ScopedThreadListLockReleaser();
-  ~ScopedThreadListLockReleaser();
-
- private:
-  // Whether or not we unlocked the thread list lock.
-  bool unlocked_;
-
-  DISALLOW_COPY_AND_ASSIGN(ScopedThreadListLockReleaser);
-};
-
-}  // namespace art
-
-#endif  // ART_SRC_SCOPED_THREAD_LIST_LOCK_RELEASER_H_
diff --git a/src/scoped_thread_state_change.h b/src/scoped_thread_state_change.h
new file mode 100644
index 0000000..745e2d6
--- /dev/null
+++ b/src/scoped_thread_state_change.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_SRC_SCOPED_THREAD_STATE_CHANGE_H_
+#define ART_SRC_SCOPED_THREAD_STATE_CHANGE_H_
+
+#include "casts.h"
+#include "thread.h"
+
+namespace art {
+
+// Scoped change into and out of a particular state. Handles Runnable transitions that require
+// more complicated suspension checking. The subclasses ScopedObjectAccessUnchecked and
+// ScopedObjectAccess are used to handle the change into Runnable to get direct access to objects,
+// the unchecked variant doesn't aid annotalysis.
+class ScopedThreadStateChange {
+ public:
+  ScopedThreadStateChange(Thread* self, ThreadState new_thread_state)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      : self_(self), thread_state_(new_thread_state), expected_has_no_thread_(false) {
+    if (self_ == NULL) {
+      // Value chosen arbitrarily and won't be used in the destructor since thread_ == NULL.
+      old_thread_state_ = kTerminated;
+      CHECK(!Runtime::Current()->IsStarted() || Runtime::Current()->IsShuttingDown());
+    } else {
+      bool runnable_transition;
+      {
+        MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+        old_thread_state_ = self->GetState();
+        runnable_transition = old_thread_state_ == kRunnable || new_thread_state == kRunnable;
+        if (!runnable_transition) {
+          self_->SetState(new_thread_state);
+        }
+      }
+      if (runnable_transition && old_thread_state_ != new_thread_state) {
+        if (new_thread_state == kRunnable) {
+          self_->TransitionFromSuspendedToRunnable();
+        } else {
+          DCHECK_EQ(old_thread_state_, kRunnable);
+          self_->TransitionFromRunnableToSuspended(new_thread_state);
+        }
+      }
+    }
+  }
+
+  ~ScopedThreadStateChange() LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_) {
+    if (self_ == NULL) {
+      if (!expected_has_no_thread_) {
+        CHECK(Runtime::Current()->IsShuttingDown());
+      }
+    } else {
+      if (old_thread_state_ != thread_state_) {
+        if (old_thread_state_ == kRunnable) {
+          self_->TransitionFromSuspendedToRunnable();
+        } else if (thread_state_ == kRunnable) {
+          self_->TransitionFromRunnableToSuspended(old_thread_state_);
+        } else {
+          MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+          self_->SetState(old_thread_state_);
+        }
+      }
+    }
+  }
+
+  Thread* Self() const {
+    return self_;
+  }
+
+ protected:
+  // Constructor used by ScopedObjectAccessUnchecked for an unattached thread that has access to the VM*.
+  ScopedThreadStateChange()
+      : self_(NULL), thread_state_(kTerminated), old_thread_state_(kTerminated),
+        expected_has_no_thread_(true) {}
+
+  Thread* const self_;
+  const ThreadState thread_state_;
+
+ private:
+  ThreadState old_thread_state_;
+  const bool expected_has_no_thread_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedThreadStateChange);
+};
+
+// Entry/exit processing for transitions from Native to Runnable (ie within JNI functions).
+//
+// This class performs the necessary thread state switching to and from Runnable and lets us
+// amortize the cost of working out the current thread. Additionally it lets us check (and repair)
+// apps that are using a JNIEnv on the wrong thread. The class also decodes and encodes Objects
+// into jobjects via methods of this class. Performing this here enforces the Runnable thread state
+// for use of Object, thereby inhibiting the Object being modified by GC whilst native or VM code
+// is also manipulating the Object.
+//
+// The destructor transitions back to the previous thread state, typically Native. In this state
+// GC and thread suspension may occur.
+//
+// For annotalysis the subclass ScopedObjectAccess (below) makes it explicit that a share of
+// the mutator_lock_ will be acquired on construction.
+class ScopedObjectAccessUnchecked : public ScopedThreadStateChange {
+ public:
+  explicit ScopedObjectAccessUnchecked(JNIEnv* env)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      : ScopedThreadStateChange(ThreadForEnv(env), kRunnable),
+        env_(reinterpret_cast<JNIEnvExt*>(env)), vm_(env_->vm) {
+    self_->VerifyStack();
+  }
+
+  explicit ScopedObjectAccessUnchecked(Thread* self)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      : ScopedThreadStateChange(self, kRunnable),
+        env_(reinterpret_cast<JNIEnvExt*>(self->GetJniEnv())),
+        vm_(env_ != NULL ? env_->vm : NULL) {
+    if (Vm() != NULL && !Vm()->work_around_app_jni_bugs && self != Thread::Current()) {
+      UnexpectedThreads(self, Thread::Current());
+    }
+    self_->VerifyStack();
+  }
+
+  // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
+  // change into Runnable or acquire a share on the mutator_lock_.
+  explicit ScopedObjectAccessUnchecked(JavaVM* vm)
+      : ScopedThreadStateChange(), env_(NULL), vm_(reinterpret_cast<JavaVMExt*>(vm)) {}
+
+  JNIEnvExt* Env() const {
+    return env_;
+  }
+
+  JavaVMExt* Vm() const {
+    return vm_;
+  }
+
+  /*
+   * Add a local reference for an object to the indirect reference table associated with the
+   * current stack frame.  When the native function returns, the reference will be discarded.
+   * Part of the ScopedObjectAccessUnchecked as native code shouldn't be working on raw Object* without
+   * having transitioned its state.
+   *
+   * We need to allow the same reference to be added multiple times.
+   *
+   * This will be called on otherwise unreferenced objects.  We cannot do GC allocations here, and
+   * it's best if we don't grab a mutex.
+   *
+   * Returns the local reference (currently just the same pointer that was
+   * passed in), or NULL on failure.
+   */
+  template<typename T>
+  T AddLocalReference(Object* obj) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    if (obj == NULL) {
+      return NULL;
+    }
+
+    DCHECK_NE((reinterpret_cast<uintptr_t>(obj) & 0xffff0000), 0xebad0000);
+
+    IndirectReferenceTable& locals = Env()->locals;
+
+    uint32_t cookie = Env()->local_ref_cookie;
+    IndirectRef ref = locals.Add(cookie, obj);
+
+#if 0 // TODO: fix this to understand PushLocalFrame, so we can turn it on.
+    if (Env()->check_jni) {
+      size_t entry_count = locals.Capacity();
+      if (entry_count > 16) {
+        LOG(WARNING) << "Warning: more than 16 JNI local references: "
+                     << entry_count << " (most recent was a " << PrettyTypeOf(obj) << ")\n"
+                     << Dumpable<IndirectReferenceTable>(locals);
+        // TODO: LOG(FATAL) in a later release?
+      }
+    }
+#endif
+
+    if (Vm()->work_around_app_jni_bugs) {
+      // Hand out direct pointers to support broken old apps.
+      return reinterpret_cast<T>(obj);
+    }
+
+    return reinterpret_cast<T>(ref);
+  }
+
+  template<typename T>
+  T Decode(jobject obj) const
+      LOCKS_EXCLUDED(JavaVMExt::globals_lock,
+                     JavaVMExt::weak_globals_lock)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    GlobalSynchronization::mutator_lock_->AssertSharedHeld();
+    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+    return down_cast<T>(Self()->DecodeJObject(obj));
+  }
+
+  Field* DecodeField(jfieldID fid) const
+      LOCKS_EXCLUDED(JavaVMExt::globals_lock,
+                     JavaVMExt::weak_globals_lock)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    GlobalSynchronization::mutator_lock_->AssertSharedHeld();
+    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+#ifdef MOVING_GARBAGE_COLLECTOR
+    // TODO: we should make these unique weak globals if Field instances can ever move.
+    UNIMPLEMENTED(WARNING);
+#endif
+    return reinterpret_cast<Field*>(fid);
+  }
+
+  jfieldID EncodeField(Field* field) const
+      LOCKS_EXCLUDED(JavaVMExt::globals_lock,
+                     JavaVMExt::weak_globals_lock)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    GlobalSynchronization::mutator_lock_->AssertSharedHeld();
+    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+#ifdef MOVING_GARBAGE_COLLECTOR
+    UNIMPLEMENTED(WARNING);
+#endif
+    return reinterpret_cast<jfieldID>(field);
+  }
+
+  Method* DecodeMethod(jmethodID mid) const
+      LOCKS_EXCLUDED(JavaVMExt::globals_lock,
+                     JavaVMExt::weak_globals_lock)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    GlobalSynchronization::mutator_lock_->AssertSharedHeld();
+    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+#ifdef MOVING_GARBAGE_COLLECTOR
+    // TODO: we should make these unique weak globals if Method instances can ever move.
+    UNIMPLEMENTED(WARNING);
+#endif
+    return reinterpret_cast<Method*>(mid);
+  }
+
+  jmethodID EncodeMethod(Method* method) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    GlobalSynchronization::mutator_lock_->AssertSharedHeld();
+    DCHECK_EQ(thread_state_, kRunnable);  // Don't work with raw objects in non-runnable states.
+#ifdef MOVING_GARBAGE_COLLECTOR
+    UNIMPLEMENTED(WARNING);
+#endif
+    return reinterpret_cast<jmethodID>(method);
+  }
+
+ private:
+  static Thread* ThreadForEnv(JNIEnv* env) {
+    JNIEnvExt* full_env(reinterpret_cast<JNIEnvExt*>(env));
+    bool work_around_app_jni_bugs = full_env->vm->work_around_app_jni_bugs;
+    Thread* env_self = full_env->self;
+    Thread* self = work_around_app_jni_bugs ? Thread::Current() : env_self;
+    if (!work_around_app_jni_bugs && self != env_self) {
+      UnexpectedThreads(env_self, self);
+    }
+    return self;
+  }
+
+  static void UnexpectedThreads(Thread* found_self, Thread* expected_self) {
+    // TODO: pass through function name so we can use it here instead of NULL...
+    JniAbortF(NULL, "JNIEnv for %s used on %s",
+             found_self != NULL ? ToStr<Thread>(*found_self).c_str() : "NULL",
+             expected_self != NULL ? ToStr<Thread>(*expected_self).c_str() : "NULL");
+
+  }
+
+  // The full JNIEnv.
+  JNIEnvExt* const env_;
+  // The full JavaVM.
+  JavaVMExt* const vm_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedObjectAccessUnchecked);
+};
+
+// Annotalysis helping variant of the above.
+class ScopedObjectAccess : public ScopedObjectAccessUnchecked {
+ public:
+  explicit ScopedObjectAccess(JNIEnv* env)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_)
+      : ScopedObjectAccessUnchecked(env) {
+    GlobalSynchronization::mutator_lock_->AssertSharedHeld();
+  }
+
+  explicit ScopedObjectAccess(Thread* self)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_)
+      : ScopedObjectAccessUnchecked(self) {
+    GlobalSynchronization::mutator_lock_->AssertSharedHeld();
+  }
+
+  ~ScopedObjectAccess() UNLOCK_FUNCTION(GlobalSynchronization::mutator_lock_) {
+    // Base class will release share of lock. Invoked after this destructor.
+  }
+
+ private:
+  // TODO: remove this constructor. It is used by check JNI's ScopedCheck to make it believe that
+  //       routines operating with just a VM are sound, they are not, but when you have just a VM
+  //       you cannot call the unsound routines.
+  explicit ScopedObjectAccess(JavaVM* vm)
+      SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_)
+      : ScopedObjectAccessUnchecked(vm) {}
+
+  friend class ScopedCheck;
+  DISALLOW_COPY_AND_ASSIGN(ScopedObjectAccess);
+};
+
+}  // namespace art
+
+#endif  // ART_SRC_SCOPED_THREAD_STATE_CHANGE_H_
diff --git a/src/signal_catcher.cc b/src/signal_catcher.cc
index 919923e..156aec6 100644
--- a/src/signal_catcher.cc
+++ b/src/signal_catcher.cc
@@ -30,7 +30,7 @@
 #include "heap.h"
 #include "os.h"
 #include "runtime.h"
-#include "scoped_heap_lock.h"
+#include "scoped_thread_state_change.h"
 #include "signal_set.h"
 #include "thread.h"
 #include "thread_list.h"
@@ -99,7 +99,7 @@
     return;
   }
 
-  ScopedThreadStateChange tsc(Thread::Current(), kVmWait);
+  ScopedThreadStateChange tsc(Thread::Current(), kWaitingForSignalCatcherOutput);
   int fd = open(stack_trace_file_.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
   if (fd == -1) {
     PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'";
@@ -118,17 +118,27 @@
   Runtime* runtime = Runtime::Current();
   ThreadList* thread_list = runtime->GetThreadList();
 
-  // We take the heap lock before suspending all threads so we don't end up in a situation where
-  // one of the suspended threads suspended via the implicit FullSuspendCheck on the slow path of
-  // Heap::Lock, which is the only case where a thread can be suspended while holding the heap lock.
-  // (We need the heap lock when we dump the thread list. We could probably fix this by duplicating
-  // more state from java.lang.Thread in struct Thread.)
-  ScopedHeapLock heap_lock;
   thread_list->SuspendAll();
 
+  // We should exclusively hold the mutator lock, set state to Runnable without a pending
+  // suspension to avoid giving away or trying to re-acquire the mutator lock.
+  GlobalSynchronization::mutator_lock_->AssertExclusiveHeld();
+  Thread* self = Thread::Current();
+  ThreadState old_state;
+  int suspend_count;
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    suspend_count = self->GetSuspendCount();
+    if (suspend_count != 0) {
+      CHECK_EQ(suspend_count, 1);
+      self->ModifySuspendCount(-1, false);
+    }
+    old_state = self->SetState(kRunnable);
+  }
+
   std::ostringstream os;
   os << "\n"
-     << "----- pid " << getpid() << " at " << GetIsoDate() << " -----\n";
+      << "----- pid " << getpid() << " at " << GetIsoDate() << " -----\n";
 
   DumpCmdLine(os);
 
@@ -144,7 +154,13 @@
   }
 
   os << "----- end " << getpid() << " -----\n";
-
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    self->SetState(old_state);
+    if (suspend_count != 0) {
+      self->ModifySuspendCount(+1, false);
+    }
+  }
   thread_list->ResumeAll();
 
   Output(os.str());
@@ -156,7 +172,7 @@
 }
 
 int SignalCatcher::WaitForSignal(Thread* self, SignalSet& signals) {
-  ScopedThreadStateChange tsc(self, kVmWait);
+  ScopedThreadStateChange tsc(self, kWaitingInMainSignalCatcherLoop);
 
   // Signals for sigwait() must be blocked but not ignored.  We
   // block signals like SIGQUIT for all threads, so the condition
@@ -183,7 +199,6 @@
   runtime->AttachCurrentThread("Signal Catcher", true, runtime->GetSystemThreadGroup());
 
   Thread* self = Thread::Current();
-  self->SetState(kRunnable);
 
   {
     MutexLock mu(signal_catcher->lock_);
diff --git a/src/signal_catcher.h b/src/signal_catcher.h
index 35e035f..11a2c09 100644
--- a/src/signal_catcher.h
+++ b/src/signal_catcher.h
@@ -35,7 +35,10 @@
   explicit SignalCatcher(const std::string& stack_trace_file);
   ~SignalCatcher();
 
-  void HandleSigQuit();
+  void HandleSigQuit() LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_,
+                                      GlobalSynchronization::thread_list_lock_,
+                                      GlobalSynchronization::thread_suspend_count_lock_);
+
 
  private:
   static void* Run(void* arg);
diff --git a/src/space.cc b/src/space.cc
index 02230e1..a828d91 100644
--- a/src/space.cc
+++ b/src/space.cc
@@ -45,7 +45,8 @@
 
 AllocSpace::AllocSpace(const std::string& name, MemMap* mem_map, void* mspace, byte* begin, byte* end,
                        size_t growth_limit)
-    : Space(name, mem_map, begin, end, GCRP_ALWAYS_COLLECT), mspace_(mspace), growth_limit_(growth_limit) {
+    : Space(name, mem_map, begin, end, GCRP_ALWAYS_COLLECT), lock_("allocation space lock"),
+      mspace_(mspace), growth_limit_(growth_limit) {
   CHECK(mspace != NULL);
 
   size_t bitmap_index = bitmap_index_++;
@@ -155,29 +156,37 @@
   mark_bitmap_.reset(temp_live_bitmap);
 }
 
-Object* AllocSpace::AllocWithoutGrowth(size_t num_bytes) {
+Object* AllocSpace::AllocWithoutGrowthLocked(size_t num_bytes) {
   Object* result = reinterpret_cast<Object*>(mspace_calloc(mspace_, 1, num_bytes));
 #if DEBUG_SPACES
   if (result != NULL) {
     CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
-        << ") not in bounds of heap " << *this;
+        << ") not in bounds of allocation space " << *this;
   }
 #endif
   return result;
 }
 
+Object* AllocSpace::AllocWithoutGrowth(size_t num_bytes) {
+  MutexLock mu(lock_);
+  return AllocWithoutGrowthLocked(num_bytes);
+}
+
 Object* AllocSpace::AllocWithGrowth(size_t num_bytes) {
+  MutexLock mu(lock_);
   // Grow as much as possible within the mspace.
   size_t max_allowed = Capacity();
   mspace_set_footprint_limit(mspace_, max_allowed);
   // Try the allocation.
-  void* ptr = AllocWithoutGrowth(num_bytes);
+  void* ptr = AllocWithoutGrowthLocked(num_bytes);
   // Shrink back down as small as possible.
   size_t footprint = mspace_footprint(mspace_);
   mspace_set_footprint_limit(mspace_, footprint);
   // Return the new allocation or NULL.
   Object* result = reinterpret_cast<Object*>(ptr);
+#if DEBUG_SPACES
   CHECK(result == NULL || Contains(result));
+#endif
   return result;
 }
 
@@ -228,6 +237,7 @@
 }
 
 void AllocSpace::Free(Object* ptr) {
+  MutexLock mu(lock_);
 #if DEBUG_SPACES
   CHECK(ptr != NULL);
   CHECK(Contains(ptr)) << "Free (" << ptr << ") not in bounds of heap " << *this;
@@ -236,6 +246,7 @@
 }
 
 void AllocSpace::FreeList(size_t num_ptrs, Object** ptrs) {
+  MutexLock mu(lock_);
 #if DEBUG_SPACES
   CHECK(ptrs != NULL);
   size_t num_broken_ptrs = 0;
@@ -275,6 +286,7 @@
 }
 
 void* AllocSpace::MoreCore(intptr_t increment) {
+  lock_.AssertHeld();
   byte* original_end = end_;
   if (increment != 0) {
     VLOG(heap) << "AllocSpace::MoreCore " << PrettySize(increment);
@@ -330,6 +342,7 @@
 }
 
 void AllocSpace::Trim() {
+  MutexLock mu(lock_);
   // Trim to release memory at the end of the space.
   mspace_trim(mspace_, 0);
   // Visit space looking for page-sized holes to advise the kernel we don't need.
@@ -338,14 +351,17 @@
 
 void AllocSpace::Walk(void(*callback)(void *start, void *end, size_t num_bytes, void* callback_arg),
                       void* arg) {
+  MutexLock mu(lock_);
   mspace_inspect_all(mspace_, callback, arg);
 }
 
 size_t AllocSpace::GetFootprintLimit() {
+  MutexLock mu(lock_);
   return mspace_footprint_limit(mspace_);
 }
 
 void AllocSpace::SetFootprintLimit(size_t new_size) {
+  MutexLock mu(lock_);
   VLOG(heap) << "AllocSpace::SetFootprintLimit " << PrettySize(new_size);
   // Compare against the actual footprint, rather than the Size(), because the heap may not have
   // grown all the way to the allowed size yet.
diff --git a/src/space.h b/src/space.h
index 1aeb33e..6ab3302 100644
--- a/src/space.h
+++ b/src/space.h
@@ -52,7 +52,8 @@
                                       byte* requested_begin);
 
   // create a Space from an image file. cannot be used for future allocation or collected.
-  static ImageSpace* CreateImageSpace(const std::string& image);
+  static ImageSpace* CreateImageSpace(const std::string& image)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   virtual ~Space() {}
 
@@ -122,7 +123,8 @@
   }
 
  protected:
-  Space(const std::string& name, MemMap* mem_map, byte* begin, byte* end, GcRetentionPolicy gc_retention_policy)
+  Space(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
+        GcRetentionPolicy gc_retention_policy)
       : name_(name),
         mem_map_(mem_map),
         begin_(begin),
@@ -229,6 +231,8 @@
   AllocSpace* CreateZygoteSpace();
 
  private:
+  Object* AllocWithoutGrowthLocked(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
   friend class Space;
 
   UniquePtr<SpaceBitmap> live_bitmap_;
@@ -245,6 +249,9 @@
   // The boundary tag overhead.
   static const size_t kChunkOverhead = kWordSize;
 
+  // Used to ensure mutual exclusion when the allocation space's data structures are being modified.
+  Mutex lock_;
+
   // Underlying malloc space
   void* const mspace_;
 
@@ -272,7 +279,8 @@
   }
 
   // Mark the objects defined in this space in the given live bitmap
-  void RecordImageAllocations(SpaceBitmap* live_bitmap) const;
+  void RecordImageAllocations(SpaceBitmap* live_bitmap) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   virtual bool IsAllocSpace() const {
     return false;
diff --git a/src/space_bitmap.cc b/src/space_bitmap.cc
index 7da8146..438237d 100644
--- a/src/space_bitmap.cc
+++ b/src/space_bitmap.cc
@@ -225,7 +225,8 @@
 // Walk instance fields of the given Class. Separate function to allow recursion on the super
 // class.
 static void WalkInstanceFields(SpaceBitmap* visited, SpaceBitmap::Callback* callback, Object* obj,
-                               Class* klass, void* arg) {
+                               Class* klass, void* arg)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   // Visit fields of parent classes first.
   Class* super = klass->GetSuperClass();
   if (super != NULL) {
@@ -249,7 +250,8 @@
 
 // For an unvisited object, visit it then all its children found via fields.
 static void WalkFieldsInOrder(SpaceBitmap* visited, SpaceBitmap::Callback* callback, Object* obj,
-                              void* arg) {
+                              void* arg)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   if (visited->Test(obj)) {
     return;
   }
diff --git a/src/space_bitmap.h b/src/space_bitmap.h
index adf1996..bbf60f3 100644
--- a/src/space_bitmap.h
+++ b/src/space_bitmap.h
@@ -111,7 +111,8 @@
   }
 
   template <typename Visitor>
-  void VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, const Visitor& visitor) const {
+  void VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end, const Visitor& visitor) const
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_) {
     DCHECK_LT(visit_begin, visit_end);
 
     const size_t bit_index_start = (visit_begin - heap_begin_) / kAlignment;
@@ -177,7 +178,8 @@
 
   void Walk(Callback* callback, void* arg);
 
-  void InOrderWalk(Callback* callback, void* arg);
+  void InOrderWalk(Callback* callback, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void ScanWalk(uintptr_t base, uintptr_t max, ScanCallback* thunk, void* arg);
 
diff --git a/src/stack.cc b/src/stack.cc
index dd319bc..9795a77 100644
--- a/src/stack.cc
+++ b/src/stack.cc
@@ -24,37 +24,6 @@
 
 namespace art {
 
-class StackGetter {
- public:
-  StackGetter(const ScopedJniThreadState& ts, Thread* thread)
-      : ts_(ts), thread_(thread), trace_(NULL) {
-  }
-
-  static void Callback(void* arg) {
-    reinterpret_cast<StackGetter*>(arg)->Callback();
-  }
-
-  jobject GetTrace() {
-    return trace_;
-  }
-
- private:
-  void Callback() {
-    trace_ = thread_->CreateInternalStackTrace(ts_);
-  }
-
-  const ScopedJniThreadState& ts_;
-  Thread* const thread_;
-  jobject trace_;
-};
-
-jobject GetThreadStack(const ScopedJniThreadState& ts, Thread* thread) {
-  ThreadList* thread_list = Runtime::Current()->GetThreadList();
-  StackGetter stack_getter(ts, thread);
-  thread_list->RunWhileSuspended(thread, StackGetter::Callback, &stack_getter);
-  return stack_getter.GetTrace();
-}
-
 void ManagedStack::PushManagedStackFragment(ManagedStack* fragment) {
   // Copy this top fragment into given fragment.
   memcpy(fragment, this, sizeof(ManagedStack));
@@ -201,7 +170,7 @@
   return visitor.frames;
 }
 
-void StackVisitor::SanityCheckFrame() {
+void StackVisitor::SanityCheckFrame() const {
 #ifndef NDEBUG
   Method* method = GetMethod();
   CHECK(method->GetClass() == Method::GetMethodClass() ||
diff --git a/src/stack.h b/src/stack.h
index fb0bc48..578c631 100644
--- a/src/stack.h
+++ b/src/stack.h
@@ -32,11 +32,9 @@
 class Object;
 class ShadowFrame;
 class StackIndirectReferenceTable;
-class ScopedJniThreadState;
+class ScopedObjectAccess;
 class Thread;
 
-jobject GetThreadStack(const ScopedJniThreadState&, Thread*);
-
 class ShadowFrame {
  public:
   // Number of references contained within this shadow frame
@@ -217,6 +215,7 @@
  protected:
   StackVisitor(const ManagedStack* stack, const std::vector<TraceStackFrame>* trace_stack,
                Context* context)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : stack_start_(stack), trace_stack_(trace_stack), cur_shadow_frame_(NULL),
         cur_quick_frame_(NULL), cur_quick_frame_pc_(0), num_frames_(0), cur_depth_(0),
         context_(context) {}
@@ -225,9 +224,10 @@
   virtual ~StackVisitor() {}
 
   // Return 'true' if we should continue to visit more frames, 'false' to stop.
-  virtual bool VisitFrame() = 0;
+  virtual bool VisitFrame() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) = 0;
 
-  void WalkStack(bool include_transitions = false);
+  void WalkStack(bool include_transitions = false)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   Method* GetMethod() const {
     if (cur_shadow_frame_ != NULL) {
@@ -255,28 +255,30 @@
     return *reinterpret_cast<uintptr_t*>(save_addr);
   }
 
-  uint32_t GetDexPc() const;
+  uint32_t GetDexPc() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Returns the height of the stack in the managed stack frames, including transitions.
-  size_t GetFrameHeight() {
+  size_t GetFrameHeight() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetNumFrames() - cur_depth_;
   }
 
   // Returns a frame ID for JDWP use, starting from 1.
-  size_t GetFrameId() {
+  size_t GetFrameId() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return GetFrameHeight() + 1;
   }
 
-  size_t GetNumFrames() {
+  size_t GetNumFrames() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (num_frames_ == 0) {
       num_frames_ = ComputeNumFrames();
     }
     return num_frames_;
   }
 
-  uint32_t GetVReg(Method* m, int vreg) const;
+  uint32_t GetVReg(Method* m, int vreg) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void SetVReg(Method* m, int vreg, uint32_t new_value);
+  void SetVReg(Method* m, int vreg, uint32_t new_value)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   uintptr_t GetGPR(uint32_t reg) const;
 
@@ -368,13 +370,13 @@
   }
 
  private:
-  size_t ComputeNumFrames() const;
+  size_t ComputeNumFrames() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   TraceStackFrame GetTraceStackFrame(uint32_t depth) const {
     return trace_stack_->at(trace_stack_->size() - depth - 1);
   }
 
-  void SanityCheckFrame();
+  void SanityCheckFrame() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   const ManagedStack* const stack_start_;
   const std::vector<TraceStackFrame>* const trace_stack_;
diff --git a/src/thread.cc b/src/thread.cc
index e5ade4d..3bae0af 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -40,7 +40,7 @@
 #include "reflection.h"
 #include "runtime.h"
 #include "runtime_support.h"
-#include "scoped_jni_thread_state.h"
+#include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "space.h"
 #include "stack.h"
@@ -53,6 +53,7 @@
 namespace art {
 
 pthread_key_t Thread::pthread_key_self_;
+ConditionVariable* Thread::resume_cond_;
 
 static const char* kThreadNameDuringStartup = "<native thread without managed peer>";
 
@@ -101,15 +102,10 @@
   Thread* self = reinterpret_cast<Thread*>(arg);
   self->Init();
 
-  // Wait until it's safe to start running code. (There may have been a suspend-all
-  // in progress while we were starting up.)
-  Runtime* runtime = Runtime::Current();
-  runtime->GetThreadList()->WaitForGo();
-
   {
-    ScopedJniThreadState ts(self);
+    ScopedObjectAccess soa(self);
     {
-      SirtRef<String> thread_name(self->GetThreadName(ts));
+      SirtRef<String> thread_name(self->GetThreadName(soa));
       self->SetThreadName(thread_name->ToModifiedUtf8().c_str());
     }
 
@@ -119,29 +115,37 @@
     CHECK(self->peer_ != NULL);
     Object* receiver = self->peer_;
     jmethodID mid = WellKnownClasses::java_lang_Thread_run;
-    Method* m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(ts.DecodeMethod(mid));
+    Method* m = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
     m->Invoke(self, receiver, NULL, NULL);
   }
 
-  // Detach.
-  runtime->GetThreadList()->Unregister();
+  // Detach and delete self.
+  Runtime::Current()->GetThreadList()->Unregister(self);
 
   return NULL;
 }
 
-static void SetVmData(const ScopedJniThreadState& ts, Object* managed_thread,
-                      Thread* native_thread) {
-  Field* f = ts.DecodeField(WellKnownClasses::java_lang_Thread_vmData);
+static void SetVmData(const ScopedObjectAccess& soa, Object* managed_thread,
+                      Thread* native_thread)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+  Field* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_vmData);
   f->SetInt(managed_thread, reinterpret_cast<uintptr_t>(native_thread));
 }
 
-Thread* Thread::FromManagedThread(const ScopedJniThreadState& ts, Object* thread_peer) {
-  Field* f = ts.DecodeField(WellKnownClasses::java_lang_Thread_vmData);
-  return reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetInt(thread_peer)));
+Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa, Object* thread_peer) {
+  Field* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_vmData);
+  Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetInt(thread_peer)));
+  // Sanity check: if we have a result, it is either suspended or we hold the thread_list_lock_
+  // to stop it from going away.
+  MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+  if (result != NULL && !result->IsSuspended()) {
+    GlobalSynchronization::thread_list_lock_->AssertHeld();
+  }
+  return result;
 }
 
-Thread* Thread::FromManagedThread(const ScopedJniThreadState& ts, jobject java_thread) {
-  return FromManagedThread(ts, ts.Decode<Object*>(java_thread));
+Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa, jobject java_thread) {
+  return FromManagedThread(soa, soa.Decode<Object*>(java_thread));
 }
 
 static size_t FixStackSize(size_t stack_size) {
@@ -210,42 +214,38 @@
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool daemon) {
   Thread* native_thread = new Thread(daemon);
   {
-    ScopedJniThreadState ts(env);
-    Object* peer = ts.Decode<Object*>(java_peer);
+    ScopedObjectAccess soa(env);
+    Object* peer = soa.Decode<Object*>(java_peer);
     CHECK(peer != NULL);
     native_thread->peer_ = peer;
 
     stack_size = FixStackSize(stack_size);
 
-    // Thread.start is synchronized, so we know that vmData is 0,
-    // and know that we're not racing to assign it.
-    SetVmData(ts, peer, native_thread);
-
-    int pthread_create_result = 0;
-    {
-      ScopedThreadStateChange tsc(Thread::Current(), kVmWait);
-      pthread_t new_pthread;
-      pthread_attr_t attr;
-      CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), "new thread");
-      CHECK_PTHREAD_CALL(pthread_attr_setdetachstate, (&attr, PTHREAD_CREATE_DETACHED), "PTHREAD_CREATE_DETACHED");
-      CHECK_PTHREAD_CALL(pthread_attr_setstacksize, (&attr, stack_size), stack_size);
-      pthread_create_result = pthread_create(&new_pthread, &attr, Thread::CreateCallback, native_thread);
-      CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attr), "new thread");
-    }
-
-    if (pthread_create_result != 0) {
-      // pthread_create(3) failed, so clean up.
-      SetVmData(ts, peer, 0);
-      delete native_thread;
-
-      std::string msg(StringPrintf("pthread_create (%s stack) failed: %s",
-                                   PrettySize(stack_size).c_str(), strerror(pthread_create_result)));
-      Thread::Current()->ThrowOutOfMemoryError(msg.c_str());
-      return;
-    }
+    // Thread.start is synchronized, so we know that vmData is 0, and know that we're not racing to
+    // assign it.
+    SetVmData(soa, peer, native_thread);
   }
-  // Let the child know when it's safe to start running.
-  Runtime::Current()->GetThreadList()->SignalGo(native_thread);
+
+  pthread_t new_pthread;
+  pthread_attr_t attr;
+  CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), "new thread");
+  CHECK_PTHREAD_CALL(pthread_attr_setdetachstate, (&attr, PTHREAD_CREATE_DETACHED), "PTHREAD_CREATE_DETACHED");
+  CHECK_PTHREAD_CALL(pthread_attr_setstacksize, (&attr, stack_size), stack_size);
+  int pthread_create_result = pthread_create(&new_pthread, &attr, Thread::CreateCallback, native_thread);
+  CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attr), "new thread");
+
+  if (UNLIKELY(pthread_create_result != 0)) {
+    // pthread_create(3) failed, so clean up.
+    ScopedObjectAccess soa(env);
+    Object* peer = soa.Decode<Object*>(java_peer);
+    SetVmData(soa, peer, 0);
+    delete native_thread;
+
+    std::string msg(StringPrintf("pthread_create (%s stack) failed: %s",
+                                 PrettySize(stack_size).c_str(), strerror(pthread_create_result)));
+    Thread::Current()->ThrowOutOfMemoryError(msg.c_str());
+    return;
+  }
 }
 
 void Thread::Init() {
@@ -262,7 +262,9 @@
 
   Runtime* runtime = Runtime::Current();
   CHECK(runtime != NULL);
-
+  if (runtime->IsShuttingDown()) {
+    UNIMPLEMENTED(WARNING) << "Thread attaching whilst runtime is shutting down";
+  }
   thin_lock_id_ = runtime->GetThreadList()->AllocThreadId();
   pthread_self_ = pthread_self();
 
@@ -273,14 +275,18 @@
 
   jni_env_ = new JNIEnvExt(this, runtime->GetJavaVM());
 
-  runtime->GetThreadList()->Register();
+  runtime->GetThreadList()->Register(this);
 }
 
 Thread* Thread::Attach(const char* thread_name, bool as_daemon, jobject thread_group) {
   Thread* self = new Thread(as_daemon);
   self->Init();
 
-  self->SetState(kNative);
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_NE(self->GetState(), kRunnable);
+    self->SetState(kNative);
+  }
 
   // If we're the main thread, ClassLinker won't be created until after we're attached,
   // so that thread needs a two-stage attach. Regular threads don't need this hack.
@@ -313,30 +319,33 @@
   jboolean thread_is_daemon = as_daemon;
 
   ScopedLocalRef<jobject> peer(env, env->AllocObject(WellKnownClasses::java_lang_Thread));
-  peer_ = DecodeJObject(peer.get());
-  if (peer_ == NULL) {
-    CHECK(IsExceptionPending());
-    return;
+  {
+    ScopedObjectAccess soa(env);
+    peer_ = DecodeJObject(peer.get());
+    if (peer_ == NULL) {
+      CHECK(IsExceptionPending());
+      return;
+    }
   }
   env->CallNonvirtualVoidMethod(peer.get(),
                                 WellKnownClasses::java_lang_Thread,
                                 WellKnownClasses::java_lang_Thread_init,
                                 thread_group, thread_name.get(), thread_priority, thread_is_daemon);
-  CHECK(!IsExceptionPending()) << " " << PrettyTypeOf(GetException());
+  AssertNoPendingException();
 
-  ScopedJniThreadState ts(this);
-  SetVmData(ts, peer_, Thread::Current());
-  SirtRef<String> peer_thread_name(GetThreadName(ts));
+  ScopedObjectAccess soa(this);
+  SetVmData(soa, peer_, Thread::Current());
+  SirtRef<String> peer_thread_name(GetThreadName(soa));
   if (peer_thread_name.get() == NULL) {
     // The Thread constructor should have set the Thread.name to a
     // non-null value. However, because we can run without code
     // available (in the compiler, in tests), we manually assign the
     // fields the constructor should have set.
-    ts.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->SetBoolean(peer_, thread_is_daemon);
-    ts.DecodeField(WellKnownClasses::java_lang_Thread_group)->SetObject(peer_, ts.Decode<Object*>(thread_group));
-    ts.DecodeField(WellKnownClasses::java_lang_Thread_name)->SetObject(peer_, ts.Decode<Object*>(thread_name.get()));
-    ts.DecodeField(WellKnownClasses::java_lang_Thread_priority)->SetInt(peer_, thread_priority);
-    peer_thread_name.reset(GetThreadName(ts));
+    soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->SetBoolean(peer_, thread_is_daemon);
+    soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->SetObject(peer_, soa.Decode<Object*>(thread_group));
+    soa.DecodeField(WellKnownClasses::java_lang_Thread_name)->SetObject(peer_, soa.Decode<Object*>(thread_name.get()));
+    soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)->SetInt(peer_, thread_priority);
+    peer_thread_name.reset(GetThreadName(soa));
   }
   // 'thread_name' may have been null, so don't trust 'peer_thread_name' to be non-null.
   if (peer_thread_name.get() != NULL) {
@@ -403,27 +412,27 @@
   CHECK_GT(&stack_variable, reinterpret_cast<void*>(stack_end_));
 }
 
-void Thread::Dump(std::ostream& os, bool full) const {
-  if (full) {
-    DumpState(os);
-    DumpStack(os);
-  } else {
-    os << "Thread[";
-    if (GetThinLockId() != 0) {
-      // If we're in kStarting, we won't have a thin lock id or tid yet.
-      os << GetThinLockId()
-         << ",tid=" << GetTid() << ',';
-    }
-    os << GetState()
-       << ",Thread*=" << this
-       << ",peer=" << peer_
-       << ",\"" << *name_ << "\""
-       << "]";
+void Thread::ShortDump(std::ostream& os) const {
+  os << "Thread[";
+  if (GetThinLockId() != 0) {
+    // If we're in kStarting, we won't have a thin lock id or tid yet.
+    os << GetThinLockId()
+             << ",tid=" << GetTid() << ',';
   }
+  os << GetStateUnsafe()
+           << ",Thread*=" << this
+           << ",peer=" << peer_
+           << ",\"" << *name_ << "\""
+           << "]";
 }
 
-String* Thread::GetThreadName(const ScopedJniThreadState& ts) const {
-  Field* f = ts.DecodeField(WellKnownClasses::java_lang_Thread_name);
+void Thread::Dump(std::ostream& os) const {
+  DumpState(os);
+  DumpStack(os);
+}
+
+String* Thread::GetThreadName(const ScopedObjectAccessUnchecked& soa) const {
+  Field* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
   return (peer_ != NULL) ? reinterpret_cast<String*>(f->GetObject(peer_)) : NULL;
 }
 
@@ -431,19 +440,177 @@
   name.assign(*name_);
 }
 
+// Attempt to rectify locks so that we dump the thread list with the required locks before exiting.
+static void UnsafeLogFatalForSuspendCount(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+  GlobalSynchronization::thread_suspend_count_lock_->Unlock();
+  GlobalSynchronization::mutator_lock_->SharedTryLock();
+  if (!GlobalSynchronization::mutator_lock_->IsSharedHeld()) {
+    LOG(WARNING) << "Dumping thread list without holding mutator_lock_";
+  }
+  GlobalSynchronization::thread_list_lock_->TryLock();
+  if (!GlobalSynchronization::thread_list_lock_->IsExclusiveHeld()) {
+    LOG(WARNING) << "Dumping thread list without holding thread_list_lock_";
+  }
+  std::ostringstream ss;
+  Runtime::Current()->GetThreadList()->DumpLocked(ss);
+  LOG(FATAL) << self << " suspend count already zero.\n" << ss.str();
+}
+
+void Thread::ModifySuspendCount(int delta, bool for_debugger) {
+  DCHECK(delta == -1 || delta == +1 || delta == -debug_suspend_count_)
+      << delta << " " << debug_suspend_count_ << " " << this;
+  DCHECK_GE(suspend_count_, debug_suspend_count_) << this;
+  GlobalSynchronization::thread_suspend_count_lock_->AssertHeld();
+
+  if (delta == -1 && suspend_count_ <= 0) {
+    // This is expected if you attach a thread during a GC.
+    if (UNLIKELY(!IsStillStarting())) {
+      UnsafeLogFatalForSuspendCount(this);
+    }
+    return;
+  }
+  suspend_count_ += delta;
+  if (for_debugger) {
+    debug_suspend_count_ += delta;
+  }
+}
+
+void Thread::FullSuspendCheck() {
+  VLOG(threads) << this << " self-suspending";
+  // Make thread appear suspended to other threads, release mutator_lock_.
+  TransitionFromRunnableToSuspended(kSuspended);
+  // Transition back to runnable noting requests to suspend, re-acquire share on mutator_lock_.
+  TransitionFromSuspendedToRunnable();
+  VLOG(threads) << this << " self-reviving";
+}
+
+void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) {
+  AssertThreadSuspensionIsAllowable();
+  CHECK_NE(new_state, kRunnable);
+  CHECK_EQ(this, Thread::Current());
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_EQ(GetState(), kRunnable);
+    SetState(new_state);
+  }
+  // Release share on mutator_lock_.
+  GlobalSynchronization::mutator_lock_->SharedUnlock();
+}
+
+ThreadState Thread::TransitionFromSuspendedToRunnable() {
+  bool done = false;
+  ThreadState old_state;
+  do {
+    {
+      // Wait while our suspend count is non-zero.
+      MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+      old_state = GetState();
+      CHECK_NE(old_state, kRunnable);
+      GlobalSynchronization::mutator_lock_->AssertNotHeld();  // Otherwise we starve GC..
+      while (GetSuspendCount() != 0) {
+        // Re-check when Thread::resume_cond_ is notified.
+        Thread::resume_cond_->Wait(*GlobalSynchronization::thread_suspend_count_lock_);
+      }
+    }
+    // Re-acquire shared mutator_lock_ access.
+    GlobalSynchronization::mutator_lock_->SharedLock();
+    // Holding the mutator_lock_, synchronize with any thread trying to raise the suspend count
+    // and change state to Runnable if no suspend is pending.
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    if (GetSuspendCount() == 0) {
+      SetState(kRunnable);
+      done = true;
+    } else {
+      // Release shared mutator_lock_ access and try again.
+      GlobalSynchronization::mutator_lock_->SharedUnlock();
+    }
+  } while (!done);
+  return old_state;
+}
+
+Thread* Thread::SuspendForDebugger(jobject peer, bool request_suspension, bool* timeout) {
+  static const useconds_t kTimeoutUs = 30 * 1000000; // 30s.
+  useconds_t total_delay_us = 0;
+  useconds_t delay_us = 0;
+  bool did_suspend_request = false;
+  *timeout = false;
+  while (true) {
+    Thread* thread;
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+      thread = Thread::FromManagedThread(soa, peer);
+      if (thread == NULL) {
+        LOG(WARNING) << "No such thread for suspend: " << peer;
+        return NULL;
+      }
+      {
+        MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+        if (request_suspension) {
+          thread->ModifySuspendCount(+1, true /* for_debugger */);
+          request_suspension = false;
+          did_suspend_request = true;
+        }
+        // IsSuspended on the current thread will fail as the current thread is changed into
+        // Runnable above. As the suspend count is now raised if this is the current thread
+        // it will self suspend on transition to Runnable, making it hard to work with. Its simpler
+        // to just explicitly handle the current thread in the callers to this code.
+        CHECK_NE(thread, soa.Self()) << "Attempt to suspend for debugger the current thread";
+        // If thread is suspended (perhaps it was already not Runnable but didn't have a suspend
+        // count, or else we've waited and it has self suspended) or is the current thread, we're
+        // done.
+        if (thread->IsSuspended()) {
+          return thread;
+        }
+        if (total_delay_us >= kTimeoutUs) {
+          LOG(ERROR) << "Thread suspension timed out: " << peer;
+          if (did_suspend_request) {
+            thread->ModifySuspendCount(-1, true /* for_debugger */);
+          }
+          *timeout = true;
+          return NULL;
+        }
+      }
+      // Release locks and come out of runnable state.
+    }
+    for (int i = kMaxMutexLevel; i >= 0; --i) {
+      BaseMutex* held_mutex = Thread::Current()->GetHeldMutex(static_cast<MutexLevel>(i));
+      if (held_mutex != NULL) {
+        LOG(FATAL) << "Holding " << held_mutex->GetName()
+            << " while sleeping for thread suspension";
+      }
+    }
+    {
+      useconds_t new_delay_us = delay_us * 2;
+      CHECK_GE(new_delay_us, delay_us);
+      if (new_delay_us < 500000) {  // Don't allow sleeping to be more than 0.5s.
+        delay_us = new_delay_us;
+      }
+    }
+    if (delay_us == 0) {
+      sched_yield();
+      // Default to 1 millisecond (note that this gets multiplied by 2 before the first sleep).
+      delay_us = 500;
+    } else {
+      usleep(delay_us);
+      total_delay_us += delay_us;
+    }
+  }
+}
+
 void Thread::DumpState(std::ostream& os, const Thread* thread, pid_t tid) {
   std::string group_name;
   int priority;
   bool is_daemon = false;
 
   if (thread != NULL && thread->peer_ != NULL) {
-    ScopedJniThreadState ts(Thread::Current());
-    priority = ts.DecodeField(WellKnownClasses::java_lang_Thread_priority)->GetInt(thread->peer_);
-    is_daemon = ts.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->GetBoolean(thread->peer_);
+    ScopedObjectAccess soa(Thread::Current());
+    priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)->GetInt(thread->peer_);
+    is_daemon = soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->GetBoolean(thread->peer_);
 
-    Object* thread_group = thread->GetThreadGroup(ts);
+    Object* thread_group = thread->GetThreadGroup(soa);
     if (thread_group != NULL) {
-      Field* group_name_field = ts.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
+      Field* group_name_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
       String* group_name_string = reinterpret_cast<String*>(group_name_field->GetObject(thread_group));
       group_name = (group_name_string != NULL) ? group_name_string->ToModifiedUtf8() : "<null>";
     }
@@ -461,6 +628,7 @@
     if (is_daemon) {
       os << " daemon";
     }
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
     os << " prio=" << priority
        << " tid=" << thread->GetThinLockId()
        << " " << thread->GetState() << "\n";
@@ -471,6 +639,7 @@
   }
 
   if (thread != NULL) {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
     os << "  | group=\"" << group_name << "\""
        << " sCount=" << thread->suspend_count_
        << " dsCount=" << thread->debug_suspend_count_
@@ -520,6 +689,7 @@
 
 struct StackDumpVisitor : public StackVisitor {
   StackDumpVisitor(std::ostream& os, const Thread* thread, Context* context, bool can_allocate)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : StackVisitor(thread->GetManagedStack(), thread->GetTraceStack(), context),
         os(os), thread(thread), can_allocate(can_allocate),
         last_method(NULL), last_line_number(0), repetition_count(0), frame_count(0) {
@@ -531,7 +701,7 @@
     }
   }
 
-  bool VisitFrame() {
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* m = GetMethod();
     if (m->IsRuntimeMethod()) {
       return true;
@@ -589,7 +759,12 @@
 
 void Thread::DumpStack(std::ostream& os) const {
   // If we're currently in native code, dump that stack before dumping the managed stack.
-  if (GetState() == kNative || GetState() == kVmWait) {
+  ThreadState state;
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    state = GetState();
+  }
+  if (state == kNative) {
     DumpKernelStack(os, GetTid(), "  kernel: ", false);
     DumpNativeStack(os, GetTid(), "  native: ", false);
   }
@@ -598,148 +773,6 @@
   dumper.WalkStack();
 }
 
-void Thread::SetStateWithoutSuspendCheck(ThreadState new_state) {
-  DCHECK_EQ(this, Thread::Current());
-  volatile void* raw = reinterpret_cast<volatile void*>(&state_);
-  volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw);
-  android_atomic_release_store(new_state, addr);
-}
-
-ThreadState Thread::SetState(ThreadState new_state) {
-  if (new_state != kVmWait && new_state != kTerminated) {
-    // TODO: kVmWait is set by the parent thread to a child thread to indicate it can go. Similarly
-    // kTerminated may be set by a parent thread to its child if pthread creation fails.  This
-    // overloaded use of the state variable means we cannot fully assert that only threads
-    // themselves modify their state.
-    DCHECK_EQ(this, Thread::Current());
-  }
-  ThreadState old_state = state_;
-  if (old_state == kRunnable) {
-    // Non-runnable states are points where we expect thread suspension can occur.
-    AssertThreadSuspensionIsAllowable();
-  }
-
-  if (old_state == new_state) {
-    return old_state;
-  }
-
-  volatile void* raw = reinterpret_cast<volatile void*>(&state_);
-  volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw);
-
-  if (new_state == kRunnable) {
-    /*
-     * Change our status to kRunnable.  The transition requires
-     * that we check for pending suspension, because the runtime considers
-     * us to be "asleep" in all other states, and another thread could
-     * be performing a GC now.
-     *
-     * The order of operations is very significant here.  One way to
-     * do this wrong is:
-     *
-     *   GCing thread                   Our thread (in kNative)
-     *   ------------                   ----------------------
-     *                                  check suspend count (== 0)
-     *   SuspendAllThreads()
-     *   grab suspend-count lock
-     *   increment all suspend counts
-     *   release suspend-count lock
-     *   check thread state (== kNative)
-     *   all are suspended, begin GC
-     *                                  set state to kRunnable
-     *                                  (continue executing)
-     *
-     * We can correct this by grabbing the suspend-count lock and
-     * performing both of our operations (check suspend count, set
-     * state) while holding it, now we need to grab a mutex on every
-     * transition to kRunnable.
-     *
-     * What we do instead is change the order of operations so that
-     * the transition to kRunnable happens first.  If we then detect
-     * that the suspend count is nonzero, we switch to kSuspended.
-     *
-     * Appropriate compiler and memory barriers are required to ensure
-     * that the operations are observed in the expected order.
-     *
-     * This does create a small window of opportunity where a GC in
-     * progress could observe what appears to be a running thread (if
-     * it happens to look between when we set to kRunnable and when we
-     * switch to kSuspended).  At worst this only affects assertions
-     * and thread logging.  (We could work around it with some sort
-     * of intermediate "pre-running" state that is generally treated
-     * as equivalent to running, but that doesn't seem worthwhile.)
-     *
-     * We can also solve this by combining the "status" and "suspend
-     * count" fields into a single 32-bit value.  This trades the
-     * store/load barrier on transition to kRunnable for an atomic RMW
-     * op on all transitions and all suspend count updates (also, all
-     * accesses to status or the thread count require bit-fiddling).
-     * It also eliminates the brief transition through kRunnable when
-     * the thread is supposed to be suspended.  This is possibly faster
-     * on SMP and slightly more correct, but less convenient.
-     */
-    AssertThreadSuspensionIsAllowable();
-    android_atomic_acquire_store(new_state, addr);
-    ANNOTATE_IGNORE_READS_BEGIN();
-    int suspend_count = suspend_count_;
-    ANNOTATE_IGNORE_READS_END();
-    if (suspend_count != 0) {
-      Runtime::Current()->GetThreadList()->FullSuspendCheck(this);
-    }
-  } else {
-    /*
-     * Not changing to kRunnable. No additional work required.
-     *
-     * We use a releasing store to ensure that, if we were runnable,
-     * any updates we previously made to objects on the managed heap
-     * will be observed before the state change.
-     */
-    android_atomic_release_store(new_state, addr);
-  }
-
-  return old_state;
-}
-
-bool Thread::IsSuspended() {
-  ANNOTATE_IGNORE_READS_BEGIN();
-  int suspend_count = suspend_count_;
-  ANNOTATE_IGNORE_READS_END();
-  return suspend_count != 0 && GetState() != kRunnable;
-}
-
-static void ReportThreadSuspendTimeout(Thread* waiting_thread) {
-  Runtime* runtime = Runtime::Current();
-  std::ostringstream ss;
-  ss << "Thread suspend timeout waiting for thread " << *waiting_thread << "\n";
-  runtime->DumpLockHolders(ss);
-  ss << "\n";
-  runtime->GetThreadList()->DumpLocked(ss);
-  LOG(FATAL) << ss.str();
-}
-
-void Thread::WaitUntilSuspended() {
-  static const useconds_t kTimeoutUs = 30 * 1000000; // 30s.
-
-  useconds_t total_delay = 0;
-  useconds_t delay = 0;
-  while (GetState() == kRunnable) {
-    if (total_delay >= kTimeoutUs) {
-      ReportThreadSuspendTimeout(this);
-    }
-    useconds_t new_delay = delay * 2;
-    CHECK_GE(new_delay, delay);
-    delay = new_delay;
-    if (delay == 0) {
-      sched_yield();
-      // Default to 1 milliseconds (note that this gets multiplied by 2 before
-      // the first sleep)
-      delay = 500;
-    } else {
-      usleep(delay);
-      total_delay += delay;
-    }
-  }
-}
-
 void Thread::ThreadExitCallback(void* arg) {
   Thread* self = reinterpret_cast<Thread*>(arg);
   if (self->thread_exit_check_count_ == 0) {
@@ -752,6 +785,11 @@
 }
 
 void Thread::Startup() {
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);  // Keep GCC happy.
+    resume_cond_ = new ConditionVariable("Thread resumption condition variable");
+  }
+
   // Allocate a TLS slot.
   CHECK_PTHREAD_CALL(pthread_key_create, (&Thread::pthread_key_self_, Thread::ThreadExitCallback), "self key");
 
@@ -764,13 +802,11 @@
 void Thread::FinishStartup() {
   Runtime* runtime = Runtime::Current();
   CHECK(runtime->IsStarted());
-  Thread* self = Thread::Current();
 
   // Finish attaching the main thread.
-  ScopedThreadStateChange tsc(self, kRunnable);
+  ScopedObjectAccess soa(Thread::Current());
   Thread::Current()->CreatePeer("main", false, runtime->GetMainThreadGroup());
 
-  InitBoxingMethods();
   Runtime::Current()->GetClassLinker()->RunRootClinits();
 }
 
@@ -808,6 +844,7 @@
       trace_stack_(new std::vector<TraceStackFrame>),
       name_(new std::string(kThreadNameDuringStartup)),
       daemon_(daemon),
+      pthread_self_(0),
       no_thread_suspension_(0),
       last_no_thread_suspension_cause_(NULL),
       thread_exit_check_count_(0) {
@@ -825,36 +862,49 @@
   return (*name_ == kThreadNameDuringStartup);
 }
 
-static void MonitorExitVisitor(const Object* object, void*) {
+void Thread::AssertNoPendingException() const {
+  if (UNLIKELY(IsExceptionPending())) {
+    ScopedObjectAccess soa(Thread::Current());
+    Throwable* exception = GetException();
+    LOG(FATAL) << "No pending exception expected: " << exception->Dump();
+  }
+}
+
+static void MonitorExitVisitor(const Object* object, void* arg) NO_THREAD_SAFETY_ANALYSIS {
+  Thread* self = reinterpret_cast<Thread*>(arg);
   Object* entered_monitor = const_cast<Object*>(object);
-  LOG(WARNING) << "Calling MonitorExit on object " << object << " (" << PrettyTypeOf(object) << ")"
-               << " left locked by native thread " << *Thread::Current() << " which is detaching";
-  entered_monitor->MonitorExit(Thread::Current());
+  if (self->HoldsLock(entered_monitor)) {
+    LOG(WARNING) << "Calling MonitorExit on object "
+                 << object << " (" << PrettyTypeOf(object) << ")"
+                 << " left locked by native thread "
+                 << *Thread::Current() << " which is detaching";
+    entered_monitor->MonitorExit(self);
+  }
 }
 
 void Thread::Destroy() {
   // On thread detach, all monitors entered with JNI MonitorEnter are automatically exited.
   if (jni_env_ != NULL) {
-    jni_env_->monitors.VisitRoots(MonitorExitVisitor, NULL);
+    jni_env_->monitors.VisitRoots(MonitorExitVisitor, Thread::Current());
   }
 
   if (peer_ != NULL) {
     Thread* self = this;
 
     // We may need to call user-supplied managed code.
-    ScopedJniThreadState ts(this);
+    ScopedObjectAccess soa(this);
 
-    HandleUncaughtExceptions(ts);
-    RemoveFromThreadGroup(ts);
+    HandleUncaughtExceptions(soa);
+    RemoveFromThreadGroup(soa);
 
     // this.vmData = 0;
-    SetVmData(ts, peer_, NULL);
+    SetVmData(soa, peer_, NULL);
 
     Dbg::PostThreadDeath(self);
 
     // Thread.join() is implemented as an Object.wait() on the Thread.lock
     // object. Signal anyone who is waiting.
-    Object* lock = ts.DecodeField(WellKnownClasses::java_lang_Thread_lock)->GetObject(peer_);
+    Object* lock = soa.DecodeField(WellKnownClasses::java_lang_Thread_lock)->GetObject(peer_);
     // (This conditional is only needed for tests, where Thread.lock won't have been set.)
     if (lock != NULL) {
       lock->MonitorEnter(self);
@@ -868,7 +918,11 @@
   delete jni_env_;
   jni_env_ = NULL;
 
-  SetState(kTerminated);
+  {
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_NE(GetState(), kRunnable);
+    SetState(kTerminated);
+  }
 
   delete wait_cond_;
   delete wait_mutex_;
@@ -884,7 +938,7 @@
   TearDownAlternateSignalStack();
 }
 
-void Thread::HandleUncaughtExceptions(const ScopedJniThreadState& ts) {
+void Thread::HandleUncaughtExceptions(const ScopedObjectAccess& soa) {
   if (!IsExceptionPending()) {
     return;
   }
@@ -894,15 +948,15 @@
 
   // If the thread has its own handler, use that.
   Object* handler =
-      ts.DecodeField(WellKnownClasses::java_lang_Thread_uncaughtHandler)->GetObject(peer_);
+      soa.DecodeField(WellKnownClasses::java_lang_Thread_uncaughtHandler)->GetObject(peer_);
   if (handler == NULL) {
     // Otherwise use the thread group's default handler.
-    handler = GetThreadGroup(ts);
+    handler = GetThreadGroup(soa);
   }
 
   // Call the handler.
   jmethodID mid = WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler_uncaughtException;
-  Method* m = handler->GetClass()->FindVirtualMethodForVirtualOrInterface(ts.DecodeMethod(mid));
+  Method* m = handler->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
   JValue args[2];
   args[0].SetL(peer_);
   args[1].SetL(exception);
@@ -912,17 +966,17 @@
   ClearException();
 }
 
-Object* Thread::GetThreadGroup(const ScopedJniThreadState& ts) const {
-  return ts.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(peer_);
+Object* Thread::GetThreadGroup(const ScopedObjectAccessUnchecked& soa) const {
+  return soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(peer_);
 }
 
-void Thread::RemoveFromThreadGroup(const ScopedJniThreadState& ts) {
+void Thread::RemoveFromThreadGroup(const ScopedObjectAccess& soa) {
   // this.group.removeThread(this);
   // group can be null if we're in the compiler or a test.
-  Object* group = GetThreadGroup(ts);
+  Object* group = GetThreadGroup(soa);
   if (group != NULL) {
     jmethodID mid = WellKnownClasses::java_lang_ThreadGroup_removeThread;
-    Method* m = group->GetClass()->FindVirtualMethodForVirtualOrInterface(ts.DecodeMethod(mid));
+    Method* m = group->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
     JValue args[1];
     args[0].SetL(peer_);
     m->Invoke(this, group, args, NULL);
@@ -1023,10 +1077,11 @@
  public:
   CountStackDepthVisitor(const ManagedStack* stack,
                          const std::vector<TraceStackFrame>* trace_stack)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : StackVisitor(stack, trace_stack, NULL),
         depth_(0), skip_depth_(0), skipping_(true) {}
 
-  bool VisitFrame() {
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     // We want to skip frames up to and including the exception's constructor.
     // Note we also skip the frame if it doesn't have a method (namely the callee
     // save frame)
@@ -1067,7 +1122,8 @@
       : StackVisitor(stack, trace_stack, NULL),
         skip_depth_(skip_depth), count_(0), dex_pc_trace_(NULL), method_trace_(NULL) {}
 
-  bool Init(int depth, const ScopedJniThreadState& ts) {
+  bool Init(int depth, const ScopedObjectAccess& soa)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     // Allocate method trace with an extra slot that will hold the PC trace
     SirtRef<ObjectArray<Object> >
       method_trace(Runtime::Current()->GetClassLinker()->AllocObjectArray<Object>(depth + 1));
@@ -1083,7 +1139,7 @@
     method_trace->Set(depth, dex_pc_trace);
     // Set the Object*s and assert that no thread suspension is now possible.
     const char* last_no_suspend_cause =
-        ts.Self()->StartAssertNoThreadSuspension("Building internal stack trace");
+        soa.Self()->StartAssertNoThreadSuspension("Building internal stack trace");
     CHECK(last_no_suspend_cause == NULL) << last_no_suspend_cause;
     method_trace_ = method_trace.get();
     dex_pc_trace_ = dex_pc_trace;
@@ -1096,7 +1152,7 @@
     }
   }
 
-  bool VisitFrame() {
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (method_trace_ == NULL || dex_pc_trace_ == NULL) {
       return true; // We're probably trying to fillInStackTrace for an OutOfMemoryError.
     }
@@ -1141,7 +1197,7 @@
   return sirt;
 }
 
-jobject Thread::CreateInternalStackTrace(const ScopedJniThreadState& ts) const {
+jobject Thread::CreateInternalStackTrace(const ScopedObjectAccess& soa) const {
   // Compute depth of stack
   CountStackDepthVisitor count_visitor(GetManagedStack(), GetTraceStack());
   count_visitor.WalkStack();
@@ -1151,19 +1207,19 @@
   // Build internal stack trace
   BuildInternalStackTraceVisitor build_trace_visitor(GetManagedStack(), GetTraceStack(),
                                                      skip_depth);
-  if (!build_trace_visitor.Init(depth, ts)) {
+  if (!build_trace_visitor.Init(depth, soa)) {
     return NULL;  // Allocation failed
   }
   build_trace_visitor.WalkStack();
-  return ts.AddLocalReference<jobjectArray>(build_trace_visitor.GetInternalStackTrace());
+  return soa.AddLocalReference<jobjectArray>(build_trace_visitor.GetInternalStackTrace());
 }
 
 jobjectArray Thread::InternalStackTraceToStackTraceElementArray(JNIEnv* env, jobject internal,
     jobjectArray output_array, int* stack_depth) {
   // Transition into runnable state to work on Object*/Array*
-  ScopedJniThreadState ts(env);
+  ScopedObjectAccess soa(env);
   // Decode the internal stack trace into the depth, method trace and PC trace
-  ObjectArray<Object>* method_trace = ts.Decode<ObjectArray<Object>*>(internal);
+  ObjectArray<Object>* method_trace = soa.Decode<ObjectArray<Object>*>(internal);
   int32_t depth = method_trace->GetLength() - 1;
   IntArray* pc_trace = down_cast<IntArray*>(method_trace->Get(depth));
 
@@ -1174,7 +1230,7 @@
   if (output_array != NULL) {
     // Reuse the array we were given.
     result = output_array;
-    java_traces = ts.Decode<ObjectArray<StackTraceElement>*>(output_array);
+    java_traces = soa.Decode<ObjectArray<StackTraceElement>*>(output_array);
     // ...adjusting the number of frames we'll write to not exceed the array length.
     depth = std::min(depth, java_traces->GetLength());
   } else {
@@ -1183,7 +1239,7 @@
     if (java_traces == NULL) {
       return NULL;
     }
-    result = ts.AddLocalReference<jobjectArray>(java_traces);
+    result = soa.AddLocalReference<jobjectArray>(java_traces);
   }
 
   if (stack_depth != NULL) {
@@ -1223,8 +1279,8 @@
     }
 #ifdef MOVING_GARBAGE_COLLECTOR
     // Re-read after potential GC
-    java_traces = Decode<ObjectArray<Object>*>(ts.Env(), result);
-    method_trace = down_cast<ObjectArray<Object>*>(Decode<Object*>(ts.Env(), internal));
+    java_traces = Decode<ObjectArray<Object>*>(soa.Env(), result);
+    method_trace = down_cast<ObjectArray<Object>*>(Decode<Object*>(soa.Env(), internal));
     pc_trace = down_cast<IntArray*>(method_trace->Get(depth));
 #endif
     java_traces->Set(i, obj);
@@ -1246,7 +1302,7 @@
 }
 
 void Thread::ThrowNewException(const char* exception_class_descriptor, const char* msg) {
-  CHECK(!IsExceptionPending()); // Callers should either clear or call ThrowNewWrappedException.
+  AssertNoPendingException(); // Callers should either clear or call ThrowNewWrappedException.
   ThrowNewWrappedException(exception_class_descriptor, msg);
 }
 
@@ -1276,10 +1332,10 @@
     ScopedLocalRef<jthrowable> exception(
         env, reinterpret_cast<jthrowable>(env->AllocObject(exception_class.get())));
     if (exception.get() != NULL) {
-      ScopedJniThreadState ts(env);
-      Throwable* t = reinterpret_cast<Throwable*>(ts.Self()->DecodeJObject(exception.get()));
+      ScopedObjectAccessUnchecked soa(env);
+      Throwable* t = reinterpret_cast<Throwable*>(soa.Self()->DecodeJObject(exception.get()));
       t->SetDetailMessage(String::AllocFromModifiedUtf8(msg));
-      ts.Self()->SetException(t);
+      soa.Self()->SetException(t);
     } else {
       LOG(ERROR) << "Couldn't throw new " << descriptor << " because JNI AllocObject failed: "
                  << PrettyTypeOf(GetException());
@@ -1358,8 +1414,13 @@
   ENTRY_POINT_INFO(pGetObjInstance),
   ENTRY_POINT_INFO(pGetObjStatic),
   ENTRY_POINT_INFO(pHandleFillArrayDataFromCode),
-  ENTRY_POINT_INFO(pDecodeJObjectInThread),
   ENTRY_POINT_INFO(pFindNativeMethod),
+  ENTRY_POINT_INFO(pJniMethodStart),
+  ENTRY_POINT_INFO(pJniMethodStartSynchronized),
+  ENTRY_POINT_INFO(pJniMethodEnd),
+  ENTRY_POINT_INFO(pJniMethodEndSynchronized),
+  ENTRY_POINT_INFO(pJniMethodEndWithReference),
+  ENTRY_POINT_INFO(pJniMethodEndWithReferenceSynchronized),
   ENTRY_POINT_INFO(pLockObjectFromCode),
   ENTRY_POINT_INFO(pUnlockObjectFromCode),
   ENTRY_POINT_INFO(pCmpgDouble),
@@ -1452,6 +1513,7 @@
 class CatchBlockStackVisitor : public StackVisitor {
  public:
   CatchBlockStackVisitor(Thread* self, Throwable* exception)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : StackVisitor(self->GetManagedStack(), self->GetTraceStack(), self->GetLongJumpContext()),
         self_(self), exception_(exception), to_find_(exception->GetClass()), throw_method_(NULL),
         throw_frame_id_(0), throw_dex_pc_(0), handler_quick_frame_(NULL),
@@ -1465,7 +1527,8 @@
     LOG(FATAL) << "UNREACHABLE";  // Expected to take long jump.
   }
 
-  bool VisitFrame() {
+  bool VisitFrame()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* method = GetMethod();
     if (method == NULL) {
       // This is the upcall, we remember the frame and last pc so that we may long jump to them.
@@ -1507,7 +1570,7 @@
     return true;  // Continue stack walk.
   }
 
-  void DoLongJump() {
+  void DoLongJump() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* catch_method = *handler_quick_frame_;
     Dbg::PostException(self_, throw_frame_id_, throw_method_, throw_dex_pc_,
                        catch_method, handler_dex_pc_, exception_);
@@ -1587,9 +1650,10 @@
   struct CurrentMethodVisitor : public StackVisitor {
     CurrentMethodVisitor(const ManagedStack* stack,
                          const std::vector<TraceStackFrame>* trace_stack)
+        SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
         : StackVisitor(stack, trace_stack, NULL), method_(NULL), dex_pc_(0), frame_id_(0) {}
 
-    virtual bool VisitFrame() {
+    virtual bool VisitFrame() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
       Method* m = GetMethod();
       if (m->IsRuntimeMethod()) {
         // Continue if this is a runtime method.
@@ -1627,9 +1691,10 @@
  public:
   ReferenceMapVisitor(const ManagedStack* stack, const std::vector<TraceStackFrame>* trace_stack,
                       Context* context, Heap::RootVisitor* root_visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : StackVisitor(stack, trace_stack, context), root_visitor_(root_visitor), arg_(arg) {}
 
-  bool VisitFrame() {
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (false) {
       LOG(INFO) << "Visiting stack roots in " << PrettyMethod(GetMethod())
           << StringPrintf("@ PC:%04x", GetDexPc());
@@ -1739,51 +1804,42 @@
 }
 #endif
 
+// Set the stack end to the value to be used while handling a stack overflow.
+void Thread::SetStackEndForStackOverflow() {
+  // During stack overflow we allow use of the full stack
+  if (stack_end_ == stack_begin_) {
+    DumpStack(std::cerr);
+    LOG(FATAL) << "Need to increase kStackOverflowReservedBytes (currently "
+               << kStackOverflowReservedBytes << ")";
+  }
+
+  stack_end_ = stack_begin_;
+}
+
 std::ostream& operator<<(std::ostream& os, const Thread& thread) {
-  thread.Dump(os, false);
+  thread.ShortDump(os);
   return os;
 }
 
-void Thread::CheckSafeToLockOrUnlock(MutexRank rank, bool is_locking) {
-  if (this == NULL) {
-    CHECK(Runtime::Current()->IsShuttingDown());
-    return;
-  }
-  if (is_locking) {
-    if (held_mutexes_[rank] == 0) {
-      bool bad_mutexes_held = false;
-      for (int i = kMaxMutexRank; i > rank; --i) {
-        if (held_mutexes_[i] != 0) {
-          LOG(ERROR) << "holding " << static_cast<MutexRank>(i) << " while " << (is_locking ? "locking" : "unlocking") << " " << rank;
+#ifndef NDEBUG
+void Thread::AssertThreadSuspensionIsAllowable(bool check_locks) const {
+  CHECK_EQ(0u, no_thread_suspension_) << last_no_thread_suspension_cause_;
+  if (check_locks) {
+    bool bad_mutexes_held = false;
+    for (int i = kMaxMutexLevel; i >= 0; --i) {
+      // We expect no locks except the mutator_lock_.
+      if (i != kMutatorLock) {
+        BaseMutex* held_mutex = GetHeldMutex(static_cast<MutexLevel>(i));
+        if (held_mutex != NULL) {
+          LOG(ERROR) << "holding \"" << held_mutex->GetName()
+                  << "\" at point where thread suspension is expected";
           bad_mutexes_held = true;
         }
       }
-      CHECK(!bad_mutexes_held) << rank;
     }
-    ++held_mutexes_[rank];
-  } else {
-    CHECK_GT(held_mutexes_[rank], 0U) << rank;
-    --held_mutexes_[rank];
+    CHECK(!bad_mutexes_held);
   }
 }
-
-void Thread::CheckSafeToWait(MutexRank rank) {
-  if (this == NULL) {
-    CHECK(Runtime::Current()->IsShuttingDown());
-    return;
-  }
-  bool bad_mutexes_held = false;
-  for (int i = kMaxMutexRank; i >= 0; --i) {
-    if (i != rank && held_mutexes_[i] != 0) {
-      LOG(ERROR) << "holding " << static_cast<MutexRank>(i) << " while doing condition variable wait on " << rank;
-      bad_mutexes_held = true;
-    }
-  }
-  if (held_mutexes_[rank] == 0) {
-    LOG(ERROR) << "*not* holding " << rank << " while doing condition variable wait on it";
-    bad_mutexes_held = true;
-  }
-  CHECK(!bad_mutexes_held);
-}
+#endif
 
 }  // namespace art
diff --git a/src/thread.h b/src/thread.h
index 155c980..48278d8 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -51,6 +51,8 @@
 class Monitor;
 class Object;
 class Runtime;
+class ScopedObjectAccess;
+class ScopedObjectAccessUnchecked;
 class ShadowFrame;
 class StackIndirectReferenceTable;
 class StackTraceElement;
@@ -72,15 +74,23 @@
 };
 
 enum ThreadState {
-  kTerminated   = 0, // Thread.TERMINATED     JDWP TS_ZOMBIE
-  kRunnable     = 1, // Thread.RUNNABLE       JDWP TS_RUNNING
-  kTimedWaiting = 2, // Thread.TIMED_WAITING  JDWP TS_WAIT    - in Object.wait() with a timeout
-  kBlocked      = 3, // Thread.BLOCKED        JDWP TS_MONITOR - blocked on a monitor
-  kWaiting      = 4, // Thread.WAITING        JDWP TS_WAIT    - in Object.wait()
-  kStarting     = 5, // Thread.NEW                            - native thread started, not yet ready to run managed code
-  kNative       = 6, //                                       - running in a JNI native method
-  kVmWait       = 7, //                                       - waiting on an internal runtime resource
-  kSuspended    = 8, //                                       - suspended by GC or debugger
+  kTerminated                     = 0,   // Thread.TERMINATED     JDWP TS_ZOMBIE
+  kRunnable                       = 1,   // Thread.RUNNABLE       JDWP TS_RUNNING
+  kTimedWaiting                   = 2,   // Thread.TIMED_WAITING  JDWP TS_WAIT    - in Object.wait() with a timeout
+  kBlocked                        = 3,   // Thread.BLOCKED        JDWP TS_MONITOR - blocked on a monitor
+  kWaiting                        = 4,   // Thread.WAITING        JDWP TS_WAIT    - in Object.wait()
+  kWaitingForGcToComplete         = 5,   // Thread.WAITING        JDWP TS_WAIT    - blocked waiting for GC
+  kWaitingPerformingGc            = 6,   // Thread.WAITING        JDWP TS_WAIT    - performing GC
+  kWaitingForDebuggerSend         = 7,   // Thread.WAITING        JDWP TS_WAIT    - blocked waiting for events to be sent
+  kWaitingForDebuggerToAttach     = 8,   // Thread.WAITING        JDWP TS_WAIT    - blocked waiting for debugger to attach
+  kWaitingInMainDebuggerLoop      = 9,   // Thread.WAITING        JDWP TS_WAIT    - blocking/reading/processing debugger events
+  kWaitingForDebuggerSuspension   = 10,  // Thread.WAITING        JDWP TS_WAIT    - waiting for debugger suspend all
+  kWaitingForJniOnLoad            = 11,  // Thread.WAITING        JDWP TS_WAIT    - waiting for execution of dlopen and JNI on load code
+  kWaitingForSignalCatcherOutput  = 12,  // Thread.WAITING        JDWP TS_WAIT    - waiting for signal catcher IO to complete
+  kWaitingInMainSignalCatcherLoop = 13,  // Thread.WAITING        JDWP TS_WAIT    - blocking/reading/processing signals
+  kStarting                       = 14,  // Thread.NEW            JDWP TS_WAIT    - native thread started, not yet ready to run managed code
+  kNative                         = 15,  // Thread.RUNNABLE       JDWP TS_RUNNING - running in a JNI native method
+  kSuspended                      = 16,  // Thread.RUNNABLE       JDWP TS_RUNNING - suspended by GC or debugger
 };
 
 class PACKED Thread {
@@ -110,34 +120,90 @@
     return reinterpret_cast<Thread*>(thread);
   }
 
-  static Thread* FromManagedThread(const ScopedJniThreadState& ts, Object* thread_peer);
-  static Thread* FromManagedThread(const ScopedJniThreadState& ts, jobject thread);
+  static Thread* FromManagedThread(const ScopedObjectAccessUnchecked& ts, Object* thread_peer)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static Thread* FromManagedThread(const ScopedObjectAccessUnchecked& ts, jobject thread)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Translates 172 to pAllocArrayFromCode and so on.
   static void DumpThreadOffset(std::ostream& os, uint32_t offset, size_t size_of_pointers);
 
-  // When full == true, dumps the detailed thread state and the thread stack (used for SIGQUIT).
-  // When full == false, dumps a one-line summary of thread state (used for operator<<).
-  void Dump(std::ostream& os, bool full = true) const;
+  // Dumps a one-line summary of thread state (used for operator<<).
+  void ShortDump(std::ostream& os) const;
+
+  // Dumps the detailed thread state and the thread stack (used for SIGQUIT).
+  void Dump(std::ostream& os) const
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Dumps the SIGQUIT per-thread header. 'thread' can be NULL for a non-attached thread, in which
   // case we use 'tid' to identify the thread, and we'll include as much information as we can.
-  static void DumpState(std::ostream& os, const Thread* thread, pid_t tid);
+  static void DumpState(std::ostream& os, const Thread* thread, pid_t tid)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_);
 
-  ThreadState GetState() const {
+  ThreadState GetState() const
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_suspend_count_lock_) {
+    GlobalSynchronization::thread_suspend_count_lock_->AssertHeld();
     return state_;
   }
 
-  ThreadState SetState(ThreadState new_state);
-  void SetStateWithoutSuspendCheck(ThreadState new_state);
-
-  bool IsDaemon() const {
-    return daemon_;
+  ThreadState SetState(ThreadState new_state)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_suspend_count_lock_) {
+    GlobalSynchronization::thread_suspend_count_lock_->AssertHeld();
+    ThreadState old_state = state_;
+    if (new_state == kRunnable) {
+      // Sanity, should never become runnable with a pending suspension and should always hold
+      // share of mutator_lock_.
+      CHECK_EQ(GetSuspendCount(), 0);
+      GlobalSynchronization::mutator_lock_->AssertSharedHeld();
+    }
+    state_ = new_state;
+    return old_state;
   }
 
-  bool IsSuspended();
+  int GetSuspendCount() const
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_suspend_count_lock_) {
+    GlobalSynchronization::thread_suspend_count_lock_->AssertHeld();
+    return suspend_count_;
+  }
 
-  void WaitUntilSuspended();
+  int GetDebugSuspendCount() const
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_suspend_count_lock_) {
+    GlobalSynchronization::thread_suspend_count_lock_->AssertHeld();
+    return debug_suspend_count_;
+  }
+
+  bool IsSuspended() const
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_suspend_count_lock_) {
+    int suspend_count = GetSuspendCount();
+    return suspend_count != 0 && GetState() != kRunnable;
+  }
+
+  void ModifySuspendCount(int delta, bool for_debugger)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_suspend_count_lock_);
+
+  // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
+  // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
+  void FullSuspendCheck() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+
+  // Transition from non-runnable to runnable state acquiring share on mutator_lock_.
+  ThreadState TransitionFromSuspendedToRunnable()
+      SHARED_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_);
+
+  // Transition from runnable into a state where mutator privileges are denied. Releases share of
+  // mutator lock.
+  void TransitionFromRunnableToSuspended(ThreadState new_state)
+      UNLOCK_FUNCTION(GlobalSynchronization::mutator_lock_);
+
+  // Wait for a debugger suspension on the thread associated with the given peer. Returns the
+  // thread on success, else NULL. If the thread should be suspended then request_suspension should
+  // be true on entry. If the suspension times out then *timeout is set to true.
+  static Thread* SuspendForDebugger(jobject peer, bool request_suspension, bool* timeout)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_,
+                     GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
 
   // Once called thread suspension will cause an assertion failure.
 #ifndef NDEBUG
@@ -168,9 +234,14 @@
   }
 #endif
 
-  void AssertThreadSuspensionIsAllowable() const {
-    DCHECK_EQ(0u, no_thread_suspension_) << last_no_thread_suspension_cause_;
+
+#ifndef NDEBUG
+  void AssertThreadSuspensionIsAllowable(bool check_locks = true) const;
+#else
+  void AssertThreadSuspensionIsAllowable(bool check_locks = true) const {
+    check_locks = !check_locks;  // Keep GCC happy about unused parameters.
   }
+#endif
 
   bool CanAccessDirectReferences() const {
 #ifdef MOVING_GARBAGE_COLLECTOR
@@ -179,6 +250,10 @@
     return true;
   }
 
+  bool IsDaemon() const {
+    return daemon_;
+  }
+
   bool HoldsLock(Object*);
 
   /*
@@ -206,41 +281,46 @@
   }
 
   // Returns the java.lang.Thread's name, or NULL if this Thread* doesn't have a peer.
-  String* GetThreadName(const ScopedJniThreadState& ts) const;
+  String* GetThreadName(const ScopedObjectAccessUnchecked& ts) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Sets 'name' to the java.lang.Thread's name. This requires no transition to managed code,
   // allocation, or locking.
   void GetThreadName(std::string& name) const;
 
   // Sets the thread's name.
-  void SetThreadName(const char* name);
+  void SetThreadName(const char* name) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  Object* GetPeer() const {
+  Object* GetPeer() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     return peer_;
   }
 
-  Object* GetThreadGroup(const ScopedJniThreadState& ts) const;
+  bool HasPeer() const {
+    return peer_ != NULL;
+  }
+
+  Object* GetThreadGroup(const ScopedObjectAccessUnchecked& ts) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   RuntimeStats* GetStats() {
     return &stats_;
   }
 
-  int GetSuspendCount() const {
-    return suspend_count_;
-  }
-
   bool IsStillStarting() const;
 
   bool IsExceptionPending() const {
     return exception_ != NULL;
   }
 
-  Throwable* GetException() const {
+  Throwable* GetException() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(CanAccessDirectReferences());
     return exception_;
   }
 
-  void SetException(Throwable* new_exception) {
+  void AssertNoPendingException() const;
+
+  void SetException(Throwable* new_exception)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     DCHECK(CanAccessDirectReferences());
     CHECK(new_exception != NULL);
     // TODO: CHECK(exception_ == NULL);
@@ -252,7 +332,7 @@
   }
 
   // Find catch block and perform long jump to appropriate exception handle
-  void DeliverException();
+  void DeliverException() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   Context* GetLongJumpContext();
   void ReleaseLongJumpContext(Context* context) {
@@ -260,7 +340,8 @@
     long_jump_context_ = context;
   }
 
-  Method* GetCurrentMethod(uint32_t* dex_pc = NULL, size_t* frame_id = NULL) const;
+  Method* GetCurrentMethod(uint32_t* dex_pc = NULL, size_t* frame_id = NULL) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void SetTopOfStack(void* stack, uintptr_t pc) {
     Method** top_method = reinterpret_cast<Method**>(stack);
@@ -273,20 +354,25 @@
   }
 
   // If 'msg' is NULL, no detail message is set.
-  void ThrowNewException(const char* exception_class_descriptor, const char* msg);
+  void ThrowNewException(const char* exception_class_descriptor, const char* msg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // If 'msg' is NULL, no detail message is set. An exception must be pending, and will be
   // used as the new exception's cause.
-  void ThrowNewWrappedException(const char* exception_class_descriptor, const char* msg);
+  void ThrowNewWrappedException(const char* exception_class_descriptor, const char* msg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void ThrowNewExceptionF(const char* exception_class_descriptor, const char* fmt, ...)
-      __attribute__((format(printf, 3, 4)));
+      __attribute__((format(printf, 3, 4)))
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void ThrowNewExceptionV(const char* exception_class_descriptor, const char* fmt, va_list ap);
+  void ThrowNewExceptionV(const char* exception_class_descriptor, const char* fmt, va_list ap)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // OutOfMemoryError is special, because we need to pre-allocate an instance.
   // Only the GC should call this.
-  void ThrowOutOfMemoryError(const char* msg);
+  void ThrowOutOfMemoryError(const char* msg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   //QuickFrameIterator FindExceptionHandler(void* throw_pc, void** handler_pc);
 
@@ -305,7 +391,10 @@
   }
 
   // Convert a jobject into a Object*
-  Object* DecodeJObject(jobject obj);
+  Object* DecodeJObject(jobject obj)
+      LOCKS_EXCLUDED(JavaVMExt::globals_lock,
+                     JavaVMExt::weak_globals_lock)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Implements java.lang.Thread.interrupted.
   bool Interrupted() {
@@ -335,9 +424,9 @@
     NotifyLocked();
   }
 
-  ClassLoader* GetClassLoaderOverride() {
-    // TODO: need to place the class_loader_override_ in a handle
-    // DCHECK(CanAccessDirectReferences());
+  ClassLoader* GetClassLoaderOverride()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    DCHECK(CanAccessDirectReferences());
     return class_loader_override_;
   }
 
@@ -347,7 +436,8 @@
 
   // Create the internal representation of a stack trace, that is more time
   // and space efficient to compute than the StackTraceElement[]
-  jobject CreateInternalStackTrace(const ScopedJniThreadState& ts) const;
+  jobject CreateInternalStackTrace(const ScopedObjectAccess& soa) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Convert an internal stack trace representation (returned by CreateInternalStackTrace) to a
   // StackTraceElement[]. If output_array is NULL, a new array is created, otherwise as many
@@ -356,7 +446,8 @@
   static jobjectArray InternalStackTraceToStackTraceElementArray(JNIEnv* env, jobject internal,
       jobjectArray output_array = NULL, int* stack_depth = NULL);
 
-  void VisitRoots(Heap::RootVisitor* visitor, void* arg);
+  void VisitRoots(Heap::RootVisitor* visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 #if VERIFY_OBJECT_ENABLED
   void VerifyStack();
@@ -398,16 +489,7 @@
   }
 
   // Set the stack end to that to be used during a stack overflow
-  void SetStackEndForStackOverflow() {
-    // During stack overflow we allow use of the full stack
-    if (stack_end_ == stack_begin_) {
-      DumpStack(std::cerr);
-      LOG(FATAL) << "Need to increase kStackOverflowReservedBytes (currently "
-                 << kStackOverflowReservedBytes << ")";
-    }
-
-    stack_end_ = stack_begin_;
-  }
+  void SetStackEndForStackOverflow() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Set the stack end to that to be used during regular execution
   void ResetDefaultStackEnd() {
@@ -508,8 +590,13 @@
     return frame;
   }
 
-  void CheckSafeToLockOrUnlock(MutexRank rank, bool is_locking);
-  void CheckSafeToWait(MutexRank rank);
+  BaseMutex* GetHeldMutex(MutexLevel level) const {
+    return held_mutexes_[level];
+  }
+
+  void SetHeldMutex(MutexLevel level, BaseMutex* mutex) {
+    held_mutexes_[level] = mutex;
+  }
 
  private:
   // We have no control over the size of 'bool', but want our boolean fields
@@ -517,24 +604,36 @@
   typedef uint32_t bool32_t;
 
   explicit Thread(bool daemon);
-  ~Thread();
+  ~Thread() LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_,
+                           GlobalSynchronization::thread_suspend_count_lock_);
   void Destroy();
   friend class ThreadList;  // For ~Thread and Destroy.
 
   void CreatePeer(const char* name, bool as_daemon, jobject thread_group);
   friend class Runtime; // For CreatePeer.
 
+  // TODO: remove, callers should use GetState and hold the appropriate locks. Used only by
+  //       ShortDump.
+  ThreadState GetStateUnsafe() const NO_THREAD_SAFETY_ANALYSIS {
+    return state_;
+  }
+
   void DumpState(std::ostream& os) const;
-  void DumpStack(std::ostream& os) const;
+  void DumpStack(std::ostream& os) const
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Out-of-line conveniences for debugging in gdb.
   static Thread* CurrentFromGdb(); // Like Thread::Current.
-  void DumpFromGdb() const; // Like Thread::Dump(std::cerr).
+  // Like Thread::Dump(std::cerr).
+  void DumpFromGdb() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static void* CreateCallback(void* arg);
 
-  void HandleUncaughtExceptions(const ScopedJniThreadState& ts);
-  void RemoveFromThreadGroup(const ScopedJniThreadState& ts);
+  void HandleUncaughtExceptions(const ScopedObjectAccess& soa)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void RemoveFromThreadGroup(const ScopedObjectAccess& soa)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void Init();
   void InitCardTable();
@@ -544,7 +643,7 @@
   void InitPthreadKeySelf();
   void InitStackHwm();
 
-  void NotifyLocked() {
+  void NotifyLocked() EXCLUSIVE_LOCKS_REQUIRED(wait_mutex_) {
     if (wait_monitor_ != NULL) {
       wait_cond_->Signal();
     }
@@ -555,11 +654,16 @@
   // TLS key used to retrieve the Thread*.
   static pthread_key_t pthread_key_self_;
 
+  // Used to notify threads that they should attempt to resume, they will suspend again if
+  // their suspend count is > 0.
+  static ConditionVariable* resume_cond_
+      GUARDED_BY(GlobalSynchronization::thread_suspend_count_lock_);
+
   // --- Frequently accessed fields first for short offsets ---
 
   // A non-zero value is used to tell the current thread to enter a safe point
   // at the next poll.
-  int suspend_count_;
+  int suspend_count_ GUARDED_BY(GlobalSynchronization::thread_suspend_count_lock_);
 
   // The biased card table, see CardTable for details
   byte* card_table_;
@@ -582,7 +686,7 @@
   // is hard. This field can be read off of Thread::Current to give the address.
   Thread* self_;
 
-  volatile ThreadState state_;
+  volatile ThreadState state_ GUARDED_BY(GlobalSynchronization::thread_suspend_count_lock_);
 
   // Our managed peer (an instance of java.lang.Thread).
   Object* peer_;
@@ -604,12 +708,12 @@
   pid_t tid_;
 
   // Guards the 'interrupted_' and 'wait_monitor_' members.
-  mutable Mutex* wait_mutex_;
-  ConditionVariable* wait_cond_;
-  // Pointer to the monitor lock we're currently waiting on (or NULL), guarded by wait_mutex_.
-  Monitor* wait_monitor_;
-  // Thread "interrupted" status; stays raised until queried or thrown, guarded by wait_mutex_.
-  bool32_t interrupted_;
+  mutable Mutex* wait_mutex_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ConditionVariable* wait_cond_ GUARDED_BY(wait_mutex_);
+  // Pointer to the monitor lock we're currently waiting on (or NULL).
+  Monitor* wait_monitor_ GUARDED_BY(wait_mutex_);
+  // Thread "interrupted" status; stays raised until queried or thrown.
+  bool32_t interrupted_ GUARDED_BY(wait_mutex_);
   // The next thread in the wait set this thread is part of.
   Thread* wait_next_;
   // If we're blocked in MonitorEnter, this is the object we're trying to lock.
@@ -636,7 +740,7 @@
 
   // How much of 'suspend_count_' is by request of the debugger, used to set things right
   // when the debugger detaches. Must be <= suspend_count_.
-  int debug_suspend_count_;
+  int debug_suspend_count_ GUARDED_BY(GlobalSynchronization::thread_suspend_count_lock_);
 
   // JDWP invoke-during-breakpoint support.
   DebugInvokeReq* debug_invoke_req_;
@@ -654,8 +758,8 @@
   // A cached pthread_t for the pthread underlying this Thread*.
   pthread_t pthread_self_;
 
-  // Mutexes held by this thread, see CheckSafeToLockOrUnlock.
-  uint32_t held_mutexes_[kMaxMutexRank + 1];
+  // Support for Mutex lock hierarchy bug detection.
+  BaseMutex* held_mutexes_[kMaxMutexLevel + 1];
 
   // A positive value implies we're in a region where thread suspension isn't expected.
   uint32_t no_thread_suspension_;
@@ -672,39 +776,12 @@
   // How many times has our pthread key's destructor been called?
   uint32_t thread_exit_check_count_;
 
-  friend class ScopedThreadListLockReleaser;
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };
 
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
 std::ostream& operator<<(std::ostream& os, const ThreadState& state);
 
-class ScopedThreadStateChange {
- public:
-  ScopedThreadStateChange(Thread* thread, ThreadState new_state) : thread_(thread) {
-    if (thread_ == NULL) {
-      // Value chosen arbitrarily and won't be used in the destructor since thread_ == NULL.
-      old_thread_state_ = kTerminated;
-      CHECK(Runtime::Current()->IsShuttingDown());
-      return;
-    }
-    old_thread_state_ = thread_->SetState(new_state);
-  }
-
-  ~ScopedThreadStateChange() {
-    if (thread_ == NULL) {
-      CHECK(Runtime::Current()->IsShuttingDown());
-      return;
-    }
-    thread_->SetState(old_thread_state_);
-  }
-
- private:
-  Thread* thread_;
-  ThreadState old_thread_state_;
-  DISALLOW_COPY_AND_ASSIGN(ScopedThreadStateChange);
-};
-
 }  // namespace art
 
 #endif  // ART_SRC_THREAD_H_
diff --git a/src/thread_list.cc b/src/thread_list.cc
index ae74420..0bd587c 100644
--- a/src/thread_list.cc
+++ b/src/thread_list.cc
@@ -21,8 +21,6 @@
 #include <unistd.h>
 
 #include "debugger.h"
-#include "scoped_heap_lock.h"
-#include "scoped_thread_list_lock.h"
 #include "timing_logger.h"
 #include "utils.h"
 
@@ -30,11 +28,8 @@
 
 ThreadList::ThreadList()
     : allocated_ids_lock_("allocated thread ids lock"),
-      thread_list_lock_("thread list lock", kThreadListLock),
-      thread_start_cond_("thread start condition variable"),
-      thread_exit_cond_("thread exit condition variable"),
-      thread_suspend_count_lock_("thread suspend count lock", kThreadSuspendCountLock),
-      thread_suspend_count_cond_("thread suspend count condition variable") {
+      suspend_all_count_(0), debug_suspend_all_count_(0),
+      thread_exit_cond_("thread exit condition variable") {
 }
 
 ThreadList::~ThreadList() {
@@ -46,6 +41,8 @@
   }
 
   WaitForOtherNonDaemonThreadsToExit();
+  // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
+  //       Thread::Init.
   SuspendAllDaemonThreads();
 }
 
@@ -63,12 +60,14 @@
 }
 
 pid_t ThreadList::GetLockOwner() {
-  return thread_list_lock_.GetOwner();
+  return GlobalSynchronization::thread_list_lock_->GetExclusiveOwnerTid();
 }
 
 void ThreadList::DumpForSigQuit(std::ostream& os) {
-  ScopedThreadListLock thread_list_lock;
-  DumpLocked(os);
+  {
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    DumpLocked(os);
+  }
   DumpUnattachedThreads(os);
 }
 
@@ -94,14 +93,22 @@
   while (!readdir_r(d, &de, &e) && e != NULL) {
     char* end;
     pid_t tid = strtol(de.d_name, &end, 10);
-    if (!*end && !Contains(tid)) {
-      DumpUnattachedThread(os, tid);
+    if (!*end) {
+      bool contains;
+      {
+        MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+        contains = Contains(tid);
+      }
+      if (!contains) {
+        DumpUnattachedThread(os, tid);
+      }
     }
   }
   closedir(d);
 }
 
 void ThreadList::DumpLocked(std::ostream& os) {
+  GlobalSynchronization::thread_list_lock_->AssertHeld();
   os << "DALVIK THREADS (" << list_.size() << "):\n";
   for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
     (*it)->Dump(os);
@@ -109,119 +116,171 @@
   }
 }
 
-void ThreadList::ModifySuspendCount(Thread* thread, int delta, bool for_debugger) {
-#ifndef NDEBUG
-  DCHECK(delta == -1 || delta == +1 || delta == -thread->debug_suspend_count_)
-      << delta << " " << thread->debug_suspend_count_ << " " << *thread;
-  DCHECK_GE(thread->suspend_count_, thread->debug_suspend_count_) << *thread;
-#endif
-  if (delta == -1 && thread->suspend_count_ <= 0) {
-    // This is expected if you attach a thread during a GC.
-    if (UNLIKELY(!thread->IsStillStarting())) {
-      std::ostringstream ss;
-      Runtime::Current()->GetThreadList()->DumpLocked(ss);
-      LOG(FATAL) << *thread << " suspend count already zero.\n" << ss.str();
-    }
-    return;
-  }
-  thread->suspend_count_ += delta;
-  if (for_debugger) {
-    thread->debug_suspend_count_ += delta;
-  }
-}
-
-void ThreadList::FullSuspendCheck(Thread* thread) {
-  CHECK(thread != NULL);
-  CHECK_GE(thread->suspend_count_, 0);
-
-  MutexLock mu(thread_suspend_count_lock_);
-  if (thread->suspend_count_ == 0) {
-    return;
-  }
-
-  VLOG(threads) << *thread << " self-suspending";
-  {
-    ScopedThreadStateChange tsc(thread, kSuspended);
-    while (thread->suspend_count_ != 0) {
-      /*
-       * Wait for wakeup signal, releasing lock.  The act of releasing
-       * and re-acquiring the lock provides the memory barriers we
-       * need for correct behavior on SMP.
-       */
-      thread_suspend_count_cond_.Wait(thread_suspend_count_lock_);
-    }
-    CHECK_EQ(thread->suspend_count_, 0);
-  }
-  VLOG(threads) << *thread << " self-reviving";
-}
-
-void ThreadList::SuspendAll(bool for_debugger) {
-  Thread* self = Thread::Current();
-
-  VLOG(threads) << *self << " SuspendAll starting..." << (for_debugger ? " (debugger)" : "");
-
-  CHECK_EQ(self->GetState(), kRunnable);
-  ScopedThreadListLock thread_list_lock;
-  Thread* debug_thread = Dbg::GetDebugThread();
-  {
-    // Increment everybody's suspend count (except our own).
-    MutexLock mu(thread_suspend_count_lock_);
-    for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
-      Thread* thread = *it;
-      if (thread == self || (for_debugger && thread == debug_thread)) {
-        continue;
-      }
-      VLOG(threads) << "requesting thread suspend: " << *thread;
-      ModifySuspendCount(thread, +1, for_debugger);
-    }
-  }
-
-  /*
-   * Wait for everybody in kRunnable state to stop.  Other states
-   * indicate the code is either running natively or sleeping quietly.
-   * Any attempt to transition back to kRunnable will cause a check
-   * for suspension, so it should be impossible for anything to execute
-   * interpreted code or modify objects (assuming native code plays nicely).
-   *
-   * It's also okay if the thread transitions to a non-kRunnable state.
-   *
-   * Note we released the thread_suspend_count_lock_ before getting here,
-   * so if another thread is fiddling with its suspend count (perhaps
-   * self-suspending for the debugger) it won't block while we're waiting
-   * in here.
-   */
+void ThreadList::AssertThreadsAreSuspended() {
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
   for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
     Thread* thread = *it;
-    if (thread == self || (for_debugger && thread == debug_thread)) {
-      continue;
-    }
-    thread->WaitUntilSuspended();
-    VLOG(threads) << "thread suspended: " << *thread;
+    CHECK_NE(thread->GetState(), kRunnable);
   }
+}
+
+// Attempt to rectify locks so that we dump thread list with required locks before exiting.
+static void UnsafeLogFatalForThreadSuspendAllTimeout() NO_THREAD_SAFETY_ANALYSIS {
+  Runtime* runtime = Runtime::Current();
+  std::ostringstream ss;
+  ss << "Thread suspend timeout\n";
+  runtime->DumpLockHolders(ss);
+  ss << "\n";
+  GlobalSynchronization::mutator_lock_->SharedTryLock();
+  if (!GlobalSynchronization::mutator_lock_->IsSharedHeld()) {
+    LOG(WARNING) << "Dumping thread list without holding mutator_lock_";
+  }
+  GlobalSynchronization::thread_list_lock_->TryLock();
+  if (!GlobalSynchronization::thread_list_lock_->IsExclusiveHeld()) {
+    LOG(WARNING) << "Dumping thread list without holding thread_list_lock_";
+  }
+  runtime->GetThreadList()->DumpLocked(ss);
+  LOG(FATAL) << ss.str();
+}
+
+void ThreadList::SuspendAll() {
+  Thread* self = Thread::Current();
+
+  VLOG(threads) << *self << " SuspendAll starting...";
+
+  if (kIsDebugBuild) {
+    GlobalSynchronization::mutator_lock_->AssertNotHeld();
+    GlobalSynchronization::thread_list_lock_->AssertNotHeld();
+    GlobalSynchronization::thread_suspend_count_lock_->AssertNotHeld();
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK_NE(self->GetState(), kRunnable);
+  }
+  {
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    {
+      MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+      // Update global suspend all state for attaching threads.
+      ++suspend_all_count_;
+      // Increment everybody's suspend count (except our own).
+      for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
+        Thread* thread = *it;
+        if (thread == self) {
+          continue;
+        }
+        VLOG(threads) << "requesting thread suspend: " << *thread;
+        thread->ModifySuspendCount(+1, false);
+      }
+    }
+  }
+
+  // Block on the mutator lock until all Runnable threads release their share of access. Timeout
+  // if we wait more than 30 seconds.
+  timespec timeout;
+  clock_gettime(CLOCK_REALTIME, &timeout);
+  timeout.tv_sec += 30;
+  if (UNLIKELY(!GlobalSynchronization::mutator_lock_->ExclusiveLockWithTimeout(timeout))) {
+    UnsafeLogFatalForThreadSuspendAllTimeout();
+  }
+
+  // Debug check that all threads are suspended.
+  AssertThreadsAreSuspended();
 
   VLOG(threads) << *self << " SuspendAll complete";
 }
 
-void ThreadList::Suspend(Thread* thread, bool for_debugger) {
+void ThreadList::ResumeAll() {
+  Thread* self = Thread::Current();
+
+  VLOG(threads) << *self << " ResumeAll starting";
+  {
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+    // Update global suspend all state for attaching threads.
+    --suspend_all_count_;
+    // Decrement the suspend counts for all threads.
+    for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
+      Thread* thread = *it;
+      if (thread == self) {
+        continue;
+      }
+      thread->ModifySuspendCount(-1, false);
+    }
+
+    // Broadcast a notification to all suspended threads, some or all of
+    // which may choose to wake up.  No need to wait for them.
+    VLOG(threads) << *self << " ResumeAll waking others";
+    Thread::resume_cond_->Broadcast();
+  }
+  GlobalSynchronization::mutator_lock_->ExclusiveUnlock();
+  VLOG(threads) << *self << " ResumeAll complete";
+}
+
+void ThreadList::Resume(Thread* thread, bool for_debugger) {
   DCHECK(thread != Thread::Current());
-  thread_list_lock_.AssertHeld();
+  VLOG(threads) << "Resume(" << *thread << ") starting..." << (for_debugger ? " (debugger)" : "");
 
-  // TODO: add another thread_suspend_lock_ to avoid GC/debugger races.
-
-  VLOG(threads) << "Suspend(" << *thread << ") starting..." << (for_debugger ? " (debugger)" : "");
-
-  if (!Contains(thread)) {
-    return;
+  {
+    // To check Contains.
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    // To check IsSuspended.
+    MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+    CHECK(thread->IsSuspended());
+    if (!Contains(thread)) {
+      return;
+    }
+    thread->ModifySuspendCount(-1, for_debugger);
   }
 
   {
-    MutexLock mu(thread_suspend_count_lock_);
-    ModifySuspendCount(thread, +1, for_debugger);
+    VLOG(threads) << "Resume(" << *thread << ") waking others";
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    Thread::resume_cond_->Broadcast();
   }
 
-  thread->WaitUntilSuspended();
+  VLOG(threads) << "Resume(" << *thread << ") complete";
+}
 
-  VLOG(threads) << "Suspend(" << *thread << ") complete";
+void ThreadList::SuspendAllForDebugger() {
+  Thread* self = Thread::Current();
+  Thread* debug_thread = Dbg::GetDebugThread();
+
+  VLOG(threads) << *self << " SuspendAllForDebugger starting...";
+
+  {
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    {
+      MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+      // Update global suspend all state for attaching threads.
+      ++suspend_all_count_;
+      ++debug_suspend_all_count_;
+      // Increment everybody's suspend count (except our own).
+      for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
+        Thread* thread = *it;
+        if (thread == self || thread == debug_thread) {
+          continue;
+        }
+        VLOG(threads) << "requesting thread suspend: " << *thread;
+        thread->ModifySuspendCount(+1, true);
+      }
+    }
+  }
+
+  // Block on the mutator lock until all Runnable threads release their share of access. Timeout
+  // if we wait more than 30 seconds.
+  timespec timeout;
+  clock_gettime(CLOCK_REALTIME, &timeout);
+  timeout.tv_sec += 30;
+  if (!GlobalSynchronization::mutator_lock_->ExclusiveLockWithTimeout(timeout)) {
+    UnsafeLogFatalForThreadSuspendAllTimeout();
+  } else {
+    // Debugger suspends all threads but doesn't hold onto the mutator_lock_.
+    GlobalSynchronization::mutator_lock_->ExclusiveUnlock();
+  }
+
+  AssertThreadsAreSuspended();
+
+  VLOG(threads) << *self << " SuspendAllForDebugger complete";
 }
 
 void ThreadList::SuspendSelfForDebugger() {
@@ -235,8 +294,8 @@
   // Collisions with other suspends aren't really interesting. We want
   // to ensure that we're the only one fiddling with the suspend count
   // though.
-  MutexLock mu(thread_suspend_count_lock_);
-  ModifySuspendCount(self, +1, true);
+  MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+  self->ModifySuspendCount(+1, true);
 
   // Suspend ourselves.
   CHECK_GT(self->suspend_count_, 0);
@@ -249,7 +308,7 @@
   Dbg::ClearWaitForEventThread();
 
   while (self->suspend_count_ != 0) {
-    thread_suspend_count_cond_.Wait(thread_suspend_count_lock_);
+    Thread::resume_cond_->Wait(*GlobalSynchronization::thread_suspend_count_lock_);
     if (self->suspend_count_ != 0) {
       // The condition was signaled but we're still suspended. This
       // can happen if the debugger lets go while a SIGQUIT thread
@@ -264,255 +323,71 @@
   VLOG(threads) << *self << " self-reviving (debugger)";
 }
 
-void ThreadList::ResumeAll(bool for_debugger) {
-  Thread* self = Thread::Current();
-
-  VLOG(threads) << *self << " ResumeAll starting" << (for_debugger ? " (debugger)" : "");
-
-  // Decrement the suspend counts for all threads.  No need for atomic
-  // writes, since nobody should be moving until we decrement the count.
-  // We do need to hold the thread list because of JNI attaches.
-  {
-    ScopedThreadListLock thread_list_lock;
-    Thread* debug_thread = Dbg::GetDebugThread();
-    MutexLock mu(thread_suspend_count_lock_);
-    for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
-      Thread* thread = *it;
-      if (thread == self || (for_debugger && thread == debug_thread)) {
-        continue;
-      }
-      ModifySuspendCount(thread, -1, for_debugger);
-    }
-  }
-
-  // Broadcast a notification to all suspended threads, some or all of
-  // which may choose to wake up.  No need to wait for them.
-  {
-    VLOG(threads) << *self << " ResumeAll waking others";
-    MutexLock mu(thread_suspend_count_lock_);
-    thread_suspend_count_cond_.Broadcast();
-  }
-
-  VLOG(threads) << *self << " ResumeAll complete";
-}
-
-void ThreadList::Resume(Thread* thread, bool for_debugger) {
-  DCHECK(thread != Thread::Current());
-
-  if (!for_debugger) { // The debugger is very naughty. See Dbg::InvokeMethod.
-    thread_list_lock_.AssertHeld();
-  }
-
-  VLOG(threads) << "Resume(" << *thread << ") starting..." << (for_debugger ? " (debugger)" : "");
-
-  {
-    MutexLock mu(thread_suspend_count_lock_);
-    if (!Contains(thread)) {
-      return;
-    }
-    ModifySuspendCount(thread, -1, for_debugger);
-  }
-
-  {
-    VLOG(threads) << "Resume(" << *thread << ") waking others";
-    MutexLock mu(thread_suspend_count_lock_);
-    thread_suspend_count_cond_.Broadcast();
-  }
-
-  VLOG(threads) << "Resume(" << *thread << ") complete";
-}
-
-void ThreadList::RunWhileSuspended(Thread* thread, void (*callback)(void*), void* arg) {  // NOLINT
-  DCHECK(thread != NULL);
-  Thread* self = Thread::Current();
-  if (thread != self) {
-    Suspend(thread);
-  }
-  callback(arg);
-  if (thread != self) {
-    Resume(thread);
-  }
-}
-
 void ThreadList::UndoDebuggerSuspensions() {
   Thread* self = Thread::Current();
 
   VLOG(threads) << *self << " UndoDebuggerSuspensions starting";
 
   {
-    ScopedThreadListLock thread_list_lock;
-    MutexLock mu(thread_suspend_count_lock_);
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+    // Update global suspend all state for attaching threads.
+    suspend_all_count_ -= debug_suspend_all_count_;
+    debug_suspend_all_count_ = 0;
+    // Update running threads.
     for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
       Thread* thread = *it;
       if (thread == self || thread->debug_suspend_count_ == 0) {
         continue;
       }
-      ModifySuspendCount(thread, -thread->debug_suspend_count_, true);
+      thread->ModifySuspendCount(-thread->debug_suspend_count_, true);
     }
   }
 
   {
-    MutexLock mu(thread_suspend_count_lock_);
-    thread_suspend_count_cond_.Broadcast();
+    MutexLock mu(*GlobalSynchronization::thread_suspend_count_lock_);
+    Thread::resume_cond_->Broadcast();
   }
 
   VLOG(threads) << "UndoDebuggerSuspensions(" << *self << ") complete";
 }
 
-void ThreadList::Register() {
-  Thread* self = Thread::Current();
-
-  VLOG(threads) << "ThreadList::Register() " << *self << "\n" << Dumpable<Thread>(*self);
-
-  ScopedThreadListLock thread_list_lock;
-  CHECK(!Contains(self));
-  list_.push_back(self);
-}
-
-void ThreadList::Unregister() {
-  Thread* self = Thread::Current();
-
-  VLOG(threads) << "ThreadList::Unregister() " << *self;
-
-  // Any time-consuming destruction, plus anything that can call back into managed code or
-  // suspend and so on, must happen at this point, and not in ~Thread.
-  self->Destroy();
-
-  {
-    // Remove this thread from the list.
-    ScopedThreadListLock thread_list_lock;
-    CHECK(Contains(self));
-    list_.remove(self);
-  }
-
-  // Delete the Thread* and release the thin lock id.
-  uint32_t thin_lock_id = self->thin_lock_id_;
-  delete self;
-  ReleaseThreadId(thin_lock_id);
-
-  // Clear the TLS data, so that the underlying native thread is recognizably detached.
-  // (It may wish to reattach later.)
-  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, NULL), "detach self");
-
-  // Signal that a thread just detached.
-  thread_exit_cond_.Signal();
-}
-
-void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) {
-  ScopedThreadListLock thread_list_lock;
-  for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
-    callback(*it, context);
-  }
-}
-
-void ThreadList::VisitRoots(Heap::RootVisitor* visitor, void* arg) const {
-  ScopedThreadListLock thread_list_lock;
-  for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
-    (*it)->VisitRoots(visitor, arg);
-  }
-}
-
-/*
- * Tell a new thread it's safe to start.
- *
- * We must hold the thread list lock before messing with another thread.
- * In the general case we would also need to verify that the new thread was
- * still in the thread list, but in our case the thread has not started
- * executing user code and therefore has not had a chance to exit.
- *
- * We move it to kVmWait, and it then shifts itself to kRunning, which
- * comes with a suspend-pending check. We do this after
- */
-void ThreadList::SignalGo(Thread* child) {
-  Thread* self = Thread::Current();
-  CHECK(child != self);
-
-  {
-    ScopedThreadListLock thread_list_lock;
-    VLOG(threads) << *self << " waiting for child " << *child << " to be in thread list...";
-
-    // We wait for the child to tell us that it's in the thread list.
-    while (child->GetState() != kStarting) {
-      thread_start_cond_.Wait(thread_list_lock_);
-    }
-  }
-
-  // If we switch out of runnable and then back in, we know there's no pending suspend.
-  self->SetState(kVmWait);
-  self->SetState(kRunnable);
-
-  // Tell the child that it's safe: it will see any future suspend request.
-  ScopedThreadListLock thread_list_lock;
-  VLOG(threads) << *self << " telling child " << *child << " it's safe to proceed...";
-  child->SetState(kVmWait);
-  thread_start_cond_.Broadcast();
-}
-
-void ThreadList::WaitForGo() {
-  Thread* self = Thread::Current();
-
-  {
-    ScopedThreadListLock thread_list_lock;
-    DCHECK(Contains(self));
-
-    // Tell our parent that we're in the thread list.
-    VLOG(threads) << *self << " telling parent that we're now in thread list...";
-    self->SetState(kStarting);
-    thread_start_cond_.Broadcast();
-
-    // Wait until our parent tells us there's no suspend still pending
-    // from before we were on the thread list.
-    VLOG(threads) << *self << " waiting for parent's go-ahead...";
-    while (self->GetState() != kVmWait) {
-      thread_start_cond_.Wait(thread_list_lock_);
-    }
-  }
-
-  // Enter the runnable state. We know that any pending suspend will affect us now.
-  VLOG(threads) << *self << " entering runnable state...";
-  // Lock and unlock the heap lock. This ensures that if there was a GC in progress when we
-  // started, we wait until it's over. Which means that if there's now another GC pending, our
-  // suspend count is non-zero, so switching to the runnable state will suspend us.
-  // TODO: find a better solution!
-  {
-    ScopedHeapLock heap_lock;
-  }
-  self->SetState(kRunnable);
-}
-
-bool ThreadList::AllOtherThreadsAreDaemons() {
-  for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
-    // TODO: there's a race here with thread exit that's being worked around by checking if the peer
-    // is null.
-    Thread* thread = *it;
-    if (thread != Thread::Current() && thread->GetPeer() != NULL && !thread->IsDaemon()) {
-      return false;
-    }
-  }
-  return true;
-}
-
 void ThreadList::WaitForOtherNonDaemonThreadsToExit() {
-  ScopedThreadListLock thread_list_lock;
-  while (!AllOtherThreadsAreDaemons()) {
-    thread_exit_cond_.Wait(thread_list_lock_);
-  }
+  GlobalSynchronization::mutator_lock_->AssertNotHeld();
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  bool all_threads_are_daemons;
+  do {
+    all_threads_are_daemons = true;
+    for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
+      // TODO: there's a race here with thread exit that's being worked around by checking if the
+      // thread has a peer.
+      Thread* thread = *it;
+      if (thread != Thread::Current() && thread->HasPeer() && !thread->IsDaemon()) {
+        all_threads_are_daemons = false;
+        break;
+      }
+    }
+    if (!all_threads_are_daemons) {
+      // Wait for another thread to exit before re-checking.
+      thread_exit_cond_.Wait(*GlobalSynchronization::thread_list_lock_);
+    }
+  } while(!all_threads_are_daemons);
 }
 
 void ThreadList::SuspendAllDaemonThreads() {
-  ScopedThreadListLock thread_list_lock;
-
-  // Tell all the daemons it's time to suspend.
-  {
-    MutexLock mu(thread_suspend_count_lock_);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  { // Tell all the daemons it's time to suspend.
+    MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
     for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
       Thread* thread = *it;
+      // This is only run after all non-daemon threads have exited, so the remainder should all be
+      // daemons.
+      CHECK(thread->IsDaemon());
       if (thread != Thread::Current()) {
         ++thread->suspend_count_;
       }
     }
   }
-
   // Give the threads a chance to suspend, complaining if they're slow.
   bool have_complained = false;
   for (int i = 0; i < 10; ++i) {
@@ -520,6 +395,7 @@
     bool all_suspended = true;
     for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
       Thread* thread = *it;
+      MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
       if (thread != Thread::Current() && thread->GetState() == kRunnable) {
         if (!have_complained) {
           LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
@@ -532,11 +408,72 @@
       return;
     }
   }
+  LOG(ERROR) << "suspend all daemons failed";
+}
+void ThreadList::Register(Thread* self) {
+  DCHECK_EQ(self, Thread::Current());
+
+  if (VLOG_IS_ON(threads)) {
+    std::ostringstream oss;
+    self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
+    LOG(INFO) << "ThreadList::Register() " << *self  << "\n" << oss;
+  }
+
+  // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
+  // SuspendAll requests.
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  MutexLock mu2(*GlobalSynchronization::thread_suspend_count_lock_);
+  self->suspend_count_ = suspend_all_count_;
+  self->debug_suspend_count_ = debug_suspend_all_count_;
+  CHECK(!Contains(self));
+  list_.push_back(self);
+}
+
+void ThreadList::Unregister(Thread* self) {
+  DCHECK_EQ(self, Thread::Current());
+
+  VLOG(threads) << "ThreadList::Unregister() " << *self;
+
+  // Any time-consuming destruction, plus anything that can call back into managed code or
+  // suspend and so on, must happen at this point, and not in ~Thread.
+  self->Destroy();
+
+  {
+    // Remove this thread from the list.
+    MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+    CHECK(Contains(self));
+    list_.remove(self);
+  }
+
+  // Delete the Thread* and release the thin lock id.
+  uint32_t thin_lock_id = self->thin_lock_id_;
+  ReleaseThreadId(thin_lock_id);
+  delete self;
+
+  // Clear the TLS data, so that the underlying native thread is recognizably detached.
+  // (It may wish to reattach later.)
+  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, NULL), "detach self");
+
+  // Signal that a thread just detached.
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  thread_exit_cond_.Signal();
+}
+
+void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) {
+  for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
+    callback(*it, context);
+  }
+}
+
+void ThreadList::VisitRoots(Heap::RootVisitor* visitor, void* arg) const {
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
+  for (It it = list_.begin(), end = list_.end(); it != end; ++it) {
+    (*it)->VisitRoots(visitor, arg);
+  }
 }
 
 uint32_t ThreadList::AllocThreadId() {
   MutexLock mu(allocated_ids_lock_);
-  //ScopedThreadListLock thread_list_lock;
   for (size_t i = 0; i < allocated_ids_.size(); ++i) {
     if (!allocated_ids_[i]) {
       allocated_ids_.set(i);
diff --git a/src/thread_list.h b/src/thread_list.h
index ef475fe..e5b9114 100644
--- a/src/thread_list.h
+++ b/src/thread_list.h
@@ -33,66 +33,98 @@
   explicit ThreadList();
   ~ThreadList();
 
-  void DumpForSigQuit(std::ostream& os);
-  void DumpLocked(std::ostream& os); // For thread suspend timeout dumps.
+  void DumpForSigQuit(std::ostream& os)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void DumpLocked(std::ostream& os)  // For thread suspend timeout dumps.
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_list_lock_)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   pid_t GetLockOwner(); // For SignalCatcher.
 
   // Thread suspension support.
-  void FullSuspendCheck(Thread* thread);
-  void ResumeAll(bool for_debugger = false);
-  void Resume(Thread* thread, bool for_debugger = false);
-  void RunWhileSuspended(Thread* thread, void (*callback)(void*), void* arg);  // NOLINT
-  void SuspendAll(bool for_debugger = false);
-  void SuspendSelfForDebugger();
-  void Suspend(Thread* thread, bool for_debugger = false);
-  void UndoDebuggerSuspensions();
+  void ResumeAll()
+      UNLOCK_FUNCTION(GlobalSynchronization::mutator_lock_)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
+  void Resume(Thread* thread, bool for_debugger = false)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_);
+
+  // Suspends all threads and gets exclusive access to the mutator_lock_.
+  void SuspendAll()
+      EXCLUSIVE_LOCK_FUNCTION(GlobalSynchronization::mutator_lock_)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
+
+  // Suspends all threads for the debugger; unlike SuspendAll, the mutator_lock_ is not held on return.
+  void SuspendAllForDebugger()
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_,
+                     GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
+
+  void SuspendSelfForDebugger()
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_suspend_count_lock_);
+
+  void UndoDebuggerSuspensions()
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
 
   // Iterates over all the threads.
-  void ForEach(void (*callback)(Thread*, void*), void* context);
+  void ForEach(void (*callback)(Thread*, void*), void* context)
+      EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_list_lock_);
 
-  void Register();
-  void Unregister();
+  // Add/remove current thread from list.
+  void Register(Thread* self)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_,
+                     GlobalSynchronization::thread_list_lock_);
+  void Unregister(Thread* self)
+      LOCKS_EXCLUDED(GlobalSynchronization::mutator_lock_,
+                     GlobalSynchronization::thread_list_lock_);
 
-  void VisitRoots(Heap::RootVisitor* visitor, void* arg) const;
+  void VisitRoots(Heap::RootVisitor* visitor, void* arg) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  // Handshaking for new thread creation.
-  void SignalGo(Thread* child);
-  void WaitForGo();
+  // Return a copy of the thread list.
+  std::list<Thread*> GetList() EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_list_lock_) {
+    return list_;
+  }
 
  private:
   typedef std::list<Thread*>::const_iterator It; // TODO: C++0x auto
 
   uint32_t AllocThreadId();
-  void ReleaseThreadId(uint32_t id);
+  void ReleaseThreadId(uint32_t id) LOCKS_EXCLUDED(allocated_ids_lock_);
 
-  bool Contains(Thread* thread);
-  bool Contains(pid_t tid);
+  bool Contains(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_list_lock_);
+  bool Contains(pid_t tid) EXCLUSIVE_LOCKS_REQUIRED(GlobalSynchronization::thread_list_lock_);
 
-  void DumpUnattachedThreads(std::ostream& os);
+  void DumpUnattachedThreads(std::ostream& os)
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_);
 
-  bool AllOtherThreadsAreDaemons();
-  void SuspendAllDaemonThreads();
-  void WaitForOtherNonDaemonThreadsToExit();
+  void SuspendAllDaemonThreads()
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
+  void WaitForOtherNonDaemonThreadsToExit()
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
 
-  static void ModifySuspendCount(Thread* thread, int delta, bool for_debugger);
+  void AssertThreadsAreSuspended()
+      LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_,
+                     GlobalSynchronization::thread_suspend_count_lock_);
 
-  mutable Mutex allocated_ids_lock_;
+  mutable Mutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
 
-  mutable Mutex thread_list_lock_;
-  std::list<Thread*> list_; // TODO: GUARDED_BY(thread_list_lock_);
+  // The actual list of all threads.
+  std::list<Thread*> list_ GUARDED_BY(GlobalSynchronization::thread_list_lock_);
 
-  ConditionVariable thread_start_cond_;
-  ConditionVariable thread_exit_cond_;
+  // Ongoing suspend all requests, used to ensure threads added to list_ respect SuspendAll.
+  int suspend_all_count_ GUARDED_BY(GlobalSynchronization::thread_suspend_count_lock_);
+  int debug_suspend_all_count_ GUARDED_BY(GlobalSynchronization::thread_suspend_count_lock_);
 
-  // This lock guards every thread's suspend_count_ field...
-  mutable Mutex thread_suspend_count_lock_;
-  // ...and is used in conjunction with this condition variable.
-  ConditionVariable thread_suspend_count_cond_ GUARDED_BY(thread_suspend_count_lock_);
+  // Signaled when threads terminate. Used to determine when all non-daemons have terminated.
+  ConditionVariable thread_exit_cond_ GUARDED_BY(GlobalSynchronization::thread_list_lock_);
 
   friend class Thread;
-  friend class ScopedThreadListLock;
-  friend class ScopedThreadListLockReleaser;
 
   DISALLOW_COPY_AND_ASSIGN(ThreadList);
 };
diff --git a/src/trace.cc b/src/trace.cc
index cd594cf..5ac7e3d 100644
--- a/src/trace.cc
+++ b/src/trace.cc
@@ -26,7 +26,7 @@
 #endif
 #include "object_utils.h"
 #include "os.h"
-#include "scoped_thread_list_lock.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "thread_list.h"
 
@@ -158,7 +158,8 @@
   *buf++ = (uint8_t) (val >> 56);
 }
 
-static bool InstallStubsClassVisitor(Class* klass, void*) {
+static bool InstallStubsClassVisitor(Class* klass, void*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Trace* tracer = Runtime::Current()->GetTracer();
   for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
     Method* method = klass->GetDirectMethod(i);
@@ -176,7 +177,8 @@
   return true;
 }
 
-static bool UninstallStubsClassVisitor(Class* klass, void*) {
+static bool UninstallStubsClassVisitor(Class* klass, void*)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   Trace* tracer = Runtime::Current()->GetTracer();
   for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
     Method* method = klass->GetDirectMethod(i);
@@ -214,6 +216,7 @@
 
     Thread* self_;
   };
+  ScopedObjectAccess soa(self);
   RestoreStackVisitor visitor(self);
   visitor.WalkStack();
 }
@@ -266,7 +269,7 @@
   }
 
   ScopedThreadStateChange tsc(Thread::Current(), kRunnable);
-  Runtime::Current()->GetThreadList()->SuspendAll(false);
+  Runtime::Current()->GetThreadList()->SuspendAll();
 
   // Open trace file if not going directly to ddms.
   File* trace_file = NULL;
@@ -280,7 +283,7 @@
       PLOG(ERROR) << "Unable to open trace file '" << trace_filename << "'";
       Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;",
           StringPrintf("Unable to open trace file '%s'", trace_filename).c_str());
-      Runtime::Current()->GetThreadList()->ResumeAll(false);
+      Runtime::Current()->GetThreadList()->ResumeAll();
       return;
     }
   }
@@ -296,7 +299,7 @@
   Runtime::Current()->EnableMethodTracing(tracer);
   tracer->BeginTracing();
 
-  Runtime::Current()->GetThreadList()->ResumeAll(false);
+  Runtime::Current()->GetThreadList()->ResumeAll();
 }
 
 void Trace::Stop() {
@@ -306,12 +309,12 @@
   }
 
   ScopedThreadStateChange tsc(Thread::Current(), kRunnable);
-  Runtime::Current()->GetThreadList()->SuspendAll(false);
+  Runtime::Current()->GetThreadList()->SuspendAll();
 
   Runtime::Current()->GetTracer()->FinishTracing();
   Runtime::Current()->DisableMethodTracing();
 
-  Runtime::Current()->GetThreadList()->ResumeAll(false);
+  Runtime::Current()->GetThreadList()->ResumeAll();
 }
 
 void Trace::Shutdown() {
@@ -486,6 +489,8 @@
 }
 
 void Trace::DumpThreadList(std::ostream& os) {
+  GlobalSynchronization::thread_list_lock_->AssertNotHeld();
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
   Runtime::Current()->GetThreadList()->ForEach(DumpThread, &os);
 }
 
@@ -494,7 +499,9 @@
 }
 
 void Trace::UninstallStubs() {
+  GlobalSynchronization::thread_list_lock_->AssertNotHeld();
   Runtime::Current()->GetClassLinker()->VisitClasses(UninstallStubsClassVisitor, NULL);
+  MutexLock mu(*GlobalSynchronization::thread_list_lock_);
   Runtime::Current()->GetThreadList()->ForEach(TraceRestoreStack, NULL);
 }
 
diff --git a/src/trace.h b/src/trace.h
index 0042fc2..c60ae15 100644
--- a/src/trace.h
+++ b/src/trace.h
@@ -65,7 +65,7 @@
 
   static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags, bool direct_to_ddms);
   static void Stop();
-  static void Shutdown();
+  static void Shutdown() NO_THREAD_SAFETY_ANALYSIS;  // TODO: implement appropriate locking.
 
   bool UseWallClock();
   bool UseThreadCpuClock();
@@ -83,18 +83,18 @@
   explicit Trace(File* trace_file, int buffer_size, int flags);
 
   void BeginTracing();
-  void FinishTracing();
+  void FinishTracing() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Replaces code of each method with a pointer to a stub for method tracing.
   void InstallStubs();
 
   // Restores original code for each method and fixes the return values of each thread's stack.
-  void UninstallStubs();
+  void UninstallStubs() LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_);
 
   // Methods to output traced methods and threads.
   void GetVisitedMethods(size_t end_offset);
-  void DumpMethodList(std::ostream& os);
-  void DumpThreadList(std::ostream& os);
+  void DumpMethodList(std::ostream& os) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  void DumpThreadList(std::ostream& os) LOCKS_EXCLUDED(GlobalSynchronization::thread_list_lock_);
 
   // Maps a method to its original code pointer.
   SafeMap<const Method*, const void*> saved_code_map_;
diff --git a/src/utf.h b/src/utf.h
index 0758896..dd5791c 100644
--- a/src/utf.h
+++ b/src/utf.h
@@ -19,6 +19,7 @@
 
 #include <stddef.h>
 #include <stdint.h>
+#include "macros.h"
 
 /*
  * All UTF-8 in art is actually modified UTF-8. Mostly, this distinction
@@ -63,7 +64,8 @@
 /*
  * The java.lang.String hashCode() algorithm.
  */
-int32_t ComputeUtf16Hash(const CharArray* chars, int32_t offset, size_t char_count);
+int32_t ComputeUtf16Hash(const CharArray* chars, int32_t offset, size_t char_count)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count);
 
 /*
diff --git a/src/utils.h b/src/utils.h
index a31588b..851c6b1 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -167,30 +167,36 @@
 std::string PrettyDescriptor(const String* descriptor);
 std::string PrettyDescriptor(const std::string& descriptor);
 std::string PrettyDescriptor(Primitive::Type type);
-std::string PrettyDescriptor(const Class* klass);
+std::string PrettyDescriptor(const Class* klass)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 // Returns a human-readable signature for 'f'. Something like "a.b.C.f" or
 // "int a.b.C.f" (depending on the value of 'with_type').
-std::string PrettyField(const Field* f, bool with_type = true);
+std::string PrettyField(const Field* f, bool with_type = true)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 std::string PrettyField(uint32_t field_idx, const DexFile& dex_file, bool with_type = true);
 
 // Returns a human-readable signature for 'm'. Something like "a.b.C.m" or
 // "a.b.C.m(II)V" (depending on the value of 'with_signature').
-std::string PrettyMethod(const Method* m, bool with_signature = true);
+std::string PrettyMethod(const Method* m, bool with_signature = true)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 std::string PrettyMethod(uint32_t method_idx, const DexFile& dex_file, bool with_signature = true);
 
 // Returns a human-readable form of the name of the *class* of the given object.
 // So given an instance of java.lang.String, the output would
 // be "java.lang.String". Given an array of int, the output would be "int[]".
 // Given String.class, the output would be "java.lang.Class<java.lang.String>".
-std::string PrettyTypeOf(const Object* obj);
+std::string PrettyTypeOf(const Object* obj)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 // Returns a human-readable form of the name of the given class.
 // Given String.class, the output would be "java.lang.Class<java.lang.String>".
-std::string PrettyClass(const Class* c);
+std::string PrettyClass(const Class* c)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 // Returns a human-readable form of the name of the given class with its class loader.
-std::string PrettyClassAndClassLoader(const Class* c);
+std::string PrettyClassAndClassLoader(const Class* c)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 // Returns a human-readable size string such as "1MB".
 std::string PrettySize(size_t size_in_bytes);
@@ -223,9 +229,11 @@
 bool IsValidMemberName(const char* s);
 
 // Returns the JNI native function name for the non-overloaded method 'm'.
-std::string JniShortName(const Method* m);
+std::string JniShortName(const Method* m)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 // Returns the JNI native function name for the overloaded method 'm'.
-std::string JniLongName(const Method* m);
+std::string JniLongName(const Method* m)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 bool ReadFileToString(const std::string& file_name, std::string* result);
 
diff --git a/src/utils_test.cc b/src/utils_test.cc
index 311bd16..e6ff118 100644
--- a/src/utils_test.cc
+++ b/src/utils_test.cc
@@ -16,6 +16,7 @@
 
 #include "object.h"
 #include "common_test.h"
+#include "scoped_thread_state_change.h"
 #include "utils.h"
 
 namespace art {
@@ -84,6 +85,7 @@
 }
 
 TEST_F(UtilsTest, PrettyTypeOf) {
+  ScopedObjectAccess soa(Thread::Current());
   EXPECT_EQ("null", PrettyTypeOf(NULL));
 
   SirtRef<String> s(String::AllocFromModifiedUtf8(""));
@@ -100,6 +102,7 @@
 }
 
 TEST_F(UtilsTest, PrettyClass) {
+  ScopedObjectAccess soa(Thread::Current());
   EXPECT_EQ("null", PrettyClass(NULL));
   Class* c = class_linker_->FindSystemClass("[Ljava/lang/String;");
   ASSERT_TRUE(c != NULL);
@@ -108,6 +111,7 @@
 }
 
 TEST_F(UtilsTest, PrettyClassAndClassLoader) {
+  ScopedObjectAccess soa(Thread::Current());
   EXPECT_EQ("null", PrettyClassAndClassLoader(NULL));
   Class* c = class_linker_->FindSystemClass("[Ljava/lang/String;");
   ASSERT_TRUE(c != NULL);
@@ -116,6 +120,7 @@
 }
 
 TEST_F(UtilsTest, PrettyField) {
+  ScopedObjectAccess soa(Thread::Current());
   EXPECT_EQ("null", PrettyField(NULL));
 
   Class* java_lang_String = class_linker_->FindSystemClass("Ljava/lang/String;");
@@ -177,6 +182,7 @@
 }
 
 TEST_F(UtilsTest, MangleForJni) {
+  ScopedObjectAccess soa(Thread::Current());
   EXPECT_EQ("hello_00024world", MangleForJni("hello$world"));
   EXPECT_EQ("hello_000a9world", MangleForJni("hello\xc2\xa9world"));
   EXPECT_EQ("hello_1world", MangleForJni("hello_world"));
@@ -185,6 +191,7 @@
 }
 
 TEST_F(UtilsTest, JniShortName_JniLongName) {
+  ScopedObjectAccess soa(Thread::Current());
   Class* c = class_linker_->FindSystemClass("Ljava/lang/String;");
   ASSERT_TRUE(c != NULL);
   Method* m;
diff --git a/src/verifier/method_verifier.cc b/src/verifier/method_verifier.cc
index 5e98b1e..167e1e5 100644
--- a/src/verifier/method_verifier.cc
+++ b/src/verifier/method_verifier.cc
@@ -313,7 +313,7 @@
                           method, method->GetAccessFlags());
   verifier.Verify();
   verifier.DumpFailures(LOG(INFO) << "Dump of method " << PrettyMethod(method) << "\n")
-      << verifier.info_messages_.str() << Dumpable<MethodVerifier>(verifier);
+      << verifier.info_messages_.str() << MutatorLockedDumpable<MethodVerifier>(verifier);
 }
 
 MethodVerifier::MethodVerifier(const DexFile* dex_file, DexCache* dex_cache,
@@ -1026,7 +1026,8 @@
   return os;
 }
 
-extern "C" void MethodVerifierGdbDump(MethodVerifier* v) {
+extern "C" void MethodVerifierGdbDump(MethodVerifier* v)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   v->Dump(std::cerr);
 }
 
@@ -3327,13 +3328,15 @@
 }
 
 void MethodVerifier::SetGcMap(Compiler::MethodReference ref, const std::vector<uint8_t>& gc_map) {
-  MutexLock mu(*gc_maps_lock_);
-  GcMapTable::iterator it = gc_maps_->find(ref);
-  if (it != gc_maps_->end()) {
-    delete it->second;
-    gc_maps_->erase(it);
+  {
+    MutexLock mu(*gc_maps_lock_);
+    GcMapTable::iterator it = gc_maps_->find(ref);
+    if (it != gc_maps_->end()) {
+      delete it->second;
+      gc_maps_->erase(it);
+    }
+    gc_maps_->Put(ref, &gc_map);
   }
-  gc_maps_->Put(ref, &gc_map);
   CHECK(GetGcMap(ref) != NULL);
 }
 
@@ -3411,8 +3414,10 @@
 }
 
 void MethodVerifier::AddRejectedClass(Compiler::ClassReference ref) {
-  MutexLock mu(*rejected_classes_lock_);
-  rejected_classes_->insert(ref);
+  {
+    MutexLock mu(*rejected_classes_lock_);
+    rejected_classes_->insert(ref);
+  }
   CHECK(IsClassRejected(ref));
 }
 
diff --git a/src/verifier/method_verifier.h b/src/verifier/method_verifier.h
index b2a88b7..a67366f 100644
--- a/src/verifier/method_verifier.h
+++ b/src/verifier/method_verifier.h
@@ -162,10 +162,12 @@
   };
 
   /* Verify a class. Returns "kNoFailure" on success. */
-  static FailureKind VerifyClass(const Class* klass, std::string& error);
+  static FailureKind VerifyClass(const Class* klass, std::string& error)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   static FailureKind VerifyClass(const DexFile* dex_file, DexCache* dex_cache,
                                  ClassLoader* class_loader, uint32_t class_def_idx,
-                                 std::string& error);
+                                 std::string& error)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   uint8_t EncodePcToReferenceMapData() const;
 
@@ -191,13 +193,16 @@
 
   // Dump the state of the verifier, namely each instruction, what flags are set on it, register
   // information
-  void Dump(std::ostream& os);
+  void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  static const std::vector<uint8_t>* GetGcMap(Compiler::MethodReference ref);
+  static const std::vector<uint8_t>* GetGcMap(Compiler::MethodReference ref)
+      LOCKS_EXCLUDED(gc_maps_lock_);
 
   // Fills 'monitor_enter_dex_pcs' with the dex pcs of the monitor-enter instructions corresponding
   // to the locks held at 'dex_pc' in 'm'.
-  static void FindLocksAtDexPc(Method* m, uint32_t dex_pc, std::vector<uint32_t>& monitor_enter_dex_pcs);
+  static void FindLocksAtDexPc(Method* m, uint32_t dex_pc,
+                               std::vector<uint32_t>& monitor_enter_dex_pcs)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static void Init();
   static void Shutdown();
@@ -206,12 +211,14 @@
   static const InferredRegCategoryMap* GetInferredRegCategoryMap(Compiler::MethodReference ref);
 #endif
 
-  static bool IsClassRejected(Compiler::ClassReference ref);
+  static bool IsClassRejected(Compiler::ClassReference ref)
+      LOCKS_EXCLUDED(rejected_classes_lock_);
 
  private:
   explicit MethodVerifier(const DexFile* dex_file, DexCache* dex_cache,
       ClassLoader* class_loader, uint32_t class_def_idx, const DexFile::CodeItem* code_item,
-      uint32_t method_idx, Method* method, uint32_t access_flags);
+      uint32_t method_idx, Method* method, uint32_t access_flags)
+          SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Adds the given string to the beginning of the last failure message.
   void PrependToLastFailMessage(std::string);
@@ -232,14 +239,16 @@
    */
   static FailureKind VerifyMethod(uint32_t method_idx, const DexFile* dex_file, DexCache* dex_cache,
       ClassLoader* class_loader, uint32_t class_def_idx, const DexFile::CodeItem* code_item,
-      Method* method, uint32_t method_access_flags);
-  static void VerifyMethodAndDump(Method* method);
+      Method* method, uint32_t method_access_flags)
+          SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  static void VerifyMethodAndDump(Method* method)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Run verification on the method. Returns true if verification completes and false if the input
   // has an irrecoverable corruption.
-  bool Verify();
+  bool Verify() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  void FindLocksAtDexPc();
+  void FindLocksAtDexPc() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Compute the width of the instruction at each address in the instruction stream, and store it in
@@ -267,7 +276,7 @@
    * Returns "false" if something in the exception table looks fishy, but we're expecting the
    * exception table to be somewhat sane.
    */
-  bool ScanTryCatchBlocks();
+  bool ScanTryCatchBlocks() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Perform static verification on all instructions in a method.
@@ -373,11 +382,11 @@
                        bool* selfOkay);
 
   /* Perform detailed code-flow analysis on a single method. */
-  bool VerifyCodeFlow();
+  bool VerifyCodeFlow() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Set the register types for the first instruction in the method based on the method signature.
   // This has the side-effect of validating the signature.
-  bool SetTypesFromSignature();
+  bool SetTypesFromSignature() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Perform code flow on a method.
@@ -425,7 +434,7 @@
    * reordering by specifying that you can't execute the new-instance instruction if a register
    * contains an uninitialized instance created by that same instruction.
    */
-  bool CodeFlowVerifyMethod();
+  bool CodeFlowVerifyMethod() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Perform verification for a single instruction.
@@ -436,54 +445,62 @@
    * point needs to be (re-)evaluated. Register changes are merged into "reg_types_" at the target
    * addresses. Does not set or clear any other flags in "insn_flags_".
    */
-  bool CodeFlowVerifyInstruction(uint32_t* start_guess);
+  bool CodeFlowVerifyInstruction(uint32_t* start_guess)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Perform verification of a new array instruction
   void VerifyNewArray(const DecodedInstruction& dec_insn, bool is_filled,
-                      bool is_range);
+                      bool is_range)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Perform verification of an aget instruction. The destination register's type will be set to
   // be that of component type of the array unless the array type is unknown, in which case a
   // bottom type inferred from the type of instruction is used. is_primitive is false for an
   // aget-object.
   void VerifyAGet(const DecodedInstruction& insn, const RegType& insn_type,
-                  bool is_primitive);
+                  bool is_primitive) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Perform verification of an aput instruction.
   void VerifyAPut(const DecodedInstruction& insn, const RegType& insn_type,
-                  bool is_primitive);
+                  bool is_primitive) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Lookup instance field and fail for resolution violations
-  Field* GetInstanceField(const RegType& obj_type, int field_idx);
+  Field* GetInstanceField(const RegType& obj_type, int field_idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Lookup static field and fail for resolution violations
-  Field* GetStaticField(int field_idx);
+  Field* GetStaticField(int field_idx) SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Perform verification of an iget or sget instruction.
   void VerifyISGet(const DecodedInstruction& insn, const RegType& insn_type,
-                   bool is_primitive, bool is_static);
+                   bool is_primitive, bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Perform verification of an iput or sput instruction.
   void VerifyISPut(const DecodedInstruction& insn, const RegType& insn_type,
-                   bool is_primitive, bool is_static);
+                   bool is_primitive, bool is_static)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Resolves a class based on an index and performs access checks to ensure the referrer can
   // access the resolved class.
-  const RegType& ResolveClassAndCheckAccess(uint32_t class_idx);
+  const RegType& ResolveClassAndCheckAccess(uint32_t class_idx)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * For the "move-exception" instruction at "work_insn_idx_", which must be at an exception handler
    * address, determine the Join of all exceptions that can land here. Fails if no matching
    * exception handler can be found or if the Join of exception types fails.
    */
-  const RegType& GetCaughtExceptionType();
+  const RegType& GetCaughtExceptionType()
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Resolves a method based on an index and performs access checks to ensure
    * the referrer can access the resolved method.
    * Does not throw exceptions.
    */
-  Method* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type);
+  Method* ResolveMethodAndCheckAccess(uint32_t method_idx, MethodType method_type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Verify the arguments to a method. We're executing in "method", making
@@ -508,7 +525,8 @@
    * set appropriately).
    */
   Method* VerifyInvocationArgs(const DecodedInstruction& dec_insn,
-                               MethodType method_type, bool is_range, bool is_super);
+                               MethodType method_type, bool is_range, bool is_super)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Verify that the target instruction is not "move-exception". It's important that the only way
@@ -528,7 +546,8 @@
   * next_insn, and set the changed flag on the target address if any of the registers were changed.
   * Returns "false" if an error is encountered.
   */
-  bool UpdateRegisters(uint32_t next_insn, const RegisterLine* merge_line);
+  bool UpdateRegisters(uint32_t next_insn, const RegisterLine* merge_line)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Is the method being verified a constructor?
   bool IsConstructor() const {
@@ -541,10 +560,10 @@
   }
 
   // Return the register type for the method.
-  const RegType& GetMethodReturnType();
+  const RegType& GetMethodReturnType() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Get a type representing the declaring class of the method.
-  const RegType& GetDeclaringClass();
+  const RegType& GetDeclaringClass() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
 #if defined(ART_USE_LLVM_COMPILER) || defined(ART_USE_GREENLAND_COMPILER)
   /*
@@ -572,25 +591,27 @@
 
   // All the GC maps that the verifier has created
   typedef SafeMap<const Compiler::MethodReference, const std::vector<uint8_t>*> GcMapTable;
-  static Mutex* gc_maps_lock_;
-  static GcMapTable* gc_maps_;
-  static void SetGcMap(Compiler::MethodReference ref, const std::vector<uint8_t>& gc_map);
+  static Mutex* gc_maps_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  static GcMapTable* gc_maps_ GUARDED_BY(gc_maps_lock_);
+  static void SetGcMap(Compiler::MethodReference ref, const std::vector<uint8_t>& gc_map)
+      LOCKS_EXCLUDED(gc_maps_lock_);
 
   typedef std::set<Compiler::ClassReference> RejectedClassesTable;
-  static Mutex* rejected_classes_lock_;
+  static Mutex* rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   static RejectedClassesTable* rejected_classes_;
 
 #if defined(ART_USE_LLVM_COMPILER) || defined(ART_USE_GREENLAND_COMPILER)
   // All the inferred register category maps that the verifier has created.
   typedef SafeMap<const Compiler::MethodReference,
                   const InferredRegCategoryMap*> InferredRegCategoryMapTable;
-  static Mutex* inferred_reg_category_maps_lock_;
+  static Mutex* inferred_reg_category_maps_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   static InferredRegCategoryMapTable* inferred_reg_category_maps_;
   static void SetInferredRegCategoryMap(Compiler::MethodReference ref,
                                         const InferredRegCategoryMap& m);
 #endif
 
-  static void AddRejectedClass(Compiler::ClassReference ref);
+  static void AddRejectedClass(Compiler::ClassReference ref)
+      LOCKS_EXCLUDED(rejected_classes_lock_);
 
   RegTypeCache reg_types_;
 
@@ -607,11 +628,14 @@
   UniquePtr<RegisterLine> saved_line_;
 
   uint32_t method_idx_;  // The method we're working on.
-  Method* foo_method_;  // Its object representation if known.
+  // Its object representation if known.
+  Method* foo_method_ GUARDED_BY(GlobalSynchronization::mutator_lock_);
   uint32_t method_access_flags_;  // Method's access flags.
   const DexFile* dex_file_;  // The dex file containing the method.
-  DexCache* dex_cache_;  // The dex_cache for the declaring class of the method.
-  ClassLoader* class_loader_;  // The class loader for the declaring class of the method.
+  // The dex_cache for the declaring class of the method.
+  DexCache* dex_cache_ GUARDED_BY(GlobalSynchronization::mutator_lock_);
+  // The class loader for the declaring class of the method.
+  ClassLoader* class_loader_ GUARDED_BY(GlobalSynchronization::mutator_lock_);
   uint32_t class_def_idx_;  // The class def index of the declaring class of the method.
   const DexFile::CodeItem* code_item_;  // The code item containing the code for the method.
   UniquePtr<InsnFlags[]> insn_flags_;  // Instruction widths and flags, one entry per code unit.
diff --git a/src/verifier/method_verifier_test.cc b/src/verifier/method_verifier_test.cc
index 5c23e9f..e52feb3 100644
--- a/src/verifier/method_verifier_test.cc
+++ b/src/verifier/method_verifier_test.cc
@@ -27,7 +27,8 @@
 
 class MethodVerifierTest : public CommonTest {
  protected:
-  void VerifyClass(const std::string& descriptor) {
+  void VerifyClass(const std::string& descriptor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ASSERT_TRUE(descriptor != NULL);
     Class* klass = class_linker_->FindSystemClass(descriptor.c_str());
 
@@ -36,7 +37,8 @@
     ASSERT_TRUE(MethodVerifier::VerifyClass(klass, error_msg) == MethodVerifier::kNoFailure) << error_msg;
   }
 
-  void VerifyDexFile(const DexFile* dex) {
+  void VerifyDexFile(const DexFile* dex)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     ASSERT_TRUE(dex != NULL);
 
     // Verify all the classes defined in this file
@@ -49,12 +51,14 @@
 };
 
 TEST_F(MethodVerifierTest, LibCore) {
+  ScopedObjectAccess soa(Thread::Current());
   VerifyDexFile(java_lang_dex_file_);
 }
 
 TEST_F(MethodVerifierTest, IntMath) {
-  SirtRef<ClassLoader> class_loader(LoadDex("IntMath"));
-  Class* klass = class_linker_->FindClass("LIntMath;", class_loader.get());
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("IntMath");
+  Class* klass = class_linker_->FindClass("LIntMath;", soa.Decode<ClassLoader*>(class_loader));
   std::string error_msg;
   ASSERT_TRUE(MethodVerifier::VerifyClass(klass, error_msg) == MethodVerifier::kNoFailure) << error_msg;
 }
diff --git a/src/verifier/reg_type.cc b/src/verifier/reg_type.cc
index 8d1df22..281d96e 100644
--- a/src/verifier/reg_type.cc
+++ b/src/verifier/reg_type.cc
@@ -384,7 +384,8 @@
   }
 }
 
-std::ostream& operator<<(std::ostream& os, const RegType& rhs) {
+std::ostream& operator<<(std::ostream& os, const RegType& rhs)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   os << rhs.Dump();
   return os;
 }
diff --git a/src/verifier/reg_type.h b/src/verifier/reg_type.h
index 7e8fca1..a0e2ff8 100644
--- a/src/verifier/reg_type.h
+++ b/src/verifier/reg_type.h
@@ -117,7 +117,8 @@
   }
 
   // The high half that corresponds to this low half
-  const RegType& HighHalf(RegTypeCache* cache) const;
+  const RegType& HighHalf(RegTypeCache* cache) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool IsConstant() const { return type_ == kRegTypeConst; }
   bool IsLongConstant() const { return type_ == kRegTypeConstLo; }
@@ -208,7 +209,7 @@
     return IsReference() && GetClass()->IsObjectClass();
   }
 
-  bool IsArrayTypes() const {
+  bool IsArrayTypes() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (IsUnresolvedTypes() && !IsUnresolvedMergedReference() && !IsUnresolvedSuperClass()) {
       return GetDescriptor()->CharAt(0) == '[';
     } else if (IsReference()) {
@@ -218,7 +219,7 @@
     }
   }
 
-  bool IsObjectArrayTypes() const {
+  bool IsObjectArrayTypes() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     if (IsUnresolvedTypes() && !IsUnresolvedMergedReference() && !IsUnresolvedSuperClass()) {
       // Primitive arrays will always resolve
       DCHECK(GetDescriptor()->CharAt(1) == 'L' || GetDescriptor()->CharAt(1) == '[');
@@ -293,22 +294,28 @@
     return static_cast<uint16_t>(allocation_pc_or_constant_or_merged_types_ & 0xFFFF);
   }
 
-  const RegType& GetSuperClass(RegTypeCache* cache) const;
+  const RegType& GetSuperClass(RegTypeCache* cache) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
-  std::string Dump(const RegTypeCache* reg_types = NULL) const;
+  std::string Dump(const RegTypeCache* reg_types = NULL) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Can this type access other?
-  bool CanAccess(const RegType& other) const;
+  bool CanAccess(const RegType& other) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   // Can this type access a member with the given properties?
-  bool CanAccessMember(Class* klass, uint32_t access_flags) const;
+  bool CanAccessMember(Class* klass, uint32_t access_flags) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Can this type be assigned by src?
-  bool IsAssignableFrom(const RegType& src) const;
+  bool IsAssignableFrom(const RegType& src) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   bool Equals(const RegType& other) const { return GetId() == other.GetId(); }
 
   // Compute the merge of this register from one edge (path) with incoming_type from another.
-  const RegType& Merge(const RegType& incoming_type, RegTypeCache* reg_types) const;
+  const RegType& Merge(const RegType& incoming_type, RegTypeCache* reg_types) const
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * A basic Join operation on classes. For a pair of types S and T the Join, written S v T = J, is
@@ -326,7 +333,8 @@
    *
    * [1] Java bytecode verification: algorithms and formalizations, Xavier Leroy
    */
-  static Class* ClassJoin(Class* s, Class* t);
+  static Class* ClassJoin(Class* s, Class* t)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  private:
   friend class RegTypeCache;
diff --git a/src/verifier/reg_type_cache.h b/src/verifier/reg_type_cache.h
index 4ba667b..1287388 100644
--- a/src/verifier/reg_type_cache.h
+++ b/src/verifier/reg_type_cache.h
@@ -40,32 +40,68 @@
     return *result;
   }
 
-  const RegType& From(RegType::Type type, ClassLoader* loader, const char* descriptor);
-  const RegType& FromClass(Class* klass);
+  const RegType& From(RegType::Type type, ClassLoader* loader, const char* descriptor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  const RegType& FromClass(Class* klass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   const RegType& FromCat1Const(int32_t value);
-  const RegType& FromDescriptor(ClassLoader* loader, const char* descriptor);
-  const RegType& FromType(RegType::Type);
+  const RegType& FromDescriptor(ClassLoader* loader, const char* descriptor)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
+  const RegType& FromType(RegType::Type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
   const RegType& FromUnresolvedMerge(const RegType& left, const RegType& right);
   const RegType& FromUnresolvedSuperClass(const RegType& child);
 
-  const RegType& Boolean() { return FromType(RegType::kRegTypeBoolean); }
-  const RegType& Byte()    { return FromType(RegType::kRegTypeByte); }
-  const RegType& Char()    { return FromType(RegType::kRegTypeChar); }
-  const RegType& Short()   { return FromType(RegType::kRegTypeShort); }
-  const RegType& Integer() { return FromType(RegType::kRegTypeInteger); }
-  const RegType& Float()   { return FromType(RegType::kRegTypeFloat); }
-  const RegType& Long()    { return FromType(RegType::kRegTypeLongLo); }
-  const RegType& Double()  { return FromType(RegType::kRegTypeDoubleLo); }
+  const RegType& Boolean() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeBoolean);
+  }
+  const RegType& Byte() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeByte);
+  }
+  const RegType& Char() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeChar);
+  }
+  const RegType& Short() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeShort);
+  }
+  const RegType& Integer() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeInteger);
+  }
+  const RegType& Float() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeFloat);
+  }
+  const RegType& Long() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeLongLo);
+  }
+  const RegType& Double() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeDoubleLo);
+  }
 
-  const RegType& JavaLangClass()  { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Class;"); }
-  const RegType& JavaLangObject() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Object;"); }
-  const RegType& JavaLangString() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/String;"); }
-  const RegType& JavaLangThrowable() { return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Throwable;"); }
+  const RegType& JavaLangClass() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Class;");
+  }
+  const RegType& JavaLangObject() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Object;");
+  }
+  const RegType& JavaLangString() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return From(RegType::kRegTypeReference, NULL, "Ljava/lang/String;");
+  }
+  const RegType& JavaLangThrowable() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return From(RegType::kRegTypeReference, NULL, "Ljava/lang/Throwable;");
+  }
 
-  const RegType& Undefined() { return FromType(RegType::kRegTypeUndefined); }
-  const RegType& Conflict()  { return FromType(RegType::kRegTypeConflict); }
-  const RegType& ConstLo()   { return FromType(RegType::kRegTypeConstLo); }
-  const RegType& Zero()      { return FromCat1Const(0); }
+  const RegType& Undefined() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeUndefined);
+  }
+  const RegType& Conflict() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeConflict);
+  }
+  const RegType& ConstLo() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromType(RegType::kRegTypeConstLo);
+  }
+  const RegType& Zero() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
+    return FromCat1Const(0);
+  }
 
   const RegType& Uninitialized(const RegType& type, uint32_t allocation_pc);
   // Create an uninitialized 'this' argument for the given type.
@@ -79,7 +115,8 @@
   const RegType& ShortConstant() { return FromCat1Const(std::numeric_limits<jshort>::min()); }
   const RegType& IntConstant() { return FromCat1Const(std::numeric_limits<jint>::max()); }
 
-  const RegType& GetComponentType(const RegType& array, ClassLoader* loader);
+  const RegType& GetComponentType(const RegType& array, ClassLoader* loader)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
  private:
   // The allocated entries
diff --git a/src/verifier/reg_type_test.cc b/src/verifier/reg_type_test.cc
index 18c1655..6bdf886 100644
--- a/src/verifier/reg_type_test.cc
+++ b/src/verifier/reg_type_test.cc
@@ -26,6 +26,7 @@
 class RegTypeTest : public CommonTest {};
 
 TEST_F(RegTypeTest, Primitives) {
+  ScopedObjectAccess soa(Thread::Current());
   RegTypeCache cache;
 
   const RegType& bool_reg_type = cache.Boolean();
diff --git a/src/verifier/register_line.cc b/src/verifier/register_line.cc
index 1d4366f..ec7891e 100644
--- a/src/verifier/register_line.cc
+++ b/src/verifier/register_line.cc
@@ -367,7 +367,8 @@
   }
 }
 
-std::ostream& operator<<(std::ostream& os, const RegisterLine& rhs) {
+std::ostream& operator<<(std::ostream& os, const RegisterLine& rhs)
+    SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
   os << rhs.Dump();
   return os;
 }
diff --git a/src/verifier/register_line.h b/src/verifier/register_line.h
index 9664a5b..1d131ad 100644
--- a/src/verifier/register_line.h
+++ b/src/verifier/register_line.h
@@ -61,19 +61,23 @@
   }
 
   // Implement category-1 "move" instructions. Copy a 32-bit value from "vsrc" to "vdst".
-  void CopyRegister1(uint32_t vdst, uint32_t vsrc, TypeCategory cat);
+  void CopyRegister1(uint32_t vdst, uint32_t vsrc, TypeCategory cat)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Implement category-2 "move" instructions. Copy a 64-bit value from "vsrc" to "vdst". This
   // copies both halves of the register.
-  void CopyRegister2(uint32_t vdst, uint32_t vsrc);
+  void CopyRegister2(uint32_t vdst, uint32_t vsrc)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Implement "move-result". Copy the category-1 value from the result register to another
   // register, and reset the result register.
-  void CopyResultRegister1(uint32_t vdst, bool is_reference);
+  void CopyResultRegister1(uint32_t vdst, bool is_reference)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Implement "move-result-wide". Copy the category-2 value from the result register to another
   // register, and reset the result register.
-  void CopyResultRegister2(uint32_t vdst);
+  void CopyResultRegister2(uint32_t vdst)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Set the invisible result register to unknown
   void SetResultTypeToUnknown();
@@ -81,15 +85,18 @@
   // Set the type of register N, verifying that the register is valid.  If "newType" is the "Lo"
   // part of a 64-bit value, register N+1 will be set to "newType+1".
   // The register index was validated during the static pass, so we don't need to check it here.
-  bool SetRegisterType(uint32_t vdst, const RegType& new_type);
+  bool SetRegisterType(uint32_t vdst, const RegType& new_type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /* Set the type of the "result" register. */
-  void SetResultRegisterType(const RegType& new_type);
+  void SetResultRegisterType(const RegType& new_type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Get the type of register vsrc.
   const RegType& GetRegisterType(uint32_t vsrc) const;
 
-  bool VerifyRegisterType(uint32_t vsrc, const RegType& check_type);
+  bool VerifyRegisterType(uint32_t vsrc, const RegType& check_type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void CopyFromLine(const RegisterLine* src) {
     DCHECK_EQ(num_regs_, src->num_regs_);
@@ -98,7 +105,7 @@
     reg_to_lock_depths_ = src->reg_to_lock_depths_;
   }
 
-  std::string Dump() const;
+  std::string Dump() const SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   void FillWithGarbage() {
     memset(line_.get(), 0xf1, num_regs_ * sizeof(uint16_t));
@@ -114,7 +121,8 @@
    * to prevent them from being used (otherwise, MarkRefsAsInitialized would mark the old ones and
    * the new ones at the same time).
    */
-  void MarkUninitRefsAsInvalid(const RegType& uninit_type);
+  void MarkUninitRefsAsInvalid(const RegType& uninit_type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Update all registers holding "uninit_type" to instead hold the corresponding initialized
@@ -152,14 +160,16 @@
    * The argument count is in vA, and the first argument is in vC, for both "simple" and "range"
    * versions. We just need to make sure vA is >= 1 and then return vC.
    */
-  const RegType& GetInvocationThis(const DecodedInstruction& dec_insn);
+  const RegType& GetInvocationThis(const DecodedInstruction& dec_insn)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Verify types for a simple two-register instruction (e.g. "neg-int").
    * "dst_type" is stored into vA, and "src_type" is verified against vB.
    */
   void CheckUnaryOp(const DecodedInstruction& dec_insn,
-                    const RegType& dst_type, const RegType& src_type);
+                    const RegType& dst_type, const RegType& src_type)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Verify types for a simple three-register instruction (e.g. "add-int").
@@ -168,7 +178,8 @@
    */
   void CheckBinaryOp(const DecodedInstruction& dec_insn,
                      const RegType& dst_type, const RegType& src_type1, const RegType& src_type2,
-                     bool check_boolean_op);
+                     bool check_boolean_op)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Verify types for a binary "2addr" operation. "src_type1"/"src_type2"
@@ -177,7 +188,8 @@
   void CheckBinaryOp2addr(const DecodedInstruction& dec_insn,
                           const RegType& dst_type,
                           const RegType& src_type1, const RegType& src_type2,
-                          bool check_boolean_op);
+                          bool check_boolean_op)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   /*
    * Verify types for A two-register instruction with a literal constant (e.g. "add-int/lit8").
@@ -186,7 +198,8 @@
    * If "check_boolean_op" is set, we use the constant value in vC.
    */
   void CheckLiteralOp(const DecodedInstruction& dec_insn,
-                      const RegType& dst_type, const RegType& src_type, bool check_boolean_op);
+                      const RegType& dst_type, const RegType& src_type, bool check_boolean_op)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   // Verify/push monitor onto the monitor stack, locking the value in reg_idx at location insn_idx.
   void PushMonitor(uint32_t reg_idx, int32_t insn_idx);
@@ -203,7 +216,8 @@
   // is empty, failing and returning false if not.
   bool VerifyMonitorStackEmpty();
 
-  bool MergeRegisters(const RegisterLine* incoming_line);
+  bool MergeRegisters(const RegisterLine* incoming_line)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   size_t GetMaxNonZeroReferenceReg(size_t max_ref_reg) {
     size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
diff --git a/src/well_known_classes.cc b/src/well_known_classes.cc
index a0397ff..cae7369 100644
--- a/src/well_known_classes.cc
+++ b/src/well_known_classes.cc
@@ -25,6 +25,7 @@
 namespace art {
 
 jclass WellKnownClasses::com_android_dex_Dex;
+jclass WellKnownClasses::dalvik_system_PathClassLoader;
 jclass WellKnownClasses::java_lang_ClassLoader;
 jclass WellKnownClasses::java_lang_ClassNotFoundException;
 jclass WellKnownClasses::java_lang_Daemons;
@@ -43,14 +44,22 @@
 jclass WellKnownClasses::org_apache_harmony_dalvik_ddmc_DdmServer;
 
 jmethodID WellKnownClasses::com_android_dex_Dex_create;
-jmethodID WellKnownClasses::java_lang_ClassNotFoundException_init;
+jmethodID WellKnownClasses::java_lang_Boolean_valueOf;
+jmethodID WellKnownClasses::java_lang_Byte_valueOf;
+jmethodID WellKnownClasses::java_lang_Character_valueOf;
 jmethodID WellKnownClasses::java_lang_ClassLoader_loadClass;
+jmethodID WellKnownClasses::java_lang_ClassNotFoundException_init;
 jmethodID WellKnownClasses::java_lang_Daemons_requestGC;
 jmethodID WellKnownClasses::java_lang_Daemons_requestHeapTrim;
 jmethodID WellKnownClasses::java_lang_Daemons_start;
+jmethodID WellKnownClasses::java_lang_Double_valueOf;
+jmethodID WellKnownClasses::java_lang_Float_valueOf;
+jmethodID WellKnownClasses::java_lang_Integer_valueOf;
+jmethodID WellKnownClasses::java_lang_Long_valueOf;
 jmethodID WellKnownClasses::java_lang_ref_FinalizerReference_add;
 jmethodID WellKnownClasses::java_lang_ref_ReferenceQueue_add;
 jmethodID WellKnownClasses::java_lang_reflect_InvocationHandler_invoke;
+jmethodID WellKnownClasses::java_lang_Short_valueOf;
 jmethodID WellKnownClasses::java_lang_Thread_init;
 jmethodID WellKnownClasses::java_lang_Thread_run;
 jmethodID WellKnownClasses::java_lang_Thread$UncaughtExceptionHandler_uncaughtException;
@@ -102,8 +111,15 @@
   return mid;
 }
 
-void WellKnownClasses::Init(JNIEnv* env) {
+static jmethodID CachePrimitiveBoxingMethod(JNIEnv* env, char prim_name, const char* boxed_name) {
+  ScopedLocalRef<jclass> boxed_class(env, env->FindClass(boxed_name));
+  return CacheMethod(env, boxed_class.get(), true, "valueOf",
+                     StringPrintf("(%c)L%s;", prim_name, boxed_name).c_str());
+}
+
+void WellKnownClasses::InitClasses(JNIEnv* env) {
   com_android_dex_Dex = CacheClass(env, "com/android/dex/Dex");
+  dalvik_system_PathClassLoader = CacheClass(env, "dalvik/system/PathClassLoader");
   java_lang_ClassLoader = CacheClass(env, "java/lang/ClassLoader");
   java_lang_ClassNotFoundException = CacheClass(env, "java/lang/ClassNotFoundException");
   java_lang_Daemons = CacheClass(env, "java/lang/Daemons");
@@ -120,6 +136,10 @@
   java_nio_ReadWriteDirectByteBuffer = CacheClass(env, "java/nio/ReadWriteDirectByteBuffer");
   org_apache_harmony_dalvik_ddmc_Chunk = CacheClass(env, "org/apache/harmony/dalvik/ddmc/Chunk");
   org_apache_harmony_dalvik_ddmc_DdmServer = CacheClass(env, "org/apache/harmony/dalvik/ddmc/DdmServer");
+}
+
+void WellKnownClasses::Init(JNIEnv* env) {
+  InitClasses(env);
 
   com_android_dex_Dex_create = CacheMethod(env, com_android_dex_Dex, true, "create", "(Ljava/nio/ByteBuffer;)Lcom/android/dex/Dex;");
   java_lang_ClassNotFoundException_init = CacheMethod(env, java_lang_ClassNotFoundException, false, "<init>", "(Ljava/lang/String;Ljava/lang/Throwable;)V");
@@ -161,6 +181,15 @@
   org_apache_harmony_dalvik_ddmc_Chunk_length = CacheField(env, org_apache_harmony_dalvik_ddmc_Chunk, false, "length", "I");
   org_apache_harmony_dalvik_ddmc_Chunk_offset = CacheField(env, org_apache_harmony_dalvik_ddmc_Chunk, false, "offset", "I");
   org_apache_harmony_dalvik_ddmc_Chunk_type = CacheField(env, org_apache_harmony_dalvik_ddmc_Chunk, false, "type", "I");
+
+  java_lang_Boolean_valueOf = CachePrimitiveBoxingMethod(env, 'Z', "java/lang/Boolean");
+  java_lang_Byte_valueOf = CachePrimitiveBoxingMethod(env, 'B', "java/lang/Byte");
+  java_lang_Character_valueOf = CachePrimitiveBoxingMethod(env, 'C', "java/lang/Character");
+  java_lang_Double_valueOf = CachePrimitiveBoxingMethod(env, 'D', "java/lang/Double");
+  java_lang_Float_valueOf = CachePrimitiveBoxingMethod(env, 'F', "java/lang/Float");
+  java_lang_Integer_valueOf = CachePrimitiveBoxingMethod(env, 'I', "java/lang/Integer");
+  java_lang_Long_valueOf = CachePrimitiveBoxingMethod(env, 'J', "java/lang/Long");
+  java_lang_Short_valueOf = CachePrimitiveBoxingMethod(env, 'S', "java/lang/Short");
 }
 
 Class* WellKnownClasses::ToClass(jclass global_jclass) {
diff --git a/src/well_known_classes.h b/src/well_known_classes.h
index d2c4959..15961e2 100644
--- a/src/well_known_classes.h
+++ b/src/well_known_classes.h
@@ -18,6 +18,7 @@
 #define ART_SRC_WELL_KNOWN_CLASSES_H_
 
 #include "jni.h"
+#include "mutex.h"
 
 namespace art {
 
@@ -28,11 +29,14 @@
 // we keep them separate).
 
 struct WellKnownClasses {
+  static void InitClasses(JNIEnv* env);
   static void Init(JNIEnv* env);
 
-  static Class* ToClass(jclass global_jclass);
+  static Class* ToClass(jclass global_jclass)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_);
 
   static jclass com_android_dex_Dex;
+  static jclass dalvik_system_PathClassLoader;
   static jclass java_lang_ClassLoader;
   static jclass java_lang_ClassNotFoundException;
   static jclass java_lang_Daemons;
@@ -51,14 +55,22 @@
   static jclass org_apache_harmony_dalvik_ddmc_DdmServer;
 
   static jmethodID com_android_dex_Dex_create;
+  static jmethodID java_lang_Boolean_valueOf;
+  static jmethodID java_lang_Byte_valueOf;
+  static jmethodID java_lang_Character_valueOf;
   static jmethodID java_lang_ClassLoader_loadClass;
   static jmethodID java_lang_ClassNotFoundException_init;
   static jmethodID java_lang_Daemons_requestGC;
   static jmethodID java_lang_Daemons_requestHeapTrim;
   static jmethodID java_lang_Daemons_start;
+  static jmethodID java_lang_Double_valueOf;
+  static jmethodID java_lang_Float_valueOf;
+  static jmethodID java_lang_Integer_valueOf;
+  static jmethodID java_lang_Long_valueOf;
   static jmethodID java_lang_ref_FinalizerReference_add;
   static jmethodID java_lang_ref_ReferenceQueue_add;
   static jmethodID java_lang_reflect_InvocationHandler_invoke;
+  static jmethodID java_lang_Short_valueOf;
   static jmethodID java_lang_Thread_init;
   static jmethodID java_lang_Thread_run;
   static jmethodID java_lang_Thread$UncaughtExceptionHandler_uncaughtException;
diff --git a/test/ReferenceMap/stack_walk_refmap_jni.cc b/test/ReferenceMap/stack_walk_refmap_jni.cc
index ddda260..0aefa97 100644
--- a/test/ReferenceMap/stack_walk_refmap_jni.cc
+++ b/test/ReferenceMap/stack_walk_refmap_jni.cc
@@ -20,6 +20,7 @@
 #include "class_linker.h"
 #include "object.h"
 #include "object_utils.h"
+#include "scoped_thread_state_change.h"
 #include "thread.h"
 #include "jni.h"
 #include "verifier/gc_map.h"
@@ -42,11 +43,12 @@
 
 struct ReferenceMap2Visitor : public StackVisitor {
   explicit ReferenceMap2Visitor(const ManagedStack* stack,
-                                const std::vector<TraceStackFrame>* trace_stack) :
-    StackVisitor(stack, trace_stack, NULL) {
+                                const std::vector<TraceStackFrame>* trace_stack)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
+      : StackVisitor(stack, trace_stack, NULL) {
   }
 
-  bool VisitFrame() {
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* m = GetMethod();
     if (!m || m->IsNative() || m->IsRuntimeMethod() || IsShadowFrame()) {
       return true;
@@ -62,7 +64,7 @@
 
     // Enable this to dump reference map to LOG(INFO)
     if (false) {
-      ScopedThreadStateChange tsc(Thread::Current(), kRunnable);
+      ScopedObjectAccess ts(Thread::Current());
       art::verifier::MethodVerifier::VerifyMethodAndDump(m);
     }
     const uint8_t* ref_bitmap = NULL;
@@ -281,6 +283,7 @@
 
 extern "C" JNIEXPORT jint JNICALL Java_ReferenceMap_refmap(JNIEnv*, jobject, jint count) {
   // Visitor
+  ScopedObjectAccess ts(Thread::Current());
   ReferenceMap2Visitor mapper(Thread::Current()->GetManagedStack(),
                               Thread::Current()->GetTraceStack());
   mapper.WalkStack();
diff --git a/test/StackWalk/stack_walk_jni.cc b/test/StackWalk/stack_walk_jni.cc
index 9382b8f..41e8d58 100644
--- a/test/StackWalk/stack_walk_jni.cc
+++ b/test/StackWalk/stack_walk_jni.cc
@@ -21,6 +21,7 @@
 #include "object.h"
 #include "object_utils.h"
 #include "jni.h"
+#include "scoped_thread_state_change.h"
 #include "verifier/gc_map.h"
 
 namespace art {
@@ -41,10 +42,11 @@
 struct TestReferenceMapVisitor : public StackVisitor {
   explicit TestReferenceMapVisitor(const ManagedStack* stack,
                                    const std::vector<TraceStackFrame>* trace_stack)
+      SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_)
       : StackVisitor(stack, trace_stack, NULL) {
   }
 
-  bool VisitFrame() {
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(GlobalSynchronization::mutator_lock_) {
     Method* m = GetMethod();
     CHECK(m != NULL);
     LOG(INFO) << "At " << PrettyMethod(m, false);
@@ -100,6 +102,7 @@
 };
 
 extern "C" JNIEXPORT jint JNICALL Java_StackWalk_refmap(JNIEnv*, jobject, jint count) {
+  ScopedObjectAccess ts(Thread::Current());
   CHECK_EQ(count, 0);
   gJava_StackWalk_refmap_calls++;
 
@@ -112,6 +115,7 @@
 }
 
 extern "C" JNIEXPORT jint JNICALL Java_StackWalk2_refmap2(JNIEnv*, jobject, jint count) {
+  ScopedObjectAccess ts(Thread::Current());
   gJava_StackWalk_refmap_calls++;
 
   // Visitor