Remove unneeded ScopedGCCriticalSections from openjdkjvmti.

We used ScopedGCCriticalSections in many parts of the openjdkjvmti
often unnecessarily.

We removed a totally unneeded GCCriticalSection that was acquired when
modifying the instrumentation listeners.

We also removed RequestGCSafeSynchronousCheckpoint and the change to
use GcRoots instead.  We added RequestGCSafeSynchronousCheckpoint as a
way to prevent the GC from running when we are doing some JVMTI
operations on other threads.  This could interact with running GCs in
non-trivial ways, potentially causing deadlocks in some situations.
This changes the code to instead use read-barriers and GcRoots to
ensure that we do not read data from the wrong gc space.

In order for this to work correctly we need to make sure that we are
only ever reading the GcRoots from the thread that eventually needs
the reference. This required some re-writing of the checkpoint
closures since they would often just call AddLocalReference on
non-local Thread objects.

Changes to Thread::RequestSynchronousCheckpoint and art::Barrier were
needed in order to allow this all to work since we needed to ensure
that the requesting thread did not suspend as the checkpoint was being
run.

This is a partial revert of commit 7585b91bfc77b8.

Bug: 67838964
Bug: 76003243

Test: use gapid
Test: ./test.py --host -j50
Test: ./art/tools/run-libjdwp-tests.sh --mode=host

Change-Id: I26d871089829639eccb973cecc315194f7bcf681
diff --git a/openjdkjvmti/events.cc b/openjdkjvmti/events.cc
index 07b1529..de67871 100644
--- a/openjdkjvmti/events.cc
+++ b/openjdkjvmti/events.cc
@@ -940,9 +940,6 @@
   }
   art::ScopedThreadStateChange stsc(art::Thread::Current(), art::ThreadState::kNative);
   art::instrumentation::Instrumentation* instr = art::Runtime::Current()->GetInstrumentation();
-  art::gc::ScopedGCCriticalSection gcs(art::Thread::Current(),
-                                       art::gc::kGcCauseInstrumentation,
-                                       art::gc::kCollectorTypeInstrumentation);
   art::ScopedSuspendAll ssa("jvmti method tracing installation");
   if (enable) {
     instr->AddListener(listener, new_events);
diff --git a/openjdkjvmti/ti_method.cc b/openjdkjvmti/ti_method.cc
index 83d64ef..bf2e6cd 100644
--- a/openjdkjvmti/ti_method.cc
+++ b/openjdkjvmti/ti_method.cc
@@ -42,6 +42,7 @@
 #include "dex/dex_file_types.h"
 #include "dex/modifiers.h"
 #include "events-inl.h"
+#include "gc_root-inl.h"
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
@@ -546,13 +547,12 @@
 
 class CommonLocalVariableClosure : public art::Closure {
  public:
-  CommonLocalVariableClosure(art::Thread* caller,
-                             jint depth,
-                             jint slot)
-      : result_(ERR(INTERNAL)), caller_(caller), depth_(depth), slot_(slot) {}
+  CommonLocalVariableClosure(jint depth, jint slot)
+      : result_(ERR(INTERNAL)), depth_(depth), slot_(slot) {}
 
   void Run(art::Thread* self) OVERRIDE REQUIRES(art::Locks::mutator_lock_) {
     art::Locks::mutator_lock_->AssertSharedHeld(art::Thread::Current());
+    art::ScopedAssertNoThreadSuspension sants("CommonLocalVariableClosure::Run");
     std::unique_ptr<art::Context> context(art::Context::Create());
     FindFrameAtDepthVisitor visitor(self, context.get(), depth_);
     visitor.WalkStack();
@@ -597,17 +597,17 @@
     }
   }
 
-  jvmtiError GetResult() const {
+  virtual jvmtiError GetResult() {
     return result_;
   }
 
  protected:
   virtual jvmtiError Execute(art::ArtMethod* method, art::StackVisitor& visitor)
-      REQUIRES(art::Locks::mutator_lock_) = 0;
+      REQUIRES_SHARED(art::Locks::mutator_lock_) = 0;
   virtual jvmtiError GetTypeError(art::ArtMethod* method,
                                   art::Primitive::Type type,
                                   const std::string& descriptor)
-      REQUIRES(art::Locks::mutator_lock_)  = 0;
+      REQUIRES_SHARED(art::Locks::mutator_lock_)  = 0;
 
   jvmtiError GetSlotType(art::ArtMethod* method,
                          uint32_t dex_pc,
@@ -674,25 +674,35 @@
   }
 
   jvmtiError result_;
-  art::Thread* caller_;
   jint depth_;
   jint slot_;
 };
 
 class GetLocalVariableClosure : public CommonLocalVariableClosure {
  public:
-  GetLocalVariableClosure(art::Thread* caller,
-                          jint depth,
+  GetLocalVariableClosure(jint depth,
                           jint slot,
                           art::Primitive::Type type,
                           jvalue* val)
-      : CommonLocalVariableClosure(caller, depth, slot), type_(type), val_(val) {}
+      : CommonLocalVariableClosure(depth, slot),
+        type_(type),
+        val_(val),
+        obj_val_(nullptr) {}
+
+  virtual jvmtiError GetResult() REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (result_ == OK && type_ == art::Primitive::kPrimNot) {
+      val_->l = obj_val_.IsNull()
+          ? nullptr
+          : art::Thread::Current()->GetJniEnv()->AddLocalReference<jobject>(obj_val_.Read());
+    }
+    return CommonLocalVariableClosure::GetResult();
+  }
 
  protected:
   jvmtiError GetTypeError(art::ArtMethod* method ATTRIBUTE_UNUSED,
                           art::Primitive::Type slot_type,
                           const std::string& descriptor ATTRIBUTE_UNUSED)
-      OVERRIDE REQUIRES(art::Locks::mutator_lock_) {
+      OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
     switch (slot_type) {
       case art::Primitive::kPrimByte:
       case art::Primitive::kPrimChar:
@@ -712,7 +722,7 @@
   }
 
   jvmtiError Execute(art::ArtMethod* method, art::StackVisitor& visitor)
-      OVERRIDE REQUIRES(art::Locks::mutator_lock_) {
+      OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
     switch (type_) {
       case art::Primitive::kPrimNot: {
         uint32_t ptr_val;
@@ -722,8 +732,8 @@
                              &ptr_val)) {
           return ERR(OPAQUE_FRAME);
         }
-        art::ObjPtr<art::mirror::Object> obj(reinterpret_cast<art::mirror::Object*>(ptr_val));
-        val_->l = obj.IsNull() ? nullptr : caller_->GetJniEnv()->AddLocalReference<jobject>(obj);
+        obj_val_ = art::GcRoot<art::mirror::Object>(
+            reinterpret_cast<art::mirror::Object*>(ptr_val));
         break;
       }
       case art::Primitive::kPrimInt:
@@ -760,6 +770,7 @@
  private:
   art::Primitive::Type type_;
   jvalue* val_;
+  art::GcRoot<art::mirror::Object> obj_val_;
 };
 
 jvmtiError MethodUtil::GetLocalVariableGeneric(jvmtiEnv* env ATTRIBUTE_UNUSED,
@@ -782,9 +793,12 @@
     art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return err;
   }
-  GetLocalVariableClosure c(self, depth, slot, type, val);
-  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &c)) {
+  art::ScopedAssertNoThreadSuspension sants("Performing GetLocalVariable");
+  GetLocalVariableClosure c(depth, slot, type, val);
+  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.  We
+  // need to avoid suspending as we wait for the checkpoint to occur since we are (potentially)
+  // transfering a GcRoot across threads.
+  if (!target->RequestSynchronousCheckpoint(&c, art::ThreadState::kRunnable)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
     return c.GetResult();
@@ -798,13 +812,13 @@
                           jint slot,
                           art::Primitive::Type type,
                           jvalue val)
-      : CommonLocalVariableClosure(caller, depth, slot), type_(type), val_(val) {}
+      : CommonLocalVariableClosure(depth, slot), caller_(caller), type_(type), val_(val) {}
 
  protected:
   jvmtiError GetTypeError(art::ArtMethod* method,
                           art::Primitive::Type slot_type,
                           const std::string& descriptor)
-      OVERRIDE REQUIRES(art::Locks::mutator_lock_) {
+      OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
     switch (slot_type) {
       case art::Primitive::kPrimNot: {
         if (type_ != art::Primitive::kPrimNot) {
@@ -840,7 +854,7 @@
   }
 
   jvmtiError Execute(art::ArtMethod* method, art::StackVisitor& visitor)
-      OVERRIDE REQUIRES(art::Locks::mutator_lock_) {
+      OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
     switch (type_) {
       case art::Primitive::kPrimNot: {
         uint32_t ptr_val;
@@ -887,6 +901,7 @@
   }
 
  private:
+  art::Thread* caller_;
   art::Primitive::Type type_;
   jvalue val_;
 };
@@ -913,7 +928,7 @@
   }
   SetLocalVariableClosure c(self, depth, slot, type, val);
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &c)) {
+  if (!target->RequestSynchronousCheckpoint(&c)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
     return c.GetResult();
@@ -922,13 +937,13 @@
 
 class GetLocalInstanceClosure : public art::Closure {
  public:
-  GetLocalInstanceClosure(art::Thread* caller, jint depth, jobject* val)
+  explicit GetLocalInstanceClosure(jint depth)
       : result_(ERR(INTERNAL)),
-        caller_(caller),
         depth_(depth),
-        val_(val) {}
+        val_(nullptr) {}
 
   void Run(art::Thread* self) OVERRIDE REQUIRES(art::Locks::mutator_lock_) {
+    art::ScopedAssertNoThreadSuspension sants("GetLocalInstanceClosure::Run");
     art::Locks::mutator_lock_->AssertSharedHeld(art::Thread::Current());
     std::unique_ptr<art::Context> context(art::Context::Create());
     FindFrameAtDepthVisitor visitor(self, context.get(), depth_);
@@ -939,19 +954,22 @@
       return;
     }
     result_ = OK;
-    art::ObjPtr<art::mirror::Object> obj = visitor.GetThisObject();
-    *val_ = obj.IsNull() ? nullptr : caller_->GetJniEnv()->AddLocalReference<jobject>(obj);
+    val_ = art::GcRoot<art::mirror::Object>(visitor.GetThisObject());
   }
 
-  jvmtiError GetResult() const {
+  jvmtiError GetResult(jobject* data_out) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (result_ == OK) {
+      *data_out = val_.IsNull()
+          ? nullptr
+          : art::Thread::Current()->GetJniEnv()->AddLocalReference<jobject>(val_.Read());
+    }
     return result_;
   }
 
  private:
   jvmtiError result_;
-  art::Thread* caller_;
   jint depth_;
-  jobject* val_;
+  art::GcRoot<art::mirror::Object> val_;
 };
 
 jvmtiError MethodUtil::GetLocalInstance(jvmtiEnv* env ATTRIBUTE_UNUSED,
@@ -970,12 +988,15 @@
     art::Locks::thread_list_lock_->ExclusiveUnlock(self);
     return err;
   }
-  GetLocalInstanceClosure c(self, depth, data);
-  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &c)) {
+  art::ScopedAssertNoThreadSuspension sants("Performing GetLocalInstance");
+  GetLocalInstanceClosure c(depth);
+  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.  We
+  // need to avoid suspending as we wait for the checkpoint to occur since we are (potentially)
+  // transfering a GcRoot across threads.
+  if (!target->RequestSynchronousCheckpoint(&c, art::ThreadState::kRunnable)) {
     return ERR(THREAD_NOT_ALIVE);
   } else {
-    return c.GetResult();
+    return c.GetResult(data);
   }
 }
 
diff --git a/openjdkjvmti/ti_monitor.cc b/openjdkjvmti/ti_monitor.cc
index 94408ba..1cfc64a 100644
--- a/openjdkjvmti/ti_monitor.cc
+++ b/openjdkjvmti/ti_monitor.cc
@@ -37,6 +37,7 @@
 #include <mutex>
 
 #include "art_jvmti.h"
+#include "gc_root-inl.h"
 #include "monitor.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
@@ -351,19 +352,17 @@
   }
   struct GetContendedMonitorClosure : public art::Closure {
    public:
-    explicit GetContendedMonitorClosure(art::Thread* current, jobject* out)
-        : result_thread_(current), out_(out) {}
+    GetContendedMonitorClosure() : out_(nullptr) {}
 
     void Run(art::Thread* target_thread) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      art::ScopedAssertNoThreadSuspension sants("GetContendedMonitorClosure::Run");
       switch (target_thread->GetState()) {
         // These three we are actually currently waiting on a monitor and have sent the appropriate
         // events (if anyone is listening).
         case art::kBlocked:
         case art::kTimedWaiting:
         case art::kWaiting: {
-          art::mirror::Object* mon = art::Monitor::GetContendedMonitor(target_thread);
-          *out_ = (mon == nullptr) ? nullptr
-                                   : result_thread_->GetJniEnv()->AddLocalReference<jobject>(mon);
+          out_ = art::GcRoot<art::mirror::Object>(art::Monitor::GetContendedMonitor(target_thread));
           return;
         }
         case art::kTerminated:
@@ -390,22 +389,30 @@
         case art::kStarting:
         case art::kNative:
         case art::kSuspended: {
-          // We aren't currently (explicitly) waiting for a monitor anything so just return null.
-          *out_ = nullptr;
+          // We aren't currently (explicitly) waiting for a monitor so just return null.
           return;
         }
       }
     }
 
+    jobject GetResult() REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      return out_.IsNull()
+          ? nullptr
+          : art::Thread::Current()->GetJniEnv()->AddLocalReference<jobject>(out_.Read());
+    }
+
    private:
-    art::Thread* result_thread_;
-    jobject* out_;
+    art::GcRoot<art::mirror::Object> out_;
   };
-  GetContendedMonitorClosure closure(self, monitor);
-  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &closure)) {
+  art::ScopedAssertNoThreadSuspension sants("Performing GetCurrentContendedMonitor");
+  GetContendedMonitorClosure closure;
+  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.  We
+  // need to avoid suspending as we wait for the checkpoint to occur since we are (potentially)
+  // transfering a GcRoot across threads.
+  if (!target->RequestSynchronousCheckpoint(&closure, art::ThreadState::kRunnable)) {
     return ERR(THREAD_NOT_ALIVE);
   }
+  *monitor = closure.GetResult();
   return OK;
 }
 
diff --git a/openjdkjvmti/ti_stack.cc b/openjdkjvmti/ti_stack.cc
index 373944f..3936f7a 100644
--- a/openjdkjvmti/ti_stack.cc
+++ b/openjdkjvmti/ti_stack.cc
@@ -258,7 +258,7 @@
                                        static_cast<size_t>(start_depth),
                                        static_cast<size_t>(max_frame_count));
     // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-    if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(thread, &closure)) {
+    if (!thread->RequestSynchronousCheckpoint(&closure)) {
       return ERR(THREAD_NOT_ALIVE);
     }
     *count_ptr = static_cast<jint>(closure.index);
@@ -269,7 +269,7 @@
   } else {
     GetStackTraceVectorClosure closure(0, 0);
     // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-    if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(thread, &closure)) {
+    if (!thread->RequestSynchronousCheckpoint(&closure)) {
       return ERR(THREAD_NOT_ALIVE);
     }
 
@@ -713,7 +713,7 @@
 
   GetFrameCountClosure closure;
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(thread, &closure)) {
+  if (!thread->RequestSynchronousCheckpoint(&closure)) {
     return ERR(THREAD_NOT_ALIVE);
   }
 
@@ -803,7 +803,7 @@
 
   GetLocationClosure closure(static_cast<size_t>(depth));
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(thread, &closure)) {
+  if (!thread->RequestSynchronousCheckpoint(&closure)) {
     return ERR(THREAD_NOT_ALIVE);
   }
 
@@ -882,8 +882,8 @@
 template<typename Fn>
 struct MonitorInfoClosure : public art::Closure {
  public:
-  MonitorInfoClosure(art::ScopedObjectAccess& soa, Fn handle_results)
-      : soa_(soa), err_(OK), handle_results_(handle_results) {}
+  explicit MonitorInfoClosure(Fn handle_results)
+      : err_(OK), handle_results_(handle_results) {}
 
   void Run(art::Thread* target) OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
     art::Locks::mutator_lock_->AssertSharedHeld(art::Thread::Current());
@@ -893,7 +893,7 @@
     // Find any other monitors, including ones acquired in native code.
     art::RootInfo root_info(art::kRootVMInternal);
     target->GetJniEnv()->VisitMonitorRoots(&visitor, root_info);
-    err_ = handle_results_(soa_, visitor);
+    err_ = handle_results_(visitor);
   }
 
   jvmtiError GetError() {
@@ -901,17 +901,18 @@
   }
 
  private:
-  art::ScopedObjectAccess& soa_;
   jvmtiError err_;
   Fn handle_results_;
 };
 
 
 template <typename Fn>
-static jvmtiError GetOwnedMonitorInfoCommon(jthread thread, Fn handle_results) {
+static jvmtiError GetOwnedMonitorInfoCommon(const art::ScopedObjectAccessAlreadyRunnable& soa,
+                                            jthread thread,
+                                            Fn handle_results)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
   art::Thread* self = art::Thread::Current();
-  art::ScopedObjectAccess soa(self);
-  MonitorInfoClosure<Fn> closure(soa, handle_results);
+  MonitorInfoClosure<Fn> closure(handle_results);
   bool called_method = false;
   {
     art::Locks::thread_list_lock_->ExclusiveLock(self);
@@ -924,7 +925,7 @@
     if (target != self) {
       called_method = true;
       // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-      if (!ThreadUtil::RequestGCSafeSynchronousCheckpoint(target, &closure)) {
+      if (!target->RequestSynchronousCheckpoint(&closure)) {
         return ERR(THREAD_NOT_ALIVE);
       }
     } else {
@@ -948,23 +949,33 @@
   if (info_cnt == nullptr || info_ptr == nullptr) {
     return ERR(NULL_POINTER);
   }
-  auto handle_fun = [&] (art::ScopedObjectAccess& soa, MonitorVisitor& visitor)
-      REQUIRES_SHARED(art::Locks::mutator_lock_) {
-    auto nbytes = sizeof(jvmtiMonitorStackDepthInfo) * visitor.monitors.size();
-    jvmtiError err = env->Allocate(nbytes, reinterpret_cast<unsigned char**>(info_ptr));
-    if (err != OK) {
-      return err;
-    }
-    *info_cnt = visitor.monitors.size();
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  std::vector<art::GcRoot<art::mirror::Object>> mons;
+  std::vector<uint32_t> depths;
+  auto handle_fun = [&] (MonitorVisitor& visitor) REQUIRES_SHARED(art::Locks::mutator_lock_) {
     for (size_t i = 0; i < visitor.monitors.size(); i++) {
-      (*info_ptr)[i] = {
-        soa.Env()->AddLocalReference<jobject>(visitor.monitors[i].Get()),
-        visitor.stack_depths[i]
-      };
+      mons.push_back(art::GcRoot<art::mirror::Object>(visitor.monitors[i].Get()));
+      depths.push_back(visitor.stack_depths[i]);
     }
     return OK;
   };
-  return GetOwnedMonitorInfoCommon(thread, handle_fun);
+  jvmtiError err = GetOwnedMonitorInfoCommon(soa, thread, handle_fun);
+  if (err != OK) {
+    return err;
+  }
+  auto nbytes = sizeof(jvmtiMonitorStackDepthInfo) * mons.size();
+  err = env->Allocate(nbytes, reinterpret_cast<unsigned char**>(info_ptr));
+  if (err != OK) {
+    return err;
+  }
+  *info_cnt = mons.size();
+  for (uint32_t i = 0; i < mons.size(); i++) {
+    (*info_ptr)[i] = {
+      soa.AddLocalReference<jobject>(mons[i].Read()),
+      static_cast<jint>(depths[i])
+    };
+  }
+  return err;
 }
 
 jvmtiError StackUtil::GetOwnedMonitorInfo(jvmtiEnv* env,
@@ -974,21 +985,28 @@
   if (owned_monitors_ptr == nullptr || owned_monitors_ptr == nullptr) {
     return ERR(NULL_POINTER);
   }
-  auto handle_fun = [&] (art::ScopedObjectAccess& soa, MonitorVisitor& visitor)
-      REQUIRES_SHARED(art::Locks::mutator_lock_) {
-    auto nbytes = sizeof(jobject) * visitor.monitors.size();
-    jvmtiError err = env->Allocate(nbytes, reinterpret_cast<unsigned char**>(owned_monitors_ptr));
-    if (err != OK) {
-      return err;
-    }
-    *owned_monitor_count_ptr = visitor.monitors.size();
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  std::vector<art::GcRoot<art::mirror::Object>> mons;
+  auto handle_fun = [&] (MonitorVisitor& visitor) REQUIRES_SHARED(art::Locks::mutator_lock_) {
     for (size_t i = 0; i < visitor.monitors.size(); i++) {
-      (*owned_monitors_ptr)[i] =
-          soa.Env()->AddLocalReference<jobject>(visitor.monitors[i].Get());
+      mons.push_back(art::GcRoot<art::mirror::Object>(visitor.monitors[i].Get()));
     }
     return OK;
   };
-  return GetOwnedMonitorInfoCommon(thread, handle_fun);
+  jvmtiError err = GetOwnedMonitorInfoCommon(soa, thread, handle_fun);
+  if (err != OK) {
+    return err;
+  }
+  auto nbytes = sizeof(jobject) * mons.size();
+  err = env->Allocate(nbytes, reinterpret_cast<unsigned char**>(owned_monitors_ptr));
+  if (err != OK) {
+    return err;
+  }
+  *owned_monitor_count_ptr = mons.size();
+  for (uint32_t i = 0; i < mons.size(); i++) {
+    (*owned_monitors_ptr)[i] = soa.AddLocalReference<jobject>(mons[i].Read());
+  }
+  return err;
 }
 
 jvmtiError StackUtil::NotifyFramePop(jvmtiEnv* env, jthread thread, jint depth) {
diff --git a/openjdkjvmti/ti_thread.cc b/openjdkjvmti/ti_thread.cc
index 555c5a7..414139c 100644
--- a/openjdkjvmti/ti_thread.cc
+++ b/openjdkjvmti/ti_thread.cc
@@ -1077,7 +1077,7 @@
   };
   StopThreadClosure c(exc);
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-  if (RequestGCSafeSynchronousCheckpoint(target, &c)) {
+  if (target->RequestSynchronousCheckpoint(&c)) {
     return OK;
   } else {
     // Something went wrong, probably the thread died.
@@ -1100,29 +1100,4 @@
   return OK;
 }
 
-class GcCriticalSectionClosure : public art::Closure {
- public:
-  explicit GcCriticalSectionClosure(art::Closure* wrapped) : wrapped_(wrapped) {}
-
-  void Run(art::Thread* self) OVERRIDE {
-    if (art::kIsDebugBuild) {
-      art::Locks::thread_list_lock_->AssertNotHeld(art::Thread::Current());
-    }
-    // This might block as it waits for any in-progress GCs to finish but this is fine since we
-    // released the Thread-list-lock prior to calling this in RequestSynchronousCheckpoint.
-    art::gc::ScopedGCCriticalSection sgccs(art::Thread::Current(),
-                                           art::gc::kGcCauseDebugger,
-                                           art::gc::kCollectorTypeDebugger);
-    wrapped_->Run(self);
-  }
-
- private:
-  art::Closure* wrapped_;
-};
-
-bool ThreadUtil::RequestGCSafeSynchronousCheckpoint(art::Thread* thr, art::Closure* function) {
-  GcCriticalSectionClosure gccsc(function);
-  return thr->RequestSynchronousCheckpoint(&gccsc);
-}
-
 }  // namespace openjdkjvmti
diff --git a/openjdkjvmti/ti_thread.h b/openjdkjvmti/ti_thread.h
index 341bffe..c6b6af1 100644
--- a/openjdkjvmti/ti_thread.h
+++ b/openjdkjvmti/ti_thread.h
@@ -134,16 +134,6 @@
     REQUIRES(!art::Locks::user_code_suspension_lock_,
              !art::Locks::thread_suspend_count_lock_);
 
-  // This will request a synchronous checkpoint in such a way as to prevent gc races if a local
-  // variable is taken from one thread's stack and placed in the stack of another thread.
-  // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution. This is
-  // due to the fact that Thread::Current() needs to go to sleep to allow the targeted thread to
-  // execute the checkpoint for us if it is Runnable.
-  static bool RequestGCSafeSynchronousCheckpoint(art::Thread* thr, art::Closure* function)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      RELEASE(art::Locks::thread_list_lock_)
-      REQUIRES(!art::Locks::thread_suspend_count_lock_);
-
  private:
   // We need to make sure only one thread tries to suspend threads at a time so we can get the
   // 'suspend-only-once' behavior the spec requires. Internally, ART considers suspension to be a
diff --git a/runtime/barrier.cc b/runtime/barrier.cc
index 4329a5a..8d3cf45 100644
--- a/runtime/barrier.cc
+++ b/runtime/barrier.cc
@@ -31,6 +31,9 @@
       condition_("GC barrier condition", lock_) {
 }
 
+template void Barrier::Increment<Barrier::kAllowHoldingLocks>(Thread* self, int delta);
+template void Barrier::Increment<Barrier::kDisallowHoldingLocks>(Thread* self, int delta);
+
 void Barrier::Pass(Thread* self) {
   MutexLock mu(self, lock_);
   SetCountLocked(self, count_ - 1);
@@ -45,6 +48,7 @@
   SetCountLocked(self, count);
 }
 
+template <Barrier::LockHandling locks>
 void Barrier::Increment(Thread* self, int delta) {
   MutexLock mu(self, lock_);
   SetCountLocked(self, count_ + delta);
@@ -57,7 +61,11 @@
   // be decremented to zero and a Broadcast will be made on the
   // condition variable, thus waking this up.
   while (count_ != 0) {
-    condition_.Wait(self);
+    if (locks == kAllowHoldingLocks) {
+      condition_.WaitHoldingLocks(self);
+    } else {
+      condition_.Wait(self);
+    }
   }
 }
 
diff --git a/runtime/barrier.h b/runtime/barrier.h
index d7c4661..8a38c4c 100644
--- a/runtime/barrier.h
+++ b/runtime/barrier.h
@@ -35,6 +35,11 @@
 // TODO: Maybe give this a better name.
 class Barrier {
  public:
+  enum LockHandling {
+    kAllowHoldingLocks,
+    kDisallowHoldingLocks,
+  };
+
   explicit Barrier(int count);
   virtual ~Barrier();
 
@@ -50,7 +55,9 @@
   // If these calls are made in that situation, the offending thread is likely to go back
   // to sleep, resulting in a deadlock.
 
-  // Increment the count by delta, wait on condition if count is non zero.
+  // Increment the count by delta, wait on condition if count is non zero.  If LockHandling is
+  // kAllowHoldingLocks we will not check that all locks are released when waiting.
+  template <Barrier::LockHandling locks = kDisallowHoldingLocks>
   void Increment(Thread* self, int delta) REQUIRES(!lock_);
 
   // Increment the count by delta, wait on condition if count is non zero, with a timeout. Returns
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5b03c2d..c0deda8 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1438,8 +1438,12 @@
     barrier_.Pass(self);
   }
 
-  void Wait(Thread* self) {
-    barrier_.Increment(self, 1);
+  void Wait(Thread* self, ThreadState suspend_state) {
+    if (suspend_state != ThreadState::kRunnable) {
+      barrier_.Increment<Barrier::kDisallowHoldingLocks>(self, 1);
+    } else {
+      barrier_.Increment<Barrier::kAllowHoldingLocks>(self, 1);
+    }
   }
 
  private:
@@ -1448,7 +1452,7 @@
 };
 
 // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-bool Thread::RequestSynchronousCheckpoint(Closure* function) {
+bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState suspend_state) {
   Thread* self = Thread::Current();
   if (this == Thread::Current()) {
     Locks::thread_list_lock_->AssertExclusiveHeld(self);
@@ -1496,8 +1500,8 @@
         // Relinquish the thread-list lock. We should not wait holding any locks. We cannot
         // reacquire it since we don't know if 'this' hasn't been deleted yet.
         Locks::thread_list_lock_->ExclusiveUnlock(self);
-        ScopedThreadSuspension sts(self, ThreadState::kWaiting);
-        barrier_closure.Wait(self);
+        ScopedThreadStateChange sts(self, suspend_state);
+        barrier_closure.Wait(self, suspend_state);
         return true;
       }
       // Fall-through.
@@ -1521,7 +1525,7 @@
       // that we can call ModifySuspendCount without racing against ThreadList::Unregister.
       ScopedThreadListLockUnlock stllu(self);
       {
-        ScopedThreadSuspension sts(self, ThreadState::kWaiting);
+        ScopedThreadStateChange sts(self, suspend_state);
         while (GetState() == ThreadState::kRunnable) {
           // We became runnable again. Wait till the suspend triggered in ModifySuspendCount
           // moves us to suspended.
diff --git a/runtime/thread.h b/runtime/thread.h
index 6549fc1..9adae96 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -263,16 +263,31 @@
       WARN_UNUSED
       REQUIRES(Locks::thread_suspend_count_lock_);
 
+  // Requests a checkpoint closure to run on another thread. The closure will be run when the thread
+  // gets suspended. This will return true if the closure was added and will (eventually) be
+  // executed. It returns false otherwise.
+  //
+  // Since multiple closures can be queued and some closures can delay other threads from running no
+  // closure should attempt to suspend another thread while running.
+  // TODO We should add some debug option that verifies this.
   bool RequestCheckpoint(Closure* function)
       REQUIRES(Locks::thread_suspend_count_lock_);
 
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution. This is
   // due to the fact that Thread::Current() needs to go to sleep to allow the targeted thread to
-  // execute the checkpoint for us if it is Runnable.
-  bool RequestSynchronousCheckpoint(Closure* function)
+  // execute the checkpoint for us if it is Runnable. The suspend_state is the state that the thread
+  // will go into while it is awaiting the checkpoint to be run.
+  // NB Passing ThreadState::kRunnable may cause the current thread to wait in a condition variable
+  // while holding the mutator_lock_.  Callers should ensure that this will not cause any problems
+  // for the closure or the rest of the system.
+  // NB Since multiple closures can be queued and some closures can delay other threads from running
+  // no closure should attempt to suspend another thread while running.
+  bool RequestSynchronousCheckpoint(Closure* function,
+                                    ThreadState suspend_state = ThreadState::kWaiting)
       REQUIRES_SHARED(Locks::mutator_lock_)
       RELEASE(Locks::thread_list_lock_)
       REQUIRES(!Locks::thread_suspend_count_lock_);
+
   bool RequestEmptyCheckpoint()
       REQUIRES(Locks::thread_suspend_count_lock_);