Remove unneeded ScopedGCCriticalSections from openjdkjvmti.

We used ScopedGCCriticalSections in many parts of the openjdkjvmti
often unnecessarily.

We removed a totally unneeded GCCriticalSection that was acquired when
modifying the instrumentation listeners.

We also removed RequestGCSafeSynchronousCheckpoint and the change to
use GcRoots instead.  We added RequestGCSafeSynchronousCheckpoint as a
way to prevent the GC from running when we are doing some JVMTI
operations on other threads.  This could interact with running GCs in
non-trivial ways, potentially causing deadlocks in some situations.
This changes the code to instead use read-barriers and GcRoots to
ensure that we do not read data from the wrong gc space.

In order for this to work correctly we need to make sure that we are
only ever reading the GcRoots from the thread that eventually needs
the reference. This required some re-writing of the checkpoint
closures since they would often just call AddLocalReference on
non-local Thread objects.

Changes to Thread::RequestSynchronousCheckpoint and art::Barrier were
needed in order to allow this all to work since we needed to ensure
that the requesting thread did not suspend as the checkpoint was being
run.

This is a partial revert of commit 7585b91bfc77b8.

Bug: 67838964
Bug: 76003243

Test: use gapid
Test: ./test.py --host -j50
Test: ./art/tools/run-libjdwp-tests.sh --mode=host

Change-Id: I26d871089829639eccb973cecc315194f7bcf681
diff --git a/runtime/barrier.cc b/runtime/barrier.cc
index 4329a5a..8d3cf45 100644
--- a/runtime/barrier.cc
+++ b/runtime/barrier.cc
@@ -31,6 +31,9 @@
       condition_("GC barrier condition", lock_) {
 }
 
+template void Barrier::Increment<Barrier::kAllowHoldingLocks>(Thread* self, int delta);
+template void Barrier::Increment<Barrier::kDisallowHoldingLocks>(Thread* self, int delta);
+
 void Barrier::Pass(Thread* self) {
   MutexLock mu(self, lock_);
   SetCountLocked(self, count_ - 1);
@@ -45,6 +48,7 @@
   SetCountLocked(self, count);
 }
 
+template <Barrier::LockHandling locks>
 void Barrier::Increment(Thread* self, int delta) {
   MutexLock mu(self, lock_);
   SetCountLocked(self, count_ + delta);
@@ -57,7 +61,11 @@
   // be decremented to zero and a Broadcast will be made on the
   // condition variable, thus waking this up.
   while (count_ != 0) {
-    condition_.Wait(self);
+    if (locks == kAllowHoldingLocks) {
+      condition_.WaitHoldingLocks(self);
+    } else {
+      condition_.Wait(self);
+    }
   }
 }
 
diff --git a/runtime/barrier.h b/runtime/barrier.h
index d7c4661..8a38c4c 100644
--- a/runtime/barrier.h
+++ b/runtime/barrier.h
@@ -35,6 +35,11 @@
 // TODO: Maybe give this a better name.
 class Barrier {
  public:
+  enum LockHandling {
+    kAllowHoldingLocks,
+    kDisallowHoldingLocks,
+  };
+
   explicit Barrier(int count);
   virtual ~Barrier();
 
@@ -50,7 +55,9 @@
   // If these calls are made in that situation, the offending thread is likely to go back
   // to sleep, resulting in a deadlock.
 
-  // Increment the count by delta, wait on condition if count is non zero.
+  // Increment the count by delta, wait on condition if count is non zero.  If LockHandling is
+  // kAllowHoldingLocks we will not check that all locks are released when waiting.
+  template <Barrier::LockHandling locks = kDisallowHoldingLocks>
   void Increment(Thread* self, int delta) REQUIRES(!lock_);
 
   // Increment the count by delta, wait on condition if count is non zero, with a timeout. Returns
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 5b03c2d..c0deda8 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1438,8 +1438,12 @@
     barrier_.Pass(self);
   }
 
-  void Wait(Thread* self) {
-    barrier_.Increment(self, 1);
+  void Wait(Thread* self, ThreadState suspend_state) {
+    if (suspend_state != ThreadState::kRunnable) {
+      barrier_.Increment<Barrier::kDisallowHoldingLocks>(self, 1);
+    } else {
+      barrier_.Increment<Barrier::kAllowHoldingLocks>(self, 1);
+    }
   }
 
  private:
@@ -1448,7 +1452,7 @@
 };
 
 // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution.
-bool Thread::RequestSynchronousCheckpoint(Closure* function) {
+bool Thread::RequestSynchronousCheckpoint(Closure* function, ThreadState suspend_state) {
   Thread* self = Thread::Current();
   if (this == Thread::Current()) {
     Locks::thread_list_lock_->AssertExclusiveHeld(self);
@@ -1496,8 +1500,8 @@
         // Relinquish the thread-list lock. We should not wait holding any locks. We cannot
         // reacquire it since we don't know if 'this' hasn't been deleted yet.
         Locks::thread_list_lock_->ExclusiveUnlock(self);
-        ScopedThreadSuspension sts(self, ThreadState::kWaiting);
-        barrier_closure.Wait(self);
+        ScopedThreadStateChange sts(self, suspend_state);
+        barrier_closure.Wait(self, suspend_state);
         return true;
       }
       // Fall-through.
@@ -1521,7 +1525,7 @@
       // that we can call ModifySuspendCount without racing against ThreadList::Unregister.
       ScopedThreadListLockUnlock stllu(self);
       {
-        ScopedThreadSuspension sts(self, ThreadState::kWaiting);
+        ScopedThreadStateChange sts(self, suspend_state);
         while (GetState() == ThreadState::kRunnable) {
           // We became runnable again. Wait till the suspend triggered in ModifySuspendCount
           // moves us to suspended.
diff --git a/runtime/thread.h b/runtime/thread.h
index 6549fc1..9adae96 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -263,16 +263,31 @@
       WARN_UNUSED
       REQUIRES(Locks::thread_suspend_count_lock_);
 
+  // Requests a checkpoint closure to run on another thread. The closure will be run when the thread
+  // gets suspended. This will return true if the closure was added and will (eventually) be
+  // executed. It returns false otherwise.
+  //
+  // Since multiple closures can be queued and some closures can delay other threads from running no
+  // closure should attempt to suspend another thread while running.
+  // TODO We should add some debug option that verifies this.
   bool RequestCheckpoint(Closure* function)
       REQUIRES(Locks::thread_suspend_count_lock_);
 
   // RequestSynchronousCheckpoint releases the thread_list_lock_ as a part of its execution. This is
   // due to the fact that Thread::Current() needs to go to sleep to allow the targeted thread to
-  // execute the checkpoint for us if it is Runnable.
-  bool RequestSynchronousCheckpoint(Closure* function)
+  // execute the checkpoint for us if it is Runnable. The suspend_state is the state that the thread
+  // will go into while it is awaiting the checkpoint to be run.
+  // NB Passing ThreadState::kRunnable may cause the current thread to wait in a condition variable
+  // while holding the mutator_lock_.  Callers should ensure that this will not cause any problems
+  // for the closure or the rest of the system.
+  // NB Since multiple closures can be queued and some closures can delay other threads from running
+  // no closure should attempt to suspend another thread while running.
+  bool RequestSynchronousCheckpoint(Closure* function,
+                                    ThreadState suspend_state = ThreadState::kWaiting)
       REQUIRES_SHARED(Locks::mutator_lock_)
       RELEASE(Locks::thread_list_lock_)
       REQUIRES(!Locks::thread_suspend_count_lock_);
+
   bool RequestEmptyCheckpoint()
       REQUIRES(Locks::thread_suspend_count_lock_);