ART: Fix RequestSynchronousCheckpoint

Fix the known races in the code by requiring more locks on entry.
This helps ensure there is no time in the caller where the thread
can die while requesting the checkpoint.

Fix up GetStackTrace, GetFrameCount and GetFrameLocation.

Bug: 62353392
Test: m test-art-host
Test: art/test/testrunner/testrunner.py -b --host -t 911
Change-Id: Ia0c5e920599b58098dc4b3a3d09c5284045f2947
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/runtime/openjdkjvmti/ti_stack.cc
index 9f4002d..ee89372 100644
--- a/runtime/openjdkjvmti/ti_stack.cc
+++ b/runtime/openjdkjvmti/ti_stack.cc
@@ -205,7 +205,12 @@
   size_t index = 0;
 };
 
-static jvmtiError GetThread(JNIEnv* env, jthread java_thread, art::Thread** thread) {
+static jvmtiError GetThread(JNIEnv* env,
+                            art::ScopedObjectAccessAlreadyRunnable& soa,
+                            jthread java_thread,
+                            art::Thread** thread)
+    REQUIRES_SHARED(art::Locks::mutator_lock_)  // Needed for FromManagedThread.
+    REQUIRES(art::Locks::thread_list_lock_) {   // Needed for FromManagedThread.
   if (java_thread == nullptr) {
     *thread = art::Thread::Current();
     if (*thread == nullptr) {
@@ -220,8 +225,6 @@
     }
 
     // TODO: Need non-aborting call here, to return JVMTI_ERROR_INVALID_THREAD.
-    art::ScopedObjectAccess soa(art::Thread::Current());
-    art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
     *thread = art::Thread::FromManagedThread(soa, java_thread);
     if (*thread == nullptr) {
       return ERR(THREAD_NOT_ALIVE);
@@ -236,8 +239,16 @@
                                     jint max_frame_count,
                                     jvmtiFrameInfo* frame_buffer,
                                     jint* count_ptr) {
+  // It is not great that we have to hold these locks for so long, but it is necessary to ensure
+  // that the thread isn't dying on us.
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+
   art::Thread* thread;
-  jvmtiError thread_error = GetThread(art::Thread::Current()->GetJniEnv(), java_thread, &thread);
+  jvmtiError thread_error = GetThread(art::Thread::Current()->GetJniEnv(),
+                                      soa,
+                                      java_thread,
+                                      &thread);
   if (thread_error != ERR(NONE)) {
     return thread_error;
   }
@@ -675,8 +686,17 @@
 jvmtiError StackUtil::GetFrameCount(jvmtiEnv* env ATTRIBUTE_UNUSED,
                                     jthread java_thread,
                                     jint* count_ptr) {
+  // It is not great that we have to hold these locks for so long, but it is necessary to ensure
+  // that the thread isn't dying on us.
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+
   art::Thread* thread;
-  jvmtiError thread_error = GetThread(art::Thread::Current()->GetJniEnv(), java_thread, &thread);
+  jvmtiError thread_error = GetThread(art::Thread::Current()->GetJniEnv(),
+                                      soa,
+                                      java_thread,
+                                      &thread);
+
   if (thread_error != ERR(NONE)) {
     return thread_error;
   }
@@ -745,8 +765,16 @@
                                        jint depth,
                                        jmethodID* method_ptr,
                                        jlocation* location_ptr) {
+  // It is not great that we have to hold these locks for so long, but it is necessary to ensure
+  // that the thread isn't dying on us.
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+
   art::Thread* thread;
-  jvmtiError thread_error = GetThread(art::Thread::Current()->GetJniEnv(), java_thread, &thread);
+  jvmtiError thread_error = GetThread(art::Thread::Current()->GetJniEnv(),
+                                      soa,
+                                      java_thread,
+                                      &thread);
   if (thread_error != ERR(NONE)) {
     return thread_error;
   }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 4ddf217..9f8c90a 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1402,16 +1402,36 @@
   Barrier barrier_;
 };
 
-void Thread::RequestSynchronousCheckpoint(Closure* function) {
+bool Thread::RequestSynchronousCheckpoint(Closure* function) {
   if (this == Thread::Current()) {
     // Asked to run on this thread. Just run.
     function->Run(this);
-    return;
+    return true;
   }
   Thread* self = Thread::Current();
 
   // The current thread is not this thread.
 
+  if (GetState() == ThreadState::kTerminated) {
+    return false;
+  }
+
+  // Note: we're holding the thread-list lock. The thread cannot die at this point.
+  struct ScopedThreadListLockUnlock {
+    explicit ScopedThreadListLockUnlock(Thread* self_in) RELEASE(*Locks::thread_list_lock_)
+        : self_thread(self_in) {
+      Locks::thread_list_lock_->AssertHeld(self_thread);
+      Locks::thread_list_lock_->Unlock(self_thread);
+    }
+
+    ~ScopedThreadListLockUnlock() ACQUIRE(*Locks::thread_list_lock_) {
+      Locks::thread_list_lock_->AssertNotHeld(self_thread);
+      Locks::thread_list_lock_->Lock(self_thread);
+    }
+
+    Thread* self_thread;
+  };
+
   for (;;) {
     // If this thread is runnable, try to schedule a checkpoint. Do some gymnastics to not hold the
     // suspend-count lock for too long.
@@ -1423,8 +1443,11 @@
         installed = RequestCheckpoint(&barrier_closure);
       }
       if (installed) {
+        // Relinquish the thread-list lock, temporarily. We should not wait holding any locks.
+        ScopedThreadListLockUnlock stllu(self);
+        ScopedThreadSuspension sts(self, ThreadState::kWaiting);
         barrier_closure.Wait(self);
-        return;
+        return true;
       }
       // Fall-through.
     }
@@ -1433,7 +1456,6 @@
     // Note: ModifySuspendCountInternal also expects the thread_list_lock to be held in
     //       certain situations.
     {
-      MutexLock mu(self, *Locks::thread_list_lock_);
       MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
 
       if (!ModifySuspendCount(self, +1, nullptr, false)) {
@@ -1443,16 +1465,19 @@
       }
     }
 
-    while (GetState() == ThreadState::kRunnable) {
-      // We became runnable again. Wait till the suspend triggered in ModifySuspendCount
-      // moves us to suspended.
-      sched_yield();
+    {
+      ScopedThreadListLockUnlock stllu(self);
+      ScopedThreadSuspension sts(self, ThreadState::kWaiting);
+      while (GetState() == ThreadState::kRunnable) {
+        // We became runnable again. Wait till the suspend triggered in ModifySuspendCount
+        // moves us to suspended.
+        sched_yield();
+      }
+
+      function->Run(this);
     }
 
-    function->Run(this);
-
     {
-      MutexLock mu(self, *Locks::thread_list_lock_);
       MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
 
       DCHECK_NE(GetState(), ThreadState::kRunnable);
@@ -1460,7 +1485,7 @@
       DCHECK(updated);
     }
 
-    return;  // We're done, break out of the loop.
+    return true;  // We're done, break out of the loop.
   }
 }
 
diff --git a/runtime/thread.h b/runtime/thread.h
index e85ee0d..770173e 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -250,8 +250,10 @@
 
   bool RequestCheckpoint(Closure* function)
       REQUIRES(Locks::thread_suspend_count_lock_);
-  void RequestSynchronousCheckpoint(Closure* function)
-      REQUIRES(!Locks::thread_suspend_count_lock_, !Locks::thread_list_lock_);
+  bool RequestSynchronousCheckpoint(Closure* function)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(Locks::thread_list_lock_)
+      REQUIRES(!Locks::thread_suspend_count_lock_);
   bool RequestEmptyCheckpoint()
       REQUIRES(Locks::thread_suspend_count_lock_);