Selective deoptimization.

Update the instrumentation to allow selective deoptimization.

Separate instrumentation listener registration from stubs configuration. A
listener is now responsible for configuring the appropriate stubs.
- The method tracing listener installs instrumentation entry/exit stubs or
the interpreter depending on the accuracy of events we want (controlled by
kDeoptimizeForAccurateMethodEntryExitListeners).
- The debugger registers itself as an instrumentation listener but does not
modify methods entrypoints. It only does this on demand when deoptimizing one
method or all the methods.

The selective deoptimization is used for breakpoint only. When a breakpoint is
requested, the debugger deoptimizes this method by setting its entrypoint to
the interpreter stub. As several breakpoints can be set on the same method, we
deoptimize only once. When the last breakpoint on a method is removed, we
reoptimize it by restoring the original entrypoints.

The full deoptimization is used for method entry, method exit and single-step
events. When one of these events is requested, we force everything to run with
the interpreter (except native and proxy methods). When the last of these
events is removed, we restore all methods entrypoints except those which are
currently deoptimized.

Deoptimizing a method requires all mutator threads be suspended in order to
walk each thread's stack and ensure no code is actually executing while we
modify methods entrypoints. Suspending all the threads requires that we not
hold any lock.
In the debugger, we deoptimize/undeoptimize when the JDWP event list changes
(add or remove a breakpoint for instance). During the update, we need to hold
the JDWP event list lock. This means we cannot suspend all the threads at this
time.
In order to deal with these constraints, we support a queue of deoptimization
requests. When an event needs selective/full deoptimization/undeoptimization,
we save its request in the queue. Once we release the JDWP event list lock, we
suspend all the threads, process this queue and finally resume all the threads.
This is done in Dbg::ManageDeoptimization. Note: threads already suspended
before doing this remain suspended so we don't "break" debugger suspensions.

When we deoptimize one method or every method, we need to browse each thread's
stack to install instrumentation exit PC as return PC and save information in
the instrumentation stack frame. Now that we can deoptimize multiple times
during the execution of an application, we need to preserve existing
instrumentation frames (which are the result of a previous deoptimization).
This requires pushing
new instrumentation frames before existing ones so we don't corrupt the
instrumentation stack frame while walking the stack.

Bug: 11538162
Change-Id: I477142df17edf2dab8ac5d879daacc5c08a67c39
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index bcf7267..4ea1366 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -176,14 +176,27 @@
 static size_t gAllocRecordHead GUARDED_BY(gAllocTrackerLock) = 0;
 static size_t gAllocRecordCount GUARDED_BY(gAllocTrackerLock) = 0;
 
-// Breakpoints and single-stepping.
+// Deoptimization support.
+struct MethodInstrumentationRequest {
+  bool deoptimize;
+
+  // Method for selective deoptimization. NULL means full deoptimization.
+  mirror::ArtMethod* method;
+
+  MethodInstrumentationRequest(bool deoptimize, mirror::ArtMethod* method)
+    : deoptimize(deoptimize), method(method) {}
+};
+// TODO we need to visit associated methods as roots.
+static std::vector<MethodInstrumentationRequest> gDeoptimizationRequests GUARDED_BY(Locks::deoptimization_lock_);
+
+// Breakpoints.
 static std::vector<Breakpoint> gBreakpoints GUARDED_BY(Locks::breakpoint_lock_);
 
 static bool IsBreakpoint(const mirror::ArtMethod* m, uint32_t dex_pc)
     LOCKS_EXCLUDED(Locks::breakpoint_lock_)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   MutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
-  for (size_t i = 0; i < gBreakpoints.size(); ++i) {
+  for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
     if (gBreakpoints[i].method == m && gBreakpoints[i].dex_pc == dex_pc) {
       VLOG(jdwp) << "Hit breakpoint #" << i << ": " << gBreakpoints[i];
       return true;
@@ -520,11 +533,17 @@
     CHECK_EQ(gBreakpoints.size(), 0U);
   }
 
+  {
+    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+    CHECK_EQ(gDeoptimizationRequests.size(), 0U);
+  }
+
   Runtime* runtime = Runtime::Current();
   runtime->GetThreadList()->SuspendAll();
   Thread* self = Thread::Current();
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
   CHECK_NE(old_state, kRunnable);
+  runtime->GetInstrumentation()->EnableDeoptimization();
   runtime->GetInstrumentation()->AddListener(&gDebugInstrumentationListener,
                                              instrumentation::Instrumentation::kMethodEntered |
                                              instrumentation::Instrumentation::kMethodExited |
@@ -549,6 +568,14 @@
   runtime->GetThreadList()->SuspendAll();
   Thread* self = Thread::Current();
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
+  {
+    // Since we're going to disable deoptimization, we clear the deoptimization requests queue.
+    // This prevents us from having any pending deoptimization request when the debugger attaches to
+    // us again while no event has been requested yet.
+    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+    gDeoptimizationRequests.clear();
+  }
+  runtime->GetInstrumentation()->DisableDeoptimization();
   runtime->GetInstrumentation()->RemoveListener(&gDebugInstrumentationListener,
                                                 instrumentation::Instrumentation::kMethodEntered |
                                                 instrumentation::Instrumentation::kMethodExited |
@@ -1691,6 +1718,7 @@
     case kWaitingForDebuggerSend:
     case kWaitingForDebuggerSuspension:
     case kWaitingForDebuggerToAttach:
+    case kWaitingForDeoptimization:
     case kWaitingForGcToComplete:
     case kWaitingForCheckPointsToRun:
     case kWaitingForJniOnLoad:
@@ -2384,22 +2412,129 @@
   }
 }
 
+static void ProcessDeoptimizationRequests()
+    LOCKS_EXCLUDED(Locks::deoptimization_lock_)
+    EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
+  MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  for (const MethodInstrumentationRequest& request : gDeoptimizationRequests) {
+    mirror::ArtMethod* const method = request.method;
+    if (method != nullptr) {
+      // Selective deoptimization.
+      if (request.deoptimize) {
+        VLOG(jdwp) << "Deoptimize method " << PrettyMethod(method);
+        instrumentation->Deoptimize(method);
+      } else {
+        VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(method);
+        instrumentation->Undeoptimize(method);
+      }
+    } else {
+      // Full deoptimization.
+      if (request.deoptimize) {
+        VLOG(jdwp) << "Deoptimize the world";
+        instrumentation->DeoptimizeEverything();
+      } else {
+        VLOG(jdwp) << "Undeoptimize the world";
+        instrumentation->UndeoptimizeEverything();
+      }
+    }
+  }
+  gDeoptimizationRequests.clear();
+}
+
+// Process deoptimization requests after suspending all mutator threads.
+void Dbg::ManageDeoptimization() {
+  Thread* const self = Thread::Current();
+  {
+    // Avoid suspend/resume if there is no pending request.
+    MutexLock mu(self, *Locks::deoptimization_lock_);
+    if (gDeoptimizationRequests.empty()) {
+      return;
+    }
+  }
+  CHECK_EQ(self->GetState(), kRunnable);
+  self->TransitionFromRunnableToSuspended(kWaitingForDeoptimization);
+  // We need to suspend mutator threads first.
+  Runtime* const runtime = Runtime::Current();
+  runtime->GetThreadList()->SuspendAll();
+  const ThreadState old_state = self->SetStateUnsafe(kRunnable);
+  ProcessDeoptimizationRequests();
+  CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
+  runtime->GetThreadList()->ResumeAll();
+  self->TransitionFromSuspendedToRunnable();
+}
+
+// Enable full deoptimization.
+void Dbg::EnableFullDeoptimization() {
+  MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+  VLOG(jdwp) << "Request full deoptimization";
+  gDeoptimizationRequests.push_back(MethodInstrumentationRequest(true, nullptr));
+}
+
+// Disable full deoptimization.
+void Dbg::DisableFullDeoptimization() {
+  MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+  VLOG(jdwp) << "Request full undeoptimization";
+  gDeoptimizationRequests.push_back(MethodInstrumentationRequest(false, nullptr));
+}
+
 void Dbg::WatchLocation(const JDWP::JdwpLocation* location) {
-  MutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
+  bool need_deoptimization = true;
   mirror::ArtMethod* m = FromMethodId(location->method_id);
-  gBreakpoints.push_back(Breakpoint(m, location->dex_pc));
-  VLOG(jdwp) << "Set breakpoint #" << (gBreakpoints.size() - 1) << ": " << gBreakpoints[gBreakpoints.size() - 1];
+  {
+    MutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
+
+    // If there is no breakpoint on this method yet, we need to deoptimize it.
+    for (const Breakpoint& breakpoint : gBreakpoints) {
+      if (breakpoint.method == m) {
+        // We already set a breakpoint on this method, hence we deoptimized it.
+        DCHECK(Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
+        need_deoptimization = false;
+        break;
+      }
+    }
+
+    gBreakpoints.push_back(Breakpoint(m, location->dex_pc));
+    VLOG(jdwp) << "Set breakpoint #" << (gBreakpoints.size() - 1) << ": " << gBreakpoints[gBreakpoints.size() - 1];
+  }
+
+  if (need_deoptimization) {
+    // Request its deoptimization. This will be done after updating the JDWP event list.
+    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+    gDeoptimizationRequests.push_back(MethodInstrumentationRequest(true, m));
+    VLOG(jdwp) << "Request deoptimization of " << PrettyMethod(m);
+  }
 }
 
 void Dbg::UnwatchLocation(const JDWP::JdwpLocation* location) {
-  MutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
+  bool can_undeoptimize = true;
   mirror::ArtMethod* m = FromMethodId(location->method_id);
-  for (size_t i = 0; i < gBreakpoints.size(); ++i) {
-    if (gBreakpoints[i].method == m && gBreakpoints[i].dex_pc == location->dex_pc) {
-      VLOG(jdwp) << "Removed breakpoint #" << i << ": " << gBreakpoints[i];
-      gBreakpoints.erase(gBreakpoints.begin() + i);
-      return;
+  DCHECK(Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
+  {
+    MutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
+    for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
+      if (gBreakpoints[i].method == m && gBreakpoints[i].dex_pc == location->dex_pc) {
+        VLOG(jdwp) << "Removed breakpoint #" << i << ": " << gBreakpoints[i];
+        gBreakpoints.erase(gBreakpoints.begin() + i);
+        break;
+      }
     }
+
+    // If there is no breakpoint on this method, we can undeoptimize it.
+    for (const Breakpoint& breakpoint : gBreakpoints) {
+      if (breakpoint.method == m) {
+        can_undeoptimize = false;
+        break;
+      }
+    }
+  }
+
+  if (can_undeoptimize) {
+    // Request its undeoptimization. This will be done after updating the JDWP event list.
+    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+    gDeoptimizationRequests.push_back(MethodInstrumentationRequest(false, m));
+    VLOG(jdwp) << "Request undeoptimization of " << PrettyMethod(m);
   }
 }