Handle nested signals This allows for signals to be raised inside the ART signal handler. This can occur when the JavaStackTraceHandler attempts to generate a stack trace and something goes wrong. It also fixes an issue where the fault manager was not being correctly shut down inside the signal chaining code. In this case the signal handler was not restored to the original. Bug: 17006816 Bug: 17133266 (cherry picked from commit fabe91e0d558936ac26b98d2b4ee1af08f58831d) Change-Id: I10730ef52d5d8d34610a5293253b3be6caf4829e

commit: 8ce6b9040747054b444a7fa706503cd257801936 [log] [tgz]
author: Dave Allison <dallison@google.com> Tue Aug 26 11:07:58 2014 -0700
committer: Dave Allison <dallison@google.com> Wed Aug 27 12:47:44 2014 -0700
tree: 04712170addb252d307ef9015abfc9bfc2b73581
parent: a0a0da29e7d4d5c1bd471c49f1a4b6ec98fb767a [diff] [blame]
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 68fad7b..25f87c5 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc

@@ -16,6 +16,7 @@
 
 #include "fault_handler.h"
 
+#include <setjmp.h>
 #include <sys/mman.h>
 #include <sys/ucontext.h>
 #include "mirror/art_method.h"
@@ -24,6 +25,47 @@
 #include "thread-inl.h"
 #include "verify_object-inl.h"
 
+// Note on nested signal support
+// -----------------------------
+//
+// Typically a signal handler should not need to deal with signals that occur within it.
+// However, when a SIGSEGV occurs that is in generated code and is not one of the
+// handled signals (implicit checks), we call a function to try to dump the stack
+// to the log.  This enhances the debugging experience, but the stack dump itself may
+// not work.  If the cause of the original SIGSEGV is a corrupted stack or other
+// memory region, the stack backtrace code may run into trouble and may either crash
+// or fail with an abort (SIGABRT).  In either case we don't want that (new) signal to
+// mask the original signal and thus prevent useful debug output from being presented.
+//
+// In order to handle this situation, before we call the stack tracer we do the following:
+//
+// 1. shutdown the fault manager so that we are talking to the real signal management
+//    functions rather than those in sigchain.
+// 2. use pthread_sigmask to allow SIGSEGV and SIGABRT signals to be delivered to the
+//    thread running the signal handler.
+// 3. set the handler for SIGSEGV and SIGABRT to a secondary signal handler.
+// 4. save the thread's state to the TLS of the current thread using 'setjmp'
+//
+// We then call the stack tracer and one of two things may happen:
+// a. it completes successfully
+// b. it crashes and a signal is raised.
+//
+// In the former case, we fall through and everything is fine.  In the latter case
+// our secondary signal handler gets called in a signal context.  This results in
+// a call to FaultManager::HandleNestedSignal(), an architecture-specific function
+// whose purpose is to call 'longjmp' on the jmp_buf saved in the TLS of the current
+// thread.  This results in a return with a non-zero value from 'setjmp'.  We detect this
+// and write something to the log to tell the user that it happened.
+//
+// Regardless of how we got there, we reach the code after the stack tracer and we
+// restore the signal states to their original values, reinstate the fault manager (thus
+// reestablishing the signal chain) and continue.
+
+// This is difficult to test with a runtime test.  To invoke the nested signal code
+// on any signal, uncomment the following line and run something that throws a
+// NullPointerException.
+// #define TEST_NESTED_SIGNAL
+
 namespace art {
 // Static fault manger object accessed by signal handler.
 FaultManager fault_manager;
@@ -37,10 +79,14 @@
 
 // Signal handler called on SIGSEGV.
 static void art_fault_handler(int sig, siginfo_t* info, void* context) {
-  // std::cout << "handling fault in ART handler\n";
   fault_manager.HandleFault(sig, info, context);
 }
 
+// Signal handler for dealing with a nested signal.
+static void art_nested_signal_handler(int sig, siginfo_t* info, void* context) {
+  fault_manager.HandleNestedSignal(sig, info, context);
+}
+
 FaultManager::FaultManager() : initialized_(false) {
   sigaction(SIGSEGV, nullptr, &oldaction_);
 }
@@ -82,26 +128,36 @@
   // If malloc calls abort, it will be holding its lock.
   // If the handler tries to call malloc, it will deadlock.
 
-  // Also, there is only an 8K stack available here to logging can cause memory
-  // overwrite issues if you are unlucky.  If you want to enable logging and
-  // are getting crashes, allocate more space for the alternate signal stack.
-
   VLOG(signals) << "Handling fault";
   if (IsInGeneratedCode(info, context, true)) {
     VLOG(signals) << "in generated code, looking for handler";
     for (const auto& handler : generated_code_handlers_) {
       VLOG(signals) << "invoking Action on handler " << handler;
       if (handler->Action(sig, info, context)) {
+#ifdef TEST_NESTED_SIGNAL
+        // In test mode we want to fall through to stack trace handler
+        // on every signal (in reality this will cause a crash on the first
+        // signal).
+        break;
+#else
+        // We have handled a signal so it's time to return from the
+        // signal handler to the appropriate place.
         return;
+#endif
       }
     }
   }
+
+  // We hit a signal we didn't handle.  This might be something about which
+  // we can give more information, so call all registered handlers to see
+  // if it is.
   for (const auto& handler : other_handlers_) {
     if (handler->Action(sig, info, context)) {
       return;
     }
   }
 
+  // Set a breakpoint in this function to catch unhandled signals.
   art_sigsegv_fault();
 
   // Pass this on to the next handler in the chain, or the default if none.
@@ -242,19 +298,90 @@
 
 bool JavaStackTraceHandler::Action(int sig, siginfo_t* siginfo, void* context) {
   // Make sure that we are in the generated code, but we may not have a dex pc.
-  if (manager_->IsInGeneratedCode(siginfo, context, false)) {
+
+#ifdef TEST_NESTED_SIGNAL
+  bool in_generated_code = true;
+#else
+  bool in_generated_code = manager_->IsInGeneratedCode(siginfo, context, false);
+#endif
+  if (in_generated_code) {
     LOG(ERROR) << "Dumping java stack trace for crash in generated code";
     mirror::ArtMethod* method = nullptr;
     uintptr_t return_pc = 0;
     uintptr_t sp = 0;
-    manager_->GetMethodAndReturnPcAndSp(siginfo, context, &method, &return_pc, &sp);
     Thread* self = Thread::Current();
-    // Inside of generated code, sp[0] is the method, so sp is the frame.
-    StackReference<mirror::ArtMethod>* frame =
-        reinterpret_cast<StackReference<mirror::ArtMethod>*>(sp);
-    self->SetTopOfStack(frame, 0);  // Since we don't necessarily have a dex pc, pass in 0.
-    self->DumpJavaStack(LOG(ERROR));
+
+    // Shutdown the fault manager so that it will remove the signal chain for
+    // SIGSEGV and we call the real sigaction.
+    fault_manager.Shutdown();
+
+    // The action for SIGSEGV should be the default handler now.
+
+    // Unblock the signals we allow so that they can be delivered in the signal handler.
+    sigset_t sigset;
+    sigemptyset(&sigset);
+    sigaddset(&sigset, SIGSEGV);
+    sigaddset(&sigset, SIGABRT);
+    pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
+
+    // If we get a signal in this code we want to invoke our nested signal
+    // handler.
+    struct sigaction action, oldsegvaction, oldabortaction;
+    action.sa_sigaction = art_nested_signal_handler;
+
+    // Explicitly mask out SIGSEGV and SIGABRT from the nested signal handler.  This
+    // should be the default but we definitely don't want these happening in our
+    // nested signal handler.
+    sigemptyset(&action.sa_mask);
+    sigaddset(&action.sa_mask, SIGSEGV);
+    sigaddset(&action.sa_mask, SIGABRT);
+
+    action.sa_flags = SA_SIGINFO | SA_ONSTACK;
+#if !defined(__APPLE__) && !defined(__mips__)
+    action.sa_restorer = nullptr;
+#endif
+
+    // Catch SIGSEGV and SIGABRT to invoke our nested handler
+    int e1 = sigaction(SIGSEGV, &action, &oldsegvaction);
+    int e2 = sigaction(SIGABRT, &action, &oldabortaction);
+    if (e1 != 0 || e2 != 0) {
+      LOG(ERROR) << "Unable to register nested signal handler - no stack trace possible";
+      // If sigaction failed we have a serious problem.  We cannot catch
+      // any failures in the stack tracer and it's likely to occur since
+      // the program state is bad.  Therefore we don't even try to give
+      // a stack trace.
+    } else {
+      // Save the current state and try to dump the stack.  If this causes a signal
+      // our nested signal handler will be invoked and this will longjmp to the saved
+      // state.
+      if (setjmp(*self->GetNestedSignalState()) == 0) {
+        manager_->GetMethodAndReturnPcAndSp(siginfo, context, &method, &return_pc, &sp);
+        // Inside of generated code, sp[0] is the method, so sp is the frame.
+        StackReference<mirror::ArtMethod>* frame =
+            reinterpret_cast<StackReference<mirror::ArtMethod>*>(sp);
+        self->SetTopOfStack(frame, 0);  // Since we don't necessarily have a dex pc, pass in 0.
+#ifdef TEST_NESTED_SIGNAL
+        // To test the nested signal handler we raise a signal here.  This will cause the
+        // nested signal handler to be called and perform a longjmp back to the setjmp
+        // above.
+        abort();
+#endif
+        self->DumpJavaStack(LOG(ERROR));
+      } else {
+        LOG(ERROR) << "Stack trace aborted due to nested signal - original signal being reported";
+      }
+
+      // Restore the signal handlers.
+      sigaction(SIGSEGV, &oldsegvaction, nullptr);
+      sigaction(SIGABRT, &oldabortaction, nullptr);
+    }
+
+    // Now put the fault manager back in place.
+    fault_manager.Init();
+
+    // And we're done.
   }
+
   return false;  // Return false since we want to propagate the fault to the main signal handler.
 }
commit	8ce6b9040747054b444a7fa706503cd257801936	[log] [tgz]
author	Dave Allison <dallison@google.com>	Tue Aug 26 11:07:58 2014 -0700
committer	Dave Allison <dallison@google.com>	Wed Aug 27 12:47:44 2014 -0700
tree	04712170addb252d307ef9015abfc9bfc2b73581
parent	a0a0da29e7d4d5c1bd471c49f1a4b6ec98fb767a [diff] [blame]