Merge "Minor robustness tweaks."
diff --git a/libcorkscrew/backtrace.c b/libcorkscrew/backtrace.c
index f9a49ec..857b741 100644
--- a/libcorkscrew/backtrace.c
+++ b/libcorkscrew/backtrace.c
@@ -85,9 +85,13 @@
 }
 
 #ifdef CORKSCREW_HAVE_ARCH
+static const int32_t STATE_DUMPING = -1;
+static const int32_t STATE_DONE = -2;
+static const int32_t STATE_CANCEL = -3;
+
 static pthread_mutex_t g_unwind_signal_mutex = PTHREAD_MUTEX_INITIALIZER;
 static volatile struct {
-    int32_t tid;
+    int32_t tid_state;
     const map_info_t* map_info_list;
     backtrace_frame_t* backtrace;
     size_t ignore_depth;
@@ -96,22 +100,23 @@
 } g_unwind_signal_state;
 
 static void unwind_backtrace_thread_signal_handler(int n, siginfo_t* siginfo, void* sigcontext) {
-    int32_t tid = android_atomic_acquire_load(&g_unwind_signal_state.tid);
-    if (tid == gettid()) {
+    if (!android_atomic_acquire_cas(gettid(), STATE_DUMPING, &g_unwind_signal_state.tid_state)) {
         g_unwind_signal_state.returned_frames = unwind_backtrace_signal_arch(
                 siginfo, sigcontext,
                 g_unwind_signal_state.map_info_list,
                 g_unwind_signal_state.backtrace,
                 g_unwind_signal_state.ignore_depth,
                 g_unwind_signal_state.max_depth);
-        android_atomic_release_store(-1, &g_unwind_signal_state.tid);
+        android_atomic_release_store(STATE_DONE, &g_unwind_signal_state.tid_state);
     } else {
         ALOGV("Received spurious SIGURG on thread %d that was intended for thread %d.",
-                gettid(), tid);
+                gettid(), android_atomic_acquire_load(&g_unwind_signal_state.tid_state));
     }
 }
 #endif
 
+extern int tgkill(int tgid, int tid, int sig);
+
 ssize_t unwind_backtrace_thread(pid_t tid, backtrace_frame_t* backtrace,
         size_t ignore_depth, size_t max_depth) {
     if (tid == gettid()) {
@@ -125,7 +130,7 @@
     struct sigaction oact;
     memset(&act, 0, sizeof(act));
     act.sa_sigaction = unwind_backtrace_thread_signal_handler;
-    act.sa_flags = SA_RESTART | SA_SIGINFO;
+    act.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
     sigemptyset(&act.sa_mask);
 
     pthread_mutex_lock(&g_unwind_signal_mutex);
@@ -138,16 +143,51 @@
         g_unwind_signal_state.ignore_depth = ignore_depth;
         g_unwind_signal_state.max_depth = max_depth;
         g_unwind_signal_state.returned_frames = 0;
-        android_atomic_release_store(tid, &g_unwind_signal_state.tid);
+        android_atomic_release_store(tid, &g_unwind_signal_state.tid_state);
 
-        if (kill(tid, SIGURG)) {
+        // Signal the specific thread that we want to dump.
+        int32_t tid_state = tid;
+        if (tgkill(getpid(), tid, SIGURG)) {
             ALOGV("Failed to send SIGURG to thread %d.", tid);
-            android_atomic_release_store(-1, &g_unwind_signal_state.tid);
         } else {
-            while (android_atomic_acquire_load(&g_unwind_signal_state.tid) == tid) {
-                ALOGV("Waiting for response from thread %d...", tid);
-                usleep(1000);
+            // Wait for the other thread to start dumping the stack, or time out.
+            int wait_millis = 250;
+            for (;;) {
+                tid_state = android_atomic_acquire_load(&g_unwind_signal_state.tid_state);
+                if (tid_state != tid) {
+                    break;
+                }
+                if (wait_millis--) {
+                    ALOGV("Waiting for thread %d to start dumping the stack...", tid);
+                    usleep(1000);
+                } else {
+                    ALOGV("Timed out waiting for thread %d to start dumping the stack.", tid);
+                    break;
+                }
             }
+        }
+
+        // Try to cancel the dump if it has not started yet.
+        if (tid_state == tid) {
+            if (!android_atomic_acquire_cas(tid, STATE_CANCEL, &g_unwind_signal_state.tid_state)) {
+                ALOGV("Canceled thread %d stack dump.", tid);
+                tid_state = STATE_CANCEL;
+            } else {
+                tid_state = android_atomic_acquire_load(&g_unwind_signal_state.tid_state);
+            }
+        }
+
+        // Wait indefinitely for the dump to finish or be canceled.
+        // We cannot apply a timeout here because the other thread is accessing state that
+        // is owned by this thread, such as milist.  It should not take very
+        // long to take the dump once started.
+        while (tid_state == STATE_DUMPING) {
+            ALOGV("Waiting for thread %d to finish dumping the stack...", tid);
+            usleep(1000);
+            tid_state = android_atomic_acquire_load(&g_unwind_signal_state.tid_state);
+        }
+
+        if (tid_state == STATE_DONE) {
             frames = g_unwind_signal_state.returned_frames;
         }