Merge "Minor robustness tweaks."
diff --git a/libcorkscrew/backtrace.c b/libcorkscrew/backtrace.c
index f9a49ec..857b741 100644
--- a/libcorkscrew/backtrace.c
+++ b/libcorkscrew/backtrace.c
@@ -85,9 +85,13 @@
}
#ifdef CORKSCREW_HAVE_ARCH
+static const int32_t STATE_DUMPING = -1; // signal handler has claimed the request and is unwinding
+static const int32_t STATE_DONE = -2; // unwind finished; returned_frames was written before this was stored
+static const int32_t STATE_CANCEL = -3; // requesting thread withdrew the request before the handler claimed it
+
static pthread_mutex_t g_unwind_signal_mutex = PTHREAD_MUTEX_INITIALIZER;
static volatile struct {
- int32_t tid;
+ int32_t tid_state;
const map_info_t* map_info_list;
backtrace_frame_t* backtrace;
size_t ignore_depth;
@@ -96,22 +100,23 @@
} g_unwind_signal_state;
static void unwind_backtrace_thread_signal_handler(int n, siginfo_t* siginfo, void* sigcontext) {
- int32_t tid = android_atomic_acquire_load(&g_unwind_signal_state.tid);
- if (tid == gettid()) {
+ if (!android_atomic_acquire_cas(gettid(), STATE_DUMPING, &g_unwind_signal_state.tid_state)) { // try to swap our own tid for STATE_DUMPING; a zero return is treated as a successful claim
g_unwind_signal_state.returned_frames = unwind_backtrace_signal_arch(
siginfo, sigcontext,
g_unwind_signal_state.map_info_list,
g_unwind_signal_state.backtrace,
g_unwind_signal_state.ignore_depth,
g_unwind_signal_state.max_depth);
- android_atomic_release_store(-1, &g_unwind_signal_state.tid);
+ android_atomic_release_store(STATE_DONE, &g_unwind_signal_state.tid_state); // publish completion only after returned_frames has been written
} else {
ALOGV("Received spurious SIGURG on thread %d that was intended for thread %d.",
- gettid(), tid);
+ gettid(), android_atomic_acquire_load(&g_unwind_signal_state.tid_state)); // NOTE(review): tid_state may hold a STATE_* value here rather than a thread id, so the logged "intended" tid can be negative
}
}
#endif
+extern int tgkill(int tgid, int tid, int sig);
+
ssize_t unwind_backtrace_thread(pid_t tid, backtrace_frame_t* backtrace,
size_t ignore_depth, size_t max_depth) {
if (tid == gettid()) {
@@ -125,7 +130,7 @@
struct sigaction oact;
memset(&act, 0, sizeof(act));
act.sa_sigaction = unwind_backtrace_thread_signal_handler;
- act.sa_flags = SA_RESTART | SA_SIGINFO;
+ act.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
sigemptyset(&act.sa_mask);
pthread_mutex_lock(&g_unwind_signal_mutex);
@@ -138,16 +143,51 @@
g_unwind_signal_state.ignore_depth = ignore_depth;
g_unwind_signal_state.max_depth = max_depth;
g_unwind_signal_state.returned_frames = 0;
- android_atomic_release_store(tid, &g_unwind_signal_state.tid);
+ android_atomic_release_store(tid, &g_unwind_signal_state.tid_state);
- if (kill(tid, SIGURG)) {
+ // Signal the specific thread that we want to dump.
+ int32_t tid_state = tid;
+ if (tgkill(getpid(), tid, SIGURG)) {
ALOGV("Failed to send SIGURG to thread %d.", tid);
- android_atomic_release_store(-1, &g_unwind_signal_state.tid);
} else {
- while (android_atomic_acquire_load(&g_unwind_signal_state.tid) == tid) {
- ALOGV("Waiting for response from thread %d...", tid);
- usleep(1000);
+ // Wait for the other thread to start dumping the stack, or time out.
+ int wait_millis = 250;
+ for (;;) {
+ tid_state = android_atomic_acquire_load(&g_unwind_signal_state.tid_state);
+ if (tid_state != tid) {
+ break;
+ }
+ if (wait_millis--) {
+ ALOGV("Waiting for thread %d to start dumping the stack...", tid);
+ usleep(1000);
+ } else {
+ ALOGV("Timed out waiting for thread %d to start dumping the stack.", tid);
+ break;
+ }
}
+ }
+
+ // Try to cancel the dump if it has not started yet.
+ if (tid_state == tid) {
+ if (!android_atomic_acquire_cas(tid, STATE_CANCEL, &g_unwind_signal_state.tid_state)) {
+ ALOGV("Canceled thread %d stack dump.", tid);
+ tid_state = STATE_CANCEL;
+ } else {
+ tid_state = android_atomic_acquire_load(&g_unwind_signal_state.tid_state);
+ }
+ }
+
+ // Wait indefinitely for the dump to finish or be canceled.
+ // We cannot apply a timeout here because the other thread is accessing state that
+ // is owned by this thread, such as milist. It should not take very
+ // long to take the dump once started.
+ while (tid_state == STATE_DUMPING) {
+ ALOGV("Waiting for thread %d to finish dumping the stack...", tid);
+ usleep(1000);
+ tid_state = android_atomic_acquire_load(&g_unwind_signal_state.tid_state);
+ }
+
+ if (tid_state == STATE_DONE) {
frames = g_unwind_signal_state.returned_frames;
}