Add a concept of tasks and leader thread
diff --git a/ChangeLog b/ChangeLog
index bc089e6..83679ff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -25,6 +25,23 @@
* common.h (struct Process): Drop field type_being_displayed.
+2011-05-06 Petr Machata <pmachata@redhat.com>
+
+ * common.h (struct Process.leader): Points to leader thread of the
+ process.
+ (process_leader, process_tasks): For querying threads of process.
+ (process_status, process_stopped): For querying task status.
+ (task_kill): For sending signals to a single task.
+ (each_task): Like each_process, but for tasks of a process.
+ * proc.c (open_pid): Open all tasks of the process.
+ (each_task): Implementation.
+ * sysdeps/linux-gnu/proc.c (process_leader, process_tasks)
+ (process_stopped, process_status): Implementations.
+ * handle_event.c: Update throughout.
+ * sysdeps/linux-gnu/events.c: Likewise.
+ * sysdeps/linux-gnu/trace.c (trace_pid): waitpid should wait for
+ threads, too.
+
2011-05-05 Petr Machata <pmachata@redhat.com>
* common.h (add_process, remove_process, each_process)
diff --git a/breakpoints.c b/breakpoints.c
index f215075..4e74923 100644
--- a/breakpoints.c
+++ b/breakpoints.c
@@ -16,6 +16,7 @@
address2bpstruct(Process *proc, void *addr) {
assert(proc != NULL);
assert(proc->breakpoints != NULL);
+ assert(proc->leader == proc);
debug(DEBUG_FUNCTION, "address2bpstruct(pid=%d, addr=%p)", proc->pid, addr);
return dict_find_entry(proc->breakpoints, addr);
}
@@ -25,6 +26,13 @@
struct library_symbol *libsym, int enable) {
Breakpoint *sbp;
+ Process * leader = proc->leader;
+
+ /* Only the group leader should be getting the breakpoints and
+ * thus have ->breakpoints initialized. */
+ assert(leader != NULL);
+ assert(leader->breakpoints != NULL);
+
#ifdef __arm__
int thumb_mode = (int)addr & 1;
if (thumb_mode)
@@ -40,13 +48,13 @@
if (libsym)
libsym->needs_init = 0;
- sbp = dict_find_entry(proc->breakpoints, addr);
+ sbp = dict_find_entry(leader->breakpoints, addr);
if (!sbp) {
sbp = calloc(1, sizeof(Breakpoint));
if (!sbp) {
return; /* TODO FIXME XXX: error_mem */
}
- dict_enter(proc->breakpoints, addr, sbp);
+ dict_enter(leader->breakpoints, addr, sbp);
sbp->addr = addr;
sbp->libsym = libsym;
}
@@ -67,7 +75,10 @@
debug(DEBUG_FUNCTION, "delete_breakpoint(pid=%d, addr=%p)", proc->pid, addr);
- sbp = dict_find_entry(proc->breakpoints, addr);
+ Process * leader = proc->leader;
+ assert(leader != NULL);
+
+ sbp = dict_find_entry(leader->breakpoints, addr);
assert(sbp); /* FIXME: remove after debugging has been done. */
/* This should only happen on out-of-memory conditions. */
if (sbp == NULL)
@@ -157,6 +168,7 @@
void
disable_all_breakpoints(Process *proc) {
debug(DEBUG_FUNCTION, "disable_all_breakpoints(pid=%d)", proc->pid);
+ assert(proc->leader == proc);
if (proc->breakpoints_enabled) {
debug(1, "Disabling breakpoints for pid %u...", proc->pid);
dict_apply_to_all(proc->breakpoints, disable_bp_cb, proc);
@@ -183,6 +195,11 @@
proc->breakpoints = NULL;
}
+ /* Only the thread group leader should hold the breakpoints.
+ * (N.B. PID may be set to 0 temporarily when called by
+ * handle_exec). */
+ assert(proc->leader == proc);
+
proc->breakpoints = dict_init(dict_key2hash_int,
dict_key_cmp_int);
diff --git a/common.h b/common.h
index f34a99d..a53bc83 100644
--- a/common.h
+++ b/common.h
@@ -165,12 +165,21 @@
STATE_IGNORED /* ignore this process (it's a fork and no -f was used) */
};
+/* XXX We would rather have this organized a little differently:
+ * a Process for the whole thread group and a Task for the
+ * per-thread state. But for now this is the less invasive way of
+ * structuring it. */
struct Process {
Process_State state;
Process * parent; /* needed by STATE_BEING_CREATED */
char * filename;
pid_t pid;
+
+ /* Dictionary of breakpoints (which is a mapping
+ * address->Breakpoint). This is NULL for non-leader
+ * processes. */
Dict * breakpoints;
+
int breakpoints_enabled; /* -1:not enabled yet, 0:disabled, 1:enabled */
int mask_32bit; /* 1 if 64-bit ltrace is tracing 32-bit process */
unsigned int personality;
@@ -201,7 +210,18 @@
void *unwind_priv;
#endif /* defined(HAVE_LIBUNWIND) */
+ /**
+ * Process chaining.
+ **/
Process * next;
+
+ /* LEADER points to the leader thread of the POSIX.1 process.
+ If X->LEADER == X, then X is the leader thread and the
+ Process structures chained by NEXT represent other threads,
+ up until, but not including, the next leader thread.
+ LEADER may be NULL after the leader has already exited. In
+ that case this process is waiting to be collected. */
+ Process * leader;
};
struct opt_c_struct {
@@ -229,6 +249,10 @@
extern Process *each_process(Process * start,
enum pcb_status (* cb)(Process * proc, void * data),
void * data);
+extern Process *each_task(Process * start,
+ enum pcb_status (* cb)(Process * proc, void * data),
+ void * data);
+
extern void handle_event(Event * event);
extern pid_t execute_program(const char * command, char ** argv);
extern int display_arg(enum tof type, Process * proc, int arg_num, arg_type_info * info);
@@ -256,6 +280,10 @@
/* Arch-dependent stuff: */
extern char * pid2name(pid_t pid);
+extern pid_t process_leader(pid_t pid);
+extern int process_tasks(pid_t pid, pid_t **ret_tasks, size_t *ret_n);
+extern int process_stopped(pid_t pid);
+extern char process_status(pid_t pid);
extern void trace_set_options(Process * proc, pid_t pid);
extern void trace_me(void);
extern int trace_pid(pid_t pid);
@@ -282,6 +310,8 @@
extern void * sym2addr(Process *, struct library_symbol *);
extern int linkmap_init(Process *, struct ltelf *);
extern void arch_check_dbg(Process *proc);
+extern int task_kill (pid_t pid, int sig);
+
extern struct ltelf main_lte;
diff --git a/handle_event.c b/handle_event.c
index 8664f90..a2b1dc2 100644
--- a/handle_event.c
+++ b/handle_event.c
@@ -477,6 +477,13 @@
handle_breakpoint(Event *event) {
int i, j;
Breakpoint *sbp;
+ Process *leader = event->proc->leader;
+
+ /* The leader has terminated. */
+ if (leader == NULL) {
+ continue_process(event->proc->pid);
+ return;
+ }
debug(DEBUG_FUNCTION, "handle_breakpoint(pid=%d, addr=%p)", event->proc->pid, event->e_un.brk_addr);
debug(2, "event: breakpoint (%p)", event->e_un.brk_addr);
@@ -487,7 +494,7 @@
Breakpoint *stub_bp = NULL;
char nop_instruction[] = PPC_NOP;
- stub_bp = address2bpstruct (event->proc, event->e_un.brk_addr);
+ stub_bp = address2bpstruct(leader, event->e_un.brk_addr);
if (stub_bp) {
unsigned char *bp_instruction = stub_bp->orig_value;
@@ -527,7 +534,7 @@
if (libsym->plt_type != LS_TOPLT_POINT) {
unsigned char break_insn[] = BREAKPOINT_VALUE;
- sbp = address2bpstruct(event->proc, addr);
+ sbp = address2bpstruct(leader, addr);
assert(sbp);
a = ptrace(PTRACE_PEEKTEXT, event->proc->pid,
addr);
@@ -538,7 +545,7 @@
libsym);
}
} else {
- sbp = dict_find_entry(event->proc->breakpoints, addr);
+ sbp = dict_find_entry(leader->breakpoints, addr);
/* On powerpc, the breakpoint address
may end up being actual entry point
of the library symbol, not the PLT
@@ -554,8 +561,8 @@
struct library_symbol *sym= event->proc->callstack[i].c_un.libfunc;
struct library_symbol *new_sym;
assert(sym);
- addr = sym2addr(event->proc, sym);
- sbp = dict_find_entry(event->proc->breakpoints, addr);
+ addr = sym2addr(leader, sym);
+ sbp = dict_find_entry(leader->breakpoints, addr);
if (sbp) {
if (addr != sbp->addr) {
insert_breakpoint(event->proc, addr, sym);
@@ -564,7 +571,7 @@
new_sym=malloc(sizeof(*new_sym) + strlen(sym->name) + 1);
memcpy(new_sym,sym,sizeof(*new_sym) + strlen(sym->name) + 1);
new_sym->next = leader->list_of_symbols;
- event->proc->list_of_symbols = new_sym;
+ leader->list_of_symbols = new_sym;
insert_breakpoint(event->proc, addr, new_sym);
}
#endif
@@ -582,18 +589,23 @@
event->proc->callstack[i].c_un.libfunc->name);
}
callstack_pop(event->proc);
- continue_after_breakpoint(event->proc,
- address2bpstruct(event->proc,
- event->e_un.brk_addr));
+ sbp = address2bpstruct(leader, event->e_un.brk_addr);
+ continue_after_breakpoint(event->proc, sbp);
return;
}
}
- if ((sbp = address2bpstruct(event->proc, event->e_un.brk_addr))) {
+ if ((sbp = address2bpstruct(leader, event->e_un.brk_addr))) {
+ if (sbp->libsym == NULL) {
+ continue_after_breakpoint(event->proc, sbp);
+ return;
+ }
+
if (strcmp(sbp->libsym->name, "") == 0) {
debug(DEBUG_PROCESS, "Hit _dl_debug_state breakpoint!\n");
- arch_check_dbg(event->proc);
+ arch_check_dbg(leader);
}
+
if (event->proc->state != STATE_IGNORED) {
event->proc->stack_pointer = get_stack_pointer(event->proc);
event->proc->return_addr =
@@ -605,7 +617,7 @@
if (event->proc->need_to_reinitialize_breakpoints
&& (strcmp(sbp->libsym->name, PLTs_initialized_by_here) ==
0))
- reinitialize_breakpoints(event->proc);
+ reinitialize_breakpoints(leader);
#endif
continue_after_breakpoint(event->proc, sbp);
@@ -682,6 +694,7 @@
debug(DEBUG_FUNCTION, "callstack_pop(pid=%d)", proc->pid);
elem = &proc->callstack[proc->callstack_depth - 1];
if (!elem->is_syscall && elem->return_addr) {
+ assert(proc->leader != NULL);
delete_breakpoint(proc, elem->return_addr);
}
if (elem->arch_ptr != NULL) {
diff --git a/libltrace.c b/libltrace.c
index 018923d..caccb48 100644
--- a/libltrace.c
+++ b/libltrace.c
@@ -27,7 +27,7 @@
printf("stop_non_p_processes: %d terminated?\n", it->pid);
continue;
}
- if (p_proc == proc) {
+ if (p_proc == proc || p_proc->leader == proc->leader) {
stop = 0;
break;
}
diff --git a/proc.c b/proc.c
index b553246..0423eeb 100644
--- a/proc.c
+++ b/proc.c
@@ -32,16 +32,21 @@
#endif /* defined(HAVE_LIBUNWIND) */
add_process(proc);
+ assert(proc->leader != NULL);
- breakpoints_init(proc, enable);
+ if (proc->leader == proc)
+ breakpoints_init(proc, enable);
return proc;
}
-void
-open_pid(pid_t pid) {
+static void
+open_one_pid(pid_t pid)
+{
Process *proc;
char *filename;
+ debug(DEBUG_PROCESS, "open_one_pid(pid=%d)", pid);
+
if (trace_pid(pid) < 0) {
fprintf(stderr, "Cannot attach to pid %u: %s\n", pid,
@@ -62,6 +67,31 @@
proc->breakpoints_enabled = 1;
}
+/* Attach to PID and to every task that process_tasks() reports for
+ * it, one open_one_pid() call per task.  If the task list cannot be
+ * obtained, a diagnostic is printed and only PID itself is
+ * attached.  */
+void
+open_pid(pid_t pid)
+{
+	debug(DEBUG_PROCESS, "open_pid(pid=%d)", pid);
+
+	pid_t *list = NULL;
+	size_t count = 0;
+	int listed = process_tasks(pid, &list, &count) >= 0;
+
+	if (!listed) {
+		fprintf(stderr, "Cannot obtain tasks of pid %u: %s\n", pid,
+			strerror(errno));
+
+		// Attach at least this one.
+		open_one_pid(pid);
+		return;
+	}
+
+	for (size_t k = 0; k < count; ++k)
+		open_one_pid(list[k]);
+
+	/* The array was allocated by process_tasks; it is ours to free.  */
+	free(list);
+}
+
static enum pcb_status
find_proc(Process * proc, void * data)
{
@@ -92,11 +122,51 @@
}
return NULL;
}
+
+/* Call CB on IT and on each Process that follows it on the NEXT
+ * chain, for as long as the visited Process has the same LEADER that
+ * IT had on entry.  Returns the Process for which CB returned
+ * pcb_stop, or NULL if the walk ended without that (or IT was
+ * NULL).  */
+Process *
+each_task(Process * it, enum pcb_status (* cb)(Process * proc, void * data),
+	  void * data)
+{
+	if (it == NULL)
+		return NULL;
+
+	Process * const group = it->leader;
+	while (it != NULL && it->leader == group) {
+		/* Fetch the successor first: the callback might call
+		 * remove_process.  */
+		Process * following = it->next;
+		if (cb(it, data) == pcb_stop)
+			return it;
+		it = following;
+	}
+	return NULL;
+}
+
+/* Link PROC into the process list and set up PROC->leader.
+ * When PROC->pid is non-zero, process_leader() is asked for the
+ * thread group ID.  If that is PROC's own pid, PROC becomes its own
+ * leader.  Otherwise the leader is looked up with pid2proc(); if it
+ * is found, PROC is chained right behind it, and if not, PROC->leader
+ * is NULL and PROC goes to the head of list_of_processes.  When
+ * PROC->pid is zero, PROC->leader is not assigned here and PROC goes
+ * to the head of the list.  */
void
add_process(Process * proc)
{
- proc->next = list_of_processes;
- list_of_processes = proc;
+ /* Link that will point at PROC; the list head unless a leader
+ * is found below. */
+ Process ** leaderp = &list_of_processes;
+ if (proc->pid) {
+ pid_t tgid = process_leader(proc->pid);
+ if (tgid == proc->pid)
+ proc->leader = proc;
+ else {
+ Process * leader = pid2proc(tgid);
+ proc->leader = leader;
+ if (leader != NULL)
+ // NULL: sub-task added before leader?
+ leaderp = &leader->next;
+ }
+ }
+ /* Splice PROC in at the chosen link. */
+ proc->next = *leaderp;
+ *leaderp = proc;
+}
+
+/* each_task callback: log PROC and its current leader, then set
+ * PROC->leader to NULL.  Dereferences PROC->leader, so that must not
+ * be NULL on entry.  DATA is unused.  Always returns pcb_cont.  */
+static enum pcb_status
+clear_leader(Process * proc, void * data)
+{
+	const pid_t task = proc->pid;
+	const pid_t group = proc->leader->pid;
+
+	debug(DEBUG_FUNCTION, "detach_task %d from leader %d",
+	      task, group);
+	proc->leader = NULL;
+	return pcb_cont;
}
void
@@ -106,6 +176,9 @@
debug(DEBUG_FUNCTION, "remove_proc(pid=%d)", proc->pid);
+ if (proc->leader == proc)
+ each_task(proc, &clear_leader, NULL);
+
if (list_of_processes == proc) {
tmp = list_of_processes;
list_of_processes = list_of_processes->next;
diff --git a/sysdeps/linux-gnu/events.c b/sysdeps/linux-gnu/events.c
index 52978b4..b944cd6 100644
--- a/sysdeps/linux-gnu/events.c
+++ b/sysdeps/linux-gnu/events.c
@@ -53,22 +53,28 @@
return &event;
}
get_arch_dep(event.proc);
- event.proc->instruction_pointer = NULL;
debug(3, "event from pid %u", pid);
- if (event.proc->breakpoints_enabled == -1) {
- event.type = EVENT_NONE;
+ if (event.proc->breakpoints_enabled == -1)
trace_set_options(event.proc, event.proc->pid);
- enable_all_breakpoints(event.proc);
- continue_process(event.proc->pid);
- debug(DEBUG_EVENT, "event: NONE: pid=%d (enabling breakpoints)", pid);
- return &event;
- } else if (!event.proc->libdl_hooked) {
- /* debug struct may not have been written yet.. */
- if (linkmap_init(event.proc, &main_lte) == 0) {
- event.proc->libdl_hooked = 1;
+ Process *leader = event.proc->leader;
+ if (leader == event.proc) {
+ if (event.proc->breakpoints_enabled == -1) {
+ event.type = EVENT_NONE;
+ enable_all_breakpoints(event.proc);
+ continue_process(event.proc->pid);
+ debug(DEBUG_EVENT,
+ "event: NONE: pid=%d (enabling breakpoints)",
+ pid);
+ return &event;
+ } else if (!event.proc->libdl_hooked) {
+ /* debug struct may not have been written yet.. */
+ if (linkmap_init(event.proc, &main_lte) == 0) {
+ event.proc->libdl_hooked = 1;
+ }
}
}
+ event.proc->instruction_pointer = (void *)(uintptr_t)-1;
event.proc->instruction_pointer = get_instruction_pointer(event.proc);
if (event.proc->instruction_pointer == (void *)(uintptr_t)-1) {
@@ -148,7 +154,8 @@
void * break_address
= event.proc->instruction_pointer - DECR_PC_AFTER_BREAK;
if ((stop_signal == SIGSEGV || stop_signal == SIGILL)
- && address2bpstruct(event.proc, break_address))
+ && leader != NULL
+ && address2bpstruct(leader, break_address))
stop_signal = SIGTRAP;
if (stop_signal != (SIGTRAP | event.proc->tracesysgood)
diff --git a/sysdeps/linux-gnu/proc.c b/sysdeps/linux-gnu/proc.c
index eca3548..b55c5ef 100644
--- a/sysdeps/linux-gnu/proc.c
+++ b/sysdeps/linux-gnu/proc.c
@@ -2,12 +2,19 @@
#include "common.h"
#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
#include <inttypes.h>
#include <link.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
+#include <dirent.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sys/syscall.h>
+
/* /proc/pid doesn't exist just after the fork, and sometimes `ltrace'
* couldn't open it to find the executable. So it may be necessary to
@@ -16,17 +23,19 @@
#define MAX_DELAY 100000 /* 100000 microseconds = 0.1 seconds */
+/* Declare a local char buffer VAR and format into it the /proc path
+ * described by FORMAT (expected to contain a single %d conversion)
+ * for PID.  The buffer has room for the widest decimal rendering of
+ * a pid_t, and snprintf keeps the write inside the buffer in any
+ * case.  N.B. this expands to a declaration followed by a statement,
+ * so it can only be used where a declaration is allowed.  */
+#define PROC_PID_FILE(VAR, FORMAT, PID)				\
+	char VAR[strlen(FORMAT) + 3 * sizeof(pid_t) + 1];	\
+	snprintf(VAR, sizeof(VAR), FORMAT, PID)
+
/*
* Returns a (malloc'd) file name corresponding to a running pid
*/
char *
pid2name(pid_t pid) {
- char proc_exe[1024];
-
if (!kill(pid, 0)) {
int delay = 0;
- sprintf(proc_exe, "/proc/%d/exe", pid);
+ PROC_PID_FILE(proc_exe, "/proc/%d/exe", pid);
while (delay < MAX_DELAY) {
if (!access(proc_exe, F_OK)) {
@@ -38,6 +47,167 @@
return NULL;
}
+/* Open /proc/PID/status for reading.  Returns the stream, or NULL if
+ * fopen fails; the caller closes it.  */
+static FILE *
+open_status_file(pid_t pid)
+{
+	PROC_PID_FILE(status_path, "/proc/%d/status", pid);
+
+	/* Failure is deliberately silent.  It typically means the
+	   process is about to terminate and its files are gone
+	   already.  This function is called from the event loop,
+	   and we don't want to clutter the output just because the
+	   process terminates.  */
+	FILE * stream = fopen(status_path, "r");
+	return stream;
+}
+
+/* Read lines from FILE, starting at its current position, until one
+ * is found whose first LEN bytes equal those of PREFIX.
+ * Returns that line in a malloc'd buffer which the caller must free,
+ * or NULL when getline() fails or hits end of file first.  */
+static char *
+find_line_starting(FILE * file, const char * prefix, size_t len)
+{
+	char * line = NULL;
+	size_t line_len = 0;
+
+	/* Loop on getline's own result rather than on feof(): that
+	 * covers both end of file and read errors.  */
+	while (getline(&line, &line_len, file) >= 0) {
+		if (strncmp(line, prefix, len) == 0)
+			return line;
+	}
+
+	/* getline may have allocated a buffer even though it failed;
+	 * release it so that a miss doesn't leak.  */
+	free(line);
+	return NULL;
+}
+
+/* Call CB(line, PREFIX, DATA) for each remaining line of FILE that
+ * starts with PREFIX.  Stops when CB returns pcb_stop or when no
+ * further matching line is found.  Each line is freed right after
+ * its callback returns, so CB must not keep the pointer.  */
+static void
+each_line_starting(FILE * file, const char *prefix,
+		   enum pcb_status (*cb)(const char * line, const char * prefix,
+					 void * data),
+		   void * data)
+{
+	const size_t prefix_len = strlen(prefix);
+
+	for (;;) {
+		char * match = find_line_starting(file, prefix, prefix_len);
+		if (match == NULL)
+			break;
+
+		enum pcb_status verdict = cb(match, prefix, data);
+		free (match);
+		if (verdict == pcb_stop)
+			break;
+	}
+}
+
+/* each_line_starting callback: parse the number that follows PREFIX
+ * on LINE with atoi and store it in the pid_t that DATA points to.
+ * Always returns pcb_stop.  */
+static enum pcb_status
+process_leader_cb(const char * line, const char * prefix, void * data)
+{
+	const char * digits = line + strlen(prefix);
+	*(pid_t *)data = atoi(digits);
+	return pcb_stop;
+}
+
+/* Return the thread group ID of task PID, as read from the "Tgid:"
+ * line of /proc/PID/status.  If the status file can't be opened or
+ * has no such line, PID itself is returned.  */
+pid_t
+process_leader(pid_t pid)
+{
+	FILE * status = open_status_file(pid);
+	if (status == NULL)
+		return pid;
+
+	pid_t tgid = pid;
+	each_line_starting(status, "Tgid:\t", &process_leader_cb, &tgid);
+	fclose(status);
+	return tgid;
+}
+
+/* each_line_starting callback: look at the character that follows
+ * PREFIX on LINE and store 1 in the int that DATA points to if it is
+ * 't' or 'T', 0 otherwise.  Always returns pcb_stop.  */
+static enum pcb_status
+process_stopped_cb(const char * line, const char * prefix, void * data)
+{
+	int * stopped = data;
+
+	switch (line[strlen(prefix)]) {
+	case 't':	// tracing stop
+	case 'T':	// job control stop
+		*stopped = 1;
+		break;
+	default:
+		*stopped = 0;
+		break;
+	}
+	return pcb_stop;
+}
+
+/* Tell whether task PID is stopped, judging by the "State:" line of
+ * /proc/PID/status.  Returns 1 if the state character is 't' or 'T',
+ * 0 for any other state, and -1 if the status file can't be opened
+ * or has no such line.  */
+int
+process_stopped(pid_t pid)
+{
+	FILE * status = open_status_file(pid);
+	if (status == NULL)
+		return -1;
+
+	int verdict = -1;
+	each_line_starting(status, "State:\t", &process_stopped_cb,
+			   &verdict);
+	fclose(status);
+	return verdict;
+}
+
+/* each_line_starting callback: copy the character that follows
+ * PREFIX on LINE into the char that DATA points to.  Always returns
+ * pcb_stop.  */
+static enum pcb_status
+process_status_cb(const char * line, const char * prefix, void * data)
+{
+	char * out = data;
+	const size_t skip = strlen(prefix);
+
+	*out = line[skip];
+	return pcb_stop;
+}
+
+/* Return the state character of task PID, i.e. the first character
+ * after "State:\t" in /proc/PID/status.  Returns '?' if the status
+ * file can't be opened or has no such line.  */
+char
+process_status(pid_t pid)
+{
+	FILE * status = open_status_file(pid);
+	if (status == NULL)
+		return '?';
+
+	char state = '?';
+	each_line_starting(status, "State:\t", &process_status_cb, &state);
+	fclose(status);
+	return state;
+}
+
+/* Return non-zero iff STR is a non-empty string made up solely of
+ * decimal digits.  STR must not be NULL.  */
+static int
+all_digits(const char *str)
+{
+	/* An empty name is not a number.  */
+	if (*str == '\0')
+		return 0;
+
+	/* The cast matters: handing isdigit a negative char value is
+	 * undefined behavior.  */
+	while (isdigit((unsigned char)*str))
+		str++;
+	return *str == '\0';
+}
+
+/* Collect the IDs of the tasks (threads) of process PID by listing
+ * /proc/PID/task.
+ * On success returns 0 and stores a malloc'd array in *RET_TASKS
+ * (NULL if no task was found) and its length in *RET_N; the caller
+ * frees the array.  On failure returns -1, leaves the output
+ * arguments alone, and keeps errno as set by the failing call.  */
+int
+process_tasks(pid_t pid, pid_t **ret_tasks, size_t *ret_n)
+{
+	PROC_PID_FILE(fn, "/proc/%d/task", pid);
+	DIR * d = opendir(fn);
+	if (d == NULL)
+		return -1;
+
+	/* XXX This is racy.  We need to stop the tasks that we
+	   discover this way and re-scan the directory to eventually
+	   reach a full set of tasks.  */
+	pid_t *tasks = NULL;
+	size_t n = 0;
+	size_t alloc = 0;
+
+	for (;;) {
+		/* readdir returns NULL both at the end of the
+		 * directory and on error; only errno tells the two
+		 * apart, so clear it before each call.  */
+		errno = 0;
+		struct dirent *result = readdir(d);
+		if (result == NULL) {
+			if (errno != 0)
+				goto fail;
+			break;
+		}
+
+		if (result->d_type != DT_DIR || !all_digits(result->d_name))
+			continue;
+
+		if (n >= alloc) {
+			/* Grow geometrically, starting at 8 slots.  */
+			alloc = alloc > 0 ? (2 * alloc) : 8;
+			pid_t *ntasks = realloc(tasks,
+						sizeof(*tasks) * alloc);
+			if (ntasks == NULL)
+				goto fail;
+			tasks = ntasks;
+		}
+		tasks[n++] = atoi(result->d_name);
+	}
+
+	closedir(d);
+
+	*ret_tasks = tasks;
+	*ret_n = n;
+	return 0;
+
+fail:
+	{
+		/* Release everything, but keep the errno of the call
+		 * that failed: the caller reports it.  */
+		int saved_errno = errno;
+		free(tasks);
+		closedir(d);
+		errno = saved_errno;
+	}
+	return -1;
+}
+
static int
find_dynamic_entry_addr(Process *proc, void *pvAddr, int d_tag, void **addr) {
int i = 0, done = 0;
@@ -286,3 +456,14 @@
free(rdbg);
return 0;
}
+
+/* Send signal SIG to the single task PID by invoking the tkill
+ * system call directly.  errno is cleared before the call; the
+ * syscall's return value is passed back.  */
+int
+task_kill (pid_t pid, int sig)
+{
+	// Taken from GDB
+	errno = 0;
+	return (int)syscall (__NR_tkill, pid, sig);
+}
diff --git a/sysdeps/linux-gnu/trace.c b/sysdeps/linux-gnu/trace.c
index 6fe8e5e..fceef82 100644
--- a/sysdeps/linux-gnu/trace.c
+++ b/sysdeps/linux-gnu/trace.c
@@ -7,6 +7,7 @@
#include <sys/wait.h>
#include "ptrace.h"
#include <asm/unistd.h>
+#include <assert.h>
#include "common.h"
@@ -87,9 +88,9 @@
in pid. The child is sent a SIGSTOP, but will not
necessarily have stopped by the completion of this call;
use wait() to wait for the child to stop. */
- if (waitpid (pid, NULL, 0) != pid) {
+ if (waitpid (pid, NULL, __WALL) != pid) {
perror ("trace_pid: waitpid");
- exit (1);
+ return -1;
}
return 0;