Fix serious race in attach to many-threaded process
- the gist is that tasks continue running while we attach, so more tasks
come into existence, or the ones that we didn't attach to yet disappear,
etc.
- besides, we really can't enable breakpoints before we are done attaching,
otherwise the still-running tasks risk running into them and dying of
SIGTRAP.
diff --git a/breakpoints.c b/breakpoints.c
index 4e74923..1eff8b0 100644
--- a/breakpoints.c
+++ b/breakpoints.c
@@ -183,7 +183,7 @@
free(sbp);
}
-void
+int
breakpoints_init(Process *proc, int enable)
{
struct library_symbol *sym;
@@ -215,6 +215,11 @@
if (options.libcalls && proc->filename) {
proc->list_of_symbols = read_elf(proc);
+ if (proc->list_of_symbols == NULL) {
+ /* XXX leak breakpoints */
+ return -1;
+ }
+
if (opt_e) {
struct library_symbol **tmp1 = &proc->list_of_symbols;
while (*tmp1) {
@@ -242,6 +247,7 @@
proc->callstack_depth = 0;
proc->breakpoints_enabled = -1;
+ return 0;
}
void
diff --git a/common.h b/common.h
index f40d405..1af2ac6 100644
--- a/common.h
+++ b/common.h
@@ -295,7 +295,7 @@
extern pid_t execute_program(const char * command, char ** argv);
extern int display_arg(enum tof type, Process * proc, int arg_num, arg_type_info * info);
extern Breakpoint * address2bpstruct(Process * proc, void * addr);
-extern void breakpoints_init(Process * proc, int enable);
+extern int breakpoints_init(Process * proc, int enable);
extern void insert_breakpoint(Process * proc, void * addr,
struct library_symbol * libsym, int enable);
extern void delete_breakpoint(Process * proc, void * addr);
@@ -308,7 +308,7 @@
extern void show_summary(void);
extern arg_type_info * lookup_prototype(enum arg_type at);
-extern void do_init_elf(struct ltelf *lte, const char *filename);
+extern int do_init_elf(struct ltelf *lte, const char *filename);
extern void do_close_elf(struct ltelf *lte);
extern int in_load_libraries(const char *name, struct ltelf *lte, size_t count, GElf_Sym *sym);
extern struct library_symbol *library_symbols;
diff --git a/ltrace-elf.c b/ltrace-elf.c
index 841fac7..21350da 100644
--- a/ltrace-elf.c
+++ b/ltrace-elf.c
@@ -14,7 +14,6 @@
#include "common.h"
-void do_init_elf(struct ltelf *lte, const char *filename);
void do_close_elf(struct ltelf *lte);
void add_library_symbol(GElf_Addr addr, const char *name,
struct library_symbol **library_symbolspp,
@@ -136,7 +135,7 @@
return 0;
}
-void
+int
do_init_elf(struct ltelf *lte, const char *filename) {
int i;
GElf_Addr relplt_addr = 0;
@@ -147,7 +146,7 @@
lte->fd = open(filename, O_RDONLY);
if (lte->fd == -1)
- error(EXIT_FAILURE, errno, "Can't open \"%s\"", filename);
+ return 1;
#ifdef HAVE_ELF_C_READ_MMAP
lte->elf = elf_begin(lte->fd, ELF_C_READ_MMAP, NULL);
@@ -454,6 +453,7 @@
debug(1, "%s %zd PLT relocations", filename, lte->relplt_count);
}
+ return 0;
}
void
@@ -623,7 +623,8 @@
elf_version(EV_CURRENT);
- do_init_elf(lte, proc->filename);
+ if (do_init_elf(lte, proc->filename))
+ return NULL;
memcpy(&main_lte, lte, sizeof(struct ltelf));
@@ -635,7 +636,9 @@
proc->e_machine = lte->ehdr.e_machine;
for (i = 0; i < library_num; ++i) {
- do_init_elf(<e[i + 1], library[i]);
+ if (do_init_elf(<e[i + 1], library[i]))
+ error(EXIT_FAILURE, errno, "Can't open \"%s\"",
+ proc->filename);
}
if (!options.no_plt) {
diff --git a/proc.c b/proc.c
index cd7b354..0425e09 100644
--- a/proc.c
+++ b/proc.c
@@ -11,6 +11,7 @@
#include <errno.h>
#include <stdlib.h>
#include <assert.h>
+#include <error.h>
#include "common.h"
@@ -23,6 +24,7 @@
perror("malloc");
exit(1);
}
+
proc->filename = strdup(filename);
proc->breakpoints_enabled = -1;
proc->pid = pid;
@@ -32,10 +34,18 @@
#endif /* defined(HAVE_LIBUNWIND) */
add_process(proc);
- assert(proc->leader != NULL);
+ if (proc->leader == NULL) {
+ free(proc);
+ return NULL;
+ }
if (proc->leader == proc)
- breakpoints_init(proc, enable);
+ if (breakpoints_init(proc, enable)) {
+ fprintf(stderr, "failed to init breakpoints %d\n",
+ proc->pid);
+ remove_process(proc);
+ return NULL;
+ }
return proc;
}
@@ -47,19 +57,28 @@
char *filename;
debug(DEBUG_PROCESS, "open_one_pid(pid=%d)", pid);
- if (trace_pid(pid) < 0)
- return 0;
+ /* Get the filename first. Should the trace_pid fail, we can
+ * easily free it, untracing is more work. */
+ if ((filename = pid2name(pid)) == NULL
+ || trace_pid(pid) < 0) {
+ free(filename);
+ return -1;
+ }
- filename = pid2name(pid);
- if (filename == NULL)
- return 0;
-
- proc = open_program(filename, pid, 1);
+ proc = open_program(filename, pid, 0);
+ if (proc == NULL)
+ return -1;
trace_set_options(proc, pid);
- continue_process(pid);
- proc->breakpoints_enabled = 1;
- return 1;
+ return 0;
+}
+
+enum pcb_status
+start_one_pid(Process * proc, void * data)
+{
+ continue_process(proc->pid);
+ proc->breakpoints_enabled = 1;
+ return pcb_cont;
}
void
@@ -72,7 +91,7 @@
return;
/* First, see if we can attach the requested PID itself. */
- if (!open_one_pid(pid)) {
+ if (open_one_pid(pid)) {
fprintf(stderr, "Cannot attach to pid %u: %s\n",
pid, strerror(errno));
return;
@@ -87,26 +106,40 @@
* reach a point where process_tasks doesn't give any new
* processes (because there's nobody left to produce
* them). */
+ size_t old_ntasks = 0;
int have_all;
- do {
+ while (1) {
pid_t *tasks;
size_t ntasks;
size_t i;
+
if (process_tasks(pid, &tasks, &ntasks) < 0) {
fprintf(stderr, "Cannot obtain tasks of pid %u: %s\n",
pid, strerror(errno));
- return;
+ goto start;
}
have_all = 1;
for (i = 0; i < ntasks; ++i)
if (pid2proc(tasks[i]) == NULL
- && !open_one_pid(tasks[i]))
+ && open_one_pid(tasks[i]))
have_all = 0;
free(tasks);
- } while (have_all == 0);
+ if (have_all && old_ntasks == ntasks)
+ break;
+ old_ntasks = ntasks;
+ }
+
+ /* Done. Now initialize breakpoints and then continue
+ * everyone. */
+ Process * leader;
+start:
+ leader = pid2proc(pid)->leader;
+ enable_all_breakpoints(leader);
+
+ each_task(pid2proc(pid)->leader, start_one_pid, NULL);
}
static enum pcb_status
@@ -163,13 +196,16 @@
Process ** leaderp = &list_of_processes;
if (proc->pid) {
pid_t tgid = process_leader(proc->pid);
+ if (tgid == 0)
+ /* Must have been terminated before we managed
+ * to fully attach. */
+ return;
if (tgid == proc->pid)
proc->leader = proc;
else {
Process * leader = pid2proc(tgid);
proc->leader = leader;
if (leader != NULL)
- // NULL: sub-task added before leader?
leaderp = &leader->next;
}
}
diff --git a/sysdeps/linux-gnu/proc.c b/sysdeps/linux-gnu/proc.c
index a3fb3af..e3b71e5 100644
--- a/sysdeps/linux-gnu/proc.c
+++ b/sysdeps/linux-gnu/proc.c
@@ -102,7 +102,7 @@
pid_t
process_leader(pid_t pid)
{
- pid_t tgid = pid;
+ pid_t tgid = 0;
FILE * file = open_status_file(pid);
if (file != NULL) {
each_line_starting(file, "Tgid:\t", &process_leader_cb, &tgid);
diff --git a/sysdeps/linux-gnu/trace.c b/sysdeps/linux-gnu/trace.c
index c934d49..dbe4eec 100644
--- a/sysdeps/linux-gnu/trace.c
+++ b/sysdeps/linux-gnu/trace.c
@@ -148,7 +148,6 @@
continue_process(pid_t pid)
{
debug(DEBUG_PROCESS, "continue_process: pid=%d", pid);
- //printf("continue_process %d\n", pid);
/* Only really continue the process if there are no events in
the queue for this process. Otherwise just for the other