Fix handling of test/threaded_execve.c testcase

Since 3.0, Linux has a way to identify which thread execve'ed.
This patch makes use of it in order to properly dispose
of the disappeared ("superseded") thread leader
and replace it with the execve'ed thread.

Before this patch, strace was "leaking" the thread which exec'ed:
it thought that thread was still running. It would look like this:

18460 pause( <unfinished ...>     <=== thread leader
18466 execve("/proc/self/exe", ["exe", "exe"], [/* 47 vars */] <unfinished ...>
18465 +++ exited with 0 +++       <=== exits from other threads
18460 <... pause resumed> )             = 0

The last line is wrong: it's not pause resumed, it's execve resumed.
If the thread leader did exit instead of pause, it would be much worse:
strace panics because it thinks it sees a return from the exit syscall!

And strace isn't aware that 18466 (the exec'ed thread) is gone.
It still thinks it is executing the execve syscall.

* strace.c: New variable "static char *os_release".
(get_os_release): New static function.
(main): Call get_os_release to retrieve Linux version.
(trace): If we see PTRACE_EVENT_EXEC, retrieve old pid, and if it
differs from new one, free one of tcbs and print correct messages.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
diff --git a/strace.c b/strace.c
index 782fe8f..7cbcb0c 100644
--- a/strace.c
+++ b/strace.c
@@ -45,6 +45,7 @@
 #include <grp.h>
 #include <string.h>
 #include <dirent.h>
+#include <sys/utsname.h>
 
 #ifdef LINUX
 # include <asm/unistd.h>
@@ -129,6 +130,8 @@
 static unsigned int nprocs, tcbtabsize;
 static const char *progname;
 
+static char *os_release; /* from uname() */
+
 static int detach(struct tcb *tcp);
 static int trace(void);
 static void cleanup(void);
@@ -965,6 +968,18 @@
 }
 #endif
 
+/* Store uname() release string in os_release. Noinline: keep struct utsname off main's stack */
+static void __attribute__ ((noinline))
+get_os_release(void)
+{
+	struct utsname u;
+	if (uname(&u) < 0)
+		perror_msg_and_die("uname");
+	os_release = strdup(u.release); /* heap copy outlives local 'u' */
+	if (!os_release)
+		die_out_of_memory();
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -977,6 +992,8 @@
 
 	strace_tracer_pid = getpid();
 
+	get_os_release();
+
 	/* Allocate the initial tcbtab.  */
 	tcbtabsize = argc;	/* Surely enough for all -p args.  */
 	tcbtab = calloc(tcbtabsize, sizeof(tcbtab[0]));
@@ -2324,10 +2341,6 @@
 static int
 trace()
 {
-	int pid;
-	int wait_errno;
-	int status, sig;
-	struct tcb *tcp;
 #ifdef LINUX
 	struct rusage ru;
 	struct rusage *rup = cflag ? &ru : NULL;
@@ -2337,6 +2350,12 @@
 #endif /* LINUX */
 
 	while (nprocs != 0) {
+		int pid;
+		int wait_errno;
+		int status, sig;
+		struct tcb *tcp;
+		unsigned event;
+
 		if (interrupted)
 			return 0;
 		if (interactive)
@@ -2390,11 +2409,12 @@
 				popen_pid = 0;
 			continue;
 		}
+
+		event = ((unsigned)status >> 16);
 		if (debug) {
 			char buf[sizeof("WIFEXITED,exitcode=%u") + sizeof(int)*3 /*paranoia:*/ + 16];
 #ifdef LINUX
-			unsigned ev = (unsigned)status >> 16;
-			if (ev) {
+			if (event != 0) {
 				static const char *const event_names[] = {
 					[PTRACE_EVENT_CLONE] = "CLONE",
 					[PTRACE_EVENT_FORK]  = "FORK",
@@ -2404,10 +2424,10 @@
 					[PTRACE_EVENT_EXIT]  = "EXIT",
 				};
 				const char *e;
-				if (ev < ARRAY_SIZE(event_names))
-					e = event_names[ev];
+				if (event < ARRAY_SIZE(event_names))
+					e = event_names[event];
 				else {
-					sprintf(buf, "?? (%u)", ev);
+					sprintf(buf, "?? (%u)", event);
 					e = buf;
 				}
 				fprintf(stderr, " PTRACE_EVENT_%s", e);
@@ -2434,8 +2454,62 @@
 			fprintf(stderr, " [wait(0x%04x) = %u] %s\n", status, pid, buf);
 		}
 
-		/* Look up `pid' in our table. */
+		/* Look up 'pid' in our table. */
 		tcp = pid2tcb(pid);
+
+#ifdef LINUX
+		/* Under Linux, execve changes pid to thread leader's pid,
+		 * and we see this changed pid on EVENT_EXEC and later,
+		 * execve sysexit. Leader "disappears" without exit
+		 * notification. Let user know that, drop leader's tcb,
+		 * and fix up pid in execve thread's tcb.
+		 * Effectively, execve thread's tcb replaces leader's tcb.
+		 *
+		 * BTW, leader is 'stuck undead' (doesn't report WIFEXITED
+		 * on exit syscall) in multithreaded programs exactly
+		 * in order to handle this case.
+		 *
+		 * PTRACE_GETEVENTMSG returns old pid starting from Linux 3.0.
+		 * On 2.6 and earlier, it can return garbage.
+		 */
+		if (event == PTRACE_EVENT_EXEC && os_release[0] >= '3') {
+			long old_pid = 0;
+			if (ptrace(PTRACE_GETEVENTMSG, pid, NULL, (long) &old_pid) >= 0
+			 && old_pid > 0
+			 && old_pid != pid
+			) {
+				struct tcb *execve_thread = pid2tcb(old_pid);
+				if (tcp) {
+					outf = tcp->outf;
+					curcol = tcp->curcol;
+					if (!cflag) {
+						if ((tcp->flags & (TCB_INSYSCALL|TCB_REPRINT)) == TCB_INSYSCALL) {
+							/* We printed "syscall(some params"
+							 * but didn't print "\n" yet.
+							 */
+							tprints(" <unfinished ...>\n");
+						}
+						printleader(tcp);
+						tprintf("+++ superseded by execve in pid %lu +++", old_pid);
+						printtrailer();
+						fflush(outf);
+					}
+					if (execve_thread) {
+						/* swap output FILEs (needed for -ff) */
+						tcp->outf = execve_thread->outf;
+						execve_thread->outf = outf;
+					}
+					droptcb(tcp);
+				}
+				tcp = execve_thread;
+				if (tcp) {
+					tcp->pid = pid;
+					tcp->flags |= TCB_REPRINT;
+				}
+			}
+		}
+#endif
+
 		if (tcp == NULL) {
 #ifdef LINUX
 			if (followfork) {
@@ -2549,7 +2623,7 @@
 #endif
 		}
 
-		if (((unsigned)status >> 16) != 0) {
+		if (event != 0) {
 			/* Ptrace event (we ignore all of them for now) */
 			goto restart_tracee_with_sig_0;
 		}