Improve thread reap handling

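The thread reaping code is a bit of a mess currently. Streamline it and
clean it up: call waitpid() from a single place instead of two, and
handle the exit status and job error accounting in one spot.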

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/fio.c b/fio.c
index fad4cca..0f72fd9 100644
--- a/fio.c
+++ b/fio.c
@@ -824,6 +824,8 @@
 	 */
 	pending = cputhreads = 0;
 	for_each_td(td, i) {
+		int flags;
+
 		/*
 		 * ->io_ops is NULL for a thread that has closed its
 		 * io engine
@@ -831,65 +833,57 @@
 		if (td->io_ops && td->io_ops->flags & FIO_CPUIO)
 			cputhreads++;
 
-		if (td->runstate < TD_EXITED) {
-			/*
-			 * check if someone quit or got killed in an unusual way
-			 */
-			ret = waitpid(td->pid, &status, WNOHANG);
-			if (ret < 0) {
-				if (errno == ECHILD) {
-					log_err("fio: pid=%d disappeared\n", td->pid);
-					td_set_runstate(td, TD_REAPED);
-					goto reaped;
-				}
-				perror("waitpid");
-			} else if ((ret == td->pid) && WIFSIGNALED(status)) {
+		if (!td->pid || td->runstate == TD_REAPED)
+			continue;
+
+		flags = WNOHANG;
+		if (td->runstate == TD_EXITED)
+			flags = 0;
+
+		/*
+		 * check if the job exited, or quit or got killed in an unusual way
+		 */
+		ret = waitpid(td->pid, &status, flags);
+		if (ret < 0) {
+			if (errno == ECHILD) {
+				log_err("fio: pid=%d disappeared %d\n", td->pid, td->runstate);
+				td_set_runstate(td, TD_REAPED);
+				goto reaped;
+			}
+			perror("waitpid");
+		} else if (ret == td->pid) {
+			if (WIFSIGNALED(status)) {
 				int sig = WTERMSIG(status);
 
 				log_err("fio: pid=%d, got signal=%d\n", td->pid, sig);
 				td_set_runstate(td, TD_REAPED);
 				goto reaped;
 			}
-		}
+			if (WIFEXITED(status)) {
+				if (WEXITSTATUS(status) && !td->error)
+					td->error = WEXITSTATUS(status);
+				if (td->use_thread) {
+					long ret;
 
-		if (td->runstate != TD_EXITED) {
-			if (td->runstate < TD_RUNNING)
-				pending++;
-
-			continue;
-		}
-
-		if (td->error)
-			exit_value++;
-
-		td_set_runstate(td, TD_REAPED);
-
-		if (td->use_thread) {
-			long ret;
-
-			if (pthread_join(td->thread, (void *) &ret))
-				perror("thread_join");
-		} else {
-			int status;
-
-			ret = waitpid(td->pid, &status, 0);
-			if (ret < 0) {
-				if (errno == ECHILD) {
-					log_err("fio: pid=%d disappeared\n", td->pid);
-					td_set_runstate(td, TD_REAPED);
-					goto reaped;
+					if (pthread_join(td->thread, (void *) &ret))
+						perror("pthread_join");
 				}
-				perror("waitpid");
-			} else if (WIFEXITED(status) && WEXITSTATUS(status)) {
-				if (!exit_value)
-					exit_value++;
+				td_set_runstate(td, TD_REAPED);
+				goto reaped;
 			}
 		}
 
+		/*
+		 * thread is not dead, continue
+		 */
+		continue;
 reaped:
 		(*nr_running)--;
 		(*m_rate) -= td->ratemin;
 		(*t_rate) -= td->rate;
+
+		if (td->error)
+			exit_value++;
 	}
 
 	if (*nr_running == cputhreads && !pending)
@@ -945,6 +939,8 @@
 		init_disk_util(td);
 	}
 
+	set_genesis_time();
+
 	while (todo) {
 		struct thread_data *map[MAX_JOBS];
 		struct timeval this_start;
@@ -1103,6 +1099,8 @@
 		setup_log(&agg_io_log[DDIR_WRITE]);
 	}
 
+	set_genesis_time();
+
 	disk_util_timer_arm();
 
 	run_threads();