io_u timeout handling

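Further measures to prevent fio from getting stuck, even when the I/O
engine runs into timeout errors: initialize the io_u timeout handling
during setup, start each thread's timeout window at its epoch, and
treat ECHILD from waitpid() as a job that has disappeared, marking it
as reaped instead of waiting on it forever.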

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/fio.c b/fio.c
index 0ccd5d8..fad4cca 100644
--- a/fio.c
+++ b/fio.c
@@ -567,6 +567,8 @@
 		list_add(&io_u->list, &td->io_u_freelist);
 	}
 
+	io_u_init_timeout();
+
 	return 0;
 }
 
@@ -715,6 +717,7 @@
 	}
 
 	fio_gettime(&td->epoch, NULL);
+	memcpy(&td->timeout_end, &td->epoch, sizeof(td->epoch));
 	getrusage(RUSAGE_SELF, &td->ts.ru_start);
 
 	runtime[0] = runtime[1] = 0;
@@ -833,9 +836,14 @@
 			 * check if someone quit or got killed in an unusual way
 			 */
 			ret = waitpid(td->pid, &status, WNOHANG);
-			if (ret < 0)
+			if (ret < 0) {
+				if (errno == ECHILD) {
+					log_err("fio: pid=%d disappeared\n", td->pid);
+					td_set_runstate(td, TD_REAPED);
+					goto reaped;
+				}
 				perror("waitpid");
-			else if ((ret == td->pid) && WIFSIGNALED(status)) {
+			} else if ((ret == td->pid) && WIFSIGNALED(status)) {
 				int sig = WTERMSIG(status);
 
 				log_err("fio: pid=%d, got signal=%d\n", td->pid, sig);
@@ -865,9 +873,14 @@
 			int status;
 
 			ret = waitpid(td->pid, &status, 0);
-			if (ret < 0)
+			if (ret < 0) {
+				if (errno == ECHILD) {
+					log_err("fio: pid=%d disappeared\n", td->pid);
+					td_set_runstate(td, TD_REAPED);
+					goto reaped;
+				}
 				perror("waitpid");
-			else if (WIFEXITED(status) && WEXITSTATUS(status)) {
+			} else if (WIFEXITED(status) && WEXITSTATUS(status)) {
 				if (!exit_value)
 					exit_value++;
 			}