io_u timeout handling
Further measures to prevent fio from getting stuck, even in the case of
engine timeout errors.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/fio.c b/fio.c
index 0ccd5d8..fad4cca 100644
--- a/fio.c
+++ b/fio.c
@@ -567,6 +567,8 @@
list_add(&io_u->list, &td->io_u_freelist);
}
+ io_u_init_timeout();
+
return 0;
}
@@ -715,6 +717,7 @@
}
fio_gettime(&td->epoch, NULL);
+ memcpy(&td->timeout_end, &td->epoch, sizeof(td->epoch));
getrusage(RUSAGE_SELF, &td->ts.ru_start);
runtime[0] = runtime[1] = 0;
@@ -833,9 +836,14 @@
* check if someone quit or got killed in an unusual way
*/
ret = waitpid(td->pid, &status, WNOHANG);
- if (ret < 0)
+ if (ret < 0) {
+ if (errno == ECHILD) {
+ log_err("fio: pid=%d disappeared\n", td->pid);
+ td_set_runstate(td, TD_REAPED);
+ goto reaped;
+ }
perror("waitpid");
- else if ((ret == td->pid) && WIFSIGNALED(status)) {
+ } else if ((ret == td->pid) && WIFSIGNALED(status)) {
int sig = WTERMSIG(status);
log_err("fio: pid=%d, got signal=%d\n", td->pid, sig);
@@ -865,9 +873,14 @@
int status;
ret = waitpid(td->pid, &status, 0);
- if (ret < 0)
+ if (ret < 0) {
+ if (errno == ECHILD) {
+ log_err("fio: pid=%d disappeared\n", td->pid);
+ td_set_runstate(td, TD_REAPED);
+ goto reaped;
+ }
perror("waitpid");
- else if (WIFEXITED(status) && WEXITSTATUS(status)) {
+ } else if (WIFEXITED(status) && WEXITSTATUS(status)) {
if (!exit_value)
exit_value++;
}