Allow threads 60 seconds to exit before being forceful

Right now the fio status thread can wait forever for jobs to exit
after ctrl-c (or another signal) is sent. Be a bit more brutal and
force-quit jobs that haven't exited on their own within 60 seconds
of being told to terminate. That should be long enough to be sure
that they are stuck in some way.

Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/backend.c b/backend.c
index 68540ab..d9ece4c 100644
--- a/backend.c
+++ b/backend.c
@@ -1603,6 +1603,13 @@
 	return (int) (uintptr_t) ret;
 }
 
+static void dump_td_info(struct thread_data *td)
+{
+	log_err("fio: job '%s' hasn't exited in %lu seconds, it appears to "
+		"be stuck. Doing forceful exit of this job.\n", td->o.name,
+			(unsigned long) time_since_now(&td->terminate_time));
+}
+
 /*
  * Run over the job map and reap the threads that have exited, if any.
  */
@@ -1681,6 +1688,17 @@
 		}
 
 		/*
+		 * If the job is stuck, do a forceful timeout of it and
+		 * move on.
+		 */
+		if (td->terminate &&
+		    time_since_now(&td->terminate_time) >= FIO_REAP_TIMEOUT) {
+			dump_td_info(td);
+			td_set_runstate(td, TD_REAPED);
+			goto reaped;
+		}
+
+		/*
 		 * thread is not dead, continue
 		 */
 		pending++;
diff --git a/fio.h b/fio.h
index df0d020..199610c 100644
--- a/fio.h
+++ b/fio.h
@@ -254,6 +254,7 @@
 	struct timeval epoch;	/* time job was started */
 	struct timeval last_issue;
 	struct timeval tv_cache;
+	struct timeval terminate_time;
 	unsigned int tv_cache_nr;
 	unsigned int tv_cache_mask;
 	unsigned int ramp_time_over;
@@ -486,6 +487,12 @@
 extern int td_bump_runstate(struct thread_data *, int);
 extern void td_restore_runstate(struct thread_data *, int);
 
+/*
+ * Allow 60 seconds for a job to quit on its own, otherwise reap with
+ * a vengeance.
+ */
+#define FIO_REAP_TIMEOUT	60
+
 #define TERMINATE_ALL		(-1)
 extern void fio_terminate_threads(int);
 
diff --git a/libfio.c b/libfio.c
index 8af1129..9245688 100644
--- a/libfio.c
+++ b/libfio.c
@@ -199,8 +199,13 @@
 		if (group_id == TERMINATE_ALL || groupid == td->groupid) {
 			dprint(FD_PROCESS, "setting terminate on %s/%d\n",
 						td->o.name, (int) td->pid);
+
+			if (td->terminate)
+				continue;
+
 			td->terminate = 1;
 			td->o.start_delay = 0;
+			fio_gettime(&td->terminate_time, NULL);
 
 			/*
 			 * if the thread is running, just let it exit