Add a 'continue_on_error' option to fio

Add option to make fio continue on non-fatal errors.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/HOWTO b/HOWTO
index 536e370..0eab6e1 100644
--- a/HOWTO
+++ b/HOWTO
@@ -928,6 +928,14 @@
 		for doing these time calls will be excluded from other
 		uses. Fio will manually clear it from the CPU mask of other
 		jobs.
+continue_on_error=bool	Normally fio will exit the job on the first observed
+		failure. If this option is set, fio will continue the job when
+		there is a 'non-fatal error' (EIO or EILSEQ) until the runtime
+		is exceeded or the I/O size specified is completed. If this
+		option is used, there are two more stats that are appended,
+		the total error count and the first error. The error field
+		given in the stats is the first error that was hit during the
+		run.
 
 
 6.0 Interpreting the output
diff --git a/fio.1 b/fio.1
index b984a8c..637304e 100644
--- a/fio.1
+++ b/fio.1
@@ -680,6 +680,14 @@
 entering the kernel with a gettimeofday() call. The CPU set aside for doing
 these time calls will be excluded from other uses. Fio will manually clear it
 from the CPU mask of other jobs.
+.TP
+.BI continue_on_error \fR=\fPbool
+Normally fio will exit the job on the first observed failure. If this option is
+set, fio will continue the job when there is a 'non-fatal error'
+(\fBEIO\fR or \fBEILSEQ\fR) until the runtime is exceeded or the I/O size
+specified is completed. If this option is used, there are two more stats that
+are appended, the total error count and the first error. The error field given
+in the stats is the first error that was hit during the run.
 .SH OUTPUT
 While running, \fBfio\fR will display the status of the created jobs.  For
 example:
diff --git a/fio.c b/fio.c
index 632b002..e1da2c9 100644
--- a/fio.c
+++ b/fio.c
@@ -372,6 +372,43 @@
 		fio_gettime(&td->tv_cache, NULL);
 }
 
+/*
+ * Decide whether an I/O loop has to stop because of an error.
+ *
+ * *retptr is the caller's current result (a negative value is -errno);
+ * td->error is the error recorded on the thread, if any. Returns 1 when the
+ * caller must break out of its loop and 0 when it may keep going. For a
+ * non-fatal error with continue_on_error set, the error is counted, cleared
+ * from td and *retptr is reset to 0.
+ */
+static int break_on_this_error(struct thread_data *td, int *retptr)
+{
+	int ret = *retptr;
+	int err;
+
+	/* Nothing went wrong: keep going. */
+	if (ret >= 0 && !td->error)
+		return 0;
+
+	/* Without continue_on_error, any error ends the loop as before. */
+	if (!td->o.continue_on_error)
+		return 1;
+
+	/* Prefer the caller's -errno, else the error stored on the thread. */
+	err = ret < 0 ? -ret : td->error;
+	update_error_count(td, err);
+
+	/* Stop the I/O in case of a fatal error. */
+	if (!td_non_fatal_error(err))
+		return 1;
+
+	/* Continue with the I/Os in case of a non-fatal error. */
+	td_clear_error(td);
+	*retptr = 0;
+
+	return 0;
+}
+
 /*
  * The main verify engine. Runs over the writes we previously submitted,
  * reads the blocks back in, and checks the crc/md5 of the data.
@@ -432,9 +469,10 @@
 		ret = td_io_queue(td, io_u);
 		switch (ret) {
 		case FIO_Q_COMPLETED:
-			if (io_u->error)
+			if (io_u->error) {
 				ret = -io_u->error;
-			else if (io_u->resid) {
+				clear_io_u(td, io_u);
+			} else if (io_u->resid) {
 				int bytes = io_u->xfer_buflen - io_u->resid;
 				struct fio_file *f = io_u->file;
 
@@ -478,7 +516,7 @@
 			break;
 		}
 
-		if (ret < 0 || td->error)
+		if (break_on_this_error(td, &ret))
 			break;
 
 		/*
@@ -569,9 +607,10 @@
 		ret = td_io_queue(td, io_u);
 		switch (ret) {
 		case FIO_Q_COMPLETED:
-			if (io_u->error)
+			if (io_u->error) {
 				ret = -io_u->error;
-			else if (io_u->resid) {
+				clear_io_u(td, io_u);
+			} else if (io_u->resid) {
 				int bytes = io_u->xfer_buflen - io_u->resid;
 				struct fio_file *f = io_u->file;
 
@@ -626,7 +665,7 @@
 			break;
 		}
 
-		if (ret < 0 || td->error)
+		if (break_on_this_error(td, &ret))
 			break;
 
 		/*
diff --git a/fio.h b/fio.h
index 21d49a6..477b19a 100644
--- a/fio.h
+++ b/fio.h
@@ -111,6 +111,13 @@
 	unsigned long long io_bytes[2];
 	unsigned long runtime[2];
 	unsigned long total_run_time;
+
+	/*
+	 * IO Error related stats
+	 */
+	unsigned continue_on_error;
+	unsigned long total_err_count;
+	int first_error;
 };
 
 struct bssplit {
@@ -241,6 +248,11 @@
 	 */
 	unsigned int cpuload;
 	unsigned int cpucycle;
+
+	/*
+	 * I/O Error handling
+	 */
+	unsigned int continue_on_error;
 };
 
 #define FIO_VERROR_SIZE	128
@@ -369,6 +381,12 @@
 	 * For generating file sizes
 	 */
 	os_random_state_t file_size_state;
+
+	/*
+	 * Error counts
+	 */
+	unsigned int total_err_count;
+	int first_error;
 };
 
 /*
@@ -386,10 +404,13 @@
 			break;						\
 		int e = (err);						\
 		(td)->error = e;					\
-		snprintf(td->verror, sizeof(td->verror) - 1, "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg));	\
+		if (!(td)->first_error)					\
+			snprintf(td->verror, sizeof(td->verror) - 1, "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg));		\
 	} while (0)
 
 
+/* Reset td's recorded error; an expression, so the caller supplies the ';'. */
+#define td_clear_error(td)	((td)->error = 0)
 #define td_verror(td, err, func)	\
 	__td_verror((td), (err), strerror((err)), (func))
 #define td_vmsg(td, err, msg, func)	\
@@ -425,6 +446,15 @@
 
 #define MAX_JOBS	(1024)
 
+#define td_non_fatal_error(e)	((e) == EIO || (e) == EILSEQ)	/* e: positive errno */
+
+/* Count one more error on td; the very first one is kept in first_error. */
+static inline void update_error_count(struct thread_data *td, int err)
+{
+	if (++td->total_err_count == 1)
+		td->first_error = err;
+}
+
 static inline int should_fsync(struct thread_data *td)
 {
 	if (td->last_was_sync)
diff --git a/io_u.c b/io_u.c
index 34ab58a..276f3b0 100644
--- a/io_u.c
+++ b/io_u.c
@@ -412,6 +412,12 @@
 	td->cur_depth--;
 }
 
+void clear_io_u(struct thread_data *td, struct io_u *io_u)
+{
+	io_u->flags = io_u->flags & ~IO_U_F_FLIGHT;	/* drop the in-flight flag */
+	put_io_u(td, io_u);	/* then hand the io_u back via put_io_u() */
+}
+
 void requeue_io_u(struct thread_data *td, struct io_u **io_u)
 {
 	struct io_u *__io_u = *io_u;
@@ -994,6 +1000,17 @@
 		icd->error = io_u->error;
 		io_u_log_error(td, io_u);
 	}
+	if (td->o.continue_on_error && icd->error &&
+	    td_non_fatal_error(icd->error)) {
+		/*
+		 * If there is a non_fatal error, then add to the error count
+		 * and clear all the errors.
+		 */
+		update_error_count(td, icd->error);
+		td_clear_error(td);
+		icd->error = 0;
+		io_u->error = 0;
+	}
 }
 
 static void init_icd(struct thread_data *td, struct io_completion_data *icd,
diff --git a/ioengine.h b/ioengine.h
index 9c0ed9a..6190977 100644
--- a/ioengine.h
+++ b/ioengine.h
@@ -139,6 +139,7 @@
 extern struct io_u *__get_io_u(struct thread_data *);
 extern struct io_u *get_io_u(struct thread_data *);
 extern void put_io_u(struct thread_data *, struct io_u *);
+extern void clear_io_u(struct thread_data *, struct io_u *);
 extern void requeue_io_u(struct thread_data *, struct io_u **);
 extern int __must_check io_u_sync_complete(struct thread_data *, struct io_u *, unsigned long *);
 extern int __must_check io_u_queued_complete(struct thread_data *, int, unsigned long *);
diff --git a/options.c b/options.c
index b65def9..9606ab2 100644
--- a/options.c
+++ b/options.c
@@ -1505,6 +1505,13 @@
 		.help	= "Setup dedicated gettimeofday() thread on this CPU",
 	},
 	{
+		.name	= "continue_on_error",
+		.type	= FIO_OPT_BOOL,
+		.off1	= td_var_offset(continue_on_error),
+		.help	= "Continue on non-fatal errors during I/O",
+		.def	= "0",
+	},
+	{
 		.name = NULL,
 	},
 };
diff --git a/stat.c b/stat.c
index 977796c..ec87deb 100644
--- a/stat.c
+++ b/stat.c
@@ -335,6 +335,10 @@
 	stat_calc_lat_u(ts, io_u_lat_u);
 	stat_calc_lat_m(ts, io_u_lat_m);
 	show_latencies(io_u_lat_u, io_u_lat_m);
+	if (ts->continue_on_error) {
+		log_info("     errors: total=%lu, first_error=%d\n",
+					ts->total_err_count, ts->first_error);
+	}
 }
 
 static void show_ddir_status_terse(struct thread_stat *ts,
@@ -410,6 +414,8 @@
 		log_info(";%3.2f%%", io_u_lat_u[i]);
 	for (i = 0; i < FIO_IO_U_LAT_M_NR; i++)
 		log_info(";%3.2f%%", io_u_lat_m[i]);
+	if (ts->continue_on_error)
+		log_info(";%lu;%d", ts->total_err_count, ts->first_error);
 	log_info("\n");
 
 	if (ts->description)
@@ -523,9 +529,18 @@
 			ts->pid = td->pid;
 		}
 
-		if (td->error && !ts->error) {
-			ts->error = td->error;
-			ts->verror = td->verror;
+		ts->continue_on_error = td->o.continue_on_error;
+		ts->total_err_count += td->total_err_count;
+		ts->first_error = td->first_error;
+		if (!ts->error) {
+			if (!td->error && td->o.continue_on_error &&
+			    td->first_error) {
+				ts->error = td->first_error;
+				ts->verror = td->verror;
+			} else  if (td->error) {
+				ts->error = td->error;
+				ts->verror = td->verror;
+			}
 		}
 
 		for (l = 0; l <= DDIR_WRITE; l++) {