Expand continue_on_error to select which type of error to allow

This expands the continue_on_error option to take a string specifying
what type of error to continue on, breaking out errors into read,
write, and verify. (Sync, trim, and anything else that is not
specifically a read is treated as a write operation for the sake of
error continuation.) Backwards compatibility is retained by allowing
the values 0 and 1 to specify none and all, respectively.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

commit: 068420271828b3b2426ffc3ccf64404cb9d340fb [log] [tgz]
author: Steven Lang <tirea@google.com> Thu Nov 17 09:45:17 2011 +0100
committer: Jens Axboe <axboe@kernel.dk> Thu Nov 17 09:45:17 2011 +0100
tree: 22c1bb750e9a86abcc0967e5d7884bbf62ad306a
parent: 184b4098cccb8392eb8ecdd23cdc6597b540df36 [diff]
diff --git a/HOWTO b/HOWTO
index 2403a5c..ac7e729 100644
--- a/HOWTO
+++ b/HOWTO

@@ -1170,7 +1170,7 @@
 		uses. Fio will manually clear it from the CPU mask of other
 		jobs.
 
-continue_on_error=bool	Normally fio will exit the job on the first observed
+continue_on_error=str	Normally fio will exit the job on the first observed
 		failure. If this option is set, fio will continue the job when
 		there is a 'non-fatal error' (EIO or EILSEQ) until the runtime
 		is exceeded or the I/O size specified is completed. If this
@@ -1179,6 +1179,24 @@
 		given in the stats is the first error that was hit during the
 		run.
 
+		The allowed values are:
+
+			none	Exit on any IO or verify errors.
+
+			read	Continue on read errors, exit on all others.
+
+			write	Continue on write errors (including sync and trim), exit on all others.
+
+			io	Continue on any IO error, exit on all others.
+
+			verify	Continue on verify errors, exit on all others.
+
+			all	Continue on all errors.
+
+			0		Backward-compatible alias for 'none'.
+
+			1		Backward-compatible alias for 'all'.
+
 cgroup=str	Add job to this control group. If it doesn't exist, it will
 		be created. The system must have a mounted cgroup blkio
 		mount point for this to work. If your system doesn't have it

diff --git a/fio.c b/fio.c
index 5b58ab8..8702086 100644
--- a/fio.c
+++ b/fio.c

@@ -452,21 +452,22 @@
 		__update_tv_cache(td);
 }
 
-static int break_on_this_error(struct thread_data *td, int *retptr)
+static int break_on_this_error(struct thread_data *td, enum fio_ddir ddir,
+			       int *retptr)
 {
 	int ret = *retptr;
 
 	if (ret < 0 || td->error) {
 		int err;
 
-		if (!td->o.continue_on_error)
-			return 1;
-
 		if (ret < 0)
 			err = -ret;
 		else
 			err = td->error;
 
+		if (!(td->o.continue_on_error & td_error_type(ddir, err)))
+			return 1;
+
 		if (td_non_fatal_error(err)) {
 		        /*
 		         * Continue with the I/Os in case of
@@ -612,7 +613,7 @@
 			break;
 		}
 
-		if (break_on_this_error(td, &ret))
+		if (break_on_this_error(td, io_u->ddir, &ret))
 			break;
 
 		/*
@@ -678,6 +679,7 @@
 		int min_evts = 0;
 		struct io_u *io_u;
 		int ret2, full;
+		enum fio_ddir ddir;
 
 		if (td->terminate)
 			break;
@@ -696,6 +698,8 @@
 		if (!io_u)
 			break;
 
+		ddir = io_u->ddir;
+
 		/*
 		 * Add verification end_io handler, if asked to verify
 		 * a previously written file.
@@ -774,7 +778,7 @@
 			break;
 		}
 
-		if (break_on_this_error(td, &ret))
+		if (break_on_this_error(td, ddir, &ret))
 			break;
 
 		/*

diff --git a/fio.h b/fio.h
index cc1f65f..4733990 100644
--- a/fio.h
+++ b/fio.h

@@ -65,6 +65,17 @@
 	RW_SEQ_IDENT,
 };
 
+/*
+ * Bitmask of error classes that the continue_on_error option may tolerate
+ */
+enum error_type {
+        ERROR_TYPE_NONE = 0,	/* no bit set: no error class is continued on */
+        ERROR_TYPE_READ = 1 << 0,	/* td_error_type() result for DDIR_READ */
+        ERROR_TYPE_WRITE = 1 << 1,	/* td_error_type() result for any other ddir */
+        ERROR_TYPE_VERIFY = 1 << 2,	/* td_error_type() result for EILSEQ */
+        ERROR_TYPE_ANY = 0xffff,	/* low 16 bits set: covers every class above */
+};
+
 struct bssplit {
 	unsigned int bs;
 	unsigned char perc;
@@ -227,7 +238,7 @@
 	/*
 	 * I/O Error handling
 	 */
-	unsigned int continue_on_error;
+	enum error_type continue_on_error;
 
 	/*
 	 * Benchmark profile type
@@ -520,6 +531,15 @@
 
 #define td_non_fatal_error(e)	((e) == EIO || (e) == EILSEQ)
 
+static inline enum error_type td_error_type(enum fio_ddir ddir, int err)
+{
+	if (err == EILSEQ)	/* checked first: EILSEQ is a verify error whatever ddir is */
+		return ERROR_TYPE_VERIFY;
+	if (ddir == DDIR_READ)
+		return ERROR_TYPE_READ;
+	return ERROR_TYPE_WRITE;	/* every direction other than read counts as a write */
+}
+
 static inline void update_error_count(struct thread_data *td, int err)
 {
 	td->total_err_count++;

diff --git a/io_u.c b/io_u.c
index 0ff66f9..1aa418c 100644
--- a/io_u.c
+++ b/io_u.c

@@ -1389,8 +1389,8 @@
 		icd->error = io_u->error;
 		io_u_log_error(td, io_u);
 	}
-	if (td->o.continue_on_error && icd->error &&
-	    td_non_fatal_error(icd->error)) {
+	if (icd->error && td_non_fatal_error(icd->error) &&
+           (td->o.continue_on_error & td_error_type(io_u->ddir, icd->error))) {
 		/*
 		 * If there is a non_fatal error, then add to the error count
 		 * and clear all the errors.

diff --git a/options.c b/options.c
index 53c3a82..2e1e709 100644
--- a/options.c
+++ b/options.c

@@ -2057,10 +2057,44 @@
 	},
 	{
 		.name	= "continue_on_error",
-		.type	= FIO_OPT_BOOL,
+		.type	= FIO_OPT_STR,
 		.off1	= td_var_offset(continue_on_error),
 		.help	= "Continue on non-fatal errors during IO",
-		.def	= "0",
+		.def	= "none",
+		.posval = {
+			  { .ival = "none",
+			    .oval = ERROR_TYPE_NONE,
+			    .help = "Exit when an error is encountered",
+			  },
+			  { .ival = "read",
+			    .oval = ERROR_TYPE_READ,
+			    .help = "Continue on read errors only",
+			  },
+			  { .ival = "write",
+			    .oval = ERROR_TYPE_WRITE,
+			    .help = "Continue on write errors only",
+			  },
+			  { .ival = "io",
+			    .oval = ERROR_TYPE_READ | ERROR_TYPE_WRITE,
+			    .help = "Continue on any IO errors",
+			  },
+			  { .ival = "verify",
+			    .oval = ERROR_TYPE_VERIFY,
+			    .help = "Continue on verify errors only",
+			  },
+			  { .ival = "all",
+			    .oval = ERROR_TYPE_ANY,
+			    .help = "Continue on all io and verify errors",
+			  },
+			  { .ival = "0",
+			    .oval = ERROR_TYPE_NONE,
+			    .help = "Alias for 'none'",
+			  },
+			  { .ival = "1",
+			    .oval = ERROR_TYPE_ANY,
+			    .help = "Alias for 'all'",
+			  },
+		},
 	},
 	{
 		.name	= "profile",

diff --git a/verify.c b/verify.c
index 5a94281..91a9077 100644
--- a/verify.c
+++ b/verify.c

@@ -1033,7 +1033,7 @@
 			put_io_u(td, io_u);
 			if (!ret)
 				continue;
-			if (td->o.continue_on_error &&
+			if (td->o.continue_on_error & ERROR_TYPE_VERIFY &&
 			    td_non_fatal_error(ret)) {
 				update_error_count(td, ret);
 				td_clear_error(td);
commit	068420271828b3b2426ffc3ccf64404cb9d340fb	[log] [tgz]
author	Steven Lang <tirea@google.com>	Thu Nov 17 09:45:17 2011 +0100
committer	Jens Axboe <axboe@kernel.dk>	Thu Nov 17 09:45:17 2011 +0100
tree	22c1bb750e9a86abcc0967e5d7884bbf62ad306a
parent	184b4098cccb8392eb8ecdd23cdc6597b540df36 [diff]