Expand continue_on_error to select which type of error to allow
This expands the continue_on_error option to take a string specifying
what type of error to continue on, breaking out errors into read,
write, and verify. (Sync, trim, and anything else not specifically a
read are considered write operations for the sake of error
continuation.)
Backwards compatibility is retained by allowing =0 and =1 values to
specify none and all, respectively.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/HOWTO b/HOWTO
index 2403a5c..ac7e729 100644
--- a/HOWTO
+++ b/HOWTO
@@ -1170,7 +1170,7 @@
uses. Fio will manually clear it from the CPU mask of other
jobs.
-continue_on_error=bool Normally fio will exit the job on the first observed
+continue_on_error=str Normally fio will exit the job on the first observed
failure. If this option is set, fio will continue the job when
there is a 'non-fatal error' (EIO or EILSEQ) until the runtime
is exceeded or the I/O size specified is completed. If this
@@ -1179,6 +1179,24 @@
given in the stats is the first error that was hit during the
run.
+ The allowed values are:
+
+ none Exit on any IO or verify errors.
+
+ read Continue on read errors, exit on all others.
+
+ write Continue on write errors (incl. sync, trim and other non-read ops), exit on all others.
+
+ io Continue on any IO error (read or write), exit on all others.
+
+ verify Continue on verify errors, exit on all others.
+
+ all Continue on all errors.
+
+ 0 Backward-compatible alias for 'none'.
+
+ 1 Backward-compatible alias for 'all'.
+
cgroup=str Add job to this control group. If it doesn't exist, it will
be created. The system must have a mounted cgroup blkio
mount point for this to work. If your system doesn't have it
diff --git a/fio.c b/fio.c
index 5b58ab8..8702086 100644
--- a/fio.c
+++ b/fio.c
@@ -452,21 +452,22 @@
__update_tv_cache(td);
}
-static int break_on_this_error(struct thread_data *td, int *retptr)
+static int break_on_this_error(struct thread_data *td, enum fio_ddir ddir,
+ int *retptr)
{
int ret = *retptr;
if (ret < 0 || td->error) {
int err;
- if (!td->o.continue_on_error)
- return 1;
-
if (ret < 0)
err = -ret;
else
err = td->error;
+ if (!(td->o.continue_on_error & td_error_type(ddir, err)))
+ return 1;
+
if (td_non_fatal_error(err)) {
/*
* Continue with the I/Os in case of
@@ -612,7 +613,7 @@
break;
}
- if (break_on_this_error(td, &ret))
+ if (break_on_this_error(td, io_u->ddir, &ret))
break;
/*
@@ -678,6 +679,7 @@
int min_evts = 0;
struct io_u *io_u;
int ret2, full;
+ enum fio_ddir ddir;
if (td->terminate)
break;
@@ -696,6 +698,8 @@
if (!io_u)
break;
+ ddir = io_u->ddir;
+
/*
* Add verification end_io handler, if asked to verify
* a previously written file.
@@ -774,7 +778,7 @@
break;
}
- if (break_on_this_error(td, &ret))
+ if (break_on_this_error(td, ddir, &ret))
break;
/*
diff --git a/fio.h b/fio.h
index cc1f65f..4733990 100644
--- a/fio.h
+++ b/fio.h
@@ -65,6 +65,17 @@
RW_SEQ_IDENT,
};
+/*
+ * Bit flags for the error classes continue_on_error can allow; may be OR'ed
+ */
+enum error_type {
+ ERROR_TYPE_NONE = 0, /* no bits set: continue on nothing */
+ ERROR_TYPE_READ = 1 << 0, /* read errors */
+ ERROR_TYPE_WRITE = 1 << 1, /* write errors */
+ ERROR_TYPE_VERIFY = 1 << 2, /* verify errors */
+ ERROR_TYPE_ANY = 0xffff, /* mask that includes every flag above */
+};
+
struct bssplit {
unsigned int bs;
unsigned char perc;
@@ -227,7 +238,7 @@
/*
* I/O Error handling
*/
- unsigned int continue_on_error;
+ enum error_type continue_on_error;
/*
* Benchmark profile type
@@ -520,6 +531,15 @@
#define td_non_fatal_error(e) ((e) == EIO || (e) == EILSEQ)
+static inline enum error_type td_error_type(enum fio_ddir ddir, int err)
+{ /* Classify an error by errno and data direction into one error_type flag */
+ if (err == EILSEQ) /* EILSEQ wins over ddir: always a verify error */
+ return ERROR_TYPE_VERIFY;
+ if (ddir == DDIR_READ)
+ return ERROR_TYPE_READ;
+ return ERROR_TYPE_WRITE; /* every ddir other than DDIR_READ counts as a write */
+}
+
static inline void update_error_count(struct thread_data *td, int err)
{
td->total_err_count++;
diff --git a/io_u.c b/io_u.c
index 0ff66f9..1aa418c 100644
--- a/io_u.c
+++ b/io_u.c
@@ -1389,8 +1389,8 @@
icd->error = io_u->error;
io_u_log_error(td, io_u);
}
- if (td->o.continue_on_error && icd->error &&
- td_non_fatal_error(icd->error)) {
+ if (icd->error && td_non_fatal_error(icd->error) &&
+ (td->o.continue_on_error & td_error_type(io_u->ddir, icd->error))) {
/*
* If there is a non_fatal error, then add to the error count
* and clear all the errors.
diff --git a/options.c b/options.c
index 53c3a82..2e1e709 100644
--- a/options.c
+++ b/options.c
@@ -2057,10 +2057,44 @@
},
{
.name = "continue_on_error",
- .type = FIO_OPT_BOOL,
+ .type = FIO_OPT_STR,
.off1 = td_var_offset(continue_on_error),
.help = "Continue on non-fatal errors during IO",
- .def = "0",
+ .def = "none",
+ .posval = {
+ { .ival = "none",
+ .oval = ERROR_TYPE_NONE,
+ .help = "Exit when an error is encountered",
+ },
+ { .ival = "read",
+ .oval = ERROR_TYPE_READ,
+ .help = "Continue on read errors only",
+ },
+ { .ival = "write",
+ .oval = ERROR_TYPE_WRITE,
+ .help = "Continue on write errors only",
+ },
+ { .ival = "io",
+ .oval = ERROR_TYPE_READ | ERROR_TYPE_WRITE,
+ .help = "Continue on any IO errors",
+ },
+ { .ival = "verify",
+ .oval = ERROR_TYPE_VERIFY,
+ .help = "Continue on verify errors only",
+ },
+ { .ival = "all",
+ .oval = ERROR_TYPE_ANY,
+ .help = "Continue on all io and verify errors",
+ },
+ { .ival = "0",
+ .oval = ERROR_TYPE_NONE,
+ .help = "Alias for 'none'",
+ },
+ { .ival = "1",
+ .oval = ERROR_TYPE_ANY,
+ .help = "Alias for 'all'",
+ },
+ },
},
{
.name = "profile",
diff --git a/verify.c b/verify.c
index 5a94281..91a9077 100644
--- a/verify.c
+++ b/verify.c
@@ -1033,7 +1033,7 @@
put_io_u(td, io_u);
if (!ret)
continue;
- if (td->o.continue_on_error &&
+ if (td->o.continue_on_error & ERROR_TYPE_VERIFY &&
td_non_fatal_error(ret)) {
update_error_count(td, ret);
td_clear_error(td);