Make experimental_verify=1 handle all cases properly

- Don't track written bytes; just replay the workload by resetting
  all the random generators. This should work for any mixture of IO.

- Handle trims for verify.

- Ensure that rwmix is replayed properly for verify.

- Fix up logging for replay.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/backend.c b/backend.c
index 507faa9..902414e 100644
--- a/backend.c
+++ b/backend.c
@@ -393,12 +393,11 @@
  * The main verify engine. Runs over the writes we previously submitted,
  * reads the blocks back in, and checks the crc/md5 of the data.
  */
-static void do_verify(struct thread_data *td, uint64_t verify_bytes)
+static void do_verify(struct thread_data *td)
 {
 	struct fio_file *f;
 	struct io_u *io_u;
 	int ret, min_events;
-	uint64_t io_bytes;
 	unsigned int i;
 
 	dprint(FD_VERIFY, "starting loop\n");
@@ -422,7 +421,6 @@
 	td_set_runstate(td, TD_VERIFYING);
 
 	io_u = NULL;
-	io_bytes = 0;
 	while (!td->terminate) {
 		enum fio_ddir ddir;
 		int ret2, full;
@@ -455,11 +453,34 @@
 				break;
 			}
 		} else {
-			io_u = get_io_u(td);
-			if (!io_u)
-				break;
+			while ((io_u = get_io_u(td)) != NULL) {
+				/*
+				 * We are only interested in the places where
+				 * we wrote or trimmed IOs. Turn those into
+				 * reads for verification purposes.
+				 */
+				if (io_u->ddir == DDIR_READ) {
+					/*
+					 * Pretend we issued it for rwmix
+					 * accounting
+					 */
+					td->io_issues[DDIR_READ]++;
+					put_io_u(td, io_u);
+					continue;
+				} else if (io_u->ddir == DDIR_TRIM) {
+					io_u->ddir = DDIR_READ;
+					io_u->flags |= IO_U_F_TRIMMED;
+					break;
+				} else if (io_u->ddir == DDIR_WRITE) {
+					io_u->ddir = DDIR_READ;
+					break;
+				} else {
+					put_io_u(td, io_u);
+					continue;
+				}
+			}
 
-			if (io_u->buflen + io_bytes > verify_bytes)
+			if (!io_u)
 				break;
 		}
 
@@ -491,7 +512,6 @@
 				io_u->xfer_buflen = io_u->resid;
 				io_u->xfer_buf += bytes;
 				io_u->offset += bytes;
-				io_bytes += bytes;
 
 				if (ddir_rw(io_u->ddir))
 					td->ts.short_io_u[io_u->ddir]++;
@@ -507,7 +527,6 @@
 				if (ret < 0)
 					break;
 			}
-			io_bytes += io_u->xfer_buflen;
 			continue;
 		case FIO_Q_QUEUED:
 			break;
@@ -542,18 +561,15 @@
 				min_events = 1;
 
 			do {
-				unsigned long bytes = 0;
-
 				/*
 				 * Reap required number of io units, if any,
 				 * and do the verification on them through
 				 * the callback handler
 				 */
-				if (io_u_queued_complete(td, min_events, &bytes) < 0) {
+				if (io_u_queued_complete(td, min_events, NULL) < 0) {
 					ret = -1;
 					break;
 				}
-				io_bytes += bytes;
 			} while (full && (td->cur_depth > td->o.iodepth_low));
 		}
 		if (ret < 0)
@@ -1190,8 +1206,6 @@
 
 	clear_state = 0;
 	while (keep_running(td)) {
-		uint64_t write_bytes;
-
 		fio_gettime(&td->start, NULL);
 		memcpy(&td->bw_sample_time, &td->start, sizeof(td->start));
 		memcpy(&td->iops_sample_time, &td->start, sizeof(td->start));
@@ -1212,9 +1226,7 @@
 
 		prune_io_piece_log(td);
 
-		write_bytes = td->io_bytes[DDIR_WRITE];
 		do_io(td);
-		write_bytes = td->io_bytes[DDIR_WRITE] - write_bytes;
 
 		clear_state = 1;
 
@@ -1243,7 +1255,7 @@
 
 		fio_gettime(&td->start, NULL);
 
-		do_verify(td, write_bytes);
+		do_verify(td);
 
 		td->ts.runtime[DDIR_READ] += utime_since_now(&td->start);
 
diff --git a/io_u.c b/io_u.c
index f020cac..8567e11 100644
--- a/io_u.c
+++ b/io_u.c
@@ -172,19 +172,31 @@
 	return 1;
 }
 
+/*
+ * Returns 1 if offsets should be sorted in batches of verifysort_nr, which
+ * is only done while reading back a random workload for verify; else 0.
+ */
+static inline int should_sort_io(struct thread_data *td)
+{
+	if (!td->o.verifysort_nr || !td->o.do_verify)
+		return 0;	/* verifysort_nr or do_verify is unset */
+	if (!td_random(td))
+		return 0;	/* not a random workload */
+	if (td->runstate != TD_VERIFYING)
+		return 0;	/* not in the verify phase */
+	if (td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE)
+		return 0;	/* tausworthe offsets are never sorted */
+
+	return 1;
+}
+
 static int get_next_rand_offset(struct thread_data *td, struct fio_file *f,
 				enum fio_ddir ddir, uint64_t *b)
 {
 	struct rand_off *r;
 	int i, ret = 1;
 
-	/*
-	 * If sort not enabled, or not a pure random read workload without
-	 * any stored write metadata, just return a random offset
-	 */
-	if (!td->o.verifysort_nr || !(ddir == DDIR_READ && td->o.do_verify &&
-	    td->o.verify != VERIFY_NONE && td_random(td)) ||
-	    td->o.random_generator == FIO_RAND_GEN_TAUSWORTHE)
+	if (!should_sort_io(td))
 		return get_off_from_method(td, f, ddir, b);
 
 	if (!flist_empty(&td->next_rand_list)) {
@@ -546,12 +558,6 @@
 	enum fio_ddir ddir;
 
 	/*
-	 * If verify phase started, it's always a READ
-	 */
-	if (td->runstate == TD_VERIFYING)
-		return DDIR_READ;
-
-	/*
 	 * see if it's time to fsync
 	 */
 	if (td->o.fsync_blocks &&
@@ -606,7 +612,7 @@
 
 static void set_rw_ddir(struct thread_data *td, struct io_u *io_u)
 {
-	io_u->ddir = get_rw_ddir(td);
+	io_u->ddir = io_u->acct_ddir = get_rw_ddir(td);
 
 	if (io_u->ddir == DDIR_WRITE && (td->io_ops->flags & FIO_BARRIER) &&
 	    td->o.barrier_blocks &&
@@ -650,14 +656,15 @@
 void requeue_io_u(struct thread_data *td, struct io_u **io_u)
 {
 	struct io_u *__io_u = *io_u;
+	enum fio_ddir ddir = acct_ddir(__io_u);
 
 	dprint(FD_IO, "requeue %p\n", __io_u);
 
 	td_io_u_lock(td);
 
 	__io_u->flags |= IO_U_F_FREE;
-	if ((__io_u->flags & IO_U_F_FLIGHT) && ddir_rw(__io_u->ddir))
-		td->io_issues[__io_u->ddir]--;
+	if ((__io_u->flags & IO_U_F_FLIGHT) && ddir_rw(ddir))
+		td->io_issues[ddir]--;
 
 	__io_u->flags &= ~IO_U_F_FLIGHT;
 	if (__io_u->flags & IO_U_F_IN_CUR_DEPTH)
@@ -719,13 +726,9 @@
 	if (td_random(td) && file_randommap(td, io_u->file))
 		mark_random_map(td, io_u);
 
-	/*
-	 * If using a write iolog, store this entry.
-	 */
 out:
 	dprint_io_u(io_u, "fill_io_u");
 	td->zone_bytes += io_u->buflen;
-	log_io_u(td, io_u);
 	return 0;
 }
 
@@ -1091,6 +1094,7 @@
 		io_u->flags &= ~IO_U_F_VER_LIST;
 
 		io_u->error = 0;
+		io_u->acct_ddir = -1;
 		flist_del(&io_u->list);
 		flist_add_tail(&io_u->list, &td->io_u_busylist);
 		td->cur_depth++;
diff --git a/ioengine.h b/ioengine.h
index 6809501..d5a0dc9 100644
--- a/ioengine.h
+++ b/ioengine.h
@@ -8,7 +8,7 @@
 #include <guasi.h>
 #endif
 
-#define FIO_IOOPS_VERSION	14
+#define FIO_IOOPS_VERSION	15
 
 enum {
 	IO_U_F_FREE		= 1 << 0,
@@ -57,6 +57,12 @@
 	enum fio_ddir ddir;
 
 	/*
+	 * For replay workloads, we may want to account as a different
+	 * IO type than what is being submitted.
+	 */
+	enum fio_ddir acct_ddir;
+
+	/*
 	 * Allocated/set buffer and length
 	 */
 	unsigned long buflen;
@@ -210,4 +216,12 @@
 #define dprint_io_u(io_u, p)
 #endif
 
+static inline enum fio_ddir acct_ddir(struct io_u *io_u)
+{
+	if (io_u->acct_ddir != -1)	/* an accounting direction was set */
+		return io_u->acct_ddir;
+
+	return io_u->ddir;	/* acct_ddir is -1: account as submitted */
+}
+
 #endif
diff --git a/ioengines.c b/ioengines.c
index f81c46f..234f8ed 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -260,6 +260,11 @@
 
 	assert(fio_file_open(io_u->file));
 
+	/*
+	 * If using a write iolog, store this entry.
+	 */
+	log_io_u(td, io_u);
+
 	io_u->error = 0;
 	io_u->resid = 0;
 
@@ -275,8 +280,8 @@
 					sizeof(struct timeval));
 	}
 
-	if (ddir_rw(io_u->ddir))
-		td->io_issues[io_u->ddir]++;
+	if (ddir_rw(acct_ddir(io_u)))
+		td->io_issues[acct_ddir(io_u)]++;
 
 	ret = td->io_ops->queue(td, io_u);
 
diff --git a/libfio.c b/libfio.c
index 8255072..ac629dc 100644
--- a/libfio.c
+++ b/libfio.c
@@ -67,6 +67,7 @@
 static void reset_io_counters(struct thread_data *td)
 {
 	int ddir;
+
 	for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) {
 		td->stat_io_bytes[ddir] = 0;
 		td->this_io_bytes[ddir] = 0;
@@ -74,10 +75,12 @@
 		td->this_io_blocks[ddir] = 0;
 		td->rate_bytes[ddir] = 0;
 		td->rate_blocks[ddir] = 0;
+		td->io_issues[ddir] = 0;
 	}
 	td->zone_bytes = 0;
 
 	td->last_was_sync = 0;
+	td->rwmix_issues = 0;
 
 	/*
 	 * reset file done count if we are to start over