verify: fix verification of uncompleted or errored ios

Stoo Davies reports:

------

I'm doing some powerfail recovery testing on a storage array over iSCSI.
Host is RHEL 6.4 kernel 2.6.32-358.el6.x86_64.

With fio 2.1.2 -> 2.1.4 the job file below rides through the disks going
away, and continues I/O after they come back, without reporting any
errors.  With fio 2.1.5 -> 2.1.8 when the disks come back fio
immediately reports a meta verification error.

I captured a trace with a Finisar analyzer, and can see that after the
disks come back and the host logs back in, a read is issued for an lba
which was never written to.  Since I don't see verification errors
outside of the powerfail testing, I suspect fio isn't correctly handling
failed writes during the time the disks are unavailable.

------

The issue is caused by a change between 2.1.4 and 2.1.5 that started
logging IO for verification at issue time, instead of at completion
time. If the IO only partially completes, we need to trim the logged
verification unit so that we verify only what fio actually wrote. If
the IO completes in error, transfers nothing, or has to be requeued
because the engine was busy, we need to remove its entry from the
verification log entirely.

Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/backend.c b/backend.c
index 9deef28..d1d5571 100644
--- a/backend.c
+++ b/backend.c
@@ -780,16 +780,21 @@
 		case FIO_Q_COMPLETED:
 			if (io_u->error) {
 				ret = -io_u->error;
+				unlog_io_piece(td, io_u);
 				clear_io_u(td, io_u);
 			} else if (io_u->resid) {
 				int bytes = io_u->xfer_buflen - io_u->resid;
 				struct fio_file *f = io_u->file;
 
 				bytes_issued += bytes;
+
+				trim_io_piece(td, io_u);
+
 				/*
 				 * zero read, fail
 				 */
 				if (!bytes) {
+					unlog_io_piece(td, io_u);
 					td_verror(td, EIO, "full resid");
 					put_io_u(td, io_u);
 					break;
@@ -830,6 +835,7 @@
 			bytes_issued += io_u->xfer_buflen;
 			break;
 		case FIO_Q_BUSY:
+			unlog_io_piece(td, io_u);
 			requeue_io_u(td, &io_u);
 			ret2 = td_io_commit(td);
 			if (ret2 < 0)
diff --git a/io_u.c b/io_u.c
index 4b0b5a7..e132fd9 100644
--- a/io_u.c
+++ b/io_u.c
@@ -1622,8 +1622,15 @@
 	 * Mark IO ok to verify
 	 */
 	if (io_u->ipo) {
-		io_u->ipo->flags &= ~IP_F_IN_FLIGHT;
-		write_barrier();
+		/*
+		 * Remove errored entry from the verification list
+		 */
+		if (io_u->error)
+			unlog_io_piece(td, io_u);
+		else {
+			io_u->ipo->flags &= ~IP_F_IN_FLIGHT;
+			write_barrier();
+		}
 	}
 
 	td_io_u_unlock(td);
diff --git a/iolog.c b/iolog.c
index f498959..cac1aba 100644
--- a/iolog.c
+++ b/iolog.c
@@ -268,6 +268,33 @@
 	td->io_hist_len++;
 }
 
+void unlog_io_piece(struct thread_data *td, struct io_u *io_u)
+{
+	struct io_piece *ipo = io_u->ipo;
+
+	if (!ipo)
+		return;
+
+	if (ipo->flags & IP_F_ONRB)
+		rb_erase(&ipo->rb_node, &td->io_hist_tree);
+	else if (ipo->flags & IP_F_ONLIST)
+		flist_del(&ipo->list);
+
+	free(ipo);
+	io_u->ipo = NULL;
+	td->io_hist_len--;
+}
+
+void trim_io_piece(struct thread_data *td, struct io_u *io_u)
+{
+	struct io_piece *ipo = io_u->ipo;
+
+	if (!ipo)
+		return;
+
+	ipo->len = io_u->xfer_buflen - io_u->resid;
+}
+
 void write_iolog_close(struct thread_data *td)
 {
 	fflush(td->iolog_f);
diff --git a/iolog.h b/iolog.h
index 50d09e2..3af5668 100644
--- a/iolog.h
+++ b/iolog.h
@@ -110,6 +110,8 @@
 extern void log_file(struct thread_data *, struct fio_file *, enum file_log_act);
 extern int __must_check init_iolog(struct thread_data *td);
 extern void log_io_piece(struct thread_data *, struct io_u *);
+extern void unlog_io_piece(struct thread_data *, struct io_u *);
+extern void trim_io_piece(struct thread_data *, struct io_u *);
 extern void queue_io_piece(struct thread_data *, struct io_piece *);
 extern void prune_io_piece_log(struct thread_data *);
 extern void write_iolog_close(struct thread_data *);