Change IO engine queuing

Instead of always pretending to be async, let the IO engines
return FIO_Q_COMPLETED or FIO_Q_QUEUED to signal sync or async
completion, respectively, regardless of their nature. This
cleans up the queuing model quite a bit.

Also fixed a verification error spotted while doing this
transformation.

The main intent of this is to allow queuing more than one piece
of IO at a time; that will come in a later changeset.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/io_u.c b/io_u.c
index 132d897..6439979 100644
--- a/io_u.c
+++ b/io_u.c
@@ -353,23 +353,23 @@
 			return NULL;
 		}
 
-		f->last_pos += io_u->buflen;
+		f->last_pos = io_u->offset + io_u->buflen;
 
 		if (td->verify != VERIFY_NONE)
 			populate_verify_io_u(td, io_u);
 	}
 
-	if (td_io_prep(td, io_u)) {
-		put_io_u(td, io_u);
-		return NULL;
-	}
-
 	/*
 	 * Set io data pointers.
 	 */
 	io_u->xfer_buf = io_u->buf;
 	io_u->xfer_buflen = io_u->buflen;
 
+	if (td_io_prep(td, io_u)) {
+		put_io_u(td, io_u);
+		return NULL;
+	}
+
 	fio_gettime(&io_u->start_time, NULL);
 	return io_u;
 }
@@ -411,15 +411,20 @@
 		icd->error = io_u->error;
 }
 
+void init_icd(struct io_completion_data *icd)
+{
+	fio_gettime(&icd->time, NULL);
+
+	icd->error = 0;
+	icd->bytes_done[0] = icd->bytes_done[1] = 0;
+}
+
 void ios_completed(struct thread_data *td, struct io_completion_data *icd)
 {
 	struct io_u *io_u;
 	int i;
 
-	fio_gettime(&icd->time, NULL);
-
-	icd->error = 0;
-	icd->bytes_done[0] = icd->bytes_done[1] = 0;
+	init_icd(icd);
 
 	for (i = 0; i < icd->nr; i++) {
 		io_u = td->io_ops->event(td, i);