verify: fix a bug with verify_async

There's a race between marking an io_u as deferred, completing it,
and checking of that flag. It cannot be done reliably for async
verify threads. So change the mechanism to have the verify_async
part pull the io_u completely, so we don't have to check for a flag
in it after we have run ->end_io().

This fixes a bug with verify_async, where fio would crash with
this message:

fio: io_u.c:1315: __get_io_u: Assertion `io_u->flags & IO_U_F_FREE' failed

This race has always existed, but was made considerably worse with
this commit:

commit 2ae0b204743d6b4048c6fffd46c6280a70f2ecd1
Author: Jens Axboe <axboe@kernel.dk>
Date:   Tue May 28 14:16:55 2013 +0200

    Replace list based free/busy/requeue list with FIFO + ring

    Cache friendliness of the list is pretty low. This has
    provably lower overhead.

since we moved from a single list holding the io, to a separate
verify list and io_u queues. The above bug is happening because
the io_u ends up on both the freelist and the pending verify list,
causing mayhem.

Reported-by: scameron@beardog.cce.hp.com
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/io_u.c b/io_u.c
index 16b52d6..ba192a3 100644
--- a/io_u.c
+++ b/io_u.c
@@ -688,10 +688,10 @@
 {
 	td_io_u_lock(td);
 
-	if (io_u->file && !(io_u->flags & IO_U_F_FREE_DEF))
+	if (io_u->file && !(io_u->flags & IO_U_F_NO_FILE_PUT))
 		put_file_log(td, io_u->file);
+
 	io_u->file = NULL;
-	io_u->flags &= ~IO_U_F_FREE_DEF;
 	io_u->flags |= IO_U_F_FREE;
 
 	if (io_u->flags & IO_U_F_IN_CUR_DEPTH)
@@ -1313,9 +1313,9 @@
 
 	if (io_u) {
 		assert(io_u->flags & IO_U_F_FREE);
-		io_u->flags &= ~(IO_U_F_FREE | IO_U_F_FREE_DEF);
-		io_u->flags &= ~(IO_U_F_TRIMMED | IO_U_F_BARRIER);
-		io_u->flags &= ~IO_U_F_VER_LIST;
+		io_u->flags &= ~(IO_U_F_FREE | IO_U_F_NO_FILE_PUT |
+				 IO_U_F_TRIMMED | IO_U_F_BARRIER |
+				 IO_U_F_VER_LIST);
 
 		io_u->error = 0;
 		io_u->acct_ddir = -1;
@@ -1607,10 +1607,12 @@
 	return remainder * 1000000 / bps + secs * 1000000;
 }
 
-static void io_completed(struct thread_data *td, struct io_u *io_u,
+static void io_completed(struct thread_data *td, struct io_u **io_u_ptr,
 			 struct io_completion_data *icd)
 {
-	struct fio_file *f;
+	struct io_u *io_u = *io_u_ptr;
+	enum fio_ddir ddir = io_u->ddir;
+	struct fio_file *f = io_u->file;
 
 	dprint_io_u(io_u, "io complete");
 
@@ -1635,9 +1637,8 @@
 
 	td_io_u_unlock(td);
 
-	if (ddir_sync(io_u->ddir)) {
+	if (ddir_sync(ddir)) {
 		td->last_was_sync = 1;
-		f = io_u->file;
 		if (f) {
 			f->first_write = -1ULL;
 			f->last_write = -1ULL;
@@ -1646,52 +1647,51 @@
 	}
 
 	td->last_was_sync = 0;
-	td->last_ddir = io_u->ddir;
+	td->last_ddir = ddir;
 
-	if (!io_u->error && ddir_rw(io_u->ddir)) {
+	if (!io_u->error && ddir_rw(ddir)) {
 		unsigned int bytes = io_u->buflen - io_u->resid;
-		const enum fio_ddir idx = io_u->ddir;
-		const enum fio_ddir odx = io_u->ddir ^ 1;
+		const enum fio_ddir oddir = ddir ^ 1;
 		int ret;
 
-		td->io_blocks[idx]++;
-		td->this_io_blocks[idx]++;
-		td->io_bytes[idx] += bytes;
+		td->io_blocks[ddir]++;
+		td->this_io_blocks[ddir]++;
+		td->io_bytes[ddir] += bytes;
 
 		if (!(io_u->flags & IO_U_F_VER_LIST))
-			td->this_io_bytes[idx] += bytes;
+			td->this_io_bytes[ddir] += bytes;
 
-		if (idx == DDIR_WRITE) {
-			f = io_u->file;
-			if (f) {
-				if (f->first_write == -1ULL ||
-				    io_u->offset < f->first_write)
-					f->first_write = io_u->offset;
-				if (f->last_write == -1ULL ||
-				    ((io_u->offset + bytes) > f->last_write))
-					f->last_write = io_u->offset + bytes;
-			}
+		if (ddir == DDIR_WRITE && f) {
+			if (f->first_write == -1ULL ||
+			    io_u->offset < f->first_write)
+				f->first_write = io_u->offset;
+			if (f->last_write == -1ULL ||
+			    ((io_u->offset + bytes) > f->last_write))
+				f->last_write = io_u->offset + bytes;
 		}
 
 		if (ramp_time_over(td) && (td->runstate == TD_RUNNING ||
 					   td->runstate == TD_VERIFYING)) {
-			account_io_completion(td, io_u, icd, idx, bytes);
+			account_io_completion(td, io_u, icd, ddir, bytes);
 
-			if (__should_check_rate(td, idx)) {
-				td->rate_pending_usleep[idx] =
-					(usec_for_io(td, idx) -
+			if (__should_check_rate(td, ddir)) {
+				td->rate_pending_usleep[ddir] =
+					(usec_for_io(td, ddir) -
 					 utime_since_now(&td->start));
 			}
-			if (idx != DDIR_TRIM && __should_check_rate(td, odx))
-				td->rate_pending_usleep[odx] =
-					(usec_for_io(td, odx) -
+			if (ddir != DDIR_TRIM &&
+			    __should_check_rate(td, oddir)) {
+				td->rate_pending_usleep[oddir] =
+					(usec_for_io(td, oddir) -
 					 utime_since_now(&td->start));
+			}
 		}
 
-		icd->bytes_done[idx] += bytes;
+		icd->bytes_done[ddir] += bytes;
 
 		if (io_u->end_io) {
-			ret = io_u->end_io(td, io_u);
+			ret = io_u->end_io(td, io_u_ptr);
+			io_u = *io_u_ptr;
 			if (ret && !icd->error)
 				icd->error = ret;
 		}
@@ -1700,9 +1700,11 @@
 		io_u_log_error(td, io_u);
 	}
 	if (icd->error) {
-		enum error_type_bit eb = td_error_type(io_u->ddir, icd->error);
+		enum error_type_bit eb = td_error_type(ddir, icd->error);
+
 		if (!td_non_fatal_error(td, eb, icd->error))
 			return;
+
 		/*
 		 * If there is a non_fatal error, then add to the error count
 		 * and clear all the errors.
@@ -1710,7 +1712,8 @@
 		update_error_count(td, icd->error);
 		td_clear_error(td);
 		icd->error = 0;
-		io_u->error = 0;
+		if (io_u)
+			io_u->error = 0;
 	}
 }
 
@@ -1738,9 +1741,9 @@
 	for (i = 0; i < icd->nr; i++) {
 		io_u = td->io_ops->event(td, i);
 
-		io_completed(td, io_u, icd);
+		io_completed(td, &io_u, icd);
 
-		if (!(io_u->flags & IO_U_F_FREE_DEF))
+		if (io_u)
 			put_io_u(td, io_u);
 	}
 }
@@ -1754,9 +1757,9 @@
 	struct io_completion_data icd;
 
 	init_icd(td, &icd, 1);
-	io_completed(td, io_u, &icd);
+	io_completed(td, &io_u, &icd);
 
-	if (!(io_u->flags & IO_U_F_FREE_DEF))
+	if (io_u)
 		put_io_u(td, io_u);
 
 	if (icd.error) {
diff --git a/io_u_queue.h b/io_u_queue.h
index 5b6cad0..b702b1f 100644
--- a/io_u_queue.h
+++ b/io_u_queue.h
@@ -12,8 +12,13 @@
 
 static inline struct io_u *io_u_qpop(struct io_u_queue *q)
 {
-	if (q->nr)
-		return q->io_us[--q->nr];
+	if (q->nr) {
+		const unsigned int next = --q->nr;
+		struct io_u *io_u = q->io_us[next];
+
+		q->io_us[next] = NULL;
+		return io_u;
+	}
 
 	return NULL;
 }
diff --git a/ioengine.h b/ioengine.h
index 37bf5fc..29c8487 100644
--- a/ioengine.h
+++ b/ioengine.h
@@ -20,7 +20,7 @@
 enum {
 	IO_U_F_FREE		= 1 << 0,
 	IO_U_F_FLIGHT		= 1 << 1,
-	IO_U_F_FREE_DEF		= 1 << 2,
+	IO_U_F_NO_FILE_PUT	= 1 << 2,
 	IO_U_F_IN_CUR_DEPTH	= 1 << 3,
 	IO_U_F_BUSY_OK		= 1 << 4,
 	IO_U_F_TRIMMED		= 1 << 5,
@@ -90,7 +90,7 @@
 	/*
 	 * Callback for io completion
 	 */
-	int (*end_io)(struct thread_data *, struct io_u *);
+	int (*end_io)(struct thread_data *, struct io_u **);
 
 	union {
 #ifdef CONFIG_LIBAIO
diff --git a/verify.c b/verify.c
index 7c99e15..217b686 100644
--- a/verify.c
+++ b/verify.c
@@ -656,19 +656,21 @@
 /*
  * Push IO verification to a separate thread
  */
-int verify_io_u_async(struct thread_data *td, struct io_u *io_u)
+int verify_io_u_async(struct thread_data *td, struct io_u **io_u_ptr)
 {
-	if (io_u->file)
-		put_file_log(td, io_u->file);
+	struct io_u *io_u = *io_u_ptr;
 
 	pthread_mutex_lock(&td->io_u_lock);
 
+	if (io_u->file)
+		put_file_log(td, io_u->file);
+
 	if (io_u->flags & IO_U_F_IN_CUR_DEPTH) {
 		td->cur_depth--;
 		io_u->flags &= ~IO_U_F_IN_CUR_DEPTH;
 	}
 	flist_add_tail(&io_u->verify_list, &td->verify_list);
-	io_u->flags |= IO_U_F_FREE_DEF;
+	*io_u_ptr = NULL;
 	pthread_mutex_unlock(&td->io_u_lock);
 
 	pthread_cond_signal(&td->verify_cond);
@@ -747,9 +749,10 @@
 	return EILSEQ;
 }
 
-int verify_io_u(struct thread_data *td, struct io_u *io_u)
+int verify_io_u(struct thread_data *td, struct io_u **io_u_ptr)
 {
 	struct verify_header *hdr;
+	struct io_u *io_u = *io_u_ptr;
 	unsigned int header_size, hdr_inc, hdr_num = 0;
 	void *p;
 	int ret;
@@ -1188,9 +1191,11 @@
 
 		while (!flist_empty(&list)) {
 			io_u = flist_first_entry(&list, struct io_u, verify_list);
-			flist_del(&io_u->verify_list);
+			flist_del_init(&io_u->verify_list);
 
-			ret = verify_io_u(td, io_u);
+			io_u->flags |= IO_U_F_NO_FILE_PUT;
+			ret = verify_io_u(td, &io_u);
+
 			put_io_u(td, io_u);
 			if (!ret)
 				continue;
diff --git a/verify.h b/verify.h
index dba7743..bb3fd49 100644
--- a/verify.h
+++ b/verify.h
@@ -76,8 +76,8 @@
  */
 extern void populate_verify_io_u(struct thread_data *, struct io_u *);
 extern int __must_check get_next_verify(struct thread_data *td, struct io_u *);
-extern int __must_check verify_io_u(struct thread_data *, struct io_u *);
-extern int verify_io_u_async(struct thread_data *, struct io_u *);
+extern int __must_check verify_io_u(struct thread_data *, struct io_u **);
+extern int verify_io_u_async(struct thread_data *, struct io_u **);
 extern void fill_verify_pattern(struct thread_data *td, void *p, unsigned int len, struct io_u *io_u, unsigned long seed, int use_seed);
 extern void fill_buffer_pattern(struct thread_data *td, void *p, unsigned int len);
 extern void fio_verify_init(struct thread_data *td);