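Add support for replaying blktrace trim/discard

Discard entries in a blktrace are now queued as DDIR_TRIM io_pieces, and the
iolog parser accepts a "trim" action. A new ddir_rw() helper restricts
per-direction accounting to reads and writes, and do_io_u_trim() now returns
the trimmed byte count on success and 0 on failure.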

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
diff --git a/blktrace.c b/blktrace.c
index 6cf8d46..f22ab1b 100644
--- a/blktrace.c
+++ b/blktrace.c
@@ -228,12 +228,41 @@
 	case BLK_TN_TIMESTAMP:
 		printf("got timestamp notify: %x, %d\n", t->action, t->pid);
 		break;
+	case BLK_TN_MESSAGE:
+		break;
 	default:
 		dprint(FD_BLKTRACE, "unknown trace act %x\n", t->action);
 		break;
 	}
 }
 
+/*
+ * Queue a traced discard for replay as a DDIR_TRIM io_piece. It is counted in
+ * ios[DDIR_WRITE]; if the io_piece cannot be allocated the entry is dropped.
+ */
+static void handle_trace_discard(struct thread_data *td, struct blk_io_trace *t,
+				 unsigned long long ttime, unsigned long *ios)
+{
+	struct io_piece *ipo = calloc(1, sizeof(*ipo));
+
+	if (!ipo) {
+		dprint(FD_BLKTRACE, "discard dropped, oom: len=%u\n", t->bytes);
+		return;
+	}
+	trace_add_file(td, t->device);
+	ios[DDIR_WRITE]++;
+	td->o.size += t->bytes;
+	INIT_FLIST_HEAD(&ipo->list);
+	/* the 512 is wrong here, it should be the hardware sector size... */
+	ipo->offset = t->sector * 512;
+	ipo->len = t->bytes;
+	ipo->delay = ttime / 1000;
+	ipo->ddir = DDIR_TRIM;
+	dprint(FD_BLKTRACE, "store discard, off=%llu, len=%lu, delay=%lu\n",
+					ipo->offset, ipo->len, ipo->delay);
+	queue_io_piece(td, ipo);
+}
+
 static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
 			    unsigned long long ttime, unsigned long *ios,
 			    unsigned int *bs)
@@ -267,6 +296,8 @@
 
 	if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
 		handle_trace_notify(t);
+	else if (t->action & BLK_TC_ACT(BLK_TC_DISCARD))
+		handle_trace_discard(td, t, ttime, ios);
 	else
 		handle_trace_fs(td, t, ttime, ios, bs);
 }
diff --git a/blktrace_api.h b/blktrace_api.h
index 61b405a..3df3347 100644
--- a/blktrace_api.h
+++ b/blktrace_api.h
@@ -20,6 +20,8 @@
 	BLK_TC_NOTIFY	= 1 << 10,	/* special message */
 	BLK_TC_AHEAD	= 1 << 11,	/* readahead */
 	BLK_TC_META	= 1 << 12,	/* metadata */
+	BLK_TC_DISCARD	= 1 << 13,	/* discard requests */
+	BLK_TC_DRV_DATA	= 1 << 14,	/* binary per-driver data */
 
 	BLK_TC_END	= 1 << 15,	/* only 16-bits, reminder */
 };
@@ -46,6 +48,8 @@
 	__BLK_TA_SPLIT,			/* bio was split */
 	__BLK_TA_BOUNCE,		/* bio was bounced */
 	__BLK_TA_REMAP,			/* bio was remapped */
+	__BLK_TA_ABORT,			/* request aborted */
+	__BLK_TA_DRV_DATA,		/* driver-specific binary data */
 };
 
 /*
@@ -54,6 +58,7 @@
 enum blktrace_notify {
 	__BLK_TN_PROCESS = 0,		/* establish pid/name mapping */
 	__BLK_TN_TIMESTAMP,		/* include system clock */
+	__BLK_TN_MESSAGE,		/* Character string message */
 };
 
 /*
@@ -74,9 +79,11 @@
 #define BLK_TA_SPLIT		(__BLK_TA_SPLIT)
 #define BLK_TA_BOUNCE		(__BLK_TA_BOUNCE)
 #define BLK_TA_REMAP		(__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_DRV_DATA (__BLK_TA_DRV_DATA | BLK_TC_ACT(BLK_TC_DRV_DATA))
 
 #define BLK_TN_PROCESS		(__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
 #define BLK_TN_TIMESTAMP	(__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
+#define BLK_TN_MESSAGE          (__BLK_TN_MESSAGE | BLK_TC_ACT(BLK_TC_NOTIFY))
 
 #define BLK_IO_TRACE_MAGIC	0x65617400
 #define BLK_IO_TRACE_VERSION	0x07
diff --git a/engines/mmap.c b/engines/mmap.c
index 53fd358..002918c 100644
--- a/engines/mmap.c
+++ b/engines/mmap.c
@@ -154,12 +154,18 @@
 			io_u->error = errno;
 			td_verror(td, io_u->error, "msync");
 		}
+	} else if (io_u->ddir == DDIR_TRIM) {
+		int ret = do_io_u_trim(td, io_u);
+
+		if (!ret)
+			td_verror(td, io_u->error, "trim");
 	}
 
+
 	/*
 	 * not really direct, but should drop the pages from the cache
 	 */
-	if (td->o.odirect && !ddir_sync(io_u->ddir)) {
+	if (td->o.odirect && ddir_rw(io_u->ddir)) {
 		if (msync(io_u->mmap_data, io_u->xfer_buflen, MS_SYNC) < 0) {
 			io_u->error = errno;
 			td_verror(td, io_u->error, "msync");
diff --git a/engines/sync.c b/engines/sync.c
index 3dbce47..4eea2f9 100644
--- a/engines/sync.c
+++ b/engines/sync.c
@@ -30,7 +30,7 @@
 {
 	struct fio_file *f = io_u->file;
 
-	if (ddir_sync(io_u->ddir))
+	if (!ddir_rw(io_u->ddir))
 		return 0;
 
 	if (f->file_pos != -1ULL && f->file_pos == io_u->offset)
@@ -46,7 +46,7 @@
 
 static int fio_io_end(struct thread_data *td, struct io_u *io_u, int ret)
 {
-	if (io_u->file && ret >= 0)
+	if (io_u->file && ret >= 0 && ddir_rw(io_u->ddir))
 		io_u->file->file_pos = io_u->offset + ret;
 
 	if (ret != (int) io_u->xfer_buflen) {
diff --git a/fio.c b/fio.c
index 6ab0f4a..1d20cf7 100644
--- a/fio.c
+++ b/fio.c
@@ -176,6 +176,8 @@
 	unsigned int rate_iops = 0;
 	unsigned int rate_iops_min = 0;
 
+	assert(ddir_rw(ddir));
+
 	if (!td->o.ratemin[ddir] && !td->o.rate_iops_min[ddir])
 		return 0;
 
@@ -491,7 +493,8 @@
 				io_u->xfer_buf += bytes;
 				io_u->offset += bytes;
 
-				td->ts.short_io_u[io_u->ddir]++;
+				if (ddir_rw(io_u->ddir))
+					td->ts.short_io_u[io_u->ddir]++;
 
 				if (io_u->offset == f->real_file_size)
 					goto sync_done;
@@ -636,7 +639,8 @@
 				io_u->xfer_buf += bytes;
 				io_u->offset += bytes;
 
-				td->ts.short_io_u[io_u->ddir]++;
+				if (ddir_rw(io_u->ddir))
+					td->ts.short_io_u[io_u->ddir]++;
 
 				if (io_u->offset == f->real_file_size)
 					goto sync_done;
diff --git a/init.c b/init.c
index ff7da40..90e2063 100644
--- a/init.c
+++ b/init.c
@@ -194,6 +194,8 @@
 	unsigned int bs = td->o.min_bs[ddir];
 	unsigned long long bytes_per_sec;
 
+	assert(ddir_rw(ddir));
+
 	if (td->o.rate[ddir])
 		bytes_per_sec = td->o.rate[ddir];
 	else
diff --git a/io_ddir.h b/io_ddir.h
index f83fc9b..b234256 100644
--- a/io_ddir.h
+++ b/io_ddir.h
@@ -34,4 +34,9 @@
 	       ddir == DDIR_SYNC_FILE_RANGE;
 }
 
+static inline int ddir_rw(enum fio_ddir ddir)
+{
+	return ddir == DDIR_READ || ddir == DDIR_WRITE; /* read/write only */
+}
+
 #endif
diff --git a/io_u.c b/io_u.c
index 53bf492..21a801f 100644
--- a/io_u.c
+++ b/io_u.c
@@ -103,6 +103,8 @@
 	unsigned long long max_blocks;
 	unsigned long long max_size;
 
+	assert(ddir_rw(ddir));
+
 	/*
 	 * Hmm, should we make sure that ->io_size <= ->real_file_size?
 	 */
@@ -211,6 +213,8 @@
 static int get_next_seq_block(struct thread_data *td, struct fio_file *f,
 			      enum fio_ddir ddir, unsigned long long *b)
 {
+	assert(ddir_rw(ddir));
+
 	if (f->last_pos < f->real_file_size) {
 		*b = (f->last_pos - f->file_offset) / td->o.min_bs[ddir];
 		return 0;
@@ -225,6 +229,8 @@
 	struct fio_file *f = io_u->file;
 	int ret;
 
+	assert(ddir_rw(ddir));
+
 	if (rw_seq) {
 		if (td_random(td))
 			ret = get_next_rand_block(td, f, ddir, b);
@@ -264,6 +270,8 @@
 	enum fio_ddir ddir = io_u->ddir;
 	int rw_seq_hit = 0;
 
+	assert(ddir_rw(ddir));
+
 	if (td->o.ddir_seq_nr && !--td->ddir_seq_nr) {
 		rw_seq_hit = 1;
 		td->ddir_seq_nr = td->o.ddir_seq_nr;
@@ -308,6 +316,8 @@
 	unsigned int minbs, maxbs;
 	long r;
 
+	assert(ddir_rw(ddir));
+
 	minbs = td->o.min_bs[ddir];
 	maxbs = td->o.max_bs[ddir];
 
@@ -388,6 +398,8 @@
 	struct timeval t;
 	long usec;
 
+	assert(ddir_rw(ddir));
+
 	if (td->rate_pending_usleep[ddir] <= 0)
 		return ddir;
 
@@ -531,7 +543,7 @@
 	td_io_u_lock(td);
 
 	__io_u->flags |= IO_U_F_FREE;
-	if ((__io_u->flags & IO_U_F_FLIGHT) && !ddir_sync(__io_u->ddir))
+	if ((__io_u->flags & IO_U_F_FLIGHT) && ddir_rw(__io_u->ddir))
 		td->io_issues[__io_u->ddir]--;
 
 	__io_u->flags &= ~IO_U_F_FLIGHT;
@@ -551,9 +563,9 @@
 	io_u->ddir = get_rw_ddir(td);
 
 	/*
-	 * fsync() or fdatasync(), we are done
+	 * fsync() or fdatasync() or trim etc, we are done
 	 */
-	if (ddir_sync(io_u->ddir))
+	if (!ddir_rw(io_u->ddir))
 		goto out;
 
 	/*
@@ -1023,7 +1035,7 @@
 	f = io_u->file;
 	assert(fio_file_open(f));
 
-	if (!ddir_sync(io_u->ddir)) {
+	if (ddir_rw(io_u->ddir)) {
 		if (!io_u->buflen && !(td->io_ops->flags & FIO_NOIO)) {
 			dprint(FD_IO, "get_io_u: zero buflen on %p\n", io_u);
 			goto err_put;
@@ -1114,7 +1126,7 @@
 	td->last_was_sync = 0;
 	td->last_ddir = io_u->ddir;
 
-	if (!io_u->error) {
+	if (!io_u->error && ddir_rw(io_u->ddir)) {
 		unsigned int bytes = io_u->buflen - io_u->resid;
 		const enum fio_ddir idx = io_u->ddir;
 		const enum fio_ddir odx = io_u->ddir ^ 1;
@@ -1180,7 +1192,7 @@
 			if (ret && !icd->error)
 				icd->error = ret;
 		}
-	} else {
+	} else if (io_u->error) {
 		icd->error = io_u->error;
 		io_u_log_error(td, io_u);
 	}
diff --git a/ioengines.c b/ioengines.c
index 4e059a8..f976efb 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -241,7 +241,7 @@
 					sizeof(struct timeval));
 	}
 
-	if (!ddir_sync(io_u->ddir))
+	if (ddir_rw(io_u->ddir))
 		td->io_issues[io_u->ddir]++;
 
 	ret = td->io_ops->queue(td, io_u);
@@ -254,7 +254,7 @@
 	 * IO, then it's likely an alignment problem or because the host fs
 	 * does not support O_DIRECT
 	 */
-	if (io_u->error == EINVAL && td->io_issues[io_u->ddir] == 1 &&
+	if (io_u->error == EINVAL && td->io_issues[io_u->ddir & 1] == 1 &&
 	    td->o.odirect) {
 		log_info("fio: first direct IO errored. File system may not "
 			 "support direct IO, or iomem_align= is bad.\n");
@@ -266,14 +266,14 @@
 	}
 
 	if (ret == FIO_Q_COMPLETED) {
-		if (!ddir_sync(io_u->ddir)) {
+		if (ddir_rw(io_u->ddir)) {
 			io_u_mark_depth(td, 1);
 			td->ts.total_io_u[io_u->ddir]++;
 		}
 	} else if (ret == FIO_Q_QUEUED) {
 		int r;
 
-		if (!ddir_sync(io_u->ddir)) {
+		if (ddir_rw(io_u->ddir)) {
 			td->io_u_queued++;
 			td->ts.total_io_u[io_u->ddir]++;
 		}
@@ -483,16 +483,16 @@
 {
 #ifndef FIO_HAVE_TRIM
 	io_u->error = EINVAL;
-	return io_u->xfer_buflen;
+	return 0;
 #else
 	struct fio_file *f = io_u->file;
 	int ret;
 
 	ret = os_trim(f->fd, io_u->offset + f->file_offset, io_u->xfer_buflen);
 	if (!ret)
-		return 0;
+		return io_u->xfer_buflen;;
 
-	io_u->error = errno;
-	return io_u->xfer_buflen;
+	io_u->error = ret;
+	return 0;
 #endif
 }
diff --git a/log.c b/log.c
index 829de94..ce4ac9f 100644
--- a/log.c
+++ b/log.c
@@ -294,6 +294,8 @@
 				rw = DDIR_SYNC;
 			else if (!strcmp(act, "datasync"))
 				rw = DDIR_DATASYNC;
+			else if (!strcmp(act, "trim"))
+				rw = DDIR_TRIM;
 			else {
 				log_err("fio: bad iolog file action: %s\n",
 									act);
diff --git a/stat.c b/stat.c
index 8a0fab0..8e9fba0 100644
--- a/stat.c
+++ b/stat.c
@@ -157,6 +157,8 @@
 	char *io_p, *bw_p, *iops_p;
 	int i2p;
 
+	assert(ddir_rw(ddir));
+
 	if (!ts->runtime[ddir])
 		return;
 
@@ -370,6 +372,8 @@
 	unsigned long long bw;
 	double mean, dev;
 
+	assert(ddir_rw(ddir));
+
 	bw = 0;
 	if (ts->runtime[ddir])
 		bw = ts->io_bytes[ddir] / ts->runtime[ddir];
@@ -735,13 +739,20 @@
 			   unsigned long val, enum fio_ddir ddir,
 			   unsigned int bs)
 {
+	if (!ddir_rw(ddir))
+		return;
+
 	__add_log_sample(iolog, val, ddir, bs, mtime_since_now(&td->epoch));
 }
 
 void add_agg_sample(unsigned long val, enum fio_ddir ddir, unsigned int bs)
 {
-	struct io_log *iolog = agg_io_log[ddir];
+	struct io_log *iolog;
 
+	if (!ddir_rw(ddir))
+		return;
+
+	iolog = agg_io_log[ddir];
 	__add_log_sample(iolog, val, ddir, bs, mtime_since_genesis());
 }
 
@@ -750,6 +761,9 @@
 {
 	struct thread_stat *ts = &td->ts;
 
+	if (!ddir_rw(ddir))
+		return;
+
 	add_stat_sample(&ts->clat_stat[ddir], usec);
 
 	if (ts->clat_log)
@@ -761,6 +775,9 @@
 {
 	struct thread_stat *ts = &td->ts;
 
+	if (!ddir_rw(ddir))
+		return;
+
 	add_stat_sample(&ts->slat_stat[ddir], usec);
 
 	if (ts->slat_log)
@@ -772,6 +789,9 @@
 {
 	struct thread_stat *ts = &td->ts;
 
+	if (!ddir_rw(ddir))
+		return;
+
 	add_stat_sample(&ts->lat_stat[ddir], usec);
 
 	if (ts->lat_log)
@@ -782,9 +802,12 @@
 		   struct timeval *t)
 {
 	struct thread_stat *ts = &td->ts;
-	unsigned long spent = mtime_since(&ts->stat_sample_time[ddir], t);
-	unsigned long rate;
+	unsigned long spent, rate;
 
+	if (!ddir_rw(ddir))
+		return;
+
+	spent = mtime_since(&ts->stat_sample_time[ddir], t);
 	if (spent < td->o.bw_avg_time)
 		return;