Initial blktrace support

This doesn't work yet; just committing what little bits I did so as not
to lose them.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/Makefile b/Makefile
index f172ed4..5858ab0 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@
 SCRIPTS = fio_generate_plots
 OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o \
 	filesetup.o eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \
-	rbtree.o diskutil.o
+	rbtree.o diskutil.o blktrace.o
 
 OBJS += engines/cpu.o
 OBJS += engines/libaio.o
diff --git a/blktrace.c b/blktrace.c
new file mode 100644
index 0000000..178a2a3
--- /dev/null
+++ b/blktrace.c
@@ -0,0 +1,95 @@
+/*
+ * blktrace support code for fio
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include "list.h"
+#include "fio.h"
+#include "blktrace_api.h"
+
+/* Step over the data trailing a trace header; returns 0, or errno on failure */
+static int discard_pdu(int fd, struct blk_io_trace *t)
+{
+	/* a trace without trailing data needs no seek at all */
+	if (!t->pdu_len)
+		return 0;
+	if (lseek(fd, t->pdu_len, SEEK_CUR) >= 0)
+		return 0;
+	return errno;
+}
+
+/*
+ * Returns 1 if 'filename' starts with a trace carrying blktrace magic, else 0
+ */
+int is_blktrace(const char *filename)
+{
+	struct blk_io_trace t;
+	int fd, ret, match = 0;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0) {
+		perror("open blktrace");
+		return 0;
+	}
+
+	/* report a failed read before close(), which may overwrite errno */
+	ret = read(fd, &t, sizeof(t));
+	if (ret < 0) {
+		perror("read blktrace");
+	} else if (ret != sizeof(t)) {
+		log_err("fio: short read on blktrace file\n");
+	} else if ((t.magic & 0xffffff00) == BLK_IO_TRACE_MAGIC) {
+		match = 1;
+	}
+
+	close(fd);
+	return match;
+}
+
+static void handle_trace(struct thread_data *td, struct blk_io_trace *t)
+{	/* stub: nothing is done with a parsed trace yet */
+}
+
+/*
+ * Walk all traces in 'filename'; returns 0 at clean EOF, 1 on any error
+ */
+int load_blktrace(struct thread_data *td, const char *filename)
+{
+	struct blk_io_trace t;
+	int fd, ret, failed = 1;
+
+	fd = open(filename, O_RDONLY);
+	if (fd < 0) {
+		td_verror(td, errno, "open blktrace file");
+		return 1;
+	}
+
+	while ((ret = read(fd, &t, sizeof(t))) == sizeof(t)) {
+		if ((t.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
+			log_err("fio: bad magic in blktrace data\n");
+			goto out;
+		}
+		if ((t.magic & 0xff) != BLK_IO_TRACE_VERSION) {
+			log_err("fio: bad blktrace version %d\n", t.magic & 0xff);
+			goto out;
+		}
+		ret = discard_pdu(fd, &t);
+		if (ret) {
+			td_verror(td, ret, "blktrace lseek");
+			goto out;
+		}
+		handle_trace(td, &t);
+	}
+
+	/* the loop ended on a read error, a short read, or plain EOF */
+	if (ret < 0) {
+		td_verror(td, errno, "read blktrace file");
+	} else if (ret) {
+		log_err("fio: short read on blktrace file\n");
+	} else {
+		failed = 0;
+	}
+out:
+	close(fd);	/* single exit so no path leaks the descriptor */
+	return failed;
+}
diff --git a/blktrace_api.h b/blktrace_api.h
new file mode 100644
index 0000000..61b405a
--- /dev/null
+++ b/blktrace_api.h
@@ -0,0 +1,128 @@
+#ifndef BLKTRACEAPI_H
+#define BLKTRACEAPI_H
+
+#include <asm/types.h>
+
+/*
+ * Trace categories, one bit each; BLK_TC_ACT() shifts them into an action
+ */
+enum {
+	BLK_TC_READ	= 1 << 0,	/* reads */
+	BLK_TC_WRITE	= 1 << 1,	/* writes */
+	BLK_TC_BARRIER	= 1 << 2,	/* barrier */
+	BLK_TC_SYNC	= 1 << 3,	/* sync */
+	BLK_TC_QUEUE	= 1 << 4,	/* queueing/merging */
+	BLK_TC_REQUEUE	= 1 << 5,	/* requeueing */
+	BLK_TC_ISSUE	= 1 << 6,	/* issue */
+	BLK_TC_COMPLETE	= 1 << 7,	/* completions */
+	BLK_TC_FS	= 1 << 8,	/* fs requests */
+	BLK_TC_PC	= 1 << 9,	/* pc requests */
+	BLK_TC_NOTIFY	= 1 << 10,	/* special message */
+	BLK_TC_AHEAD	= 1 << 11,	/* readahead */
+	BLK_TC_META	= 1 << 12,	/* metadata */
+
+	BLK_TC_END	= 1 << 15,	/* only 16-bits, reminder */
+};
+
+#define BLK_TC_SHIFT		(16)	/* category bits start at bit 16 */
+#define BLK_TC_ACT(act)		((act) << BLK_TC_SHIFT)	/* category -> action bits */
+
+/*
+ * Basic trace actions, numbered consecutively from 1
+ */
+enum {
+	__BLK_TA_QUEUE = 1,		/* queued */
+	__BLK_TA_BACKMERGE,		/* back merged to existing rq */
+	__BLK_TA_FRONTMERGE,		/* front merge to existing rq */
+	__BLK_TA_GETRQ,			/* allocated new request */
+	__BLK_TA_SLEEPRQ,		/* sleeping on rq allocation */
+	__BLK_TA_REQUEUE,		/* request requeued */
+	__BLK_TA_ISSUE,			/* sent to driver */
+	__BLK_TA_COMPLETE,		/* completed by driver */
+	__BLK_TA_PLUG,			/* queue was plugged */
+	__BLK_TA_UNPLUG_IO,		/* queue was unplugged by io */
+	__BLK_TA_UNPLUG_TIMER,		/* queue was unplugged by timer */
+	__BLK_TA_INSERT,		/* insert request */
+	__BLK_TA_SPLIT,			/* bio was split */
+	__BLK_TA_BOUNCE,		/* bio was bounced */
+	__BLK_TA_REMAP,			/* bio was remapped */
+};
+
+/*
+ * Notify events, numbered from 0; the BLK_TN_* macros add BLK_TC_NOTIFY.
+ */
+enum blktrace_notify {
+	__BLK_TN_PROCESS = 0,		/* establish pid/name mapping */
+	__BLK_TN_TIMESTAMP,		/* include system clock */
+};
+
+/*
+ * Trace actions in full. Additionally, read or write is masked
+ */
+#define BLK_TA_QUEUE		(__BLK_TA_QUEUE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_BACKMERGE	(__BLK_TA_BACKMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_FRONTMERGE	(__BLK_TA_FRONTMERGE | BLK_TC_ACT(BLK_TC_QUEUE))
+#define	BLK_TA_GETRQ		(__BLK_TA_GETRQ | BLK_TC_ACT(BLK_TC_QUEUE))
+#define	BLK_TA_SLEEPRQ		(__BLK_TA_SLEEPRQ | BLK_TC_ACT(BLK_TC_QUEUE))
+#define	BLK_TA_REQUEUE		(__BLK_TA_REQUEUE | BLK_TC_ACT(BLK_TC_REQUEUE))
+#define BLK_TA_ISSUE		(__BLK_TA_ISSUE | BLK_TC_ACT(BLK_TC_ISSUE))
+#define BLK_TA_COMPLETE		(__BLK_TA_COMPLETE| BLK_TC_ACT(BLK_TC_COMPLETE))
+#define BLK_TA_PLUG		(__BLK_TA_PLUG | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_UNPLUG_IO	(__BLK_TA_UNPLUG_IO | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_UNPLUG_TIMER	(__BLK_TA_UNPLUG_TIMER | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_INSERT		(__BLK_TA_INSERT | BLK_TC_ACT(BLK_TC_QUEUE))
+#define BLK_TA_SPLIT		(__BLK_TA_SPLIT)	/* no category bits */
+#define BLK_TA_BOUNCE		(__BLK_TA_BOUNCE)	/* no category bits */
+#define BLK_TA_REMAP		(__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
+
+#define BLK_TN_PROCESS		(__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
+#define BLK_TN_TIMESTAMP	(__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
+
+#define BLK_IO_TRACE_MAGIC	0x65617400	/* upper 24 bits of blk_io_trace.magic */
+#define BLK_IO_TRACE_VERSION	0x07		/* low 8 bits of blk_io_trace.magic */
+
+/*
+ * The trace itself: a fixed-size record followed by pdu_len bytes of data
+ */
+struct blk_io_trace {
+	__u32 magic;		/* MAGIC << 8 | version */
+	__u32 sequence;		/* event number */
+	__u64 time;		/* in nanoseconds */
+	__u64 sector;		/* disk offset */
+	__u32 bytes;		/* transfer length */
+	__u32 action;		/* what happened */
+	__u32 pid;		/* who did it */
+	__u32 device;		/* device identifier (dev_t) */
+	__u32 cpu;		/* on what cpu did it happen */
+	__u16 error;		/* completion error */
+	__u16 pdu_len;		/* length of data after this trace */
+};
+
+/*
+ * The remap event
+ */
+struct blk_io_trace_remap {
+	__u32 device;		/* NOTE(review): presumably the target device -- confirm */
+	__u32 device_from;	/* NOTE(review): presumably the source device -- confirm */
+	__u64 sector;		/* NOTE(review): presumably the original sector -- confirm */
+};
+
+/*
+ * User setup structure passed with the BLKTRACESETUP ioctl
+ */
+struct blk_user_trace_setup {
+	char name[32];			/* output */
+	__u16 act_mask;			/* input */
+	__u32 buf_size;			/* input */
+	__u32 buf_nr;			/* input */
+	__u64 start_lba;
+	__u64 end_lba;
+	__u32 pid;
+};
+
+/* NOTE(review): _IO()/_IOWR() presumably need <sys/ioctl.h> -- confirm */
+#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
+#define BLKTRACESTART _IO(0x12,116)
+#define BLKTRACESTOP _IO(0x12,117)
+#define BLKTRACETEARDOWN _IO(0x12,118)
+#endif
diff --git a/fio.h b/fio.h
index 060a1ee..d0506f9 100644
--- a/fio.h
+++ b/fio.h
@@ -826,6 +826,12 @@
 extern void td_io_close_file(struct thread_data *, struct fio_file *);
 
 /*
+ * blktrace support
+ */
+extern int is_blktrace(const char *);	/* 1 on blktrace magic, else 0 */
+extern int load_blktrace(struct thread_data *, const char *);	/* 0 ok, 1 error */
+
+/*
  * If logging output to a file, stderr should go to both stderr and f_err
  */
 #define log_err(args...)	do {		\
diff --git a/log.c b/log.c
index 9669b4a..fbc407d 100644
--- a/log.c
+++ b/log.c
@@ -199,9 +199,16 @@
 	if (td->io_ops->flags & FIO_DISKLESSIO)
 		return 0;
 
-	if (td->o.read_iolog_file)
-		ret = init_iolog_read(td);
-	else if (td->o.write_iolog_file)
+	if (td->o.read_iolog_file) {
+		/*
+		 * Check if it's a blktrace file and load that if possible.
+		 * Otherwise assume it's a normal log file and load that.
+		 */
+		if (is_blktrace(td->o.read_iolog_file))
+			ret = load_blktrace(td, td->o.read_iolog_file);
+		else
+			ret = init_iolog_read(td);
+	} else if (td->o.write_iolog_file)
 		ret = init_iolog_write(td);
 
 	return ret;