blktrace improvements and fixups

Remove iolog version 1 support; it's just too limited. Version 2 and
blktrace manage file events (add, open, close, etc.) on their own, and we
can clean up the file creation if we kill version 1.

Fix up for_each_file() for the case where no files are allocated.

Start handling notify events for blktrace. We don't see program
notifications yet; that needs to be looked into.

Properly account I/O bytes when adding blktrace events, so that the ETA
and I/O count are correct on replay.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/blktrace.c b/blktrace.c
index b707180..4d64b9f 100644
--- a/blktrace.c
+++ b/blktrace.c
@@ -148,6 +148,18 @@
 #define FMAJOR(dev)	((unsigned int) ((dev) >> FMINORBITS))
 #define FMINOR(dev)	((unsigned int) ((dev) & FMINORMASK))
 
+static void trace_add_open_event(struct thread_data *td, int fileno)
+{
+	struct io_piece *ipo;
+
+	ipo = calloc(1, sizeof(*ipo));
+
+	ipo->ddir = DDIR_INVAL;
+	ipo->fileno = fileno;
+	ipo->file_action = FIO_LOG_OPEN_FILE;
+	list_add_tail(&ipo->list, &td->io_log_list);
+}
+
 static void trace_add_file(struct thread_data *td, __u32 device)
 {
 	static unsigned int last_maj, last_min;
@@ -172,8 +184,11 @@
 
 	strcpy(dev, "/dev");
 	if (lookup_device(dev, maj, min)) {
+		int fileno;
+
 		dprint(FD_BLKTRACE, "add devices %s\n", dev);
-		add_file(td, dev);
+		fileno = add_file(td, dev);
+		trace_add_open_event(td, fileno);
 	}
 }
 
@@ -201,26 +216,30 @@
 	dprint(FD_BLKTRACE, "store ddir=%d, off=%llu, len=%lu, delay=%lu\n",
 							ipo->ddir, ipo->offset,
 							ipo->len, ipo->delay);
-	list_add_tail(&ipo->list, &td->io_log_list);
+	queue_io_piece(td, ipo);
 }
 
-/*
- * We only care for queue traces, most of the others are side effects
- * due to internal workings of the block layer.
- */
-static void handle_trace(struct thread_data *td, struct blk_io_trace *t,
-			 unsigned long long ttime, unsigned long *ios,
-			 unsigned int *bs)
+static void handle_trace_notify(struct thread_data *td, struct blk_io_trace *t)
+{
+	switch (t->action) {
+	case BLK_TN_PROCESS:
+		printf("got process notify: %x, %d\n", t->action, t->pid);
+		break;
+	case BLK_TN_TIMESTAMP:
+		printf("got timestamp notify: %x, %d\n", t->action, t->pid);
+		break;
+	default:
+		dprint(FD_BLKTRACE, "unknown trace act %x\n", t->action);
+		break;
+	}
+}
+
+static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
+			    unsigned long long ttime, unsigned long *ios,
+			    unsigned int *bs)
 {
 	int rw;
 
-	if ((t->action & 0xffff) != __BLK_TA_QUEUE)
-		return;
-	if (t->action & BLK_TC_ACT(BLK_TC_PC))
-		return;
-	if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
-		return;
-
 	trace_add_file(td, t->device);
 
 	rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
@@ -234,6 +253,25 @@
 }
 
 /*
+ * We only care for queue traces, most of the others are side effects
+ * due to internal workings of the block layer.
+ */
+static void handle_trace(struct thread_data *td, struct blk_io_trace *t,
+			 unsigned long long ttime, unsigned long *ios,
+			 unsigned int *bs)
+{
+	if ((t->action & 0xffff) != __BLK_TA_QUEUE)
+		return;
+	if (t->action & BLK_TC_ACT(BLK_TC_PC))
+		return;
+
+	if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
+		handle_trace_notify(td, t);
+	else
+		handle_trace_fs(td, t, ttime, ios, bs);
+}
+
+/*
  * Load a blktrace file by reading all the blk_io_trace entries, and storing
  * them as io_pieces like the fio text version would do.
  */
@@ -292,21 +330,24 @@
 			log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
 			goto err;
 		}
-		if (t.action & BLK_TC_ACT(BLK_TC_NOTIFY))
-			continue;
-		if (!ttime) {
+		if ((t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) == 0) {
+			if (!ttime) {
+				ttime = t.time;
+				cpu = t.cpu;
+			}
+
+			delay = 0;
+			if (cpu == t.cpu)
+				delay = t.time - ttime;
+			if ((t.action & BLK_TC_ACT(BLK_TC_WRITE)) && read_only)
+				skipped_writes++;
+			else
+				handle_trace(td, &t, delay, ios, rw_bs);
+
 			ttime = t.time;
 			cpu = t.cpu;
-		}
-		delay = 0;
-		if (cpu == t.cpu)
-			delay = t.time - ttime;
-		if ((t.action & BLK_TC_ACT(BLK_TC_WRITE)) && read_only)
-			skipped_writes++;
-		else
+		} else
 			handle_trace(td, &t, delay, ios, rw_bs);
-		ttime = t.time;
-		cpu = t.cpu;
 	} while (1);
 
 	fifo_free(fifo);