blktrace improvements and fixups
Remove iolog version 1 support; it's just too limited. Version 2 and
blktrace manage file events (add, open, close, etc.) on their own, so we
can clean up the file creation once version 1 is gone.
Fix up for_each_file() for the case where no files are allocated.
Start handling notify events for blktrace. We don't see process
notifications yet; that still needs looking into.
Properly account io bytes when adding blktrace events, so that the ETA
and io count are correct on replay.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/blktrace.c b/blktrace.c
index b707180..4d64b9f 100644
--- a/blktrace.c
+++ b/blktrace.c
@@ -148,6 +148,18 @@
#define FMAJOR(dev) ((unsigned int) ((dev) >> FMINORBITS))
#define FMINOR(dev) ((unsigned int) ((dev) & FMINORMASK))
+/*
+ * Queue a file event for the file with index 'fileno' at the tail of the
+ * job's io log list. The entry has ddir set to DDIR_INVAL and file_action
+ * set to FIO_LOG_OPEN_FILE; all other fields are left zeroed by calloc().
+ * If the allocation fails, the error is logged and nothing is queued.
+ */
+static void trace_add_open_event(struct thread_data *td, int fileno)
+{
+	struct io_piece *ipo;
+
+	ipo = calloc(1, sizeof(*ipo));
+	if (!ipo) {
+		/* report the failure instead of writing through a NULL pointer */
+		log_err("fio: out of memory adding open event for file %d\n",
+			fileno);
+		return;
+	}
+
+	ipo->ddir = DDIR_INVAL;
+	ipo->fileno = fileno;
+	ipo->file_action = FIO_LOG_OPEN_FILE;
+	list_add_tail(&ipo->list, &td->io_log_list);
+}
+
static void trace_add_file(struct thread_data *td, __u32 device)
{
static unsigned int last_maj, last_min;
@@ -172,8 +184,11 @@
strcpy(dev, "/dev");
if (lookup_device(dev, maj, min)) {
+ int fileno;
+
dprint(FD_BLKTRACE, "add devices %s\n", dev);
- add_file(td, dev);
+ fileno = add_file(td, dev);
+ trace_add_open_event(td, fileno);
}
}
@@ -201,26 +216,30 @@
dprint(FD_BLKTRACE, "store ddir=%d, off=%llu, len=%lu, delay=%lu\n",
ipo->ddir, ipo->offset,
ipo->len, ipo->delay);
- list_add_tail(&ipo->list, &td->io_log_list);
+ queue_io_piece(td, ipo);
}
-/*
- * We only care for queue traces, most of the others are side effects
- * due to internal workings of the block layer.
- */
-static void handle_trace(struct thread_data *td, struct blk_io_trace *t,
- unsigned long long ttime, unsigned long *ios,
- unsigned int *bs)
+/*
+ * Report a blktrace notify record. Process and timestamp notifications are
+ * printed on stdout together with the raw action word and the pid; any
+ * other action only shows up in the FD_BLKTRACE debug output.
+ * 'td' is not used here.
+ */
+static void handle_trace_notify(struct thread_data *td, struct blk_io_trace *t)
+{
+	if (t->action == BLK_TN_PROCESS) {
+		printf("got process notify: %x, %d\n", t->action, t->pid);
+	} else if (t->action == BLK_TN_TIMESTAMP) {
+		printf("got timestamp notify: %x, %d\n", t->action, t->pid);
+	} else {
+		dprint(FD_BLKTRACE, "unknown trace act %x\n", t->action);
+	}
+}
+
+static void handle_trace_fs(struct thread_data *td, struct blk_io_trace *t,
+ unsigned long long ttime, unsigned long *ios,
+ unsigned int *bs)
{
int rw;
- if ((t->action & 0xffff) != __BLK_TA_QUEUE)
- return;
- if (t->action & BLK_TC_ACT(BLK_TC_PC))
- return;
- if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY))
- return;
-
trace_add_file(td, t->device);
rw = (t->action & BLK_TC_ACT(BLK_TC_WRITE)) != 0;
@@ -234,6 +253,25 @@
}
/*
+ * Dispatch one trace record.
+ *
+ * Notify records are handed to handle_trace_notify() before any filtering.
+ * Their low 16 action bits carry a notify code, not a block layer action
+ * (see blktrace_api.h), so running the queue test first would drop every
+ * notify whose code differs from __BLK_TA_QUEUE.
+ *
+ * For all other records we only care for queue traces, most of the others
+ * are side effects due to internal workings of the block layer. Records
+ * with BLK_TC_PC set are skipped as well.
+ */
+static void handle_trace(struct thread_data *td, struct blk_io_trace *t,
+			 unsigned long long ttime, unsigned long *ios,
+			 unsigned int *bs)
+{
+	if (t->action & BLK_TC_ACT(BLK_TC_NOTIFY)) {
+		handle_trace_notify(td, t);
+		return;
+	}
+
+	if ((t->action & 0xffff) != __BLK_TA_QUEUE)
+		return;
+	if (t->action & BLK_TC_ACT(BLK_TC_PC))
+		return;
+
+	handle_trace_fs(td, t, ttime, ios, bs);
+}
+
+/*
* Load a blktrace file by reading all the blk_io_trace entries, and storing
* them as io_pieces like the fio text version would do.
*/
@@ -292,21 +330,24 @@
log_err("fio: discarded %d of %d\n", ret, t.pdu_len);
goto err;
}
- if (t.action & BLK_TC_ACT(BLK_TC_NOTIFY))
- continue;
- if (!ttime) {
+ if ((t.action & BLK_TC_ACT(BLK_TC_NOTIFY)) == 0) {
+ if (!ttime) {
+ ttime = t.time;
+ cpu = t.cpu;
+ }
+
+ delay = 0;
+ if (cpu == t.cpu)
+ delay = t.time - ttime;
+ if ((t.action & BLK_TC_ACT(BLK_TC_WRITE)) && read_only)
+ skipped_writes++;
+ else
+ handle_trace(td, &t, delay, ios, rw_bs);
+
ttime = t.time;
cpu = t.cpu;
- }
- delay = 0;
- if (cpu == t.cpu)
- delay = t.time - ttime;
- if ((t.action & BLK_TC_ACT(BLK_TC_WRITE)) && read_only)
- skipped_writes++;
- else
+ } else
handle_trace(td, &t, delay, ios, rw_bs);
- ttime = t.time;
- cpu = t.cpu;
} while (1);
fifo_free(fifo);
diff --git a/filesetup.c b/filesetup.c
index 5c02f1c..e847276 100644
--- a/filesetup.c
+++ b/filesetup.c
@@ -425,6 +425,9 @@
dprint(FD_FILE, "setup files\n");
+ if (td->o.read_iolog_file)
+ return 0;
+
/*
* if ioengine defines a setup() method, it's responsible for
* opening the files and setting f->real_file_size to indicate
diff --git a/fio.h b/fio.h
index 0cf5334..dab66d2 100644
--- a/fio.h
+++ b/fio.h
@@ -762,6 +762,7 @@
extern void log_file(struct thread_data *, struct fio_file *, enum file_log_act);
extern int __must_check init_iolog(struct thread_data *td);
extern void log_io_piece(struct thread_data *, struct io_u *);
+extern void queue_io_piece(struct thread_data *, struct io_piece *);
extern void prune_io_piece_log(struct thread_data *);
extern void write_iolog_close(struct thread_data *);
@@ -961,9 +962,10 @@
#define for_each_td(td, i) \
for ((i) = 0, (td) = &threads[0]; (i) < (int) thread_number; (i)++, (td)++)
+/*
+ * Iterate over the files of a job: 'f' is set to (td)->files[i] for each
+ * index 'i' below (td)->o.nr_files, stopping early at a NULL entry.
+ * When (td)->files_index is zero the body never runs and (td)->files is
+ * never indexed; 'i' is then 0 and 'f' is NULL.
+ * The files_index check is part of the loop condition so the expansion
+ * stays a single for statement; an 'else' written after the loop body in
+ * the caller cannot attach to the macro.
+ */
#define for_each_file(td, f, i) \
- for ((i) = 0, (f) = (td)->files[0]; \
- (i) < (td)->o.nr_files && ((f) = (td)->files[i]) != NULL; \
- (i)++)
+	for ((i) = 0, (f) = (td)->files_index ? (td)->files[0] : NULL; \
+	     (td)->files_index && (i) < (td)->o.nr_files && \
+	     ((f) = (td)->files[i]) != NULL; \
+	     (i)++)
#define fio_assert(td, cond) do { \
if (!(cond)) { \
diff --git a/init.c b/init.c
index 8683ba6..a2aba33 100644
--- a/init.c
+++ b/init.c
@@ -446,7 +446,7 @@
td->io_ops->flags |= FIO_RAWIO;
file_alloced = 0;
- if (!td->o.filename && !td->files_index) {
+ if (!td->o.filename && !td->files_index && !td->o.read_iolog_file) {
file_alloced = 1;
if (td->o.nr_files == 1 && exists_and_not_file(jobname))
diff --git a/log.c b/log.c
index 86de3e8..8ac8ec3 100644
--- a/log.c
+++ b/log.c
@@ -10,6 +10,12 @@
static const char iolog_ver2[] = "fio version 2 iolog";
+/*
+ * Account for 'ipo' in the job's total_io_size (by ipo->len) and append it
+ * to the tail of the job's io log list.
+ */
+void queue_io_piece(struct thread_data *td, struct io_piece *ipo)
+{
+	td->total_io_size += ipo->len;
+	list_add_tail(&ipo->list, &td->io_log_list);
+}
+
void log_io_u(struct thread_data *td, struct io_u *io_u)
{
const char *act[] = { "read", "write", "sync" };
@@ -279,8 +285,7 @@
ipo->fileno = fileno;
ipo->file_action = file_action;
}
- list_add_tail(&ipo->list, &td->io_log_list);
- td->total_io_size += bytes;
+ queue_io_piece(td, ipo);
}
free(str);
@@ -310,69 +315,8 @@
*/
static int read_iolog(struct thread_data *td, FILE *f)
{
- unsigned long long offset;
- unsigned int bytes;
- char *str, *p;
- int reads, writes;
- int rw;
-
- /*
- * Read in the read iolog and store it, reuse the infrastructure
- * for doing verifications.
- */
- str = malloc(4096);
- reads = writes = 0;
- while ((p = fgets(str, 4096, f)) != NULL) {
- struct io_piece *ipo;
-
- if (sscanf(p, "%d,%llu,%u", &rw, &offset, &bytes) != 3) {
- log_err("bad iolog: %s\n", p);
- continue;
- }
- if (rw == DDIR_READ)
- reads++;
- else if (rw == DDIR_WRITE) {
- /*
- * Don't add a write for ro mode
- */
- if (read_only)
- continue;
- writes++;
- } else if (rw != DDIR_SYNC) {
- log_err("bad ddir: %d\n", rw);
- continue;
- }
-
- ipo = malloc(sizeof(*ipo));
- memset(ipo, 0, sizeof(*ipo));
- INIT_LIST_HEAD(&ipo->list);
- ipo->offset = offset;
- ipo->len = bytes;
- ipo->ddir = (enum fio_ddir) rw;
- if (bytes > td->o.max_bs[rw])
- td->o.max_bs[rw] = bytes;
- list_add_tail(&ipo->list, &td->io_log_list);
- td->total_io_size += bytes;
- }
-
- free(str);
-
- if (writes && read_only) {
- log_err("fio: <%s> skips replay of %d writes due to"
- " read-only\n", td->o.name, writes);
- writes = 0;
- }
-
- if (!reads && !writes)
- return 1;
- else if (reads && !writes)
- td->o.td_ddir = TD_DDIR_READ;
- else if (!reads && writes)
- td->o.td_ddir = TD_DDIR_WRITE;
- else
- td->o.td_ddir = TD_DDIR_RW;
-
- return 0;
+ /*
+ * Version 1 parsing was removed: 'td' and 'f' are not touched, the
+ * error is logged and 1 is returned, the value the removed code
+ * returned when it found nothing to replay.
+ */
+ log_err("fio: iolog version 1 is no longer supported\n");
+ return 1;
}
/*