Add support for queuing > 1 command at the time
For the async engines, we currently do queuing by issuing one
command at the the time. Improve this by adding a ->commit()
hook to complement the ->queue() hook. When ->queue() returns
FIO_Q_BUSY, call ->commit() to actually send off the io to the
kernel.
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/engines/libaio.c b/engines/libaio.c
index cb488ef..510ecab 100644
--- a/engines/libaio.c
+++ b/engines/libaio.c
@@ -18,6 +18,8 @@
struct libaio_data {
io_context_t aio_ctx;
struct io_event *aio_events;
+ struct iocb **iocbs;
+ int iocbs_nr;
};
static int fio_libaio_prep(struct thread_data fio_unused *td, struct io_u *io_u)
@@ -68,40 +70,60 @@
static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
{
struct libaio_data *ld = td->io_ops->data;
- struct iocb *iocb = &io_u->iocb;
- long ret;
+ if (ld->iocbs_nr == (int) td->iodepth)
+ return FIO_Q_BUSY;
+
+ /*
+ * fsync is tricky, since it can fail and we need to do it
+ * serialized with other io. the reason is that linux doesn't
+ * support aio fsync yet. So return busy for the case where we
+ * have pending io, to let fio complete those first.
+ */
+ if (io_u->ddir == DDIR_SYNC) {
+ if (ld->iocbs_nr)
+ return FIO_Q_BUSY;
+ if (fsync(io_u->file->fd) < 0)
+ io_u->error = errno;
+
+ return FIO_Q_COMPLETED;
+ }
+
+ ld->iocbs[ld->iocbs_nr] = &io_u->iocb;
+ ld->iocbs_nr++;
+ return FIO_Q_QUEUED;
+}
+
+static int fio_libaio_commit(struct thread_data *td)
+{
+ struct libaio_data *ld = td->io_ops->data;
+ struct iocb **iocbs;
+ int ret, iocbs_nr;
+
+ if (!ld->iocbs_nr)
+ return 0;
+
+ iocbs_nr = ld->iocbs_nr;
+ iocbs = ld->iocbs;
do {
- ret = io_submit(ld->aio_ctx, 1, &iocb);
- if (ret == 1)
- return FIO_Q_QUEUED;
- else if (ret == -EAGAIN || !ret)
+ ret = io_submit(ld->aio_ctx, iocbs_nr, iocbs);
+ if (ret == iocbs_nr) {
+ ret = 0;
+ break;
+ } else if (ret > 0) {
+ iocbs += ret;
+ iocbs_nr -= ret;
+ continue;
+ } else if (ret == -EAGAIN || !ret)
usleep(100);
else if (ret == -EINTR)
continue;
- else if (ret == -EINVAL && io_u->ddir == DDIR_SYNC) {
- /*
- * the async fsync doesn't currently seem to be
- * supported, so just fsync if we fail with EINVAL
- * for a sync. since buffered io is also sync
- * with libaio (still), we don't have pending
- * requests to flush first.
- */
- if (fsync(io_u->file->fd) < 0)
- ret = -errno;
- else
- ret = FIO_Q_COMPLETED;
- break;
- } else
+ else
break;
} while (1);
- if (ret <= 0) {
- io_u->resid = io_u->xfer_buflen;
- io_u->error = -ret;
- td_verror(td, io_u->error);
- return FIO_Q_COMPLETED;
- }
+ if (!ret)
+ ld->iocbs_nr = 0;
return ret;
}
@@ -121,6 +143,8 @@
io_destroy(ld->aio_ctx);
if (ld->aio_events)
free(ld->aio_events);
+ if (ld->iocbs)
+ free(ld->iocbs);
free(ld);
td->io_ops->data = NULL;
@@ -140,6 +164,10 @@
ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
memset(ld->aio_events, 0, td->iodepth * sizeof(struct io_event));
+ ld->iocbs = malloc(td->iodepth * sizeof(struct iocb *));
+ memset(ld->iocbs, 0, sizeof(struct iocb *));
+ ld->iocbs_nr = 0;
+
td->io_ops->data = ld;
return 0;
}
@@ -150,6 +178,7 @@
.init = fio_libaio_init,
.prep = fio_libaio_prep,
.queue = fio_libaio_queue,
+ .commit = fio_libaio_commit,
.cancel = fio_libaio_cancel,
.getevents = fio_libaio_getevents,
.event = fio_libaio_event,
diff --git a/fio.c b/fio.c
index 6e78949..858d6b8 100644
--- a/fio.c
+++ b/fio.c
@@ -209,6 +209,7 @@
return 1;
}
+requeue:
ret = td_io_queue(td, io_u);
if (ret < 0) {
td_verror(td, io_u->error);
@@ -224,6 +225,10 @@
}
io_u_sync_complete(td, io_u, NULL);
+ } else if (ret == FIO_Q_BUSY) {
+ if (td_io_commit(td))
+ return 1;
+ goto requeue;
}
return 0;
@@ -285,6 +290,10 @@
continue;
case FIO_Q_QUEUED:
break;
+ case FIO_Q_BUSY:
+ requeue_io_u(td, &io_u);
+ ret = td_io_commit(td);
+ break;
default:
assert(ret < 0);
td_verror(td, -ret);
@@ -299,7 +308,7 @@
* completed io_u's first.
*/
min_events = 0;
- if (queue_full(td))
+ if (queue_full(td) || ret == FIO_Q_BUSY)
min_events = 1;
/*
@@ -403,6 +412,10 @@
break;
case FIO_Q_QUEUED:
break;
+ case FIO_Q_BUSY:
+ requeue_io_u(td, &io_u);
+ ret = td_io_commit(td);
+ break;
default:
assert(ret < 0);
put_io_u(td, io_u);
@@ -412,14 +425,15 @@
if (ret < 0 || td->error)
break;
- add_slat_sample(td, io_u->ddir, mtime_since(&io_u->start_time, &io_u->issue_time));
+ if (io_u)
+ add_slat_sample(td, io_u->ddir, mtime_since(&io_u->start_time, &io_u->issue_time));
/*
* See if we need to complete some commands
*/
- if (ret == FIO_Q_QUEUED) {
+ if (ret == FIO_Q_QUEUED || ret == FIO_Q_BUSY) {
min_evts = 0;
- if (queue_full(td))
+ if (queue_full(td) || ret == FIO_Q_BUSY)
min_evts = 1;
fio_gettime(&comp_time, NULL);
@@ -633,6 +647,7 @@
INIT_LIST_HEAD(&td->io_u_freelist);
INIT_LIST_HEAD(&td->io_u_busylist);
+ INIT_LIST_HEAD(&td->io_u_requeues);
INIT_LIST_HEAD(&td->io_hist_list);
INIT_LIST_HEAD(&td->io_log_list);
diff --git a/fio.h b/fio.h
index aa66ecd..baaa9a8 100644
--- a/fio.h
+++ b/fio.h
@@ -135,6 +135,7 @@
enum {
FIO_Q_COMPLETED = 0, /* completed sync */
FIO_Q_QUEUED = 1, /* queued, will complete async */
+ FIO_Q_BUSY = 2, /* no more room, call ->commit() */
};
#define FIO_HDR_MAGIC 0xf00baaef
@@ -334,6 +335,7 @@
unsigned long total_io_u;
struct list_head io_u_freelist;
struct list_head io_u_busylist;
+ struct list_head io_u_requeues;
/*
* Rate state
@@ -352,6 +354,7 @@
unsigned long long start_offset;
unsigned long long total_io_size;
+ unsigned long io_issues[2];
unsigned long long io_blocks[2];
unsigned long long io_bytes[2];
unsigned long long zone_bytes;
@@ -608,6 +611,7 @@
extern struct io_u *__get_io_u(struct thread_data *);
extern struct io_u *get_io_u(struct thread_data *, struct fio_file *);
extern void put_io_u(struct thread_data *, struct io_u *);
+extern void requeue_io_u(struct thread_data *, struct io_u **);
extern long io_u_sync_complete(struct thread_data *, struct io_u *, endio_handler *);
extern long io_u_queued_complete(struct thread_data *, int, endio_handler *);
@@ -619,6 +623,7 @@
extern int td_io_queue(struct thread_data *, struct io_u *);
extern int td_io_sync(struct thread_data *, struct fio_file *);
extern int td_io_getevents(struct thread_data *, int, int, struct timespec *);
+extern int td_io_commit(struct thread_data *);
/*
* This is a pretty crappy semaphore implementation, but with the use that fio
@@ -662,6 +667,7 @@
int (*init)(struct thread_data *);
int (*prep)(struct thread_data *, struct io_u *);
int (*queue)(struct thread_data *, struct io_u *);
+ int (*commit)(struct thread_data *);
int (*getevents)(struct thread_data *, int, int, struct timespec *);
struct io_u *(*event)(struct thread_data *, int);
int (*cancel)(struct thread_data *, struct io_u *);
@@ -671,7 +677,7 @@
unsigned long priv;
};
-#define FIO_IOOPS_VERSION 4
+#define FIO_IOOPS_VERSION 5
extern struct ioengine_ops *load_ioengine(struct thread_data *, const char *);
extern int register_ioengine(struct ioengine_ops *);
diff --git a/io_u.c b/io_u.c
index c04c9de..781599f 100644
--- a/io_u.c
+++ b/io_u.c
@@ -199,6 +199,16 @@
td->cur_depth--;
}
+void requeue_io_u(struct thread_data *td, struct io_u **io_u)
+{
+ struct io_u *__io_u = *io_u;
+
+ list_del(&__io_u->list);
+ list_add_tail(&__io_u->list, &td->io_u_requeues);
+ td->cur_depth--;
+ *io_u = NULL;
+}
+
static int fill_io_u(struct thread_data *td, struct fio_file *f,
struct io_u *io_u)
{
@@ -211,8 +221,8 @@
/*
* see if it's time to sync
*/
- if (td->fsync_blocks && !(td->io_blocks[DDIR_WRITE] % td->fsync_blocks)
- && should_fsync(td)) {
+ if (td->fsync_blocks && !(td->io_issues[DDIR_WRITE] % td->fsync_blocks)
+ && td->io_issues[DDIR_WRITE] && should_fsync(td)) {
io_u->ddir = DDIR_SYNC;
io_u->file = f;
return 0;
@@ -310,12 +320,18 @@
{
struct io_u *io_u = NULL;
- if (!queue_full(td)) {
+ if (!list_empty(&td->io_u_requeues))
+ io_u = list_entry(td->io_u_requeues.next, struct io_u, list);
+ else if (!queue_full(td)) {
io_u = list_entry(td->io_u_freelist.next, struct io_u, list);
io_u->buflen = 0;
- io_u->error = 0;
io_u->resid = 0;
+ io_u->file = NULL;
+ }
+
+ if (io_u) {
+ io_u->error = 0;
list_del(&io_u->list);
list_add(&io_u->list, &td->io_u_busylist);
td->cur_depth++;
@@ -337,6 +353,12 @@
if (!io_u)
return NULL;
+ /*
+ * from a requeue, io_u already setup
+ */
+ if (io_u->file)
+ return io_u;
+
if (td->zone_bytes >= td->zone_size) {
td->zone_bytes = 0;
f->last_pos += td->zone_skip;
@@ -477,8 +499,14 @@
struct io_completion_data icd;
int ret;
- if (min_events > 0)
+ if (min_events > 0) {
tsp = &ts;
+ ret = td_io_commit(td);
+ if (ret < 0) {
+ td_verror(td, -ret);
+ return ret;
+ }
+ }
ret = td_io_getevents(td, min_events, td->cur_depth, tsp);
if (ret < 0) {
diff --git a/ioengines.c b/ioengines.c
index 1b510df..16ea928 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -189,6 +189,9 @@
{
fio_gettime(&io_u->issue_time, NULL);
+ if (io_u->ddir != DDIR_SYNC)
+ td->io_issues[io_u->ddir]++;
+
return td->io_ops->queue(td, io_u);
}
@@ -199,3 +202,11 @@
return 0;
}
+
+int td_io_commit(struct thread_data *td)
+{
+ if (td->io_ops->commit)
+ return td->io_ops->commit(td);
+
+ return 0;
+}