[PATCH] Separate io engines into separate loadable objects
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/Makefile b/Makefile
index 684e8e0..5335e14 100644
--- a/Makefile
+++ b/Makefile
@@ -4,12 +4,13 @@
SCRIPTS = fio_generate_plots
all: depend $(PROGS) $(SCRIPTS)
+ $(MAKE) -C engines
fio: fio.o ioengines.o init.o stat.o log.o time.o md5.o crc32.o
- $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt
+ $(CC) $(CFLAGS) -o $@ $(filter %.o,$^) -lpthread -laio -lm -lrt -ldl
clean:
- -rm -f *.o .depend cscope.out $(PROGS)
+ -rm -f *.o .depend cscope.out $(PROGS) engines/*.o
depend:
@$(CC) -MM $(ALL_CFLAGS) *.c 1> .depend
@@ -20,10 +21,13 @@
INSTALL = install
prefix = /usr/local
bindir = $(prefix)/bin
+libdir = $(prefix)/lib/fio
install: $(PROGS) $(SCRIPTS)
$(INSTALL) -m755 -d $(DESTDIR)$(bindir)
$(INSTALL) $(PROGS) $(SCRIPTS) $(DESTDIR)$(bindir)
+ $(INSTALL) -m755 -d $(DESTDIR) $(libdir)
+ $(INSTALL) engines/*.o $(libdir)
ifneq ($(wildcard .depend),)
include .depend
diff --git a/engines/Makefile b/engines/Makefile
new file mode 100644
index 0000000..2d8de7a
--- /dev/null
+++ b/engines/Makefile
@@ -0,0 +1,20 @@
+CC = gcc
+CFLAGS = -Wall -O2 -W -shared -rdynamic -fPIC
+ALL_CFLAGS = $(CFLAGS) -I.. -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+LIBS =
+OBJS = fio-engine-sync.o fio-engine-splice.o fio-engine-mmap.o fio-engine-libaio.o fio-engine-posixaio.o fio-engine-sg.o fio-engine-cpu.o
+
+all: depend $(OBJS)
+
+depend:
+ @$(CC) -MM $(ALL_CFLAGS) *.c 1> .depend
+
+clean:
+ -rm -f *.o $(OBJS) .depend
+
+%.o: %.c
+ $(CC) $(ALL_CFLAGS) -o $*.o $<
+
+ifneq ($(wildcard .depend),)
+include .depend
+endif
diff --git a/engines/fio-engine-cpu.c b/engines/fio-engine-cpu.c
new file mode 100644
index 0000000..6d6fc56
--- /dev/null
+++ b/engines/fio-engine-cpu.c
@@ -0,0 +1,23 @@
+#include "fio.h"
+#include "os.h"
+
+static int fio_cpuio_init(struct thread_data *td)
+{
+ if (!td->cpuload) {
+ td_vmsg(td, EINVAL, "cpu thread needs rate");
+ return 1;
+ } else if (td->cpuload > 100)
+ td->cpuload = 100;
+
+ td->read_iolog = td->write_iolog = 0;
+ td->fd = -1;
+
+ return 0;
+}
+
+struct ioengine_ops ioengine = {
+ .name = "cpuio",
+ .version = FIO_IOOPS_VERSION,
+ .init = fio_cpuio_init,
+ .flags = FIO_CPUIO,
+};
diff --git a/engines/fio-engine-libaio.c b/engines/fio-engine-libaio.c
new file mode 100644
index 0000000..703808b
--- /dev/null
+++ b/engines/fio-engine-libaio.c
@@ -0,0 +1,131 @@
+/*
+ * native linux aio io engine
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include "fio.h"
+#include "os.h"
+
+#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
+
+struct libaio_data {
+ io_context_t aio_ctx;
+ struct io_event *aio_events;
+};
+
+static int fio_libaio_sync(struct thread_data *td)
+{
+ return fsync(td->fd);
+}
+
+static int fio_libaio_prep(struct thread_data *td, struct io_u *io_u)
+{
+ if (io_u->ddir == DDIR_READ)
+ io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+ else
+ io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
+
+ return 0;
+}
+
+static struct io_u *fio_libaio_event(struct thread_data *td, int event)
+{
+ struct libaio_data *ld = td->io_ops->data;
+
+ return ev_to_iou(ld->aio_events + event);
+}
+
+static int fio_libaio_getevents(struct thread_data *td, int min, int max,
+ struct timespec *t)
+{
+ struct libaio_data *ld = td->io_ops->data;
+ long r;
+
+ do {
+ r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
+ if (r == -EAGAIN) {
+ usleep(100);
+ continue;
+ } else if (r == -EINTR)
+ continue;
+ else
+ break;
+ } while (1);
+
+ return (int) r;
+}
+
+static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
+{
+ struct libaio_data *ld = td->io_ops->data;
+ struct iocb *iocb = &io_u->iocb;
+ long ret;
+
+ do {
+ ret = io_submit(ld->aio_ctx, 1, &iocb);
+ if (ret == 1)
+ return 0;
+ else if (ret == -EAGAIN)
+ usleep(100);
+ else if (ret == -EINTR)
+ continue;
+ else
+ break;
+ } while (1);
+
+ return (int) ret;
+
+}
+
+static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+ struct libaio_data *ld = td->io_ops->data;
+
+ return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
+}
+
+static void fio_libaio_cleanup(struct thread_data *td)
+{
+ struct libaio_data *ld = td->io_ops->data;
+
+ if (ld) {
+ io_destroy(ld->aio_ctx);
+ if (ld->aio_events)
+ free(ld->aio_events);
+
+ free(ld);
+ td->io_ops->data = NULL;
+ }
+}
+
+static int fio_libaio_init(struct thread_data *td)
+{
+ struct libaio_data *ld = malloc(sizeof(*ld));
+
+ memset(ld, 0, sizeof(*ld));
+ if (io_queue_init(td->iodepth, &ld->aio_ctx)) {
+ td_verror(td, errno);
+ return 1;
+ }
+
+ ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
+ td->io_ops->data = ld;
+ return 0;
+}
+
+struct ioengine_ops ioengine = {
+ .name = "libaio",
+ .version = FIO_IOOPS_VERSION,
+ .init = fio_libaio_init,
+ .prep = fio_libaio_prep,
+ .queue = fio_libaio_queue,
+ .cancel = fio_libaio_cancel,
+ .getevents = fio_libaio_getevents,
+ .event = fio_libaio_event,
+ .cleanup = fio_libaio_cleanup,
+ .sync = fio_libaio_sync,
+};
diff --git a/engines/fio-engine-mmap.c b/engines/fio-engine-mmap.c
new file mode 100644
index 0000000..abb42bf
--- /dev/null
+++ b/engines/fio-engine-mmap.c
@@ -0,0 +1,101 @@
+/*
+ * regular read/write sync io engine
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include "fio.h"
+#include "os.h"
+
+struct mmapio_data {
+ struct io_u *last_io_u;
+};
+
+static int fio_mmapio_getevents(struct thread_data *td, int fio_unused min,
+ int max, struct timespec fio_unused *t)
+{
+ assert(max <= 1);
+
+ /*
+ * we can only have one finished io_u for sync io, since the depth
+ * is always 1
+ */
+ if (list_empty(&td->io_u_busylist))
+ return 0;
+
+ return 1;
+}
+
+static struct io_u *fio_mmapio_event(struct thread_data *td, int event)
+{
+ struct mmapio_data *sd = td->io_ops->data;
+
+ assert(event == 0);
+
+ return sd->last_io_u;
+}
+
+
+static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
+{
+ unsigned long long real_off = io_u->offset - td->file_offset;
+ struct mmapio_data *sd = td->io_ops->data;
+
+ if (io_u->ddir == DDIR_READ)
+ memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
+ else
+ memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
+
+ /*
+ * not really direct, but should drop the pages from the cache
+ */
+ if (td->odirect) {
+ if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
+ io_u->error = errno;
+ if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
+ io_u->error = errno;
+ }
+
+ if (!io_u->error)
+ sd->last_io_u = io_u;
+
+ return io_u->error;
+}
+
+static int fio_mmapio_sync(struct thread_data *td)
+{
+ return msync(td->mmap, td->file_size, MS_SYNC);
+}
+
+static void fio_mmapio_cleanup(struct thread_data *td)
+{
+ if (td->io_ops->data) {
+ free(td->io_ops->data);
+ td->io_ops->data = NULL;
+ }
+}
+
+static int fio_mmapio_init(struct thread_data *td)
+{
+ struct mmapio_data *sd = malloc(sizeof(*sd));
+
+ sd->last_io_u = NULL;
+ td->io_ops->data = sd;
+ return 0;
+}
+
+struct ioengine_ops ioengine = {
+ .name = "mmap",
+ .version = FIO_IOOPS_VERSION,
+ .init = fio_mmapio_init,
+ .queue = fio_mmapio_queue,
+ .getevents = fio_mmapio_getevents,
+ .event = fio_mmapio_event,
+ .cleanup = fio_mmapio_cleanup,
+ .sync = fio_mmapio_sync,
+ .flags = FIO_SYNCIO,
+};
diff --git a/engines/fio-engine-posixaio.c b/engines/fio-engine-posixaio.c
new file mode 100644
index 0000000..871db77
--- /dev/null
+++ b/engines/fio-engine-posixaio.c
@@ -0,0 +1,188 @@
+/*
+ * posix aio io engine
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include "fio.h"
+#include "os.h"
+
+struct posixaio_data {
+ struct io_u **aio_events;
+};
+
+static int fill_timespec(struct timespec *ts)
+{
+#ifdef _POSIX_TIMERS
+ if (!clock_gettime(CLOCK_MONOTONIC, ts))
+ return 0;
+
+ perror("clock_gettime");
+#endif
+ return 1;
+}
+
+static unsigned long long ts_utime_since_now(struct timespec *t)
+{
+ long long sec, nsec;
+ struct timespec now;
+
+ if (fill_timespec(&now))
+ return 0;
+
+ sec = now.tv_sec - t->tv_sec;
+ nsec = now.tv_nsec - t->tv_nsec;
+ if (sec > 0 && nsec < 0) {
+ sec--;
+ nsec += 1000000000;
+ }
+
+ sec *= 1000000;
+ nsec /= 1000;
+ return sec + nsec;
+}
+
+static int fio_posixaio_sync(struct thread_data *td)
+{
+ return fsync(td->fd);
+}
+
+static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
+{
+ int r = aio_cancel(td->fd, &io_u->aiocb);
+
+ if (r == 1 || r == AIO_CANCELED)
+ return 0;
+
+ return 1;
+}
+
+static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
+{
+ struct aiocb *aiocb = &io_u->aiocb;
+
+ aiocb->aio_fildes = td->fd;
+ aiocb->aio_buf = io_u->buf;
+ aiocb->aio_nbytes = io_u->buflen;
+ aiocb->aio_offset = io_u->offset;
+
+ io_u->seen = 0;
+ return 0;
+}
+
+static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
+ struct timespec *t)
+{
+ struct posixaio_data *pd = td->io_ops->data;
+ struct list_head *entry;
+ struct timespec start;
+ int r, have_timeout = 0;
+
+ if (t && !fill_timespec(&start))
+ have_timeout = 1;
+
+ r = 0;
+restart:
+ list_for_each(entry, &td->io_u_busylist) {
+ struct io_u *io_u = list_entry(entry, struct io_u, list);
+ int err;
+
+ if (io_u->seen)
+ continue;
+
+ err = aio_error(&io_u->aiocb);
+ switch (err) {
+ default:
+ io_u->error = err;
+ case ECANCELED:
+ case 0:
+ pd->aio_events[r++] = io_u;
+ io_u->seen = 1;
+ break;
+ case EINPROGRESS:
+ break;
+ }
+
+ if (r >= max)
+ break;
+ }
+
+ if (r >= min)
+ return r;
+
+ if (have_timeout) {
+ unsigned long long usec;
+
+ usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
+ if (ts_utime_since_now(&start) > usec)
+ return r;
+ }
+
+ /*
+ * hrmpf, we need to wait for more. we should use aio_suspend, for
+ * now just sleep a little and recheck status of busy-and-not-seen
+ */
+ usleep(1000);
+ goto restart;
+}
+
+static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
+{
+ struct posixaio_data *pd = td->io_ops->data;
+
+ return pd->aio_events[event];
+}
+
+static int fio_posixaio_queue(struct thread_data fio_unused *td,
+ struct io_u *io_u)
+{
+ struct aiocb *aiocb = &io_u->aiocb;
+ int ret;
+
+ if (io_u->ddir == DDIR_READ)
+ ret = aio_read(aiocb);
+ else
+ ret = aio_write(aiocb);
+
+ if (ret)
+ io_u->error = errno;
+
+ return io_u->error;
+}
+
+static void fio_posixaio_cleanup(struct thread_data *td)
+{
+ struct posixaio_data *pd = td->io_ops->data;
+
+ if (pd) {
+ free(pd->aio_events);
+ free(pd);
+ td->io_ops->data = NULL;
+ }
+}
+
+static int fio_posixaio_init(struct thread_data *td)
+{
+ struct posixaio_data *pd = malloc(sizeof(*pd));
+
+ pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
+
+ td->io_ops->data = pd;
+ return 0;
+}
+
+struct ioengine_ops ioengine = {
+ .name = "posixaio",
+ .version = FIO_IOOPS_VERSION,
+ .init = fio_posixaio_init,
+ .prep = fio_posixaio_prep,
+ .queue = fio_posixaio_queue,
+ .cancel = fio_posixaio_cancel,
+ .getevents = fio_posixaio_getevents,
+ .event = fio_posixaio_event,
+ .cleanup = fio_posixaio_cleanup,
+ .sync = fio_posixaio_sync,
+};
diff --git a/engines/fio-engine-sg.c b/engines/fio-engine-sg.c
new file mode 100644
index 0000000..59eea1d
--- /dev/null
+++ b/engines/fio-engine-sg.c
@@ -0,0 +1,324 @@
+/*
+ * scsi generic sg v3 io engine
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/poll.h>
+#include "fio.h"
+#include "os.h"
+
+struct sgio_cmd {
+ unsigned char cdb[10];
+ int nr;
+};
+
+struct sgio_data {
+ struct sgio_cmd *cmds;
+ struct io_u **events;
+ unsigned int bs;
+};
+
+static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
+ struct io_u *io_u, int fs)
+{
+ struct sgio_cmd *sc = &sd->cmds[io_u->index];
+
+ memset(hdr, 0, sizeof(*hdr));
+ memset(sc->cdb, 0, sizeof(sc->cdb));
+
+ hdr->interface_id = 'S';
+ hdr->cmdp = sc->cdb;
+ hdr->cmd_len = sizeof(sc->cdb);
+ hdr->pack_id = io_u->index;
+ hdr->usr_ptr = io_u;
+
+ if (fs) {
+ hdr->dxferp = io_u->buf;
+ hdr->dxfer_len = io_u->buflen;
+ }
+}
+
+static int fio_sgio_ioctl_getevents(struct thread_data *td, int fio_unused min,
+ int max, struct timespec fio_unused *t)
+{
+ assert(max <= 1);
+
+ /*
+ * we can only have one finished io_u for sync io, since the depth
+ * is always 1
+ */
+ if (list_empty(&td->io_u_busylist))
+ return 0;
+
+ return 1;
+}
+
+
+static int fio_sgio_getevents(struct thread_data *td, int min, int max,
+ struct timespec fio_unused *t)
+{
+ struct sgio_data *sd = td->io_ops->data;
+ struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
+ void *buf = malloc(max * sizeof(struct sg_io_hdr));
+ int left = max, ret, events, i, r = 0, fl = 0;
+
+ /*
+ * don't block for !events
+ */
+ if (!min) {
+ fl = fcntl(td->fd, F_GETFL);
+ fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
+ }
+
+ while (left) {
+ do {
+ if (!min)
+ break;
+ poll(&pfd, 1, -1);
+ if (pfd.revents & POLLIN)
+ break;
+ } while (1);
+
+ ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ break;
+ td_verror(td, errno);
+ r = -1;
+ break;
+ } else if (!ret)
+ break;
+
+ events = ret / sizeof(struct sg_io_hdr);
+ left -= events;
+ r += events;
+
+ for (i = 0; i < events; i++) {
+ struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
+
+ sd->events[i] = hdr->usr_ptr;
+ }
+ }
+
+ if (!min)
+ fcntl(td->fd, F_SETFL, fl);
+
+ free(buf);
+ return r;
+}
+
+static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
+{
+ struct sgio_data *sd = td->io_ops->data;
+ struct sg_io_hdr *hdr = &io_u->hdr;
+
+ sd->events[0] = io_u;
+
+ return ioctl(td->fd, SG_IO, hdr);
+}
+
+static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
+{
+ struct sg_io_hdr *hdr = &io_u->hdr;
+ int ret;
+
+ ret = write(td->fd, hdr, sizeof(*hdr));
+ if (ret < 0)
+ return errno;
+
+ if (sync) {
+ ret = read(td->fd, hdr, sizeof(*hdr));
+ if (ret < 0)
+ return errno;
+ }
+
+ return 0;
+}
+
+static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
+{
+ if (td->filetype == FIO_TYPE_BD)
+ return fio_sgio_ioctl_doio(td, io_u);
+
+ return fio_sgio_rw_doio(td, io_u, sync);
+}
+
+static int fio_sgio_sync(struct thread_data *td)
+{
+ struct sgio_data *sd = td->io_ops->data;
+ struct sg_io_hdr *hdr;
+ struct io_u *io_u;
+ int ret;
+
+ io_u = __get_io_u(td);
+ if (!io_u)
+ return ENOMEM;
+
+ hdr = &io_u->hdr;
+ sgio_hdr_init(sd, hdr, io_u, 0);
+ hdr->dxfer_direction = SG_DXFER_NONE;
+
+ hdr->cmdp[0] = 0x35;
+
+ ret = fio_sgio_doio(td, io_u, 1);
+ put_io_u(td, io_u);
+ return ret;
+}
+
+static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
+{
+ struct sg_io_hdr *hdr = &io_u->hdr;
+ struct sgio_data *sd = td->io_ops->data;
+ int nr_blocks, lba;
+
+ if (io_u->buflen & (sd->bs - 1)) {
+ log_err("read/write not sector aligned\n");
+ return EINVAL;
+ }
+
+ sgio_hdr_init(sd, hdr, io_u, 1);
+
+ if (io_u->ddir == DDIR_READ) {
+ hdr->dxfer_direction = SG_DXFER_FROM_DEV;
+ hdr->cmdp[0] = 0x28;
+ } else {
+ hdr->dxfer_direction = SG_DXFER_TO_DEV;
+ hdr->cmdp[0] = 0x2a;
+ }
+
+ nr_blocks = io_u->buflen / sd->bs;
+ lba = io_u->offset / sd->bs;
+ hdr->cmdp[2] = (lba >> 24) & 0xff;
+ hdr->cmdp[3] = (lba >> 16) & 0xff;
+ hdr->cmdp[4] = (lba >> 8) & 0xff;
+ hdr->cmdp[5] = lba & 0xff;
+ hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
+ hdr->cmdp[8] = nr_blocks & 0xff;
+ return 0;
+}
+
+static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
+{
+ struct sg_io_hdr *hdr = &io_u->hdr;
+ int ret;
+
+ ret = fio_sgio_doio(td, io_u, 0);
+
+ if (ret < 0)
+ io_u->error = errno;
+ else if (hdr->status) {
+ io_u->resid = hdr->resid;
+ io_u->error = EIO;
+ }
+
+ return io_u->error;
+}
+
+static struct io_u *fio_sgio_event(struct thread_data *td, int event)
+{
+ struct sgio_data *sd = td->io_ops->data;
+
+ return sd->events[event];
+}
+
+static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
+{
+ struct sgio_data *sd = td->io_ops->data;
+ struct io_u *io_u;
+ struct sg_io_hdr *hdr;
+ unsigned char buf[8];
+ int ret;
+
+ io_u = __get_io_u(td);
+ assert(io_u);
+
+ hdr = &io_u->hdr;
+ sgio_hdr_init(sd, hdr, io_u, 0);
+ memset(buf, 0, sizeof(buf));
+
+ hdr->cmdp[0] = 0x25;
+ hdr->dxfer_direction = SG_DXFER_FROM_DEV;
+ hdr->dxferp = buf;
+ hdr->dxfer_len = sizeof(buf);
+
+ ret = fio_sgio_doio(td, io_u, 1);
+ if (ret) {
+ put_io_u(td, io_u);
+ return ret;
+ }
+
+ *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
+ put_io_u(td, io_u);
+ return 0;
+}
+
+static void fio_sgio_cleanup(struct thread_data *td)
+{
+ if (td->io_ops->data) {
+ free(td->io_ops->data);
+ td->io_ops->data = NULL;
+ }
+}
+
+static int fio_sgio_init(struct thread_data *td)
+{
+ struct sgio_data *sd;
+ unsigned int bs;
+ int ret;
+
+ sd = malloc(sizeof(*sd));
+ sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
+ sd->events = malloc(td->iodepth * sizeof(struct io_u *));
+ td->io_ops->data = sd;
+
+ if (td->filetype == FIO_TYPE_BD) {
+ if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
+ td_verror(td, errno);
+ return 1;
+ }
+ } else if (td->filetype == FIO_TYPE_CHAR) {
+ int version;
+
+ if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
+ td_verror(td, errno);
+ return 1;
+ }
+
+ ret = fio_sgio_get_bs(td, &bs);
+ if (ret)
+ return ret;
+ } else {
+ log_err("ioengine sgio only works on block devices\n");
+ return 1;
+ }
+
+ sd->bs = bs;
+
+ if (td->filetype == FIO_TYPE_BD)
+ td->io_ops->getevents = fio_sgio_ioctl_getevents;
+ else
+ td->io_ops->getevents = fio_sgio_getevents;
+
+ /*
+ * we want to do it, regardless of whether odirect is set or not
+ */
+ td->override_sync = 1;
+ return 0;
+}
+
+struct ioengine_ops ioengine = {
+ .name = "sg",
+ .version = FIO_IOOPS_VERSION,
+ .init = fio_sgio_init,
+ .prep = fio_sgio_prep,
+ .queue = fio_sgio_queue,
+ .getevents = fio_sgio_getevents,
+ .event = fio_sgio_event,
+ .cleanup = fio_sgio_cleanup,
+ .sync = fio_sgio_sync,
+ .flags = FIO_SYNCIO,
+};
diff --git a/engines/fio-engine-splice.c b/engines/fio-engine-splice.c
new file mode 100644
index 0000000..30984f1
--- /dev/null
+++ b/engines/fio-engine-splice.c
@@ -0,0 +1,193 @@
+/*
+ * splice io engine
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/poll.h>
+#include "fio.h"
+#include "os.h"
+
+struct spliceio_data {
+ struct io_u *last_io_u;
+ int pipe[2];
+};
+
+static int fio_spliceio_sync(struct thread_data *td)
+{
+ return fsync(td->fd);
+}
+
+static int fio_spliceio_getevents(struct thread_data *td, int fio_unused min,
+ int max, struct timespec fio_unused *t)
+{
+ assert(max <= 1);
+
+ /*
+ * we can only have one finished io_u for sync io, since the depth
+ * is always 1
+ */
+ if (list_empty(&td->io_u_busylist))
+ return 0;
+
+ return 1;
+}
+
+static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
+{
+ struct spliceio_data *sd = td->io_ops->data;
+
+ assert(event == 0);
+
+ return sd->last_io_u;
+}
+
+/*
+ * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
+ * So just splice the data from the file into the pipe, and use regular
+ * read to fill the buffer. Doesn't make a lot of sense, but...
+ */
+static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
+{
+ struct spliceio_data *sd = td->io_ops->data;
+ int ret, ret2, buflen;
+ off_t offset;
+ void *p;
+
+ offset = io_u->offset;
+ buflen = io_u->buflen;
+ p = io_u->buf;
+ while (buflen) {
+ int this_len = buflen;
+
+ if (this_len > SPLICE_DEF_SIZE)
+ this_len = SPLICE_DEF_SIZE;
+
+ ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
+ if (ret < 0) {
+ if (errno == ENODATA || errno == EAGAIN)
+ continue;
+
+ return errno;
+ }
+
+ buflen -= ret;
+
+ while (ret) {
+ ret2 = read(sd->pipe[0], p, ret);
+ if (ret2 < 0)
+ return errno;
+
+ ret -= ret2;
+ p += ret2;
+ }
+ }
+
+ return io_u->buflen;
+}
+
+/*
+ * For splice writing, we can vmsplice our data buffer directly into a
+ * pipe and then splice that to a file.
+ */
+static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
+{
+ struct spliceio_data *sd = td->io_ops->data;
+ struct iovec iov[1] = {
+ {
+ .iov_base = io_u->buf,
+ .iov_len = io_u->buflen,
+ }
+ };
+ struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
+ off_t off = io_u->offset;
+ int ret, ret2;
+
+ while (iov[0].iov_len) {
+ if (poll(&pfd, 1, -1) < 0)
+ return errno;
+
+ ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
+ if (ret < 0)
+ return errno;
+
+ iov[0].iov_len -= ret;
+ iov[0].iov_base += ret;
+
+ while (ret) {
+ ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
+ if (ret2 < 0)
+ return errno;
+
+ ret -= ret2;
+ }
+ }
+
+ return io_u->buflen;
+}
+
+static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
+{
+ struct spliceio_data *sd = td->io_ops->data;
+ int ret;
+
+ if (io_u->ddir == DDIR_READ)
+ ret = fio_splice_read(td, io_u);
+ else
+ ret = fio_splice_write(td, io_u);
+
+ if ((unsigned int) ret != io_u->buflen) {
+ if (ret > 0) {
+ io_u->resid = io_u->buflen - ret;
+ io_u->error = ENODATA;
+ } else
+ io_u->error = errno;
+ }
+
+ if (!io_u->error)
+ sd->last_io_u = io_u;
+
+ return io_u->error;
+}
+
+static void fio_spliceio_cleanup(struct thread_data *td)
+{
+ struct spliceio_data *sd = td->io_ops->data;
+
+ if (sd) {
+ close(sd->pipe[0]);
+ close(sd->pipe[1]);
+ free(sd);
+ td->io_ops->data = NULL;
+ }
+}
+
+static int fio_spliceio_init(struct thread_data *td)
+{
+ struct spliceio_data *sd = malloc(sizeof(*sd));
+
+ sd->last_io_u = NULL;
+ if (pipe(sd->pipe) < 0) {
+ td_verror(td, errno);
+ free(sd);
+ return 1;
+ }
+
+ td->io_ops->data = sd;
+ return 0;
+}
+
+struct ioengine_ops ioengine = {
+ .name = "splice",
+ .version = FIO_IOOPS_VERSION,
+ .init = fio_spliceio_init,
+ .queue = fio_spliceio_queue,
+ .getevents = fio_spliceio_getevents,
+ .event = fio_spliceio_event,
+ .cleanup = fio_spliceio_cleanup,
+ .sync = fio_spliceio_sync,
+ .flags = FIO_SYNCIO,
+};
diff --git a/engines/fio-engine-sync.c b/engines/fio-engine-sync.c
new file mode 100644
index 0000000..abc29f4
--- /dev/null
+++ b/engines/fio-engine-sync.c
@@ -0,0 +1,108 @@
+/*
+ * regular read/write sync io engine
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include "fio.h"
+#include "os.h"
+
+struct syncio_data {
+ struct io_u *last_io_u;
+};
+
+static int fio_syncio_sync(struct thread_data *td)
+{
+ return fsync(td->fd);
+}
+
+static int fio_syncio_getevents(struct thread_data *td, int fio_unused min,
+ int max, struct timespec fio_unused *t)
+{
+ assert(max <= 1);
+
+ /*
+ * we can only have one finished io_u for sync io, since the depth
+ * is always 1
+ */
+ if (list_empty(&td->io_u_busylist))
+ return 0;
+
+ return 1;
+}
+
+static struct io_u *fio_syncio_event(struct thread_data *td, int event)
+{
+ struct syncio_data *sd = td->io_ops->data;
+
+ assert(event == 0);
+
+ return sd->last_io_u;
+}
+
+static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
+{
+ if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
+ td_verror(td, errno);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
+{
+ struct syncio_data *sd = td->io_ops->data;
+ int ret;
+
+ if (io_u->ddir == DDIR_READ)
+ ret = read(td->fd, io_u->buf, io_u->buflen);
+ else
+ ret = write(td->fd, io_u->buf, io_u->buflen);
+
+ if ((unsigned int) ret != io_u->buflen) {
+ if (ret > 0) {
+ io_u->resid = io_u->buflen - ret;
+ io_u->error = EIO;
+ } else
+ io_u->error = errno;
+ }
+
+ if (!io_u->error)
+ sd->last_io_u = io_u;
+
+ return io_u->error;
+}
+
+static void fio_syncio_cleanup(struct thread_data *td)
+{
+ if (td->io_ops->data) {
+ free(td->io_ops->data);
+ td->io_ops->data = NULL;
+ }
+}
+
+static int fio_syncio_init(struct thread_data *td)
+{
+ struct syncio_data *sd = malloc(sizeof(*sd));
+
+ sd->last_io_u = NULL;
+ td->io_ops->data = sd;
+ return 0;
+}
+
+struct ioengine_ops ioengine = {
+ .name = "sync",
+ .version = FIO_IOOPS_VERSION,
+ .init = fio_syncio_init,
+ .prep = fio_syncio_prep,
+ .queue = fio_syncio_queue,
+ .getevents = fio_syncio_getevents,
+ .event = fio_syncio_event,
+ .cleanup = fio_syncio_cleanup,
+ .sync = fio_syncio_sync,
+ .flags = FIO_SYNCIO,
+};
diff --git a/fio.c b/fio.c
index 445babd..a2651f0 100644
--- a/fio.c
+++ b/fio.c
@@ -434,7 +434,7 @@
static int td_io_prep(struct thread_data *td, struct io_u *io_u)
{
- if (td->io_prep && td->io_prep(td, io_u))
+ if (td->io_ops->prep && td->io_ops->prep(td, io_u))
return 1;
return 0;
@@ -569,8 +569,8 @@
static int sync_td(struct thread_data *td)
{
- if (td->io_sync)
- return td->io_sync(td);
+ if (td->io_ops->sync)
+ return td->io_ops->sync(td);
return 0;
}
@@ -578,14 +578,14 @@
static int io_u_getevents(struct thread_data *td, int min, int max,
struct timespec *t)
{
- return td->io_getevents(td, min, max, t);
+ return td->io_ops->getevents(td, min, max, t);
}
static int io_u_queue(struct thread_data *td, struct io_u *io_u)
{
gettimeofday(&io_u->issue_time, NULL);
- return td->io_queue(td, io_u);
+ return td->io_ops->queue(td, io_u);
}
#define iocb_time(iocb) ((unsigned long) (iocb)->data)
@@ -629,7 +629,7 @@
icd->bytes_done[0] = icd->bytes_done[1] = 0;
for (i = 0; i < icd->nr; i++) {
- io_u = td->io_event(td, i);
+ io_u = td->io_ops->event(td, i);
io_completed(td, io_u, icd);
put_io_u(td, io_u);
@@ -660,11 +660,11 @@
/*
* now cancel remaining active events
*/
- if (td->io_cancel) {
+ if (td->io_ops->cancel) {
list_for_each_safe(entry, n, &td->io_u_busylist) {
io_u = list_entry(entry, struct io_u, list);
- r = td->io_cancel(td, io_u);
+ r = td->io_ops->cancel(td, io_u);
if (!r)
put_io_u(td, io_u);
}
@@ -749,7 +749,7 @@
break;
}
- v_io_u = td->io_event(td, 0);
+ v_io_u = td->io_ops->event(td, 0);
icd.nr = 1;
icd.error = 0;
io_completed(td, v_io_u, &icd);
@@ -895,32 +895,12 @@
}
}
-static void cleanup_io(struct thread_data *td)
-{
- if (td->io_cleanup)
- td->io_cleanup(td);
-}
-
static int init_io(struct thread_data *td)
{
- if (td->io_engine == FIO_SYNCIO)
- return fio_syncio_init(td);
- else if (td->io_engine == FIO_MMAPIO)
- return fio_mmapio_init(td);
- else if (td->io_engine == FIO_LIBAIO)
- return fio_libaio_init(td);
- else if (td->io_engine == FIO_POSIXAIO)
- return fio_posixaio_init(td);
- else if (td->io_engine == FIO_SGIO)
- return fio_sgio_init(td);
- else if (td->io_engine == FIO_SPLICEIO)
- return fio_spliceio_init(td);
- else if (td->io_engine == FIO_CPUIO)
- return fio_cpuio_init(td);
- else {
- log_err("bad io_engine %d\n", td->io_engine);
- return 1;
- }
+ if (td->io_ops->init)
+ return td->io_ops->init(td);
+
+ return 0;
}
static void cleanup_io_u(struct thread_data *td)
@@ -956,10 +936,10 @@
int i, max_units;
char *p;
- if (td->io_engine == FIO_CPUIO)
+ if (td->io_ops->flags & FIO_CPUIO)
return 0;
- if (td->io_engine & FIO_SYNCIO)
+ if (td->io_ops->flags & FIO_SYNCIO)
max_units = 1;
else
max_units = td->iodepth;
@@ -1229,7 +1209,7 @@
struct stat st;
int flags = 0;
- if (td->io_engine == FIO_CPUIO)
+ if (td->io_ops->flags & FIO_CPUIO)
return 0;
if (stat(td->file_name, &st) == -1) {
@@ -1282,10 +1262,10 @@
if (get_file_size(td))
return 1;
- if (td->io_engine != FIO_MMAPIO)
- return setup_file_plain(td);
- else
+ if (td->io_ops->flags & FIO_MMAPIO)
return setup_file_mmap(td);
+ else
+ return setup_file_plain(td);
}
static int switch_ioscheduler(struct thread_data *td)
@@ -1338,7 +1318,7 @@
static void clear_io_state(struct thread_data *td)
{
- if (td->io_engine == FIO_SYNCIO)
+ if (td->io_ops->flags & FIO_SYNCIO)
lseek(td->fd, SEEK_SET, 0);
td->last_pos = 0;
@@ -1423,7 +1403,7 @@
clear_io_state(td);
prune_io_piece_log(td);
- if (td->io_engine == FIO_CPUIO)
+ if (td->io_ops->flags & FIO_CPUIO)
do_cpuio(td);
else
do_io(td);
@@ -1472,7 +1452,7 @@
}
if (td->mmap)
munmap(td->mmap, td->file_size);
- cleanup_io(td);
+ close_ioengine(td);
cleanup_io_u(td);
td_set_runstate(td, TD_EXITED);
return NULL;
@@ -1738,7 +1718,7 @@
for (i = 0, cputhreads = 0; i < thread_number; i++) {
struct thread_data *td = &threads[i];
- if (td->io_engine == FIO_CPUIO)
+ if (td->io_ops->flags & FIO_CPUIO)
cputhreads++;
if (td->runstate != TD_EXITED)
diff --git a/fio.h b/fio.h
index ee47599..64f3b2d 100644
--- a/fio.h
+++ b/fio.h
@@ -124,14 +124,10 @@
FIO_TYPE_CHAR,
};
-enum fio_iotype {
+enum fio_ioengine_flags {
FIO_SYNCIO = 1 << 0,
- FIO_MMAPIO = 1 << 1 | FIO_SYNCIO,
- FIO_LIBAIO = 1 << 2,
- FIO_POSIXAIO = 1 << 3,
- FIO_SGIO = 1 << 4,
- FIO_SPLICEIO = 1 << 5 | FIO_SYNCIO,
- FIO_CPUIO = 1 << 6,
+ FIO_CPUIO = 1 << 1,
+ FIO_MMAPIO = 1 << 2,
};
/*
@@ -179,7 +175,6 @@
unsigned int fsync_blocks;
unsigned int start_delay;
unsigned long timeout;
- enum fio_iotype io_engine;
unsigned int overwrite;
unsigned int bw_avg_time;
unsigned int loops;
@@ -216,15 +211,7 @@
* IO engine hooks, contains everything needed to submit an io_u
* to any of the available IO engines.
*/
- void *io_data;
- char io_engine_name[16];
- int (*io_prep)(struct thread_data *, struct io_u *);
- int (*io_queue)(struct thread_data *, struct io_u *);
- int (*io_getevents)(struct thread_data *, int, int, struct timespec *);
- struct io_u *(*io_event)(struct thread_data *, int);
- int (*io_cancel)(struct thread_data *, struct io_u *);
- void (*io_cleanup)(struct thread_data *);
- int (*io_sync)(struct thread_data *);
+ struct ioengine_ops *io_ops;
/*
* Current IO depth and list of free and busy io_u's.
@@ -463,4 +450,30 @@
fprintf(stderr, ##args); \
} while (0)
+struct ioengine_ops {
+ char name[16];
+ int version;
+ int flags;
+ int (*init)(struct thread_data *);
+ int (*prep)(struct thread_data *, struct io_u *);
+ int (*queue)(struct thread_data *, struct io_u *);
+ int (*getevents)(struct thread_data *, int, int, struct timespec *);
+ struct io_u *(*event)(struct thread_data *, int);
+ int (*cancel)(struct thread_data *, struct io_u *);
+ void (*cleanup)(struct thread_data *);
+ int (*sync)(struct thread_data *);
+ void *data;
+ void *dlhandle;
+};
+
+#define FIO_IOOPS_VERSION 1
+
+extern struct ioengine_ops *load_ioengine(struct thread_data *, char *);
+extern void close_ioengine(struct thread_data *);
+
+/*
+ * Mark unused variables passed to ops functions as unused, to silence gcc
+ */
+#define fio_unused __attribute((__unused__))
+
#endif
diff --git a/init.c b/init.c
index 1ffd6ff..3eaf9b7 100644
--- a/init.c
+++ b/init.c
@@ -124,7 +124,7 @@
if (td == &def_thread)
return 0;
- if (td->io_engine & FIO_SYNCIO)
+ if (td->io_ops->flags & FIO_SYNCIO)
td->iodepth = 1;
else {
if (!td->iodepth)
@@ -197,10 +197,10 @@
if (!terse_output) {
if (!job_add_num) {
- if (td->io_engine == FIO_CPUIO)
+ if (td->io_ops->flags & FIO_CPUIO)
fprintf(f_out, "%s: ioengine=cpu, cpuload=%u, cpucycle=%u\n", td->name, td->cpuload, td->cpucycle);
else
- fprintf(f_out, "%s: (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->name, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_engine_name, td->iodepth);
+ fprintf(f_out, "%s: (g=%d): rw=%s, odir=%d, bs=%d-%d, rate=%d, ioengine=%s, iodepth=%d\n", td->name, td->groupid, ddir_str[ddir], td->odirect, td->min_bs, td->max_bs, td->rate, td->io_ops->name, td->iodepth);
} else if (job_add_num == 1)
fprintf(f_out, "...\n");
}
@@ -594,36 +594,12 @@
static int str_ioengine_cb(struct thread_data *td, char *str)
{
- if (!strncmp(str, "linuxaio", 8) || !strncmp(str, "aio", 3) ||
- !strncmp(str, "libaio", 6)) {
- strcpy(td->io_engine_name, "libaio");
- td->io_engine = FIO_LIBAIO;
+ if (!str)
+ str = DEF_IO_ENGINE_NAME;
+
+ td->io_ops = load_ioengine(td, str);
+ if (td->io_ops)
return 0;
- } else if (!strncmp(str, "posixaio", 8)) {
- strcpy(td->io_engine_name, "posixaio");
- td->io_engine = FIO_POSIXAIO;
- return 0;
- } else if (!strncmp(str, "sync", 4)) {
- strcpy(td->io_engine_name, "sync");
- td->io_engine = FIO_SYNCIO;
- return 0;
- } else if (!strncmp(str, "mmap", 4)) {
- strcpy(td->io_engine_name, "mmap");
- td->io_engine = FIO_MMAPIO;
- return 0;
- } else if (!strncmp(str, "sgio", 4)) {
- strcpy(td->io_engine_name, "sgio");
- td->io_engine = FIO_SGIO;
- return 0;
- } else if (!strncmp(str, "splice", 6)) {
- strcpy(td->io_engine_name, "splice");
- td->io_engine = FIO_SPLICEIO;
- return 0;
- } else if (!strncmp(str, "cpu", 3)) {
- strcpy(td->io_engine_name, "cpu");
- td->io_engine = FIO_CPUIO;
- return 0;
- }
log_err("fio: ioengine: { linuxaio, aio, libaio }, posixaio, sync, mmap, sgio, splice, cpu\n");
return 1;
@@ -1002,8 +978,6 @@
def_thread.bs = DEF_BS;
def_thread.min_bs = -1;
def_thread.max_bs = -1;
- def_thread.io_engine = DEF_IO_ENGINE;
- strcpy(def_thread.io_engine_name, DEF_IO_ENGINE_NAME);
def_thread.odirect = DEF_ODIRECT;
def_thread.ratecycle = DEF_RATE_CYCLE;
def_thread.sequential = DEF_SEQUENTIAL;
diff --git a/ioengines.c b/ioengines.c
index 01492bc..723f310 100644
--- a/ioengines.c
+++ b/ioengines.c
@@ -12,923 +12,43 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
-#include <errno.h>
-#include <assert.h>
-#include <time.h>
#include <string.h>
-#include <sys/mman.h>
-#include <sys/poll.h>
+#include <dlfcn.h>
#include "fio.h"
#include "os.h"
-static int fill_timespec(struct timespec *ts)
+struct ioengine_ops *load_ioengine(struct thread_data *td, char *name)
{
-#ifdef _POSIX_TIMERS
- if (!clock_gettime(CLOCK_MONOTONIC, ts))
- return 0;
+ char engine[16], engine_lib[256];
+ struct ioengine_ops *ops;
+ void *dlhandle;
- perror("clock_gettime");
-#endif
- return 1;
-}
-
-static unsigned long long ts_utime_since_now(struct timespec *t)
-{
- long long sec, nsec;
- struct timespec now;
-
- if (fill_timespec(&now))
- return 0;
-
- sec = now.tv_sec - t->tv_sec;
- nsec = now.tv_nsec - t->tv_nsec;
- if (sec > 0 && nsec < 0) {
- sec--;
- nsec += 1000000000;
- }
-
- sec *= 1000000;
- nsec /= 1000;
- return sec + nsec;
-}
-
-static int fio_io_sync(struct thread_data *td)
-{
- return fsync(td->fd);
-}
-
-#ifdef FIO_HAVE_LIBAIO
-
-#define ev_to_iou(ev) (struct io_u *) ((unsigned long) (ev)->obj)
-
-struct libaio_data {
- io_context_t aio_ctx;
- struct io_event *aio_events;
-};
-
-static int fio_libaio_io_prep(struct thread_data *td, struct io_u *io_u)
-{
- if (io_u->ddir == DDIR_READ)
- io_prep_pread(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
- else
- io_prep_pwrite(&io_u->iocb, td->fd, io_u->buf, io_u->buflen, io_u->offset);
-
- return 0;
-}
-
-static struct io_u *fio_libaio_event(struct thread_data *td, int event)
-{
- struct libaio_data *ld = td->io_data;
-
- return ev_to_iou(ld->aio_events + event);
-}
-
-static int fio_libaio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- struct libaio_data *ld = td->io_data;
- long r;
-
- do {
- r = io_getevents(ld->aio_ctx, min, max, ld->aio_events, t);
- if (r == -EAGAIN) {
- usleep(100);
- continue;
- } else if (r == -EINTR)
- continue;
- else
- break;
- } while (1);
-
- return (int) r;
-}
-
-static int fio_libaio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct libaio_data *ld = td->io_data;
- struct iocb *iocb = &io_u->iocb;
- long ret;
-
- do {
- ret = io_submit(ld->aio_ctx, 1, &iocb);
- if (ret == 1)
- return 0;
- else if (ret == -EAGAIN)
- usleep(100);
- else if (ret == -EINTR)
- continue;
- else
- break;
- } while (1);
-
- return (int) ret;
-
-}
-
-static int fio_libaio_cancel(struct thread_data *td, struct io_u *io_u)
-{
- struct libaio_data *ld = td->io_data;
-
- return io_cancel(ld->aio_ctx, &io_u->iocb, ld->aio_events);
-}
-
-static void fio_libaio_cleanup(struct thread_data *td)
-{
- struct libaio_data *ld = td->io_data;
-
- if (ld) {
- io_destroy(ld->aio_ctx);
- if (ld->aio_events)
- free(ld->aio_events);
-
- free(ld);
- td->io_data = NULL;
- }
-}
-
-int fio_libaio_init(struct thread_data *td)
-{
- struct libaio_data *ld = malloc(sizeof(*ld));
-
- memset(ld, 0, sizeof(*ld));
- if (io_queue_init(td->iodepth, &ld->aio_ctx)) {
- td_verror(td, errno);
- return 1;
- }
-
- td->io_prep = fio_libaio_io_prep;
- td->io_queue = fio_libaio_queue;
- td->io_getevents = fio_libaio_getevents;
- td->io_event = fio_libaio_event;
- td->io_cancel = fio_libaio_cancel;
- td->io_cleanup = fio_libaio_cleanup;
- td->io_sync = fio_io_sync;
-
- ld->aio_events = malloc(td->iodepth * sizeof(struct io_event));
- td->io_data = ld;
- return 0;
-}
-
-#else /* FIO_HAVE_LIBAIO */
-
-int fio_libaio_init(struct thread_data *td)
-{
- return EINVAL;
-}
-
-#endif /* FIO_HAVE_LIBAIO */
-
-#ifdef FIO_HAVE_POSIXAIO
-
-struct posixaio_data {
- struct io_u **aio_events;
-};
-
-static int fio_posixaio_cancel(struct thread_data *td, struct io_u *io_u)
-{
- int r = aio_cancel(td->fd, &io_u->aiocb);
-
- if (r == 1 || r == AIO_CANCELED)
- return 0;
-
- return 1;
-}
-
-static int fio_posixaio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct aiocb *aiocb = &io_u->aiocb;
-
- aiocb->aio_fildes = td->fd;
- aiocb->aio_buf = io_u->buf;
- aiocb->aio_nbytes = io_u->buflen;
- aiocb->aio_offset = io_u->offset;
-
- io_u->seen = 0;
- return 0;
-}
-
-static int fio_posixaio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- struct posixaio_data *pd = td->io_data;
- struct list_head *entry;
- struct timespec start;
- int r, have_timeout = 0;
-
- if (t && !fill_timespec(&start))
- have_timeout = 1;
-
- r = 0;
-restart:
- list_for_each(entry, &td->io_u_busylist) {
- struct io_u *io_u = list_entry(entry, struct io_u, list);
- int err;
-
- if (io_u->seen)
- continue;
-
- err = aio_error(&io_u->aiocb);
- switch (err) {
- default:
- io_u->error = err;
- case ECANCELED:
- case 0:
- pd->aio_events[r++] = io_u;
- io_u->seen = 1;
- break;
- case EINPROGRESS:
- break;
- }
-
- if (r >= max)
- break;
- }
-
- if (r >= min)
- return r;
-
- if (have_timeout) {
- unsigned long long usec;
-
- usec = (t->tv_sec * 1000000) + (t->tv_nsec / 1000);
- if (ts_utime_since_now(&start) > usec)
- return r;
- }
+ strcpy(engine, name);
/*
- * hrmpf, we need to wait for more. we should use aio_suspend, for
- * now just sleep a little and recheck status of busy-and-not-seen
+ * linux libaio has alias names, so convert to what we want
*/
- usleep(1000);
- goto restart;
+ if (!strncmp(engine, "linuxaio", 8) || !strncmp(engine, "aio", 3))
+ strcpy(engine, "libaio");
+
+ sprintf(engine_lib, "/usr/local/lib/fio/fio-engine-%s.o", engine);
+ dlerror();
+ dlhandle = dlopen(engine_lib, RTLD_LAZY);
+ if (!dlhandle)
+ printf("bla: %s\n", dlerror());
+
+ ops = dlsym(dlhandle, "ioengine");
+ if (!ops)
+ printf("get ops failed\n");
+
+ ops->dlhandle = dlhandle;
+ return ops;
}
-static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
+void close_ioengine(struct thread_data *td)
{
- struct posixaio_data *pd = td->io_data;
+ if (td->io_ops->cleanup)
+ td->io_ops->cleanup(td);
- return pd->aio_events[event];
-}
-
-static int fio_posixaio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct aiocb *aiocb = &io_u->aiocb;
- int ret;
-
- if (io_u->ddir == DDIR_READ)
- ret = aio_read(aiocb);
- else
- ret = aio_write(aiocb);
-
- if (ret)
- io_u->error = errno;
-
- return io_u->error;
-}
-
-static void fio_posixaio_cleanup(struct thread_data *td)
-{
- struct posixaio_data *pd = td->io_data;
-
- if (pd) {
- free(pd->aio_events);
- free(pd);
- td->io_data = NULL;
- }
-}
-
-int fio_posixaio_init(struct thread_data *td)
-{
- struct posixaio_data *pd = malloc(sizeof(*pd));
-
- pd->aio_events = malloc(td->iodepth * sizeof(struct io_u *));
-
- td->io_prep = fio_posixaio_prep;
- td->io_queue = fio_posixaio_queue;
- td->io_getevents = fio_posixaio_getevents;
- td->io_event = fio_posixaio_event;
- td->io_cancel = fio_posixaio_cancel;
- td->io_cleanup = fio_posixaio_cleanup;
- td->io_sync = fio_io_sync;
-
- td->io_data = pd;
- return 0;
-}
-
-#else /* FIO_HAVE_POSIXAIO */
-
-int fio_posixaio_init(struct thread_data *td)
-{
- return EINVAL;
-}
-
-#endif /* FIO_HAVE_POSIXAIO */
-
-struct syncio_data {
- struct io_u *last_io_u;
-};
-
-static int fio_syncio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- assert(max <= 1);
-
- /*
- * we can only have one finished io_u for sync io, since the depth
- * is always 1
- */
- if (list_empty(&td->io_u_busylist))
- return 0;
-
- return 1;
-}
-
-static struct io_u *fio_syncio_event(struct thread_data *td, int event)
-{
- struct syncio_data *sd = td->io_data;
-
- assert(event == 0);
-
- return sd->last_io_u;
-}
-
-static int fio_syncio_prep(struct thread_data *td, struct io_u *io_u)
-{
- if (lseek(td->fd, io_u->offset, SEEK_SET) == -1) {
- td_verror(td, errno);
- return 1;
- }
-
- return 0;
-}
-
-static int fio_syncio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct syncio_data *sd = td->io_data;
- int ret;
-
- if (io_u->ddir == DDIR_READ)
- ret = read(td->fd, io_u->buf, io_u->buflen);
- else
- ret = write(td->fd, io_u->buf, io_u->buflen);
-
- if ((unsigned int) ret != io_u->buflen) {
- if (ret > 0) {
- io_u->resid = io_u->buflen - ret;
- io_u->error = EIO;
- } else
- io_u->error = errno;
- }
-
- if (!io_u->error)
- sd->last_io_u = io_u;
-
- return io_u->error;
-}
-
-static void fio_syncio_cleanup(struct thread_data *td)
-{
- if (td->io_data) {
- free(td->io_data);
- td->io_data = NULL;
- }
-}
-
-int fio_syncio_init(struct thread_data *td)
-{
- struct syncio_data *sd = malloc(sizeof(*sd));
-
- td->io_prep = fio_syncio_prep;
- td->io_queue = fio_syncio_queue;
- td->io_getevents = fio_syncio_getevents;
- td->io_event = fio_syncio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_syncio_cleanup;
- td->io_sync = fio_io_sync;
-
- sd->last_io_u = NULL;
- td->io_data = sd;
- return 0;
-}
-
-static int fio_mmapio_queue(struct thread_data *td, struct io_u *io_u)
-{
- unsigned long long real_off = io_u->offset - td->file_offset;
- struct syncio_data *sd = td->io_data;
-
- if (io_u->ddir == DDIR_READ)
- memcpy(io_u->buf, td->mmap + real_off, io_u->buflen);
- else
- memcpy(td->mmap + real_off, io_u->buf, io_u->buflen);
-
- /*
- * not really direct, but should drop the pages from the cache
- */
- if (td->odirect) {
- if (msync(td->mmap + real_off, io_u->buflen, MS_SYNC) < 0)
- io_u->error = errno;
- if (madvise(td->mmap + real_off, io_u->buflen, MADV_DONTNEED) < 0)
- io_u->error = errno;
- }
-
- if (!io_u->error)
- sd->last_io_u = io_u;
-
- return io_u->error;
-}
-
-static int fio_mmapio_sync(struct thread_data *td)
-{
- return msync(td->mmap, td->file_size, MS_SYNC);
-}
-
-int fio_mmapio_init(struct thread_data *td)
-{
- struct syncio_data *sd = malloc(sizeof(*sd));
-
- td->io_prep = NULL;
- td->io_queue = fio_mmapio_queue;
- td->io_getevents = fio_syncio_getevents;
- td->io_event = fio_syncio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_syncio_cleanup;
- td->io_sync = fio_mmapio_sync;
-
- sd->last_io_u = NULL;
- td->io_data = sd;
- return 0;
-}
-
-#ifdef FIO_HAVE_SGIO
-
-struct sgio_cmd {
- unsigned char cdb[10];
- int nr;
-};
-
-struct sgio_data {
- struct sgio_cmd *cmds;
- struct io_u **events;
- unsigned int bs;
-};
-
-static void sgio_hdr_init(struct sgio_data *sd, struct sg_io_hdr *hdr,
- struct io_u *io_u, int fs)
-{
- struct sgio_cmd *sc = &sd->cmds[io_u->index];
-
- memset(hdr, 0, sizeof(*hdr));
- memset(sc->cdb, 0, sizeof(sc->cdb));
-
- hdr->interface_id = 'S';
- hdr->cmdp = sc->cdb;
- hdr->cmd_len = sizeof(sc->cdb);
- hdr->pack_id = io_u->index;
- hdr->usr_ptr = io_u;
-
- if (fs) {
- hdr->dxferp = io_u->buf;
- hdr->dxfer_len = io_u->buflen;
- }
-}
-
-static int fio_sgio_getevents(struct thread_data *td, int min, int max,
- struct timespec *t)
-{
- struct sgio_data *sd = td->io_data;
- struct pollfd pfd = { .fd = td->fd, .events = POLLIN };
- void *buf = malloc(max * sizeof(struct sg_io_hdr));
- int left = max, ret, events, i, r = 0, fl = 0;
-
- /*
- * don't block for !events
- */
- if (!min) {
- fl = fcntl(td->fd, F_GETFL);
- fcntl(td->fd, F_SETFL, fl | O_NONBLOCK);
- }
-
- while (left) {
- do {
- if (!min)
- break;
- poll(&pfd, 1, -1);
- if (pfd.revents & POLLIN)
- break;
- } while (1);
-
- ret = read(td->fd, buf, left * sizeof(struct sg_io_hdr));
- if (ret < 0) {
- if (errno == EAGAIN)
- break;
- td_verror(td, errno);
- r = -1;
- break;
- } else if (!ret)
- break;
-
- events = ret / sizeof(struct sg_io_hdr);
- left -= events;
- r += events;
-
- for (i = 0; i < events; i++) {
- struct sg_io_hdr *hdr = (struct sg_io_hdr *) buf + i;
-
- sd->events[i] = hdr->usr_ptr;
- }
- }
-
- if (!min)
- fcntl(td->fd, F_SETFL, fl);
-
- free(buf);
- return r;
-}
-
-static int fio_sgio_ioctl_doio(struct thread_data *td, struct io_u *io_u)
-{
- struct sgio_data *sd = td->io_data;
- struct sg_io_hdr *hdr = &io_u->hdr;
-
- sd->events[0] = io_u;
-
- return ioctl(td->fd, SG_IO, hdr);
-}
-
-static int fio_sgio_rw_doio(struct thread_data *td, struct io_u *io_u, int sync)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- int ret;
-
- ret = write(td->fd, hdr, sizeof(*hdr));
- if (ret < 0)
- return errno;
-
- if (sync) {
- ret = read(td->fd, hdr, sizeof(*hdr));
- if (ret < 0)
- return errno;
- }
-
- return 0;
-}
-
-static int fio_sgio_doio(struct thread_data *td, struct io_u *io_u, int sync)
-{
- if (td->filetype == FIO_TYPE_BD)
- return fio_sgio_ioctl_doio(td, io_u);
-
- return fio_sgio_rw_doio(td, io_u, sync);
-}
-
-static int fio_sgio_sync(struct thread_data *td)
-{
- struct sgio_data *sd = td->io_data;
- struct sg_io_hdr *hdr;
- struct io_u *io_u;
- int ret;
-
- io_u = __get_io_u(td);
- if (!io_u)
- return ENOMEM;
-
- hdr = &io_u->hdr;
- sgio_hdr_init(sd, hdr, io_u, 0);
- hdr->dxfer_direction = SG_DXFER_NONE;
-
- hdr->cmdp[0] = 0x35;
-
- ret = fio_sgio_doio(td, io_u, 1);
- put_io_u(td, io_u);
- return ret;
-}
-
-static int fio_sgio_prep(struct thread_data *td, struct io_u *io_u)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- struct sgio_data *sd = td->io_data;
- int nr_blocks, lba;
-
- if (io_u->buflen & (sd->bs - 1)) {
- log_err("read/write not sector aligned\n");
- return EINVAL;
- }
-
- sgio_hdr_init(sd, hdr, io_u, 1);
-
- if (io_u->ddir == DDIR_READ) {
- hdr->dxfer_direction = SG_DXFER_FROM_DEV;
- hdr->cmdp[0] = 0x28;
- } else {
- hdr->dxfer_direction = SG_DXFER_TO_DEV;
- hdr->cmdp[0] = 0x2a;
- }
-
- nr_blocks = io_u->buflen / sd->bs;
- lba = io_u->offset / sd->bs;
- hdr->cmdp[2] = (lba >> 24) & 0xff;
- hdr->cmdp[3] = (lba >> 16) & 0xff;
- hdr->cmdp[4] = (lba >> 8) & 0xff;
- hdr->cmdp[5] = lba & 0xff;
- hdr->cmdp[7] = (nr_blocks >> 8) & 0xff;
- hdr->cmdp[8] = nr_blocks & 0xff;
- return 0;
-}
-
-static int fio_sgio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct sg_io_hdr *hdr = &io_u->hdr;
- int ret;
-
- ret = fio_sgio_doio(td, io_u, 0);
-
- if (ret < 0)
- io_u->error = errno;
- else if (hdr->status) {
- io_u->resid = hdr->resid;
- io_u->error = EIO;
- }
-
- return io_u->error;
-}
-
-static struct io_u *fio_sgio_event(struct thread_data *td, int event)
-{
- struct sgio_data *sd = td->io_data;
-
- return sd->events[event];
-}
-
-static int fio_sgio_get_bs(struct thread_data *td, unsigned int *bs)
-{
- struct sgio_data *sd = td->io_data;
- struct io_u *io_u;
- struct sg_io_hdr *hdr;
- unsigned char buf[8];
- int ret;
-
- io_u = __get_io_u(td);
- assert(io_u);
-
- hdr = &io_u->hdr;
- sgio_hdr_init(sd, hdr, io_u, 0);
- memset(buf, 0, sizeof(buf));
-
- hdr->cmdp[0] = 0x25;
- hdr->dxfer_direction = SG_DXFER_FROM_DEV;
- hdr->dxferp = buf;
- hdr->dxfer_len = sizeof(buf);
-
- ret = fio_sgio_doio(td, io_u, 1);
- if (ret) {
- put_io_u(td, io_u);
- return ret;
- }
-
- *bs = (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
- put_io_u(td, io_u);
- return 0;
-}
-
-int fio_sgio_init(struct thread_data *td)
-{
- struct sgio_data *sd;
- unsigned int bs;
- int ret;
-
- sd = malloc(sizeof(*sd));
- sd->cmds = malloc(td->iodepth * sizeof(struct sgio_cmd));
- sd->events = malloc(td->iodepth * sizeof(struct io_u *));
- td->io_data = sd;
-
- if (td->filetype == FIO_TYPE_BD) {
- if (ioctl(td->fd, BLKSSZGET, &bs) < 0) {
- td_verror(td, errno);
- return 1;
- }
- } else if (td->filetype == FIO_TYPE_CHAR) {
- int version;
-
- if (ioctl(td->fd, SG_GET_VERSION_NUM, &version) < 0) {
- td_verror(td, errno);
- return 1;
- }
-
- ret = fio_sgio_get_bs(td, &bs);
- if (ret)
- return ret;
- } else {
- log_err("ioengine sgio only works on block devices\n");
- return 1;
- }
-
- sd->bs = bs;
-
- td->io_prep = fio_sgio_prep;
- td->io_queue = fio_sgio_queue;
-
- if (td->filetype == FIO_TYPE_BD)
- td->io_getevents = fio_syncio_getevents;
- else
- td->io_getevents = fio_sgio_getevents;
-
- td->io_event = fio_sgio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_syncio_cleanup;
- td->io_sync = fio_sgio_sync;
-
- /*
- * we want to do it, regardless of whether odirect is set or not
- */
- td->override_sync = 1;
- return 0;
-}
-
-#else /* FIO_HAVE_SGIO */
-
-int fio_sgio_init(struct thread_data *td)
-{
- return EINVAL;
-}
-
-#endif /* FIO_HAVE_SGIO */
-
-#ifdef FIO_HAVE_SPLICE
-struct spliceio_data {
- struct io_u *last_io_u;
- int pipe[2];
-};
-
-static struct io_u *fio_spliceio_event(struct thread_data *td, int event)
-{
- struct spliceio_data *sd = td->io_data;
-
- assert(event == 0);
-
- return sd->last_io_u;
-}
-
-/*
- * For splice reading, we unfortunately cannot (yet) vmsplice the other way.
- * So just splice the data from the file into the pipe, and use regular
- * read to fill the buffer. Doesn't make a lot of sense, but...
- */
-static int fio_splice_read(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_data;
- int ret, ret2, buflen;
- off_t offset;
- void *p;
-
- offset = io_u->offset;
- buflen = io_u->buflen;
- p = io_u->buf;
- while (buflen) {
- int this_len = buflen;
-
- if (this_len > SPLICE_DEF_SIZE)
- this_len = SPLICE_DEF_SIZE;
-
- ret = splice(td->fd, &offset, sd->pipe[1], NULL, this_len, SPLICE_F_MORE);
- if (ret < 0) {
- if (errno == ENODATA || errno == EAGAIN)
- continue;
-
- return errno;
- }
-
- buflen -= ret;
-
- while (ret) {
- ret2 = read(sd->pipe[0], p, ret);
- if (ret2 < 0)
- return errno;
-
- ret -= ret2;
- p += ret2;
- }
- }
-
- return io_u->buflen;
-}
-
-/*
- * For splice writing, we can vmsplice our data buffer directly into a
- * pipe and then splice that to a file.
- */
-static int fio_splice_write(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_data;
- struct iovec iov[1] = {
- {
- .iov_base = io_u->buf,
- .iov_len = io_u->buflen,
- }
- };
- struct pollfd pfd = { .fd = sd->pipe[1], .events = POLLOUT, };
- off_t off = io_u->offset;
- int ret, ret2;
-
- while (iov[0].iov_len) {
- if (poll(&pfd, 1, -1) < 0)
- return errno;
-
- ret = vmsplice(sd->pipe[1], iov, 1, SPLICE_F_NONBLOCK);
- if (ret < 0)
- return errno;
-
- iov[0].iov_len -= ret;
- iov[0].iov_base += ret;
-
- while (ret) {
- ret2 = splice(sd->pipe[0], NULL, td->fd, &off, ret, 0);
- if (ret2 < 0)
- return errno;
-
- ret -= ret2;
- }
- }
-
- return io_u->buflen;
-}
-
-static int fio_spliceio_queue(struct thread_data *td, struct io_u *io_u)
-{
- struct spliceio_data *sd = td->io_data;
- int ret;
-
- if (io_u->ddir == DDIR_READ)
- ret = fio_splice_read(td, io_u);
- else
- ret = fio_splice_write(td, io_u);
-
- if ((unsigned int) ret != io_u->buflen) {
- if (ret > 0) {
- io_u->resid = io_u->buflen - ret;
- io_u->error = ENODATA;
- } else
- io_u->error = errno;
- }
-
- if (!io_u->error)
- sd->last_io_u = io_u;
-
- return io_u->error;
-}
-
-static void fio_spliceio_cleanup(struct thread_data *td)
-{
- struct spliceio_data *sd = td->io_data;
-
- if (sd) {
- close(sd->pipe[0]);
- close(sd->pipe[1]);
- free(sd);
- td->io_data = NULL;
- }
-}
-
-int fio_spliceio_init(struct thread_data *td)
-{
- struct spliceio_data *sd = malloc(sizeof(*sd));
-
- td->io_queue = fio_spliceio_queue;
- td->io_getevents = fio_syncio_getevents;
- td->io_event = fio_spliceio_event;
- td->io_cancel = NULL;
- td->io_cleanup = fio_spliceio_cleanup;
- td->io_sync = fio_io_sync;
-
- sd->last_io_u = NULL;
- if (pipe(sd->pipe) < 0) {
- td_verror(td, errno);
- free(sd);
- return 1;
- }
-
- td->io_data = sd;
- return 0;
-}
-
-#else /* FIO_HAVE_SPLICE */
-
-int fio_spliceio_init(struct thread_data *td)
-{
- return EINVAL;
-}
-
-#endif /* FIO_HAVE_SPLICE */
-
-int fio_cpuio_init(struct thread_data *td)
-{
- if (!td->cpuload) {
- td_vmsg(td, EINVAL, "cpu thread needs rate");
- return 1;
- } else if (td->cpuload > 100)
- td->cpuload = 100;
-
- td->read_iolog = td->write_iolog = 0;
- td->fd = -1;
-
- return 0;
+ dlclose(td->io_ops->dlhandle);
}
diff --git a/os.h b/os.h
index 68660b6..b44d34c 100644
--- a/os.h
+++ b/os.h
@@ -47,13 +47,4 @@
#define OS_O_DIRECT O_DIRECT
#endif
-struct thread_data;
-extern int fio_libaio_init(struct thread_data *);
-extern int fio_posixaio_init(struct thread_data *);
-extern int fio_syncio_init(struct thread_data *);
-extern int fio_mmapio_init(struct thread_data *);
-extern int fio_sgio_init(struct thread_data *);
-extern int fio_spliceio_init(struct thread_data *);
-extern int fio_cpuio_init(struct thread_data *);
-
#endif