Adding userspace_libaio_reap option
When a single thread is reading from a libaio io_context_t object
in a non-blocking polling manner (that is, with the minimum number
of events to return being 0), then it is possible to safely read
events directly from user-space, taking advantage of the fact that
the io_context_t object is a pointer to memory with a certain layout.
This patch adds an option, userspace_libaio_reap, which allows
reading events in this manner when the libaio engine is used.
You can observe its effect by setting iodepth_batch_complete=0
and seeing the change in distribution of system/user time based on
whether this new flag is set. If userspace_libaio_reap=1, then
busy polling takes place in userspace, and there is a larger amount of
usr CPU. If userspace_libaio_reap=0 (the default), then there is a
larger amount of sys CPU from the polling in the kernel.
Polling from a queue in this manner is several times faster. In my
testing, a polling operation in user-space took less than an eighth
of the time taken by the io_getevents syscall.
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
diff --git a/engines/libaio.c b/engines/libaio.c
index c837ab6..ea05c63 100644
--- a/engines/libaio.c
+++ b/engines/libaio.c
@@ -58,6 +58,47 @@
return io_u;
}
+struct aio_ring { /* layout that user_io_getevents() overlays on an io_context_t */
+ unsigned id; /** kernel internal index number */
+ unsigned nr; /** number of io_events */
+ unsigned head; /** index of the next event to reap; advanced by the reader */
+ unsigned tail; /** the ring is empty when head == tail */
+ /* Header identification; magic is checked against AIO_RING_MAGIC before use. */
+ unsigned magic;
+ unsigned compat_features;
+ unsigned incompat_features;
+ unsigned header_length; /** size of aio_ring */
+ /* Completion events; head is used as the index and wraps modulo nr. */
+ struct io_event events[0];
+};
+
+#define AIO_RING_MAGIC 0xa10a10a1 /* value aio_ring.magic must hold for userspace reaping */
+
+static int user_io_getevents(io_context_t aio_ctx, unsigned int max,
+ struct io_event *events)
+{
+ long i = 0; /* events copied so far; NOTE(review): long here, int return type */
+ unsigned head;
+ struct aio_ring *ring = (struct aio_ring*)aio_ctx; /* context handle is read as the ring header */
+ /* Copy up to max completed events from the ring into events[] without a syscall. */
+ while (i < max) {
+ head = ring->head;
+ /* head and tail are re-read on every iteration. */
+ if (head == ring->tail) {
+ /* There are no more completions */
+ break;
+ } else {
+ /* There is another completion to reap */
+ events[i] = ring->events[head];
+ read_barrier(); /* NOTE(review): presumably keeps the event copy ahead of the head update - confirm */
+ ring->head = (head + 1) % ring->nr; /* step past the consumed slot, wrapping at nr */
+ i++;
+ }
+ }
+ /* Count of events stored in events[]; 0 when the ring was empty. */
+ return i;
+}
+
static int fio_libaio_getevents(struct thread_data *td, unsigned int min,
unsigned int max, struct timespec *t)
{
@@ -66,7 +107,16 @@
int r, events = 0;
do {
- r = io_getevents(ld->aio_ctx, actual_min, max, ld->aio_events + events, t);
+ if (td->o.userspace_libaio_reap == 1
+ && actual_min == 0
+ && ((struct aio_ring *)(ld->aio_ctx))->magic
+ == AIO_RING_MAGIC) {
+ r = user_io_getevents(ld->aio_ctx, max,
+ ld->aio_events + events);
+ } else {
+ r = io_getevents(ld->aio_ctx, actual_min,
+ max, ld->aio_events + events, t);
+ }
if (r >= 0)
events += r;
else if (r == -EAGAIN)