Add a full example to the io_uring(7) man page, and fix some text to comply with the "semantic newlines" recommendation from man-pages(7).
diff --git a/man/io_uring.7 b/man/io_uring.7
index 4ccbd86..d371afb 100644
--- a/man/io_uring.7
+++ b/man/io_uring.7
@@ -107,14 +107,17 @@
.BR io_uring_enter (2).
.IP \(bu
It is important to remember that I/O requests submitted to the kernel can
-complete in any order. It is not necessary for the kernel to process one
-request after another,
-in the order you placed them. Given that the interface is a ring, the requests
-are attempted in order, however that doesn't imply any sort of ordering on the
-completion of them. When more than one request is in flight, it is not possible
-to determine which one will complete first. When you dequeue CQEs off the CQ,
-you should always check which submitted request it corresponds to. The most
-common method for doing so is utilizing the
+complete in any order.
+It is not necessary for the kernel to process one request after another,
+in the order you placed them.
+Given that the interface is a ring,
+the requests are attempted in order,
+but that doesn't imply any ordering of their completions.
+When more than one request is in flight,
+it is not possible to determine which one will complete first.
+When you dequeue CQEs off the CQ,
+you should always check which submitted request it corresponds to.
+The most common method for doing so is utilizing the
.I user_data
field in the request, which is passed back on the completion side.
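+For example,
+a request can be tagged before submission and matched after completion,
+as in the following sketch
+(the names
+.I request_id
+and
+.BR handle_completion ()
+are hypothetical):
+.IP
+.EX
+sqe->user_data = request_id;   /* hypothetical request tag */
+/* ... submit; later, when reaping the completion: */
+handle_completion(cqe->user_data, cqe->res);
+.EE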
.PP
@@ -146,7 +149,15 @@
dispatches them for asynchronous processing.
.SS Setting up io_uring
.PP
-The following example function sets up
+The main steps in setting up
+.B io_uring
+consist of mapping in the shared buffers with
+.BR mmap (2)
+calls.
+In the example program included in this man page,
+the function
+.BR app_setup_uring ()
+sets up
.B io_uring
with a QUEUE_DEPTH deep submission queue.
Pay attention to the 2
@@ -157,87 +168,6 @@
.BR mmap(2)
calls are required.
.PP
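+In outline,
+the mappings performed by
+.BR app_setup_uring ()
+in the EXAMPLES section below look as follows
+(error handling and the separate completion-ring mapping needed
+on kernels before 5.4 are omitted for brevity):
+.PP
+.EX
+/* sketch only; see EXAMPLES for the complete version */
+sq_ptr = mmap(0, sring_sz, PROT_READ | PROT_WRITE,
+              MAP_SHARED | MAP_POPULATE,
+              ring_fd, IORING_OFF_SQ_RING);
+
+sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
+            PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+            ring_fd, IORING_OFF_SQES);
+.EE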
-.EX
-int app_setup_uring(void) {
- struct io_uring_params p;
- void *sq_ptr, *cq_ptr;
-
- /* See io_uring_setup(2) for io_uring_params.flags you can set */
- memset(&p, 0, sizeof(p));
- ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
- if (ring_fd < 0) {
- perror("io_uring_setup");
- return 1;
- }
-
- /*
- * io_uring communication happens via 2 shared kernel-user space ring
- * buffers, which can be jointly mapped with a single mmap() call in
- * kernels >= 5.4.
- */
-
- int sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
- int cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
-
- /* Rather than check for kernel version, the recommended way is to
- * check the features field of the io_uring_params structure, which is a
- * bitmask. If IORING_FEAT_SINGLE_MMAP is set, we can do away with the
- * second mmap() call to map in the completion ring separately.
- */
- if (p.features & IORING_FEAT_SINGLE_MMAP) {
- if (cring_sz > sring_sz)
- sring_sz = cring_sz;
- cring_sz = sring_sz;
- }
-
- /* Map in the submission and completion queue ring buffers.
- * Kernels < 5.4 only map in the submission queue, though.
- */
- sq_ptr = mmap(0, sring_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE,
- ring_fd, IORING_OFF_SQ_RING);
- if (sq_ptr == MAP_FAILED) {
- perror("mmap");
- return 1;
- }
-
- if (p.features & IORING_FEAT_SINGLE_MMAP) {
- cq_ptr = sq_ptr;
- } else {
- /* Map in the completion queue ring buffer in older kernels separately */
- cq_ptr = mmap(0, cring_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE,
- ring_fd, IORING_OFF_CQ_RING);
- if (cq_ptr == MAP_FAILED) {
- perror("mmap");
- return 1;
- }
- }
- /* Save useful fields for later easy reference */
- sring_tail = sq_ptr + p.sq_off.tail;
- sring_mask = sq_ptr + p.sq_off.ring_mask;
- sring_array = sq_ptr + p.sq_off.array;
-
- /* Map in the submission queue entries array */
- sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
- ring_fd, IORING_OFF_SQES);
- if (sqes == MAP_FAILED) {
- perror("mmap");
- return 1;
- }
-
- /* Save useful fields for later easy reference */
- cring_head = cq_ptr + p.cq_off.head;
- cring_tail = cq_ptr + p.cq_off.tail;
- cring_mask = cq_ptr + p.cq_off.ring_mask;
- cqes = cq_ptr + p.cq_off.cqes;
-
- return 0;
-}
-.EE
-.in
-
.SS Submitting I/O requests
The process of submitting a request consists of describing the I/O
operation you need to get done using an
@@ -540,6 +470,266 @@
.SH CONFORMING TO
.B io_uring
is Linux-specific.
+.SH EXAMPLES
+The following example uses
+.B io_uring
+to copy stdin to stdout.
+Using shell redirection,
+you should be able to copy files with this example.
+Because it uses a queue depth of only one,
+this example processes I/O requests one after the other.
+It is purposefully kept this way to aid understanding.
+In real-world scenarios, however,
+you'll want to have a larger queue depth to parallelize I/O request
+processing so as to gain the kind of performance benefits
+.B io_uring
+provides with its asynchronous processing of requests.
+.PP
+.EX
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <linux/fs.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdatomic.h>
+
+#include <linux/io_uring.h>
+
+#define QUEUE_DEPTH 1
+#define BLOCK_SZ 1024
+
+/* Macros for barriers needed by io_uring */
+#define io_uring_smp_store_release(p, v) \\
+ atomic_store_explicit((_Atomic typeof(*(p)) *)(p), (v), \\
+ memory_order_release)
+#define io_uring_smp_load_acquire(p) \\
+ atomic_load_explicit((_Atomic typeof(*(p)) *)(p), \\
+ memory_order_acquire)
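+
+/*
+ * A store with release semantics publishes all writes made before it;
+ * a load with acquire semantics observes everything published before
+ * the matching release store. The kernel and this program rely on this
+ * pairing when they update the ring head and tail indices from
+ * opposite sides.
+ */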
+
+int ring_fd;
+unsigned *sring_tail, *sring_mask, *sring_array,
+ *cring_head, *cring_tail, *cring_mask;
+struct io_uring_sqe *sqes;
+struct io_uring_cqe *cqes;
+char buff[BLOCK_SZ];
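+/* Shared file offset, used by both the read and the write requests */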
+off_t offset;
+
+/*
+ * System call wrappers, provided because glibc does not yet
+ * provide wrappers for the io_uring system calls.
+ */
+
+int io_uring_setup(unsigned entries, struct io_uring_params *p)
+{
+ return (int) syscall(__NR_io_uring_setup, entries, p);
+}
+
+int io_uring_enter(int ring_fd, unsigned int to_submit,
+ unsigned int min_complete, unsigned int flags)
+{
+ return (int) syscall(__NR_io_uring_enter, ring_fd, to_submit, min_complete,
+ flags, NULL, 0);
+}
+
+int app_setup_uring(void) {
+ struct io_uring_params p;
+ void *sq_ptr, *cq_ptr;
+
+ /* See io_uring_setup(2) for io_uring_params.flags you can set */
+ memset(&p, 0, sizeof(p));
+ ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
+ if (ring_fd < 0) {
+ perror("io_uring_setup");
+ return 1;
+ }
+
+ /*
+ * io_uring communication happens via 2 shared kernel-user space ring
+ * buffers, which can be jointly mapped with a single mmap() call in
+ * kernels >= 5.4.
+ */
+
+ int sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
+ int cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
+
+ /* Rather than check for kernel version, the recommended way is to
+ * check the features field of the io_uring_params structure, which is a
+ * bitmask. If IORING_FEAT_SINGLE_MMAP is set, we can do away with the
+ * second mmap() call to map in the completion ring separately.
+ */
+ if (p.features & IORING_FEAT_SINGLE_MMAP) {
+ if (cring_sz > sring_sz)
+ sring_sz = cring_sz;
+ cring_sz = sring_sz;
+ }
+
+ /* Map in the submission and completion queue ring buffers.
+ * Kernels < 5.4 only map in the submission queue, though.
+ */
+ sq_ptr = mmap(0, sring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE,
+ ring_fd, IORING_OFF_SQ_RING);
+ if (sq_ptr == MAP_FAILED) {
+ perror("mmap");
+ return 1;
+ }
+
+ if (p.features & IORING_FEAT_SINGLE_MMAP) {
+ cq_ptr = sq_ptr;
+ } else {
+ /* Map in the completion queue ring buffer in older kernels separately */
+ cq_ptr = mmap(0, cring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE,
+ ring_fd, IORING_OFF_CQ_RING);
+ if (cq_ptr == MAP_FAILED) {
+ perror("mmap");
+ return 1;
+ }
+ }
+ /* Save useful fields for later easy reference */
+ sring_tail = sq_ptr + p.sq_off.tail;
+ sring_mask = sq_ptr + p.sq_off.ring_mask;
+ sring_array = sq_ptr + p.sq_off.array;
+
+ /* Map in the submission queue entries array */
+ sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ ring_fd, IORING_OFF_SQES);
+ if (sqes == MAP_FAILED) {
+ perror("mmap");
+ return 1;
+ }
+
+ /* Save useful fields for later easy reference */
+ cring_head = cq_ptr + p.cq_off.head;
+ cring_tail = cq_ptr + p.cq_off.tail;
+ cring_mask = cq_ptr + p.cq_off.ring_mask;
+ cqes = cq_ptr + p.cq_off.cqes;
+
+ return 0;
+}
+
+/*
+ * Read from completion queue.
+ * In this function, we read completion events from the completion queue.
+ * We dequeue the CQE, update the head and return the result of the
+ * operation.
+ */
+
+int read_from_cq() {
+    struct io_uring_cqe *cqe;
+    unsigned head;
+
+ /* Read barrier */
+ head = io_uring_smp_load_acquire(cring_head);
+ /*
+ * Remember, this is a ring buffer. If head == tail, it means that the
+ * buffer is empty.
+     */
+ if (head == *cring_tail)
+ return -1;
+
+ /* Get the entry */
+ cqe = &cqes[head & (*cring_mask)];
+ if (cqe->res < 0)
+        fprintf(stderr, "Error: %s\en", strerror(abs(cqe->res)));
+
+ head++;
+
+    /* Write barrier so that updates to the head are made visible */
+ io_uring_smp_store_release(cring_head, head);
+
+ return cqe->res;
+}
+
+/*
+ * Submit a read or a write request of len bytes to the
+ * submission queue.
+ */
+
+int submit_to_sq(int fd, int op, unsigned len) {
+ unsigned index, tail;
+
+ /* Add our submission queue entry to the tail of the SQE ring buffer */
+ tail = *sring_tail;
+ index = tail & *sring_mask;
+ struct io_uring_sqe *sqe = &sqes[index];
+    /* Fill in the parameters required for the read or write operation.
+     * The caller passes the number of bytes to transfer explicitly,
+     * so data containing NUL bytes is handled correctly. */
+    sqe->opcode = op;
+    sqe->fd = fd;
+    sqe->addr = (unsigned long) buff;
+    sqe->len = len;
+    sqe->off = offset;
+
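+    /* The SQ ring holds indices into the SQE array; with this
+     * one-to-one layout each slot stores its own index. */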
+ sring_array[index] = index;
+ tail++;
+
+ /* Update the tail */
+ io_uring_smp_store_release(sring_tail, tail);
+
+    /*
+     * Tell the kernel we have submitted events with the io_uring_enter()
+     * system call. We also pass in the IORING_ENTER_GETEVENTS flag which
+     * causes the io_uring_enter() call to wait until min_complete
+     * (the 3rd param) events complete.
+     */
+    int ret = io_uring_enter(ring_fd, 1, 1,
+                             IORING_ENTER_GETEVENTS);
+    if (ret < 0) {
+        perror("io_uring_enter");
+        return -1;
+    }
+
+ return ret;
+}
+
+int main(int argc, char *argv[]) {
+ int res;
+
+ /* Setup io_uring for use */
+    if (app_setup_uring()) {
+        fprintf(stderr, "Unable to set up uring!\en");
+ return 1;
+ }
+
+ /*
+ * A while loop that reads from stdin and writes to stdout.
+ * Breaks on EOF.
+ */
+ while (1) {
+ /* Initiate read from stdin and wait for it to complete */
+        submit_to_sq(STDIN_FILENO, IORING_OP_READ, BLOCK_SZ);
+ /* Read completion queue entry */
+ res = read_from_cq();
+ if (res > 0) {
+ /* Read successful. Write to stdout. */
+            submit_to_sq(STDOUT_FILENO, IORING_OP_WRITE, res);
+ read_from_cq();
+        } else if (res == 0) {
+            /* reached EOF */
+            break;
+        } else if (res < 0) {
+            /* Error reading file */
+            fprintf(stderr, "Error: %s\en", strerror(abs(res)));
+            break;
+        }
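+        /* Advance the shared offset by the number of bytes just copied,
+         * so the next read and write continue where the previous ones
+         * left off when the streams are regular files. */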
+ offset += res;
+ }
+
+ return 0;
+}
+.EE
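+.PP
+Assuming the program above is compiled to a binary named
+.I io_uring_cp
+(the name is arbitrary),
+it can be exercised with shell redirection:
+.PP
+.EX
+$ cc -o io_uring_cp io_uring_cp.c
+$ ./io_uring_cp < infile > outfile
+$ cmp infile outfile
+.EE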
.SH SEE ALSO
.BR io_uring_enter (2)
.BR io_uring_register (2)