#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdbool.h>

#include "liburing/compat.h"
#include "liburing/io_uring.h"
#include "liburing.h"
#include "liburing/barrier.h"

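/*
 * Internal helper behind the peek/wait variants below. Scans the CQ ring for
 * a completion, transparently consuming timeout completions posted on our
 * behalf (user_data == LIBURING_UDATA_TIMEOUT). If nothing is available and
 * wait_nr is non-zero, io_uring_enter() is called to submit and/or wait for
 * events. Returns 0 with *cqe_ptr filled in on success, -EAGAIN if nothing
 * is ready and we're not asked to wait, or a negative error from the
 * internal timeout or from io_uring_enter().
 */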
static int __io_uring_get_cqe(struct io_uring *ring,
			      struct io_uring_cqe **cqe_ptr, unsigned submit,
			      unsigned wait_nr)
{
	int ret, err = 0;
	unsigned head;

	do {
		io_uring_for_each_cqe(ring, head, *cqe_ptr)
			break;
		if (*cqe_ptr) {
			if ((*cqe_ptr)->user_data == LIBURING_UDATA_TIMEOUT) {
				if ((*cqe_ptr)->res < 0)
					err = (*cqe_ptr)->res;
				io_uring_cq_advance(ring, 1);
				if (!err)
					continue;
				*cqe_ptr = NULL;
			}
			break;
		}
		if (!wait_nr)
			return -EAGAIN;
		ret = io_uring_enter(ring->ring_fd, submit, wait_nr,
				     IORING_ENTER_GETEVENTS, NULL);
		if (ret < 0)
			return -errno;
	} while (1);

	return err;
}

/*
 * Return an IO completion, if one is readily available. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_cqe(ring, cqe_ptr, 0, 0);
}

/*
 * Fill in an array of IO completions up to count, if any are available.
 * Returns the number of IO completions filled.
 */
unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
				 struct io_uring_cqe **cqes, unsigned count)
{
	unsigned ready;

	ready = io_uring_cq_ready(ring);
	if (ready) {
		unsigned head = *ring->cq.khead;
		unsigned mask = *ring->cq.kring_mask;
		unsigned last;
		int i = 0;

		count = count > ready ? ready : count;
		last = head + count;
		for (; head != last; head++, i++)
			cqes[i] = &ring->cq.cqes[head & mask];

		return count;
	}

	return 0;
}
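
/*
 * Example (illustrative sketch, not part of the library): drain whatever is
 * already ready in batches. Assumes 'ring' was set up with
 * io_uring_queue_init() and has requests in flight; handle_cqe() is a
 * hypothetical application callback. Since peeking does not consume
 * entries, the caller marks them seen with io_uring_cq_advance().
 *
 *	struct io_uring_cqe *cqes[8];
 *	unsigned i, got;
 *
 *	got = io_uring_peek_batch_cqe(&ring, cqes, 8);
 *	for (i = 0; i < got; i++)
 *		handle_cqe(cqes[i]);
 *	io_uring_cq_advance(&ring, got);
 */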

/*
 * Return an IO completion, waiting for it if necessary. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_cqe(ring, cqe_ptr, 0, 1);
}

/*
 * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
 * that an sqe is used internally to handle the timeout. Applications using
 * this function must never set sqe->user_data to LIBURING_UDATA_TIMEOUT!
 *
 * Note that the application need not call io_uring_submit() before calling
 * this function, as we will do that on its behalf.
 */
int io_uring_wait_cqes_timeout(struct io_uring *ring,
			       struct io_uring_cqe **cqe_ptr,
			       unsigned wait_nr,
			       struct timespec *ts)
{
	int ret;

	if (wait_nr) {
		struct io_uring_sqe *sqe;

		/*
		 * If the SQ ring is full, we may need to submit IO first
		 */
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			ret = io_uring_submit(ring);
			if (ret < 0)
				return ret;
			sqe = io_uring_get_sqe(ring);
			if (!sqe)
				return -EAGAIN;
		}
		io_uring_prep_timeout(sqe, ts, wait_nr);
		sqe->user_data = LIBURING_UDATA_TIMEOUT;
	}

	ret = io_uring_submit(ring);
	if (ret < 0)
		return ret;

	return __io_uring_get_cqe(ring, cqe_ptr, 1, 1);
}
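
/*
 * Example (illustrative sketch, not part of the library): after preparing
 * sqes with io_uring_get_sqe()/io_uring_prep_*(), wait until four
 * completions have arrived or one second has passed; the pending sqes are
 * submitted on our behalf. On success the first available completion is
 * returned in 'cqe'; if the timeout fires first, the return value is the
 * timeout's error (typically -ETIME).
 *
 *	struct io_uring_cqe *cqe;
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	int ret;
 *
 *	ret = io_uring_wait_cqes_timeout(&ring, &cqe, 4, &ts);
 *	if (ret < 0)
 *		return ret;
 *	handle_cqe(cqe);
 *	io_uring_cqe_seen(&ring, cqe);
 */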

/*
 * See io_uring_wait_cqes_timeout() - this function is the same, it just
 * always uses '1' as the wait_nr.
 */
int io_uring_wait_cqe_timeout(struct io_uring *ring,
			      struct io_uring_cqe **cqe_ptr,
			      struct timespec *ts)
{
	return io_uring_wait_cqes_timeout(ring, cqe_ptr, 1, ts);
}

/*
 * Returns true if we're not using an SQ thread (thus nobody submits but us),
 * or if IORING_SQ_NEED_WAKEUP is set and the SQ thread must be explicitly
 * awakened. For the latter case, we set the thread wakeup flag.
 */
static inline bool sq_ring_needs_enter(struct io_uring *ring, unsigned *flags)
{
	if (!(ring->flags & IORING_SETUP_SQPOLL))
		return true;
	if ((*ring->sq.kflags & IORING_SQ_NEED_WAKEUP)) {
		*flags |= IORING_ENTER_SQ_WAKEUP;
		return true;
	}

	return false;
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns number of sqes submitted
 */
static int __io_uring_submit(struct io_uring *ring, unsigned wait_nr)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned mask = *sq->kring_mask;
	unsigned ktail, submitted, to_submit;
	unsigned flags;
	int ret;

	if (sq->sqe_head == sq->sqe_tail)
		return 0;

	/*
	 * Fill in sqes that we have queued up, adding them to the kernel ring
	 */
	submitted = 0;
	ktail = *sq->ktail;
	to_submit = sq->sqe_tail - sq->sqe_head;
	while (to_submit--) {
		sq->array[ktail & mask] = sq->sqe_head & mask;
		ktail++;
		sq->sqe_head++;
		submitted++;
	}

	if (!submitted)
		return 0;

	/*
	 * Ensure that the kernel sees the SQE updates before it sees the tail
	 * update.
	 */
	io_uring_smp_store_release(sq->ktail, ktail);

	flags = 0;
	if (wait_nr || sq_ring_needs_enter(ring, &flags)) {
		if (wait_nr) {
			if (wait_nr > submitted)
				wait_nr = submitted;
			flags |= IORING_ENTER_GETEVENTS;
		}

		ret = io_uring_enter(ring->ring_fd, submitted, wait_nr, flags,
				     NULL);
		if (ret < 0)
			return -errno;
	} else
		ret = submitted;

	return ret;
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns number of sqes submitted
 */
int io_uring_submit(struct io_uring *ring)
{
	return __io_uring_submit(ring, 0);
}

/*
 * Like io_uring_submit(), but allows waiting for events as well.
 *
 * Returns number of sqes submitted
 */
int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
	return __io_uring_submit(ring, wait_nr);
}

/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	unsigned next = sq->sqe_tail + 1;
	struct io_uring_sqe *sqe;

	/*
	 * All sqes are used
	 */
	if (next - sq->sqe_head > *sq->kring_entries)
		return NULL;

	sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask];
	sq->sqe_tail = next;
	return sqe;
}
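
/*
 * Example (illustrative sketch, not part of the library): the basic
 * prepare/submit/complete cycle built from the functions in this file,
 * using a NOP request and with error handling elided for brevity.
 *
 *	struct io_uring ring;
 *	struct io_uring_sqe *sqe;
 *	struct io_uring_cqe *cqe;
 *
 *	io_uring_queue_init(8, &ring, 0);
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	io_uring_prep_nop(sqe);
 *	sqe->user_data = 0x42;
 *
 *	io_uring_submit(&ring);
 *	io_uring_wait_cqe(&ring, &cqe);
 *	(cqe->user_data is 0x42; cqe->res holds the request's result)
 *	io_uring_cqe_seen(&ring, cqe);
 *	io_uring_queue_exit(&ring);
 */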