#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdbool.h>

#include "liburing/compat.h"
#include "liburing/io_uring.h"
#include "liburing.h"
#include "liburing/barrier.h"

/*
 * Returns true if we're not using the SQ thread (thus nobody submits but us),
 * or if IORING_SQ_NEED_WAKEUP is set, meaning the SQ thread must be explicitly
 * woken. For the latter case, we set the thread wakeup flag.
 */
static inline bool sq_ring_needs_enter(struct io_uring *ring, unsigned *flags)
{
        if (!(ring->flags & IORING_SETUP_SQPOLL))
                return true;
        if (IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_NEED_WAKEUP) {
                *flags |= IORING_ENTER_SQ_WAKEUP;
                return true;
        }

        return false;
}
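
/*
 * Illustration (a sketch, not part of the library): with a ring created as
 *
 *      struct io_uring ring;
 *      int ret = io_uring_queue_init(8, &ring, IORING_SETUP_SQPOLL);
 *
 * the kernel-side poller thread consumes the SQ ring on its own, so this
 * helper only returns true once that thread has gone idle and flagged
 * IORING_SQ_NEED_WAKEUP; submission then issues io_uring_enter() with
 * IORING_ENTER_SQ_WAKEUP to wake it up. SQPOLL setup may require elevated
 * privileges, so the io_uring_queue_init() return value must be checked.
 */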

int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
                       unsigned submit, unsigned wait_nr, sigset_t *sigmask)
{
        int ret = 0, err;

        do {
                unsigned flags = 0;

                err = __io_uring_peek_cqe(ring, cqe_ptr);
                if (err)
                        break;
                if (!*cqe_ptr && !wait_nr && !submit) {
                        err = -EAGAIN;
                        break;
                }
                if (wait_nr)
                        flags = IORING_ENTER_GETEVENTS;
                if (submit)
                        sq_ring_needs_enter(ring, &flags);
                if (wait_nr || submit)
                        ret = io_uring_enter(ring->ring_fd, submit, wait_nr,
                                             flags, sigmask);
                if (ret < 0)
                        err = -errno;
                submit -= ret;
                if (*cqe_ptr)
                        break;
        } while (!err);

        return err;
}

/*
 * Fill in an array of IO completions up to count, if any are available.
 * Returns the number of IO completions filled.
 */
unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
                                 struct io_uring_cqe **cqes, unsigned count)
{
        unsigned ready;

        ready = io_uring_cq_ready(ring);
        if (ready) {
                unsigned head = *ring->cq.khead;
                unsigned mask = *ring->cq.kring_mask;
                unsigned last;
                int i = 0;

                count = count > ready ? ready : count;
                last = head + count;
                for (; head != last; head++, i++)
                        cqes[i] = &ring->cq.cqes[head & mask];

                return count;
        }

        return 0;
}
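
/*
 * Usage sketch (illustrative, not part of the library), assuming an already
 * initialized struct io_uring named ring: drain whatever completions are
 * available without blocking, then release them in one go.
 * handle_completion() is a hypothetical application callback.
 *
 *      struct io_uring_cqe *cqes[8];
 *      unsigned i, nr;
 *
 *      nr = io_uring_peek_batch_cqe(&ring, cqes, 8);
 *      for (i = 0; i < nr; i++)
 *              handle_completion(cqes[i]);
 *      io_uring_cq_advance(&ring, nr);
 */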

/*
 * Sync internal state with kernel ring state on the SQ side.
 */
static void __io_uring_flush_sq(struct io_uring *ring)
{
        struct io_uring_sq *sq = &ring->sq;
        const unsigned mask = *sq->kring_mask;
        unsigned ktail, submitted, to_submit;

        if (sq->sqe_head == sq->sqe_tail)
                return;

        /*
         * Fill in sqes that we have queued up, adding them to the kernel ring.
         */
        submitted = 0;
        ktail = *sq->ktail;
        to_submit = sq->sqe_tail - sq->sqe_head;
        while (to_submit--) {
                sq->array[ktail & mask] = sq->sqe_head & mask;
                ktail++;
                sq->sqe_head++;
                submitted++;
        }

        /*
         * Ensure that the kernel sees the SQE updates before it sees the tail
         * update.
         */
        io_uring_smp_store_release(sq->ktail, ktail);
}
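
/*
 * Worked illustration of the mapping above (assumed values only): with
 * *sq->kring_entries == 8 (so mask == 7), sq->sqe_head == 6 and
 * sq->sqe_tail == 9, three sqes are pending. The loop stores sqe indices
 * 6, 7 and 0 (that is, 8 & 7) into consecutive sq->array[] slots starting
 * at ktail & mask, and the release store of the new ktail then publishes
 * all three entries to the kernel at once.
 */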

/*
 * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
 * that an sqe is used internally to handle the timeout. Applications using
 * this function must never set sqe->user_data to LIBURING_UDATA_TIMEOUT!
 *
 * Note that the application need not call io_uring_submit() before calling
 * this function, as we will do that on its behalf. From this it also follows
 * that this function isn't safe to use for applications that split SQ and CQ
 * handling between two threads and expect that to work without synchronization,
 * as this function manipulates both the SQ and CQ side.
 */
int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
                       unsigned wait_nr, struct __kernel_timespec *ts,
                       sigset_t *sigmask)
{
        unsigned to_submit = 0;
        int ret;

        if (ts) {
                struct io_uring_sqe *sqe;
                int ret;

                /*
                 * If the SQ ring is full, we may need to submit IO first.
                 */
                sqe = io_uring_get_sqe(ring);
                if (!sqe) {
                        ret = io_uring_submit(ring);
                        if (ret < 0)
                                return ret;
                        sqe = io_uring_get_sqe(ring);
                }
                io_uring_prep_timeout(sqe, ts, wait_nr, 0);
                sqe->user_data = LIBURING_UDATA_TIMEOUT;
                __io_uring_flush_sq(ring);
                to_submit = *ring->sq.ktail - *ring->sq.khead;
        }

        ret = __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask);
        if (ret <= 0)
                return ret;
        return ret - to_submit;
}
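
/*
 * Usage sketch (illustrative, not part of the library), assuming an already
 * initialized struct io_uring named ring: wait up to one second for a
 * completion. Depending on the library version, the internal timeout may
 * surface as a CQE whose user_data is LIBURING_UDATA_TIMEOUT, so check for
 * it before treating the CQE as an application completion.
 * handle_completion() is a hypothetical application callback.
 *
 *      struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *      struct io_uring_cqe *cqe;
 *      int ret;
 *
 *      ret = io_uring_wait_cqes(&ring, &cqe, 1, &ts, NULL);
 *      if (!ret) {
 *              if (cqe->user_data != LIBURING_UDATA_TIMEOUT)
 *                      handle_completion(cqe);
 *              io_uring_cqe_seen(&ring, cqe);
 *      }
 */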

/*
 * See io_uring_wait_cqes() - this function is the same, it just always uses
 * '1' as the wait_nr.
 */
int io_uring_wait_cqe_timeout(struct io_uring *ring,
                              struct io_uring_cqe **cqe_ptr,
                              struct __kernel_timespec *ts)
{
        return io_uring_wait_cqes(ring, cqe_ptr, 1, ts, NULL);
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns the number of sqes submitted.
 */
static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
                             unsigned wait_nr)
{
        unsigned flags;
        int ret;

        flags = 0;
        if (sq_ring_needs_enter(ring, &flags) || wait_nr) {
                if (wait_nr)
                        flags |= IORING_ENTER_GETEVENTS;

                ret = io_uring_enter(ring->ring_fd, submitted, wait_nr, flags,
                                     NULL);
                if (ret < 0)
                        return -errno;
        } else
                ret = submitted;

        return ret;
}

static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
        int submit;

        __io_uring_flush_sq(ring);
        submit = *ring->sq.ktail - *ring->sq.khead;
        return __io_uring_submit(ring, submit, wait_nr);
}
211
Jens Axboe213d6f32019-01-17 21:40:30 -0700212/*
Jens Axboe91dde5c2019-06-06 10:46:13 -0600213 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
214 *
215 * Returns number of sqes submitted
216 */
217int io_uring_submit(struct io_uring *ring)
218{
Jens Axboe94c9df32019-09-27 05:35:28 -0600219 return __io_uring_submit_and_wait(ring, 0);
Jens Axboe91dde5c2019-06-06 10:46:13 -0600220}

/*
 * Like io_uring_submit(), but allows waiting for events as well.
 *
 * Returns the number of sqes submitted.
 */
int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
        return __io_uring_submit_and_wait(ring, wait_nr);
}
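
/*
 * Usage sketch (illustrative, not part of the library), assuming an already
 * initialized struct io_uring named ring: flush everything queued so far and
 * wait for at least one completion in the same io_uring_enter() call, then
 * reap it. handle_completion() is a hypothetical application callback.
 *
 *      struct io_uring_cqe *cqe;
 *
 *      if (io_uring_submit_and_wait(&ring, 1) >= 0 &&
 *          !io_uring_peek_cqe(&ring, &cqe)) {
 *              handle_completion(cqe);
 *              io_uring_cqe_seen(&ring, cqe);
 *      }
 */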

#define __io_uring_get_sqe(sq, __head) ({                               \
        unsigned __next = (sq)->sqe_tail + 1;                           \
        struct io_uring_sqe *__sqe = NULL;                              \
                                                                        \
        if (__next - __head <= *(sq)->kring_entries) {                  \
                __sqe = &(sq)->sqes[(sq)->sqe_tail & *(sq)->kring_mask];\
                (sq)->sqe_tail = __next;                                \
        }                                                               \
        __sqe;                                                          \
})

/*
 * Return an sqe to fill. The application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
        struct io_uring_sq *sq = &ring->sq;

        if (!(ring->flags & IORING_SETUP_SQPOLL))
                return __io_uring_get_sqe(sq, sq->sqe_head);

        return __io_uring_get_sqe(sq, io_uring_smp_load_acquire(sq->khead));
}
Jens Axboe213d6f32019-01-17 21:40:30 -0700258}