/* SPDX-License-Identifier: MIT */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdbool.h>

#include "liburing/compat.h"
#include "liburing/io_uring.h"
#include "liburing.h"
#include "liburing/barrier.h"

#include "syscall.h"

/*
 * Returns true if we're not using the SQ thread (thus nobody submits but
 * us), or if IORING_SQ_NEED_WAKEUP is set and the submit thread must be
 * explicitly awakened. For the latter case, we set the thread wakeup flag
 * in *flags.
 */
static inline bool sq_ring_needs_enter(struct io_uring *ring, unsigned *flags)
{
        if (!(ring->flags & IORING_SETUP_SQPOLL))
                return true;
        if (IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_NEED_WAKEUP) {
                *flags |= IORING_ENTER_SQ_WAKEUP;
                return true;
        }

        return false;
}

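/*
 * A minimal usage sketch (not library code) of what the check above means
 * for applications: with IORING_SETUP_SQPOLL the application submits as
 * usual, and the io_uring_enter() syscall is only made once the kernel
 * submission thread has gone idle and flagged IORING_SQ_NEED_WAKEUP.
 * Assumes nothing beyond the public liburing API:
 *
 *	struct io_uring ring;
 *	struct io_uring_params p = { .flags = IORING_SETUP_SQPOLL };
 *
 *	if (io_uring_queue_init_params(8, &ring, &p) < 0)
 *		return -1;
 *	// ... queue requests with io_uring_get_sqe()/io_uring_prep_*() ...
 *	io_uring_submit(&ring);	// may not need to enter the kernel at all
 */
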
int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
                       unsigned submit, unsigned wait_nr, sigset_t *sigmask)
{
        struct io_uring_cqe *cqe = NULL;
        const int to_wait = wait_nr;
        int ret = 0, err;

        do {
                unsigned flags = 0;

                err = __io_uring_peek_cqe(ring, &cqe);
                if (err)
                        break;
                if (!cqe && !to_wait && !submit) {
                        err = -EAGAIN;
                        break;
                }
                if (wait_nr)
                        flags = IORING_ENTER_GETEVENTS;
                if (submit)
                        sq_ring_needs_enter(ring, &flags);
                if (wait_nr || submit)
                        ret = __sys_io_uring_enter(ring->ring_fd, submit,
                                                   wait_nr, flags, sigmask);
                if (wait_nr)
                        wait_nr = 0;
                if (ret < 0) {
                        err = -errno;
                } else if (ret == submit) {
                        submit = 0;
                        wait_nr = 0;
                } else {
                        submit -= ret;
                }
                if (cqe)
                        break;
        } while (!err);

        *cqe_ptr = cqe;
        return err;
}

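/*
 * This internal helper backs the public io_uring_peek_cqe() and
 * io_uring_wait_cqe() wrappers declared in liburing.h. A typical wait
 * loop, as a sketch (error handling trimmed; handle_completion() is an
 * application-defined callback, not part of liburing):
 *
 *	struct io_uring_cqe *cqe;
 *	int ret = io_uring_wait_cqe(&ring, &cqe);
 *
 *	if (ret < 0)
 *		return ret;
 *	handle_completion(cqe->user_data, cqe->res);
 *	io_uring_cqe_seen(&ring, cqe);
 */
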
/*
 * Fill in an array of IO completions up to count, if any are available.
 * Returns the number of IO completions filled.
 */
unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
                                 struct io_uring_cqe **cqes, unsigned count)
{
        unsigned ready;

        ready = io_uring_cq_ready(ring);
        if (ready) {
                unsigned head = *ring->cq.khead;
                unsigned mask = *ring->cq.kring_mask;
                unsigned last;
                int i = 0;

                count = count > ready ? ready : count;
                last = head + count;
                for (; head != last; head++, i++)
                        cqes[i] = &ring->cq.cqes[head & mask];

                return count;
        }

        return 0;
}

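/*
 * Sketch of batched reaping built on this helper (assumes an initialized
 * ring; BATCH is an arbitrary application-chosen constant, and
 * handle_completion() is application-defined as in the sketch above):
 *
 *	struct io_uring_cqe *cqes[BATCH];
 *	unsigned i, nr;
 *
 *	nr = io_uring_peek_batch_cqe(&ring, cqes, BATCH);
 *	for (i = 0; i < nr; i++)
 *		handle_completion(cqes[i]->user_data, cqes[i]->res);
 *	io_uring_cq_advance(&ring, nr);	// mark all nr CQEs as seen
 */
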
/*
 * Sync internal state with kernel ring state on the SQ side. Returns the
 * number of pending items in the SQ ring, for the shared ring.
 */
static int __io_uring_flush_sq(struct io_uring *ring)
{
        struct io_uring_sq *sq = &ring->sq;
        const unsigned mask = *sq->kring_mask;
        unsigned ktail, to_submit;

        if (sq->sqe_head == sq->sqe_tail) {
                ktail = *sq->ktail;
                goto out;
        }

        /*
         * Fill in sqes that we have queued up, adding them to the kernel ring
         */
        ktail = *sq->ktail;
        to_submit = sq->sqe_tail - sq->sqe_head;
        while (to_submit--) {
                sq->array[ktail & mask] = sq->sqe_head & mask;
                ktail++;
                sq->sqe_head++;
        }

        /*
         * Ensure that the kernel sees the SQE updates before it sees the tail
         * update.
         */
        io_uring_smp_store_release(sq->ktail, ktail);
out:
        return ktail - *sq->khead;
}

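/*
 * The count returned above is plain arithmetic on free-running unsigned
 * indices. As a standalone sketch with hypothetical values (not library
 * state):
 *
 *	unsigned khead = 0xfffffffeu, ktail = 0x00000002u;
 *	unsigned pending = ktail - khead;	// == 4; wraparound handled
 *						// by unsigned overflow
 *	unsigned slot = ktail & mask;		// array index, where
 *						// mask == ring entries - 1
 */
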
/*
 * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
 * that an sqe is used internally to handle the timeout. Applications using
 * this function must never set sqe->user_data to LIBURING_UDATA_TIMEOUT!
 *
 * Note that the application need not call io_uring_submit() before calling
 * this function, as we will do that on its behalf. From this it also follows
 * that this function isn't safe to use for applications that split SQ and CQ
 * handling between two threads and expect that to work without synchronization,
 * as this function manipulates both the SQ and CQ side.
 */
int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
                       unsigned wait_nr, struct __kernel_timespec *ts,
                       sigset_t *sigmask)
{
        unsigned to_submit = 0;

        if (ts) {
                struct io_uring_sqe *sqe;
                int ret;

                /*
                 * If the SQ ring is full, we may need to submit IO first
                 */
                sqe = io_uring_get_sqe(ring);
                if (!sqe) {
                        ret = io_uring_submit(ring);
                        if (ret < 0)
                                return ret;
                        sqe = io_uring_get_sqe(ring);
                        if (!sqe)
                                return -EAGAIN;
                }
                io_uring_prep_timeout(sqe, ts, wait_nr, 0);
                sqe->user_data = LIBURING_UDATA_TIMEOUT;
                to_submit = __io_uring_flush_sq(ring);
        }

        return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask);
}

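/*
 * Example, as a sketch, of waiting for a batch of completions with a
 * timeout (assumes an initialized ring with SQEs already queued; this
 * function submits them on the caller's behalf). If the timeout expires
 * before wait_nr completions arrive, -ETIME is typically returned:
 *
 *	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *	struct io_uring_cqe *cqe;
 *	int ret;
 *
 *	ret = io_uring_wait_cqes(&ring, &cqe, 4, &ts, NULL);
 *	if (ret == -ETIME)
 *		handle_timeout();	// app-defined: fewer than 4 arrived
 */
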
/*
 * See io_uring_wait_cqes() - this function is the same, it just always uses
 * '1' as the wait_nr.
 */
int io_uring_wait_cqe_timeout(struct io_uring *ring,
                              struct io_uring_cqe **cqe_ptr,
                              struct __kernel_timespec *ts)
{
        return io_uring_wait_cqes(ring, cqe_ptr, 1, ts, NULL);
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns the number of sqes submitted.
 */
static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
                             unsigned wait_nr)
{
        unsigned flags;
        int ret;

        flags = 0;
        if (sq_ring_needs_enter(ring, &flags) || wait_nr) {
                if (wait_nr || (ring->flags & IORING_SETUP_IOPOLL))
                        flags |= IORING_ENTER_GETEVENTS;

                ret = __sys_io_uring_enter(ring->ring_fd, submitted, wait_nr,
                                           flags, NULL);
                if (ret < 0)
                        return -errno;
        } else
                ret = submitted;

        return ret;
}

static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
        return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr);
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns the number of sqes submitted.
 */
int io_uring_submit(struct io_uring *ring)
{
        return __io_uring_submit_and_wait(ring, 0);
}

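/*
 * A sketch of the usual prepare-then-submit flow (assumes an initialized
 * ring; a NOP request is used so the example has no external dependencies,
 * and my_cookie is an application-defined pointer):
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *	int ret;
 *
 *	if (!sqe)
 *		return -EBUSY;	// SQ ring full, reap completions first
 *	io_uring_prep_nop(sqe);
 *	io_uring_sqe_set_data(sqe, my_cookie);
 *	ret = io_uring_submit(&ring);	// number submitted, or -errno
 */
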
/*
 * Like io_uring_submit(), but allows waiting for events as well.
 *
 * Returns the number of sqes submitted.
 */
int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
        return __io_uring_submit_and_wait(ring, wait_nr);
}

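/*
 * Sketch combining submission with waiting: flush whatever has been queued
 * and block until at least one completion is available (assumes an
 * initialized ring with queued SQEs):
 *
 *	int ret = io_uring_submit_and_wait(&ring, 1);
 *
 *	if (ret < 0)
 *		return ret;
 *	// reap with io_uring_peek_cqe()/io_uring_cqe_seen() as usual
 */
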
#define __io_uring_get_sqe(sq, __head) ({				\
	unsigned __next = (sq)->sqe_tail + 1;				\
	struct io_uring_sqe *__sqe = NULL;				\
									\
	if (__next - __head <= *(sq)->kring_entries) {			\
		__sqe = &(sq)->sqes[(sq)->sqe_tail & *(sq)->kring_mask];\
		(sq)->sqe_tail = __next;				\
	}								\
	__sqe;								\
})

/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
        struct io_uring_sq *sq = &ring->sq;

        if (!(ring->flags & IORING_SETUP_SQPOLL))
                return __io_uring_get_sqe(sq, sq->sqe_head);

        return __io_uring_get_sqe(sq, io_uring_smp_load_acquire(sq->khead));
}
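
/*
 * If io_uring_get_sqe() returns NULL the SQ ring is full; a common pattern
 * (sketch) is to flush what is already queued and retry, mirroring what
 * io_uring_wait_cqes() does above:
 *
 *	sqe = io_uring_get_sqe(&ring);
 *	if (!sqe) {
 *		io_uring_submit(&ring);
 *		sqe = io_uring_get_sqe(&ring);
 *	}
 */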