/* SPDX-License-Identifier: MIT */
#define _POSIX_C_SOURCE 200112L

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdbool.h>

#include "liburing/compat.h"
#include "liburing/io_uring.h"
#include "liburing.h"
#include "liburing/barrier.h"

#include "syscall.h"

/*
 * Returns true if we're not using the SQ thread (thus nobody submits but us),
 * or if IORING_SQ_NEED_WAKEUP is set, so the submit thread must be explicitly
 * awakened. For the latter case, we set the thread wakeup flag.
 */
static inline bool sq_ring_needs_enter(struct io_uring *ring, unsigned *flags)
{
	if (!(ring->flags & IORING_SETUP_SQPOLL))
		return true;

	if (uring_unlikely(IO_URING_READ_ONCE(*ring->sq.kflags) &
			   IORING_SQ_NEED_WAKEUP)) {
		*flags |= IORING_ENTER_SQ_WAKEUP;
		return true;
	}

	return false;
}

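/*
 * Minimal sketch of how a caller consumes the wakeup flag set above (this
 * mirrors __io_uring_submit() below, which is the authoritative user):
 *
 *	unsigned flags = 0;
 *
 *	if (sq_ring_needs_enter(ring, &flags))
 *		__sys_io_uring_enter(ring->ring_fd, to_submit, 0, flags, NULL);
 *
 * If it returns false, the SQPOLL thread is still awake and will pick up the
 * new SQEs without a system call.
 */
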
static inline bool cq_ring_needs_flush(struct io_uring *ring)
{
	return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW;
}

static int __io_uring_peek_cqe(struct io_uring *ring,
			       struct io_uring_cqe **cqe_ptr,
			       unsigned *nr_available)
{
	struct io_uring_cqe *cqe;
	int err = 0;
	unsigned available;
	unsigned mask = *ring->cq.kring_mask;

	do {
		unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail);
		unsigned head = *ring->cq.khead;

		cqe = NULL;
		available = tail - head;
		if (!available)
			break;

		cqe = &ring->cq.cqes[head & mask];
		if (!(ring->features & IORING_FEAT_EXT_ARG) &&
		    cqe->user_data == LIBURING_UDATA_TIMEOUT) {
			if (cqe->res < 0)
				err = cqe->res;
			io_uring_cq_advance(ring, 1);
			if (!err)
				continue;
			cqe = NULL;
		}

		break;
	} while (1);

	*cqe_ptr = cqe;
	*nr_available = available;
	return err;
}

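/*
 * Indexing sketch for the loop above: with a CQ ring of 8 entries (mask == 7),
 * khead == 14 and ktail == 17 mean three completions are pending, living in
 * slots 14 & 7 == 6, 15 & 7 == 7 and 16 & 7 == 0. The unsigned subtraction
 * tail - head stays correct even when the 32-bit counters wrap.
 */
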
struct get_data {
	unsigned submit;
	unsigned wait_nr;
	unsigned get_flags;
	int sz;
	void *arg;
};

static int _io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
			     struct get_data *data)
{
	struct io_uring_cqe *cqe = NULL;
	const int to_wait = data->wait_nr;
	int err;

	do {
		bool need_enter = false;
		bool cq_overflow_flush = false;
		unsigned flags = 0;
		unsigned nr_available;
		int ret;

		err = __io_uring_peek_cqe(ring, &cqe, &nr_available);
		if (err)
			break;
		if (!cqe && !to_wait && !data->submit) {
			if (!cq_ring_needs_flush(ring)) {
				err = -EAGAIN;
				break;
			}
			cq_overflow_flush = true;
		}
		if (data->wait_nr > nr_available || cq_overflow_flush) {
			flags = IORING_ENTER_GETEVENTS | data->get_flags;
			need_enter = true;
		}
		if (data->submit) {
			sq_ring_needs_enter(ring, &flags);
			need_enter = true;
		}
		if (!need_enter)
			break;

		ret = __sys_io_uring_enter2(ring->ring_fd, data->submit,
					    data->wait_nr, flags, data->arg,
					    data->sz);
		if (ret < 0) {
			err = -errno;
			break;
		}

		data->submit -= ret;
		if (!data->submit) {
			/*
			 * When SETUP_IOPOLL is set, __sys_io_uring_enter()
			 * must be called to reap new completions, but the call
			 * won't be made if both wait_nr and submit are zero,
			 * so preserve wait_nr.
			 */
			if (!(ring->flags & IORING_SETUP_IOPOLL))
				data->wait_nr = 0;
		}
		if (cqe)
			break;
	} while (1);

	*cqe_ptr = cqe;
	return err;
}

int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
		       unsigned submit, unsigned wait_nr, sigset_t *sigmask)
{
	struct get_data data = {
		.submit		= submit,
		.wait_nr	= wait_nr,
		.get_flags	= 0,
		.sz		= _NSIG / 8,
		.arg		= sigmask,
	};

	return _io_uring_get_cqe(ring, cqe_ptr, &data);
}

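/*
 * Typical consumer loop built on this helper; io_uring_wait_cqe() and
 * io_uring_cqe_seen() are the liburing.h inlines that funnel into it
 * (sketch only, handle_completion() is application-defined):
 *
 *	struct io_uring_cqe *cqe;
 *	int ret = io_uring_wait_cqe(&ring, &cqe);
 *
 *	if (ret == 0) {
 *		handle_completion(cqe->user_data, cqe->res);
 *		io_uring_cqe_seen(&ring, cqe);
 *	}
 */
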
/*
 * Fill in an array of IO completions up to count, if any are available.
 * Returns the number of IO completions filled.
 */
unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
				 struct io_uring_cqe **cqes, unsigned count)
{
	unsigned ready;
	bool overflow_checked = false;

again:
	ready = io_uring_cq_ready(ring);
	if (ready) {
		unsigned head = *ring->cq.khead;
		unsigned mask = *ring->cq.kring_mask;
		unsigned last;
		int i = 0;

		count = count > ready ? ready : count;
		last = head + count;
		for (; head != last; head++, i++)
			cqes[i] = &ring->cq.cqes[head & mask];

		return count;
	}

	if (overflow_checked)
		goto done;

	if (cq_ring_needs_flush(ring)) {
		__sys_io_uring_enter(ring->ring_fd, 0, 0,
				     IORING_ENTER_GETEVENTS, NULL);
		overflow_checked = true;
		goto again;
	}

done:
	return 0;
}

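/*
 * Usage sketch: drain whatever is already ready without entering the kernel,
 * then mark the whole batch seen in one step via io_uring_cq_advance()
 * (handle_completion() is application-defined):
 *
 *	struct io_uring_cqe *cqes[8];
 *	unsigned i, n = io_uring_peek_batch_cqe(&ring, cqes, 8);
 *
 *	for (i = 0; i < n; i++)
 *		handle_completion(cqes[i]->user_data, cqes[i]->res);
 *	io_uring_cq_advance(&ring, n);
 */
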
/*
 * Sync internal state with kernel ring state on the SQ side. Returns the
 * number of pending items in the SQ ring, for the shared ring.
 */
int __io_uring_flush_sq(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned mask = *sq->kring_mask;
	unsigned ktail, to_submit;

	if (sq->sqe_head == sq->sqe_tail) {
		ktail = *sq->ktail;
		goto out;
	}

	/*
	 * Fill in sqes that we have queued up, adding them to the kernel ring
	 */
	ktail = *sq->ktail;
	to_submit = sq->sqe_tail - sq->sqe_head;
	while (to_submit--) {
		sq->array[ktail & mask] = sq->sqe_head & mask;
		ktail++;
		sq->sqe_head++;
	}

	/*
	 * Ensure that the kernel sees the SQE updates before it sees the tail
	 * update.
	 */
	io_uring_smp_store_release(sq->ktail, ktail);
out:
	/*
	 * This _may_ look problematic, as we're not supposed to be reading
	 * SQ->head without acquire semantics. When we're in SQPOLL mode, the
	 * kernel submitter could be updating this right now. For non-SQPOLL,
	 * the task itself does it, and there's no potential race. But even for
	 * SQPOLL, the load is going to be potentially out-of-date the very
	 * instant it's done, regardless of whether or not it's done
	 * atomically. Worst case, we're going to be over-estimating what
	 * we can submit. The point is, we need to be able to deal with this
	 * situation regardless of any perceived atomicity.
	 */
	return ktail - *sq->khead;
}

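/*
 * Mapping sketch for the loop above: with 8 ring entries (mask == 7),
 * sqe_head == 4 and sqe_tail == 6, two entries are published by writing
 * array[ktail & 7] = 4 and array[(ktail + 1) & 7] = 5, and both become
 * visible to the kernel with the single release store of the new ktail.
 */
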
/*
 * If we have kernel support for IORING_ENTER_EXT_ARG, then we can use that
 * more efficiently than queueing an internal timeout command.
 */
static int io_uring_wait_cqes_new(struct io_uring *ring,
				  struct io_uring_cqe **cqe_ptr,
				  unsigned wait_nr, struct __kernel_timespec *ts,
				  sigset_t *sigmask)
{
	struct io_uring_getevents_arg arg = {
		.sigmask	= (unsigned long) sigmask,
		.sigmask_sz	= _NSIG / 8,
		.ts		= (unsigned long) ts
	};
	struct get_data data = {
		.submit		= __io_uring_flush_sq(ring),
		.wait_nr	= wait_nr,
		.get_flags	= IORING_ENTER_EXT_ARG,
		.sz		= sizeof(arg),
		.arg		= &arg
	};

	return _io_uring_get_cqe(ring, cqe_ptr, &data);
}

/*
 * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
 * that an sqe is used internally to handle the timeout. For kernels that don't
 * support IORING_FEAT_EXT_ARG, applications using this function must never
 * set sqe->user_data to LIBURING_UDATA_TIMEOUT!
 *
 * For kernels without IORING_FEAT_EXT_ARG (5.10 and older), if 'ts' is
 * specified, the application need not call io_uring_submit() before
 * calling this function, as we will do that on its behalf. From this it also
 * follows that this function isn't safe to use for applications that split SQ
 * and CQ handling between two threads and expect that to work without
 * synchronization, as this function manipulates both the SQ and CQ side.
 *
 * For kernels with IORING_FEAT_EXT_ARG, no implicit submission is done and
 * hence this function is safe to use for applications that split SQ and CQ
 * handling between two threads.
 */
int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
		       unsigned wait_nr, struct __kernel_timespec *ts,
		       sigset_t *sigmask)
{
	unsigned to_submit = 0;

	if (ts) {
		struct io_uring_sqe *sqe;
		int ret;

		if (ring->features & IORING_FEAT_EXT_ARG)
			return io_uring_wait_cqes_new(ring, cqe_ptr, wait_nr,
						      ts, sigmask);

		/*
		 * If the SQ ring is full, we may need to submit IO first
		 */
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			ret = io_uring_submit(ring);
			if (ret < 0)
				return ret;
			sqe = io_uring_get_sqe(ring);
			if (!sqe)
				return -EAGAIN;
		}
		io_uring_prep_timeout(sqe, ts, wait_nr, 0);
		sqe->user_data = LIBURING_UDATA_TIMEOUT;
		to_submit = __io_uring_flush_sq(ring);
	}

	return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask);
}

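/*
 * Usage sketch, assuming a relative 100ms wait budget; -ETIME indicates the
 * timeout fired before a completion arrived:
 *
 *	struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 100000000 };
 *	struct io_uring_cqe *cqe;
 *	int ret = io_uring_wait_cqes(&ring, &cqe, 1, &ts, NULL);
 *
 *	if (ret == 0) {
 *		handle_completion(cqe->user_data, cqe->res);
 *		io_uring_cqe_seen(&ring, cqe);
 *	}
 */
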
/*
 * See io_uring_wait_cqes() - this function is the same, it just always uses
 * '1' as the wait_nr.
 */
int io_uring_wait_cqe_timeout(struct io_uring *ring,
			      struct io_uring_cqe **cqe_ptr,
			      struct __kernel_timespec *ts)
{
	return io_uring_wait_cqes(ring, cqe_ptr, 1, ts, NULL);
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns number of sqes submitted
 */
static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
			     unsigned wait_nr)
{
	unsigned flags;
	int ret;

	flags = 0;
	if (sq_ring_needs_enter(ring, &flags) || wait_nr) {
		if (wait_nr || (ring->flags & IORING_SETUP_IOPOLL))
			flags |= IORING_ENTER_GETEVENTS;

		ret = __sys_io_uring_enter(ring->ring_fd, submitted, wait_nr,
					   flags, NULL);
		if (ret < 0)
			return -errno;
	} else
		ret = submitted;

	return ret;
}

static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
	return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr);
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns number of sqes submitted
 */
int io_uring_submit(struct io_uring *ring)
{
	return __io_uring_submit_and_wait(ring, 0);
}

/*
 * Like io_uring_submit(), but allows waiting for events as well.
 *
 * Returns number of sqes submitted
 */
int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
	return __io_uring_submit_and_wait(ring, wait_nr);
}

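/*
 * Usage sketch: queue a batch of SQEs (application-defined prepare_sqes()
 * below), then submit and block for at least one completion with a single
 * io_uring_enter() call:
 *
 *	prepare_sqes(&ring);
 *	int ret = io_uring_submit_and_wait(&ring, 1);
 *
 *	if (ret < 0)
 *		fprintf(stderr, "submit: %s\n", strerror(-ret));
 */
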
/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	unsigned int head = io_uring_smp_load_acquire(sq->khead);
	unsigned int next = sq->sqe_tail + 1;
	struct io_uring_sqe *sqe = NULL;

	if (next - head <= *sq->kring_entries) {
		sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask];
		sq->sqe_tail = next;
	}
	return sqe;
}

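/*
 * Usage sketch: grab an SQE, describe the I/O, tag it, and submit. fd, buf,
 * len and my_cookie are assumed to exist in the caller; io_uring_prep_read()
 * and io_uring_sqe_set_data() come from liburing.h:
 *
 *	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
 *
 *	if (sqe) {
 *		io_uring_prep_read(sqe, fd, buf, len, 0);
 *		io_uring_sqe_set_data(sqe, my_cookie);
 *		io_uring_submit(&ring);
 *	}
 */
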
int __io_uring_sqring_wait(struct io_uring *ring)
{
	int ret;

	ret = __sys_io_uring_enter(ring->ring_fd, 0, 0, IORING_ENTER_SQ_WAIT,
				   NULL);
	if (ret < 0)
		ret = -errno;
	return ret;
}
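
/*
 * Usage sketch (SQPOLL rings): when io_uring_get_sqe() returns NULL because
 * the kernel thread hasn't consumed the ring yet, the public
 * io_uring_sqring_wait() wrapper ends up here to block until space frees up:
 *
 *	struct io_uring_sqe *sqe;
 *
 *	while (!(sqe = io_uring_get_sqe(&ring)))
 *		io_uring_sqring_wait(&ring);
 */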