blob: 74f023c9b519a0022f03c9ffefc21bdb96ef3842 [file] [log] [blame]
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <stdbool.h>
#include "liburing/compat.h"
#include "liburing/io_uring.h"
#include "liburing.h"
#include "liburing/barrier.h"
/*
 * Fetch a completion, entering the kernel to wait when none is ready yet.
 *
 * ring:    ring to operate on
 * cqe_ptr: filled in by __io_uring_peek_cqe()
 * submit:  passed to io_uring_enter() as the number of sqes to submit; it is
 *          reduced by the value each successful enter returns
 * wait_nr: passed to io_uring_enter() as the number of completions to wait
 *          for; when zero, the kernel is never entered
 * sigmask: handed through to io_uring_enter() unchanged
 *
 * Returns 0 once *cqe_ptr is non-NULL, the error reported by
 * __io_uring_peek_cqe() if it fails, -EAGAIN when nothing is ready and
 * wait_nr is zero, or -errno when io_uring_enter() fails.
 */
int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
		       unsigned submit, unsigned wait_nr, sigset_t *sigmask)
{
	for (;;) {
		int peek_err, entered;

		peek_err = __io_uring_peek_cqe(ring, cqe_ptr);
		if (peek_err)
			return peek_err;
		if (*cqe_ptr)
			return 0;

		/* Nothing ready and the caller does not want to block. */
		if (!wait_nr)
			return -EAGAIN;

		entered = io_uring_enter(ring->ring_fd, submit, wait_nr,
					 IORING_ENTER_GETEVENTS, sigmask);
		if (entered < 0)
			return -errno;

		/* Do not resubmit what the kernel already consumed. */
		submit -= entered;
	}
}
/*
 * Collect pointers to completions that are already available, without
 * consuming them (the CQ head is only read here, never advanced).
 *
 * At most 'count' entries of 'cqes' are written. Returns how many were
 * filled in, which is 0 when io_uring_cq_ready() reports nothing ready.
 */
unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
				 struct io_uring_cqe **cqes, unsigned count)
{
	const unsigned avail = io_uring_cq_ready(ring);
	unsigned first, ring_mask, nr, idx;

	if (!avail)
		return 0;

	first = *ring->cq.khead;
	ring_mask = *ring->cq.kring_mask;

	/* Never hand out more than is ready or than the caller has room for. */
	nr = count < avail ? count : avail;

	for (idx = 0; idx < nr; idx++)
		cqes[idx] = &ring->cq.cqes[(first + idx) & ring_mask];

	return nr;
}
/*
 * Publish locally queued sqes (sqe_head..sqe_tail) into the SQ index array
 * and advance the shared tail so they become visible to the kernel side.
 *
 * Returns the number of entries published, 0 if nothing was queued.
 */
static int __io_uring_flush_sq(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned ring_mask = *sq->kring_mask;
	const unsigned pending = sq->sqe_tail - sq->sqe_head;
	unsigned tail, n;

	if (!pending)
		return 0;

	/*
	 * Map each queued sqe slot into the next free position of the
	 * index array, consuming our private head as we go.
	 */
	tail = *sq->ktail;
	for (n = 0; n < pending; n++) {
		sq->array[tail & ring_mask] = sq->sqe_head & ring_mask;
		sq->sqe_head++;
		tail++;
	}

	/*
	 * The release store orders the array writes above before the new
	 * tail value becomes visible.
	 */
	io_uring_smp_store_release(sq->ktail, tail);

	return pending;
}
/*
 * Like __io_uring_get_cqe() with a wait count, except it accepts a timeout
 * value as well. When both wait_nr and ts are set, an sqe is used internally
 * to handle the timeout. Applications using this function must never set
 * sqe->user_data to LIBURING_UDATA_TIMEOUT!
 *
 * Note that the application need not call io_uring_submit() before calling
 * this function, as queued sqes are flushed to the SQ ring here and handed
 * to io_uring_enter() on its behalf.
 *
 * Returns the result of __io_uring_get_cqe(), a negative error from
 * io_uring_submit() if making room for the timeout sqe failed, or -EAGAIN
 * if no sqe could be obtained for the timeout even after submitting.
 */
int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
		       unsigned wait_nr, struct __kernel_timespec *ts,
		       sigset_t *sigmask)
{
	int ret;

	if (wait_nr && ts) {
		struct io_uring_sqe *sqe;

		/*
		 * If the SQ ring is full, we may need to submit IO first
		 */
		sqe = io_uring_get_sqe(ring);
		if (!sqe) {
			ret = io_uring_submit(ring);
			if (ret < 0)
				return ret;
			sqe = io_uring_get_sqe(ring);
			/*
			 * Submitting does not guarantee a free slot; bail out
			 * rather than writing through a NULL sqe.
			 */
			if (!sqe)
				return -EAGAIN;
		}
		io_uring_prep_timeout(sqe, ts, wait_nr, 0);
		sqe->user_data = LIBURING_UDATA_TIMEOUT;
	}

	ret = __io_uring_flush_sq(ring);
	return __io_uring_get_cqe(ring, cqe_ptr, ret, wait_nr, sigmask);
}
/*
 * Wait for a single completion, with an optional timeout.
 *
 * See io_uring_wait_cqes() - this function is the same, it just always uses
 * '1' as the wait_nr and passes NULL as the signal mask. The return value is
 * whatever io_uring_wait_cqes() returns.
 */
int io_uring_wait_cqe_timeout(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr,
struct __kernel_timespec *ts)
{
return io_uring_wait_cqes(ring, cqe_ptr, 1, ts, NULL);
}
/*
 * Decide whether submitting requires a call into the kernel.
 *
 * Without IORING_SETUP_SQPOLL nobody submits but us, so the answer is
 * always true. With it, entering is only needed when the SQ ring flags
 * carry IORING_SQ_NEED_WAKEUP; in that case IORING_ENTER_SQ_WAKEUP is
 * OR-ed into *flags so the enter call wakes the submit thread.
 */
static inline bool sq_ring_needs_enter(struct io_uring *ring, unsigned *flags)
{
	if (ring->flags & IORING_SETUP_SQPOLL) {
		const unsigned sq_flags = IO_URING_READ_ONCE(*ring->sq.kflags);

		/* The poll thread is running and will pick the entries up. */
		if (!(sq_flags & IORING_SQ_NEED_WAKEUP))
			return false;

		*flags |= IORING_ENTER_SQ_WAKEUP;
	}

	return true;
}
/*
 * Hand 'submitted' already-flushed sqes to the kernel, optionally waiting
 * for completions.
 *
 * ring:      ring to operate on
 * submitted: number of sqes placed in the SQ ring by the caller
 * wait_nr:   number of completions to wait for; clamped to 'submitted'.
 *            Non-zero forces a kernel enter with IORING_ENTER_GETEVENTS.
 *
 * Returns the value of io_uring_enter() when the kernel is entered,
 * -errno if that call fails, or 'submitted' when no enter is needed.
 */
static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
			     unsigned wait_nr)
{
	unsigned flags = 0;
	int ret;

	/*
	 * sq_ring_needs_enter() must be evaluated before looking at wait_nr:
	 * it is what sets IORING_ENTER_SQ_WAKEUP, and short-circuiting past
	 * it would enter the kernel without waking an idle submit thread.
	 */
	if (!sq_ring_needs_enter(ring, &flags) && !wait_nr)
		return submitted;

	if (wait_nr) {
		if (wait_nr > submitted)
			wait_nr = submitted;
		flags |= IORING_ENTER_GETEVENTS;
	}

	ret = io_uring_enter(ring->ring_fd, submitted, wait_nr, flags, NULL);
	if (ret < 0)
		return -errno;

	return ret;
}
/*
 * Flush queued sqes into the SQ ring and submit them, waiting for up to
 * wait_nr completions.
 *
 * If the flush published nothing, whatever io_uring_sq_ready() still
 * reports as pending is submitted instead. Returns 0 when there is nothing
 * at all to submit, otherwise the result of __io_uring_submit().
 */
static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
	int pending = __io_uring_flush_sq(ring);

	if (!pending) {
		/* Nothing new was flushed; check for earlier leftovers. */
		if (!io_uring_sq_ready(ring))
			return 0;
		pending = io_uring_sq_ready(ring);
	}

	return __io_uring_submit(ring, pending, wait_nr);
}
/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel, without
 * waiting for any completions (wait_nr is passed as 0).
 *
 * Returns number of sqes submitted, 0 if there was nothing to submit, or
 * a negative errno value if entering the kernel failed.
 */
int io_uring_submit(struct io_uring *ring)
{
return __io_uring_submit_and_wait(ring, 0);
}
/*
 * Like io_uring_submit(), but allows waiting for events as well: wait_nr
 * is forwarded as the number of completions to wait for.
 *
 * Returns number of sqes submitted, 0 if there was nothing to submit, or
 * a negative errno value if entering the kernel failed.
 */
int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
{
return __io_uring_submit_and_wait(ring, wait_nr);
}
/*
 * Claim the next free sqe slot of 'sq', treating '__head' as the position
 * up to which entries have been consumed.
 *
 * Evaluates to a pointer to the slot and advances sq->sqe_tail, or to NULL
 * (leaving sqe_tail untouched) when tail + 1 - head would exceed the ring
 * size. '__head' is parenthesized so that any expression may be passed;
 * note that 'sq' is still evaluated more than once.
 */
#define __io_uring_get_sqe(sq, __head) ({				\
	unsigned __next = (sq)->sqe_tail + 1;				\
	struct io_uring_sqe *__sqe = NULL;				\
									\
	if (__next - (__head) <= *(sq)->kring_entries) {		\
		__sqe = &(sq)->sqes[(sq)->sqe_tail & *(sq)->kring_mask];\
		(sq)->sqe_tail = __next;				\
	}								\
	__sqe;								\
})
/*
 * Hand out the next unused sqe for the application to fill in. Nothing is
 * told to the kernel here; the caller does that later via io_uring_submit(),
 * and may grab several sqes before doing so.
 *
 * Returns a vacant sqe, or NULL if the ring is full.
 */
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	unsigned head;

	/*
	 * With SQPOLL, free space is measured against the shared ring head
	 * (read with acquire semantics); otherwise our private sqe_head is
	 * used.
	 */
	if (ring->flags & IORING_SETUP_SQPOLL)
		head = io_uring_smp_load_acquire(sq->khead);
	else
		head = sq->sqe_head;

	return __io_uring_get_sqe(sq, head);
}