| /* SPDX-License-Identifier: MIT */ |
| /* |
| * io_uring_register.c |
| * |
| * Description: Unit tests for the io_uring_register system call. |
| * |
| * Copyright 2019, Red Hat, Inc. |
| * Author: Jeff Moyer <jmoyer@redhat.com> |
| */ |
| #include <stdio.h> |
| #include <fcntl.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| #include <errno.h> |
| #include <sys/sysinfo.h> |
| #include <poll.h> |
| #include <assert.h> |
| #include <sys/uio.h> |
| #include <sys/mman.h> |
| #include <linux/mman.h> |
| #include <sys/time.h> |
| #include <sys/resource.h> |
| #include <limits.h> |
| #include "liburing.h" |
| #include "../src/syscall.h" |
| |
| static int pagesize; |
| static rlim_t mlock_limit; |
| static int devnull; |
| |
| int |
| expect_fail(int fd, unsigned int opcode, void *arg, |
| unsigned int nr_args, int error) |
| { |
| int ret; |
| |
| printf("io_uring_register(%d, %u, %p, %u)\n", |
| fd, opcode, arg, nr_args); |
| ret = __sys_io_uring_register(fd, opcode, arg, nr_args); |
| if (ret != -1) { |
| int ret2 = 0; |
| |
| printf("expected %s, but call succeeded\n", strerror(error)); |
| if (opcode == IORING_REGISTER_BUFFERS) { |
| ret2 = __sys_io_uring_register(fd, |
| IORING_UNREGISTER_BUFFERS, 0, 0); |
| } else if (opcode == IORING_REGISTER_FILES) { |
| ret2 = __sys_io_uring_register(fd, |
| IORING_UNREGISTER_FILES, 0, 0); |
| } |
| if (ret2) { |
| printf("internal error: failed to unregister\n"); |
| exit(1); |
| } |
| return 1; |
| } |
| |
| if (errno != error) { |
| printf("expected %d, got %d\n", error, errno); |
| return 1; |
| } |
| return 0; |
| } |
| |
| int |
| new_io_uring(int entries, struct io_uring_params *p) |
| { |
| int fd; |
| |
| fd = __sys_io_uring_setup(entries, p); |
| if (fd < 0) { |
| perror("io_uring_setup"); |
| exit(1); |
| } |
| return fd; |
| } |
| |
| #define MAXFDS (UINT_MAX * sizeof(int)) |
| |
| void * |
| map_filebacked(size_t size) |
| { |
| int fd, ret; |
| void *addr; |
| char template[32] = "io_uring_register-test-XXXXXXXX"; |
| |
| fd = mkstemp(template); |
| if (fd < 0) { |
| perror("mkstemp"); |
| return NULL; |
| } |
| unlink(template); |
| |
| ret = ftruncate(fd, size); |
| if (ret < 0) { |
| perror("ftruncate"); |
| close(fd); |
| return NULL; |
| } |
| |
| addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); |
| if (addr == MAP_FAILED) { |
| perror("mmap"); |
| close(fd); |
| return NULL; |
| } |
| |
| close(fd); |
| return addr; |
| } |
| |
| /* |
| * NOTE: this is now limited by SCM_MAX_FD (253). Keep the code for now, |
| * but probably should augment it to test 253 and 254, specifically. |
| */ |
| int |
| test_max_fds(int uring_fd) |
| { |
| int status = 1; |
| int ret; |
| void *fd_as; /* file descriptor address space */ |
| int fdtable_fd; /* fd for the file that will be mapped over and over */ |
| int io_fd; /* the valid fd for I/O -- /dev/null */ |
| int *fds; /* used to map the file into the address space */ |
| char template[32] = "io_uring_register-test-XXXXXXXX"; |
| unsigned long long i, nr_maps, nr_fds; |
| |
| /* |
| * First, mmap anonymous the full size. That will guarantee the |
| * mapping will fit in the memory area selected by mmap. Then, |
| * over-write that mapping using a file-backed mapping, 128MiB at |
| * a time using MAP_FIXED. |
| */ |
| fd_as = mmap(NULL, UINT_MAX * sizeof(int), PROT_READ|PROT_WRITE, |
| MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); |
| if (fd_as == MAP_FAILED) { |
| if (errno == ENOMEM) { |
| printf("Not enough memory for this test, skipping\n"); |
| return 0; |
| } |
| perror("mmap fd_as"); |
| exit(1); |
| } |
| printf("allocated %zu bytes of address space\n", UINT_MAX * sizeof(int)); |
| |
| fdtable_fd = mkstemp(template); |
| if (fdtable_fd < 0) { |
| perror("mkstemp"); |
| exit(1); |
| } |
| unlink(template); |
| ret = ftruncate(fdtable_fd, 128*1024*1024); |
| if (ret < 0) { |
| perror("ftruncate"); |
| exit(1); |
| } |
| |
| io_fd = open("/dev/null", O_RDWR); |
| if (io_fd < 0) { |
| perror("open /dev/null"); |
| exit(1); |
| } |
| fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE, |
| MAP_SHARED|MAP_FIXED, fdtable_fd, 0); |
| if (fds == MAP_FAILED) { |
| perror("mmap fdtable"); |
| exit(1); |
| } |
| |
| /* fill the fd table */ |
| nr_fds = 128*1024*1024 / sizeof(int); |
| for (i = 0; i < nr_fds; i++) |
| fds[i] = io_fd; |
| |
| /* map the file through the rest of the address space */ |
| nr_maps = (UINT_MAX * sizeof(int)) / (128*1024*1024); |
| for (i = 0; i < nr_maps; i++) { |
| fds = &fds[nr_fds]; /* advance fds by 128MiB */ |
| fds = mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE, |
| MAP_SHARED|MAP_FIXED, fdtable_fd, 0); |
| if (fds == MAP_FAILED) { |
| printf("mmap failed at offset %lu\n", |
| (unsigned long)((char *)fd_as - (char *)fds)); |
| exit(1); |
| } |
| } |
| |
| /* Now fd_as points to the file descriptor array. */ |
| /* |
| * We may not be able to map all of these files. Let's back off |
| * until success. |
| */ |
| nr_fds = UINT_MAX; |
| while (nr_fds) { |
| ret = __sys_io_uring_register(uring_fd, IORING_REGISTER_FILES, |
| fd_as, nr_fds); |
| if (ret != 0) { |
| nr_fds /= 2; |
| continue; |
| } |
| printf("io_uring_register(%d, IORING_REGISTER_FILES, %p, %llu)" |
| "...succeeded\n", uring_fd, fd_as, nr_fds); |
| status = 0; |
| printf("io_uring_register(%d, IORING_UNREGISTER_FILES, 0, 0)...", |
| uring_fd); |
| ret = __sys_io_uring_register(uring_fd, IORING_UNREGISTER_FILES, |
| 0, 0); |
| if (ret < 0) { |
| ret = errno; |
| printf("failed\n"); |
| errno = ret; |
| perror("io_uring_register UNREGISTER_FILES"); |
| exit(1); |
| } |
| printf("succeeded\n"); |
| break; |
| } |
| |
| close(io_fd); |
| close(fdtable_fd); |
| ret = munmap(fd_as, UINT_MAX * sizeof(int)); |
| if (ret != 0) { |
| printf("munmap(%zu) failed\n", UINT_MAX * sizeof(int)); |
| exit(1); |
| } |
| |
| return status; |
| } |
| |
| int |
| test_memlock_exceeded(int fd) |
| { |
| int ret; |
| void *buf; |
| struct iovec iov; |
| |
| /* if limit is larger than 2gb, just skip this test */ |
| if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL) |
| return 0; |
| |
| iov.iov_len = mlock_limit * 2; |
| buf = malloc(iov.iov_len); |
| assert(buf); |
| iov.iov_base = buf; |
| |
| while (iov.iov_len) { |
| ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1); |
| if (ret < 0) { |
| if (errno == ENOMEM) { |
| printf("io_uring_register of %zu bytes failed " |
| "with ENOMEM (expected).\n", iov.iov_len); |
| iov.iov_len /= 2; |
| continue; |
| } |
| printf("expected success or EFAULT, got %d\n", errno); |
| free(buf); |
| return 1; |
| } |
| printf("successfully registered %zu bytes (%d).\n", |
| iov.iov_len, ret); |
| ret = __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, |
| NULL, 0); |
| if (ret != 0) { |
| printf("error: unregister failed with %d\n", errno); |
| free(buf); |
| return 1; |
| } |
| break; |
| } |
| if (!iov.iov_len) |
| printf("Unable to register buffers. Check memlock rlimit.\n"); |
| |
| free(buf); |
| return 0; |
| } |
| |
| int |
| test_iovec_nr(int fd) |
| { |
| int i, ret, status = 0; |
| unsigned int nr = UIO_MAXIOV + 1; |
| struct iovec *iovs; |
| void *buf; |
| |
| buf = malloc(pagesize); |
| assert(buf); |
| |
| iovs = malloc(nr * sizeof(struct iovec)); |
| assert(iovs); |
| |
| for (i = 0; i < nr; i++) { |
| iovs[i].iov_base = buf; |
| iovs[i].iov_len = pagesize; |
| } |
| |
| status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, EINVAL); |
| |
| /* reduce to UIO_MAXIOV */ |
| nr--; |
| printf("io_uring_register(%d, %u, %p, %u)\n", |
| fd, IORING_REGISTER_BUFFERS, iovs, nr); |
| ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr); |
| if (ret != 0) { |
| printf("expected success, got %d\n", errno); |
| status = 1; |
| } else |
| __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0); |
| |
| free(buf); |
| free(iovs); |
| return status; |
| } |
| |
| /* |
| * io_uring limit is 1G. iov_len limit is ~OUL, I think |
| */ |
| int |
| test_iovec_size(int fd) |
| { |
| unsigned int status = 0; |
| int ret; |
| struct iovec iov; |
| void *buf; |
| |
| /* NULL pointer for base */ |
| iov.iov_base = 0; |
| iov.iov_len = 4096; |
| status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT); |
| |
| /* valid base, 0 length */ |
| iov.iov_base = &buf; |
| iov.iov_len = 0; |
| status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT); |
| |
| /* valid base, length exceeds size */ |
| /* this requires an unampped page directly after buf */ |
| buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE, |
| MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
| assert(buf != MAP_FAILED); |
| ret = munmap(buf + pagesize, pagesize); |
| assert(ret == 0); |
| iov.iov_base = buf; |
| iov.iov_len = 2 * pagesize; |
| status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT); |
| munmap(buf, pagesize); |
| |
| /* huge page */ |
| buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE, |
| MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS, |
| -1, 0); |
| if (buf == MAP_FAILED) { |
| printf("Unable to map a huge page. Try increasing " |
| "/proc/sys/vm/nr_hugepages by at least 1.\n"); |
| printf("Skipping the hugepage test\n"); |
| } else { |
| /* |
| * This should succeed, so long as RLIMIT_MEMLOCK is |
| * not exceeded |
| */ |
| iov.iov_base = buf; |
| iov.iov_len = 2*1024*1024; |
| ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1); |
| if (ret < 0) { |
| if (errno == ENOMEM) |
| printf("Unable to test registering of a huge " |
| "page. Try increasing the " |
| "RLIMIT_MEMLOCK resource limit by at " |
| "least 2MB."); |
| else { |
| printf("expected success, got %d\n", errno); |
| status = 1; |
| } |
| } else { |
| printf("Success!\n"); |
| ret = __sys_io_uring_register(fd, |
| IORING_UNREGISTER_BUFFERS, 0, 0); |
| if (ret < 0) { |
| perror("io_uring_unregister"); |
| status = 1; |
| } |
| } |
| } |
| ret = munmap(iov.iov_base, iov.iov_len); |
| assert(ret == 0); |
| |
| /* file-backed buffers -- not supported */ |
| buf = map_filebacked(2*1024*1024); |
| if (!buf) |
| status = 1; |
| iov.iov_base = buf; |
| iov.iov_len = 2*1024*1024; |
| printf("reserve file-backed buffers\n"); |
| status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EOPNOTSUPP); |
| munmap(buf, 2*1024*1024); |
| |
| /* bump up against the soft limit and make sure we get EFAULT |
| * or whatever we're supposed to get. NOTE: this requires |
| * running the test as non-root. */ |
| if (getuid() != 0) |
| status |= test_memlock_exceeded(fd); |
| |
| return status; |
| } |
| |
| void |
| dump_sqe(struct io_uring_sqe *sqe) |
| { |
| printf("\topcode: %d\n", sqe->opcode); |
| printf("\tflags: 0x%.8x\n", sqe->flags); |
| printf("\tfd: %d\n", sqe->fd); |
| if (sqe->opcode == IORING_OP_POLL_ADD) |
| printf("\tpoll_events: 0x%.8x\n", sqe->poll_events); |
| } |
| |
| int |
| ioring_poll(struct io_uring *ring, int fd, int fixed) |
| { |
| int ret; |
| struct io_uring_sqe *sqe; |
| struct io_uring_cqe *cqe; |
| |
| sqe = io_uring_get_sqe(ring); |
| memset(sqe, 0, sizeof(*sqe)); |
| sqe->opcode = IORING_OP_POLL_ADD; |
| if (fixed) |
| sqe->flags = IOSQE_FIXED_FILE; |
| sqe->fd = fd; |
| sqe->poll_events = POLLIN|POLLOUT; |
| |
| printf("io_uring_submit:\n"); |
| dump_sqe(sqe); |
| ret = io_uring_submit(ring); |
| if (ret != 1) { |
| printf("failed to submit poll sqe: %d.\n", errno); |
| return 1; |
| } |
| |
| ret = io_uring_wait_cqe(ring, &cqe); |
| if (ret < 0) { |
| printf("io_uring_wait_cqe failed with %d\n", ret); |
| return 1; |
| } |
| ret = 0; |
| if (cqe->res != POLLOUT) { |
| printf("io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n", |
| POLLOUT, cqe->res); |
| ret = 1; |
| } |
| |
| io_uring_cqe_seen(ring, cqe); |
| return ret; |
| } |
| |
| int |
| test_poll_ringfd(void) |
| { |
| int status = 0; |
| int ret; |
| int fd; |
| struct io_uring ring; |
| |
| ret = io_uring_queue_init(1, &ring, 0); |
| if (ret) { |
| perror("io_uring_queue_init"); |
| return 1; |
| } |
| fd = ring.ring_fd; |
| |
| /* try polling the ring fd */ |
| status = ioring_poll(&ring, fd, 0); |
| |
| /* |
| * now register the ring fd, and try the poll again. This should |
| * fail, because the kernel does not allow registering of the |
| * ring_fd. |
| */ |
| status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, EBADF); |
| |
| /* tear down queue */ |
| io_uring_queue_exit(&ring); |
| |
| return status; |
| } |
| |
| int |
| main(int argc, char **argv) |
| { |
| int fd, ret; |
| unsigned int status = 0; |
| struct io_uring_params p; |
| struct rlimit rlim; |
| |
| if (argc > 1) |
| return 0; |
| |
| /* setup globals */ |
| pagesize = getpagesize(); |
| ret = getrlimit(RLIMIT_MEMLOCK, &rlim); |
| if (ret < 0) { |
| perror("getrlimit"); |
| return 1; |
| } |
| mlock_limit = rlim.rlim_cur; |
| printf("RELIMIT_MEMLOCK: %lu (%lu)\n", rlim.rlim_cur, rlim.rlim_max); |
| devnull = open("/dev/null", O_RDWR); |
| if (devnull < 0) { |
| perror("open /dev/null"); |
| exit(1); |
| } |
| |
| /* invalid fd */ |
| status |= expect_fail(-1, 0, NULL, 0, EBADF); |
| /* valid fd that is not an io_uring fd */ |
| status |= expect_fail(devnull, 0, NULL, 0, EOPNOTSUPP); |
| |
| /* invalid opcode */ |
| memset(&p, 0, sizeof(p)); |
| fd = new_io_uring(1, &p); |
| ret = expect_fail(fd, ~0U, NULL, 0, EINVAL); |
| if (ret) { |
| /* if this succeeds, tear down the io_uring instance |
| * and start clean for the next test. */ |
| close(fd); |
| fd = new_io_uring(1, &p); |
| } |
| |
| /* IORING_REGISTER_BUFFERS */ |
| status |= test_iovec_size(fd); |
| status |= test_iovec_nr(fd); |
| /* IORING_REGISTER_FILES */ |
| status |= test_max_fds(fd); |
| close(fd); |
| /* uring poll on the uring fd */ |
| status |= test_poll_ringfd(); |
| |
| if (!status) |
| printf("PASS\n"); |
| else |
| printf("FAIL\n"); |
| |
| return status; |
| } |