/* SPDX-License-Identifier: MIT */
/*
 * io_uring_register.c
 *
 * Description: Unit tests for the io_uring_register system call.
 *
 * Copyright 2019, Red Hat, Inc.
 * Author: Jeff Moyer <jmoyer@redhat.com>
 */
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <poll.h>
#include <assert.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <limits.h>
#include "liburing.h"
#include "../src/syscall.h"

static int pagesize;
static rlim_t mlock_limit;
static int devnull;

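/*
 * Issue io_uring_register() with the given arguments and verify that it
 * fails with the expected errno. If the call unexpectedly succeeds, undo
 * the registration so later tests start from a clean slate.
 */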
int
expect_fail(int fd, unsigned int opcode, void *arg,
	    unsigned int nr_args, int error)
{
	int ret;

	printf("io_uring_register(%d, %u, %p, %u)\n",
	       fd, opcode, arg, nr_args);
	ret = __sys_io_uring_register(fd, opcode, arg, nr_args);
	if (ret != -1) {
		int ret2 = 0;

		printf("expected %s, but call succeeded\n", strerror(error));
		if (opcode == IORING_REGISTER_BUFFERS) {
			ret2 = __sys_io_uring_register(fd,
					IORING_UNREGISTER_BUFFERS, 0, 0);
		} else if (opcode == IORING_REGISTER_FILES) {
			ret2 = __sys_io_uring_register(fd,
					IORING_UNREGISTER_FILES, 0, 0);
		}
		if (ret2) {
			printf("internal error: failed to unregister\n");
			exit(1);
		}
		return 1;
	}

	if (errno != error) {
		printf("expected %d, got %d\n", error, errno);
		return 1;
	}
	return 0;
}

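/* Thin wrapper around io_uring_setup() that exits the test on failure. */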
int
new_io_uring(int entries, struct io_uring_params *p)
{
	int fd;

	fd = __sys_io_uring_setup(entries, p);
	if (fd < 0) {
		perror("io_uring_setup");
		exit(1);
	}
	return fd;
}

#define MAXFDS (UINT_MAX * sizeof(int))

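/*
 * Map 'size' bytes of an unlinked temporary file, MAP_SHARED; used to
 * check that file-backed buffers are rejected by IORING_REGISTER_BUFFERS.
 */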
void *
map_filebacked(size_t size)
{
	int fd, ret;
	void *addr;
	char template[32] = "io_uring_register-test-XXXXXXXX";

	fd = mkstemp(template);
	if (fd < 0) {
		perror("mkstemp");
		return NULL;
	}
	unlink(template);

	ret = ftruncate(fd, size);
	if (ret < 0) {
		perror("ftruncate");
		close(fd);
		return NULL;
	}

	addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return NULL;
	}

	close(fd);
	return addr;
}

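/*
 * Build an enormous fd array: a file-backed mapping filled with copies of
 * a valid fd (/dev/null), remapped until it spans UINT_MAX entries. Then
 * register as many files as the kernel will take, halving the count on
 * each failure.
 */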
/*
 * NOTE: this is now limited by SCM_MAX_FD (253). Keep the code for now,
 * but probably should augment it to test 253 and 254, specifically.
 */
int
test_max_fds(int uring_fd)
{
	int status = 1;
	int ret;
	void *fd_as; /* file descriptor address space */
	int fdtable_fd; /* fd for the file that will be mapped over and over */
	int io_fd; /* the valid fd for I/O -- /dev/null */
	int *fds; /* used to map the file into the address space */
	char template[32] = "io_uring_register-test-XXXXXXXX";
	unsigned long long i, nr_maps, nr_fds;

	/*
	 * First, make an anonymous mapping of the full size. That
	 * guarantees the range fits in the address space mmap selects.
	 * Then overwrite that mapping with a file-backed one, 128MiB at
	 * a time, using MAP_FIXED.
	 */
	fd_as = mmap(NULL, UINT_MAX * sizeof(int), PROT_READ|PROT_WRITE,
		     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (fd_as == MAP_FAILED) {
		if (errno == ENOMEM) {
			printf("Not enough memory for this test, skipping\n");
			return 0;
		}
		perror("mmap fd_as");
		exit(1);
	}
	printf("allocated %zu bytes of address space\n", UINT_MAX * sizeof(int));

	fdtable_fd = mkstemp(template);
	if (fdtable_fd < 0) {
		perror("mkstemp");
		exit(1);
	}
	unlink(template);
	ret = ftruncate(fdtable_fd, 128*1024*1024);
	if (ret < 0) {
		perror("ftruncate");
		exit(1);
	}

	io_fd = open("/dev/null", O_RDWR);
	if (io_fd < 0) {
		perror("open /dev/null");
		exit(1);
	}
	fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
	if (fds == MAP_FAILED) {
		perror("mmap fdtable");
		exit(1);
	}

	/* fill the fd table */
	nr_fds = 128*1024*1024 / sizeof(int);
	for (i = 0; i < nr_fds; i++)
		fds[i] = io_fd;

	/* map the file through the rest of the address space */
	nr_maps = (UINT_MAX * sizeof(int)) / (128*1024*1024);
	for (i = 0; i < nr_maps; i++) {
		fds = &fds[nr_fds]; /* advance fds by 128MiB */
		/* with MAP_FIXED, success means the mapping lands at fds */
		if (mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE,
			 MAP_SHARED|MAP_FIXED, fdtable_fd, 0) == MAP_FAILED) {
			printf("mmap failed at offset %lu\n",
			       (unsigned long)((char *)fds - (char *)fd_as));
			exit(1);
		}
	}

	/*
	 * Now fd_as points to the file descriptor array. We may not be
	 * able to register all of these files; back off until the call
	 * succeeds.
	 */
	nr_fds = UINT_MAX;
	while (nr_fds) {
		ret = __sys_io_uring_register(uring_fd, IORING_REGISTER_FILES,
					      fd_as, nr_fds);
		if (ret != 0) {
			nr_fds /= 2;
			continue;
		}
		printf("io_uring_register(%d, IORING_REGISTER_FILES, %p, %llu)"
		       "...succeeded\n", uring_fd, fd_as, nr_fds);
		status = 0;
		printf("io_uring_register(%d, IORING_UNREGISTER_FILES, 0, 0)...",
		       uring_fd);
		ret = __sys_io_uring_register(uring_fd, IORING_UNREGISTER_FILES,
					      0, 0);
		if (ret < 0) {
			ret = errno;
			printf("failed\n");
			errno = ret;
			perror("io_uring_register UNREGISTER_FILES");
			exit(1);
		}
		printf("succeeded\n");
		break;
	}

	close(io_fd);
	close(fdtable_fd);
	ret = munmap(fd_as, UINT_MAX * sizeof(int));
	if (ret != 0) {
		printf("munmap(%zu) failed\n", UINT_MAX * sizeof(int));
		exit(1);
	}

	return status;
}

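/*
 * Register a buffer twice the size of RLIMIT_MEMLOCK and expect ENOMEM,
 * then halve the size until registration (and unregistration) succeeds.
 */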
int
test_memlock_exceeded(int fd)
{
	int ret;
	void *buf;
	struct iovec iov;

	/* if the limit is larger than 2GB, just skip this test */
	if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL)
		return 0;

	iov.iov_len = mlock_limit * 2;
	buf = malloc(iov.iov_len);
	assert(buf);
	iov.iov_base = buf;

	while (iov.iov_len) {
		ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret < 0) {
			if (errno == ENOMEM) {
				printf("io_uring_register of %zu bytes failed "
				       "with ENOMEM (expected).\n", iov.iov_len);
				iov.iov_len /= 2;
				continue;
			}
			printf("expected success or ENOMEM, got %d\n", errno);
			free(buf);
			return 1;
		}
		printf("successfully registered %zu bytes (%d).\n",
		       iov.iov_len, ret);
		ret = __sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
					      NULL, 0);
		if (ret != 0) {
			printf("error: unregister failed with %d\n", errno);
			free(buf);
			return 1;
		}
		break;
	}
	if (!iov.iov_len)
		printf("Unable to register buffers. Check memlock rlimit.\n");

	free(buf);
	return 0;
}

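/*
 * More than UIO_MAXIOV iovecs must be rejected with EINVAL; exactly
 * UIO_MAXIOV of them should register cleanly.
 */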
int
test_iovec_nr(int fd)
{
	int i, ret, status = 0;
	unsigned int nr = UIO_MAXIOV + 1;
	struct iovec *iovs;
	void *buf;

	buf = malloc(pagesize);
	assert(buf);

	iovs = malloc(nr * sizeof(struct iovec));
	assert(iovs);

	for (i = 0; i < nr; i++) {
		iovs[i].iov_base = buf;
		iovs[i].iov_len = pagesize;
	}

	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, EINVAL);

	/* reduce to UIO_MAXIOV */
	nr--;
	printf("io_uring_register(%d, %u, %p, %u)\n",
	       fd, IORING_REGISTER_BUFFERS, iovs, nr);
	ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
	if (ret != 0) {
		printf("expected success, got %d\n", errno);
		status = 1;
	} else
		__sys_io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);

	free(buf);
	free(iovs);
	return status;
}

/*
 * io_uring limits each registered buffer to 1GB; iov_len itself is only
 * bounded by ~0UL.
 */
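/*
 * Cases covered below: a NULL base pointer, a zero-length iovec, a length
 * that runs into an unmapped page, a 2MB huge page, and a file-backed
 * mapping. The memlock-limit check only runs when not root.
 */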
int
test_iovec_size(int fd)
{
	unsigned int status = 0;
	int ret;
	struct iovec iov;
	void *buf;

	/* NULL pointer for base */
	iov.iov_base = 0;
	iov.iov_len = 4096;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);

	/* valid base, 0 length */
	iov.iov_base = &buf;
	iov.iov_len = 0;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);

	/* valid base, length exceeds size */
	/* this requires an unmapped page directly after buf */
	buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(buf != MAP_FAILED);
	ret = munmap(buf + pagesize, pagesize);
	assert(ret == 0);
	iov.iov_base = buf;
	iov.iov_len = 2 * pagesize;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EFAULT);
	munmap(buf, pagesize);

	/* huge page */
	buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
		   -1, 0);
	if (buf == MAP_FAILED) {
		printf("Unable to map a huge page. Try increasing "
		       "/proc/sys/vm/nr_hugepages by at least 1.\n");
		printf("Skipping the hugepage test\n");
	} else {
		/*
		 * This should succeed, so long as RLIMIT_MEMLOCK is
		 * not exceeded
		 */
		iov.iov_base = buf;
		iov.iov_len = 2*1024*1024;
		ret = __sys_io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret < 0) {
			if (errno == ENOMEM)
				printf("Unable to test registering of a huge "
				       "page. Try increasing the "
				       "RLIMIT_MEMLOCK resource limit by at "
				       "least 2MB.\n");
			else {
				printf("expected success, got %d\n", errno);
				status = 1;
			}
		} else {
			printf("Success!\n");
			ret = __sys_io_uring_register(fd,
					IORING_UNREGISTER_BUFFERS, 0, 0);
			if (ret < 0) {
				perror("io_uring_unregister");
				status = 1;
			}
		}
	}
	ret = munmap(iov.iov_base, iov.iov_len);
	assert(ret == 0);

	/* file-backed buffers -- not supported */
	buf = map_filebacked(2*1024*1024);
	if (!buf)
		status = 1;
	iov.iov_base = buf;
	iov.iov_len = 2*1024*1024;
	printf("reserve file-backed buffers\n");
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, EOPNOTSUPP);
	munmap(buf, 2*1024*1024);

	/* bump up against the soft limit and make sure we get EFAULT
	 * or whatever we're supposed to get. NOTE: this requires
	 * running the test as non-root. */
	if (getuid() != 0)
		status |= test_memlock_exceeded(fd);

	return status;
}

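/* Print the sqe fields that matter for the poll test below. */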
void
dump_sqe(struct io_uring_sqe *sqe)
{
	printf("\topcode: %d\n", sqe->opcode);
	printf("\tflags: 0x%.8x\n", sqe->flags);
	printf("\tfd: %d\n", sqe->fd);
	if (sqe->opcode == IORING_OP_POLL_ADD)
		printf("\tpoll_events: 0x%.8x\n", sqe->poll_events);
}

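/*
 * Submit a single IORING_OP_POLL_ADD against fd (as a fixed file if
 * requested), wait for its completion, and expect POLLOUT.
 */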
int
ioring_poll(struct io_uring *ring, int fd, int fixed)
{
	int ret;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	sqe = io_uring_get_sqe(ring);
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_POLL_ADD;
	if (fixed)
		sqe->flags = IOSQE_FIXED_FILE;
	sqe->fd = fd;
	sqe->poll_events = POLLIN|POLLOUT;

	printf("io_uring_submit:\n");
	dump_sqe(sqe);
	ret = io_uring_submit(ring);
	if (ret != 1) {
		printf("failed to submit poll sqe: %d.\n", errno);
		return 1;
	}

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0) {
		printf("io_uring_wait_cqe failed with %d\n", ret);
		return 1;
	}
	ret = 0;
	if (cqe->res != POLLOUT) {
		printf("io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n",
		       POLLOUT, cqe->res);
		ret = 1;
	}

	io_uring_cqe_seen(ring, cqe);
	return ret;
}

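/*
 * Poll the ring's own file descriptor, then verify that the ring fd
 * cannot itself be registered with IORING_REGISTER_FILES.
 */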
int
test_poll_ringfd(void)
{
	int status = 0;
	int ret;
	int fd;
	struct io_uring ring;

	ret = io_uring_queue_init(1, &ring, 0);
	if (ret) {
		perror("io_uring_queue_init");
		return 1;
	}
	fd = ring.ring_fd;

	/* try polling the ring fd */
	status = ioring_poll(&ring, fd, 0);

	/*
	 * now try to register the ring fd as a fixed file. This should
	 * fail, because the kernel does not allow registering of the
	 * ring fd.
	 */
	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, EBADF);

	/* tear down queue */
	io_uring_queue_exit(&ring);

	return status;
}

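/*
 * Set up globals, then run the argument-validation, buffer-registration,
 * file-registration, and ring-fd poll tests in turn.
 */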
int
main(int argc, char **argv)
{
	int fd, ret;
	unsigned int status = 0;
	struct io_uring_params p;
	struct rlimit rlim;

	if (argc > 1)
		return 0;

	/* setup globals */
	pagesize = getpagesize();
	ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
	if (ret < 0) {
		perror("getrlimit");
		return 1;
	}
	mlock_limit = rlim.rlim_cur;
	printf("RLIMIT_MEMLOCK: %lu (%lu)\n", rlim.rlim_cur, rlim.rlim_max);
	devnull = open("/dev/null", O_RDWR);
	if (devnull < 0) {
		perror("open /dev/null");
		exit(1);
	}

	/* invalid fd */
	status |= expect_fail(-1, 0, NULL, 0, EBADF);
	/* valid fd that is not an io_uring fd */
	status |= expect_fail(devnull, 0, NULL, 0, EOPNOTSUPP);

	/* invalid opcode */
	memset(&p, 0, sizeof(p));
	fd = new_io_uring(1, &p);
	ret = expect_fail(fd, ~0U, NULL, 0, EINVAL);
	if (ret) {
		/* if this succeeds, tear down the io_uring instance
		 * and start clean for the next test. */
		close(fd);
		fd = new_io_uring(1, &p);
	}

	/* IORING_REGISTER_BUFFERS */
	status |= test_iovec_size(fd);
	status |= test_iovec_nr(fd);
	/* IORING_REGISTER_FILES */
	status |= test_max_fds(fd);
	close(fd);
	/* uring poll on the uring fd */
	status |= test_poll_ringfd();

	if (!status)
		printf("PASS\n");
	else
		printf("FAIL\n");

	return status;
}