Merge branch 'master' of https://github.com/romange/liburing

* 'master' of https://github.com/romange/liburing:
  Add a test demonstrating connect timeout functionality
diff --git a/.gitignore b/.gitignore
index 430e3a3..360064a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,7 +84,9 @@
 /test/probe
 /test/read-write
 /test/register-restrictions
+/test/rename
 /test/ring-leak
+/test/self
 /test/send_recv
 /test/send_recvmsg
 /test/shared-wq
@@ -99,13 +101,16 @@
 /test/sq-poll-dup
 /test/sq-poll-kthread
 /test/sq-poll-share
+/test/sqpoll-sleep
 /test/sq-space_left
 /test/statx
 /test/stdout
 /test/submit-reuse
 /test/teardowns
 /test/timeout
+/test/timeout-new
 /test/timeout-overflow
+/test/unlink
 /test/wakeup-hang
 /test/*.dmesg
 
diff --git a/man/io_uring_enter.2 b/man/io_uring_enter.2
index ae7e687..a1e486a 100644
--- a/man/io_uring_enter.2
+++ b/man/io_uring_enter.2
@@ -325,7 +325,9 @@
 
 .TP
 .B IORING_OP_TIMEOUT_REMOVE
-Attempt to remove an existing timeout operation.
+If
+.I timeout_flags
+are zero, then it attempts to remove an existing timeout operation.
 .I addr
 must contain the
 .I user_data
@@ -341,6 +343,19 @@
 .I -ENOENT
 Available since 5.5.
 
+If
+.I timeout_flags
+contain
+.IR IORING_TIMEOUT_UPDATE ,
+instead of removing an existing operation it updates it.
+.I addr
+and return values are the same as before.
+.I addr2
+field must contain a pointer to a struct timespec64 structure.
+.I timeout_flags
+may also contain IORING_TIMEOUT_ABS.
+Available since 5.11.
+
 .TP
 .B IORING_OP_ACCEPT
 Issue the equivalent of an
diff --git a/man/io_uring_setup.2 b/man/io_uring_setup.2
index a903b04..7a66c65 100644
--- a/man/io_uring_setup.2
+++ b/man/io_uring_setup.2
@@ -126,8 +126,8 @@
 The presence of this feature can be detected by the
 .B IORING_FEAT_SQPOLL_NONFIXED
 feature flag.
-In version 5.10 and later, it is no longer necessary to register files to use
-this feature. 5.10 also allows using this as non-root, if the user has the
+In version 5.11 and later, it is no longer necessary to register files to use
+this feature. 5.11 also allows using this as non-root, if the user has the
 .B CAP_SYS_NICE
 capability.
 .TP
diff --git a/src/include/liburing.h b/src/include/liburing.h
index 4d61a39..ebfc424 100644
--- a/src/include/liburing.h
+++ b/src/include/liburing.h
@@ -62,7 +62,8 @@
 	unsigned flags;
 	int ring_fd;
 
-	unsigned pad[4];
+	unsigned features;
+	unsigned pad[3];
 };
 
 /*
@@ -325,6 +326,15 @@
 	sqe->timeout_flags = flags;
 }
 
+static inline void io_uring_prep_timeout_update(struct io_uring_sqe *sqe,
+						struct __kernel_timespec *ts,
+						__u64 user_data, unsigned flags)
+{
+	io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1,
+				(void *)(unsigned long)user_data, 0, (__u64)ts);
+	sqe->timeout_flags = flags | IORING_TIMEOUT_UPDATE;
+}
+
 static inline void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
 					struct sockaddr *addr,
 					socklen_t *addrlen, int flags)
diff --git a/src/include/liburing/io_uring.h b/src/include/liburing/io_uring.h
index 9685f65..0bb55b0 100644
--- a/src/include/liburing/io_uring.h
+++ b/src/include/liburing/io_uring.h
@@ -155,6 +155,7 @@
  * sqe->timeout_flags
  */
 #define IORING_TIMEOUT_ABS	(1U << 0)
+#define IORING_TIMEOUT_UPDATE	(1U << 1)
 
 /*
  * sqe->splice_flags
@@ -235,6 +236,7 @@
 #define IORING_ENTER_GETEVENTS	(1U << 0)
 #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
 #define IORING_ENTER_SQ_WAIT	(1U << 2)
+#define IORING_ENTER_EXT_ARG	(1U << 3)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -263,6 +265,7 @@
 #define IORING_FEAT_FAST_POLL		(1U << 5)
 #define IORING_FEAT_POLL_32BITS 	(1U << 6)
 #define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
+#define IORING_FEAT_EXT_ARG		(1U << 8)
 
 /*
  * io_uring_register(2) opcodes and arguments
@@ -306,7 +309,7 @@
 	__u8 ops_len;	/* length of ops[] array below */
 	__u16 resv;
 	__u32 resv2[3];
-	struct io_uring_probe_op ops[0];
+	struct io_uring_probe_op ops[];
 };
 
 struct io_uring_restriction {
@@ -339,6 +342,12 @@
 	IORING_RESTRICTION_LAST
 };
 
+struct io_uring_getevents_arg {
+	__u64	sigmask;
+	__u32	sigmask_sz;
+	__u32	pad;
+	__u64	ts;
+};
 
 #ifdef __cplusplus
 }
diff --git a/src/queue.c b/src/queue.c
index 053d430..df388f6 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -39,83 +39,117 @@
 }
 
 static int __io_uring_peek_cqe(struct io_uring *ring,
-			       struct io_uring_cqe **cqe_ptr)
+			       struct io_uring_cqe **cqe_ptr,
+			       unsigned *nr_available)
 {
 	struct io_uring_cqe *cqe;
-	unsigned head;
 	int err = 0;
+	unsigned available;
+	unsigned mask = *ring->cq.kring_mask;
 
 	do {
-		io_uring_for_each_cqe(ring, head, cqe)
+		unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail);
+		unsigned head = *ring->cq.khead;
+
+		cqe = NULL;
+		available = tail - head;
+		if (!available)
 			break;
-		if (cqe) {
-			if (cqe->user_data == LIBURING_UDATA_TIMEOUT) {
-				if (cqe->res < 0)
-					err = cqe->res;
-				io_uring_cq_advance(ring, 1);
-				if (!err)
-					continue;
-				cqe = NULL;
-			}
+
+		cqe = &ring->cq.cqes[head & mask];
+		if (cqe->user_data == LIBURING_UDATA_TIMEOUT) {
+			if (cqe->res < 0)
+				err = cqe->res;
+			io_uring_cq_advance(ring, 1);
+			if (!err)
+				continue;
+			cqe = NULL;
 		}
+
 		break;
 	} while (1);
 
 	*cqe_ptr = cqe;
+	*nr_available = available;
+	return err;
+}
+
+struct get_data {
+	unsigned submit;
+	unsigned wait_nr;
+	unsigned get_flags;
+	int sz;
+	void *arg;
+};
+
+static int _io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
+			     struct get_data *data)
+{
+	struct io_uring_cqe *cqe = NULL;
+	const int to_wait = data->wait_nr;
+	int ret = 0, err;
+
+	do {
+		bool cq_overflow_flush = false;
+		unsigned flags = 0;
+		unsigned nr_available;
+
+		err = __io_uring_peek_cqe(ring, &cqe, &nr_available);
+		if (err)
+			break;
+		if (!cqe && !to_wait && !data->submit) {
+			if (!cq_ring_needs_flush(ring)) {
+				err = -EAGAIN;
+				break;
+			}
+			cq_overflow_flush = true;
+		}
+		if (data->wait_nr && cqe)
+			data->wait_nr--;
+		if (data->wait_nr || cq_overflow_flush)
+			flags = IORING_ENTER_GETEVENTS | data->get_flags;
+		if (data->submit)
+			sq_ring_needs_enter(ring, &flags);
+		if (data->wait_nr > nr_available || data->submit ||
+		    cq_overflow_flush)
+			ret = __sys_io_uring_enter2(ring->ring_fd, data->submit,
+					data->wait_nr, flags, data->arg,
+					data->sz);
+		if (ret < 0) {
+			err = -errno;
+		} else if (ret == (int)data->submit) {
+			data->submit = 0;
+			/*
+			 * When SETUP_IOPOLL is set, __sys_io_uring_enter2()
+			 * must be called to reap new completions but the call
+			 * won't be made if both wait_nr and submit are zero
+			 * so preserve wait_nr.
+			 */
+			if (!(ring->flags & IORING_SETUP_IOPOLL))
+				data->wait_nr = 0;
+		} else {
+			data->submit -= ret;
+		}
+		if (cqe)
+			break;
+	} while (!err);
+
+	*cqe_ptr = cqe;
 	return err;
 }
 
 int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
 		       unsigned submit, unsigned wait_nr, sigset_t *sigmask)
 {
-	struct io_uring_cqe *cqe = NULL;
-	const int to_wait = wait_nr;
-	int ret = 0, err;
+	struct get_data data = {
+		.submit		= submit,
+		.wait_nr 	= wait_nr,
+		.get_flags	= 0,
+		.sz		= _NSIG / 8,
+		.arg		= sigmask,
+	};
 
-	do {
-		bool cq_overflow_flush = false;
-		unsigned flags = 0;
-
-		err = __io_uring_peek_cqe(ring, &cqe);
-		if (err)
-			break;
-		if (!cqe && !to_wait && !submit) {
-			if (!cq_ring_needs_flush(ring)) {
-				err = -EAGAIN;
-				break;
-			}
-			cq_overflow_flush = true;
-		}
-		if (wait_nr && cqe)
-			wait_nr--;
-		if (wait_nr || cq_overflow_flush)
-			flags = IORING_ENTER_GETEVENTS;
-		if (submit)
-			sq_ring_needs_enter(ring, &flags);
-		if (wait_nr || submit || cq_overflow_flush)
-			ret = __sys_io_uring_enter(ring->ring_fd, submit,
-						   wait_nr, flags, sigmask);
-		if (ret < 0) {
-			err = -errno;
-		} else if (ret == (int)submit) {
-			submit = 0;
-			/*
-			 * When SETUP_IOPOLL is set, __sys_io_uring enter()
-			 * must be called to reap new completions but the call
-			 * won't be made if both wait_nr and submit are zero
-			 * so preserve wait_nr.
-			 */
-			if (!(ring->flags & IORING_SETUP_IOPOLL))
-				wait_nr = 0;
-		} else {
-			submit -= ret;
-		}
-		if (cqe)
-			break;
-	} while (!err);
-
-	*cqe_ptr = cqe;
-	return err;
+	return _io_uring_get_cqe(ring, cqe_ptr, &data);
 }
 
 /*
@@ -194,6 +228,31 @@
 }
 
 /*
+ * If we have kernel support for IORING_ENTER_EXT_ARG, then we can use that
+ * more efficiently than queueing an internal timeout command.
+ */
+static int io_uring_wait_cqes_new(struct io_uring *ring,
+				  struct io_uring_cqe **cqe_ptr,
+				  unsigned wait_nr, struct __kernel_timespec *ts,
+				  sigset_t *sigmask)
+{
+	struct io_uring_getevents_arg arg = {
+		.sigmask	= (unsigned long) sigmask,
+		.sigmask_sz	= _NSIG / 8,
+		.ts		= (unsigned long) ts
+	};
+	struct get_data data = {
+		.submit		= __io_uring_flush_sq(ring),
+		.wait_nr	= wait_nr,
+		.get_flags	= IORING_ENTER_EXT_ARG,
+		.sz		= sizeof(arg),
+		.arg		= &arg
+	};
+
+	return _io_uring_get_cqe(ring, cqe_ptr, &data);
+}
+
+/*
  * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
  * that an sqe is used internally to handle the timeout. Applications using
  * this function must never set sqe->user_data to LIBURING_UDATA_TIMEOUT!
@@ -214,6 +273,10 @@
 		struct io_uring_sqe *sqe;
 		int ret;
 
+		if (ring->features & IORING_FEAT_EXT_ARG)
+			return io_uring_wait_cqes_new(ring, cqe_ptr, wait_nr,
+							ts, sigmask);
+
 		/*
 		 * If the SQ ring is full, we may need to submit IO first
 		 */
diff --git a/src/setup.c b/src/setup.c
index dded481..062eaa0 100644
--- a/src/setup.c
+++ b/src/setup.c
@@ -145,10 +145,13 @@
 		return -errno;
 
 	ret = io_uring_queue_mmap(fd, p, ring);
-	if (ret)
+	if (ret) {
 		close(fd);
+		return ret;
+	}
 
-	return ret;
+	ring->features = p->features;
+	return 0;
 }
 
 /*
diff --git a/src/syscall.c b/src/syscall.c
index 49b4ec2..2fd3dd4 100644
--- a/src/syscall.c
+++ b/src/syscall.c
@@ -48,9 +48,16 @@
 	return syscall(__NR_io_uring_setup, entries, p);
 }
 
+int __sys_io_uring_enter2(int fd, unsigned to_submit, unsigned min_complete,
+			 unsigned flags, sigset_t *sig, int sz)
+{
+	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
+			flags, sig, sz);
+}
+
 int __sys_io_uring_enter(int fd, unsigned to_submit, unsigned min_complete,
 			 unsigned flags, sigset_t *sig)
 {
-	return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
-			flags, sig, _NSIG / 8);
+	return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig,
+					_NSIG / 8);
 }
diff --git a/src/syscall.h b/src/syscall.h
index 0b00f70..3b94efc 100644
--- a/src/syscall.h
+++ b/src/syscall.h
@@ -12,6 +12,8 @@
 extern int __sys_io_uring_setup(unsigned entries, struct io_uring_params *p);
 extern int __sys_io_uring_enter(int fd, unsigned to_submit,
 	unsigned min_complete, unsigned flags, sigset_t *sig);
+extern int __sys_io_uring_enter2(int fd, unsigned to_submit,
+	unsigned min_complete, unsigned flags, sigset_t *sig, int sz);
 extern int __sys_io_uring_register(int fd, unsigned int opcode, const void *arg,
 	unsigned int nr_args);
 
diff --git a/test/35fa71a030ca-test.c b/test/35fa71a030ca-test.c
index 4ecf211..7f2124b 100644
--- a/test/35fa71a030ca-test.c
+++ b/test/35fa71a030ca-test.c
@@ -24,6 +24,10 @@
 
 #include <linux/futex.h>
 
+#if !defined(SYS_futex) && defined(SYS_futex_time64)
+# define SYS_futex SYS_futex_time64
+#endif
+
 static void sleep_ms(uint64_t ms)
 {
   usleep(ms * 1000);
diff --git a/test/Makefile b/test/Makefile
index 1c0ffc6..6aa1788 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -84,6 +84,7 @@
 	register-restrictions \
 	rename \
 	ring-leak \
+	self \
 	send_recv \
 	send_recvmsg \
 	shared-wq \
@@ -96,11 +97,13 @@
 	sq-poll-dup \
 	sq-poll-kthread \
 	sq-poll-share \
+	sqpoll-sleep \
 	sq-space_left \
 	stdout \
 	submit-reuse \
 	teardowns \
 	timeout \
+	timeout-new \
 	timeout-overflow \
 	unlink \
 	wakeup-hang \
@@ -203,6 +206,7 @@
 	register-restrictions.c \
 	rename.c \
 	ring-leak.c \
+	self.c \
 	send_recvmsg.c \
 	shared-wq.c \
 	short-read.c \
@@ -216,11 +220,13 @@
 	sq-poll-dup.c \
 	sq-poll-kthread.c \
 	sq-poll-share.c \
+	sqpoll-sleep.c \
 	sq-space_left.c \
 	statx.c \
 	stdout.c \
 	submit-reuse.c \
 	teardowns.c \
+	timeout-new.c \
 	timeout-overflow.c \
 	timeout.c \
 	unlink.c \
@@ -241,6 +247,7 @@
 ce593a6c480a-test: XCFLAGS = -lpthread
 wakeup-hang: XCFLAGS = -lpthread
 pipe-eof: XCFLAGS = -lpthread
+timeout-new: XCFLAGS = -lpthread
 
 install: $(test_targets) runtests.sh runtests-loop.sh
 	$(INSTALL) -D -d -m 755 $(datadir)/liburing-test/
diff --git a/test/accept-link.c b/test/accept-link.c
index 34b6153..605e0ec 100644
--- a/test/accept-link.c
+++ b/test/accept-link.c
@@ -156,8 +156,8 @@
 				data->stop = 1;
 				goto out;
 			}
-			fprintf(stderr, "cqe %llu got %d, wanted %d\n",
-					cqe->user_data, cqe->res,
+			fprintf(stderr, "cqe %" PRIu64 " got %d, wanted %d\n",
+					(uint64_t) cqe->user_data, cqe->res,
 					data->expected[idx]);
 			goto err;
 		}
diff --git a/test/cq-peek-batch.c b/test/cq-peek-batch.c
index ee7537c..6c47bec 100644
--- a/test/cq-peek-batch.c
+++ b/test/cq-peek-batch.c
@@ -74,7 +74,8 @@
 	CHECK_BATCH(&ring, got, cqes, 4, 4);
 	for (i=0;i<4;i++) {
 		if (i != cqes[i]->user_data) {
-			printf("Got user_data %lld, expected %d\n", cqes[i]->user_data, i);
+			printf("Got user_data %" PRIu64 ", expected %d\n",
+				(uint64_t) cqes[i]->user_data, i);
 			goto err;
 		}
 	}
@@ -86,7 +87,8 @@
 	CHECK_BATCH(&ring, got, cqes, 4, 4);
 	for (i=0;i<4;i++) {
 		if (i + 4 != cqes[i]->user_data) {
-			printf("Got user_data %lld, expected %d\n", cqes[i]->user_data, i + 4);
+			printf("Got user_data %" PRIu64 ", expected %d\n",
+				(uint64_t) cqes[i]->user_data, i + 4);
 			goto err;
 		}
 	}
diff --git a/test/fallocate.c b/test/fallocate.c
index e662a6a..da90be8 100644
--- a/test/fallocate.c
+++ b/test/fallocate.c
@@ -191,8 +191,8 @@
 			goto err;
 		}
 		if (cqe->res) {
-			fprintf(stderr, "cqe->res=%d,data=%llu\n", cqe->res,
-							cqe->user_data);
+			fprintf(stderr, "cqe->res=%d,data=%" PRIu64 "\n", cqe->res,
+							(uint64_t) cqe->user_data);
 			goto err;
 		}
 		io_uring_cqe_seen(ring, cqe);
diff --git a/test/link-timeout.c b/test/link-timeout.c
index c9aff11..a517c5e 100644
--- a/test/link-timeout.c
+++ b/test/link-timeout.c
@@ -592,7 +592,7 @@
 		switch (cqe->user_data) {
 		case 1:
 			if (cqe->res != -EINTR && cqe->res != -ECANCELED) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
@@ -600,14 +600,14 @@
 		case 2:
 			/* FASTPOLL kernels can cancel successfully */
 			if (cqe->res != -EALREADY && cqe->res != -ETIME) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
 			break;
 		case 3:
 			if (cqe->res != -ECANCELED) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
@@ -687,14 +687,14 @@
 		/* poll cancel really should return -ECANCEL... */
 		case 1:
 			if (cqe->res != -ECANCELED) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
 			break;
 		case 2:
 			if (cqe->res != -ETIME) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
@@ -702,7 +702,7 @@
 		case 3:
 		case 4:
 			if (cqe->res != -ECANCELED) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
@@ -805,7 +805,7 @@
 		switch (cqe->user_data) {
 		case 2:
 			if (cqe->res != -ETIME) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
@@ -815,14 +815,14 @@
 		case 4:
 		case 5:
 			if (cqe->res != -ECANCELED) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
 			break;
 		case 6:
 			if (cqe->res) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
@@ -892,21 +892,21 @@
 		/* poll cancel really should return -ECANCEL... */
 		case 1:
 			if (cqe->res) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
 			break;
 		case 2:
 			if (cqe->res != -ECANCELED) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
 			break;
 		case 3:
 			if (cqe->res != -ETIME) {
-				fprintf(stderr, "Req %llu got %d\n", cqe->user_data,
+				fprintf(stderr, "Req %" PRIu64 " got %d\n", (uint64_t) cqe->user_data,
 						cqe->res);
 				goto err;
 			}
diff --git a/test/poll-link.c b/test/poll-link.c
index b6d4300..4b4f9aa 100644
--- a/test/poll-link.c
+++ b/test/poll-link.c
@@ -142,13 +142,13 @@
 		}
 		idx = cqe->user_data - 1;
 		if (data->is_mask[idx] && !(data->expected[idx] & cqe->res)) {
-			fprintf(stderr, "cqe %llu got %x, wanted mask %x\n",
-					cqe->user_data, cqe->res,
+			fprintf(stderr, "cqe %" PRIu64 " got %x, wanted mask %x\n",
+					(uint64_t) cqe->user_data, cqe->res,
 					data->expected[idx]);
 			goto err;
 		} else if (!data->is_mask[idx] && cqe->res != data->expected[idx]) {
-			fprintf(stderr, "cqe %llu got %d, wanted %d\n",
-					cqe->user_data, cqe->res,
+			fprintf(stderr, "cqe %" PRIu64 " got %d, wanted %d\n",
+					(uint64_t) cqe->user_data, cqe->res,
 					data->expected[idx]);
 			goto err;
 		}
diff --git a/test/read-write.c b/test/read-write.c
index 3bea26f..7f33ad4 100644
--- a/test/read-write.c
+++ b/test/read-write.c
@@ -37,6 +37,23 @@
 	return 0;
 }
 
+static int create_nonaligned_buffers(void)
+{
+	int i;
+
+	vecs = malloc(BUFFERS * sizeof(struct iovec));
+	for (i = 0; i < BUFFERS; i++) {
+		char *p = malloc(3 * BS);
+
+		if (!p)
+			return 1;
+		vecs[i].iov_base = p + (rand() % BS);
+		vecs[i].iov_len = 1 + (rand() % BS);
+	}
+
+	return 0;
+}
+
 static int create_file(const char *file)
 {
 	ssize_t ret;
@@ -56,8 +73,8 @@
 	return ret != FILE_SIZE;
 }
 
-static int __test_io(const char *file, struct io_uring *ring, int write, int buffered,
-		     int sqthread, int fixed, int mixed_fixed, int nonvec,
+static int __test_io(const char *file, struct io_uring *ring, int write,
+		     int buffered, int sqthread, int fixed, int nonvec,
 		     int buf_select, int seq, int exp_len)
 {
 	struct io_uring_sqe *sqe;
@@ -67,10 +84,9 @@
 	off_t offset;
 
 #ifdef VERBOSE
-	fprintf(stdout, "%s: start %d/%d/%d/%d/%d/%d: ", __FUNCTION__, write,
+	fprintf(stdout, "%s: start %d/%d/%d/%d/%d: ", __FUNCTION__, write,
 							buffered, sqthread,
-							fixed, mixed_fixed,
-							nonvec);
+							fixed, nonvec);
 #endif
 	if (sqthread && geteuid()) {
 #ifdef VERBOSE
@@ -156,6 +172,7 @@
 			}
 
 		}
+		sqe->user_data = i;
 		if (sqthread)
 			sqe->flags |= IOSQE_FIXED_FILE;
 		if (buf_select) {
@@ -163,7 +180,6 @@
 				sqe->addr = 0;
 			sqe->flags |= IOSQE_BUFFER_SELECT;
 			sqe->buf_group = buf_select;
-			sqe->user_data = i;
 		}
 		if (seq)
 			offset += BS;
@@ -188,6 +204,14 @@
 				warned = 1;
 				no_read = 1;
 			}
+		} else if (exp_len == -1) {
+			int iov_len = vecs[cqe->user_data].iov_len;
+
+			if (cqe->res != iov_len) {
+				fprintf(stderr, "cqe res %d, wanted %d\n",
+					cqe->res, iov_len);
+				goto err;
+			}
 		} else if (cqe->res != exp_len) {
 			fprintf(stderr, "cqe res %d, wanted %d\n", cqe->res, exp_len);
 			goto err;
@@ -239,7 +263,7 @@
 	return 1;
 }
 static int test_io(const char *file, int write, int buffered, int sqthread,
-		   int fixed, int mixed_fixed, int nonvec)
+		   int fixed, int nonvec, int exp_len)
 {
 	struct io_uring ring;
 	int ret, ring_flags;
@@ -263,8 +287,8 @@
 		return 1;
 	}
 
-	ret = __test_io(file, &ring, write, buffered, sqthread, fixed,
-			mixed_fixed, nonvec, 0, 0, BS);
+	ret = __test_io(file, &ring, write, buffered, sqthread, fixed, nonvec,
+			0, 0, exp_len);
 
 	io_uring_queue_exit(&ring);
 	return ret;
@@ -442,16 +466,48 @@
 		io_uring_cqe_seen(&ring, cqe);
 	}
 
-	ret = __test_io(filename, &ring, 0, 0, 0, 0, 0, nonvec, 1, 1, exp_len);
+	ret = __test_io(filename, &ring, 0, 0, 0, 0, nonvec, 1, 1, exp_len);
 
 	io_uring_queue_exit(&ring);
 	return ret;
 }
 
-static int test_buf_select(const char *filename, int nonvec)
+static int provide_buffers_iovec(struct io_uring *ring, int bgid)
 {
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
+	int i, ret;
+
+	for (i = 0; i < BUFFERS; i++) {
+		sqe = io_uring_get_sqe(ring);
+		io_uring_prep_provide_buffers(sqe, vecs[i].iov_base,
+						vecs[i].iov_len, 1, bgid, i);
+	}
+
+	ret = io_uring_submit(ring);
+	if (ret != BUFFERS) {
+		fprintf(stderr, "submit: %d\n", ret);
+		return -1;
+	}
+
+	for (i = 0; i < BUFFERS; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret) {
+			fprintf(stderr, "wait_cqe=%d\n", ret);
+			return 1;
+		}
+		if (cqe->res < 0) {
+			fprintf(stderr, "cqe->res=%d\n", cqe->res);
+			return 1;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	return 0;
+}
+
+static int test_buf_select(const char *filename, int nonvec)
+{
 	struct io_uring_probe *p;
 	struct io_uring ring;
 	int ret, i;
@@ -476,7 +532,7 @@
 	for (i = 0; i < BUFFERS; i++)
 		memset(vecs[i].iov_base, i, vecs[i].iov_len);
 
-	ret = __test_io(filename, &ring, 1, 0, 0, 0, 0, 0, 0, 1, BS);
+	ret = __test_io(filename, &ring, 1, 0, 0, 0, 0, 0, 1, BS);
 	if (ret) {
 		fprintf(stderr, "failed writing data\n");
 		return 1;
@@ -485,29 +541,67 @@
 	for (i = 0; i < BUFFERS; i++)
 		memset(vecs[i].iov_base, 0x55, vecs[i].iov_len);
 
-	for (i = 0; i < BUFFERS; i++) {
+	ret = provide_buffers_iovec(&ring, 1);
+	if (ret)
+		return ret;
+
+	ret = __test_io(filename, &ring, 0, 0, 0, 0, nonvec, 1, 1, BS);
+	io_uring_queue_exit(&ring);
+	return ret;
+}
+
+static int test_rem_buf(int batch, int sqe_flags)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct io_uring ring;
+	int left, ret, nr = 0;
+	int bgid = 1;
+
+	if (no_buf_select)
+		return 0;
+
+	ret = io_uring_queue_init(64, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring create failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = provide_buffers_iovec(&ring, bgid);
+	if (ret)
+		return ret;
+
+	left = BUFFERS;
+	while (left) {
+		int to_rem = (left < batch) ? left : batch;
+
+		left -= to_rem;
 		sqe = io_uring_get_sqe(&ring);
-		io_uring_prep_provide_buffers(sqe, vecs[i].iov_base,
-						vecs[i].iov_len, 1, 1, i);
+		io_uring_prep_remove_buffers(sqe, to_rem, bgid);
+		sqe->user_data = to_rem;
+		sqe->flags |= sqe_flags;
+		++nr;
 	}
 
 	ret = io_uring_submit(&ring);
-	if (ret != BUFFERS) {
+	if (ret != nr) {
 		fprintf(stderr, "submit: %d\n", ret);
 		return -1;
 	}
 
-	for (i = 0; i < BUFFERS; i++) {
+	for (; nr > 0; nr--) {
 		ret = io_uring_wait_cqe(&ring, &cqe);
-		if (cqe->res < 0) {
+		if (ret) {
+			fprintf(stderr, "wait_cqe=%d\n", ret);
+			return 1;
+		}
+		if (cqe->res != cqe->user_data) {
 			fprintf(stderr, "cqe->res=%d\n", cqe->res);
 			return 1;
 		}
 		io_uring_cqe_seen(&ring, cqe);
 	}
 
-	ret = __test_io(filename, &ring, 0, 0, 0, 0, 0, nonvec, 1, 1, BS);
-
 	io_uring_queue_exit(&ring);
 	return ret;
 }
@@ -594,14 +688,15 @@
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
 	struct io_uring ring;
-	struct rlimit rlim;
+	struct rlimit rlim, old_rlim;
 	int i, fd, ret;
 	loff_t off;
 
-	if (getrlimit(RLIMIT_FSIZE, &rlim) < 0) {
+	if (getrlimit(RLIMIT_FSIZE, &old_rlim) < 0) {
 		perror("getrlimit");
 		return 1;
 	}
+	rlim = old_rlim;
 	rlim.rlim_cur = 64 * 1024;
 	rlim.rlim_max = 64 * 1024;
 	if (setrlimit(RLIMIT_FSIZE, &rlim) < 0) {
@@ -661,6 +756,11 @@
 	io_uring_queue_exit(&ring);
 	close(fd);
 	unlink(".efbig");
+
+	if (setrlimit(RLIMIT_FSIZE, &old_rlim) < 0) {
+		perror("setrlimit");
+		return 1;
+	}
 	return 0;
 err:
 	if (fd != -1)
@@ -690,24 +790,20 @@
 	}
 
 	/* if we don't have nonvec read, skip testing that */
-	if (has_nonvec_read())
-		nr = 64;
-	else
-		nr = 32;
+	nr = has_nonvec_read() ? 32 : 16;
 
 	for (i = 0; i < nr; i++) {
-		int v1, v2, v3, v4, v5, v6;
+		int write = (i & 1) != 0;
+		int buffered = (i & 2) != 0;
+		int sqthread = (i & 4) != 0;
+		int fixed = (i & 8) != 0;
+		int nonvec = (i & 16) != 0;
 
-		v1 = (i & 1) != 0;
-		v2 = (i & 2) != 0;
-		v3 = (i & 4) != 0;
-		v4 = (i & 8) != 0;
-		v5 = (i & 16) != 0;
-		v6 = (i & 32) != 0;
-		ret = test_io(fname, v1, v2, v3, v4, v5, v6);
+		ret = test_io(fname, write, buffered, sqthread, fixed, nonvec,
+			      BS);
 		if (ret) {
-			fprintf(stderr, "test_io failed %d/%d/%d/%d/%d/%d\n",
-					v1, v2, v3, v4, v5, v6);
+			fprintf(stderr, "test_io failed %d/%d/%d/%d/%d\n",
+				write, buffered, sqthread, fixed, nonvec);
 			goto err;
 		}
 	}
@@ -760,6 +856,57 @@
 		goto err;
 	}
 
+	ret = test_rem_buf(1, 0);
+	if (ret) {
+		fprintf(stderr, "test_rem_buf by 1 failed\n");
+		goto err;
+	}
+
+	ret = test_rem_buf(10, 0);
+	if (ret) {
+		fprintf(stderr, "test_rem_buf by 10 failed\n");
+		goto err;
+	}
+
+	ret = test_rem_buf(2, IOSQE_IO_LINK);
+	if (ret) {
+		fprintf(stderr, "test_rem_buf link failed\n");
+		goto err;
+	}
+
+	ret = test_rem_buf(2, IOSQE_ASYNC);
+	if (ret) {
+		fprintf(stderr, "test_rem_buf async failed\n");
+		goto err;
+	}
+
+	srand((unsigned)time(NULL));
+	if (create_nonaligned_buffers()) {
+		fprintf(stderr, "file creation failed\n");
+		goto err;
+	}
+
+	/* test fixed bufs with non-aligned len/offset */
+	for (i = 0; i < nr; i++) {
+		int write = (i & 1) != 0;
+		int buffered = (i & 2) != 0;
+		int sqthread = (i & 4) != 0;
+		int fixed = (i & 8) != 0;
+		int nonvec = (i & 16) != 0;
+
+		/* direct IO requires alignment, skip it */
+		if (!buffered || !fixed || nonvec)
+			continue;
+
+		ret = test_io(fname, write, buffered, sqthread, fixed, nonvec,
+			      -1);
+		if (ret) {
+			fprintf(stderr, "test_io failed %d/%d/%d/%d/%d\n",
+				write, buffered, sqthread, fixed, nonvec);
+			goto err;
+		}
+	}
+
 	if (fname != argv[1])
 		unlink(fname);
 	return 0;
diff --git a/test/register-restrictions.c b/test/register-restrictions.c
index 4f64c41..04a0ed9 100644
--- a/test/register-restrictions.c
+++ b/test/register-restrictions.c
@@ -406,8 +406,8 @@
 		case 2: /* writev - flags = IOSQE_FIXED_FILE | IOSQE_ASYNC */
 		case 3: /* writev - flags = IOSQE_FIXED_FILE | IOSQE_IO_LINK */
 			if (cqe->res != sizeof(ptr)) {
-				fprintf(stderr, "write res: %d user_data %lld \n",
-					cqe->res, cqe->user_data);
+				fprintf(stderr, "write res: %d user_data %" PRIu64 "\n",
+					cqe->res, (uint64_t) cqe->user_data);
 				return TEST_FAILED;
 			}
 
@@ -417,8 +417,8 @@
 		case 6: /* writev - flags = IOSQE_ASYNC */
 		case 7: /* writev - flags = 0 */
 			if (cqe->res != -EACCES) {
-				fprintf(stderr, "write res: %d user_data %lld \n",
-					cqe->res, cqe->user_data);
+				fprintf(stderr, "write res: %d user_data %" PRIu64 "\n",
+					cqe->res, (uint64_t) cqe->user_data);
 				return TEST_FAILED;
 			}
 			break;
diff --git a/test/rename.c b/test/rename.c
index eea341f..af09d65 100644
--- a/test/rename.c
+++ b/test/rename.c
@@ -12,8 +12,6 @@
 
 #include "liburing.h"
 
-static int no_rename;
-
 static int test_rename(struct io_uring *ring, const char *old, const char *new)
 {
 	struct io_uring_cqe *cqe;
@@ -44,19 +42,9 @@
 		fprintf(stderr, "wait completion %d\n", ret);
 		goto err;
 	}
-	if (cqe->res < 0) {
-		if (cqe->res == -EBADF || cqe->res == -EINVAL) {
-			fprintf(stdout, "Rename not supported, skipping\n");
-			no_rename = 1;
-			goto out;
-		}
-		fprintf(stderr, "rename: %s\n", strerror(-cqe->res));
-		goto err;
-	}
-
-out:
+	ret = cqe->res;
 	io_uring_cqe_seen(ring, cqe);
-	return 0;
+	return ret;
 err:
 	return 1;
 }
@@ -111,14 +99,15 @@
 	}
 
 	ret = test_rename(&ring, src, dst);
-	if (ret) {
-		fprintf(stderr, "test_rename failed\n");
-		return ret;
-	}
-	if (no_rename) {
-		unlink(src);
-		goto out;
-	}
+	if (ret < 0) {
+		if (ret == -EBADF || ret == -EINVAL) {
+			fprintf(stdout, "Rename not supported, skipping\n");
+			goto out;
+		}
+		fprintf(stderr, "rename: %s\n", strerror(-ret));
+		goto err;
+	} else if (ret)
+		goto err;
 
 	if (stat_file(src) != ENOENT) {
 		fprintf(stderr, "stat got %s\n", strerror(ret));
@@ -130,7 +119,16 @@
 		return 1;
 	}
 
+	ret = test_rename(&ring, "/x/y/1/2", "/2/1/y/x");
+	if (ret != -ENOENT) {
+		fprintf(stderr, "test_rename invalid failed: %d\n", ret);
+		return ret;
+	}
 out:
 	unlink(dst);
 	return 0;
+err:
+	unlink(src);
+	unlink(dst);
+	return 1;
 }
diff --git a/test/self.c b/test/self.c
new file mode 100644
index 0000000..8895781
--- /dev/null
+++ b/test/self.c
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: test that pathname resolution works from async context when
+ * using /proc/self/ which should be the original submitting task, not the
+ * async worker.
+ *
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "liburing.h"
+
+static int io_openat2(struct io_uring *ring, const char *path, int dfd)
+{
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct open_how how;
+	int ret;
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "get sqe failed\n");
+		goto err;
+	}
+	memset(&how, 0, sizeof(how));
+	how.flags = O_RDONLY;
+	io_uring_prep_openat2(sqe, dfd, path, &how);
+
+	ret = io_uring_submit(ring);
+	if (ret <= 0) {
+		fprintf(stderr, "sqe submit failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = io_uring_wait_cqe(ring, &cqe);
+	if (ret < 0) {
+		fprintf(stderr, "wait completion %d\n", ret);
+		goto err;
+	}
+	ret = cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	return ret;
+err:
+	return -1;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring;
+	char buf[64];
+	int ret;
+
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed\n");
+		return 1;
+	}
+
+	ret = io_openat2(&ring, "/proc/self/comm", -1);
+	if (ret < 0) {
+		if (ret == -EINVAL) {
+			fprintf(stdout, "openat2 not supported, skipping\n");
+			return 0;
+		}
+		fprintf(stderr, "openat2 failed: %s\n", strerror(-ret));
+		return 1;
+	}
+
+	memset(buf, 0, sizeof(buf));
+	ret = read(ret, buf, sizeof(buf));
+	if (ret < 0) {
+		perror("read");
+		return 1;
+	}
+
+	if (strncmp(buf, "self", 4)) {
+		fprintf(stderr, "got comm=<%s>, wanted <self>\n", buf);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/test/send_recvmsg.c b/test/send_recvmsg.c
index 50c8e94..6b513bc 100644
--- a/test/send_recvmsg.c
+++ b/test/send_recvmsg.c
@@ -11,6 +11,7 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <pthread.h>
+#include <assert.h>
 
 #include "liburing.h"
 
@@ -24,7 +25,10 @@
 #define BUF_BGID	10
 #define BUF_BID		89
 
-static int recv_prep(struct io_uring *ring, struct iovec *iov, int bgid)
+#define MAX_IOV_COUNT	10
+
+static int recv_prep(struct io_uring *ring, struct iovec iov[], int iov_count,
+		     int bgid)
 {
 	struct sockaddr_in saddr;
 	struct msghdr msg;
@@ -53,11 +57,6 @@
 		goto err;
 	}
 
-	memset(&msg, 0, sizeof(msg));
-        msg.msg_namelen = sizeof(struct sockaddr_in);
-	msg.msg_iov = iov;
-	msg.msg_iovlen = 1;
-
 	sqe = io_uring_get_sqe(ring);
 	if (!sqe) {
 		fprintf(stderr, "io_uring_get_sqe failed\n");
@@ -66,11 +65,15 @@
 
 	io_uring_prep_recvmsg(sqe, sockfd, &msg, 0);
 	if (bgid) {
-		sqe->user_data = (unsigned long) iov->iov_base;
 		iov->iov_base = NULL;
 		sqe->flags |= IOSQE_BUFFER_SELECT;
 		sqe->buf_group = bgid;
+		iov_count = 1;
 	}
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_namelen = sizeof(struct sockaddr_in);
+	msg.msg_iov = iov;
+	msg.msg_iovlen = iov_count;
 
 	ret = io_uring_submit(ring);
 	if (ret <= 0) {
@@ -89,9 +92,10 @@
 	pthread_mutex_t *mutex;
 	int buf_select;
 	int no_buf_add;
+	int iov_count;
 };
 
-static int do_recvmsg(struct io_uring *ring, struct iovec *iov,
+static int do_recvmsg(struct io_uring *ring, char buf[MAX_MSG + 1],
 		      struct recv_data *rd)
 {
 	struct io_uring_cqe *cqe;
@@ -112,8 +116,6 @@
 		int bid = cqe->flags >> 16;
 		if (bid != BUF_BID)
 			fprintf(stderr, "Buffer ID mismatch %d\n", bid);
-		/* just for passing the pointer to str */
-		iov->iov_base = (void *) (uintptr_t) cqe->user_data;
 	}
 
 	if (rd->no_buf_add && rd->buf_select) {
@@ -127,7 +129,7 @@
 		goto err;
 	}
 
-	if (strcmp(str, iov->iov_base)) {
+	if (strncmp(str, buf, MAX_MSG + 1)) {
 		fprintf(stderr, "string mismatch\n");
 		goto err;
 	}
@@ -137,20 +139,34 @@
 	return 1;
 }
 
+/*
+ * Split buf over iov_to_use entries: 1 byte each, the last takes the rest.
+ */
+static void init_iov(struct iovec iov[MAX_IOV_COUNT], int iov_to_use,
+		     char buf[MAX_MSG + 1])
+{
+	int idx, tail = iov_to_use - 1;
+
+	assert(0 < iov_to_use && iov_to_use <= MAX_IOV_COUNT);
+	for (idx = 0; idx <= tail; idx++) {
+		iov[idx].iov_base = &buf[idx];
+		iov[idx].iov_len = (idx < tail) ? 1 : MAX_MSG - tail;
+	}
+}
+
 static void *recv_fn(void *data)
 {
 	struct recv_data *rd = data;
 	pthread_mutex_t *mutex = rd->mutex;
 	char buf[MAX_MSG + 1];
-	struct iovec iov = {
-		.iov_base = buf,
-		.iov_len = sizeof(buf) - 1,
-	};
+	struct iovec iov[MAX_IOV_COUNT];
 	struct io_uring_sqe *sqe;
 	struct io_uring_cqe *cqe;
 	struct io_uring ring;
 	int ret;
 
+	init_iov(iov, rd->iov_count, buf);
+
 	ret = io_uring_queue_init(1, &ring, 0);
 	if (ret) {
 		fprintf(stderr, "queue init failed: %d\n", ret);
@@ -184,14 +200,14 @@
 		}
 	}
 
-	ret = recv_prep(&ring, &iov, rd->buf_select ? BUF_BGID : 0);
+	ret = recv_prep(&ring, iov, rd->iov_count, rd->buf_select ? BUF_BGID : 0);
 	if (ret) {
 		fprintf(stderr, "recv_prep failed: %d\n", ret);
 		goto err;
 	}
 
 	pthread_mutex_unlock(mutex);
-	ret = do_recvmsg(&ring, &iov, rd);
+	ret = do_recvmsg(&ring, buf, rd);
 
 	io_uring_queue_exit(&ring);
 
@@ -261,7 +277,7 @@
 	return 1;
 }
 
-static int test(int buf_select, int no_buf_add)
+static int test(int buf_select, int no_buf_add, int iov_count)
 {
 	struct recv_data rd;
 	pthread_mutexattr_t attr;
@@ -278,6 +294,7 @@
 	rd.mutex = &mutex;
 	rd.buf_select = buf_select;
 	rd.no_buf_add = no_buf_add;
+	rd.iov_count = iov_count;
 	ret = pthread_create(&recv_thread, NULL, recv_fn, &rd);
 	if (ret) {
 		fprintf(stderr, "Thread create failed\n");
@@ -299,19 +316,25 @@
 	if (argc > 1)
 		return 0;
 
-	ret = test(0, 0);
+	ret = test(0, 0, 1);
 	if (ret) {
 		fprintf(stderr, "send_recvmsg 0 failed\n");
 		return 1;
 	}
 
-	ret = test(1, 0);
+	ret = test(0, 0, 10);
+	if (ret) {
+		fprintf(stderr, "send_recvmsg multi iov failed\n");
+		return 1;
+	}
+
+	ret = test(1, 0, 1);
 	if (ret) {
 		fprintf(stderr, "send_recvmsg 1 0 failed\n");
 		return 1;
 	}
 
-	ret = test(1, 1);
+	ret = test(1, 1, 1);
 	if (ret) {
 		fprintf(stderr, "send_recvmsg 1 1 failed\n");
 		return 1;
diff --git a/test/sqpoll-sleep.c b/test/sqpoll-sleep.c
new file mode 100644
index 0000000..7ffd0e5
--- /dev/null
+++ b/test/sqpoll-sleep.c
@@ -0,0 +1,68 @@
+/*
+ * Test that the sqthread goes to sleep around the specified time, and that
+ * the NEED_WAKEUP flag is then set.
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include "liburing.h"
+
+/* Elapsed time from s to e, in milliseconds. */
+static unsigned long long mtime_since(const struct timeval *s,
+				      const struct timeval *e)
+{
+	long long secs = e->tv_sec - s->tv_sec;
+	long long usecs = e->tv_usec - s->tv_usec;
+
+	/* borrow one second when the microsecond part went negative */
+	if (secs > 0 && usecs < 0) {
+		secs -= 1;
+		usecs += 1000000;
+	}
+
+	/* whole milliseconds; the sub-millisecond remainder is truncated */
+	return secs * 1000 + usecs / 1000;
+}
+
+static unsigned long long mtime_since_now(struct timeval *tv)
+{
+	struct timeval now;	/* filled with the current time of day */
+
+	gettimeofday(&now, NULL);
+	return mtime_since(tv, &now);	/* milliseconds elapsed since *tv */
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring_params params = {};
+	struct io_uring ring;
+	struct timeval start;
+	int err;
+
+	if (argc > 1)
+		return 0;
+
+	/* SQPOLL ring with sq_thread_idle set to 100 */
+	params.sq_thread_idle = 100;
+	params.flags = IORING_SETUP_SQPOLL;
+	err = io_uring_queue_init_params(1, &ring, &params);
+	if (err && geteuid()) {
+		printf("%s: skipped, not root\n", argv[0]);
+		return 0;
+	} else if (err) {
+		fprintf(stderr, "queue_init=%d\n", err);
+		return 1;
+	}
+
+	/* poll every msec, for up to one second, for the wakeup flag */
+	gettimeofday(&start, NULL);
+	for (;;) {
+		usleep(1000);
+		if (*ring.sq.kflags & IORING_SQ_NEED_WAKEUP)
+			return 0;
+		if (mtime_since_now(&start) >= 1000)
+			return 1;
+	}
+}
diff --git a/test/timeout-new.c b/test/timeout-new.c
new file mode 100644
index 0000000..45b9a14
--- /dev/null
+++ b/test/timeout-new.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Description: tests for getevents timeout
+ *
+ */
+#include <stdio.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <pthread.h>
+#include "liburing.h"
+
+#define TIMEOUT_MSEC	200
+#define TIMEOUT_SEC	10
+
+int thread_ret0, thread_ret1;	/* wait results stored by the two reaper threads */
+int cnt = 0;			/* bumped by each reaper thread before it waits */
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;	/* guards cnt */
+
+static void msec_to_ts(struct __kernel_timespec *ts, unsigned int msec)
+{
+	ts->tv_nsec = (msec % 1000) * 1000000;	/* sub-second part, in nsec */
+	ts->tv_sec = msec / 1000;		/* whole seconds */
+}
+
+/* Elapsed time from s to e, in milliseconds. */
+static unsigned long long mtime_since(const struct timeval *s,
+				      const struct timeval *e)
+{
+	long long secs = e->tv_sec - s->tv_sec;
+	long long usecs = e->tv_usec - s->tv_usec;
+
+	/* borrow one second when the microsecond part went negative */
+	if (secs > 0 && usecs < 0) {
+		secs -= 1;
+		usecs += 1000000;
+	}
+
+	/* whole milliseconds; the sub-millisecond remainder is truncated */
+	return secs * 1000 + usecs / 1000;
+}
+
+static unsigned long long mtime_since_now(struct timeval *tv)
+{
+	struct timeval now;	/* filled with the current time of day */
+
+	gettimeofday(&now, NULL);
+	return mtime_since(tv, &now);	/* milliseconds elapsed since *tv */
+}
+
+
+/* Queue a nop and expect its completion to arrive before the wait times out. */
+static int test_return_before_timeout(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
+	struct __kernel_timespec ts;
+	struct io_uring_cqe *cqe;
+	int rc;
+
+	if (sqe == NULL) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		return 1;
+	}
+	io_uring_prep_nop(sqe);
+
+	rc = io_uring_submit(ring);
+	if (rc < 1) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, rc);
+		return 1;
+	}
+
+	/* any negative result, a timeout included, fails the test */
+	msec_to_ts(&ts, TIMEOUT_MSEC);
+	rc = io_uring_wait_cqe_timeout(ring, &cqe, &ts);
+	if (rc < 0) {
+		fprintf(stderr, "%s: timeout error: %d\n", __FUNCTION__, rc);
+		return 1;
+	}
+
+	io_uring_cqe_seen(ring, cqe);
+	return 0;
+}
+
+/* With nothing submitted here, the wait must end in -ETIME near TIMEOUT_MSEC. */
+static int test_return_after_timeout(struct io_uring *ring)
+{
+	const unsigned long long lo = TIMEOUT_MSEC / 2, hi = (TIMEOUT_MSEC * 3) / 2;
+	struct __kernel_timespec ts;
+	unsigned long long elapsed;
+	struct io_uring_cqe *cqe;
+	struct timeval start;
+	int rc;
+
+	msec_to_ts(&ts, TIMEOUT_MSEC);
+	gettimeofday(&start, NULL);
+	rc = io_uring_wait_cqe_timeout(ring, &cqe, &ts);
+	elapsed = mtime_since_now(&start);
+	if (rc != -ETIME) {
+		fprintf(stderr, "%s: timeout error: %d\n", __FUNCTION__, rc);
+		return 1;
+	}
+	if (elapsed < lo || elapsed > hi) {	/* accept 50%..150% of the target */
+		fprintf(stderr, "%s: Timeout seems wonky (got %llu)\n", __FUNCTION__, elapsed);
+		return 1;
+	}
+	return 0;
+}
+
+/* Announce this waiter via cnt, then wait up to TIMEOUT_SEC for a cqe. */
+int __reap_thread_fn(void *data) {
+	struct io_uring_cqe *cqe;
+	struct __kernel_timespec ts;
+
+	msec_to_ts(&ts, TIMEOUT_SEC * 1000);	/* helper takes milliseconds */
+	pthread_mutex_lock(&mutex);
+	cnt++;
+	pthread_mutex_unlock(&mutex);
+	return io_uring_wait_cqe_timeout(data, &cqe, &ts);	/* data: the ring */
+}
+
+void *reap_thread_fn0(void *data) {	/* pthread start routine, data is the ring */
+	thread_ret0 = __reap_thread_fn(data);	/* wait result, checked after join */
+	return NULL;
+}
+
+void *reap_thread_fn1(void *data) {	/* pthread start routine, data is the ring */
+	thread_ret1 = __reap_thread_fn(data);	/* wait result, checked after join */
+	return NULL;
+}
+
+/*
+ * Issue a sqe in the main thread while two child threads wait for it at the
+ * same time, to check that the wait timeout works with several waiters.
+ */
+int test_multi_threads_timeout() {
+	struct io_uring ring;
+	int ret, failed = 1;
+	bool both_wait = false;
+	pthread_t reap_thread0, reap_thread1;
+	struct io_uring_sqe *sqe;
+
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "%s: ring setup failed: %d\n", __FUNCTION__, ret);
+		return 1;
+	}
+
+	pthread_create(&reap_thread0, NULL, reap_thread_fn0, &ring);
+	pthread_create(&reap_thread1, NULL, reap_thread_fn1, &ring);
+
+	/*
+	 * Let both threads get into io_uring_wait_cqe_timeout() before the sqe
+	 * is issued, as far as we can, so that two waiters sit in the ctx->wait
+	 * queue. That way we can test if a cqe wakes up two threads at once.
+	 */
+	while(!both_wait) {
+		pthread_mutex_lock(&mutex);
+		if (cnt == 2)
+			both_wait = true;
+		pthread_mutex_unlock(&mutex);
+		sleep(1);
+	}
+
+	sqe = io_uring_get_sqe(&ring);
+	if (!sqe) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		goto join;
+	}
+
+	io_uring_prep_nop(sqe);
+
+	ret = io_uring_submit(&ring);
+	if (ret <= 0)
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret);
+	else
+		failed = 0;
+join:
+	/* both threads use the on-stack ring, so reap them on every path */
+	pthread_join(reap_thread0, NULL);
+	pthread_join(reap_thread1, NULL);
+	io_uring_queue_exit(&ring);
+	if (failed)
+		return 1;
+	if ((thread_ret0 && thread_ret0 != -ETIME) || (thread_ret1 && thread_ret1 != -ETIME)) {
+		fprintf(stderr, "%s: thread wait cqe timeout failed: %d %d\n",
+				__FUNCTION__, thread_ret0, thread_ret1);
+		return 1;
+	}
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	struct io_uring ring_normal, ring_sq;
+	int ret;
+
+	if (argc > 1)
+		return 0;
+
+	ret = io_uring_queue_init(8, &ring_normal, 0);	/* ring without setup flags */
+	if (ret) {
+		fprintf(stderr, "ring_normal setup failed: %d\n", ret);
+		return 1;
+	}
+	if (!(ring_normal.features & IORING_FEAT_EXT_ARG)) {	/* counts as a failure */
+		fprintf(stderr, "feature IORING_FEAT_EXT_ARG not supported.\n");
+		return 1;
+	}
+
+	ret = test_return_before_timeout(&ring_normal);
+	if (ret) {
+		fprintf(stderr, "ring_normal: test_return_before_timeout failed\n");
+		return ret;
+	}
+
+	ret = test_return_after_timeout(&ring_normal);
+	if (ret) {
+		fprintf(stderr, "ring_normal: test_return_after_timeout failed\n");
+		return ret;
+	}
+
+	ret = io_uring_queue_init(8, &ring_sq, IORING_SETUP_SQPOLL);	/* same two checks on an SQPOLL ring */
+	if (ret) {
+		fprintf(stderr, "ring_sq setup failed: %d\n", ret);
+		return 1;
+	}
+
+	ret = test_return_before_timeout(&ring_sq);
+	if (ret) {
+		fprintf(stderr, "ring_sq: test_return_before_timeout failed\n");
+		return ret;
+	}
+
+	ret = test_return_after_timeout(&ring_sq);
+	if (ret) {
+		fprintf(stderr, "ring_sq: test_return_after_timeout failed\n");
+		return ret;
+	}
+
+	ret = test_multi_threads_timeout();	/* sets up a ring of its own */
+	if (ret) {
+		fprintf(stderr, "test_multi_threads_timeout failed\n");
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/test/timeout.c b/test/timeout.c
index 7e9f11d..9c8211c 100644
--- a/test/timeout.c
+++ b/test/timeout.c
@@ -965,10 +965,213 @@
 	return 1;
 }
 
+/*
+ * Arm a 10 s timeout, update it to @ms (optionally absolute, async and/or
+ * linked behind a nop) and expect -ETIME in under half the original time.
+ */
+static int test_update_timeout(struct io_uring *ring, unsigned long ms,
+				bool abs, bool async, bool linked)
+{
+	struct io_uring_sqe *sqe;
+	struct io_uring_cqe *cqe;
+	struct __kernel_timespec ts, ts_upd;
+	unsigned long long exp_ms, base_ms = 10000;
+	struct timeval tv;
+	int ret, i, nr = 2;	/* nr: completions expected (timeout + update) */
+	__u32 mode = abs ? IORING_TIMEOUT_ABS : 0;
+
+	msec_to_ts(&ts_upd, ms);
+	gettimeofday(&tv, NULL);
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		goto err;
+	}
+	msec_to_ts(&ts, base_ms);
+	io_uring_prep_timeout(sqe, &ts, 0, 0);
+	sqe->user_data = 1;
+	if (linked) {	/* nop whose link flag chains it to the update below */
+		sqe = io_uring_get_sqe(ring);
+		if (!sqe) {
+			fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+			goto err;
+		}
+		io_uring_prep_nop(sqe);
+		sqe->user_data = 3;
+		sqe->flags = IOSQE_IO_LINK;
+		if (async)
+			sqe->flags |= IOSQE_ASYNC;
+		nr++;
+	}
+
+	sqe = io_uring_get_sqe(ring);
+	if (!sqe) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		goto err;
+	}
+	io_uring_prep_timeout_update(sqe, &ts_upd, 1, mode);	/* 1: target user_data */
+	sqe->user_data = 2;
+	if (async)
+		sqe->flags |= IOSQE_ASYNC;
+
+	ret = io_uring_submit(ring);
+	if (ret != nr) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, ret);
+		goto err;
+	}
+
+	for (i = 0; i < nr; i++) {
+		ret = io_uring_wait_cqe(ring, &cqe);
+		if (ret < 0) {
+			fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, ret);
+			goto err;
+		}
+		switch (cqe->user_data) {
+		case 1:	/* the timeout itself */
+			if (cqe->res != -ETIME) {
+				fprintf(stderr, "%s: got %d, wanted %d\n",
+						__FUNCTION__, cqe->res, -ETIME);
+				goto err;
+			}
+			break;
+		case 2:	/* the update request */
+			if (cqe->res != 0) {
+				fprintf(stderr, "%s: got %d, wanted %d\n",
+						__FUNCTION__, cqe->res, 0);
+				goto err;
+			}
+			break;
+		case 3:	/* the linked nop */
+			if (cqe->res != 0) {
+				fprintf(stderr, "nop failed\n");
+				goto err;
+			}
+			break;
+		default:
+			goto err;
+		}
+		io_uring_cqe_seen(ring, cqe);
+	}
+
+	exp_ms = mtime_since_now(&tv);
+	if (exp_ms >= base_ms / 2) {
+		fprintf(stderr, "too long, timeout wasn't updated\n");
+		goto err;
+	}
+	if (ms >= 1000 && !abs && exp_ms < ms / 2) {
+		fprintf(stderr, "fired too early, potentially updated to 0 ms "
+					"instead of %lu\n", ms);
+		goto err;
+	}
+	return 0;
+err:
+	return 1;
+}
+
+/*
+ * Update a timeout by user_data 42, which this function never queues. Returns
+ * 0 if the cqe carries -ENOENT, else the raw cqe result; 1 on setup errors.
+ */
+static int test_update_nonexistent_timeout(struct io_uring *ring)
+{
+	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
+	struct __kernel_timespec ts;
+	struct io_uring_cqe *cqe;
+	int res;
+
+	if (sqe == NULL) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		return 1;
+	}
+	msec_to_ts(&ts, 0);
+	io_uring_prep_timeout_update(sqe, &ts, 42, 0);
+
+	res = io_uring_submit(ring);
+	if (res != 1) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, res);
+		return 1;
+	}
+
+	res = io_uring_wait_cqe(ring, &cqe);
+	if (res < 0) {
+		fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, res);
+		return 1;
+	}
+
+	/* -ENOENT is the expected answer and maps to success */
+	res = (cqe->res == -ENOENT) ? 0 : cqe->res;
+	io_uring_cqe_seen(ring, cqe);
+	return res;
+}
+
+/*
+ * A timeout remove carrying IORING_TIMEOUT_ABS and a timeout update with a
+ * flags argument of -1 are both required to complete with -EINVAL.
+ */
+static int test_update_invalid_flags(struct io_uring *ring)
+{
+	struct __kernel_timespec ts;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	int rc;
+
+	/* case 1: remove request with a flag set */
+	sqe = io_uring_get_sqe(ring);
+	if (sqe == NULL) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		return 1;
+	}
+	io_uring_prep_timeout_remove(sqe, 0, IORING_TIMEOUT_ABS);
+	rc = io_uring_submit(ring);
+	if (rc != 1) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, rc);
+		return 1;
+	}
+
+	rc = io_uring_wait_cqe(ring, &cqe);
+	if (rc < 0) {
+		fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, rc);
+		return 1;
+	}
+	if (cqe->res != -EINVAL) {
+		fprintf(stderr, "%s: got %d, wanted %d\n",
+				__FUNCTION__, cqe->res, -EINVAL);
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+
+	/* case 2: update request with a flags argument of -1 */
+	sqe = io_uring_get_sqe(ring);
+	if (sqe == NULL) {
+		fprintf(stderr, "%s: get sqe failed\n", __FUNCTION__);
+		return 1;
+	}
+	msec_to_ts(&ts, 0);
+	io_uring_prep_timeout_update(sqe, &ts, 0, -1);
+	rc = io_uring_submit(ring);
+	if (rc != 1) {
+		fprintf(stderr, "%s: sqe submit failed: %d\n", __FUNCTION__, rc);
+		return 1;
+	}
+
+	rc = io_uring_wait_cqe(ring, &cqe);
+	if (rc < 0) {
+		fprintf(stderr, "%s: wait completion %d\n", __FUNCTION__, rc);
+		return 1;
+	}
+	if (cqe->res != -EINVAL) {
+		fprintf(stderr, "%s: got %d, wanted %d\n",
+				__FUNCTION__, cqe->res, -EINVAL);
+		return 1;
+	}
+	io_uring_cqe_seen(ring, cqe);
+	return 0;
+}
 
 int main(int argc, char *argv[])
 {
-	struct io_uring ring;
+	struct io_uring ring, sqpoll_ring;
+	bool has_timeout_update, sqpoll;
 	int ret;
 
 	if (argc > 1)
@@ -980,6 +1183,9 @@
 		return 1;
 	}
 
+	ret = io_uring_queue_init(8, &sqpoll_ring, IORING_SETUP_SQPOLL);
+	sqpoll = !ret;
+
 	ret = test_single_timeout(&ring);
 	if (ret) {
 		fprintf(stderr, "test_single_timeout failed\n");
@@ -1054,6 +1260,76 @@
 		return ret;
 	}
 
+	/* io_uring_wait_cqes() may have left a timeout, reinit ring */
+	io_uring_queue_exit(&ring);
+	ret = io_uring_queue_init(8, &ring, 0);
+	if (ret) {
+		fprintf(stderr, "ring setup failed\n");
+		return 1;
+	}
+
+	ret = test_update_nonexistent_timeout(&ring);
+	has_timeout_update = (ret != -EINVAL);
+	if (has_timeout_update) {
+		if (ret) {
+			fprintf(stderr, "test_update_nonexistent_timeout failed\n");
+			return ret;
+		}
+
+		ret = test_update_invalid_flags(&ring);
+		if (ret) {
+			fprintf(stderr, "test_update_invalid_flags failed\n");
+			return ret;
+		}
+
+		ret = test_update_timeout(&ring, 0, false, false, false);
+		if (ret) {
+			fprintf(stderr, "test_update_timeout failed\n");
+			return ret;
+		}
+
+		ret = test_update_timeout(&ring, 1, false, false, false);
+		if (ret) {
+			fprintf(stderr, "test_update_timeout 1ms failed\n");
+			return ret;
+		}
+
+		ret = test_update_timeout(&ring, 1000, false, false, false);
+		if (ret) {
+			fprintf(stderr, "test_update_timeout 1s failed\n");
+			return ret;
+		}
+
+		ret = test_update_timeout(&ring, 0, true, true, false);
+		if (ret) {
+			fprintf(stderr, "test_update_timeout abs failed\n");
+			return ret;
+		}
+
+
+		ret = test_update_timeout(&ring, 0, false, true, false);
+		if (ret) {
+			fprintf(stderr, "test_update_timeout async failed\n");
+			return ret;
+		}
+
+		ret = test_update_timeout(&ring, 0, false, false, true);
+		if (ret) {
+			fprintf(stderr, "test_update_timeout linked failed\n");
+			return ret;
+		}
+
+		if (sqpoll) {
+			ret = test_update_timeout(&sqpoll_ring, 0, false, false,
+						  false);
+			if (ret) {
+				fprintf(stderr, "test_update_timeout sqpoll"
+						"failed\n");
+				return ret;
+			}
+		}
+	}
+
 	/*
 	 * this test must go last, it kills the ring
 	 */
@@ -1063,5 +1339,7 @@
 		return ret;
 	}
 
+	if (sqpoll)
+		io_uring_queue_exit(&sqpoll_ring);
 	return 0;
 }
diff --git a/test/unlink.c b/test/unlink.c
index c217ba4..f8c7639 100644
--- a/test/unlink.c
+++ b/test/unlink.c
@@ -12,8 +12,6 @@
 
 #include "liburing.h"
 
-static int no_unlink;
-
 static int test_unlink(struct io_uring *ring, const char *old)
 {
 	struct io_uring_cqe *cqe;
@@ -38,19 +36,9 @@
 		fprintf(stderr, "wait completion %d\n", ret);
 		goto err;
 	}
-	if (cqe->res < 0) {
-		if (cqe->res == -EBADF || cqe->res == -EINVAL) {
-			fprintf(stdout, "Unlink not supported, skipping\n");
-			no_unlink = 1;
-			goto out;
-		}
-		fprintf(stderr, "rename: %s\n", strerror(-cqe->res));
-		goto err;
-	}
-
-out:
+	ret = cqe->res;
 	io_uring_cqe_seen(ring, cqe);
-	return 0;
+	return ret;
 err:
 	return 1;
 }
@@ -93,14 +81,16 @@
 	}
 
 	ret = test_unlink(&ring, buf);
-	if (ret) {
-		fprintf(stderr, "test_rename failed\n");
-		return ret;
-	}
-	if (no_unlink) {
-		unlink(buf);
-		return 0;
-	}
+	if (ret < 0) {
+		if (ret == -EBADF || ret == -EINVAL) {
+			fprintf(stdout, "Unlink not supported, skipping\n");
+			unlink(buf);
+			return 0;
+		}
+		fprintf(stderr, "rename: %s\n", strerror(-ret));
+		goto err;
+	} else if (ret)
+		goto err;
 
 	ret = stat_file(buf);
 	if (ret != ENOENT) {
@@ -108,5 +98,14 @@
 		return 1;
 	}
 
+	ret = test_unlink(&ring, "/3/2/3/1/z/y");
+	if (ret != -ENOENT) {
+		fprintf(stderr, "invalid unlink got %s\n", strerror(-ret));
+		return 1;
+	}
+
 	return 0;
+err:
+	unlink(buf);
+	return 1;
 }