diff mbox series

[10/11] io_uring/epoll: add support for provided buffers

Message ID 20250204194814.393112-11-axboe@kernel.dk (mailing list archive)
State New
Headers show
Series io_uring epoll wait support | expand

Commit Message

Jens Axboe Feb. 4, 2025, 7:46 p.m. UTC
This will be a prerequisite for adding multishot support, but it can be
used with single-shot support as well. It works like any other request
that supports provided buffers: set addr to NULL, ensure that
sqe->buf_group is set, and set IOSQE_BUFFER_SELECT in sqe->flags. Epoll
wait will then pick a buffer from that group and store the events there.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/epoll.c | 31 +++++++++++++++++++++++++++----
 io_uring/opdef.c |  1 +
 2 files changed, 28 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/io_uring/epoll.c b/io_uring/epoll.c
index 5a47f0cce647..134112e7a505 100644
--- a/io_uring/epoll.c
+++ b/io_uring/epoll.c
@@ -10,6 +10,7 @@ 
 #include <uapi/linux/io_uring.h>
 
 #include "io_uring.h"
+#include "kbuf.h"
 #include "epoll.h"
 #include "poll.h"
 
@@ -189,11 +190,13 @@  int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
 
-	if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+	if (sqe->off || sqe->rw_flags || sqe->splice_fd_in)
 		return -EINVAL;
 
 	iew->maxevents = READ_ONCE(sqe->len);
 	iew->events = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	if (req->flags & REQ_F_BUFFER_SELECT && iew->events)
+		return -EINVAL;
 
 	iew->wait.flags = 0;
 	iew->wait.private = req;
@@ -207,22 +210,42 @@  int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
+	struct epoll_event __user *evs = iew->events;
 	struct io_ring_ctx *ctx = req->ctx;
+	int maxevents = iew->maxevents;
+	unsigned int cflags = 0;
 	int ret;
 
 	io_ring_submit_lock(ctx, issue_flags);
 
-	ret = epoll_wait(req->file, iew->events, iew->maxevents, NULL, &iew->wait);
+	if (io_do_buffer_select(req)) {
+		size_t len = iew->maxevents * sizeof(*evs);
+
+		evs = io_buffer_select(req, &len, 0);
+		if (!evs) {
+			ret = -ENOBUFS;
+			goto err;
+		}
+		maxevents = len / sizeof(*evs);
+	}
+
+	ret = epoll_wait(req->file, evs, maxevents, NULL, &iew->wait);
 	if (ret == -EIOCBQUEUED) {
+		io_kbuf_recycle(req, 0);
 		if (hlist_unhashed(&req->hash_node))
 			hlist_add_head(&req->hash_node, &ctx->epoll_list);
 		io_ring_submit_unlock(ctx, issue_flags);
 		return IOU_ISSUE_SKIP_COMPLETE;
-	} else if (ret < 0) {
+	} else if (ret > 0) {
+		cflags = io_put_kbuf(req, ret * sizeof(*evs), 0);
+	} else if (!ret) {
+		io_kbuf_recycle(req, 0);
+	} else {
+err:
 		req_set_fail(req);
 	}
 	hlist_del_init(&req->hash_node);
 	io_ring_submit_unlock(ctx, issue_flags);
-	io_req_set_res(req, ret, 0);
+	io_req_set_res(req, ret, cflags);
 	return IOU_OK;
 }
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 44553a657476..04ff2b438531 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -520,6 +520,7 @@  const struct io_issue_def io_issue_defs[] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.audit_skip		= 1,
+		.buffer_select		= 1,
 #if defined(CONFIG_EPOLL)
 		.prep			= io_epoll_wait_prep,
 		.issue			= io_epoll_wait,