@@ -10,6 +10,7 @@
#include <uapi/linux/io_uring.h>
#include "io_uring.h"
+#include "kbuf.h"
#include "epoll.h"
#include "poll.h"
@@ -189,11 +190,13 @@ int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
- if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
+ if (sqe->off || sqe->rw_flags || sqe->splice_fd_in)
return -EINVAL;
iew->maxevents = READ_ONCE(sqe->len);
iew->events = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ if (req->flags & REQ_F_BUFFER_SELECT && iew->events)
+ return -EINVAL;
iew->wait.flags = 0;
iew->wait.private = req;
@@ -207,22 +210,42 @@ int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
+ struct epoll_event __user *evs = iew->events; /* event destination; replaced below if a provided buffer is selected */
struct io_ring_ctx *ctx = req->ctx;
+ int maxevents = iew->maxevents; /* local copy so it can be recomputed from the selected buffer's length */
+ unsigned int cflags = 0; /* passed to io_req_set_res(); stays 0 unless io_put_kbuf() runs */
int ret;
io_ring_submit_lock(ctx, issue_flags);
- ret = epoll_wait(req->file, iew->events, iew->maxevents, NULL, &iew->wait);
+ if (io_do_buffer_select(req)) { /* provided-buffer path: take the event array from a selected buffer */
+ size_t len = iew->maxevents * sizeof(*evs); /* bytes requested: room for iew->maxevents events */
+
+ evs = io_buffer_select(req, &len, 0); /* len is passed by pointer and re-read below */
+ if (!evs) {
+ ret = -ENOBUFS; /* no buffer was obtained */
+ goto err; /* jumps into the failure branch further down */
+ }
+ maxevents = len / sizeof(*evs); /* number of whole events that fit in len */
+ }
+
+ ret = epoll_wait(req->file, evs, maxevents, NULL, &iew->wait); /* evs/maxevents equal iew->events/iew->maxevents when no buffer was selected */
if (ret == -EIOCBQUEUED) {
+ io_kbuf_recycle(req, 0); /* recycle the buffer before returning IOU_ISSUE_SKIP_COMPLETE */
if (hlist_unhashed(&req->hash_node))
hlist_add_head(&req->hash_node, &ctx->epoll_list);
io_ring_submit_unlock(ctx, issue_flags);
return IOU_ISSUE_SKIP_COMPLETE;
- } else if (ret < 0) {
+ } else if (ret > 0) {
+ cflags = io_put_kbuf(req, ret * sizeof(*evs), 0); /* ret events returned: pass their size in bytes, keep the returned flags */
+ } else if (!ret) {
+ io_kbuf_recycle(req, 0); /* ret == 0: recycle the buffer */
+ } else {
+err: /* reached for ret < 0 (other than -EIOCBQUEUED) and via the -ENOBUFS goto above */
req_set_fail(req);
}
hlist_del_init(&req->hash_node);
io_ring_submit_unlock(ctx, issue_flags);
- io_req_set_res(req, ret, 0);
+ io_req_set_res(req, ret, cflags); /* cflags replaces the previous constant 0 */
return IOU_OK;
}
@@ -520,6 +520,7 @@ const struct io_issue_def io_issue_defs[] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.audit_skip = 1,
+ .buffer_select = 1,
#if defined(CONFIG_EPOLL)
.prep = io_epoll_wait_prep,
.issue = io_epoll_wait,
This will be a prerequisite for adding multishot support, but it can be
used with single-shot requests as well.

It works like any other request that supports provided buffers: set
sqe->addr to NULL, set IOSQE_BUFFER_SELECT in sqe->flags, and ensure that
sqe->buf_group is set. Epoll wait will then pick a buffer from that group
and store the events there. A non-NULL sqe->addr combined with
IOSQE_BUFFER_SELECT is rejected with -EINVAL.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/epoll.c | 31 +++++++++++++++++++++++++++----
 io_uring/opdef.c |  1 +
 2 files changed, 28 insertions(+), 4 deletions(-)