@@ -845,11 +845,30 @@ static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
state->cqes_count = 0;
}
-bool io_post_aux_cqe(struct io_ring_ctx *ctx,
+bool io_post_aux_cqe(struct io_ring_ctx *ctx, bool defer,
u64 user_data, s32 res, u32 cflags)
{
bool filled;
+ if (defer) {
+ unsigned int length = ARRAY_SIZE(ctx->submit_state.cqes);
+ struct io_uring_cqe *cqe;
+
+ lockdep_assert_held(&ctx->uring_lock);
+
+ if (ctx->submit_state.cqes_count == length) {
+ io_cq_lock(ctx);
+ __io_flush_post_cqes(ctx);
+ /* no need to flush - flush is deferred */
+ spin_unlock(&ctx->completion_lock);
+ }
+
+ cqe = ctx->submit_state.cqes + ctx->submit_state.cqes_count++;
+ cqe->user_data = user_data;
+ cqe->res = res;
+ cqe->flags = cflags;
+ return true;
+ }
io_cq_lock(ctx);
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
io_cq_unlock_post(ctx);
@@ -31,7 +31,7 @@ int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked);
int io_run_local_work(struct io_ring_ctx *ctx);
void io_req_defer_failed(struct io_kiocb *req, s32 res);
void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
-bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
+bool io_post_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags);
bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
@@ -23,7 +23,7 @@ struct io_msg {
u32 flags;
};
-static int io_msg_ring_data(struct io_kiocb *req)
+static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *target_ctx = req->file->private_data;
struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
@@ -31,7 +31,8 @@ static int io_msg_ring_data(struct io_kiocb *req)
if (msg->src_fd || msg->dst_fd || msg->flags)
return -EINVAL;
- if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ if (io_post_aux_cqe(target_ctx, false,
+ msg->user_data, msg->len, 0))
return 0;
return -EOVERFLOW;
@@ -116,7 +117,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
* completes with -EOVERFLOW, then the sender must ensure that a
* later IORING_OP_MSG_RING delivers the message.
*/
- if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+ if (!io_post_aux_cqe(target_ctx, false,
+ msg->user_data, msg->len, 0))
ret = -EOVERFLOW;
out_unlock:
io_double_unlock_ctx(ctx, target_ctx, issue_flags);
@@ -153,7 +155,7 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
switch (msg->cmd) {
case IORING_MSG_DATA:
- ret = io_msg_ring_data(req);
+ ret = io_msg_ring_data(req, issue_flags);
break;
case IORING_MSG_SEND_FD:
ret = io_msg_send_fd(req, issue_flags);
@@ -592,8 +592,8 @@ static inline void io_recv_prep_retry(struct io_kiocb *req)
* Returns true if it is actually finished, or false if it should run
* again (for multishot).
*/
-static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
- unsigned int cflags, bool mshot_finished)
+static inline bool io_recv_finish(struct io_kiocb *req, unsigned int issue_flags,
+ int *ret, unsigned int cflags, bool mshot_finished)
{
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
io_req_set_res(req, *ret, cflags);
@@ -602,8 +602,8 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
}
if (!mshot_finished) {
- if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
- cflags | IORING_CQE_F_MORE)) {
+ if (io_post_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
+ req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE)) {
io_recv_prep_retry(req);
return false;
}
@@ -801,7 +801,7 @@ int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
if (kmsg->msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
- if (!io_recv_finish(req, &ret, cflags, mshot_finished))
+ if (!io_recv_finish(req, issue_flags, &ret, cflags, mshot_finished))
goto retry_multishot;
if (mshot_finished) {
@@ -900,7 +900,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
if (msg.msg_inq)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
- if (!io_recv_finish(req, &ret, cflags, ret <= 0))
+ if (!io_recv_finish(req, issue_flags, &ret, cflags, ret <= 0))
goto retry_multishot;
return ret;
@@ -1323,7 +1323,8 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
- if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE))
+ if (io_post_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
+ req->cqe.user_data, ret, IORING_CQE_F_MORE))
goto retry;
return -ECANCELED;
}
@@ -245,7 +245,7 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
__poll_t mask = mangle_poll(req->cqe.res &
req->apoll_events);
- if (!io_post_aux_cqe(ctx, req->cqe.user_data,
+ if (!io_post_aux_cqe(ctx, *locked, req->cqe.user_data,
mask, IORING_CQE_F_MORE))
return -ECANCELED;
} else {
@@ -170,10 +170,10 @@ static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
if (prsrc->tag) {
if (ctx->flags & IORING_SETUP_IOPOLL) {
mutex_lock(&ctx->uring_lock);
- io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
+ io_post_aux_cqe(ctx, false, prsrc->tag, 0, 0);
mutex_unlock(&ctx->uring_lock);
} else {
- io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
+ io_post_aux_cqe(ctx, false, prsrc->tag, 0, 0);
}
}
Use the just introduced deferred post cqe completion state when possible
in io_post_aux_cqe.

Signed-off-by: Dylan Yudaken <dylany@meta.com>
---
 io_uring/io_uring.c | 21 ++++++++++++++++++++-
 io_uring/io_uring.h |  2 +-
 io_uring/msg_ring.c | 10 ++++++----
 io_uring/net.c      | 15 ++++++++-------
 io_uring/poll.c     |  2 +-
 io_uring/rsrc.c     |  4 ++--
 6 files changed, 38 insertions(+), 16 deletions(-)
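
For reference, a minimal sketch of how a caller is expected to pick the new
defer argument, mirroring the io_recv_finish() and io_accept() hunks above.
The helper io_mshot_post_cqe() is made up purely for illustration and is not
part of the patch; io_post_aux_cqe(), IO_URING_F_COMPLETE_DEFER and
IORING_CQE_F_MORE are the identifiers the patch itself uses:

	/*
	 * Hypothetical helper, not in the patch: derive the new "defer"
	 * argument from issue_flags. Deferring into the submit_state.cqes
	 * cache is only valid while ->uring_lock is held, which is what
	 * IO_URING_F_COMPLETE_DEFER indicates for this issue attempt;
	 * otherwise io_post_aux_cqe() falls back to the locked, immediate
	 * io_fill_cqe_aux() path.
	 */
	static bool io_mshot_post_cqe(struct io_kiocb *req,
				      unsigned int issue_flags,
				      s32 res, u32 cflags)
	{
		bool defer = issue_flags & IO_URING_F_COMPLETE_DEFER;

		return io_post_aux_cqe(req->ctx, defer, req->cqe.user_data,
				       res, cflags | IORING_CQE_F_MORE);
	}

Callers that cannot rely on the submitter's lock for the target ring keep
passing false and take the immediate path, as the msg_ring and rsrc hunks
above show; the poll path passes its *locked state instead.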