
[for-next,09/10] io_uring: allow io_post_aux_cqe to defer completion

Message ID 20221121100353.371865-10-dylany@meta.com (mailing list archive)
State New
Series io_uring: batch multishot completions

Commit Message

Dylan Yudaken Nov. 21, 2022, 10:03 a.m. UTC
Use the just-introduced deferred CQE completion state in io_post_aux_cqe
when possible.

Signed-off-by: Dylan Yudaken <dylany@meta.com>
---
 io_uring/io_uring.c | 21 ++++++++++++++++++++-
 io_uring/io_uring.h |  2 +-
 io_uring/msg_ring.c | 10 ++++++----
 io_uring/net.c      | 15 ++++++++-------
 io_uring/poll.c     |  2 +-
 io_uring/rsrc.c     |  4 ++--
 6 files changed, 38 insertions(+), 16 deletions(-)

Comments

Jens Axboe Nov. 21, 2022, 4:55 p.m. UTC | #1
On 11/21/22 3:03 AM, Dylan Yudaken wrote:
> Use the just-introduced deferred CQE completion state in io_post_aux_cqe
> when possible.
> 
> Signed-off-by: Dylan Yudaken <dylany@meta.com>
> ---
>  io_uring/io_uring.c | 21 ++++++++++++++++++++-
>  io_uring/io_uring.h |  2 +-
>  io_uring/msg_ring.c | 10 ++++++----
>  io_uring/net.c      | 15 ++++++++-------
>  io_uring/poll.c     |  2 +-
>  io_uring/rsrc.c     |  4 ++--
>  6 files changed, 38 insertions(+), 16 deletions(-)
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index c797f9a75dfe..5c240d01278a 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -845,11 +845,30 @@ static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
>  	state->cqes_count = 0;
>  }
>  
> -bool io_post_aux_cqe(struct io_ring_ctx *ctx,
> +bool io_post_aux_cqe(struct io_ring_ctx *ctx, bool defer,
>  		     u64 user_data, s32 res, u32 cflags)
>  {
>  	bool filled;
>  
> +	if (defer) {
> +		unsigned int length = ARRAY_SIZE(ctx->submit_state.cqes);
> +		struct io_uring_cqe *cqe;
> +
> +		lockdep_assert_held(&ctx->uring_lock);
> +
> +		if (ctx->submit_state.cqes_count == length) {
> +			io_cq_lock(ctx);
> +			__io_flush_post_cqes(ctx);
> +			/* no need to flush - flush is deferred */
> +			spin_unlock(&ctx->completion_lock);
> +		}
> +
> +		cqe  = ctx->submit_state.cqes + ctx->submit_state.cqes_count++;
> +		cqe->user_data = user_data;
> +		cqe->res = res;
> +		cqe->flags = cflags;
> +		return true;
> +	}
>  	io_cq_lock(ctx);
>  	filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
>  	io_cq_unlock_post(ctx);

Seems like this would be cleaner with a separate helper, making that
decision in the caller. For the callers that just pass false that is
trivial, of course; in the other spots, just gate it on the locked
nature of the ring?
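As a rough illustration of that split (the helper name and exact shape are
hypothetical, not from this series), the deferred branch added by this patch
could live in its own helper, with io_post_aux_cqe() keeping its original
signature:

static bool io_defer_aux_cqe(struct io_ring_ctx *ctx, u64 user_data,
			     s32 res, u32 cflags)
{
	struct io_submit_state *state = &ctx->submit_state;
	struct io_uring_cqe *cqe;

	/* deferral is only safe while the submitter holds the ring lock */
	lockdep_assert_held(&ctx->uring_lock);

	/* CQE cache full: flush the batch before stashing another entry */
	if (state->cqes_count == ARRAY_SIZE(state->cqes)) {
		io_cq_lock(ctx);
		__io_flush_post_cqes(ctx);
		/* no need to flush the CQ ring here - that stays deferred */
		spin_unlock(&ctx->completion_lock);
	}

	cqe = &state->cqes[state->cqes_count++];
	cqe->user_data = user_data;
	cqe->res = res;
	cqe->flags = cflags;
	return true;
}

A call site such as io_recv_finish() would then pick the helper based on the
issue flags instead of passing a bool down:

	if (issue_flags & IO_URING_F_COMPLETE_DEFER)
		posted = io_defer_aux_cqe(req->ctx, req->cqe.user_data,
					  *ret, cflags | IORING_CQE_F_MORE);
	else
		posted = io_post_aux_cqe(req->ctx, req->cqe.user_data,
					 *ret, cflags | IORING_CQE_F_MORE);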
Jens Axboe Nov. 21, 2022, 5:31 p.m. UTC | #2
On 11/21/22 3:03 AM, Dylan Yudaken wrote:
> diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
> index afb543aab9f6..c5e831e3dcfc 100644
> --- a/io_uring/msg_ring.c
> +++ b/io_uring/msg_ring.c
> @@ -23,7 +23,7 @@ struct io_msg {
>  	u32 flags;
>  };
>  
> -static int io_msg_ring_data(struct io_kiocb *req)
> +static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
>  {
>  	struct io_ring_ctx *target_ctx = req->file->private_data;
>  	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
> @@ -31,7 +31,8 @@ static int io_msg_ring_data(struct io_kiocb *req)
>  	if (msg->src_fd || msg->dst_fd || msg->flags)
>  		return -EINVAL;
>  
> -	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
> +	if (io_post_aux_cqe(target_ctx, false,
> +			    msg->user_data, msg->len, 0))
>  		return 0;
>  
>  	return -EOVERFLOW;
> @@ -116,7 +117,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
>  	 * completes with -EOVERFLOW, then the sender must ensure that a
>  	 * later IORING_OP_MSG_RING delivers the message.
>  	 */
> -	if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
> +	if (!io_post_aux_cqe(target_ctx, false,
> +			     msg->user_data, msg->len, 0))
>  		ret = -EOVERFLOW;
>  out_unlock:
>  	io_double_unlock_ctx(ctx, target_ctx, issue_flags);
> @@ -153,7 +155,7 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
>  
>  	switch (msg->cmd) {
>  	case IORING_MSG_DATA:
> -		ret = io_msg_ring_data(req);
> +		ret = io_msg_ring_data(req, issue_flags);
>  		break;
>  	case IORING_MSG_SEND_FD:
>  		ret = io_msg_send_fd(req, issue_flags);

This is a bit odd; either we can drop this, or it should be wired up
for defer?
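For reference, the "wired up" variant would mirror the net.c hunks below,
e.g. (sketch only; this is only valid if the submitter actually holds the
target ring's uring_lock, which is not generally true when posting to a
remote ring):

	/* illustrative: defer requires target_ctx->uring_lock to be held */
	if (io_post_aux_cqe(target_ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
			    msg->user_data, msg->len, 0))
		return 0;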

Patch

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index c797f9a75dfe..5c240d01278a 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -845,11 +845,30 @@  static void __io_flush_post_cqes(struct io_ring_ctx *ctx)
 	state->cqes_count = 0;
 }
 
-bool io_post_aux_cqe(struct io_ring_ctx *ctx,
+bool io_post_aux_cqe(struct io_ring_ctx *ctx, bool defer,
 		     u64 user_data, s32 res, u32 cflags)
 {
 	bool filled;
 
+	if (defer) {
+		unsigned int length = ARRAY_SIZE(ctx->submit_state.cqes);
+		struct io_uring_cqe *cqe;
+
+		lockdep_assert_held(&ctx->uring_lock);
+
+		if (ctx->submit_state.cqes_count == length) {
+			io_cq_lock(ctx);
+			__io_flush_post_cqes(ctx);
+			/* no need to flush - flush is deferred */
+			spin_unlock(&ctx->completion_lock);
+		}
+
+		cqe  = ctx->submit_state.cqes + ctx->submit_state.cqes_count++;
+		cqe->user_data = user_data;
+		cqe->res = res;
+		cqe->flags = cflags;
+		return true;
+	}
 	io_cq_lock(ctx);
 	filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
 	io_cq_unlock_post(ctx);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index bfe1b5488c25..979a223286bd 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -31,7 +31,7 @@  int __io_run_local_work(struct io_ring_ctx *ctx, bool *locked);
 int io_run_local_work(struct io_ring_ctx *ctx);
 void io_req_defer_failed(struct io_kiocb *req, s32 res);
 void __io_req_complete(struct io_kiocb *req, unsigned issue_flags);
-bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
+bool io_post_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags);
 bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
 void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
 
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index afb543aab9f6..c5e831e3dcfc 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -23,7 +23,7 @@  struct io_msg {
 	u32 flags;
 };
 
-static int io_msg_ring_data(struct io_kiocb *req)
+static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_ring_ctx *target_ctx = req->file->private_data;
 	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
@@ -31,7 +31,8 @@  static int io_msg_ring_data(struct io_kiocb *req)
 	if (msg->src_fd || msg->dst_fd || msg->flags)
 		return -EINVAL;
 
-	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+	if (io_post_aux_cqe(target_ctx, false,
+			    msg->user_data, msg->len, 0))
 		return 0;
 
 	return -EOVERFLOW;
@@ -116,7 +117,8 @@  static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
 	 * completes with -EOVERFLOW, then the sender must ensure that a
 	 * later IORING_OP_MSG_RING delivers the message.
 	 */
-	if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+	if (!io_post_aux_cqe(target_ctx, false,
+			     msg->user_data, msg->len, 0))
 		ret = -EOVERFLOW;
 out_unlock:
 	io_double_unlock_ctx(ctx, target_ctx, issue_flags);
@@ -153,7 +155,7 @@  int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
 
 	switch (msg->cmd) {
 	case IORING_MSG_DATA:
-		ret = io_msg_ring_data(req);
+		ret = io_msg_ring_data(req, issue_flags);
 		break;
 	case IORING_MSG_SEND_FD:
 		ret = io_msg_send_fd(req, issue_flags);
diff --git a/io_uring/net.c b/io_uring/net.c
index a1a0b8f223e0..8c5154b05344 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -592,8 +592,8 @@  static inline void io_recv_prep_retry(struct io_kiocb *req)
  * Returns true if it is actually finished, or false if it should run
  * again (for multishot).
  */
-static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
-				  unsigned int cflags, bool mshot_finished)
+static inline bool io_recv_finish(struct io_kiocb *req, unsigned int issue_flags,
+				  int *ret, unsigned int cflags, bool mshot_finished)
 {
 	if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
 		io_req_set_res(req, *ret, cflags);
@@ -602,8 +602,8 @@  static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 	}
 
 	if (!mshot_finished) {
-		if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
-				    cflags | IORING_CQE_F_MORE)) {
+		if (io_post_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
+				    req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE)) {
 			io_recv_prep_retry(req);
 			return false;
 		}
@@ -801,7 +801,7 @@  int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
 	if (kmsg->msg.msg_inq)
 		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
 
-	if (!io_recv_finish(req, &ret, cflags, mshot_finished))
+	if (!io_recv_finish(req, issue_flags, &ret, cflags, mshot_finished))
 		goto retry_multishot;
 
 	if (mshot_finished) {
@@ -900,7 +900,7 @@  int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 	if (msg.msg_inq)
 		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
 
-	if (!io_recv_finish(req, &ret, cflags, ret <= 0))
+	if (!io_recv_finish(req, issue_flags, &ret, cflags, ret <= 0))
 		goto retry_multishot;
 
 	return ret;
@@ -1323,7 +1323,8 @@  int io_accept(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (ret < 0)
 		return ret;
-	if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE))
+	if (io_post_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
+			    req->cqe.user_data, ret, IORING_CQE_F_MORE))
 		goto retry;
 	return -ECANCELED;
 }
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 2b77d18a67a7..c4865dd58862 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -245,7 +245,7 @@  static int io_poll_check_events(struct io_kiocb *req, bool *locked)
 			__poll_t mask = mangle_poll(req->cqe.res &
 						    req->apoll_events);
 
-			if (!io_post_aux_cqe(ctx, req->cqe.user_data,
+			if (!io_post_aux_cqe(ctx, *locked, req->cqe.user_data,
 					     mask, IORING_CQE_F_MORE))
 				return -ECANCELED;
 		} else {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 133608200769..f37cdd8cfc95 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -170,10 +170,10 @@  static void __io_rsrc_put_work(struct io_rsrc_node *ref_node)
 		if (prsrc->tag) {
 			if (ctx->flags & IORING_SETUP_IOPOLL) {
 				mutex_lock(&ctx->uring_lock);
-				io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
+				io_post_aux_cqe(ctx, false, prsrc->tag, 0, 0);
 				mutex_unlock(&ctx->uring_lock);
 			} else {
-				io_post_aux_cqe(ctx, prsrc->tag, 0, 0);
+				io_post_aux_cqe(ctx, false, prsrc->tag, 0, 0);
 			}
 		}