diff mbox series

[1/4] io_uring: add remote task_work execution helper

Message ID 20240401175757.1054072-2-axboe@kernel.dk (mailing list archive)
State New
Headers show
Series Cleanup and improve MSG_RING performance | expand

Commit Message

Jens Axboe April 1, 2024, 5:56 p.m. UTC
All our task_work handling is targeted at the state in the io_kiocb
itself, which is what it is being used for. However, MSG_RING rolls its
own task_work handling, ignoring how that is usually done.

In preparation for switching MSG_RING to be able to use the normal
task_work handling, add io_req_task_work_add_remote() which allows the
caller to pass in the target io_ring_ctx.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 30 ++++++++++++++++++++++--------
 io_uring/io_uring.h |  2 ++
 2 files changed, 24 insertions(+), 8 deletions(-)

Comments

Pavel Begunkov April 2, 2024, 1:53 p.m. UTC | #1
On 4/1/24 18:56, Jens Axboe wrote:
> All our task_work handling is targeted at the state in the io_kiocb
> itself, which is what it is being used for. However, MSG_RING rolls its
> own task_work handling, ignoring how that is usually done.
> 
> In preparation for switching MSG_RING to be able to use the normal
> task_work handling, add io_req_task_work_add_remote() which allows the
> caller to pass in the target io_ring_ctx.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>   io_uring/io_uring.c | 30 ++++++++++++++++++++++--------
>   io_uring/io_uring.h |  2 ++
>   2 files changed, 24 insertions(+), 8 deletions(-)
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 9986e9bb825a..df4d9c9aeeab 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -1232,9 +1232,10 @@ void tctx_task_work(struct callback_head *cb)
>   	WARN_ON_ONCE(ret);
>   }
>   
> -static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
> +static inline void io_req_local_work_add(struct io_kiocb *req,
> +					 struct io_ring_ctx *ctx,
> +					 unsigned flags)
>   {
> -	struct io_ring_ctx *ctx = req->ctx;
>   	unsigned nr_wait, nr_tw, nr_tw_prev;
>   	struct llist_node *head;
>   
> @@ -1300,9 +1301,10 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
>   	wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
>   }
>   
> -static void io_req_normal_work_add(struct io_kiocb *req)
> +static void io_req_normal_work_add(struct io_kiocb *req,
> +				   struct task_struct *task)
>   {
> -	struct io_uring_task *tctx = req->task->io_uring;
> +	struct io_uring_task *tctx = task->io_uring;
>   	struct io_ring_ctx *ctx = req->ctx;
>   
>   	/* task_work already pending, we're done */
> @@ -1321,7 +1323,7 @@ static void io_req_normal_work_add(struct io_kiocb *req)
>   		return;
>   	}
>   
> -	if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
> +	if (likely(!task_work_add(task, &tctx->task_work, ctx->notify_method)))
>   		return;
>   
>   	io_fallback_tw(tctx, false);
> @@ -1331,10 +1333,22 @@ void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
>   {
>   	if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
>   		rcu_read_lock();
> -		io_req_local_work_add(req, flags);
> +		io_req_local_work_add(req, req->ctx, flags);
> +		rcu_read_unlock();
> +	} else {
> +		io_req_normal_work_add(req, req->task);
> +	}
> +}
> +
> +void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
> +				 unsigned flags)
> +{
> +	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
> +		rcu_read_lock();

Let's move the RCU section into io_req_local_work_add().

Perhaps the easiest way is to use

guard(rcu)();

> +		io_req_local_work_add(req, ctx, flags);
>   		rcu_read_unlock();
>   	} else {
> -		io_req_normal_work_add(req);
> +		io_req_normal_work_add(req, READ_ONCE(ctx->submitter_task));

->submitter_task can be null.

Why do you care about ->submitter_task? SINGLE_ISSUER allows
CQE posting and all other stuff from a random context; most
optimisations have shifted into the stricter DEFER_TASKRUN.

But let's say it's queued to a valid task. tw run kicks in,
it splices the req, takes req->ctx, locks it and executes from
there, at which point the callback would probably assume that
the target ctx is locked and do all kinds of messy stuff
without sync. Even funnier if the original ctx is DEFER_TASKRUN:
then you have both deferred and normal tw for that ctx, which
should never happen.

Let's not pretend that io_req_normal_work_add to a foreign
context would work and limit io_req_task_work_add_remote()
to !DEFER_TASKRUN?



>   	}
>   }
>   
> @@ -1348,7 +1362,7 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
>   						    io_task_work.node);
>   
>   		node = node->next;
> -		io_req_normal_work_add(req);
> +		io_req_normal_work_add(req, req->task);
>   	}
>   }
>   
> diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
> index 1eb65324792a..4155379ee586 100644
> --- a/io_uring/io_uring.h
> +++ b/io_uring/io_uring.h
> @@ -74,6 +74,8 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
>   			       unsigned issue_flags);
>   
>   void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
> +void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
> +				 unsigned flags);
>   bool io_alloc_async_data(struct io_kiocb *req);
>   void io_req_task_queue(struct io_kiocb *req);
>   void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
diff mbox series

Patch

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9986e9bb825a..df4d9c9aeeab 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1232,9 +1232,10 @@  void tctx_task_work(struct callback_head *cb)
 	WARN_ON_ONCE(ret);
 }
 
-static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
+static inline void io_req_local_work_add(struct io_kiocb *req,
+					 struct io_ring_ctx *ctx,
+					 unsigned flags)
 {
-	struct io_ring_ctx *ctx = req->ctx;
 	unsigned nr_wait, nr_tw, nr_tw_prev;
 	struct llist_node *head;
 
@@ -1300,9 +1301,10 @@  static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 	wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
 }
 
-static void io_req_normal_work_add(struct io_kiocb *req)
+static void io_req_normal_work_add(struct io_kiocb *req,
+				   struct task_struct *task)
 {
-	struct io_uring_task *tctx = req->task->io_uring;
+	struct io_uring_task *tctx = task->io_uring;
 	struct io_ring_ctx *ctx = req->ctx;
 
 	/* task_work already pending, we're done */
@@ -1321,7 +1323,7 @@  static void io_req_normal_work_add(struct io_kiocb *req)
 		return;
 	}
 
-	if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
+	if (likely(!task_work_add(task, &tctx->task_work, ctx->notify_method)))
 		return;
 
 	io_fallback_tw(tctx, false);
@@ -1331,10 +1333,22 @@  void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
 {
 	if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
 		rcu_read_lock();
-		io_req_local_work_add(req, flags);
+		io_req_local_work_add(req, req->ctx, flags);
+		rcu_read_unlock();
+	} else {
+		io_req_normal_work_add(req, req->task);
+	}
+}
+
+void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
+				 unsigned flags)
+{
+	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
+		rcu_read_lock();
+		io_req_local_work_add(req, ctx, flags);
 		rcu_read_unlock();
 	} else {
-		io_req_normal_work_add(req);
+		io_req_normal_work_add(req, READ_ONCE(ctx->submitter_task));
 	}
 }
 
@@ -1348,7 +1362,7 @@  static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
 						    io_task_work.node);
 
 		node = node->next;
-		io_req_normal_work_add(req);
+		io_req_normal_work_add(req, req->task);
 	}
 }
 
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 1eb65324792a..4155379ee586 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -74,6 +74,8 @@  struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
 			       unsigned issue_flags);
 
 void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
+void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
+				 unsigned flags);
 bool io_alloc_async_data(struct io_kiocb *req);
 void io_req_task_queue(struct io_kiocb *req);
 void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);