diff mbox series

[for-next,v2,11/12] io_uring: do msg_ring in target task via tw

Message ID 4d76c7b28ed5d71b520de4482fbb7f660f21cd80.1670384893.git.asml.silence@gmail.com (mailing list archive)
State New
Headers show
Series CQ locking optimisation | expand

Commit Message

Pavel Begunkov Dec. 7, 2022, 3:53 a.m. UTC
While executing in a context of one io_uring instance msg_ring
manipulates another ring. We're trying to keep CQEs posting contained in
the context of the ring-owner task, use task_work to send the request to
the target ring's task when we're modifying its CQ or trying to install
a file. Note, we can't safely use io_uring task_work infra and have to
use task_work directly.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/msg_ring.c | 56 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 3 deletions(-)

Comments

Jens Axboe Dec. 7, 2022, 3:31 p.m. UTC | #1
On 12/6/22 8:53 PM, Pavel Begunkov wrote:
> @@ -43,6 +61,15 @@ static int io_msg_ring_data(struct io_kiocb *req)
>  	if (msg->src_fd || msg->dst_fd || msg->flags)
>  		return -EINVAL;
>  
> +	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
> +		init_task_work(&msg->tw, io_msg_tw_complete);
> +		if (task_work_add(target_ctx->submitter_task, &msg->tw,
> +				  TWA_SIGNAL))
> +			return -EOWNERDEAD;
> +
> +		return IOU_ISSUE_SKIP_COMPLETE;
> +	}
> +

We should probably be able to get by with TWA_SIGNAL_NO_IPI here, no?
Jens Axboe Dec. 7, 2022, 3:51 p.m. UTC | #2
On 12/7/22 8:31 AM, Jens Axboe wrote:
> On 12/6/22 8:53 PM, Pavel Begunkov wrote:
>> @@ -43,6 +61,15 @@ static int io_msg_ring_data(struct io_kiocb *req)
>>  	if (msg->src_fd || msg->dst_fd || msg->flags)
>>  		return -EINVAL;
>>  
>> +	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
>> +		init_task_work(&msg->tw, io_msg_tw_complete);
>> +		if (task_work_add(target_ctx->submitter_task, &msg->tw,
>> +				  TWA_SIGNAL))
>> +			return -EOWNERDEAD;
>> +
>> +		return IOU_ISSUE_SKIP_COMPLETE;
>> +	}
>> +
> 
> We should probably be able to get by with TWA_SIGNAL_NO_IPI here, no?

Considering we didn't even wake before, I'd say that's a solid yes.
Pavel Begunkov Dec. 7, 2022, 9:18 p.m. UTC | #3
On 12/7/22 15:51, Jens Axboe wrote:
> On 12/7/22 8:31 AM, Jens Axboe wrote:
>> On 12/6/22 8:53 PM, Pavel Begunkov wrote:
>>> @@ -43,6 +61,15 @@ static int io_msg_ring_data(struct io_kiocb *req)
>>>   	if (msg->src_fd || msg->dst_fd || msg->flags)
>>>   		return -EINVAL;
>>>   
>>> +	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
>>> +		init_task_work(&msg->tw, io_msg_tw_complete);
>>> +		if (task_work_add(target_ctx->submitter_task, &msg->tw,
>>> +				  TWA_SIGNAL))
>>> +			return -EOWNERDEAD;
>>> +
>>> +		return IOU_ISSUE_SKIP_COMPLETE;
>>> +	}
>>> +
>>
>> We should probably be able to get by with TWA_SIGNAL_NO_IPI here, no?
> 
> Considering we didn't even wake before, I'd say that's a solid yes.

I'm not so sure. It'll work, but a naive approach would also lack
IORING_SETUP_TASKRUN_FLAG and so mess with latencies when it's not
desirable.

option 1)

method = TWA_SIGNAL;
if (flags & IORING_SETUP_TASKRUN_FLAG)
	method = NO_IPI;

option 2)

task_work_add(NO_IPI);
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);


Might be better to have one of those.
Jens Axboe Dec. 7, 2022, 9:22 p.m. UTC | #4
On 12/7/22 2:18 PM, Pavel Begunkov wrote:
> On 12/7/22 15:51, Jens Axboe wrote:
>> On 12/7/22 8:31 AM, Jens Axboe wrote:
>>> On 12/6/22 8:53 PM, Pavel Begunkov wrote:
>>>> @@ -43,6 +61,15 @@ static int io_msg_ring_data(struct io_kiocb *req)
>>>>       if (msg->src_fd || msg->dst_fd || msg->flags)
>>>>           return -EINVAL;
>>>>   +    if (target_ctx->task_complete && current != target_ctx->submitter_task) {
>>>> +        init_task_work(&msg->tw, io_msg_tw_complete);
>>>> +        if (task_work_add(target_ctx->submitter_task, &msg->tw,
>>>> +                  TWA_SIGNAL))
>>>> +            return -EOWNERDEAD;
>>>> +
>>>> +        return IOU_ISSUE_SKIP_COMPLETE;
>>>> +    }
>>>> +
>>>
>>> We should probably be able to get by with TWA_SIGNAL_NO_IPI here, no?
>>
>> Considering we didn't even wake before, I'd say that's a solid yes.
> 
> I'm not so sure. It'll work, but a naive approach would also lack
> IORING_SETUP_TASKRUN_FLAG and so mess with latencies when it's not
> desirable.
> 
> option 1)
> 
> method = TWA_SIGNAL;
> if (flags & IORING_SETUP_TASKRUN_FLAG)
>     method = NO_IPI;
> 
> option 2)
> 
> task_work_add(NO_IPI);
> atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
> 
> 
> Might be better to have one of those.

I like option 2, which should be fine.
diff mbox series

Patch

diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 525063ac3dab..24e6cc477515 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -16,6 +16,7 @@ 
 struct io_msg {
 	struct file			*file;
 	struct file			*src_file;
+	struct callback_head		tw;
 	u64 user_data;
 	u32 len;
 	u32 cmd;
@@ -35,6 +36,23 @@  void io_msg_ring_cleanup(struct io_kiocb *req)
 	msg->src_file = NULL;
 }
 
+static void io_msg_tw_complete(struct callback_head *head)
+{
+	struct io_msg *msg = container_of(head, struct io_msg, tw);
+	struct io_kiocb *req = cmd_to_io_kiocb(msg);
+	struct io_ring_ctx *target_ctx = req->file->private_data;
+	int ret = 0;
+
+	if (current->flags & PF_EXITING)
+		ret = -EOWNERDEAD;
+	else if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+		ret = -EOVERFLOW;
+
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_queue_tw_complete(req, ret);
+}
+
 static int io_msg_ring_data(struct io_kiocb *req)
 {
 	struct io_ring_ctx *target_ctx = req->file->private_data;
@@ -43,6 +61,15 @@  static int io_msg_ring_data(struct io_kiocb *req)
 	if (msg->src_fd || msg->dst_fd || msg->flags)
 		return -EINVAL;
 
+	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
+		init_task_work(&msg->tw, io_msg_tw_complete);
+		if (task_work_add(target_ctx->submitter_task, &msg->tw,
+				  TWA_SIGNAL))
+			return -EOWNERDEAD;
+
+		return IOU_ISSUE_SKIP_COMPLETE;
+	}
+
 	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
 		return 0;
 
@@ -124,6 +151,19 @@  static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
 	return ret;
 }
 
+static void io_msg_tw_fd_complete(struct callback_head *head)
+{
+	struct io_msg *msg = container_of(head, struct io_msg, tw);
+	struct io_kiocb *req = cmd_to_io_kiocb(msg);
+	int ret = -EOWNERDEAD;
+
+	if (!(current->flags & PF_EXITING))
+		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_queue_tw_complete(req, ret);
+}
+
 static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_ring_ctx *target_ctx = req->file->private_data;
@@ -140,6 +180,15 @@  static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
 		msg->src_file = src_file;
 		req->flags |= REQ_F_NEED_CLEANUP;
 	}
+
+	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
+		init_task_work(&msg->tw, io_msg_tw_fd_complete);
+		if (task_work_add(target_ctx->submitter_task, &msg->tw,
+				  TWA_SIGNAL))
+			return -EOWNERDEAD;
+
+		return IOU_ISSUE_SKIP_COMPLETE;
+	}
 	return io_msg_install_complete(req, issue_flags);
 }
 
@@ -185,10 +234,11 @@  int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
 	}
 
 done:
-	if (ret == -EAGAIN)
-		return -EAGAIN;
-	if (ret < 0)
+	if (ret < 0) {
+		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
+			return ret;
 		req_set_fail(req);
+	}
 	io_req_set_res(req, ret, 0);
 	return IOU_OK;
 }