| Message ID | 4d76c7b28ed5d71b520de4482fbb7f660f21cd80.1670384893.git.asml.silence@gmail.com |
|---|---|
| State | New |
| Series | CQ locking optimisation |
On 12/6/22 8:53 PM, Pavel Begunkov wrote:
> @@ -43,6 +61,15 @@ static int io_msg_ring_data(struct io_kiocb *req)
>  	if (msg->src_fd || msg->dst_fd || msg->flags)
>  		return -EINVAL;
>  
> +	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
> +		init_task_work(&msg->tw, io_msg_tw_complete);
> +		if (task_work_add(target_ctx->submitter_task, &msg->tw,
> +				  TWA_SIGNAL))
> +			return -EOWNERDEAD;
> +
> +		return IOU_ISSUE_SKIP_COMPLETE;
> +	}
> +

We should probably be able to get by with TWA_SIGNAL_NO_IPI here, no?
On 12/7/22 8:31 AM, Jens Axboe wrote:
> On 12/6/22 8:53 PM, Pavel Begunkov wrote:
>> @@ -43,6 +61,15 @@ static int io_msg_ring_data(struct io_kiocb *req)
>>  	if (msg->src_fd || msg->dst_fd || msg->flags)
>>  		return -EINVAL;
>>  
>> +	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
>> +		init_task_work(&msg->tw, io_msg_tw_complete);
>> +		if (task_work_add(target_ctx->submitter_task, &msg->tw,
>> +				  TWA_SIGNAL))
>> +			return -EOWNERDEAD;
>> +
>> +		return IOU_ISSUE_SKIP_COMPLETE;
>> +	}
>> +
>
> We should probably be able to get by with TWA_SIGNAL_NO_IPI here, no?

Considering we didn't even wake before, I'd say that's a solid yes.
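For illustration, what is being agreed on at this point is just a notification-mode swap in the hunk quoted above; a minimal sketch (the comment is mine, everything else follows the patch, and the follow-ups below refine this further):

	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
		init_task_work(&msg->tw, io_msg_tw_complete);
		/*
		 * TWA_SIGNAL_NO_IPI still sets the notification flag and
		 * wakes a sleeping waiter, but doesn't IPI a target task
		 * that is currently running; that task notices the work
		 * on its next transition back to userspace.
		 */
		if (task_work_add(target_ctx->submitter_task, &msg->tw,
				  TWA_SIGNAL_NO_IPI))
			return -EOWNERDEAD;

		return IOU_ISSUE_SKIP_COMPLETE;
	}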
On 12/7/22 15:51, Jens Axboe wrote:
> On 12/7/22 8:31 AM, Jens Axboe wrote:
>> On 12/6/22 8:53 PM, Pavel Begunkov wrote:
>>> @@ -43,6 +61,15 @@ static int io_msg_ring_data(struct io_kiocb *req)
>>>  	if (msg->src_fd || msg->dst_fd || msg->flags)
>>>  		return -EINVAL;
>>>  
>>> +	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
>>> +		init_task_work(&msg->tw, io_msg_tw_complete);
>>> +		if (task_work_add(target_ctx->submitter_task, &msg->tw,
>>> +				  TWA_SIGNAL))
>>> +			return -EOWNERDEAD;
>>> +
>>> +		return IOU_ISSUE_SKIP_COMPLETE;
>>> +	}
>>> +
>>
>> We should probably be able to get by with TWA_SIGNAL_NO_IPI here, no?
>
> Considering we didn't even wake before, I'd say that's a solid yes.

I'm not so sure. It'll work, but a naive approach would also lack
IORING_SETUP_TASKRUN_FLAG and so mess with latencies when it's not
desirable.

option 1)

method = TWA_SIGNAL;
if (flags & IORING_SETUP_TASKRUN_FLAG)
	method = NO_IPI;

option 2)

task_work_add(NO_IPI);
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);


Might be better to have one of those.
On 12/7/22 2:18 PM, Pavel Begunkov wrote:
> On 12/7/22 15:51, Jens Axboe wrote:
>> On 12/7/22 8:31 AM, Jens Axboe wrote:
>>> On 12/6/22 8:53 PM, Pavel Begunkov wrote:
>>>> @@ -43,6 +61,15 @@ static int io_msg_ring_data(struct io_kiocb *req)
>>>>  	if (msg->src_fd || msg->dst_fd || msg->flags)
>>>>  		return -EINVAL;
>>>> +	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
>>>> +		init_task_work(&msg->tw, io_msg_tw_complete);
>>>> +		if (task_work_add(target_ctx->submitter_task, &msg->tw,
>>>> +				  TWA_SIGNAL))
>>>> +			return -EOWNERDEAD;
>>>> +
>>>> +		return IOU_ISSUE_SKIP_COMPLETE;
>>>> +	}
>>>> +
>>>
>>> We should probably be able to get by with TWA_SIGNAL_NO_IPI here, no?
>>
>> Considering we didn't even wake before, I'd say that's a solid yes.
>
> I'm not so sure. It'll work, but a naive approach would also lack
> IORING_SETUP_TASKRUN_FLAG and so mess with latencies when it's not
> desirable.
>
> option 1)
>
> method = TWA_SIGNAL;
> if (flags & IORING_SETUP_TASKRUN_FLAG)
> 	method = NO_IPI;
>
> option 2)
>
> task_work_add(NO_IPI);
> atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
>
>
> Might be better to have one of those.

I like option 2, which should be fine.
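A sketch of how option 2 could slot into io_msg_ring_data(), following the pseudocode above. The names come from the patch and the thread; where exactly the IORING_SQ_TASKRUN update sits relative to task_work_add() is my assumption, not something settled here:

	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
		init_task_work(&msg->tw, io_msg_tw_complete);
		if (task_work_add(target_ctx->submitter_task, &msg->tw,
				  TWA_SIGNAL_NO_IPI))
			return -EOWNERDEAD;

		/*
		 * No IPI was sent, so advertise the pending work in the
		 * shared SQ flags; a target that polls the ring (or checks
		 * the flags before sleeping) sees it needs to enter the
		 * kernel and run its task_work.
		 */
		atomic_or(IORING_SQ_TASKRUN, &target_ctx->rings->sq_flags);
		return IOU_ISSUE_SKIP_COMPLETE;
	}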
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 525063ac3dab..24e6cc477515 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -16,6 +16,7 @@ struct io_msg {
 	struct file			*file;
 	struct file			*src_file;
+	struct callback_head		tw;
 	u64 user_data;
 	u32 len;
 	u32 cmd;
@@ -35,6 +36,23 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
 	msg->src_file = NULL;
 }
 
+static void io_msg_tw_complete(struct callback_head *head)
+{
+	struct io_msg *msg = container_of(head, struct io_msg, tw);
+	struct io_kiocb *req = cmd_to_io_kiocb(msg);
+	struct io_ring_ctx *target_ctx = req->file->private_data;
+	int ret = 0;
+
+	if (current->flags & PF_EXITING)
+		ret = -EOWNERDEAD;
+	else if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+		ret = -EOVERFLOW;
+
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_queue_tw_complete(req, ret);
+}
+
 static int io_msg_ring_data(struct io_kiocb *req)
 {
 	struct io_ring_ctx *target_ctx = req->file->private_data;
@@ -43,6 +61,15 @@ static int io_msg_ring_data(struct io_kiocb *req)
 	if (msg->src_fd || msg->dst_fd || msg->flags)
 		return -EINVAL;
 
+	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
+		init_task_work(&msg->tw, io_msg_tw_complete);
+		if (task_work_add(target_ctx->submitter_task, &msg->tw,
+				  TWA_SIGNAL))
+			return -EOWNERDEAD;
+
+		return IOU_ISSUE_SKIP_COMPLETE;
+	}
+
 	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
 		return 0;
 
@@ -124,6 +151,19 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
 	return ret;
 }
 
+static void io_msg_tw_fd_complete(struct callback_head *head)
+{
+	struct io_msg *msg = container_of(head, struct io_msg, tw);
+	struct io_kiocb *req = cmd_to_io_kiocb(msg);
+	int ret = -EOWNERDEAD;
+
+	if (!(current->flags & PF_EXITING))
+		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
+	if (ret < 0)
+		req_set_fail(req);
+	io_req_queue_tw_complete(req, ret);
+}
+
 static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_ring_ctx *target_ctx = req->file->private_data;
@@ -140,6 +180,15 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
 		msg->src_file = src_file;
 		req->flags |= REQ_F_NEED_CLEANUP;
 	}
+
+	if (target_ctx->task_complete && current != target_ctx->submitter_task) {
+		init_task_work(&msg->tw, io_msg_tw_fd_complete);
+		if (task_work_add(target_ctx->submitter_task, &msg->tw,
+				  TWA_SIGNAL))
+			return -EOWNERDEAD;
+
+		return IOU_ISSUE_SKIP_COMPLETE;
+	}
 	return io_msg_install_complete(req, issue_flags);
 }
 
@@ -185,10 +234,11 @@ int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
 	}
 
 done:
-	if (ret == -EAGAIN)
-		return -EAGAIN;
-	if (ret < 0)
+	if (ret < 0) {
+		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
+			return ret;
 		req_set_fail(req);
+	}
 	io_req_set_res(req, ret, 0);
 	return IOU_OK;
 }
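For reference, the opcode touched here is IORING_OP_MSG_RING. A minimal userspace sketch using liburing's io_uring_prep_msg_ring() shows the data-passing side; it is my example, not part of the patch, and with both rings owned by the same task and no DEFER_TASKRUN it does not exercise the new task_work path. Error handling is omitted:

#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring src, dst;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	io_uring_queue_init(8, &src, 0);
	io_uring_queue_init(8, &dst, 0);

	/* Ask the kernel to post a CQE with user_data 0x42 into 'dst'. */
	sqe = io_uring_get_sqe(&src);
	io_uring_prep_msg_ring(sqe, dst.ring_fd, /*len=*/0, /*data=*/0x42, 0);
	io_uring_submit(&src);

	/* The message shows up as a completion on the target ring. */
	io_uring_wait_cqe(&dst, &cqe);
	printf("dst got user_data=0x%llx res=%d\n",
	       (unsigned long long)cqe->user_data, cqe->res);
	io_uring_cqe_seen(&dst, cqe);

	io_uring_queue_exit(&src);
	io_uring_queue_exit(&dst);
	return 0;
}

The source ring also receives a CQE for the msg_ring request itself, which a real program would reap as usual.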
While executing in the context of one io_uring instance, msg_ring manipulates
another ring. We're trying to keep CQE posting contained in the context of the
ring-owner task, so use task_work to send the request to the target ring's
task when we're modifying its CQ or trying to install a file. Note, we can't
safely use the io_uring task_work infrastructure here and have to use
task_work directly.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/msg_ring.c | 56 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 53 insertions(+), 3 deletions(-)
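The new path only triggers when the target ring completes CQEs strictly in its owner task and the msg_ring sender runs in a different task. A sketch of what such a target ring's userspace setup presumably looks like, based on the DEFER_TASKRUN / SINGLE_ISSUER work this series builds on; the helper name is made up for illustration, and the link between these setup flags and target_ctx->task_complete is my reading of the series:

#include <liburing.h>

/*
 * Hypothetical helper: create a ring whose completions are only ever
 * posted from its owner task. With such a setup the series routes a
 * msg_ring request arriving from another task through task_work to the
 * owner instead of posting the CQE directly.
 */
static int setup_owner_completed_ring(struct io_uring *ring)
{
	struct io_uring_params p = { };

	p.flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
	return io_uring_queue_init_params(8, ring, &p);
}

The thread that creates this ring must be the one submitting and waiting on it; the msg_ring sender can be any other thread or process holding the ring's fd.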