diff mbox series

[v2,02/14] io_uring/cmd: fix tw <-> issue_flags conversion

Message ID c48a7f3919eecbee0319020fd640e6b3d2e60e5f.1710720150.git.asml.silence@gmail.com (mailing list archive)
State New, archived
Headers show
Series remove aux CQE caches | expand

Commit Message

Pavel Begunkov March 18, 2024, 12:41 a.m. UTC
!IO_URING_F_UNLOCKED does not translate to availability of the deferred
completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
pass and look for to use io_req_complete_defer() and other variants.

Luckily, it's not a real problem as two wrongs actually made it right,
at least as far as io_uring_cmd_work() goes.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/uring_cmd.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

Comments

Ming Lei March 18, 2024, 2:23 a.m. UTC | #1
On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
> pass and look for to use io_req_complete_defer() and other variants.
> 
> Luckily, it's not a real problem as two wrongs actually made it right,
> at least as far as io_uring_cmd_work() goes.
> 
> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  io_uring/uring_cmd.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
> index f197e8c22965..ec38a8d4836d 100644
> --- a/io_uring/uring_cmd.c
> +++ b/io_uring/uring_cmd.c
> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
>  static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
>  {
>  	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
> -	unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
> +	unsigned issue_flags = IO_URING_F_UNLOCKED;
> +
> +	/* locked task_work executor checks the deffered list completion */
> +	if (ts->locked)
> +		issue_flags = IO_URING_F_COMPLETE_DEFER;
>  
>  	ioucmd->task_work_cb(ioucmd, issue_flags);
>  }
> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
>  	if (req->ctx->flags & IORING_SETUP_IOPOLL) {
>  		/* order with io_iopoll_req_issued() checking ->iopoll_complete */
>  		smp_store_release(&req->iopoll_completed, 1);
> -	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
> +	} else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
> +		if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
> +			return;
>  		io_req_complete_defer(req);
>  	} else {
>  		req->io_task_work.func = io_req_task_complete;

'git-bisect' shows the reported warning starts from this patch.

Thanks,
Ming
Jens Axboe March 18, 2024, 2:25 a.m. UTC | #2
On 3/17/24 8:23 PM, Ming Lei wrote:
> On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
>> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
>> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
>> pass and look for to use io_req_complete_defer() and other variants.
>>
>> Luckily, it's not a real problem as two wrongs actually made it right,
>> at least as far as io_uring_cmd_work() goes.
>>
>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  io_uring/uring_cmd.c | 10 ++++++++--
>>  1 file changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>> index f197e8c22965..ec38a8d4836d 100644
>> --- a/io_uring/uring_cmd.c
>> +++ b/io_uring/uring_cmd.c
>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
>>  static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
>>  {
>>  	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
>> -	unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
>> +	unsigned issue_flags = IO_URING_F_UNLOCKED;
>> +
>> +	/* locked task_work executor checks the deffered list completion */
>> +	if (ts->locked)
>> +		issue_flags = IO_URING_F_COMPLETE_DEFER;
>>  
>>  	ioucmd->task_work_cb(ioucmd, issue_flags);
>>  }
>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
>>  	if (req->ctx->flags & IORING_SETUP_IOPOLL) {
>>  		/* order with io_iopoll_req_issued() checking ->iopoll_complete */
>>  		smp_store_release(&req->iopoll_completed, 1);
>> -	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
>> +	} else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
>> +		if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
>> +			return;
>>  		io_req_complete_defer(req);
>>  	} else {
>>  		req->io_task_work.func = io_req_task_complete;
> 
> 'git-bisect' shows the reported warning starts from this patch.

That does make sense, as probably:

+	/* locked task_work executor checks the deffered list completion */
+	if (ts->locked)
+		issue_flags = IO_URING_F_COMPLETE_DEFER;

this assumption isn't true, and that would mess with the task management
(which is in your oops).
Pavel Begunkov March 18, 2024, 2:32 a.m. UTC | #3
On 3/18/24 02:25, Jens Axboe wrote:
> On 3/17/24 8:23 PM, Ming Lei wrote:
>> On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
>>> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
>>> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
>>> pass and look for to use io_req_complete_defer() and other variants.
>>>
>>> Luckily, it's not a real problem as two wrongs actually made it right,
>>> at least as far as io_uring_cmd_work() goes.
>>>
>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>

oops, I should've removed all the signed-offs

>>> ---
>>>   io_uring/uring_cmd.c | 10 ++++++++--
>>>   1 file changed, 8 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>>> index f197e8c22965..ec38a8d4836d 100644
>>> --- a/io_uring/uring_cmd.c
>>> +++ b/io_uring/uring_cmd.c
>>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
>>>   static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
>>>   {
>>>   	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
>>> -	unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
>>> +	unsigned issue_flags = IO_URING_F_UNLOCKED;
>>> +
>>> +	/* locked task_work executor checks the deffered list completion */
>>> +	if (ts->locked)
>>> +		issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>   
>>>   	ioucmd->task_work_cb(ioucmd, issue_flags);
>>>   }
>>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
>>>   	if (req->ctx->flags & IORING_SETUP_IOPOLL) {
>>>   		/* order with io_iopoll_req_issued() checking ->iopoll_complete */
>>>   		smp_store_release(&req->iopoll_completed, 1);
>>> -	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
>>> +	} else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
>>> +		if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
>>> +			return;
>>>   		io_req_complete_defer(req);
>>>   	} else {
>>>   		req->io_task_work.func = io_req_task_complete;
>>
>> 'git-bisect' shows the reported warning starts from this patch.

Thanks Ming

> 
> That does make sense, as probably:
> 
> +	/* locked task_work executor checks the deffered list completion */
> +	if (ts->locked)
> +		issue_flags = IO_URING_F_COMPLETE_DEFER;
> 
> this assumption isn't true, and that would mess with the task management
> (which is in your oops).

I'm missing it, how it's not true?


static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
{
	...
	if (ts->locked) {
		io_submit_flush_completions(ctx);
		...
	}
}

static __cold void io_fallback_req_func(struct work_struct *work)
{
	...
	mutex_lock(&ctx->uring_lock);
	llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
		req->io_task_work.func(req, &ts);
	io_submit_flush_completions(ctx);
	mutex_unlock(&ctx->uring_lock);
	...
}
Jens Axboe March 18, 2024, 2:40 a.m. UTC | #4
On 3/17/24 8:32 PM, Pavel Begunkov wrote:
> On 3/18/24 02:25, Jens Axboe wrote:
>> On 3/17/24 8:23 PM, Ming Lei wrote:
>>> On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
>>>> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
>>>> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
>>>> pass and look for to use io_req_complete_defer() and other variants.
>>>>
>>>> Luckily, it's not a real problem as two wrongs actually made it right,
>>>> at least as far as io_uring_cmd_work() goes.
>>>>
>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
>>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> 
> oops, I should've removed all the signed-offs
> 
>>>> ---
>>>>   io_uring/uring_cmd.c | 10 ++++++++--
>>>>   1 file changed, 8 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>>>> index f197e8c22965..ec38a8d4836d 100644
>>>> --- a/io_uring/uring_cmd.c
>>>> +++ b/io_uring/uring_cmd.c
>>>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
>>>>   static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
>>>>   {
>>>>       struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
>>>> -    unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
>>>> +    unsigned issue_flags = IO_URING_F_UNLOCKED;
>>>> +
>>>> +    /* locked task_work executor checks the deffered list completion */
>>>> +    if (ts->locked)
>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>         ioucmd->task_work_cb(ioucmd, issue_flags);
>>>>   }
>>>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
>>>>       if (req->ctx->flags & IORING_SETUP_IOPOLL) {
>>>>           /* order with io_iopoll_req_issued() checking ->iopoll_complete */
>>>>           smp_store_release(&req->iopoll_completed, 1);
>>>> -    } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
>>>> +    } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
>>>> +        if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
>>>> +            return;
>>>>           io_req_complete_defer(req);
>>>>       } else {
>>>>           req->io_task_work.func = io_req_task_complete;
>>>
>>> 'git-bisect' shows the reported warning starts from this patch.
> 
> Thanks Ming
> 
>>
>> That does make sense, as probably:
>>
>> +    /* locked task_work executor checks the deffered list completion */
>> +    if (ts->locked)
>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>
>> this assumption isn't true, and that would mess with the task management
>> (which is in your oops).
> 
> I'm missing it, how it's not true?
> 
> 
> static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
> {
>     ...
>     if (ts->locked) {
>         io_submit_flush_completions(ctx);
>         ...
>     }
> }
> 
> static __cold void io_fallback_req_func(struct work_struct *work)
> {
>     ...
>     mutex_lock(&ctx->uring_lock);
>     llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
>         req->io_task_work.func(req, &ts);
>     io_submit_flush_completions(ctx);
>     mutex_unlock(&ctx->uring_lock);
>     ...
> }

I took a look too, and don't immediately see it. Those are also the two
only cases I found, and before the patches, looks fine too.

So no immediate answer there... But I can confirm that before this
patch, test passes fine. With the patch, it goes boom pretty quick.
Either directly off putting the task, or an unrelated memory crash
instead.
Pavel Begunkov March 18, 2024, 2:43 a.m. UTC | #5
On 3/18/24 02:40, Jens Axboe wrote:
> On 3/17/24 8:32 PM, Pavel Begunkov wrote:
>> On 3/18/24 02:25, Jens Axboe wrote:
>>> On 3/17/24 8:23 PM, Ming Lei wrote:
>>>> On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
>>>>> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
>>>>> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
>>>>> pass and look for to use io_req_complete_defer() and other variants.
>>>>>
>>>>> Luckily, it's not a real problem as two wrongs actually made it right,
>>>>> at least as far as io_uring_cmd_work() goes.
>>>>>
>>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>>> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
>>>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>
>> oops, I should've removed all the signed-offs
>>
>>>>> ---
>>>>>    io_uring/uring_cmd.c | 10 ++++++++--
>>>>>    1 file changed, 8 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>>>>> index f197e8c22965..ec38a8d4836d 100644
>>>>> --- a/io_uring/uring_cmd.c
>>>>> +++ b/io_uring/uring_cmd.c
>>>>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
>>>>>    static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
>>>>>    {
>>>>>        struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
>>>>> -    unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
>>>>> +    unsigned issue_flags = IO_URING_F_UNLOCKED;
>>>>> +
>>>>> +    /* locked task_work executor checks the deffered list completion */
>>>>> +    if (ts->locked)
>>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>>          ioucmd->task_work_cb(ioucmd, issue_flags);
>>>>>    }
>>>>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
>>>>>        if (req->ctx->flags & IORING_SETUP_IOPOLL) {
>>>>>            /* order with io_iopoll_req_issued() checking ->iopoll_complete */
>>>>>            smp_store_release(&req->iopoll_completed, 1);
>>>>> -    } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
>>>>> +    } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
>>>>> +        if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
>>>>> +            return;
>>>>>            io_req_complete_defer(req);
>>>>>        } else {
>>>>>            req->io_task_work.func = io_req_task_complete;
>>>>
>>>> 'git-bisect' shows the reported warning starts from this patch.
>>
>> Thanks Ming
>>
>>>
>>> That does make sense, as probably:
>>>
>>> +    /* locked task_work executor checks the deffered list completion */
>>> +    if (ts->locked)
>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>
>>> this assumption isn't true, and that would mess with the task management
>>> (which is in your oops).
>>
>> I'm missing it, how it's not true?
>>
>>
>> static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
>> {
>>      ...
>>      if (ts->locked) {
>>          io_submit_flush_completions(ctx);
>>          ...
>>      }
>> }
>>
>> static __cold void io_fallback_req_func(struct work_struct *work)
>> {
>>      ...
>>      mutex_lock(&ctx->uring_lock);
>>      llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
>>          req->io_task_work.func(req, &ts);
>>      io_submit_flush_completions(ctx);
>>      mutex_unlock(&ctx->uring_lock);
>>      ...
>> }
> 
> I took a look too, and don't immediately see it. Those are also the two
> only cases I found, and before the patches, looks fine too.
> 
> So no immediate answer there... But I can confirm that before this
> patch, test passes fine. With the patch, it goes boom pretty quick.

Which test is that? ublk generic/004?

> Either directly off putting the task, or an unrelated memory crash
> instead.

If tw locks it should be checking for deferred completions,
that's the rule. Maybe there is some rouge conversion and locked
came not from task work executor... I'll need to stare more
closely
Jens Axboe March 18, 2024, 2:46 a.m. UTC | #6
On 3/17/24 8:43 PM, Pavel Begunkov wrote:
>>>>>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>>>>>> index f197e8c22965..ec38a8d4836d 100644
>>>>>> --- a/io_uring/uring_cmd.c
>>>>>> +++ b/io_uring/uring_cmd.c
>>>>>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
>>>>>>    static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
>>>>>>    {
>>>>>>        struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
>>>>>> -    unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
>>>>>> +    unsigned issue_flags = IO_URING_F_UNLOCKED;
>>>>>> +
>>>>>> +    /* locked task_work executor checks the deffered list completion */
>>>>>> +    if (ts->locked)
>>>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>>>          ioucmd->task_work_cb(ioucmd, issue_flags);
>>>>>>    }
>>>>>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
>>>>>>        if (req->ctx->flags & IORING_SETUP_IOPOLL) {
>>>>>>            /* order with io_iopoll_req_issued() checking ->iopoll_complete */
>>>>>>            smp_store_release(&req->iopoll_completed, 1);
>>>>>> -    } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
>>>>>> +    } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
>>>>>> +        if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
>>>>>> +            return;
>>>>>>            io_req_complete_defer(req);
>>>>>>        } else {
>>>>>>            req->io_task_work.func = io_req_task_complete;
>>>>>
>>>>> 'git-bisect' shows the reported warning starts from this patch.
>>>
>>> Thanks Ming
>>>
>>>>
>>>> That does make sense, as probably:
>>>>
>>>> +    /* locked task_work executor checks the deffered list completion */
>>>> +    if (ts->locked)
>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>
>>>> this assumption isn't true, and that would mess with the task management
>>>> (which is in your oops).
>>>
>>> I'm missing it, how it's not true?
>>>
>>>
>>> static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
>>> {
>>>      ...
>>>      if (ts->locked) {
>>>          io_submit_flush_completions(ctx);
>>>          ...
>>>      }
>>> }
>>>
>>> static __cold void io_fallback_req_func(struct work_struct *work)
>>> {
>>>      ...
>>>      mutex_lock(&ctx->uring_lock);
>>>      llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
>>>          req->io_task_work.func(req, &ts);
>>>      io_submit_flush_completions(ctx);
>>>      mutex_unlock(&ctx->uring_lock);
>>>      ...
>>> }
>>
>> I took a look too, and don't immediately see it. Those are also the two
>> only cases I found, and before the patches, looks fine too.
>>
>> So no immediate answer there... But I can confirm that before this
>> patch, test passes fine. With the patch, it goes boom pretty quick.
> 
> Which test is that? ublk generic/004?

Yep, it won't make it through one run of:

sudo make test T=generic/004

with it.

>> Either directly off putting the task, or an unrelated memory crash
>> instead.
> 
> If tw locks it should be checking for deferred completions,
> that's the rule. Maybe there is some rouge conversion and locked
> came not from task work executor... I'll need to stare more
> closely

Yeah not sure what it is, but I hope you can reproduce with the above.
Ming Lei March 18, 2024, 2:47 a.m. UTC | #7
On Sun, Mar 17, 2024 at 08:40:59PM -0600, Jens Axboe wrote:
> On 3/17/24 8:32 PM, Pavel Begunkov wrote:
> > On 3/18/24 02:25, Jens Axboe wrote:
> >> On 3/17/24 8:23 PM, Ming Lei wrote:
> >>> On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
> >>>> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
> >>>> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
> >>>> pass and look for to use io_req_complete_defer() and other variants.
> >>>>
> >>>> Luckily, it's not a real problem as two wrongs actually made it right,
> >>>> at least as far as io_uring_cmd_work() goes.
> >>>>
> >>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> >>>> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
> >>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> > 
> > oops, I should've removed all the signed-offs
> > 
> >>>> ---
> >>>>   io_uring/uring_cmd.c | 10 ++++++++--
> >>>>   1 file changed, 8 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
> >>>> index f197e8c22965..ec38a8d4836d 100644
> >>>> --- a/io_uring/uring_cmd.c
> >>>> +++ b/io_uring/uring_cmd.c
> >>>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
> >>>>   static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
> >>>>   {
> >>>>       struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
> >>>> -    unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
> >>>> +    unsigned issue_flags = IO_URING_F_UNLOCKED;
> >>>> +
> >>>> +    /* locked task_work executor checks the deffered list completion */
> >>>> +    if (ts->locked)
> >>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
> >>>>         ioucmd->task_work_cb(ioucmd, issue_flags);
> >>>>   }
> >>>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
> >>>>       if (req->ctx->flags & IORING_SETUP_IOPOLL) {
> >>>>           /* order with io_iopoll_req_issued() checking ->iopoll_complete */
> >>>>           smp_store_release(&req->iopoll_completed, 1);
> >>>> -    } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
> >>>> +    } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
> >>>> +        if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
> >>>> +            return;
> >>>>           io_req_complete_defer(req);
> >>>>       } else {
> >>>>           req->io_task_work.func = io_req_task_complete;
> >>>
> >>> 'git-bisect' shows the reported warning starts from this patch.
> > 
> > Thanks Ming
> > 
> >>
> >> That does make sense, as probably:
> >>
> >> +    /* locked task_work executor checks the deffered list completion */
> >> +    if (ts->locked)
> >> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
> >>
> >> this assumption isn't true, and that would mess with the task management
> >> (which is in your oops).
> > 
> > I'm missing it, how it's not true?
> > 
> > 
> > static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
> > {
> >     ...
> >     if (ts->locked) {
> >         io_submit_flush_completions(ctx);
> >         ...
> >     }
> > }
> > 
> > static __cold void io_fallback_req_func(struct work_struct *work)
> > {
> >     ...
> >     mutex_lock(&ctx->uring_lock);
> >     llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
> >         req->io_task_work.func(req, &ts);
> >     io_submit_flush_completions(ctx);
> >     mutex_unlock(&ctx->uring_lock);
> >     ...
> > }
> 
> I took a look too, and don't immediately see it. Those are also the two
> only cases I found, and before the patches, looks fine too.
> 
> So no immediate answer there... But I can confirm that before this
> patch, test passes fine. With the patch, it goes boom pretty quick.
> Either directly off putting the task, or an unrelated memory crash
> instead.

In ublk, the translated 'issue_flags' is passed to io_uring_cmd_done()
from ioucmd->task_work_cb()(__ublk_rq_task_work()). That might be
related with the reason.


Thanks,
Ming
Jens Axboe March 18, 2024, 3:11 a.m. UTC | #8
On 3/17/24 8:47 PM, Ming Lei wrote:
> On Sun, Mar 17, 2024 at 08:40:59PM -0600, Jens Axboe wrote:
>> On 3/17/24 8:32 PM, Pavel Begunkov wrote:
>>> On 3/18/24 02:25, Jens Axboe wrote:
>>>> On 3/17/24 8:23 PM, Ming Lei wrote:
>>>>> On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
>>>>>> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
>>>>>> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
>>>>>> pass and look for to use io_req_complete_defer() and other variants.
>>>>>>
>>>>>> Luckily, it's not a real problem as two wrongs actually made it right,
>>>>>> at least as far as io_uring_cmd_work() goes.
>>>>>>
>>>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>>>> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
>>>>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>>
>>> oops, I should've removed all the signed-offs
>>>
>>>>>> ---
>>>>>>   io_uring/uring_cmd.c | 10 ++++++++--
>>>>>>   1 file changed, 8 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>>>>>> index f197e8c22965..ec38a8d4836d 100644
>>>>>> --- a/io_uring/uring_cmd.c
>>>>>> +++ b/io_uring/uring_cmd.c
>>>>>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
>>>>>>   static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
>>>>>>   {
>>>>>>       struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
>>>>>> -    unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
>>>>>> +    unsigned issue_flags = IO_URING_F_UNLOCKED;
>>>>>> +
>>>>>> +    /* locked task_work executor checks the deffered list completion */
>>>>>> +    if (ts->locked)
>>>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>>>         ioucmd->task_work_cb(ioucmd, issue_flags);
>>>>>>   }
>>>>>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
>>>>>>       if (req->ctx->flags & IORING_SETUP_IOPOLL) {
>>>>>>           /* order with io_iopoll_req_issued() checking ->iopoll_complete */
>>>>>>           smp_store_release(&req->iopoll_completed, 1);
>>>>>> -    } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
>>>>>> +    } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
>>>>>> +        if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
>>>>>> +            return;
>>>>>>           io_req_complete_defer(req);
>>>>>>       } else {
>>>>>>           req->io_task_work.func = io_req_task_complete;
>>>>>
>>>>> 'git-bisect' shows the reported warning starts from this patch.
>>>
>>> Thanks Ming
>>>
>>>>
>>>> That does make sense, as probably:
>>>>
>>>> +    /* locked task_work executor checks the deffered list completion */
>>>> +    if (ts->locked)
>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>
>>>> this assumption isn't true, and that would mess with the task management
>>>> (which is in your oops).
>>>
>>> I'm missing it, how it's not true?
>>>
>>>
>>> static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
>>> {
>>>     ...
>>>     if (ts->locked) {
>>>         io_submit_flush_completions(ctx);
>>>         ...
>>>     }
>>> }
>>>
>>> static __cold void io_fallback_req_func(struct work_struct *work)
>>> {
>>>     ...
>>>     mutex_lock(&ctx->uring_lock);
>>>     llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
>>>         req->io_task_work.func(req, &ts);
>>>     io_submit_flush_completions(ctx);
>>>     mutex_unlock(&ctx->uring_lock);
>>>     ...
>>> }
>>
>> I took a look too, and don't immediately see it. Those are also the two
>> only cases I found, and before the patches, looks fine too.
>>
>> So no immediate answer there... But I can confirm that before this
>> patch, test passes fine. With the patch, it goes boom pretty quick.
>> Either directly off putting the task, or an unrelated memory crash
>> instead.
> 
> In ublk, the translated 'issue_flags' is passed to io_uring_cmd_done()
> from ioucmd->task_work_cb()(__ublk_rq_task_work()). That might be
> related with the reason.

Or maybe ublk is doing multiple invocations of task_work completions? I
added this:

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a2cb8da3cc33..ba7641b380a9 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -739,6 +739,7 @@ static void io_put_task_remote(struct task_struct *task)
 {
        struct io_uring_task *tctx = task->io_uring;
 
+       WARN_ON_ONCE(!percpu_counter_read(&tctx->inflight));
        percpu_counter_sub(&tctx->inflight, 1);
        if (unlikely(atomic_read(&tctx->in_cancel)))
                wake_up(&tctx->wait);

and hit this:

[   77.386845] ------------[ cut here ]------------
[   77.387128] WARNING: CPU: 5 PID: 109 at io_uring/io_uring.c:742
io_put_task_remote+0x164/0x1a8
[   77.387608] Modules linked in:
[   77.387784] CPU: 5 PID: 109 Comm: kworker/5:1 Not tainted
6.8.0-11436-g340741d86a53-dirty #5750
[   77.388277] Hardware name: linux,dummy-virt (DT)
[   77.388601] Workqueue: events io_fallback_req_func
[   77.388930] pstate: 81400005 (Nzcv daif +PAN -UAO -TCO +DIT -SSBS
BTYPE=--)
[   77.389402] pc : io_put_task_remote+0x164/0x1a8
[   77.389711] lr : __io_submit_flush_completions+0x8b8/0x1308
[   77.390087] sp : ffff800087327a60
[   77.390317] x29: ffff800087327a60 x28: 1fffe0002040b329 x27:
1fffe0002040b32f
[   77.390817] x26: ffff000103c4e900 x25: ffff000102059900 x24:
ffff000104670000
[   77.391314] x23: ffff0000d2195000 x22: 00000000002ce20c x21:
ffff0000ced4fcc8
[   77.391787] x20: ffff0000ced4fc00 x19: ffff000103c4e900 x18:
0000000000000000
[   77.392209] x17: ffff8000814b0c34 x16: ffff8000814affac x15:
ffff8000814ac4a8
[   77.392633] x14: ffff80008069327c x13: ffff800080018c9c x12:
ffff600020789d26
[   77.393055] x11: 1fffe00020789d25 x10: ffff600020789d25 x9 :
dfff800000000000
[   77.393479] x8 : 00009fffdf8762db x7 : ffff000103c4e92b x6 :
0000000000000001
[   77.393904] x5 : ffff000103c4e928 x4 : ffff600020789d26 x3 :
1fffe0001a432a7a
[   77.394334] x2 : 1fffe00019da9f9a x1 : 0000000000000000 x0 :
0000000000000000
[   77.394761] Call trace:
[   77.394913]  io_put_task_remote+0x164/0x1a8
[   77.395168]  __io_submit_flush_completions+0x8b8/0x1308
[   77.395481]  io_fallback_req_func+0x138/0x1e8
[   77.395742]  process_one_work+0x538/0x1048
[   77.395992]  worker_thread+0x760/0xbd4
[   77.396221]  kthread+0x2dc/0x368
[   77.396417]  ret_from_fork+0x10/0x20
[   77.396634] ---[ end trace 0000000000000000 ]---
[   77.397706] ------------[ cut here ]------------

which is showing either an imbalance in the task references, or multiple
completions from the same io_uring request.

Anyway, I'll pop back in tomrrow, but hopefully the above is somewhat
useful at least. I'd suspect the __ublk_rq_task_work() abort check for
current != ubq->ubq_daemon and what happens off that.
Pavel Begunkov March 18, 2024, 3:24 a.m. UTC | #9
On 3/18/24 03:11, Jens Axboe wrote:
> On 3/17/24 8:47 PM, Ming Lei wrote:
>> On Sun, Mar 17, 2024 at 08:40:59PM -0600, Jens Axboe wrote:
>>> On 3/17/24 8:32 PM, Pavel Begunkov wrote:
>>>> On 3/18/24 02:25, Jens Axboe wrote:
>>>>> On 3/17/24 8:23 PM, Ming Lei wrote:
>>>>>> On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
>>>>>>> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
>>>>>>> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
>>>>>>> pass and look for to use io_req_complete_defer() and other variants.
>>>>>>>
>>>>>>> Luckily, it's not a real problem as two wrongs actually made it right,
>>>>>>> at least as far as io_uring_cmd_work() goes.
>>>>>>>
>>>>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>>>>> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
>>>>>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>>>
>>>> oops, I should've removed all the signed-offs
>>>>
>>>>>>> ---
>>>>>>>    io_uring/uring_cmd.c | 10 ++++++++--
>>>>>>>    1 file changed, 8 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>>>>>>> index f197e8c22965..ec38a8d4836d 100644
>>>>>>> --- a/io_uring/uring_cmd.c
>>>>>>> +++ b/io_uring/uring_cmd.c
>>>>>>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
>>>>>>>    static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
>>>>>>>    {
>>>>>>>        struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
>>>>>>> -    unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
>>>>>>> +    unsigned issue_flags = IO_URING_F_UNLOCKED;
>>>>>>> +
>>>>>>> +    /* locked task_work executor checks the deffered list completion */
>>>>>>> +    if (ts->locked)
>>>>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>>>>          ioucmd->task_work_cb(ioucmd, issue_flags);
>>>>>>>    }
>>>>>>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
>>>>>>>        if (req->ctx->flags & IORING_SETUP_IOPOLL) {
>>>>>>>            /* order with io_iopoll_req_issued() checking ->iopoll_complete */
>>>>>>>            smp_store_release(&req->iopoll_completed, 1);
>>>>>>> -    } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
>>>>>>> +    } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
>>>>>>> +        if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
>>>>>>> +            return;
>>>>>>>            io_req_complete_defer(req);
>>>>>>>        } else {
>>>>>>>            req->io_task_work.func = io_req_task_complete;
>>>>>>
>>>>>> 'git-bisect' shows the reported warning starts from this patch.
>>>>
>>>> Thanks Ming
>>>>
>>>>>
>>>>> That does make sense, as probably:
>>>>>
>>>>> +    /* locked task_work executor checks the deffered list completion */
>>>>> +    if (ts->locked)
>>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>>
>>>>> this assumption isn't true, and that would mess with the task management
>>>>> (which is in your oops).
>>>>
>>>> I'm missing it, how it's not true?
>>>>
>>>>
>>>> static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
>>>> {
>>>>      ...
>>>>      if (ts->locked) {
>>>>          io_submit_flush_completions(ctx);
>>>>          ...
>>>>      }
>>>> }
>>>>
>>>> static __cold void io_fallback_req_func(struct work_struct *work)
>>>> {
>>>>      ...
>>>>      mutex_lock(&ctx->uring_lock);
>>>>      llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
>>>>          req->io_task_work.func(req, &ts);
>>>>      io_submit_flush_completions(ctx);
>>>>      mutex_unlock(&ctx->uring_lock);
>>>>      ...
>>>> }
>>>
>>> I took a look too, and don't immediately see it. Those are also the two
>>> only cases I found, and before the patches, looks fine too.
>>>
>>> So no immediate answer there... But I can confirm that before this
>>> patch, test passes fine. With the patch, it goes boom pretty quick.
>>> Either directly off putting the task, or an unrelated memory crash
>>> instead.
>>
>> In ublk, the translated 'issue_flags' is passed to io_uring_cmd_done()
>> from ioucmd->task_work_cb()(__ublk_rq_task_work()). That might be
>> related with the reason.
> 
> Or maybe ublk is doing multiple invocations of task_work completions? I
> added this:
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index a2cb8da3cc33..ba7641b380a9 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -739,6 +739,7 @@ static void io_put_task_remote(struct task_struct *task)
>   {
>          struct io_uring_task *tctx = task->io_uring;
>   
> +       WARN_ON_ONCE(!percpu_counter_read(&tctx->inflight));
>          percpu_counter_sub(&tctx->inflight, 1);
>          if (unlikely(atomic_read(&tctx->in_cancel)))
>                  wake_up(&tctx->wait);
> 
> and hit this:
> 
> [   77.386845] ------------[ cut here ]------------
> [   77.387128] WARNING: CPU: 5 PID: 109 at io_uring/io_uring.c:742
> io_put_task_remote+0x164/0x1a8
> [   77.387608] Modules linked in:
> [   77.387784] CPU: 5 PID: 109 Comm: kworker/5:1 Not tainted
> 6.8.0-11436-g340741d86a53-dirty #5750
> [   77.388277] Hardware name: linux,dummy-virt (DT)
> [   77.388601] Workqueue: events io_fallback_req_func
> [   77.388930] pstate: 81400005 (Nzcv daif +PAN -UAO -TCO +DIT -SSBS
> BTYPE=--)
> [   77.389402] pc : io_put_task_remote+0x164/0x1a8
> [   77.389711] lr : __io_submit_flush_completions+0x8b8/0x1308
> [   77.390087] sp : ffff800087327a60
> [   77.390317] x29: ffff800087327a60 x28: 1fffe0002040b329 x27:
> 1fffe0002040b32f
> [   77.390817] x26: ffff000103c4e900 x25: ffff000102059900 x24:
> ffff000104670000
> [   77.391314] x23: ffff0000d2195000 x22: 00000000002ce20c x21:
> ffff0000ced4fcc8
> [   77.391787] x20: ffff0000ced4fc00 x19: ffff000103c4e900 x18:
> 0000000000000000
> [   77.392209] x17: ffff8000814b0c34 x16: ffff8000814affac x15:
> ffff8000814ac4a8
> [   77.392633] x14: ffff80008069327c x13: ffff800080018c9c x12:
> ffff600020789d26
> [   77.393055] x11: 1fffe00020789d25 x10: ffff600020789d25 x9 :
> dfff800000000000
> [   77.393479] x8 : 00009fffdf8762db x7 : ffff000103c4e92b x6 :
> 0000000000000001
> [   77.393904] x5 : ffff000103c4e928 x4 : ffff600020789d26 x3 :
> 1fffe0001a432a7a
> [   77.394334] x2 : 1fffe00019da9f9a x1 : 0000000000000000 x0 :
> 0000000000000000
> [   77.394761] Call trace:
> [   77.394913]  io_put_task_remote+0x164/0x1a8
> [   77.395168]  __io_submit_flush_completions+0x8b8/0x1308
> [   77.395481]  io_fallback_req_func+0x138/0x1e8
> [   77.395742]  process_one_work+0x538/0x1048
> [   77.395992]  worker_thread+0x760/0xbd4
> [   77.396221]  kthread+0x2dc/0x368
> [   77.396417]  ret_from_fork+0x10/0x20
> [   77.396634] ---[ end trace 0000000000000000 ]---
> [   77.397706] ------------[ cut here ]------------
> 
> which is showing either an imbalance in the task references, or multiple
> completions from the same io_uring request.
> 
> Anyway, I'll pop back in tomrrow, but hopefully the above is somewhat
> useful at least. I'd suspect the __ublk_rq_task_work() abort check for
> current != ubq->ubq_daemon and what happens off that.

We can enable refcounting for all requests, then it should trigger
on double free. i.e. adding io_req_set_refcount(req) somewhere in
io_init_req().
Ming Lei March 18, 2024, 6:59 a.m. UTC | #10
On Sun, Mar 17, 2024 at 09:11:27PM -0600, Jens Axboe wrote:
> On 3/17/24 8:47 PM, Ming Lei wrote:
> > On Sun, Mar 17, 2024 at 08:40:59PM -0600, Jens Axboe wrote:
> >> On 3/17/24 8:32 PM, Pavel Begunkov wrote:
> >>> On 3/18/24 02:25, Jens Axboe wrote:
> >>>> On 3/17/24 8:23 PM, Ming Lei wrote:
> >>>>> On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
> >>>>>> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
> >>>>>> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
> >>>>>> pass and look for to use io_req_complete_defer() and other variants.
> >>>>>>
> >>>>>> Luckily, it's not a real problem as two wrongs actually made it right,
> >>>>>> at least as far as io_uring_cmd_work() goes.
> >>>>>>
> >>>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
> >>>>>> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
> >>>>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> >>>
> >>> oops, I should've removed all the signed-offs
> >>>
> >>>>>> ---
> >>>>>>   io_uring/uring_cmd.c | 10 ++++++++--
> >>>>>>   1 file changed, 8 insertions(+), 2 deletions(-)
> >>>>>>
> >>>>>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
> >>>>>> index f197e8c22965..ec38a8d4836d 100644
> >>>>>> --- a/io_uring/uring_cmd.c
> >>>>>> +++ b/io_uring/uring_cmd.c
> >>>>>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
> >>>>>>   static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
> >>>>>>   {
> >>>>>>       struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
> >>>>>> -    unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
> >>>>>> +    unsigned issue_flags = IO_URING_F_UNLOCKED;
> >>>>>> +
> >>>>>> +    /* locked task_work executor checks the deffered list completion */
> >>>>>> +    if (ts->locked)
> >>>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
> >>>>>>         ioucmd->task_work_cb(ioucmd, issue_flags);
> >>>>>>   }
> >>>>>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
> >>>>>>       if (req->ctx->flags & IORING_SETUP_IOPOLL) {
> >>>>>>           /* order with io_iopoll_req_issued() checking ->iopoll_complete */
> >>>>>>           smp_store_release(&req->iopoll_completed, 1);
> >>>>>> -    } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
> >>>>>> +    } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
> >>>>>> +        if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
> >>>>>> +            return;
> >>>>>>           io_req_complete_defer(req);
> >>>>>>       } else {
> >>>>>>           req->io_task_work.func = io_req_task_complete;
> >>>>>
> >>>>> 'git-bisect' shows the reported warning starts from this patch.
> >>>
> >>> Thanks Ming
> >>>
> >>>>
> >>>> That does make sense, as probably:
> >>>>
> >>>> +    /* locked task_work executor checks the deffered list completion */
> >>>> +    if (ts->locked)
> >>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
> >>>>
> >>>> this assumption isn't true, and that would mess with the task management
> >>>> (which is in your oops).
> >>>
> >>> I'm missing it, how it's not true?
> >>>
> >>>
> >>> static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
> >>> {
> >>>     ...
> >>>     if (ts->locked) {
> >>>         io_submit_flush_completions(ctx);
> >>>         ...
> >>>     }
> >>> }
> >>>
> >>> static __cold void io_fallback_req_func(struct work_struct *work)
> >>> {
> >>>     ...
> >>>     mutex_lock(&ctx->uring_lock);
> >>>     llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
> >>>         req->io_task_work.func(req, &ts);
> >>>     io_submit_flush_completions(ctx);
> >>>     mutex_unlock(&ctx->uring_lock);
> >>>     ...
> >>> }
> >>
> >> I took a look too, and don't immediately see it. Those are also the two
> >> only cases I found, and before the patches, looks fine too.
> >>
> >> So no immediate answer there... But I can confirm that before this
> >> patch, test passes fine. With the patch, it goes boom pretty quick.
> >> Either directly off putting the task, or an unrelated memory crash
> >> instead.
> > 
> > In ublk, the translated 'issue_flags' is passed to io_uring_cmd_done()
> > from ioucmd->task_work_cb()(__ublk_rq_task_work()). That might be
> > related with the reason.
> 
> Or maybe ublk is doing multiple invocations of task_work completions? I
> added this:

Yes, your debug log & point does help.

This patch convert zero flag(!IO_URING_F_UNLOCKED) into IO_URING_F_COMPLETE_DEFER,
and somewhere is easily ignored, and follows the fix, which need to be
folded into patch 2.

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5d4b448fdc50..22f2b52390a9 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3259,7 +3259,8 @@ static bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
                        /* ->sqe isn't available if no async data */
                        if (!req_has_async_data(req))
                                cmd->sqe = NULL;
-                       file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL);
+                       file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL |
+                                       IO_URING_F_COMPLETE_DEFER);
                        ret = true;
                }
        }


Thanks,
Ming
Pavel Begunkov March 18, 2024, 11:45 a.m. UTC | #11
On 3/18/24 06:59, Ming Lei wrote:
> On Sun, Mar 17, 2024 at 09:11:27PM -0600, Jens Axboe wrote:
>> On 3/17/24 8:47 PM, Ming Lei wrote:
>>> On Sun, Mar 17, 2024 at 08:40:59PM -0600, Jens Axboe wrote:
>>>> On 3/17/24 8:32 PM, Pavel Begunkov wrote:
>>>>> On 3/18/24 02:25, Jens Axboe wrote:
>>>>>> On 3/17/24 8:23 PM, Ming Lei wrote:
>>>>>>> On Mon, Mar 18, 2024 at 12:41:47AM +0000, Pavel Begunkov wrote:
>>>>>>>> !IO_URING_F_UNLOCKED does not translate to availability of the deferred
>>>>>>>> completion infra, IO_URING_F_COMPLETE_DEFER does, that what we should
>>>>>>>> pass and look for to use io_req_complete_defer() and other variants.
>>>>>>>>
>>>>>>>> Luckily, it's not a real problem as two wrongs actually made it right,
>>>>>>>> at least as far as io_uring_cmd_work() goes.
>>>>>>>>
>>>>>>>> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
>>>>>>>> Link: https://lore.kernel.org/r/eb08e72e837106963bc7bc7dccfd93d646cc7f36.1710514702.git.asml.silence@gmail.com
>>>>>>>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>>>>>
>>>>> oops, I should've removed all the signed-offs
>>>>>
>>>>>>>> ---
>>>>>>>>    io_uring/uring_cmd.c | 10 ++++++++--
>>>>>>>>    1 file changed, 8 insertions(+), 2 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>>>>>>>> index f197e8c22965..ec38a8d4836d 100644
>>>>>>>> --- a/io_uring/uring_cmd.c
>>>>>>>> +++ b/io_uring/uring_cmd.c
>>>>>>>> @@ -56,7 +56,11 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
>>>>>>>>    static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
>>>>>>>>    {
>>>>>>>>        struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
>>>>>>>> -    unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
>>>>>>>> +    unsigned issue_flags = IO_URING_F_UNLOCKED;
>>>>>>>> +
>>>>>>>> +    /* locked task_work executor checks the deffered list completion */
>>>>>>>> +    if (ts->locked)
>>>>>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>>>>>          ioucmd->task_work_cb(ioucmd, issue_flags);
>>>>>>>>    }
>>>>>>>> @@ -100,7 +104,9 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
>>>>>>>>        if (req->ctx->flags & IORING_SETUP_IOPOLL) {
>>>>>>>>            /* order with io_iopoll_req_issued() checking ->iopoll_complete */
>>>>>>>>            smp_store_release(&req->iopoll_completed, 1);
>>>>>>>> -    } else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
>>>>>>>> +    } else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
>>>>>>>> +        if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
>>>>>>>> +            return;
>>>>>>>>            io_req_complete_defer(req);
>>>>>>>>        } else {
>>>>>>>>            req->io_task_work.func = io_req_task_complete;
>>>>>>>
>>>>>>> 'git-bisect' shows the reported warning starts from this patch.
>>>>>
>>>>> Thanks Ming
>>>>>
>>>>>>
>>>>>> That does make sense, as probably:
>>>>>>
>>>>>> +    /* locked task_work executor checks the deffered list completion */
>>>>>> +    if (ts->locked)
>>>>>> +        issue_flags = IO_URING_F_COMPLETE_DEFER;
>>>>>>
>>>>>> this assumption isn't true, and that would mess with the task management
>>>>>> (which is in your oops).
>>>>>
>>>>> I'm missing it, how it's not true?
>>>>>
>>>>>
>>>>> static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
>>>>> {
>>>>>      ...
>>>>>      if (ts->locked) {
>>>>>          io_submit_flush_completions(ctx);
>>>>>          ...
>>>>>      }
>>>>> }
>>>>>
>>>>> static __cold void io_fallback_req_func(struct work_struct *work)
>>>>> {
>>>>>      ...
>>>>>      mutex_lock(&ctx->uring_lock);
>>>>>      llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
>>>>>          req->io_task_work.func(req, &ts);
>>>>>      io_submit_flush_completions(ctx);
>>>>>      mutex_unlock(&ctx->uring_lock);
>>>>>      ...
>>>>> }
>>>>
>>>> I took a look too, and don't immediately see it. Those are also the two
>>>> only cases I found, and before the patches, looks fine too.
>>>>
>>>> So no immediate answer there... But I can confirm that before this
>>>> patch, test passes fine. With the patch, it goes boom pretty quick.
>>>> Either directly off putting the task, or an unrelated memory crash
>>>> instead.
>>>
>>> In ublk, the translated 'issue_flags' is passed to io_uring_cmd_done()
>>> from ioucmd->task_work_cb()(__ublk_rq_task_work()). That might be
>>> related with the reason.
>>
>> Or maybe ublk is doing multiple invocations of task_work completions? I
>> added this:
> 
> Yes, your debug log & point does help.
> 
> This patch convert zero flag(!IO_URING_F_UNLOCKED) into IO_URING_F_COMPLETE_DEFER,
> and somewhere is easily ignored, and follows the fix, which need to be
> folded into patch 2.

Thanks, was totally unaware of this chunk. A side note, it's
better to be moved to uring_cmd, i'll do the change


> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 5d4b448fdc50..22f2b52390a9 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -3259,7 +3259,8 @@ static bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
>                          /* ->sqe isn't available if no async data */
>                          if (!req_has_async_data(req))
>                                  cmd->sqe = NULL;
> -                       file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL);
> +                       file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL |
> +                                       IO_URING_F_COMPLETE_DEFER);
>                          ret = true;
>                  }
>          }
diff mbox series

Patch

diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index f197e8c22965..ec38a8d4836d 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -56,7 +56,11 @@  EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
 static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-	unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
+	unsigned issue_flags = IO_URING_F_UNLOCKED;
+
+	/* locked task_work executor checks the deffered list completion */
+	if (ts->locked)
+		issue_flags = IO_URING_F_COMPLETE_DEFER;
 
 	ioucmd->task_work_cb(ioucmd, issue_flags);
 }
@@ -100,7 +104,9 @@  void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
 	if (req->ctx->flags & IORING_SETUP_IOPOLL) {
 		/* order with io_iopoll_req_issued() checking ->iopoll_complete */
 		smp_store_release(&req->iopoll_completed, 1);
-	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
+	} else if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
+		if (WARN_ON_ONCE(issue_flags & IO_URING_F_UNLOCKED))
+			return;
 		io_req_complete_defer(req);
 	} else {
 		req->io_task_work.func = io_req_task_complete;