[-next,v3,3/6] nbd: don't clear 'NBD_CMD_INFLIGHT' flag if request is not completed

Message ID 20220521073749.3146892-4-yukuai3@huawei.com (mailing list archive)
State New, archived
Series nbd: bugfix and cleanup patches

Commit Message

Yu Kuai May 21, 2022, 7:37 a.m. UTC
Otherwise io will hang, because the request will only be completed if
the cmd has the 'NBD_CMD_INFLIGHT' flag set.

Fixes: 07175cb1baf4 ("nbd: make sure request completion won't concurrent")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/block/nbd.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

Comments

Josef Bacik May 23, 2022, 2:12 p.m. UTC | #1
On Sat, May 21, 2022 at 03:37:46PM +0800, Yu Kuai wrote:
> Otherwise io will hang, because the request will only be completed if
> the cmd has the 'NBD_CMD_INFLIGHT' flag set.
> 
> Fixes: 07175cb1baf4 ("nbd: make sure request completion won't concurrent")
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/block/nbd.c | 18 ++++++++++++++----
>  1 file changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 2ee1e376d5c4..a0d0910dae2a 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -403,13 +403,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
>  	if (!mutex_trylock(&cmd->lock))
>  		return BLK_EH_RESET_TIMER;
>  
> -	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
> +	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>  		mutex_unlock(&cmd->lock);
>  		return BLK_EH_DONE;
>  	}
>  
>  	if (!refcount_inc_not_zero(&nbd->config_refs)) {
>  		cmd->status = BLK_STS_TIMEOUT;
> +		__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
>  		mutex_unlock(&cmd->lock);
>  		goto done;
>  	}
> @@ -478,6 +479,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
>  	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
>  	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
>  	cmd->status = BLK_STS_IOERR;
> +	__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
>  	mutex_unlock(&cmd->lock);
>  	sock_shutdown(nbd);
>  	nbd_config_put(nbd);
> @@ -745,7 +747,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
>  	cmd = blk_mq_rq_to_pdu(req);
>  
>  	mutex_lock(&cmd->lock);
> -	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
> +	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>  		dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
>  			tag, cmd->status, cmd->flags);
>  		ret = -ENOENT;
> @@ -854,8 +856,16 @@ static void recv_work(struct work_struct *work)
>  		}
>  
>  		rq = blk_mq_rq_from_pdu(cmd);
> -		if (likely(!blk_should_fake_timeout(rq->q)))
> -			blk_mq_complete_request(rq);
> +		if (likely(!blk_should_fake_timeout(rq->q))) {
> +			bool complete;
> +
> +			mutex_lock(&cmd->lock);
> +			complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
> +							&cmd->flags);
> +			mutex_unlock(&cmd->lock);
> +			if (complete)
> +				blk_mq_complete_request(rq);
> +		}

I'd rather this be handled in nbd_handle_reply.  We should return with it
cleared if it's ready to be completed.  Thanks,
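
For illustration only, something along these lines -- a rough, untested
sketch that reuses the names from the quoted patch and assumes the
function's existing 'ret'/'out:' error-handling structure:

	/*
	 * Tail of nbd_handle_reply(): if the reply was valid (ret == 0),
	 * hand the cmd back with NBD_CMD_INFLIGHT already cleared, so the
	 * caller only has to complete the request.
	 */
out:
	if (!ret)
		__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
	mutex_unlock(&cmd->lock);
	return ret ? ERR_PTR(ret) : cmd;

and recv_work() could then keep the pre-patch two lines:

	rq = blk_mq_rq_from_pdu(cmd);
	if (likely(!blk_should_fake_timeout(rq->q)))
		blk_mq_complete_request(rq);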

Josef
Yu Kuai May 24, 2022, 1:07 a.m. UTC | #2
On 2022/05/23 22:12, Josef Bacik wrote:
> On Sat, May 21, 2022 at 03:37:46PM +0800, Yu Kuai wrote:
>> Otherwise io will hang, because the request will only be completed if
>> the cmd has the 'NBD_CMD_INFLIGHT' flag set.
>>
>> Fixes: 07175cb1baf4 ("nbd: make sure request completion won't concurrent")
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/block/nbd.c | 18 ++++++++++++++----
>>   1 file changed, 14 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
>> index 2ee1e376d5c4..a0d0910dae2a 100644
>> --- a/drivers/block/nbd.c
>> +++ b/drivers/block/nbd.c
>> @@ -403,13 +403,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
>>   	if (!mutex_trylock(&cmd->lock))
>>   		return BLK_EH_RESET_TIMER;
>>   
>> -	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>> +	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>>   		mutex_unlock(&cmd->lock);
>>   		return BLK_EH_DONE;
>>   	}
>>   
>>   	if (!refcount_inc_not_zero(&nbd->config_refs)) {
>>   		cmd->status = BLK_STS_TIMEOUT;
>> +		__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
>>   		mutex_unlock(&cmd->lock);
>>   		goto done;
>>   	}
>> @@ -478,6 +479,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
>>   	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
>>   	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
>>   	cmd->status = BLK_STS_IOERR;
>> +	__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
>>   	mutex_unlock(&cmd->lock);
>>   	sock_shutdown(nbd);
>>   	nbd_config_put(nbd);
>> @@ -745,7 +747,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
>>   	cmd = blk_mq_rq_to_pdu(req);
>>   
>>   	mutex_lock(&cmd->lock);
>> -	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>> +	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>>   		dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
>>   			tag, cmd->status, cmd->flags);
>>   		ret = -ENOENT;
>> @@ -854,8 +856,16 @@ static void recv_work(struct work_struct *work)
>>   		}
>>   
>>   		rq = blk_mq_rq_from_pdu(cmd);
>> -		if (likely(!blk_should_fake_timeout(rq->q)))
>> -			blk_mq_complete_request(rq);
>> +		if (likely(!blk_should_fake_timeout(rq->q))) {
>> +			bool complete;
>> +
>> +			mutex_lock(&cmd->lock);
>> +			complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
>> +							&cmd->flags);
>> +			mutex_unlock(&cmd->lock);
>> +			if (complete)
>> +				blk_mq_complete_request(rq);
>> +		}
> 
> I'd rather this be handled in nbd_handle_reply.  We should return with it
> cleared if it's ready to be completed.  Thanks,
Hi,

Thanks for your advice, I'll do that in the next version. I'll still
have to hold the lock to set the bit again in case
blk_should_fake_timeout() returns true...
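
Something like the following in recv_work(), for example (just a rough,
untested sketch for now):

	rq = blk_mq_rq_from_pdu(cmd);
	if (likely(!blk_should_fake_timeout(rq->q))) {
		blk_mq_complete_request(rq);
	} else {
		/*
		 * Fake timeout: put the flag back under cmd->lock so the
		 * timeout handler still treats the request as inflight
		 * and completes it later.
		 */
		mutex_lock(&cmd->lock);
		__set_bit(NBD_CMD_INFLIGHT, &cmd->flags);
		mutex_unlock(&cmd->lock);
	}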

Thanks,
Kuai
> 
> Josef
> .
>
Yu Kuai May 24, 2022, 1:51 a.m. UTC | #3
On 2022/05/24 9:07, Yu Kuai wrote:
> On 2022/05/23 22:12, Josef Bacik wrote:
>> On Sat, May 21, 2022 at 03:37:46PM +0800, Yu Kuai wrote:
>>> Otherwise io will hang, because the request will only be completed if
>>> the cmd has the 'NBD_CMD_INFLIGHT' flag set.
>>>
>>> Fixes: 07175cb1baf4 ("nbd: make sure request completion won't 
>>> concurrent")
>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>> ---
>>>   drivers/block/nbd.c | 18 ++++++++++++++----
>>>   1 file changed, 14 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
>>> index 2ee1e376d5c4..a0d0910dae2a 100644
>>> --- a/drivers/block/nbd.c
>>> +++ b/drivers/block/nbd.c
>>> @@ -403,13 +403,14 @@ static enum blk_eh_timer_return 
>>> nbd_xmit_timeout(struct request *req,
>>>       if (!mutex_trylock(&cmd->lock))
>>>           return BLK_EH_RESET_TIMER;
>>> -    if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>>> +    if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>>>           mutex_unlock(&cmd->lock);
>>>           return BLK_EH_DONE;
>>>       }
>>>       if (!refcount_inc_not_zero(&nbd->config_refs)) {
>>>           cmd->status = BLK_STS_TIMEOUT;
>>> +        __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
>>>           mutex_unlock(&cmd->lock);
>>>           goto done;
>>>       }
>>> @@ -478,6 +479,7 @@ static enum blk_eh_timer_return 
>>> nbd_xmit_timeout(struct request *req,
>>>       dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
>>>       set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
>>>       cmd->status = BLK_STS_IOERR;
>>> +    __clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
>>>       mutex_unlock(&cmd->lock);
>>>       sock_shutdown(nbd);
>>>       nbd_config_put(nbd);
>>> @@ -745,7 +747,7 @@ static struct nbd_cmd *nbd_handle_reply(struct 
>>> nbd_device *nbd, int index,
>>>       cmd = blk_mq_rq_to_pdu(req);
>>>       mutex_lock(&cmd->lock);
>>> -    if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>>> +    if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
>>>           dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d 
>>> (status %u flags %lu)",
>>>               tag, cmd->status, cmd->flags);
>>>           ret = -ENOENT;
>>> @@ -854,8 +856,16 @@ static void recv_work(struct work_struct *work)
>>>           }
>>>           rq = blk_mq_rq_from_pdu(cmd);
>>> -        if (likely(!blk_should_fake_timeout(rq->q)))
>>> -            blk_mq_complete_request(rq);
>>> +        if (likely(!blk_should_fake_timeout(rq->q))) {
>>> +            bool complete;
>>> +
>>> +            mutex_lock(&cmd->lock);
>>> +            complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
>>> +                            &cmd->flags);
>>> +            mutex_unlock(&cmd->lock);
>>> +            if (complete)
>>> +                blk_mq_complete_request(rq);
>>> +        }
>>
>> I'd rather this be handled in nbd_handle_reply.  We should return with it
>> cleared if it's ready to be completed.  Thanks,
> Hi,
> 
> Thanks for your advice, I'll do that in the next version. I'll still
> have to hold the lock to set the bit again in case
> blk_should_fake_timeout() returns true...

Hi, Josef

I just found out that this way is problematic:
t1:			t2:
recv_work
  nbd_handle_reply
   __clear_bit
			nbd_xmit_timeout
			 test_bit(NBD_CMD_INFLIGHT, &cmd->flags) -> fail
			 return BLK_EH_DONE -> rq can't complete
  blk_should_fake_timeout -> true
  __set_bit

__clear_bit() and then __set_bit() from recv_work() leaves a window,
and a concurrent nbd_xmit_timeout() in that window means the request
can no longer be completed through either the timeout handler or
recv_work().

Do you think it's ok to keep the current implementation with some
comments to explain the above scenario?
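
For example, keeping the v3 hunk in recv_work() and adding something
like this (the comment wording is just a draft):

	rq = blk_mq_rq_from_pdu(cmd);
	if (likely(!blk_should_fake_timeout(rq->q))) {
		bool complete;

		mutex_lock(&cmd->lock);
		/*
		 * Clear NBD_CMD_INFLIGHT here, together with completing
		 * the request, instead of clearing it in nbd_handle_reply()
		 * and re-setting it here for a fake timeout: that would
		 * open a window in which a concurrent nbd_xmit_timeout()
		 * sees the flag cleared, returns BLK_EH_DONE, and the
		 * request is never completed by either path.
		 */
		complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
						&cmd->flags);
		mutex_unlock(&cmd->lock);
		if (complete)
			blk_mq_complete_request(rq);
	}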

Thanks,
Kuai

Patch

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2ee1e376d5c4..a0d0910dae2a 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -403,13 +403,14 @@  static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	if (!mutex_trylock(&cmd->lock))
 		return BLK_EH_RESET_TIMER;
 
-	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		mutex_unlock(&cmd->lock);
 		return BLK_EH_DONE;
 	}
 
 	if (!refcount_inc_not_zero(&nbd->config_refs)) {
 		cmd->status = BLK_STS_TIMEOUT;
+		__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
 		mutex_unlock(&cmd->lock);
 		goto done;
 	}
@@ -478,6 +479,7 @@  static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
 	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
 	cmd->status = BLK_STS_IOERR;
+	__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
 	mutex_unlock(&cmd->lock);
 	sock_shutdown(nbd);
 	nbd_config_put(nbd);
@@ -745,7 +747,7 @@  static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
 	cmd = blk_mq_rq_to_pdu(req);
 
 	mutex_lock(&cmd->lock);
-	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
 			tag, cmd->status, cmd->flags);
 		ret = -ENOENT;
@@ -854,8 +856,16 @@  static void recv_work(struct work_struct *work)
 		}
 
 		rq = blk_mq_rq_from_pdu(cmd);
-		if (likely(!blk_should_fake_timeout(rq->q)))
-			blk_mq_complete_request(rq);
+		if (likely(!blk_should_fake_timeout(rq->q))) {
+			bool complete;
+
+			mutex_lock(&cmd->lock);
+			complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
+							&cmd->flags);
+			mutex_unlock(&cmd->lock);
+			if (complete)
+				blk_mq_complete_request(rq);
+		}
 		percpu_ref_put(&q->q_usage_counter);
 	}