diff mbox series

[v4,4/5] nvme-rdma: avoid IO error for nvme native multipath

Message ID 20210126081539.13320-5-lengchao@huawei.com (mailing list archive)
State New, archived
Headers show
Series avoid double request completion and IO error | expand

Commit Message

Chao Leng Jan. 26, 2021, 8:15 a.m. UTC
Work with nvme native multipath, if a path related error occurs when
queue_rq call HBA drive to send request, queue_rq will return
BLK_STS_IOERR to blk-mq. The request is completed with BLK_STS_IOERR
instead of fail over to retry.
queue_rq need call nvme_complete_rq to complete the request with
NVME_SC_HOST_PATH_ERROR, the request will fail over to retry if needed.

Signed-off-by: Chao Leng <lengchao@huawei.com>
---
 drivers/nvme/host/rdma.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Comments

Sagi Grimberg Jan. 28, 2021, 8:39 a.m. UTC | #1
> @@ -2084,8 +2085,10 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
>   
>   	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>   			req->mr ? &req->reg_wr.wr : NULL);
> -	if (unlikely(err))
> +	if (unlikely(err)) {
> +		driver_error = true;
>   		goto err_unmap;

Why not just call set the status and call nvme_rdma_complete_rq and
return here?
Chao Leng Jan. 28, 2021, 9:31 a.m. UTC | #2
On 2021/1/28 16:39, Sagi Grimberg wrote:
> 
>> @@ -2084,8 +2085,10 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
>>       err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>>               req->mr ? &req->reg_wr.wr : NULL);
>> -    if (unlikely(err))
>> +    if (unlikely(err)) {
>> +        driver_error = true;
>>           goto err_unmap;
> 
> Why not just call set the status and call nvme_rdma_complete_rq and
> return here?
If the err is ENOMEM or EAGAIN, I am not sure the err must be a
path-related error for all HBA drivers. So reused the error check code.
I think it would be more reasonable to assume any errors returned by HBA
driver as path-related errors.
If you think so, I will modify it in next patch version.
Sagi Grimberg Jan. 29, 2021, 1:35 a.m. UTC | #3
>>> @@ -2084,8 +2085,10 @@ static blk_status_t nvme_rdma_queue_rq(struct 
>>> blk_mq_hw_ctx *hctx,
>>>       err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>>>               req->mr ? &req->reg_wr.wr : NULL);
>>> -    if (unlikely(err))
>>> +    if (unlikely(err)) {
>>> +        driver_error = true;
>>>           goto err_unmap;
>>
>> Why not just call set the status and call nvme_rdma_complete_rq and
>> return here?
> If the err is ENOMEM or EAGAIN, I am not sure the err must be a
> path-related error for all HBA drivers. So reused the error check code.
> I think it would be more reasonable to assume any errors returned by HBA
> driver as path-related errors.
> If you think so, I will modify it in next patch version.

Meant to do that only for -EIO. We should absolutely not do any of this
for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may
return due to a bug or anything like that.
Chao Leng Jan. 29, 2021, 2:48 a.m. UTC | #4
On 2021/1/29 9:35, Sagi Grimberg wrote:
> 
>>>> @@ -2084,8 +2085,10 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
>>>>       err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>>>>               req->mr ? &req->reg_wr.wr : NULL);
>>>> -    if (unlikely(err))
>>>> +    if (unlikely(err)) {
>>>> +        driver_error = true;
>>>>           goto err_unmap;
>>>
>>> Why not just call set the status and call nvme_rdma_complete_rq and
>>> return here?
>> If the err is ENOMEM or EAGAIN, I am not sure the err must be a
>> path-related error for all HBA drivers. So reused the error check code.
>> I think it would be more reasonable to assume any errors returned by HBA
>> driver as path-related errors.
>> If you think so, I will modify it in next patch version.
> 
> Meant to do that only for -EIO. We should absolutely not do any of this
> for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may
> return due to a bug or anything like that.
ok, please review again, thank you.
---
  drivers/nvme/host/rdma.c | 9 +++++++--
  1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index b7ce4f221d99..66b697461bd9 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,

         err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
                         req->mr ? &req->reg_wr.wr : NULL);
-       if (unlikely(err))
+       if (unlikely(err)) {
+               if (err == -EIO) {
+                       nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR);
+                       err = 0;
+               }
                 goto err_unmap;
+       }

         return BLK_STS_OK;

@@ -2094,7 +2099,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
  err:
         if (err == -ENOMEM || err == -EAGAIN)
                 ret = BLK_STS_RESOURCE;
-       else
+       else if (err)
                 ret = BLK_STS_IOERR;
         nvme_cleanup_cmd(rq);
  unmap_qe:
Sagi Grimberg Jan. 29, 2021, 3:24 a.m. UTC | #5
>>>> Why not just call set the status and call nvme_rdma_complete_rq and
>>>> return here?
>>> If the err is ENOMEM or EAGAIN, I am not sure the err must be a
>>> path-related error for all HBA drivers. So reused the error check code.
>>> I think it would be more reasonable to assume any errors returned by HBA
>>> driver as path-related errors.
>>> If you think so, I will modify it in next patch version.
>>
>> Meant to do that only for -EIO. We should absolutely not do any of this
>> for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may
>> return due to a bug or anything like that.
> ok, please review again, thank you.
> ---
>   drivers/nvme/host/rdma.c | 9 +++++++--
>   1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index b7ce4f221d99..66b697461bd9 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct 
> blk_mq_hw_ctx *hctx,
> 
>          err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>                          req->mr ? &req->reg_wr.wr : NULL);
> -       if (unlikely(err))
> +       if (unlikely(err)) {
> +               if (err == -EIO) {
> +                       nvme_complete_failed_rq(rq, 
> NVME_SC_HOST_PATH_ERROR);

I was thinking about:
--
         err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
                         req->mr ? &req->reg_wr.wr : NULL);
         if (unlikely(err)) {
                 if (err == -EIO) {
                         /*
                          * Fail the reuqest so upper layer can failover I/O
                          * if another path is available
                          */
                         req->status = NVME_SC_HOST_PATH_ERROR;
                         nvme_rdma_complete_rq(rq);
                         return BLK_STS_OK;

                 }
                 goto err_unmap;
         }
--
Chao Leng Jan. 29, 2021, 3:30 a.m. UTC | #6
On 2021/1/29 11:24, Sagi Grimberg wrote:
> 
>>>>> Why not just call set the status and call nvme_rdma_complete_rq and
>>>>> return here?
>>>> If the err is ENOMEM or EAGAIN, I am not sure the err must be a
>>>> path-related error for all HBA drivers. So reused the error check code.
>>>> I think it would be more reasonable to assume any errors returned by HBA
>>>> driver as path-related errors.
>>>> If you think so, I will modify it in next patch version.
>>>
>>> Meant to do that only for -EIO. We should absolutely not do any of this
>>> for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may
>>> return due to a bug or anything like that.
>> ok, please review again, thank you.
>> ---
>>   drivers/nvme/host/rdma.c | 9 +++++++--
>>   1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index b7ce4f221d99..66b697461bd9 100644
>> --- a/drivers/nvme/host/rdma.c
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
>>
>>          err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>>                          req->mr ? &req->reg_wr.wr : NULL);
>> -       if (unlikely(err))
>> +       if (unlikely(err)) {
>> +               if (err == -EIO) {
>> +                       nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR);
> 
> I was thinking about:
> -- 
>          err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>                          req->mr ? &req->reg_wr.wr : NULL);
>          if (unlikely(err)) {
>                  if (err == -EIO) {
>                          /*
>                           * Fail the reuqest so upper layer can failover I/O
>                           * if another path is available
>                           */
>                          req->status = NVME_SC_HOST_PATH_ERROR;
>                          nvme_rdma_complete_rq(rq);
>                          return BLK_STS_OK;Need to do clean. so can not directly return.
> 
>                  }
>                  goto err_unmap;
>          }
> -- 
> .
Chao Leng Jan. 29, 2021, 3:37 a.m. UTC | #7
On 2021/1/29 11:24, Sagi Grimberg wrote:
> 
>>>>> Why not just call set the status and call nvme_rdma_complete_rq and
>>>>> return here?
>>>> If the err is ENOMEM or EAGAIN, I am not sure the err must be a
>>>> path-related error for all HBA drivers. So reused the error check code.
>>>> I think it would be more reasonable to assume any errors returned by HBA
>>>> driver as path-related errors.
>>>> If you think so, I will modify it in next patch version.
>>>
>>> Meant to do that only for -EIO. We should absolutely not do any of this
>>> for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may
>>> return due to a bug or anything like that.
>> ok, please review again, thank you.
>> ---
>>   drivers/nvme/host/rdma.c | 9 +++++++--
>>   1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index b7ce4f221d99..66b697461bd9 100644
>> --- a/drivers/nvme/host/rdma.c
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
>>
>>          err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>>                          req->mr ? &req->reg_wr.wr : NULL);
>> -       if (unlikely(err))
>> +       if (unlikely(err)) {
>> +               if (err == -EIO) {
>> +                       nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR);
> 
> I was thinking about:
> -- 
>          err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>                          req->mr ? &req->reg_wr.wr : NULL);
>          if (unlikely(err)) {
>                  if (err == -EIO) {
>                          /*
>                           * Fail the reuqest so upper layer can failover I/O
>                           * if another path is available
>                           */
>                          req->status = NVME_SC_HOST_PATH_ERROR;
>                          nvme_rdma_complete_rq(rq);
>                          return BLK_STS_OK;
Need to do clean. so can not directly return.
> 
>                  }
>                  goto err_unmap;
>          }
> -- 
> .
Sagi Grimberg Jan. 29, 2021, 3:37 a.m. UTC | #8
>>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>>> index b7ce4f221d99..66b697461bd9 100644
>>> --- a/drivers/nvme/host/rdma.c
>>> +++ b/drivers/nvme/host/rdma.c
>>> @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct 
>>> blk_mq_hw_ctx *hctx,
>>>
>>>          err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>>>                          req->mr ? &req->reg_wr.wr : NULL);
>>> -       if (unlikely(err))
>>> +       if (unlikely(err)) {
>>> +               if (err == -EIO) {
>>> +                       nvme_complete_failed_rq(rq, 
>>> NVME_SC_HOST_PATH_ERROR);
>>
>> I was thinking about:
>> -- 
>>          err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>>                          req->mr ? &req->reg_wr.wr : NULL);
>>          if (unlikely(err)) {
>>                  if (err == -EIO) {
>>                          /*
>>                           * Fail the reuqest so upper layer can 
>> failover I/O
>>                           * if another path is available
>>                           */
>>                          req->status = NVME_SC_HOST_PATH_ERROR;
>>                          nvme_rdma_complete_rq(rq);
>>                          return BLK_STS_OK;Need to do clean. so can 
>> not directly return.

The completion path cleans up though

>>
>>                  }
>>                  goto err_unmap;
>>          }
>> -- 
>> .
> 
> _______________________________________________
> Linux-nvme mailing list
> Linux-nvme@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-nvme
Chao Leng Jan. 29, 2021, 3:50 a.m. UTC | #9
On 2021/1/29 11:37, Sagi Grimberg wrote:
> 
>>>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>>>> index b7ce4f221d99..66b697461bd9 100644
>>>> --- a/drivers/nvme/host/rdma.c
>>>> +++ b/drivers/nvme/host/rdma.c
>>>> @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
>>>>
>>>>          err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>>>>                          req->mr ? &req->reg_wr.wr : NULL);
>>>> -       if (unlikely(err))
>>>> +       if (unlikely(err)) {
>>>> +               if (err == -EIO) {
>>>> +                       nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR);
>>>
>>> I was thinking about:
>>> -- 
>>>          err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
>>>                          req->mr ? &req->reg_wr.wr : NULL);
>>>          if (unlikely(err)) {
>>>                  if (err == -EIO) {
>>>                          /*
>>>                           * Fail the reuqest so upper layer can failover I/O
>>>                           * if another path is available
>>>                           */
>>>                          req->status = NVME_SC_HOST_PATH_ERROR;
>>>                          nvme_rdma_complete_rq(rq);
>>>                          return BLK_STS_OK;Need to do clean. so can not directly return.
> 
> The completion path cleans up though
ok, i see.
But we need to use the new helper nvme_complete_failed_rq to avoid double request completion.
Or we can do like this:
req->status = NVME_SC_HOST_PATH_ERROR;
blk_mq_set_request_complete(req);
nvme_rdma_complete_rq(rq);
> 
>>>
>>>                  }
>>>                  goto err_unmap;
>>>          }
>>> -- 
>>> .
>>
>> _______________________________________________
>> Linux-nvme mailing list
>> Linux-nvme@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-nvme
> .
diff mbox series

Patch

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index b7ce4f221d99..7c801132d5ed 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2037,6 +2037,7 @@  static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
 	blk_status_t ret;
 	int err;
+	bool driver_error = false;
 
 	WARN_ON_ONCE(rq->tag < 0);
 
@@ -2084,8 +2085,10 @@  static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
 			req->mr ? &req->reg_wr.wr : NULL);
-	if (unlikely(err))
+	if (unlikely(err)) {
+		driver_error = true;
 		goto err_unmap;
+	}
 
 	return BLK_STS_OK;
 
@@ -2100,6 +2103,10 @@  static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 unmap_qe:
 	ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
 			    DMA_TO_DEVICE);
+	if (driver_error && ret == BLK_STS_IOERR) {
+		nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR);
+		ret = BLK_STS_OK;
+	}
 	return ret;
 }