Message ID | 20210126081539.13320-5-lengchao@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | avoid double request completion and IO error | expand |
> @@ -2084,8 +2085,10 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, > > err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, > req->mr ? &req->reg_wr.wr : NULL); > - if (unlikely(err)) > + if (unlikely(err)) { > + driver_error = true; > goto err_unmap; Why not just call set the status and call nvme_rdma_complete_rq and return here?
On 2021/1/28 16:39, Sagi Grimberg wrote: > >> @@ -2084,8 +2085,10 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, >> err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, >> req->mr ? &req->reg_wr.wr : NULL); >> - if (unlikely(err)) >> + if (unlikely(err)) { >> + driver_error = true; >> goto err_unmap; > > Why not just call set the status and call nvme_rdma_complete_rq and > return here? If the err is ENOMEM or EAGAIN, I am not sure the err must be a path-related error for all HBA drivers. So reused the error check code. I think it would be more reasonable to assume any errors returned by HBA driver as path-related errors. If you think so, I will modify it in next patch version.
>>> @@ -2084,8 +2085,10 @@ static blk_status_t nvme_rdma_queue_rq(struct >>> blk_mq_hw_ctx *hctx, >>> err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, >>> req->mr ? &req->reg_wr.wr : NULL); >>> - if (unlikely(err)) >>> + if (unlikely(err)) { >>> + driver_error = true; >>> goto err_unmap; >> >> Why not just call set the status and call nvme_rdma_complete_rq and >> return here? > If the err is ENOMEM or EAGAIN, I am not sure the err must be a > path-related error for all HBA drivers. So reused the error check code. > I think it would be more reasonable to assume any errors returned by HBA > driver as path-related errors. > If you think so, I will modify it in next patch version. Meant to do that only for -EIO. We should absolutely not do any of this for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may return due to a bug or anything like that.
On 2021/1/29 9:35, Sagi Grimberg wrote: > >>>> @@ -2084,8 +2085,10 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, >>>> err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, >>>> req->mr ? &req->reg_wr.wr : NULL); >>>> - if (unlikely(err)) >>>> + if (unlikely(err)) { >>>> + driver_error = true; >>>> goto err_unmap; >>> >>> Why not just call set the status and call nvme_rdma_complete_rq and >>> return here? >> If the err is ENOMEM or EAGAIN, I am not sure the err must be a >> path-related error for all HBA drivers. So reused the error check code. >> I think it would be more reasonable to assume any errors returned by HBA >> driver as path-related errors. >> If you think so, I will modify it in next patch version. > > Meant to do that only for -EIO. We should absolutely not do any of this > for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may > return due to a bug or anything like that. ok, please review again, thank you. --- drivers/nvme/host/rdma.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b7ce4f221d99..66b697461bd9 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr ? &req->reg_wr.wr : NULL); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -EIO) { + nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR); + err = 0; + } goto err_unmap; + } return BLK_STS_OK; @@ -2094,7 +2099,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, err: if (err == -ENOMEM || err == -EAGAIN) ret = BLK_STS_RESOURCE; - else + else if (err) ret = BLK_STS_IOERR; nvme_cleanup_cmd(rq); unmap_qe:
>>>> Why not just call set the status and call nvme_rdma_complete_rq and >>>> return here? >>> If the err is ENOMEM or EAGAIN, I am not sure the err must be a >>> path-related error for all HBA drivers. So reused the error check code. >>> I think it would be more reasonable to assume any errors returned by HBA >>> driver as path-related errors. >>> If you think so, I will modify it in next patch version. >> >> Meant to do that only for -EIO. We should absolutely not do any of this >> for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may >> return due to a bug or anything like that. > ok, please review again, thank you. > --- > drivers/nvme/host/rdma.c | 9 +++++++-- > 1 file changed, 7 insertions(+), 2 deletions(-) > > diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c > index b7ce4f221d99..66b697461bd9 100644 > --- a/drivers/nvme/host/rdma.c > +++ b/drivers/nvme/host/rdma.c > @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct > blk_mq_hw_ctx *hctx, > > err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, > req->mr ? &req->reg_wr.wr : NULL); > - if (unlikely(err)) > + if (unlikely(err)) { > + if (err == -EIO) { > + nvme_complete_failed_rq(rq, > NVME_SC_HOST_PATH_ERROR); I was thinking about: -- err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr ? &req->reg_wr.wr : NULL); if (unlikely(err)) { if (err == -EIO) { /* * Fail the reuqest so upper layer can failover I/O * if another path is available */ req->status = NVME_SC_HOST_PATH_ERROR; nvme_rdma_complete_rq(rq); return BLK_STS_OK; } goto err_unmap; } --
On 2021/1/29 11:24, Sagi Grimberg wrote: > >>>>> Why not just call set the status and call nvme_rdma_complete_rq and >>>>> return here? >>>> If the err is ENOMEM or EAGAIN, I am not sure the err must be a >>>> path-related error for all HBA drivers. So reused the error check code. >>>> I think it would be more reasonable to assume any errors returned by HBA >>>> driver as path-related errors. >>>> If you think so, I will modify it in next patch version. >>> >>> Meant to do that only for -EIO. We should absolutely not do any of this >>> for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may >>> return due to a bug or anything like that. >> ok, please review again, thank you. >> --- >> drivers/nvme/host/rdma.c | 9 +++++++-- >> 1 file changed, 7 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c >> index b7ce4f221d99..66b697461bd9 100644 >> --- a/drivers/nvme/host/rdma.c >> +++ b/drivers/nvme/host/rdma.c >> @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, >> >> err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, >> req->mr ? &req->reg_wr.wr : NULL); >> - if (unlikely(err)) >> + if (unlikely(err)) { >> + if (err == -EIO) { >> + nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR); > > I was thinking about: > -- > err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, > req->mr ? &req->reg_wr.wr : NULL); > if (unlikely(err)) { > if (err == -EIO) { > /* > * Fail the request so upper layer can failover I/O > * if another path is available > */ > req->status = NVME_SC_HOST_PATH_ERROR; > nvme_rdma_complete_rq(rq); > return BLK_STS_OK; Need to do cleanup, so we cannot directly return. > > } > goto err_unmap; > } > -- > .
On 2021/1/29 11:24, Sagi Grimberg wrote: > >>>>> Why not just call set the status and call nvme_rdma_complete_rq and >>>>> return here? >>>> If the err is ENOMEM or EAGAIN, I am not sure the err must be a >>>> path-related error for all HBA drivers. So reused the error check code. >>>> I think it would be more reasonable to assume any errors returned by HBA >>>> driver as path-related errors. >>>> If you think so, I will modify it in next patch version. >>> >>> Meant to do that only for -EIO. We should absolutely not do any of this >>> for stuff like EINVAL, EOPNOTSUPP, EPERM or any strange error that may >>> return due to a bug or anything like that. >> ok, please review again, thank you. >> --- >> drivers/nvme/host/rdma.c | 9 +++++++-- >> 1 file changed, 7 insertions(+), 2 deletions(-) >> >> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c >> index b7ce4f221d99..66b697461bd9 100644 >> --- a/drivers/nvme/host/rdma.c >> +++ b/drivers/nvme/host/rdma.c >> @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, >> >> err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, >> req->mr ? &req->reg_wr.wr : NULL); >> - if (unlikely(err)) >> + if (unlikely(err)) { >> + if (err == -EIO) { >> + nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR); > > I was thinking about: > -- > err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, > req->mr ? &req->reg_wr.wr : NULL); > if (unlikely(err)) { > if (err == -EIO) { > /* > * Fail the request so upper layer can failover I/O > * if another path is available > */ > req->status = NVME_SC_HOST_PATH_ERROR; > nvme_rdma_complete_rq(rq); > return BLK_STS_OK; Need to do cleanup, so we cannot directly return. > > } > goto err_unmap; > } > -- > .
>>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c >>> index b7ce4f221d99..66b697461bd9 100644 >>> --- a/drivers/nvme/host/rdma.c >>> +++ b/drivers/nvme/host/rdma.c >>> @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct >>> blk_mq_hw_ctx *hctx, >>> >>> err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, >>> req->mr ? &req->reg_wr.wr : NULL); >>> - if (unlikely(err)) >>> + if (unlikely(err)) { >>> + if (err == -EIO) { >>> + nvme_complete_failed_rq(rq, >>> NVME_SC_HOST_PATH_ERROR); >> >> I was thinking about: >> -- >> err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, >> req->mr ? &req->reg_wr.wr : NULL); >> if (unlikely(err)) { >> if (err == -EIO) { >> /* >> * Fail the request so upper layer can >> failover I/O >> * if another path is available >> */ >> req->status = NVME_SC_HOST_PATH_ERROR; >> nvme_rdma_complete_rq(rq); >> return BLK_STS_OK; Need to do cleanup, so we cannot >> directly return. The completion path cleans up though >> >> } >> goto err_unmap; >> } >> -- >> . > > _______________________________________________ > Linux-nvme mailing list > Linux-nvme@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-nvme
On 2021/1/29 11:37, Sagi Grimberg wrote: > >>>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c >>>> index b7ce4f221d99..66b697461bd9 100644 >>>> --- a/drivers/nvme/host/rdma.c >>>> +++ b/drivers/nvme/host/rdma.c >>>> @@ -2084,8 +2084,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, >>>> >>>> err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, >>>> req->mr ? &req->reg_wr.wr : NULL); >>>> - if (unlikely(err)) >>>> + if (unlikely(err)) { >>>> + if (err == -EIO) { >>>> + nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR); >>> >>> I was thinking about: >>> -- >>> err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, >>> req->mr ? &req->reg_wr.wr : NULL); >>> if (unlikely(err)) { >>> if (err == -EIO) { >>> /* >>> * Fail the request so upper layer can failover I/O >>> * if another path is available >>> */ >>> req->status = NVME_SC_HOST_PATH_ERROR; >>> nvme_rdma_complete_rq(rq); >>> return BLK_STS_OK; Need to do cleanup, so we cannot directly return. > > The completion path cleans up though OK, I see. But we need to use the new helper nvme_complete_failed_rq to avoid double request completion. Or we can do it like this: req->status = NVME_SC_HOST_PATH_ERROR; blk_mq_set_request_complete(req); nvme_rdma_complete_rq(rq); > >>> >>> } >>> goto err_unmap; >>> } >>> -- >>> . >> >> _______________________________________________ >> Linux-nvme mailing list >> Linux-nvme@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/linux-nvme > .
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b7ce4f221d99..7c801132d5ed 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2037,6 +2037,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags); blk_status_t ret; int err; + bool driver_error = false; WARN_ON_ONCE(rq->tag < 0); @@ -2084,8 +2085,10 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr ? &req->reg_wr.wr : NULL); - if (unlikely(err)) + if (unlikely(err)) { + driver_error = true; goto err_unmap; + } return BLK_STS_OK; @@ -2100,6 +2103,10 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, unmap_qe: ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command), DMA_TO_DEVICE); + if (driver_error && ret == BLK_STS_IOERR) { + nvme_complete_failed_rq(rq, NVME_SC_HOST_PATH_ERROR); + ret = BLK_STS_OK; + } return ret; }
When working with NVMe native multipath, if a path-related error occurs while queue_rq calls the HBA driver to send a request, queue_rq will return BLK_STS_IOERR to blk-mq. The request is then completed with BLK_STS_IOERR instead of failing over to a retry. queue_rq needs to call nvme_complete_rq to complete the request with NVME_SC_HOST_PATH_ERROR, so that the request will fail over and be retried if needed. Signed-off-by: Chao Leng <lengchao@huawei.com> --- drivers/nvme/host/rdma.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)