diff mbox series

[v2,3/4] nvme: introduce FAILUP handling for REQ_FAILFAST_TRANSPORT

Message ID 20210415231530.95464-4-snitzer@redhat.com (mailing list archive)
State New, archived
Headers show
Series nvme: improve error handling and ana_state to work well with dm-multipath | expand

Commit Message

Mike Snitzer April 15, 2021, 11:15 p.m. UTC
If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry
IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is
set by multipathing software (e.g. dm-multipath) before it issues IO.

Update NVMe to allow failover of requests marked with either
REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests
to be given a disposition of either FAILOVER or FAILUP respectively.
FAILUP handling ensures a retryable error is returned up from NVMe.

Introduce nvme_failup_req() for use in nvme_complete_rq() if
nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures
the request is completed with a retryable IO error when appropriate.
__nvme_end_req() was factored out for use by both nvme_end_req() and
nvme_failup_req().

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

Comments

Hannes Reinecke April 16, 2021, 2:07 p.m. UTC | #1
On 4/16/21 1:15 AM, Mike Snitzer wrote:
> If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry
> IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is
> set by multipathing software (e.g. dm-multipath) before it issues IO.
> 
> Update NVMe to allow failover of requests marked with either
> REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests
> to be given a disposition of either FAILOVER or FAILUP respectively.
> FAILUP handling ensures a retryable error is returned up from NVMe.
> 
> Introduce nvme_failup_req() for use in nvme_complete_rq() if
> nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures
> the request is completed with a retryable IO error when appropriate.
> __nvme_end_req() was factored out for use by both nvme_end_req() and
> nvme_failup_req().
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++-----
>  1 file changed, 26 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 4134cf3c7e48..10375197dd53 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -299,6 +299,7 @@ enum nvme_disposition {
>  	COMPLETE,
>  	RETRY,
>  	FAILOVER,
> +	FAILUP,
>  };
>  
>  static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
> @@ -318,10 +319,11 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
>  	    nvme_req(req)->retries >= nvme_max_retries)
>  		return COMPLETE;
>  
> -	if (req->cmd_flags & REQ_NVME_MPATH) {
> +	if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) {
>  		if (nvme_is_path_error(nvme_req(req)->status) ||
>  		    blk_queue_dying(req->q))
> -			return FAILOVER;
> +			return (req->cmd_flags & REQ_NVME_MPATH) ?
> +				FAILOVER : FAILUP;
>  	} else {
>  		if (blk_queue_dying(req->q))
>  			return COMPLETE;
> @@ -330,10 +332,8 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
>  	return RETRY;
>  }
>  
> -static inline void nvme_end_req(struct request *req)
> +static inline void __nvme_end_req(struct request *req, blk_status_t status)
>  {
> -	blk_status_t status = nvme_error_status(nvme_req(req)->status);
> -
>  	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
>  	    req_op(req) == REQ_OP_ZONE_APPEND)
>  		req->__sector = nvme_lba_to_sect(req->q->queuedata,
> @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request *req)
>  	blk_mq_end_request(req, status);
>  }
>  
> +static inline void nvme_end_req(struct request *req)
> +{
> +	__nvme_end_req(req, nvme_error_status(nvme_req(req)->status));
> +}
> +
> +static void nvme_failup_req(struct request *req)
> +{
> +	blk_status_t status = nvme_error_status(nvme_req(req)->status);
> +
> +	if (WARN_ON_ONCE(!blk_path_error(status))) {
> +		pr_debug("Request meant for failover but blk_status_t (errno=%d) was not retryable.\n",
> +			 blk_status_to_errno(status));
> +		status = BLK_STS_IOERR;
> +	}
> +
> +	__nvme_end_req(req, status);
> +}
> +
>  void nvme_complete_rq(struct request *req)
>  {
>  	trace_nvme_complete_rq(req);
> @@ -361,6 +379,9 @@ void nvme_complete_rq(struct request *req)
>  	case FAILOVER:
>  		nvme_failover_req(req);
>  		return;
> +	case FAILUP:
> +		nvme_failup_req(req);
> +		return;
>  	}
>  }
>  EXPORT_SYMBOL_GPL(nvme_complete_rq);
> 

Hmm. Quite convoluted, methinks.
Shouldn't this achieve the same thing?

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e89ec2522ca6..8c36a2196b66 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -303,8 +303,10 @@ static inline enum nvme_disposition
nvme_decide_disposition(struct request *req)
        if (likely(nvme_req(req)->status == 0))
                return COMPLETE;

-       if (blk_noretry_request(req) ||
-           (nvme_req(req)->status & NVME_SC_DNR) ||
+       if (blk_noretry_request(req))
+               nvme_req(req)->status |= NVME_SC_DNR;
+
+       if ((nvme_req(req)->status & NVME_SC_DNR) ||
            nvme_req(req)->retries >= nvme_max_retries)
                return COMPLETE;


Cheers,

Hannes
Mike Snitzer April 16, 2021, 3:03 p.m. UTC | #2
On Fri, Apr 16 2021 at 10:07am -0400,
Hannes Reinecke <hare@suse.de> wrote:

> On 4/16/21 1:15 AM, Mike Snitzer wrote:
> > If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry
> > IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is
> > set by multipathing software (e.g. dm-multipath) before it issues IO.
> > 
> > Update NVMe to allow failover of requests marked with either
> > REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests
> > to be given a disposition of either FAILOVER or FAILUP respectively.
> > FAILUP handling ensures a retryable error is returned up from NVMe.
> > 
> > Introduce nvme_failup_req() for use in nvme_complete_rq() if
> > nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures
> > the request is completed with a retryable IO error when appropriate.
> > __nvme_end_req() was factored out for use by both nvme_end_req() and
> > nvme_failup_req().
> > 
> > Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> > ---
> >  drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++-----
> >  1 file changed, 26 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 4134cf3c7e48..10375197dd53 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -299,6 +299,7 @@ enum nvme_disposition {
> >  	COMPLETE,
> >  	RETRY,
> >  	FAILOVER,
> > +	FAILUP,
> >  };
> >  
> >  static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
> > @@ -318,10 +319,11 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
> >  	    nvme_req(req)->retries >= nvme_max_retries)
> >  		return COMPLETE;
> >  
> > -	if (req->cmd_flags & REQ_NVME_MPATH) {
> > +	if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) {
> >  		if (nvme_is_path_error(nvme_req(req)->status) ||
> >  		    blk_queue_dying(req->q))
> > -			return FAILOVER;
> > +			return (req->cmd_flags & REQ_NVME_MPATH) ?
> > +				FAILOVER : FAILUP;
> >  	} else {
> >  		if (blk_queue_dying(req->q))
> >  			return COMPLETE;
> > @@ -330,10 +332,8 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
> >  	return RETRY;
> >  }
> >  
> > -static inline void nvme_end_req(struct request *req)
> > +static inline void __nvme_end_req(struct request *req, blk_status_t status)
> >  {
> > -	blk_status_t status = nvme_error_status(nvme_req(req)->status);
> > -
> >  	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> >  	    req_op(req) == REQ_OP_ZONE_APPEND)
> >  		req->__sector = nvme_lba_to_sect(req->q->queuedata,
> > @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request *req)
> >  	blk_mq_end_request(req, status);
> >  }
> >  
> > +static inline void nvme_end_req(struct request *req)
> > +{
> > +	__nvme_end_req(req, nvme_error_status(nvme_req(req)->status));
> > +}
> > +
> > +static void nvme_failup_req(struct request *req)
> > +{
> > +	blk_status_t status = nvme_error_status(nvme_req(req)->status);
> > +
> > +	if (WARN_ON_ONCE(!blk_path_error(status))) {
> > +		pr_debug("Request meant for failover but blk_status_t (errno=%d) was not retryable.\n",
> > +			 blk_status_to_errno(status));
> > +		status = BLK_STS_IOERR;
> > +	}
> > +
> > +	__nvme_end_req(req, status);
> > +}
> > +
> >  void nvme_complete_rq(struct request *req)
> >  {
> >  	trace_nvme_complete_rq(req);
> > @@ -361,6 +379,9 @@ void nvme_complete_rq(struct request *req)
> >  	case FAILOVER:
> >  		nvme_failover_req(req);
> >  		return;
> > +	case FAILUP:
> > +		nvme_failup_req(req);
> > +		return;
> >  	}
> >  }
> >  EXPORT_SYMBOL_GPL(nvme_complete_rq);
> > 
> 
> Hmm. Quite convoluted, methinks.

Maybe you didn't read the header or patch?

I'm cool with critical review when it is clear the reviewer fully
understands the patch but... ;)

> Shouldn't this achieve the same thing?
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index e89ec2522ca6..8c36a2196b66 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -303,8 +303,10 @@ static inline enum nvme_disposition
> nvme_decide_disposition(struct request *req)
>         if (likely(nvme_req(req)->status == 0))
>                 return COMPLETE;
> 
> -       if (blk_noretry_request(req) ||
> -           (nvme_req(req)->status & NVME_SC_DNR) ||
> +       if (blk_noretry_request(req))
> +               nvme_req(req)->status |= NVME_SC_DNR;
> +
> +       if ((nvme_req(req)->status & NVME_SC_DNR) ||
>             nvme_req(req)->retries >= nvme_max_retries)
>                 return COMPLETE;

Definitely won't achieve the same. And especially not with patch 1/4
("nvme: return BLK_STS_DO_NOT_RETRY if the DNR bit is set") that you
gave your Reviewed-by to earlier.

Instead of "FAILUP", I thought about using "FAILUP_AND_OVER" to convey
that this is a variant of failover.  Meaning it takes the same patch as
nvme "FAILOVER" until the very end; where it does REQ_FAILFAST_TRANSPORT
specific work detailed in nvme_failup_req().

And then patch 4/4 makes further use of nvme_failup_req() by adding a
call to the factored out nvme_update_ana().

Mike
Hannes Reinecke April 16, 2021, 4:23 p.m. UTC | #3
On 4/16/21 5:03 PM, Mike Snitzer wrote:
> On Fri, Apr 16 2021 at 10:07am -0400,
> Hannes Reinecke <hare@suse.de> wrote:
> 
>> On 4/16/21 1:15 AM, Mike Snitzer wrote:
>>> If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry
>>> IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is
>>> set by multipathing software (e.g. dm-multipath) before it issues IO.
>>>
>>> Update NVMe to allow failover of requests marked with either
>>> REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests
>>> to be given a disposition of either FAILOVER or FAILUP respectively.
>>> FAILUP handling ensures a retryable error is returned up from NVMe.
>>>
>>> Introduce nvme_failup_req() for use in nvme_complete_rq() if
>>> nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures
>>> the request is completed with a retryable IO error when appropriate.
>>> __nvme_end_req() was factored out for use by both nvme_end_req() and
>>> nvme_failup_req().
>>>
>>> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
>>> ---
>>>  drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++-----
>>>  1 file changed, 26 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>> index 4134cf3c7e48..10375197dd53 100644
>>> --- a/drivers/nvme/host/core.c
>>> +++ b/drivers/nvme/host/core.c
>>> @@ -299,6 +299,7 @@ enum nvme_disposition {
>>>  	COMPLETE,
>>>  	RETRY,
>>>  	FAILOVER,
>>> +	FAILUP,
>>>  };
>>>  
>>>  static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
>>> @@ -318,10 +319,11 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
>>>  	    nvme_req(req)->retries >= nvme_max_retries)
>>>  		return COMPLETE;
>>>  
>>> -	if (req->cmd_flags & REQ_NVME_MPATH) {
>>> +	if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) {
>>>  		if (nvme_is_path_error(nvme_req(req)->status) ||
>>>  		    blk_queue_dying(req->q))
>>> -			return FAILOVER;
>>> +			return (req->cmd_flags & REQ_NVME_MPATH) ?
>>> +				FAILOVER : FAILUP;
>>>  	} else {
>>>  		if (blk_queue_dying(req->q))
>>>  			return COMPLETE;
>>> @@ -330,10 +332,8 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
>>>  	return RETRY;
>>>  }
>>>  
>>> -static inline void nvme_end_req(struct request *req)
>>> +static inline void __nvme_end_req(struct request *req, blk_status_t status)
>>>  {
>>> -	blk_status_t status = nvme_error_status(nvme_req(req)->status);
>>> -
>>>  	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
>>>  	    req_op(req) == REQ_OP_ZONE_APPEND)
>>>  		req->__sector = nvme_lba_to_sect(req->q->queuedata,
>>> @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request *req)
>>>  	blk_mq_end_request(req, status);
>>>  }
>>>  
>>> +static inline void nvme_end_req(struct request *req)
>>> +{
>>> +	__nvme_end_req(req, nvme_error_status(nvme_req(req)->status));
>>> +}
>>> +
>>> +static void nvme_failup_req(struct request *req)
>>> +{
>>> +	blk_status_t status = nvme_error_status(nvme_req(req)->status);
>>> +
>>> +	if (WARN_ON_ONCE(!blk_path_error(status))) {
>>> +		pr_debug("Request meant for failover but blk_status_t (errno=%d) was not retryable.\n",
>>> +			 blk_status_to_errno(status));
>>> +		status = BLK_STS_IOERR;
>>> +	}
>>> +
>>> +	__nvme_end_req(req, status);
>>> +}
>>> +
>>>  void nvme_complete_rq(struct request *req)
>>>  {
>>>  	trace_nvme_complete_rq(req);
>>> @@ -361,6 +379,9 @@ void nvme_complete_rq(struct request *req)
>>>  	case FAILOVER:
>>>  		nvme_failover_req(req);
>>>  		return;
>>> +	case FAILUP:
>>> +		nvme_failup_req(req);
>>> +		return;
>>>  	}
>>>  }
>>>  EXPORT_SYMBOL_GPL(nvme_complete_rq);
>>>
>>
>> Hmm. Quite convoluted, methinks.
> 
> Maybe you didn't read the header or patch?
> 
> I'm cool with critical review when it is clear the reviewer fully
> understands the patch but... ;)
> 
>> Shouldn't this achieve the same thing?
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index e89ec2522ca6..8c36a2196b66 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -303,8 +303,10 @@ static inline enum nvme_disposition
>> nvme_decide_disposition(struct request *req)
>>         if (likely(nvme_req(req)->status == 0))
>>                 return COMPLETE;
>>
>> -       if (blk_noretry_request(req) ||
>> -           (nvme_req(req)->status & NVME_SC_DNR) ||
>> +       if (blk_noretry_request(req))
>> +               nvme_req(req)->status |= NVME_SC_DNR;
>> +
>> +       if ((nvme_req(req)->status & NVME_SC_DNR) ||
>>             nvme_req(req)->retries >= nvme_max_retries)
>>                 return COMPLETE;
> 
> Definitely won't achieve the same. And especially not with patch 1/4
> ("nvme: return BLK_STS_DO_NOT_RETRY if the DNR bit is set") that you
> gave your Reviewed-by to earlier.
> 
Ah. Right. Sorry.

> Instead of "FAILUP", I thought about using "FAILUP_AND_OVER" to convey
> that this is a variant of failover.  Meaning it takes the same patch as
> nvme "FAILOVER" until the very end; where it does REQ_FAILFAST_TRANSPORT
> specific work detailed in nvme_failup_req().
> 
All very intricate; will need to check the patches in their combined
version.
Not deliberately stalling, mind you, just wanting to figure out what the
net result will be.

Cheers,

Hannes
Ewan Milne April 16, 2021, 8:03 p.m. UTC | #4
On Fri, 2021-04-16 at 16:07 +0200, Hannes Reinecke wrote:
> 
> Hmm. Quite convoluted, methinks.
> Shouldn't this achieve the same thing?
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index e89ec2522ca6..8c36a2196b66 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -303,8 +303,10 @@ static inline enum nvme_disposition
> nvme_decide_disposition(struct request *req)
>         if (likely(nvme_req(req)->status == 0))
>                 return COMPLETE;
> 
> -       if (blk_noretry_request(req) ||
> -           (nvme_req(req)->status & NVME_SC_DNR) ||
> +       if (blk_noretry_request(req))
> +               nvme_req(req)->status |= NVME_SC_DNR;
> +
> +       if ((nvme_req(req)->status & NVME_SC_DNR) ||
>             nvme_req(req)->retries >= nvme_max_retries)
>                 return COMPLETE;
> 

I am not in favor of altering ->status to set DNR jus to
simplify the following conditional.

-Ewan
Ewan Milne April 16, 2021, 8:52 p.m. UTC | #5
On Thu, 2021-04-15 at 19:15 -0400, Mike Snitzer wrote:
> If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry
> IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is
> set by multipathing software (e.g. dm-multipath) before it issues IO.
> 
> Update NVMe to allow failover of requests marked with either
> REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests
> to be given a disposition of either FAILOVER or FAILUP respectively.
> FAILUP handling ensures a retryable error is returned up from NVMe.
> 
> Introduce nvme_failup_req() for use in nvme_complete_rq() if
> nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures
> the request is completed with a retryable IO error when appropriate.
> __nvme_end_req() was factored out for use by both nvme_end_req() and
> nvme_failup_req().
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++-----
>  1 file changed, 26 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 4134cf3c7e48..10375197dd53 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -299,6 +299,7 @@ enum nvme_disposition {
>  	COMPLETE,
>  	RETRY,
>  	FAILOVER,
> +	FAILUP,
>  };
>  
>  static inline enum nvme_disposition nvme_decide_disposition(struct
> request *req)
> @@ -318,10 +319,11 @@ static inline enum nvme_disposition
> nvme_decide_disposition(struct request *req)
>  	    nvme_req(req)->retries >= nvme_max_retries)
>  		return COMPLETE;
>  
> -	if (req->cmd_flags & REQ_NVME_MPATH) {
> +	if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT))
> {
>  		if (nvme_is_path_error(nvme_req(req)->status) ||
>  		    blk_queue_dying(req->q))
> -			return FAILOVER;
> +			return (req->cmd_flags & REQ_NVME_MPATH) ?
> +				FAILOVER : FAILUP;
>  	} else {
>  		if (blk_queue_dying(req->q))
>  			return COMPLETE;
> @@ -330,10 +332,8 @@ static inline enum nvme_disposition
> nvme_decide_disposition(struct request *req)
>  	return RETRY;
>  }
>  
> -static inline void nvme_end_req(struct request *req)
> +static inline void __nvme_end_req(struct request *req, blk_status_t
> status)
>  {
> -	blk_status_t status = nvme_error_status(nvme_req(req)->status);
> -
>  	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
>  	    req_op(req) == REQ_OP_ZONE_APPEND)
>  		req->__sector = nvme_lba_to_sect(req->q->queuedata,
> @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request
> *req)
>  	blk_mq_end_request(req, status);
>  }
>  
> +static inline void nvme_end_req(struct request *req)
> +{
> +	__nvme_end_req(req, nvme_error_status(nvme_req(req)->status));
> +}
> +
> +static void nvme_failup_req(struct request *req)
> +{
> +	blk_status_t status = nvme_error_status(nvme_req(req)->status);
> +
> +	if (WARN_ON_ONCE(!blk_path_error(status))) {
> +		pr_debug("Request meant for failover but blk_status_t
> (errno=%d) was not retryable.\n",
> +			 blk_status_to_errno(status));
> +		status = BLK_STS_IOERR;
> +	}
> +
> +	__nvme_end_req(req, status);

It seems like adding __nvme_end_req() was unnecessary, since
nvme_end_req() just calls it and does nothing else?

-Ewan

> +}
> +
>  void nvme_complete_rq(struct request *req)
>  {
>  	trace_nvme_complete_rq(req);
> @@ -361,6 +379,9 @@ void nvme_complete_rq(struct request *req)
>  	case FAILOVER:
>  		nvme_failover_req(req);
>  		return;
> +	case FAILUP:
> +		nvme_failup_req(req);
> +		return;
>  	}
>  }
>  EXPORT_SYMBOL_GPL(nvme_complete_rq);
Mike Snitzer April 16, 2021, 9:44 p.m. UTC | #6
On Fri, Apr 16 2021 at  4:52pm -0400,
Ewan D. Milne <emilne@redhat.com> wrote:

> On Thu, 2021-04-15 at 19:15 -0400, Mike Snitzer wrote:
> > If REQ_FAILFAST_TRANSPORT is set it means the driver should not retry
> > IO that completed with transport errors. REQ_FAILFAST_TRANSPORT is
> > set by multipathing software (e.g. dm-multipath) before it issues IO.
> > 
> > Update NVMe to allow failover of requests marked with either
> > REQ_NVME_MPATH or REQ_FAILFAST_TRANSPORT. This allows such requests
> > to be given a disposition of either FAILOVER or FAILUP respectively.
> > FAILUP handling ensures a retryable error is returned up from NVMe.
> > 
> > Introduce nvme_failup_req() for use in nvme_complete_rq() if
> > nvme_decide_disposition() returns FAILUP. nvme_failup_req() ensures
> > the request is completed with a retryable IO error when appropriate.
> > __nvme_end_req() was factored out for use by both nvme_end_req() and
> > nvme_failup_req().
> > 
> > Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> > ---
> >  drivers/nvme/host/core.c | 31 ++++++++++++++++++++++++++-----
> >  1 file changed, 26 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 4134cf3c7e48..10375197dd53 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -299,6 +299,7 @@ enum nvme_disposition {
> >  	COMPLETE,
> >  	RETRY,
> >  	FAILOVER,
> > +	FAILUP,
> >  };
> >  
> >  static inline enum nvme_disposition nvme_decide_disposition(struct
> > request *req)
> > @@ -318,10 +319,11 @@ static inline enum nvme_disposition
> > nvme_decide_disposition(struct request *req)
> >  	    nvme_req(req)->retries >= nvme_max_retries)
> >  		return COMPLETE;
> >  
> > -	if (req->cmd_flags & REQ_NVME_MPATH) {
> > +	if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT))
> > {
> >  		if (nvme_is_path_error(nvme_req(req)->status) ||
> >  		    blk_queue_dying(req->q))
> > -			return FAILOVER;
> > +			return (req->cmd_flags & REQ_NVME_MPATH) ?
> > +				FAILOVER : FAILUP;
> >  	} else {
> >  		if (blk_queue_dying(req->q))
> >  			return COMPLETE;
> > @@ -330,10 +332,8 @@ static inline enum nvme_disposition
> > nvme_decide_disposition(struct request *req)
> >  	return RETRY;
> >  }
> >  
> > -static inline void nvme_end_req(struct request *req)
> > +static inline void __nvme_end_req(struct request *req, blk_status_t
> > status)
> >  {
> > -	blk_status_t status = nvme_error_status(nvme_req(req)->status);
> > -
> >  	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> >  	    req_op(req) == REQ_OP_ZONE_APPEND)
> >  		req->__sector = nvme_lba_to_sect(req->q->queuedata,
> > @@ -343,6 +343,24 @@ static inline void nvme_end_req(struct request
> > *req)
> >  	blk_mq_end_request(req, status);
> >  }
> >  
> > +static inline void nvme_end_req(struct request *req)
> > +{
> > +	__nvme_end_req(req, nvme_error_status(nvme_req(req)->status));
> > +}
> > +
> > +static void nvme_failup_req(struct request *req)
> > +{
> > +	blk_status_t status = nvme_error_status(nvme_req(req)->status);
> > +
> > +	if (WARN_ON_ONCE(!blk_path_error(status))) {
> > +		pr_debug("Request meant for failover but blk_status_t
> > (errno=%d) was not retryable.\n",
> > +			 blk_status_to_errno(status));
> > +		status = BLK_STS_IOERR;
> > +	}
> > +
> > +	__nvme_end_req(req, status);
> 
> It seems like adding __nvme_end_req() was unnecessary, since
> nvme_end_req() just calls it and does nothing else?

The __nvme_end_req helper allowed the status to be passed in, making it
easy to override status for unlikley edge-case where the BLK_STS is
!blk_path_error() but should be given nvme_is_path_error() returned
true.

Anyway, I can avoid the need to factor out __nvme_end_req() and just:
  nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR;
  nvme_end_req(req);

Thanks,
Mike
diff mbox series

Patch

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4134cf3c7e48..10375197dd53 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -299,6 +299,7 @@  enum nvme_disposition {
 	COMPLETE,
 	RETRY,
 	FAILOVER,
+	FAILUP,
 };
 
 static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
@@ -318,10 +319,11 @@  static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
 	    nvme_req(req)->retries >= nvme_max_retries)
 		return COMPLETE;
 
-	if (req->cmd_flags & REQ_NVME_MPATH) {
+	if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) {
 		if (nvme_is_path_error(nvme_req(req)->status) ||
 		    blk_queue_dying(req->q))
-			return FAILOVER;
+			return (req->cmd_flags & REQ_NVME_MPATH) ?
+				FAILOVER : FAILUP;
 	} else {
 		if (blk_queue_dying(req->q))
 			return COMPLETE;
@@ -330,10 +332,8 @@  static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
 	return RETRY;
 }
 
-static inline void nvme_end_req(struct request *req)
+static inline void __nvme_end_req(struct request *req, blk_status_t status)
 {
-	blk_status_t status = nvme_error_status(nvme_req(req)->status);
-
 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
 	    req_op(req) == REQ_OP_ZONE_APPEND)
 		req->__sector = nvme_lba_to_sect(req->q->queuedata,
@@ -343,6 +343,24 @@  static inline void nvme_end_req(struct request *req)
 	blk_mq_end_request(req, status);
 }
 
+static inline void nvme_end_req(struct request *req)
+{
+	__nvme_end_req(req, nvme_error_status(nvme_req(req)->status));
+}
+
+static void nvme_failup_req(struct request *req)
+{
+	blk_status_t status = nvme_error_status(nvme_req(req)->status);
+
+	if (WARN_ON_ONCE(!blk_path_error(status))) {
+		pr_debug("Request meant for failover but blk_status_t (errno=%d) was not retryable.\n",
+			 blk_status_to_errno(status));
+		status = BLK_STS_IOERR;
+	}
+
+	__nvme_end_req(req, status);
+}
+
 void nvme_complete_rq(struct request *req)
 {
 	trace_nvme_complete_rq(req);
@@ -361,6 +379,9 @@  void nvme_complete_rq(struct request *req)
 	case FAILOVER:
 		nvme_failover_req(req);
 		return;
+	case FAILUP:
+		nvme_failup_req(req);
+		return;
 	}
 }
 EXPORT_SYMBOL_GPL(nvme_complete_rq);