
[3/3] nvme-pci: make nvme reset more reliable

Message ID 20200520115655.729705-4-ming.lei@redhat.com (mailing list archive)
State New, archived
Series blk-mq/nvme: improve nvme-pci reset handler

Commit Message

Ming Lei May 20, 2020, 11:56 a.m. UTC
While the reset handler waits for in-flight IO to complete, a timeout
or controller failure may still happen, in which case the controller is
deleted and all in-flight IOs are failed. This handling is too drastic.

Improve the reset handling by replacing nvme_wait_freeze with a loop
that queries and checks the controller. If all ns queues are frozen, the
reset has succeeded; otherwise check whether the controller has been
disabled, and if so break out of the current recovery and schedule a
fresh reset.

This avoids failing IO and removing the controller unnecessarily.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/nvme/host/pci.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)
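
Condensed from the patch at the bottom of this page, the tail of
nvme_reset_work() changes along these lines (the comments are added here
for illustration only; they are not part of the posted patch):

	} else {
		nvme_start_queues(&dev->ctrl);
		/* Poll until all ns queues are frozen, or until the
		 * controller is disabled (online_queues drops to 0). */
		reset_done = nvme_wait_freeze_and_check(dev);
		if (reset_done)
			nvme_dev_add(dev);
		nvme_unfreeze(&dev->ctrl);
	}
	...
	/* A new error happened during reset: schedule a fresh reset
	 * instead of failing all IO and removing the controller. */
	if (!reset_done) {
		dev_warn(dev->ctrl.device, "new error during reset\n");
		nvme_reset_ctrl(&dev->ctrl);
	} else {
		nvme_start_ctrl(&dev->ctrl);
	}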

Comments

Dongli Zhang May 20, 2020, 5:10 p.m. UTC | #1
On 5/20/20 4:56 AM, Ming Lei wrote:
> While the reset handler waits for in-flight IO to complete, a timeout
> or controller failure may still happen, in which case the controller is
> deleted and all in-flight IOs are failed. This handling is too drastic.
> 
> Improve the reset handling by replacing nvme_wait_freeze with a loop
> that queries and checks the controller. If all ns queues are frozen, the
> reset has succeeded; otherwise check whether the controller has been
> disabled, and if so break out of the current recovery and schedule a
> fresh reset.
> 
> This avoids failing IO and removing the controller unnecessarily.
> 
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Sagi Grimberg <sagi@grimberg.me>
> Cc: Keith Busch <kbusch@kernel.org>
> Cc: Max Gurtovoy <maxg@mellanox.com>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/nvme/host/pci.c | 37 ++++++++++++++++++++++++++++++-------
>  1 file changed, 30 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index ce0d1e79467a..b5aeed33a634 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -24,6 +24,7 @@
>  #include <linux/io-64-nonatomic-lo-hi.h>
>  #include <linux/sed-opal.h>
>  #include <linux/pci-p2pdma.h>
> +#include <linux/delay.h>
>  
>  #include "trace.h"
>  #include "nvme.h"
> @@ -1235,9 +1236,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
>  	 * shutdown, so we return BLK_EH_DONE.
>  	 */
>  	switch (dev->ctrl.state) {
> -	case NVME_CTRL_CONNECTING:
> -		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
> -		/* fall through */
>  	case NVME_CTRL_DELETING:
>  		dev_warn_ratelimited(dev->ctrl.device,
>  			 "I/O %d QID %d timeout, disable controller\n",
> @@ -2393,7 +2391,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
>  		u32 csts = readl(dev->bar + NVME_REG_CSTS);
>  
>  		if (dev->ctrl.state == NVME_CTRL_LIVE ||
> -		    dev->ctrl.state == NVME_CTRL_RESETTING) {
> +		    dev->ctrl.state == NVME_CTRL_RESETTING ||
> +		    dev->ctrl.state == NVME_CTRL_CONNECTING) {
>  			freeze = true;
>  			nvme_start_freeze(&dev->ctrl);
>  		}
> @@ -2504,12 +2503,29 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
>  		nvme_put_ctrl(&dev->ctrl);
>  }
>  
> +static bool nvme_wait_freeze_and_check(struct nvme_dev *dev)
> +{
> +	bool frozen;
> +
> +	while (true) {
> +		frozen = nvme_frozen(&dev->ctrl);
> +		if (frozen)
> +			break;
> +		if (!dev->online_queues)
> +			break;
> +		msleep(5);
> +	}
> +
> +	return frozen;
> +}
> +
>  static void nvme_reset_work(struct work_struct *work)
>  {
>  	struct nvme_dev *dev =
>  		container_of(work, struct nvme_dev, ctrl.reset_work);
>  	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
>  	int result;
> +	bool reset_done = true;
>  
>  	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
>  		result = -ENODEV;
> @@ -2606,8 +2622,9 @@ static void nvme_reset_work(struct work_struct *work)
>  		nvme_free_tagset(dev);
>  	} else {
>  		nvme_start_queues(&dev->ctrl);
> -		nvme_wait_freeze(&dev->ctrl);
> -		nvme_dev_add(dev);
> +		reset_done = nvme_wait_freeze_and_check(dev);

Once we arrive here, it indicates "dev->online_queues >= 2".

2601         if (dev->online_queues < 2) {
2602                 dev_warn(dev->ctrl.device, "IO queues not created\n");
2603                 nvme_kill_queues(&dev->ctrl);
2604                 nvme_remove_namespaces(&dev->ctrl);
2605                 nvme_free_tagset(dev);
2606         } else {
2607                 nvme_start_queues(&dev->ctrl);
2608                 nvme_wait_freeze(&dev->ctrl);
2609                 nvme_dev_add(dev);
2610                 nvme_unfreeze(&dev->ctrl);
2611         }

Is there any reason to check "if (!dev->online_queues)" in
nvme_wait_freeze_and_check()?

Thank you very much!

Dongli Zhang




> +		if (reset_done)
> +			nvme_dev_add(dev);
>  		nvme_unfreeze(&dev->ctrl);
>  	}
>  
> @@ -2622,7 +2639,13 @@ static void nvme_reset_work(struct work_struct *work)
>  		goto out;
>  	}
>  
> -	nvme_start_ctrl(&dev->ctrl);
> +	/* New error happens during reset, so schedule a new reset */
> +	if (!reset_done) {
> +		dev_warn(dev->ctrl.device, "new error during reset\n");
> +		nvme_reset_ctrl(&dev->ctrl);
> +	} else {
> +		nvme_start_ctrl(&dev->ctrl);
> +	}
>  	return;
>  
>   out_unlock:
>
Dongli Zhang May 20, 2020, 5:27 p.m. UTC | #2
On 5/20/20 10:10 AM, Dongli Zhang wrote:
> 
> 
> On 5/20/20 4:56 AM, Ming Lei wrote:
>> While the reset handler waits for in-flight IO to complete, a timeout
>> or controller failure may still happen, in which case the controller is
>> deleted and all in-flight IOs are failed. This handling is too drastic.
>>
>> Improve the reset handling by replacing nvme_wait_freeze with a loop
>> that queries and checks the controller. If all ns queues are frozen, the
>> reset has succeeded; otherwise check whether the controller has been
>> disabled, and if so break out of the current recovery and schedule a
>> fresh reset.
>>
>> This avoids failing IO and removing the controller unnecessarily.
>>
>> Cc: Christoph Hellwig <hch@lst.de>
>> Cc: Sagi Grimberg <sagi@grimberg.me>
>> Cc: Keith Busch <kbusch@kernel.org>
>> Cc: Max Gurtovoy <maxg@mellanox.com>
>> Signed-off-by: Ming Lei <ming.lei@redhat.com>
>> ---
>>  drivers/nvme/host/pci.c | 37 ++++++++++++++++++++++++++++++-------
>>  1 file changed, 30 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
>> index ce0d1e79467a..b5aeed33a634 100644
>> --- a/drivers/nvme/host/pci.c
>> +++ b/drivers/nvme/host/pci.c
>> @@ -24,6 +24,7 @@
>>  #include <linux/io-64-nonatomic-lo-hi.h>
>>  #include <linux/sed-opal.h>
>>  #include <linux/pci-p2pdma.h>
>> +#include <linux/delay.h>
>>  
>>  #include "trace.h"
>>  #include "nvme.h"
>> @@ -1235,9 +1236,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
>>  	 * shutdown, so we return BLK_EH_DONE.
>>  	 */
>>  	switch (dev->ctrl.state) {
>> -	case NVME_CTRL_CONNECTING:
>> -		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
>> -		/* fall through */
>>  	case NVME_CTRL_DELETING:
>>  		dev_warn_ratelimited(dev->ctrl.device,
>>  			 "I/O %d QID %d timeout, disable controller\n",
>> @@ -2393,7 +2391,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
>>  		u32 csts = readl(dev->bar + NVME_REG_CSTS);
>>  
>>  		if (dev->ctrl.state == NVME_CTRL_LIVE ||
>> -		    dev->ctrl.state == NVME_CTRL_RESETTING) {
>> +		    dev->ctrl.state == NVME_CTRL_RESETTING ||
>> +		    dev->ctrl.state == NVME_CTRL_CONNECTING) {
>>  			freeze = true;
>>  			nvme_start_freeze(&dev->ctrl);
>>  		}
>> @@ -2504,12 +2503,29 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
>>  		nvme_put_ctrl(&dev->ctrl);
>>  }
>>  
>> +static bool nvme_wait_freeze_and_check(struct nvme_dev *dev)
>> +{
>> +	bool frozen;
>> +
>> +	while (true) {
>> +		frozen = nvme_frozen(&dev->ctrl);
>> +		if (frozen)
>> +			break;
>> +		if (!dev->online_queues)
>> +			break;
>> +		msleep(5);
>> +	}
>> +
>> +	return frozen;
>> +}
>> +
>>  static void nvme_reset_work(struct work_struct *work)
>>  {
>>  	struct nvme_dev *dev =
>>  		container_of(work, struct nvme_dev, ctrl.reset_work);
>>  	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
>>  	int result;
>> +	bool reset_done = true;
>>  
>>  	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
>>  		result = -ENODEV;
>> @@ -2606,8 +2622,9 @@ static void nvme_reset_work(struct work_struct *work)
>>  		nvme_free_tagset(dev);
>>  	} else {
>>  		nvme_start_queues(&dev->ctrl);
>> -		nvme_wait_freeze(&dev->ctrl);
>> -		nvme_dev_add(dev);
>> +		reset_done = nvme_wait_freeze_and_check(dev);
> 
> Once we arrive here, it indicates "dev->online_queues >= 2".
> 
> 2601         if (dev->online_queues < 2) {
> 2602                 dev_warn(dev->ctrl.device, "IO queues not created\n");
> 2603                 nvme_kill_queues(&dev->ctrl);
> 2604                 nvme_remove_namespaces(&dev->ctrl);
> 2605                 nvme_free_tagset(dev);
> 2606         } else {
> 2607                 nvme_start_queues(&dev->ctrl);
> 2608                 nvme_wait_freeze(&dev->ctrl);
> 2609                 nvme_dev_add(dev);
> 2610                 nvme_unfreeze(&dev->ctrl);
> 2611         }
> 
> Is there any reason to check "if (!dev->online_queues)" in
> nvme_wait_freeze_and_check()?
> 

I think you meant another nvme_dev_disable() during reset?

Sorry for the misunderstanding in the previous email.

Dongli Zhang
Keith Busch May 20, 2020, 5:52 p.m. UTC | #3
On Wed, May 20, 2020 at 10:10:47AM -0700, Dongli Zhang wrote:
> On 5/20/20 4:56 AM, Ming Lei wrote:
> > +static bool nvme_wait_freeze_and_check(struct nvme_dev *dev)
> > +{
> > +	bool frozen;
> > +
> > +	while (true) {
> > +		frozen = nvme_frozen(&dev->ctrl);
> > +		if (frozen)
> > +			break;
> > +		if (!dev->online_queues)
> > +			break;
> > +		msleep(5);
> > +	}
> > +
> > +	return frozen;
> > +}
> > +
> >  static void nvme_reset_work(struct work_struct *work)
> >  {
> >  	struct nvme_dev *dev =
> >  		container_of(work, struct nvme_dev, ctrl.reset_work);
> >  	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
> >  	int result;
> > +	bool reset_done = true;
> >  
> >  	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
> >  		result = -ENODEV;
> > @@ -2606,8 +2622,9 @@ static void nvme_reset_work(struct work_struct *work)
> >  		nvme_free_tagset(dev);
> >  	} else {
> >  		nvme_start_queues(&dev->ctrl);
> > -		nvme_wait_freeze(&dev->ctrl);
> > -		nvme_dev_add(dev);
> > +		reset_done = nvme_wait_freeze_and_check(dev);
> 
> Once we arrive here, it indicates "dev->online_queues >= 2".
> 
> 2601         if (dev->online_queues < 2) {
> 2602                 dev_warn(dev->ctrl.device, "IO queues not created\n");
> 2603                 nvme_kill_queues(&dev->ctrl);
> 2604                 nvme_remove_namespaces(&dev->ctrl);
> 2605                 nvme_free_tagset(dev);
> 2606         } else {
> 2607                 nvme_start_queues(&dev->ctrl);
> 2608                 nvme_wait_freeze(&dev->ctrl);
> 2609                 nvme_dev_add(dev);
> 2610                 nvme_unfreeze(&dev->ctrl);
> 2611         }
> 
> Is there any reason to check "if (!dev->online_queues)" in
> nvme_wait_freeze_and_check()?

Looks correct to me. If the queues fail to freeze, that means a timeout
occurred, and the nvme timeout handler tears down all online queues, so
this patch uses that as the criterion for breaking out of the loop.
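
Spelled out with comments, the two exit conditions of the helper are
(same code as in the patch, with comments added here for illustration):

	static bool nvme_wait_freeze_and_check(struct nvme_dev *dev)
	{
		bool frozen;

		while (true) {
			frozen = nvme_frozen(&dev->ctrl);
			if (frozen)
				break;	/* all ns queues frozen: reset succeeded */
			if (!dev->online_queues)
				break;	/* timeout handler has disabled the controller */
			msleep(5);
		}

		return frozen;
	}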
Ming Lei May 21, 2020, 2:33 a.m. UTC | #4
On Wed, May 20, 2020 at 10:10:47AM -0700, Dongli Zhang wrote:
> 
> 
> On 5/20/20 4:56 AM, Ming Lei wrote:
> > While the reset handler waits for in-flight IO to complete, a timeout
> > or controller failure may still happen, in which case the controller is
> > deleted and all in-flight IOs are failed. This handling is too drastic.
> > 
> > Improve the reset handling by replacing nvme_wait_freeze with a loop
> > that queries and checks the controller. If all ns queues are frozen, the
> > reset has succeeded; otherwise check whether the controller has been
> > disabled, and if so break out of the current recovery and schedule a
> > fresh reset.
> > 
> > This avoids failing IO and removing the controller unnecessarily.
> > 
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Sagi Grimberg <sagi@grimberg.me>
> > Cc: Keith Busch <kbusch@kernel.org>
> > Cc: Max Gurtovoy <maxg@mellanox.com>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >  drivers/nvme/host/pci.c | 37 ++++++++++++++++++++++++++++++-------
> >  1 file changed, 30 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> > index ce0d1e79467a..b5aeed33a634 100644
> > --- a/drivers/nvme/host/pci.c
> > +++ b/drivers/nvme/host/pci.c
> > @@ -24,6 +24,7 @@
> >  #include <linux/io-64-nonatomic-lo-hi.h>
> >  #include <linux/sed-opal.h>
> >  #include <linux/pci-p2pdma.h>
> > +#include <linux/delay.h>
> >  
> >  #include "trace.h"
> >  #include "nvme.h"
> > @@ -1235,9 +1236,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
> >  	 * shutdown, so we return BLK_EH_DONE.
> >  	 */
> >  	switch (dev->ctrl.state) {
> > -	case NVME_CTRL_CONNECTING:
> > -		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
> > -		/* fall through */
> >  	case NVME_CTRL_DELETING:
> >  		dev_warn_ratelimited(dev->ctrl.device,
> >  			 "I/O %d QID %d timeout, disable controller\n",
> > @@ -2393,7 +2391,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
> >  		u32 csts = readl(dev->bar + NVME_REG_CSTS);
> >  
> >  		if (dev->ctrl.state == NVME_CTRL_LIVE ||
> > -		    dev->ctrl.state == NVME_CTRL_RESETTING) {
> > +		    dev->ctrl.state == NVME_CTRL_RESETTING ||
> > +		    dev->ctrl.state == NVME_CTRL_CONNECTING) {
> >  			freeze = true;
> >  			nvme_start_freeze(&dev->ctrl);
> >  		}
> > @@ -2504,12 +2503,29 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
> >  		nvme_put_ctrl(&dev->ctrl);
> >  }
> >  
> > +static bool nvme_wait_freeze_and_check(struct nvme_dev *dev)
> > +{
> > +	bool frozen;
> > +
> > +	while (true) {
> > +		frozen = nvme_frozen(&dev->ctrl);
> > +		if (frozen)
> > +			break;
> > +		if (!dev->online_queues)
> > +			break;
> > +		msleep(5);
> > +	}
> > +
> > +	return frozen;
> > +}
> > +
> >  static void nvme_reset_work(struct work_struct *work)
> >  {
> >  	struct nvme_dev *dev =
> >  		container_of(work, struct nvme_dev, ctrl.reset_work);
> >  	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
> >  	int result;
> > +	bool reset_done = true;
> >  
> >  	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
> >  		result = -ENODEV;
> > @@ -2606,8 +2622,9 @@ static void nvme_reset_work(struct work_struct *work)
> >  		nvme_free_tagset(dev);
> >  	} else {
> >  		nvme_start_queues(&dev->ctrl);
> > -		nvme_wait_freeze(&dev->ctrl);
> > -		nvme_dev_add(dev);
> > +		reset_done = nvme_wait_freeze_and_check(dev);
> 
> Once we arrive here, it indicates "dev->online_queues >= 2".
> 
> 2601         if (dev->online_queues < 2) {
> 2602                 dev_warn(dev->ctrl.device, "IO queues not created\n");
> 2603                 nvme_kill_queues(&dev->ctrl);
> 2604                 nvme_remove_namespaces(&dev->ctrl);
> 2605                 nvme_free_tagset(dev);
> 2606         } else {
> 2607                 nvme_start_queues(&dev->ctrl);
> 2608                 nvme_wait_freeze(&dev->ctrl);
> 2609                 nvme_dev_add(dev);
> 2610                 nvme_unfreeze(&dev->ctrl);
> 2611         }
> 
> Is there any reason to check "if (!dev->online_queues)" in
> nvme_wait_freeze_and_check()?
> 

nvme_dev_disable() suspends all I/O queues and the admin queue, so dev->online_queues
becomes 0 after nvme_dev_disable() is run from the timeout handler.
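
For reference, the path that zeroes the counter is roughly the following
(a paraphrased sketch of the call chain, not verbatim driver code):

	/*
	 * nvme_timeout()                     timeout fires during reset
	 *   nvme_dev_disable(dev, false)     controller is disabled
	 *     nvme_suspend_io_queues(dev)    suspends the IO queues
	 *       nvme_suspend_queue(nvmeq)    each suspended queue does
	 *                                      dev->online_queues--
	 *                                    so the counter reaches 0 and
	 *                                    nvme_wait_freeze_and_check()
	 *                                    breaks out of its loop
	 */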


thanks,
Ming
Dongli Zhang May 26, 2020, 5:01 a.m. UTC | #5
On 5/20/20 4:56 AM, Ming Lei wrote:
> While the reset handler waits for in-flight IO to complete, a timeout

Does this indicate the window since nvme_start_queues() in nvme_reset_work(),
that is, just after the queues are unquiesced again?

If a v2 is required in the future, how about mentioning the specific function, so
that it would be much easier to track the issue?

> or controller failure may still happen, in which case the controller is
> deleted and all in-flight IOs are failed. This handling is too drastic.
> 
> Improve the reset handling by replacing nvme_wait_freeze with a loop
> that queries and checks the controller. If all ns queues are frozen, the
> reset has succeeded; otherwise check whether the controller has been
> disabled, and if so break out of the current recovery and schedule a
> fresh reset.
> 
> This avoids failing IO and removing the controller unnecessarily.
> 
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Sagi Grimberg <sagi@grimberg.me>
> Cc: Keith Busch <kbusch@kernel.org>
> Cc: Max Gurtovoy <maxg@mellanox.com>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/nvme/host/pci.c | 37 ++++++++++++++++++++++++++++++-------
>  1 file changed, 30 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index ce0d1e79467a..b5aeed33a634 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -24,6 +24,7 @@
>  #include <linux/io-64-nonatomic-lo-hi.h>
>  #include <linux/sed-opal.h>
>  #include <linux/pci-p2pdma.h>
> +#include <linux/delay.h>
>  
>  #include "trace.h"
>  #include "nvme.h"
> @@ -1235,9 +1236,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
>  	 * shutdown, so we return BLK_EH_DONE.
>  	 */
>  	switch (dev->ctrl.state) {
> -	case NVME_CTRL_CONNECTING:
> -		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
> -		/* fall through */
>  	case NVME_CTRL_DELETING:
>  		dev_warn_ratelimited(dev->ctrl.device,
>  			 "I/O %d QID %d timeout, disable controller\n",
> @@ -2393,7 +2391,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
>  		u32 csts = readl(dev->bar + NVME_REG_CSTS);
>  
>  		if (dev->ctrl.state == NVME_CTRL_LIVE ||
> -		    dev->ctrl.state == NVME_CTRL_RESETTING) {
> +		    dev->ctrl.state == NVME_CTRL_RESETTING ||
> +		    dev->ctrl.state == NVME_CTRL_CONNECTING) {
>  			freeze = true;
>  			nvme_start_freeze(&dev->ctrl);
>  		}
> @@ -2504,12 +2503,29 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
>  		nvme_put_ctrl(&dev->ctrl);
>  }
>  
> +static bool nvme_wait_freeze_and_check(struct nvme_dev *dev)
> +{
> +	bool frozen;
> +
> +	while (true) {
> +		frozen = nvme_frozen(&dev->ctrl);
> +		if (frozen)
> +			break;

... and how about adding a comment that the check below exists because of the nvme
timeout handler, as explained in the other email (if a v2 is sent), so that it is
not necessary to look up "online_queues" with cscope :)

> +		if (!dev->online_queues)
> +			break;
> +		msleep(5);
> +	}
> +
> +	return frozen;
> +}
> +
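
For example, something along these lines (an illustrative sketch of such
a comment, not part of the posted patch):

		/*
		 * online_queues drops to 0 once the timeout handler has run
		 * nvme_dev_disable(), i.e. the controller was disabled and
		 * this recovery attempt has to be abandoned.
		 */
		if (!dev->online_queues)
			break;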

Thank you very much!

Dongli Zhang
Ming Lei May 26, 2020, 7:12 a.m. UTC | #6
On Mon, May 25, 2020 at 10:01:18PM -0700, Dongli Zhang wrote:
> 
> 
> On 5/20/20 4:56 AM, Ming Lei wrote:
> > While the reset handler waits for in-flight IO to complete, a timeout
> 
> Does this indicate the window since nvme_start_queues() in nvme_reset_work(),
> that is, just after the queues are unquiesced again?

Right, nvme_start_queues() starts dispatching requests again, and
nvme_wait_freeze() waits for all of these in-flight IOs to complete.
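
The window in question, as a rough timeline of the reset path
(paraphrased, for illustration only):

	/*
	 * nvme_dev_disable()
	 *   nvme_start_freeze()     no new requests enter the ns queues
	 * nvme_reset_work()
	 *   nvme_start_queues()     previously queued requests are
	 *                           dispatched to the controller again
	 *   nvme_wait_freeze()      blocks until all of them complete;
	 *                           this is where a new timeout or
	 *                           controller failure can still hit
	 */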

> 
> If a v2 is required in the future, how about mentioning the specific function, so
> that it would be much easier to track the issue?

Not sure it is needed, since it is quite straightforward.

> 
> > or controller failure may still happen, in which case the controller is
> > deleted and all in-flight IOs are failed. This handling is too drastic.
> > 
> > Improve the reset handling by replacing nvme_wait_freeze with a loop
> > that queries and checks the controller. If all ns queues are frozen, the
> > reset has succeeded; otherwise check whether the controller has been
> > disabled, and if so break out of the current recovery and schedule a
> > fresh reset.
> > 
> > This avoids failing IO and removing the controller unnecessarily.
> > 
> > Cc: Christoph Hellwig <hch@lst.de>
> > Cc: Sagi Grimberg <sagi@grimberg.me>
> > Cc: Keith Busch <kbusch@kernel.org>
> > Cc: Max Gurtovoy <maxg@mellanox.com>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >  drivers/nvme/host/pci.c | 37 ++++++++++++++++++++++++++++++-------
> >  1 file changed, 30 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> > index ce0d1e79467a..b5aeed33a634 100644
> > --- a/drivers/nvme/host/pci.c
> > +++ b/drivers/nvme/host/pci.c
> > @@ -24,6 +24,7 @@
> >  #include <linux/io-64-nonatomic-lo-hi.h>
> >  #include <linux/sed-opal.h>
> >  #include <linux/pci-p2pdma.h>
> > +#include <linux/delay.h>
> >  
> >  #include "trace.h"
> >  #include "nvme.h"
> > @@ -1235,9 +1236,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
> >  	 * shutdown, so we return BLK_EH_DONE.
> >  	 */
> >  	switch (dev->ctrl.state) {
> > -	case NVME_CTRL_CONNECTING:
> > -		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
> > -		/* fall through */
> >  	case NVME_CTRL_DELETING:
> >  		dev_warn_ratelimited(dev->ctrl.device,
> >  			 "I/O %d QID %d timeout, disable controller\n",
> > @@ -2393,7 +2391,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
> >  		u32 csts = readl(dev->bar + NVME_REG_CSTS);
> >  
> >  		if (dev->ctrl.state == NVME_CTRL_LIVE ||
> > -		    dev->ctrl.state == NVME_CTRL_RESETTING) {
> > +		    dev->ctrl.state == NVME_CTRL_RESETTING ||
> > +		    dev->ctrl.state == NVME_CTRL_CONNECTING) {
> >  			freeze = true;
> >  			nvme_start_freeze(&dev->ctrl);
> >  		}
> > @@ -2504,12 +2503,29 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
> >  		nvme_put_ctrl(&dev->ctrl);
> >  }
> >  
> > +static bool nvme_wait_freeze_and_check(struct nvme_dev *dev)
> > +{
> > +	bool frozen;
> > +
> > +	while (true) {
> > +		frozen = nvme_frozen(&dev->ctrl);
> > +		if (frozen)
> > +			break;
> 
> ... and how about adding a comment that the check below exists because of the nvme
> timeout handler, as explained in the other email (if a v2 is sent), so that it is
> not necessary to look up "online_queues" with cscope :)
> 
> > +		if (!dev->online_queues)
> > +			break;
> > +		msleep(5);

Fine.


Thanks,
Ming

Patch

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ce0d1e79467a..b5aeed33a634 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -24,6 +24,7 @@ 
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/sed-opal.h>
 #include <linux/pci-p2pdma.h>
+#include <linux/delay.h>
 
 #include "trace.h"
 #include "nvme.h"
@@ -1235,9 +1236,6 @@  static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 * shutdown, so we return BLK_EH_DONE.
 	 */
 	switch (dev->ctrl.state) {
-	case NVME_CTRL_CONNECTING:
-		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
-		/* fall through */
 	case NVME_CTRL_DELETING:
 		dev_warn_ratelimited(dev->ctrl.device,
 			 "I/O %d QID %d timeout, disable controller\n",
@@ -2393,7 +2391,8 @@  static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 		u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 		if (dev->ctrl.state == NVME_CTRL_LIVE ||
-		    dev->ctrl.state == NVME_CTRL_RESETTING) {
+		    dev->ctrl.state == NVME_CTRL_RESETTING ||
+		    dev->ctrl.state == NVME_CTRL_CONNECTING) {
 			freeze = true;
 			nvme_start_freeze(&dev->ctrl);
 		}
@@ -2504,12 +2503,29 @@  static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
 		nvme_put_ctrl(&dev->ctrl);
 }
 
+static bool nvme_wait_freeze_and_check(struct nvme_dev *dev)
+{
+	bool frozen;
+
+	while (true) {
+		frozen = nvme_frozen(&dev->ctrl);
+		if (frozen)
+			break;
+		if (!dev->online_queues)
+			break;
+		msleep(5);
+	}
+
+	return frozen;
+}
+
 static void nvme_reset_work(struct work_struct *work)
 {
 	struct nvme_dev *dev =
 		container_of(work, struct nvme_dev, ctrl.reset_work);
 	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
 	int result;
+	bool reset_done = true;
 
 	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
 		result = -ENODEV;
@@ -2606,8 +2622,9 @@  static void nvme_reset_work(struct work_struct *work)
 		nvme_free_tagset(dev);
 	} else {
 		nvme_start_queues(&dev->ctrl);
-		nvme_wait_freeze(&dev->ctrl);
-		nvme_dev_add(dev);
+		reset_done = nvme_wait_freeze_and_check(dev);
+		if (reset_done)
+			nvme_dev_add(dev);
 		nvme_unfreeze(&dev->ctrl);
 	}
 
@@ -2622,7 +2639,13 @@  static void nvme_reset_work(struct work_struct *work)
 		goto out;
 	}
 
-	nvme_start_ctrl(&dev->ctrl);
+	/* New error happens during reset, so schedule a new reset */
+	if (!reset_done) {
+		dev_warn(dev->ctrl.device, "new error during reset\n");
+		nvme_reset_ctrl(&dev->ctrl);
+	} else {
+		nvme_start_ctrl(&dev->ctrl);
+	}
 	return;
 
  out_unlock: