
[1/6] nvme: Sync request queues on reset

Message ID 20180518163823.27820-1-keith.busch@intel.com (mailing list archive)
State New, archived

Commit Message

Keith Busch May 18, 2018, 4:38 p.m. UTC
This patch fixes races that occur with simultaneous controller
resets by synchronizing request queues prior to initializing the
controller. Without this, a thread may attempt to disable a controller
at the same time as we're trying to enable it.

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/core.c | 21 +++++++++++++++++++--
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/pci.c  |  1 +
 3 files changed, 21 insertions(+), 2 deletions(-)
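
For context, "synchronizing" here means calling blk_sync_queue() on each
namespace queue. A paraphrased sketch of that helper as it looked in
block/blk-core.c around v4.17 (the exact body may differ):

void blk_sync_queue(struct request_queue *q)
{
	/* Wait for the queue's timeout timer and timeout work to finish */
	del_timer_sync(&q->timeout);
	cancel_work_sync(&q->timeout_work);

	if (q->mq_ops) {
		struct blk_mq_hw_ctx *hctx;
		int i;

		/* Flush requeue work and per-hctx run work as well */
		cancel_delayed_work_sync(&q->requeue_work);
		queue_for_each_hw_ctx(q, hctx, i)
			cancel_delayed_work_sync(&hctx->run_work);
	}
}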

Comments

Ming Lei May 18, 2018, 10:32 p.m. UTC | #1
On Fri, May 18, 2018 at 10:38:18AM -0600, Keith Busch wrote:
> This patch fixes races that occur with simultaneous controller
> resets by synchronizing request queues prior to initializing the
> controller. Without this, a thread may attempt to disable a controller
> at the same time as we're trying to enable it.
> 
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> ---
>  drivers/nvme/host/core.c | 21 +++++++++++++++++++--
>  drivers/nvme/host/nvme.h |  1 +
>  drivers/nvme/host/pci.c  |  1 +
>  3 files changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 99b857e5a7a9..1de68b56b318 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3471,6 +3471,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>  }
>  EXPORT_SYMBOL_GPL(nvme_init_ctrl);
>  
> +static void nvme_start_queue(struct nvme_ns *ns)
> +{
> +	blk_mq_unquiesce_queue(ns->queue);
> +	blk_mq_kick_requeue_list(ns->queue);
> +}
> +
>  /**
>   * nvme_kill_queues(): Ends all namespace queues
>   * @ctrl: the dead controller that needs to end
> @@ -3499,7 +3505,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
>  		blk_set_queue_dying(ns->queue);
>  
>  		/* Forcibly unquiesce queues to avoid blocking dispatch */
> -		blk_mq_unquiesce_queue(ns->queue);
> +		nvme_start_queue(ns);
>  	}
>  	up_read(&ctrl->namespaces_rwsem);
>  }
> @@ -3569,11 +3575,22 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
>  
>  	down_read(&ctrl->namespaces_rwsem);
>  	list_for_each_entry(ns, &ctrl->namespaces, list)
> -		blk_mq_unquiesce_queue(ns->queue);
> +		nvme_start_queue(ns);
>  	up_read(&ctrl->namespaces_rwsem);
>  }
>  EXPORT_SYMBOL_GPL(nvme_start_queues);
>  
> +void nvme_sync_queues(struct nvme_ctrl *ctrl)
> +{
> +	struct nvme_ns *ns;
> +
> +	down_read(&ctrl->namespaces_rwsem);
> +	list_for_each_entry(ns, &ctrl->namespaces, list)
> +		blk_sync_queue(ns->queue);
> +	up_read(&ctrl->namespaces_rwsem);
> +}
> +EXPORT_SYMBOL_GPL(nvme_sync_queues);

This approach can't sync timeouts reliably, since timeout events can
come from two namespaces at the same time: one may be handled as
RESET_TIMER while the other is handled as EH_HANDLED.


Thanks,
Ming
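
A hypothetical interleaving of the race Ming describes (illustrative
only, not taken from the thread):

/*
 *   CPU0: ns1 request times out      CPU1: ns2 request times out
 *   nvme_timeout()                   nvme_timeout()
 *     ctrl->state is still LIVE        kicks off the reset; state
 *     -> returns BLK_EH_RESET_TIMER    moves to RESETTING
 *                                      -> returns BLK_EH_HANDLED
 *
 * The RESET_TIMER'd request on ns1 is not reclaimed by the reset, so
 * its timeout can fire again after the sync and disable the controller
 * while the reset work is re-enabling it.
 */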
Keith Busch May 18, 2018, 11:44 p.m. UTC | #2
On Sat, May 19, 2018 at 06:32:11AM +0800, Ming Lei wrote:
> This approach can't sync timeouts reliably, since timeout events can
> come from two namespaces at the same time: one may be handled as
> RESET_TIMER while the other is handled as EH_HANDLED.

You keep saying that, but the controller state is global to the
controller. It doesn't matter which namespace request_queue started the
reset: every namespace's request_queue sees the RESETTING controller state
from the point the syncing occurs, so none of them return RESET_TIMER.
On top of that, the reset reclaims every single IO command, no matter
which namespace request_queue initiated it.
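
The state check Keith refers to lives in the PCIe driver's timeout
handler. A simplified paraphrase of nvme_timeout() from
drivers/nvme/host/pci.c of that era, with logging and error paths
elided:

static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct nvme_dev *dev = iod->nvmeq->dev;

	/*
	 * If a reset is already in progress, disable the controller and
	 * return EH_HANDLED: the reset work will reclaim this command.
	 */
	switch (dev->ctrl.state) {
	case NVME_CTRL_CONNECTING:
	case NVME_CTRL_RESETTING:
		nvme_dev_disable(dev, false);
		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		return BLK_EH_HANDLED;
	default:
		break;
	}

	/*
	 * Otherwise the handler decides on its own, e.g. sending an
	 * abort and returning BLK_EH_RESET_TIMER to wait longer.
	 */
	return BLK_EH_RESET_TIMER;
}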
Ming Lei May 19, 2018, 12:01 a.m. UTC | #3
On Fri, May 18, 2018 at 05:44:08PM -0600, Keith Busch wrote:
> On Sat, May 19, 2018 at 06:32:11AM +0800, Ming Lei wrote:
> > This approach can't sync timeouts reliably, since timeout events can
> > come from two namespaces at the same time: one may be handled as
> > RESET_TIMER while the other is handled as EH_HANDLED.
> 
> You keep saying that, but the controller state is global to the
> controller. It doesn't matter which namespace request_queue started the
> reset: every namespace's request_queue sees the RESETTING controller state

When the timeouts fire, the global RESETTING state may not have been
updated yet, so some of the timeout handlers may not observe it.

Please see my previous explanation:

https://marc.info/?l=linux-block&m=152600464317808&w=2


Thanks,
Ming
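
For reference, the state transition Ming is pointing at happens when the
reset is scheduled; paraphrased from nvme_reset_ctrl() in
drivers/nvme/host/core.c of that era:

int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
	/*
	 * The RESETTING state only becomes visible here.  A timeout
	 * handler running on another CPU can sample ctrl->state just
	 * before this transition and still see NVME_CTRL_LIVE.
	 */
	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
		return -EBUSY;
	if (!queue_work(nvme_wq, &ctrl->reset_work))
		return -EBUSY;
	return 0;
}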

Patch

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 99b857e5a7a9..1de68b56b318 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3471,6 +3471,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 }
 EXPORT_SYMBOL_GPL(nvme_init_ctrl);
 
+static void nvme_start_queue(struct nvme_ns *ns)
+{
+	blk_mq_unquiesce_queue(ns->queue);
+	blk_mq_kick_requeue_list(ns->queue);
+}
+
 /**
  * nvme_kill_queues(): Ends all namespace queues
  * @ctrl: the dead controller that needs to end
@@ -3499,7 +3505,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 		blk_set_queue_dying(ns->queue);
 
 		/* Forcibly unquiesce queues to avoid blocking dispatch */
-		blk_mq_unquiesce_queue(ns->queue);
+		nvme_start_queue(ns);
 	}
 	up_read(&ctrl->namespaces_rwsem);
 }
@@ -3569,11 +3575,22 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 
 	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
-		blk_mq_unquiesce_queue(ns->queue);
+		nvme_start_queue(ns);
 	up_read(&ctrl->namespaces_rwsem);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+void nvme_sync_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	down_read(&ctrl->namespaces_rwsem);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		blk_sync_queue(ns->queue);
+	up_read(&ctrl->namespaces_rwsem);
+}
+EXPORT_SYMBOL_GPL(nvme_sync_queues);
+
 int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
 {
 	if (!ctrl->ops->reinit_request)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 17d2f7cf3fed..c15c2ee7f61a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -407,6 +407,7 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		union nvme_result *res);
 
+void nvme_sync_queues(struct nvme_ctrl *ctrl);
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
 void nvme_kill_queues(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 17a0190bd88f..8da63402d474 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2312,6 +2312,7 @@ static void nvme_reset_work(struct work_struct *work)
 	 */
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
+	nvme_sync_queues(&dev->ctrl);
 
 	/*
 	 * Introduce CONNECTING state from nvme-fc/rdma transports to mark the