diff mbox

[5/6] nvme-pci: Attempt reset retry for IO failures

Message ID 20180518163823.27820-5-keith.busch@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Keith Busch May 18, 2018, 4:38 p.m. UTC
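If the reset fails with a potentially retryable error (-EIO or -EINTR),
this patch will attempt to reset the controller again, up to a maximum
of 4 attempts (MAX_RESET_FAILURES). The failure count is cleared once a
reset succeeds.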

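Since the failed reset path no longer unconditionally removes the
controller, this patch renames nvme_remove_dead_ctrl() to
nvme_reset_failure() and updates the warning message to report the
reset failure status and the running failure count.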

Signed-off-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/pci.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)
diff mbox

Patch

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6a7cbc631d92..ddfeb186d129 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -37,6 +37,8 @@ 
 
 #define SGES_PER_PAGE	(PAGE_SIZE / sizeof(struct nvme_sgl_desc))
 
+#define MAX_RESET_FAILURES 4
+
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
@@ -101,6 +103,8 @@  struct nvme_dev {
 	struct completion ioq_wait;
 	bool queues_froze;
 
+	int reset_failures;
+
 	/* shadow doorbell buffer support: */
 	u32 *dbbuf_dbs;
 	dma_addr_t dbbuf_dbs_dma_addr;
@@ -2307,9 +2311,23 @@  static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 	kfree(dev);
 }
 
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
+static void nvme_reset_failure(struct nvme_dev *dev, int status)
 {
-	dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
+	dev->reset_failures++;
+	dev_warn(dev->ctrl.device, "Reset failure status: %d, failures:%d\n",
+		status, dev->reset_failures);
+
+	/* IO and Interrupted Call may indicate a retryable error */
+	switch (status) {
+	case -EIO:
+	case -EINTR:
+		if (dev->reset_failures < MAX_RESET_FAILURES &&
+		    !nvme_reset_ctrl(&dev->ctrl))
+			return;
+		break;
+	default:
+		break;
+	}
 
 	nvme_get_ctrl(&dev->ctrl);
 	nvme_dev_disable(dev, false);
@@ -2410,14 +2428,16 @@  static void nvme_reset_work(struct work_struct *work)
 	if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
 		dev_warn(dev->ctrl.device,
 			"failed to mark controller state %d\n", new_state);
+		result = -ENODEV;
 		goto out;
 	}
 
+	dev->reset_failures = 0;
 	nvme_start_ctrl(&dev->ctrl);
 	return;
 
  out:
-	nvme_remove_dead_ctrl(dev, result);
+	nvme_reset_failure(dev, result);
 }
 
 static void nvme_remove_dead_ctrl_work(struct work_struct *work)