diff mbox

[V3,3/9] aacraid: Added EEH support

Message ID 1452842182-684-4-git-send-email-RaghavaAditya.Renukunta@pmcs.com (mailing list archive)
State Changes Requested, archived
Headers show

Commit Message

Raghava Aditya Renukunta Jan. 15, 2016, 7:16 a.m. UTC
From: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>

Added support for PCI EEH (extended error handling).

Changes in V2:
Made local functions static
Removed call to aac_fib_free_tag
Set adapter_shutdown flag when PCI error detected

Changes in V3:
None

Signed-off-by: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
---
 drivers/scsi/aacraid/aacraid.h |   1 +
 drivers/scsi/aacraid/linit.c   | 140 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 141 insertions(+)

Comments

Johannes Thumshirn Jan. 18, 2016, 11:16 a.m. UTC | #1
On Thu, Jan 14, 2016 at 11:16:16PM -0800, Raghava Aditya Renukunta wrote:
> From: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>
> 
> Added support for PCI EEH(extended error handling).
> 
> Changes in V2:
> Made local functions static
> Removed call to  aac_fib_free_tag
> Set adapter_shutdown flag when PCI error detected
> 
> Changes in V3:
> None
> 
> Signed-off-by: Raghava Aditya Renukunta <raghavaaditya.renukunta@pmcs.com>
> Reviewed-by: Tomas Henzl <thenzl@redhat.com>
> ---
>  drivers/scsi/aacraid/aacraid.h |   1 +
>  drivers/scsi/aacraid/linit.c   | 140 +++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 141 insertions(+)
> 
> diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
> index fff1306..2916288 100644
> --- a/drivers/scsi/aacraid/aacraid.h
> +++ b/drivers/scsi/aacraid/aacraid.h
> @@ -1235,6 +1235,7 @@ struct aac_dev
>  	struct msix_entry	msixentry[AAC_MAX_MSIX];
>  	struct aac_msix_ctx	aac_msix[AAC_MAX_MSIX]; /* context */
>  	u8			adapter_shutdown;
> +	u32			handle_pci_error;
>  };
>  
>  #define aac_adapter_interrupt(dev) \
> diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
> index 129a515..08c6835 100644
> --- a/drivers/scsi/aacraid/linit.c
> +++ b/drivers/scsi/aacraid/linit.c
> @@ -38,6 +38,7 @@
>  #include <linux/module.h>
>  #include <linux/moduleparam.h>
>  #include <linux/pci.h>
> +#include <linux/aer.h>
>  #include <linux/pci-aspm.h>
>  #include <linux/slab.h>
>  #include <linux/mutex.h>
> @@ -1298,6 +1299,9 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
>  		goto out_deinit;
>  	scsi_scan_host(shost);
>  
> +	pci_enable_pcie_error_reporting(pdev);
> +	pci_save_state(pdev);
> +
>  	return 0;
>  
>   out_deinit:
> @@ -1501,6 +1505,141 @@ static void aac_remove_one(struct pci_dev *pdev)
>  	}
>  }
>  
> +static void aac_flush_ios(struct aac_dev *aac)
> +{
> +	int i;
> +	struct scsi_cmnd *cmd;
> +
> +	for (i = 0; i < aac->scsi_host_ptr->can_queue; i++) {
> +		cmd = (struct scsi_cmnd *)aac->fibs[i].callback_data;
> +		if (cmd && (cmd->SCp.phase == AAC_OWNER_FIRMWARE)) {
> +			scsi_dma_unmap(cmd);
> +
> +			if (aac->handle_pci_error)
> +				cmd->result = DID_NO_CONNECT << 16;
> +			else
> +				cmd->result = DID_RESET << 16;
> +
> +			cmd->scsi_done(cmd);
> +		}
> +	}
> +}
> +
> +static pci_ers_result_t aac_pci_error_detected(struct pci_dev *pdev,
> +					enum pci_channel_state error)
> +{
> +	struct Scsi_Host *shost = pci_get_drvdata(pdev);
> +	struct aac_dev *aac = shost_priv(shost);
> +
> +	dev_err(&pdev->dev, "aacraid: PCI error detected %x\n", error);
> +
> +	switch (error) {
> +	case pci_channel_io_normal:
> +		return PCI_ERS_RESULT_CAN_RECOVER;
> +	case pci_channel_io_frozen:
> +
> +		aac->handle_pci_error = 1;
> +		aac->adapter_shutdown = 1;
> +
> +		scsi_block_requests(aac->scsi_host_ptr);
> +		aac_flush_ios(aac);
> +		aac_release_resources(aac);
> +
> +		pci_disable_pcie_error_reporting(pdev);
> +		aac_adapter_ioremap(aac, 0);
> +
> +		return PCI_ERS_RESULT_NEED_RESET;
> +	case pci_channel_io_perm_failure:
> +		aac->handle_pci_error = 1;
> +		aac->adapter_shutdown = 1;
> +
> +		aac_flush_ios(aac);
> +		return PCI_ERS_RESULT_DISCONNECT;
> +	}
> +
> +	return PCI_ERS_RESULT_NEED_RESET;
> +}
> +
> +static pci_ers_result_t aac_pci_mmio_enabled(struct pci_dev *pdev)
> +{
> +	dev_err(&pdev->dev, "aacraid: PCI error - mmio enabled\n");
> +	return PCI_ERS_RESULT_NEED_RESET;
> +}
> +
> +static pci_ers_result_t aac_pci_slot_reset(struct pci_dev *pdev)
> +{
> +	dev_err(&pdev->dev, "aacraid: PCI error - slot reset\n");
> +	pci_restore_state(pdev);
> +	if (pci_enable_device(pdev)) {
> +		dev_warn(&pdev->dev,
> +			"aacraid: failed to enable slave\n");
> +		goto fail_device;
> +	}
> +
> +	pci_set_master(pdev);
> +
> +	if (pci_enable_device_mem(pdev)) {
> +		dev_err(&pdev->dev, "pci_enable_device_mem failed\n");
> +		goto fail_device;
> +	}
> +
> +	return PCI_ERS_RESULT_RECOVERED;
> +
> +fail_device:
> +	dev_err(&pdev->dev, "aacraid: PCI error - slot reset failed\n");
> +	return PCI_ERS_RESULT_DISCONNECT;
> +}
> +
> +
> +static void aac_pci_resume(struct pci_dev *pdev)
> +{
> +	struct Scsi_Host *shost = pci_get_drvdata(pdev);
> +	struct scsi_device *sdev = NULL;
> +	struct aac_dev *aac = (struct aac_dev *)shost_priv(shost);
> +
> +	pci_cleanup_aer_uncorrect_error_status(pdev);
> +
> +	if (aac_adapter_ioremap(aac, aac->base_size)) {
> +
> +		dev_err(&pdev->dev, "aacraid: ioremap failed\n");
> +		/* remap failed, go back ... */
> +		aac->comm_interface = AAC_COMM_PRODUCER;
> +		if (aac_adapter_ioremap(aac, AAC_MIN_FOOTPRINT_SIZE)) {
> +			dev_warn(&pdev->dev,
> +				"aacraid: unable to map adapter.\n");
> +
> +			return;
> +		}
> +	}
> +
> +	msleep(10000);
> +
> +	aac_acquire_resources(aac);
> +
> +	/*
> +	 * reset this flag to unblock ioctl() as it was set
> +	 * at aac_send_shutdown() to block ioctls from upperlayer
> +	 */
> +	aac->adapter_shutdown = 0;
> +	aac->handle_pci_error = 0;
> +
> +	shost_for_each_device(sdev, shost)
> +		if (sdev->sdev_state == SDEV_OFFLINE)
> +			sdev->sdev_state = SDEV_RUNNING;
> +	scsi_unblock_requests(aac->scsi_host_ptr);
> +	scsi_scan_host(aac->scsi_host_ptr);
> +	pci_save_state(pdev);
> +
> +	dev_err(&pdev->dev, "aacraid: PCI error - resume\n");
> +}
> +
> +static struct pci_error_handlers aac_pci_err_handler = {
> +	.error_detected		= aac_pci_error_detected,
> +	.mmio_enabled		= aac_pci_mmio_enabled,
> +	.slot_reset		= aac_pci_slot_reset,
> +	.resume			= aac_pci_resume,
> +};
> +
>  static struct pci_driver aac_pci_driver = {
>  	.name		= AAC_DRIVERNAME,
>  	.id_table	= aac_pci_tbl,
> @@ -1511,6 +1650,7 @@ static struct pci_driver aac_pci_driver = {
>  	.resume		= aac_resume,
>  #endif
>  	.shutdown	= aac_shutdown,
> +	.err_handler    = &aac_pci_err_handler,
>  };
>  
>  static int __init aac_init(void)
> -- 
> 1.9.1
> 

Looks good,

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
kernel test robot Jan. 23, 2016, 10:55 p.m. UTC | #2
Hi Raghava,

[auto build test ERROR on scsi/for-next]
[also build test ERROR on v4.4 next-20160122]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Raghava-Aditya-Renukunta/aacraid-SCSI-blk-tag-support/20160115-070931
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: x86_64-randconfig-s1-01240612 (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All errors (new ones prefixed by >>):

   drivers/scsi/aacraid/linit.c: In function 'aac_pci_error_detected':
>> drivers/scsi/aacraid/linit.c:1546:3: error: implicit declaration of function 'aac_release_resources' [-Werror=implicit-function-declaration]
      aac_release_resources(aac);
      ^
   drivers/scsi/aacraid/linit.c: In function 'aac_pci_resume':
>> drivers/scsi/aacraid/linit.c:1617:2: error: implicit declaration of function 'aac_acquire_resources' [-Werror=implicit-function-declaration]
     aac_acquire_resources(aac);
     ^
   cc1: some warnings being treated as errors

vim +/aac_release_resources +1546 drivers/scsi/aacraid/linit.c

  1540	
  1541			aac->handle_pci_error = 1;
  1542			aac->adapter_shutdown = 1;
  1543	
  1544			scsi_block_requests(aac->scsi_host_ptr);
  1545			aac_flush_ios(aac);
> 1546			aac_release_resources(aac);
  1547	
  1548			pci_disable_pcie_error_reporting(pdev);
  1549			aac_adapter_ioremap(aac, 0);
  1550	
  1551			return PCI_ERS_RESULT_NEED_RESET;
  1552		case pci_channel_io_perm_failure:
  1553			aac->handle_pci_error = 1;
  1554			aac->adapter_shutdown = 1;
  1555	
  1556			aac_flush_ios(aac);
  1557			return PCI_ERS_RESULT_DISCONNECT;
  1558		}
  1559	
  1560		return PCI_ERS_RESULT_NEED_RESET;
  1561	}
  1562	
  1563	static pci_ers_result_t aac_pci_mmio_enabled(struct pci_dev *pdev)
  1564	{
  1565		dev_err(&pdev->dev, "aacraid: PCI error - mmio enabled\n");
  1566		return PCI_ERS_RESULT_NEED_RESET;
  1567	}
  1568	
  1569	static pci_ers_result_t aac_pci_slot_reset(struct pci_dev *pdev)
  1570	{
  1571		dev_err(&pdev->dev, "aacraid: PCI error - slot reset\n");
  1572		pci_restore_state(pdev);
  1573		if (pci_enable_device(pdev)) {
  1574			dev_warn(&pdev->dev,
  1575				"aacraid: failed to enable slave\n");
  1576			goto fail_device;
  1577		}
  1578	
  1579		pci_set_master(pdev);
  1580	
  1581		if (pci_enable_device_mem(pdev)) {
  1582			dev_err(&pdev->dev, "pci_enable_device_mem failed\n");
  1583			goto fail_device;
  1584		}
  1585	
  1586		return PCI_ERS_RESULT_RECOVERED;
  1587	
  1588	fail_device:
  1589		dev_err(&pdev->dev, "aacraid: PCI error - slot reset failed\n");
  1590		return PCI_ERS_RESULT_DISCONNECT;
  1591	}
  1592	
  1593	
  1594	static void aac_pci_resume(struct pci_dev *pdev)
  1595	{
  1596		struct Scsi_Host *shost = pci_get_drvdata(pdev);
  1597		struct scsi_device *sdev = NULL;
  1598		struct aac_dev *aac = (struct aac_dev *)shost_priv(shost);
  1599	
  1600		pci_cleanup_aer_uncorrect_error_status(pdev);
  1601	
  1602		if (aac_adapter_ioremap(aac, aac->base_size)) {
  1603	
  1604			dev_err(&pdev->dev, "aacraid: ioremap failed\n");
  1605			/* remap failed, go back ... */
  1606			aac->comm_interface = AAC_COMM_PRODUCER;
  1607			if (aac_adapter_ioremap(aac, AAC_MIN_FOOTPRINT_SIZE)) {
  1608				dev_warn(&pdev->dev,
  1609					"aacraid: unable to map adapter.\n");
  1610	
  1611				return;
  1612			}
  1613		}
  1614	
  1615		msleep(10000);
  1616	
> 1617		aac_acquire_resources(aac);
  1618	
  1619		/*
  1620		 * reset this flag to unblock ioctl() as it was set

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
diff mbox

Patch

diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index fff1306..2916288 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1235,6 +1235,7 @@  struct aac_dev
 	struct msix_entry	msixentry[AAC_MAX_MSIX];
 	struct aac_msix_ctx	aac_msix[AAC_MAX_MSIX]; /* context */
 	u8			adapter_shutdown;
+	u32			handle_pci_error;
 };
 
 #define aac_adapter_interrupt(dev) \
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 129a515..08c6835 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -38,6 +38,7 @@ 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
+#include <linux/aer.h>
 #include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
@@ -1298,6 +1299,9 @@  static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto out_deinit;
 	scsi_scan_host(shost);
 
+	pci_enable_pcie_error_reporting(pdev);
+	pci_save_state(pdev);
+
 	return 0;
 
  out_deinit:
@@ -1501,6 +1505,141 @@  static void aac_remove_one(struct pci_dev *pdev)
 	}
 }
 
+static void aac_flush_ios(struct aac_dev *aac)
+{
+	int i;
+	struct scsi_cmnd *cmd;
+
+	for (i = 0; i < aac->scsi_host_ptr->can_queue; i++) {
+		cmd = (struct scsi_cmnd *)aac->fibs[i].callback_data;
+		if (cmd && (cmd->SCp.phase == AAC_OWNER_FIRMWARE)) {
+			scsi_dma_unmap(cmd);
+
+			if (aac->handle_pci_error)
+				cmd->result = DID_NO_CONNECT << 16;
+			else
+				cmd->result = DID_RESET << 16;
+
+			cmd->scsi_done(cmd);
+		}
+	}
+}
+
+static pci_ers_result_t aac_pci_error_detected(struct pci_dev *pdev,
+					enum pci_channel_state error)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct aac_dev *aac = shost_priv(shost);
+
+	dev_err(&pdev->dev, "aacraid: PCI error detected %x\n", error);
+
+	switch (error) {
+	case pci_channel_io_normal:
+		return PCI_ERS_RESULT_CAN_RECOVER;
+	case pci_channel_io_frozen:
+
+		aac->handle_pci_error = 1;
+		aac->adapter_shutdown = 1;
+
+		scsi_block_requests(aac->scsi_host_ptr);
+		aac_flush_ios(aac);
+		aac_release_resources(aac);
+
+		pci_disable_pcie_error_reporting(pdev);
+		aac_adapter_ioremap(aac, 0);
+
+		return PCI_ERS_RESULT_NEED_RESET;
+	case pci_channel_io_perm_failure:
+		aac->handle_pci_error = 1;
+		aac->adapter_shutdown = 1;
+
+		aac_flush_ios(aac);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t aac_pci_mmio_enabled(struct pci_dev *pdev)
+{
+	dev_err(&pdev->dev, "aacraid: PCI error - mmio enabled\n");
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t aac_pci_slot_reset(struct pci_dev *pdev)
+{
+	dev_err(&pdev->dev, "aacraid: PCI error - slot reset\n");
+	pci_restore_state(pdev);
+	if (pci_enable_device(pdev)) {
+		dev_warn(&pdev->dev,
+			"aacraid: failed to enable slave\n");
+		goto fail_device;
+	}
+
+	pci_set_master(pdev);
+
+	if (pci_enable_device_mem(pdev)) {
+		dev_err(&pdev->dev, "pci_enable_device_mem failed\n");
+		goto fail_device;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+
+fail_device:
+	dev_err(&pdev->dev, "aacraid: PCI error - slot reset failed\n");
+	return PCI_ERS_RESULT_DISCONNECT;
+}
+
+
+static void aac_pci_resume(struct pci_dev *pdev)
+{
+	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+	struct scsi_device *sdev = NULL;
+	struct aac_dev *aac = (struct aac_dev *)shost_priv(shost);
+
+	pci_cleanup_aer_uncorrect_error_status(pdev);
+
+	if (aac_adapter_ioremap(aac, aac->base_size)) {
+
+		dev_err(&pdev->dev, "aacraid: ioremap failed\n");
+		/* remap failed, go back ... */
+		aac->comm_interface = AAC_COMM_PRODUCER;
+		if (aac_adapter_ioremap(aac, AAC_MIN_FOOTPRINT_SIZE)) {
+			dev_warn(&pdev->dev,
+				"aacraid: unable to map adapter.\n");
+
+			return;
+		}
+	}
+
+	msleep(10000);
+
+	aac_acquire_resources(aac);
+
+	/*
+	 * reset this flag to unblock ioctl() as it was set
+	 * at aac_send_shutdown() to block ioctls from upperlayer
+	 */
+	aac->adapter_shutdown = 0;
+	aac->handle_pci_error = 0;
+
+	shost_for_each_device(sdev, shost)
+		if (sdev->sdev_state == SDEV_OFFLINE)
+			sdev->sdev_state = SDEV_RUNNING;
+	scsi_unblock_requests(aac->scsi_host_ptr);
+	scsi_scan_host(aac->scsi_host_ptr);
+	pci_save_state(pdev);
+
+	dev_err(&pdev->dev, "aacraid: PCI error - resume\n");
+}
+
+static struct pci_error_handlers aac_pci_err_handler = {
+	.error_detected		= aac_pci_error_detected,
+	.mmio_enabled		= aac_pci_mmio_enabled,
+	.slot_reset		= aac_pci_slot_reset,
+	.resume			= aac_pci_resume,
+};
+
 static struct pci_driver aac_pci_driver = {
 	.name		= AAC_DRIVERNAME,
 	.id_table	= aac_pci_tbl,
@@ -1511,6 +1650,7 @@  static struct pci_driver aac_pci_driver = {
 	.resume		= aac_resume,
 #endif
 	.shutdown	= aac_shutdown,
+	.err_handler    = &aac_pci_err_handler,
 };
 
 static int __init aac_init(void)