diff mbox series

[net-next,4/4] pds_core: add attempts to fix broken PCI

Message ID 20230914223200.65533-5-shannon.nelson@amd.com (mailing list archive)
State Accepted
Commit 1e18ec3e9d46e4ad2b6507c3bfc7f59e2ab449a2
Delegated to: Netdev Maintainers
Headers show
Series pds_core: add PCI reset handling | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 1363 this patch: 1363
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1365 this patch: 1365
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 51 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Nelson, Shannon Sept. 14, 2023, 10:32 p.m. UTC
If we see a 0xff value from a PCI register read, we know that
the PCI connection is broken, possibly by a low level reset that
didn't go through the nice pci_error_handlers path.

Make use of the PCI cleanup code that we already have from the
reset handlers and add some detection and attempted recovery
from a broken PCI connection.

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Reviewed-by: Brett Creeley <brett.creeley@amd.com>
---
 drivers/net/ethernet/amd/pds_core/core.c | 14 ++++++++++++++
 drivers/net/ethernet/amd/pds_core/core.h |  3 +++
 drivers/net/ethernet/amd/pds_core/main.c |  4 ++--
 3 files changed, 19 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index c1b6b5f7c0b5..2a8643e167e1 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -578,6 +578,18 @@  void pdsc_fw_up(struct pdsc *pdsc)
 	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
+static void pdsc_check_pci_health(struct pdsc *pdsc)
+{
+	u8 fw_status = ioread8(&pdsc->info_regs->fw_status);
+
+	/* is PCI broken? */
+	if (fw_status != PDS_RC_BAD_PCI)
+		return;
+
+	pdsc_reset_prepare(pdsc->pdev);
+	pdsc_reset_done(pdsc->pdev);
+}
+
 void pdsc_health_thread(struct work_struct *work)
 {
 	struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
@@ -604,6 +616,8 @@  void pdsc_health_thread(struct work_struct *work)
 			pdsc_fw_down(pdsc);
 	}
 
+	pdsc_check_pci_health(pdsc);
+
 	pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
 
 out_unlock:
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 19c1957167da..f3a7deda9972 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -283,6 +283,9 @@  int pdsc_devcmd_reset(struct pdsc *pdsc);
 int pdsc_dev_reinit(struct pdsc *pdsc);
 int pdsc_dev_init(struct pdsc *pdsc);
 
+void pdsc_reset_prepare(struct pci_dev *pdev);
+void pdsc_reset_done(struct pci_dev *pdev);
+
 int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
 		    irq_handler_t handler, void *data);
 void pdsc_intr_free(struct pdsc *pdsc, int index);
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index 4c7f982c12a1..3080898d7b95 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -445,7 +445,7 @@  static void pdsc_remove(struct pci_dev *pdev)
 	devlink_free(dl);
 }
 
-static void pdsc_reset_prepare(struct pci_dev *pdev)
+void pdsc_reset_prepare(struct pci_dev *pdev)
 {
 	struct pdsc *pdsc = pci_get_drvdata(pdev);
 
@@ -457,7 +457,7 @@  static void pdsc_reset_prepare(struct pci_dev *pdev)
 	pci_disable_device(pdev);
 }
 
-static void pdsc_reset_done(struct pci_dev *pdev)
+void pdsc_reset_done(struct pci_dev *pdev)
 {
 	struct pdsc *pdsc = pci_get_drvdata(pdev);
 	struct device *dev = pdsc->dev;