diff mbox series

[net,v3,1/2] mlxsw: pci: Fix driver initialization with Spectrum-4

Message ID d4474932101834e663af431624f90875bb4d41d4.1718954012.git.petrm@nvidia.com (mailing list archive)
State Accepted
Commit 0602697d6f4d72b0bc5edbc76afabf6aaa029a69
Delegated to: Netdev Maintainers
Headers show
Series mlxsw: Fixes | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 859 this patch: 859
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 863 this patch: 863
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 863 this patch: 863
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 60 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest fail net-next-2024-06-21--12-00 (tests: 660)

Commit Message

Petr Machata June 21, 2024, 7:19 a.m. UTC
From: Ido Schimmel <idosch@nvidia.com>

Cited commit added support for a new reset flow ("all reset") which is
deeper than the existing reset flow ("software reset") and allows the
device's PCI firmware to be upgraded.

In the new flow the driver first tells the firmware that "all reset" is
required by issuing a new reset command (i.e., MRSR.command=6) and then
triggers the reset by having the PCI core issue a secondary bus reset
(SBR).

However, due to a race condition in the device's firmware the device is
not always able to recover from this reset, resulting in initialization
failures [1].

New firmware versions include a fix for the bug and advertise it using a
new capability bit in the Management Capabilities Mask (MCAM) register.

Avoid initialization failures by reading the new capability bit and
triggering the new reset flow only if the bit is set. If the bit is not
set, trigger a normal PCI hot reset by skipping the call to the
Management Reset and Shutdown Register (MRSR).

Normal PCI hot reset is weaker than "all reset", but it results in a
fully operational driver and allows users to flash a new firmware, if
they want to.

[1]
mlxsw_spectrum4 0000:01:00.0: not ready 1023ms after bus reset; waiting
mlxsw_spectrum4 0000:01:00.0: not ready 2047ms after bus reset; waiting
mlxsw_spectrum4 0000:01:00.0: not ready 4095ms after bus reset; waiting
mlxsw_spectrum4 0000:01:00.0: not ready 8191ms after bus reset; waiting
mlxsw_spectrum4 0000:01:00.0: not ready 16383ms after bus reset; waiting
mlxsw_spectrum4 0000:01:00.0: not ready 32767ms after bus reset; waiting
mlxsw_spectrum4 0000:01:00.0: not ready 65535ms after bus reset; giving up
mlxsw_spectrum4 0000:01:00.0: PCI function reset failed with -25
mlxsw_spectrum4 0000:01:00.0: cannot register bus device
mlxsw_spectrum4: probe of 0000:01:00.0 failed with error -25

Fixes: f257c73e5356 ("mlxsw: pci: Add support for new reset flow")
Reported-by: Maksym Yaremchuk <maksymy@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Tested-by: Maksym Yaremchuk <maksymy@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Petr Machata <petrm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlxsw/pci.c | 18 +++++++++++++++---
 drivers/net/ethernet/mellanox/mlxsw/reg.h |  2 ++
 2 files changed, 17 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index bf66d996e32e..c0ced4d315f3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1594,18 +1594,25 @@  static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
 	return -EBUSY;
 }
 
-static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
+static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci,
+					  bool pci_reset_sbr_supported)
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
 	char mrsr_pl[MLXSW_REG_MRSR_LEN];
 	int err;
 
+	if (!pci_reset_sbr_supported) {
+		pci_dbg(pdev, "Performing PCI hot reset instead of \"all reset\"\n");
+		goto sbr;
+	}
+
 	mlxsw_reg_mrsr_pack(mrsr_pl,
 			    MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
 	err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
 	if (err)
 		return err;
 
+sbr:
 	device_lock_assert(&pdev->dev);
 
 	pci_cfg_access_lock(pdev);
@@ -1633,6 +1640,7 @@  static int
 mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
+	bool pci_reset_sbr_supported = false;
 	char mcam_pl[MLXSW_REG_MCAM_LEN];
 	bool pci_reset_supported = false;
 	u32 sys_status;
@@ -1652,13 +1660,17 @@  mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
 	mlxsw_reg_mcam_pack(mcam_pl,
 			    MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
 	err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
-	if (!err)
+	if (!err) {
 		mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
 				      &pci_reset_supported);
+		mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET_SBR,
+				      &pci_reset_sbr_supported);
+	}
 
 	if (pci_reset_supported) {
 		pci_dbg(pdev, "Starting PCI reset flow\n");
-		err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci);
+		err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci,
+						     pci_reset_sbr_supported);
 	} else {
 		pci_dbg(pdev, "Starting software reset flow\n");
 		err = mlxsw_pci_reset_sw(mlxsw_pci);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 8adf86a6f5cc..3bb89045eaf5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -10671,6 +10671,8 @@  enum mlxsw_reg_mcam_mng_feature_cap_mask_bits {
 	MLXSW_REG_MCAM_MCIA_128B = 34,
 	/* If set, MRSR.command=6 is supported. */
 	MLXSW_REG_MCAM_PCI_RESET = 48,
+	/* If set, MRSR.command=6 is supported with Secondary Bus Reset. */
+	MLXSW_REG_MCAM_PCI_RESET_SBR = 67,
 };
 
 #define MLXSW_REG_BYTES_PER_DWORD 0x4