diff mbox series

[net-next,12/14] mlxsw: pci: Add support for new reset flow

Message ID 2b6629170e815abed5dfc7f560a69a1accaeb1f2.1700047319.git.petrm@nvidia.com (mailing list archive)
State Accepted
Commit f257c73e53561f6c223c4bce883138e9f18b5f50
Delegated to: Netdev Maintainers
Headers show
Series mlxsw: Add support for new reset flow | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1134 this patch: 1134
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 1161 this patch: 1161
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1161 this patch: 1161
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 73 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Petr Machata Nov. 15, 2023, 12:17 p.m. UTC
From: Ido Schimmel <idosch@nvidia.com>

The driver resets the device during probe and during a devlink reload.
The current reset method reloads the current firmware version or a
pending one, if one was previously flashed using devlink. However, the
current reset method does not result in a PCI hot reset, preventing the
PCI firmware from being upgraded, unless the system is rebooted.

To solve this problem, a new reset command (6) was implemented in the
firmware. Unlike the current command (1), after issuing the new command
the device will not start the reset immediately, but only after a PCI
hot reset.

Implement the new reset method by first verifying that it is supported
by the current firmware version by querying the Management Capabilities
Mask (MCAM) register. If supported, issue the new reset command (6) via
MRSR register followed by a PCI reset by calling
__pci_reset_function_locked().

Once the PCI firmware is operational, go back to the regular reset flow
and wait for the entire device to become ready. That is, repeatedly read
the "system_status" register from the BAR until a value of "FW_READY"
(0x5E) appears.

Tested:

 # for i in $(seq 1 10); do devlink dev reload pci/0000:01:00.0; done

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlxsw/pci.c | 44 ++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlxsw/reg.h |  2 ++
 2 files changed, 45 insertions(+), 1 deletion(-)

Comments

Simon Horman Nov. 17, 2023, 3:53 p.m. UTC | #1
On Wed, Nov 15, 2023 at 01:17:21PM +0100, Petr Machata wrote:
> From: Ido Schimmel <idosch@nvidia.com>
> 
> The driver resets the device during probe and during a devlink reload.
> The current reset method reloads the current firmware version or a
> pending one, if one was previously flashed using devlink. However, the
> current reset method does not result in a PCI hot reset, preventing the
> PCI firmware from being upgraded, unless the system is rebooted.
> 
> To solve this problem, a new reset command (6) was implemented in the
> firmware. Unlike the current command (1), after issuing the new command
> the device will not start the reset immediately, but only after a PCI
> hot reset.
> 
> Implement the new reset method by first verifying that it is supported
> by the current firmware version by querying the Management Capabilities
> Mask (MCAM) register. If supported, issue the new reset command (6) via
> MRSR register followed by a PCI reset by calling
> __pci_reset_function_locked().
> 
> Once the PCI firmware is operational, go back to the regular reset flow
> and wait for the entire device to become ready. That is, repeatedly read
> the "system_status" register from the BAR until a value of "FW_READY"
> (0x5E) appears.
> 
> Tested:
> 
>  # for i in $(seq 1 10); do devlink dev reload pci/0000:01:00.0; done
> 
> Signed-off-by: Ido Schimmel <idosch@nvidia.com>
> Reviewed-by: Petr Machata <petrm@nvidia.com>
> Signed-off-by: Petr Machata <petrm@nvidia.com>

Reviewed-by: Simon Horman <horms@kernel.org>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 080881c94c5a..726ed707e27b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1476,6 +1476,33 @@  static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci,
 	return -EBUSY;
 }
 
+static int mlxsw_pci_reset_at_pci_disable(struct mlxsw_pci *mlxsw_pci)
+{
+	struct pci_dev *pdev = mlxsw_pci->pdev;
+	char mrsr_pl[MLXSW_REG_MRSR_LEN];
+	int err;
+
+	mlxsw_reg_mrsr_pack(mrsr_pl,
+			    MLXSW_REG_MRSR_COMMAND_RESET_AT_PCI_DISABLE);
+	err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl);
+	if (err)
+		return err;
+
+	device_lock_assert(&pdev->dev);
+
+	pci_cfg_access_lock(pdev);
+	pci_save_state(pdev);
+
+	err = __pci_reset_function_locked(pdev);
+	if (err)
+		pci_err(pdev, "PCI function reset failed with %d\n", err);
+
+	pci_restore_state(pdev);
+	pci_cfg_access_unlock(pdev);
+
+	return err;
+}
+
 static int mlxsw_pci_reset_sw(struct mlxsw_pci *mlxsw_pci)
 {
 	char mrsr_pl[MLXSW_REG_MRSR_LEN];
@@ -1488,6 +1515,8 @@  static int
 mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
 {
 	struct pci_dev *pdev = mlxsw_pci->pdev;
+	char mcam_pl[MLXSW_REG_MCAM_LEN];
+	bool pci_reset_supported;
 	u32 sys_status;
 	int err;
 
@@ -1498,10 +1527,23 @@  mlxsw_pci_reset(struct mlxsw_pci *mlxsw_pci, const struct pci_device_id *id)
 		return err;
 	}
 
-	err = mlxsw_pci_reset_sw(mlxsw_pci);
+	mlxsw_reg_mcam_pack(mcam_pl,
+			    MLXSW_REG_MCAM_FEATURE_GROUP_ENHANCED_FEATURES);
+	err = mlxsw_reg_query(mlxsw_pci->core, MLXSW_REG(mcam), mcam_pl);
 	if (err)
 		return err;
 
+	mlxsw_reg_mcam_unpack(mcam_pl, MLXSW_REG_MCAM_PCI_RESET,
+			      &pci_reset_supported);
+
+	if (pci_reset_supported) {
+		pci_dbg(pdev, "Starting PCI reset flow\n");
+		err = mlxsw_pci_reset_at_pci_disable(mlxsw_pci);
+	} else {
+		pci_dbg(pdev, "Starting software reset flow\n");
+		err = mlxsw_pci_reset_sw(mlxsw_pci);
+	}
+
 	err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n",
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 13c0ff994537..e26e9d06bd72 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -10594,6 +10594,8 @@  MLXSW_ITEM32(reg, mcam, feature_group, 0x00, 16, 8);
 enum mlxsw_reg_mcam_mng_feature_cap_mask_bits {
 	/* If set, MCIA supports 128 bytes payloads. Otherwise, 48 bytes. */
 	MLXSW_REG_MCAM_MCIA_128B = 34,
+	/* If set, MRSR.command=6 is supported. */
+	MLXSW_REG_MCAM_PCI_RESET = 48,
 };
 
 #define MLXSW_REG_BYTES_PER_DWORD 0x4