Message ID | 20240715191148.746362-1-kheib@redhat.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [iwl-next] i40e: Add support for fw health report | expand |
Mon, Jul 15, 2024 at 09:11:48PM CEST, kheib@redhat.com wrote: >Add support for reporting fw status via the devlink health report. > >Example: > # devlink health show pci/0000:02:00.0 reporter fw > pci/0000:02:00.0: > reporter fw > state healthy error 0 recover 0 > # devlink health diagnose pci/0000:02:00.0 reporter fw > Status: normal > >Signed-off-by: Kamal Heib <kheib@redhat.com> >--- > drivers/net/ethernet/intel/i40e/i40e.h | 1 + > .../net/ethernet/intel/i40e/i40e_devlink.c | 57 +++++++++++++++++++ > .../net/ethernet/intel/i40e/i40e_devlink.h | 2 + > drivers/net/ethernet/intel/i40e/i40e_main.c | 15 +++++ > 4 files changed, 75 insertions(+) > >diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h >index d546567e0286..f94671b6e7c6 100644 >--- a/drivers/net/ethernet/intel/i40e/i40e.h >+++ b/drivers/net/ethernet/intel/i40e/i40e.h >@@ -465,6 +465,7 @@ static inline const u8 *i40e_channel_mac(struct i40e_channel *ch) > struct i40e_pf { > struct pci_dev *pdev; > struct devlink_port devlink_port; >+ struct devlink_health_reporter *fw_health_report; > struct i40e_hw hw; > DECLARE_BITMAP(state, __I40E_STATE_SIZE__); > struct msix_entry *msix_entries; >diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c b/drivers/net/ethernet/intel/i40e/i40e_devlink.c >index cc4e9e2addb7..ad91c150cdba 100644 >--- a/drivers/net/ethernet/intel/i40e/i40e_devlink.c >+++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c >@@ -122,6 +122,25 @@ static int i40e_devlink_info_get(struct devlink *dl, > return err; > } > >+static int i40e_fw_reporter_diagnose(struct devlink_health_reporter *reporter, >+ struct devlink_fmsg *fmsg, >+ struct netlink_ext_ack *extack) >+{ >+ struct i40e_pf *pf = devlink_health_reporter_priv(reporter); >+ >+ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) >+ devlink_fmsg_string_pair_put(fmsg, "Status", "recovery"); Is it "Status" or "Mode" ? 
>+ else >+ devlink_fmsg_string_pair_put(fmsg, "Status", "normal"); >+ >+ return 0; >+} >+ >+static const struct devlink_health_reporter_ops i40e_fw_reporter_ops = { >+ .name = "fw", >+ .diagnose = i40e_fw_reporter_diagnose, >+}; >+ > static const struct devlink_ops i40e_devlink_ops = { > .info_get = i40e_devlink_info_get, > }; >@@ -233,3 +252,41 @@ void i40e_devlink_destroy_port(struct i40e_pf *pf) > { > devlink_port_unregister(&pf->devlink_port); > } >+ >+/** >+ * i40e_devlink_create_health_reporter - Create the health reporter for this PF >+ * @pf: the PF to create reporter for >+ * >+ * Create health reporter for this PF. >+ * >+ * Return: zero on success or an error code on failure. >+ **/ >+int i40e_devlink_create_health_reporter(struct i40e_pf *pf) >+{ >+ struct devlink *devlink = priv_to_devlink(pf); >+ struct device *dev = &pf->pdev->dev; >+ int rc = 0; >+ >+ devl_lock(devlink); >+ pf->fw_health_report = >+ devl_health_reporter_create(devlink, &i40e_fw_reporter_ops, 0, pf); >+ if (IS_ERR(pf->fw_health_report)) { >+ rc = PTR_ERR(pf->fw_health_report); >+ dev_err(dev, "Failed to create fw reporter, err = %d\n", rc); >+ } >+ devl_unlock(devlink); >+ >+ return rc; >+} >+ >+/** >+ * i40e_devlink_destroy_health_reporter - Destroy the health reporter >+ * @pf: the PF to cleanup >+ * >+ * Destroy the health reporter >+ **/ >+void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf) >+{ >+ if (!IS_ERR_OR_NULL(pf->fw_health_report)) >+ devlink_health_reporter_destroy(pf->fw_health_report); >+} >diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.h b/drivers/net/ethernet/intel/i40e/i40e_devlink.h >index 469fb3d2ee25..018679094bb5 100644 >--- a/drivers/net/ethernet/intel/i40e/i40e_devlink.h >+++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.h >@@ -14,5 +14,7 @@ void i40e_devlink_register(struct i40e_pf *pf); > void i40e_devlink_unregister(struct i40e_pf *pf); > int i40e_devlink_create_port(struct i40e_pf *pf); > void i40e_devlink_destroy_port(struct i40e_pf 
*pf); >+int i40e_devlink_create_health_reporter(struct i40e_pf *pf); >+void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf); > > #endif /* _I40E_DEVLINK_H_ */ >diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c >index cbcfada7b357..13cad5f58029 100644 >--- a/drivers/net/ethernet/intel/i40e/i40e_main.c >+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c >@@ -15370,6 +15370,9 @@ static bool i40e_check_recovery_mode(struct i40e_pf *pf) > dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n"); > dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); > set_bit(__I40E_RECOVERY_MODE, pf->state); >+ if (pf->fw_health_report) >+ devlink_health_report(pf->fw_health_report, >+ "FW recovery mode detected", pf); You report it on "FW" reporter. Why "FW" is needed in the message? > > return true; > } >@@ -15636,6 +15639,14 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) > err = -ENOMEM; > goto err_pf_alloc; > } >+ >+ err = i40e_devlink_create_health_reporter(pf); >+ if (err) { >+ dev_err(&pdev->dev, >+ "Failed to create health reporter %d\n", err); >+ goto err_health_reporter; >+ } >+ > pf->next_vsi = 0; > pf->pdev = pdev; > set_bit(__I40E_DOWN, pf->state); >@@ -16180,6 +16191,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) > err_pf_reset: > iounmap(hw->hw_addr); > err_ioremap: >+ i40e_devlink_destroy_health_reporter(pf); >+err_health_reporter: > i40e_free_pf(pf); > err_pf_alloc: > pci_release_mem_regions(pdev); >@@ -16209,6 +16222,8 @@ static void i40e_remove(struct pci_dev *pdev) > > i40e_devlink_unregister(pf); > >+ i40e_devlink_destroy_health_reporter(pf); >+ > i40e_dbg_pf_exit(pf); > > i40e_ptp_stop(pf); >-- >2.45.2 > >
On Tue, Jul 16, 2024 at 03:35:34PM +0200, Jiri Pirko wrote: > Mon, Jul 15, 2024 at 09:11:48PM CEST, kheib@redhat.com wrote: > >Add support for reporting fw status via the devlink health report. > > > >Example: > > # devlink health show pci/0000:02:00.0 reporter fw > > pci/0000:02:00.0: > > reporter fw > > state healthy error 0 recover 0 > > # devlink health diagnose pci/0000:02:00.0 reporter fw > > Status: normal > > > >Signed-off-by: Kamal Heib <kheib@redhat.com> > >--- > > drivers/net/ethernet/intel/i40e/i40e.h | 1 + > > .../net/ethernet/intel/i40e/i40e_devlink.c | 57 +++++++++++++++++++ > > .../net/ethernet/intel/i40e/i40e_devlink.h | 2 + > > drivers/net/ethernet/intel/i40e/i40e_main.c | 15 +++++ > > 4 files changed, 75 insertions(+) > > > >diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h > >index d546567e0286..f94671b6e7c6 100644 > >--- a/drivers/net/ethernet/intel/i40e/i40e.h > >+++ b/drivers/net/ethernet/intel/i40e/i40e.h > >@@ -465,6 +465,7 @@ static inline const u8 *i40e_channel_mac(struct i40e_channel *ch) > > struct i40e_pf { > > struct pci_dev *pdev; > > struct devlink_port devlink_port; > >+ struct devlink_health_reporter *fw_health_report; > > struct i40e_hw hw; > > DECLARE_BITMAP(state, __I40E_STATE_SIZE__); > > struct msix_entry *msix_entries; > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c b/drivers/net/ethernet/intel/i40e/i40e_devlink.c > >index cc4e9e2addb7..ad91c150cdba 100644 > >--- a/drivers/net/ethernet/intel/i40e/i40e_devlink.c > >+++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c > >@@ -122,6 +122,25 @@ static int i40e_devlink_info_get(struct devlink *dl, > > return err; > > } > > > >+static int i40e_fw_reporter_diagnose(struct devlink_health_reporter *reporter, > >+ struct devlink_fmsg *fmsg, > >+ struct netlink_ext_ack *extack) > >+{ > >+ struct i40e_pf *pf = devlink_health_reporter_priv(reporter); > >+ > >+ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) > >+ 
devlink_fmsg_string_pair_put(fmsg, "Status", "recovery"); > > Is it "Status" or "Mode" ? > Thank you for your review. It is "Mode", I'll fix it in v2. > > >+ else > >+ devlink_fmsg_string_pair_put(fmsg, "Status", "normal"); > >+ > >+ return 0; > >+} > >+ > >+static const struct devlink_health_reporter_ops i40e_fw_reporter_ops = { > >+ .name = "fw", > >+ .diagnose = i40e_fw_reporter_diagnose, > >+}; > >+ > > static const struct devlink_ops i40e_devlink_ops = { > > .info_get = i40e_devlink_info_get, > > }; > >@@ -233,3 +252,41 @@ void i40e_devlink_destroy_port(struct i40e_pf *pf) > > { > > devlink_port_unregister(&pf->devlink_port); > > } > >+ > >+/** > >+ * i40e_devlink_create_health_reporter - Create the health reporter for this PF > >+ * @pf: the PF to create reporter for > >+ * > >+ * Create health reporter for this PF. > >+ * > >+ * Return: zero on success or an error code on failure. > >+ **/ > >+int i40e_devlink_create_health_reporter(struct i40e_pf *pf) > >+{ > >+ struct devlink *devlink = priv_to_devlink(pf); > >+ struct device *dev = &pf->pdev->dev; > >+ int rc = 0; > >+ > >+ devl_lock(devlink); > >+ pf->fw_health_report = > >+ devl_health_reporter_create(devlink, &i40e_fw_reporter_ops, 0, pf); > >+ if (IS_ERR(pf->fw_health_report)) { > >+ rc = PTR_ERR(pf->fw_health_report); > >+ dev_err(dev, "Failed to create fw reporter, err = %d\n", rc); > >+ } > >+ devl_unlock(devlink); > >+ > >+ return rc; > >+} > >+ > >+/** > >+ * i40e_devlink_destroy_health_reporter - Destroy the health reporter > >+ * @pf: the PF to cleanup > >+ * > >+ * Destroy the health reporter > >+ **/ > >+void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf) > >+{ > >+ if (!IS_ERR_OR_NULL(pf->fw_health_report)) > >+ devlink_health_reporter_destroy(pf->fw_health_report); > >+} > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.h b/drivers/net/ethernet/intel/i40e/i40e_devlink.h > >index 469fb3d2ee25..018679094bb5 100644 > >--- a/drivers/net/ethernet/intel/i40e/i40e_devlink.h > 
>+++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.h > >@@ -14,5 +14,7 @@ void i40e_devlink_register(struct i40e_pf *pf); > > void i40e_devlink_unregister(struct i40e_pf *pf); > > int i40e_devlink_create_port(struct i40e_pf *pf); > > void i40e_devlink_destroy_port(struct i40e_pf *pf); > >+int i40e_devlink_create_health_reporter(struct i40e_pf *pf); > >+void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf); > > > > #endif /* _I40E_DEVLINK_H_ */ > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c > >index cbcfada7b357..13cad5f58029 100644 > >--- a/drivers/net/ethernet/intel/i40e/i40e_main.c > >+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c > >@@ -15370,6 +15370,9 @@ static bool i40e_check_recovery_mode(struct i40e_pf *pf) > > dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n"); > > dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); > > set_bit(__I40E_RECOVERY_MODE, pf->state); > >+ if (pf->fw_health_report) > >+ devlink_health_report(pf->fw_health_report, > >+ "FW recovery mode detected", pf); > > You report it on "FW" reporter. Why "FW" is needed in the message? > You are right, I will remove it in v2. 
> > > > > return true; > > } > >@@ -15636,6 +15639,14 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) > > err = -ENOMEM; > > goto err_pf_alloc; > > } > >+ > >+ err = i40e_devlink_create_health_reporter(pf); > >+ if (err) { > >+ dev_err(&pdev->dev, > >+ "Failed to create health reporter %d\n", err); > >+ goto err_health_reporter; > >+ } > >+ > > pf->next_vsi = 0; > > pf->pdev = pdev; > > set_bit(__I40E_DOWN, pf->state); > >@@ -16180,6 +16191,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) > > err_pf_reset: > > iounmap(hw->hw_addr); > > err_ioremap: > >+ i40e_devlink_destroy_health_reporter(pf); > >+err_health_reporter: > > i40e_free_pf(pf); > > err_pf_alloc: > > pci_release_mem_regions(pdev); > >@@ -16209,6 +16222,8 @@ static void i40e_remove(struct pci_dev *pdev) > > > > i40e_devlink_unregister(pf); > > > >+ i40e_devlink_destroy_health_reporter(pf); > >+ > > i40e_dbg_pf_exit(pf); > > > > i40e_ptp_stop(pf); > >-- > >2.45.2 > > > > >
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d546567e0286..f94671b6e7c6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -465,6 +465,7 @@ static inline const u8 *i40e_channel_mac(struct i40e_channel *ch) struct i40e_pf { struct pci_dev *pdev; struct devlink_port devlink_port; + struct devlink_health_reporter *fw_health_report; struct i40e_hw hw; DECLARE_BITMAP(state, __I40E_STATE_SIZE__); struct msix_entry *msix_entries; diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c b/drivers/net/ethernet/intel/i40e/i40e_devlink.c index cc4e9e2addb7..ad91c150cdba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devlink.c +++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c @@ -122,6 +122,25 @@ static int i40e_devlink_info_get(struct devlink *dl, return err; } +static int i40e_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct i40e_pf *pf = devlink_health_reporter_priv(reporter); + + if (test_bit(__I40E_RECOVERY_MODE, pf->state)) + devlink_fmsg_string_pair_put(fmsg, "Status", "recovery"); + else + devlink_fmsg_string_pair_put(fmsg, "Status", "normal"); + + return 0; +} + +static const struct devlink_health_reporter_ops i40e_fw_reporter_ops = { + .name = "fw", + .diagnose = i40e_fw_reporter_diagnose, +}; + static const struct devlink_ops i40e_devlink_ops = { .info_get = i40e_devlink_info_get, }; @@ -233,3 +252,41 @@ void i40e_devlink_destroy_port(struct i40e_pf *pf) { devlink_port_unregister(&pf->devlink_port); } + +/** + * i40e_devlink_create_health_reporter - Create the health reporter for this PF + * @pf: the PF to create reporter for + * + * Create health reporter for this PF. + * + * Return: zero on success or an error code on failure. 
+ **/ +int i40e_devlink_create_health_reporter(struct i40e_pf *pf) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct device *dev = &pf->pdev->dev; + int rc = 0; + + devl_lock(devlink); + pf->fw_health_report = + devl_health_reporter_create(devlink, &i40e_fw_reporter_ops, 0, pf); + if (IS_ERR(pf->fw_health_report)) { + rc = PTR_ERR(pf->fw_health_report); + dev_err(dev, "Failed to create fw reporter, err = %d\n", rc); + } + devl_unlock(devlink); + + return rc; +} + +/** + * i40e_devlink_destroy_health_reporter - Destroy the health reporter + * @pf: the PF to cleanup + * + * Destroy the health reporter + **/ +void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf) +{ + if (!IS_ERR_OR_NULL(pf->fw_health_report)) + devlink_health_reporter_destroy(pf->fw_health_report); +} diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.h b/drivers/net/ethernet/intel/i40e/i40e_devlink.h index 469fb3d2ee25..018679094bb5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devlink.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.h @@ -14,5 +14,7 @@ void i40e_devlink_register(struct i40e_pf *pf); void i40e_devlink_unregister(struct i40e_pf *pf); int i40e_devlink_create_port(struct i40e_pf *pf); void i40e_devlink_destroy_port(struct i40e_pf *pf); +int i40e_devlink_create_health_reporter(struct i40e_pf *pf); +void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf); #endif /* _I40E_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index cbcfada7b357..13cad5f58029 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -15370,6 +15370,9 @@ static bool i40e_check_recovery_mode(struct i40e_pf *pf) dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. 
Limiting functionality.\n"); dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); set_bit(__I40E_RECOVERY_MODE, pf->state); + if (pf->fw_health_report) + devlink_health_report(pf->fw_health_report, + "FW recovery mode detected", pf); return true; } @@ -15636,6 +15639,14 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = -ENOMEM; goto err_pf_alloc; } + + err = i40e_devlink_create_health_reporter(pf); + if (err) { + dev_err(&pdev->dev, + "Failed to create health reporter %d\n", err); + goto err_health_reporter; + } + pf->next_vsi = 0; pf->pdev = pdev; set_bit(__I40E_DOWN, pf->state); @@ -16180,6 +16191,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_pf_reset: iounmap(hw->hw_addr); err_ioremap: + i40e_devlink_destroy_health_reporter(pf); +err_health_reporter: i40e_free_pf(pf); err_pf_alloc: pci_release_mem_regions(pdev); @@ -16209,6 +16222,8 @@ static void i40e_remove(struct pci_dev *pdev) i40e_devlink_unregister(pf); + i40e_devlink_destroy_health_reporter(pf); + i40e_dbg_pf_exit(pf); i40e_ptp_stop(pf);
Add support for reporting firmware (fw) status via a devlink health reporter. Example: # devlink health show pci/0000:02:00.0 reporter fw pci/0000:02:00.0: reporter fw state healthy error 0 recover 0 # devlink health diagnose pci/0000:02:00.0 reporter fw Status: normal Signed-off-by: Kamal Heib <kheib@redhat.com> --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + .../net/ethernet/intel/i40e/i40e_devlink.c | 57 +++++++++++++++++++ .../net/ethernet/intel/i40e/i40e_devlink.h | 2 + drivers/net/ethernet/intel/i40e/i40e_main.c | 15 +++++ 4 files changed, 75 insertions(+)