diff mbox series

[iwl-next,v2] i40e: Add support for fw health report

Message ID 20240718181319.145884-1-kheib@redhat.com (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series [iwl-next,v2] i40e: Add support for fw health report | expand

Checks

Context Check Description
netdev/series_format warning Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 661 this patch: 661
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: edumazet@google.com
netdev/build_clang success Errors and warnings before: 662 this patch: 662
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 662 this patch: 662
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 223 this patch: 223
netdev/source_inline success Was 0 now: 0

Commit Message

Kamal Heib July 18, 2024, 6:13 p.m. UTC
Add support for reporting fw status via the devlink health report.

Example:
 # devlink health show pci/0000:02:00.0 reporter fw
 pci/0000:02:00.0:
   reporter fw
     state healthy error 0 recover 0
 # devlink health diagnose pci/0000:02:00.0 reporter fw
 Mode: normal

Signed-off-by: Kamal Heib <kheib@redhat.com>
---
v2:
- Address comments from Jiri.
- Move the creation of the health report. 
---
 drivers/net/ethernet/intel/i40e/i40e.h        |  1 +
 .../net/ethernet/intel/i40e/i40e_devlink.c    | 57 +++++++++++++++++++
 .../net/ethernet/intel/i40e/i40e_devlink.h    |  2 +
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 14 +++++
 4 files changed, 74 insertions(+)

Comments

Paul Menzel July 18, 2024, 7:57 p.m. UTC | #1
Dear Kamal,


Thank you for your patch.

Am 18.07.24 um 20:13 schrieb Kamal Heib:
> Add support for reporting fw status via the devlink health report.

I do not know the interface. So, the driver already has some way to 
communicate with the firmware, and you only need to hook it up to devlink?

> Example:
>   # devlink health show pci/0000:02:00.0 reporter fw
>   pci/0000:02:00.0:
>     reporter fw
>       state healthy error 0 recover 0
>   # devlink health diagnose pci/0000:02:00.0 reporter fw
>   Mode: normal

Any way to force it into recovery mode to also check it?

> Signed-off-by: Kamal Heib <kheib@redhat.com>
> ---
> v2:
> - Address comments from Jiri.
> - Move the creation of the health report.
> ---
>   drivers/net/ethernet/intel/i40e/i40e.h        |  1 +
>   .../net/ethernet/intel/i40e/i40e_devlink.c    | 57 +++++++++++++++++++
>   .../net/ethernet/intel/i40e/i40e_devlink.h    |  2 +
>   drivers/net/ethernet/intel/i40e/i40e_main.c   | 14 +++++
>   4 files changed, 74 insertions(+)
> 
> diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
> index d546567e0286..f94671b6e7c6 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e.h
> @@ -465,6 +465,7 @@ static inline const u8 *i40e_channel_mac(struct i40e_channel *ch)
>   struct i40e_pf {
>   	struct pci_dev *pdev;
>   	struct devlink_port devlink_port;
> +	struct devlink_health_reporter *fw_health_report;
>   	struct i40e_hw hw;
>   	DECLARE_BITMAP(state, __I40E_STATE_SIZE__);
>   	struct msix_entry *msix_entries;
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c b/drivers/net/ethernet/intel/i40e/i40e_devlink.c
> index cc4e9e2addb7..8fe64284e8d3 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_devlink.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c
> @@ -122,6 +122,25 @@ static int i40e_devlink_info_get(struct devlink *dl,
>   	return err;
>   }
>   
> +static int i40e_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
> +				     struct devlink_fmsg *fmsg,
> +				     struct netlink_ext_ack *extack)
> +{
> +	struct i40e_pf *pf = devlink_health_reporter_priv(reporter);
> +
> +	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
> +		devlink_fmsg_string_pair_put(fmsg, "Mode", "recovery");
> +	else
> +		devlink_fmsg_string_pair_put(fmsg, "Mode", "normal");

I’d use ternary operator.

> +
> +	return 0;
> +}
> +
> +static const struct devlink_health_reporter_ops i40e_fw_reporter_ops = {
> +	.name = "fw",
> +	.diagnose = i40e_fw_reporter_diagnose,
> +};
> +
>   static const struct devlink_ops i40e_devlink_ops = {
>   	.info_get = i40e_devlink_info_get,
>   };
> @@ -233,3 +252,41 @@ void i40e_devlink_destroy_port(struct i40e_pf *pf)
>   {
>   	devlink_port_unregister(&pf->devlink_port);
>   }
> +
> +/**
> + * i40e_devlink_create_health_reporter - Create the health reporter for this PF
> + * @pf: the PF to create reporter for
> + *
> + * Create health reporter for this PF.
> + *
> + * Return: zero on success or an error code on failure.

No dot/period at the end.

> + **/
> +int i40e_devlink_create_health_reporter(struct i40e_pf *pf)
> +{
> +	struct devlink *devlink = priv_to_devlink(pf);
> +	struct device *dev = &pf->pdev->dev;
> +	int rc = 0;
> +
> +	devl_lock(devlink);
> +	pf->fw_health_report =
> +		devl_health_reporter_create(devlink, &i40e_fw_reporter_ops, 0, pf);
> +	if (IS_ERR(pf->fw_health_report)) {
> +		rc = PTR_ERR(pf->fw_health_report);
> +		dev_err(dev, "Failed to create fw reporter, err = %d\n", rc);
> +	}
> +	devl_unlock(devlink);
> +
> +	return rc;
> +}
> +
> +/**
> + * i40e_devlink_destroy_health_reporter - Destroy the health reporter
> + * @pf: the PF to cleanup

The verb is spelled with a space: clean up.

> + *
> + * Destroy the health reporter
> + **/
> +void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf)
> +{
> +	if (!IS_ERR_OR_NULL(pf->fw_health_report))
> +		devlink_health_reporter_destroy(pf->fw_health_report);
> +}
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.h b/drivers/net/ethernet/intel/i40e/i40e_devlink.h
> index 469fb3d2ee25..018679094bb5 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_devlink.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.h
> @@ -14,5 +14,7 @@ void i40e_devlink_register(struct i40e_pf *pf);
>   void i40e_devlink_unregister(struct i40e_pf *pf);
>   int i40e_devlink_create_port(struct i40e_pf *pf);
>   void i40e_devlink_destroy_port(struct i40e_pf *pf);
> +int i40e_devlink_create_health_reporter(struct i40e_pf *pf);
> +void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf);
>   
>   #endif /* _I40E_DEVLINK_H_ */
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index cbcfada7b357..b6b3ae299b28 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -15370,6 +15370,9 @@ static bool i40e_check_recovery_mode(struct i40e_pf *pf)
>   		dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
>   		dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
>   		set_bit(__I40E_RECOVERY_MODE, pf->state);
> +		if (pf->fw_health_report)
> +			devlink_health_report(pf->fw_health_report,
> +					      "recovery mode detected", pf);
>   
>   		return true;
>   	}
> @@ -15810,6 +15813,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
>   		return i40e_init_recovery_mode(pf, hw);
>   
> +	err = i40e_devlink_create_health_reporter(pf);
> +	if (err) {
> +		dev_err(&pdev->dev,
> +			"Failed to create health reporter %d\n", err);
> +		goto err_health_reporter;
> +	}
> +
>   	err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
>   				hw->func_caps.num_rx_qp, 0, 0);
>   	if (err) {
> @@ -16175,6 +16185,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	(void)i40e_shutdown_lan_hmc(hw);
>   err_init_lan_hmc:
>   	kfree(pf->qp_pile);
> +	i40e_devlink_destroy_health_reporter(pf);
> +err_health_reporter:
>   err_sw_init:
>   err_adminq_setup:
>   err_pf_reset:
> @@ -16209,6 +16221,8 @@ static void i40e_remove(struct pci_dev *pdev)
>   
>   	i40e_devlink_unregister(pf);
>   
> +	i40e_devlink_destroy_health_reporter(pf);
> +
>   	i40e_dbg_pf_exit(pf);
>   
>   	i40e_ptp_stop(pf);


Kind regards,

Paul
Jakub Kicinski July 23, 2024, 5:36 p.m. UTC | #2
On Thu, 18 Jul 2024 14:13:19 -0400 Kamal Heib wrote:
> Add support for reporting fw status via the devlink health report.
> 
> Example:
>  # devlink health show pci/0000:02:00.0 reporter fw
>  pci/0000:02:00.0:
>    reporter fw
>      state healthy error 0 recover 0
>  # devlink health diagnose pci/0000:02:00.0 reporter fw
>  Mode: normal

I'm not sure if health reporter is the right fit, depends on what 
the recovery mode is / does. We need documentation.
Brett Creeley July 23, 2024, 8:41 p.m. UTC | #3
On 7/18/2024 11:13 AM, Kamal Heib wrote:
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
> 
> 
> Add support for reporting fw status via the devlink health report.
> 
> Example:
>   # devlink health show pci/0000:02:00.0 reporter fw
>   pci/0000:02:00.0:
>     reporter fw
>       state healthy error 0 recover 0
>   # devlink health diagnose pci/0000:02:00.0 reporter fw
>   Mode: normal
> 
> Signed-off-by: Kamal Heib <kheib@redhat.com>
> ---
> v2:
> - Address comments from Jiri.
> - Move the creation of the health report.
> ---
>   drivers/net/ethernet/intel/i40e/i40e.h        |  1 +
>   .../net/ethernet/intel/i40e/i40e_devlink.c    | 57 +++++++++++++++++++
>   .../net/ethernet/intel/i40e/i40e_devlink.h    |  2 +
>   drivers/net/ethernet/intel/i40e/i40e_main.c   | 14 +++++
>   4 files changed, 74 insertions(+)
> 

<snip>

> diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
> index cbcfada7b357..b6b3ae299b28 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -15370,6 +15370,9 @@ static bool i40e_check_recovery_mode(struct i40e_pf *pf)
>                  dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
>                  dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
>                  set_bit(__I40E_RECOVERY_MODE, pf->state);
> +               if (pf->fw_health_report)
> +                       devlink_health_report(pf->fw_health_report,
> +                                             "recovery mode detected", pf);
> 
>                  return true;
>          }
> @@ -15810,6 +15813,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>          if (test_bit(__I40E_RECOVERY_MODE, pf->state))
>                  return i40e_init_recovery_mode(pf, hw);
> 
> +       err = i40e_devlink_create_health_reporter(pf);
> +       if (err) {
> +               dev_err(&pdev->dev,
> +                       "Failed to create health reporter %d\n", err);
> +               goto err_health_reporter;

Do you really want to fail probe if creating this simple health reporter 
fails?

Thanks,

Brett

> +       }
> +
>          err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
>                                  hw->func_caps.num_rx_qp, 0, 0);
>          if (err) {
> @@ -16175,6 +16185,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>          (void)i40e_shutdown_lan_hmc(hw);
>   err_init_lan_hmc:
>          kfree(pf->qp_pile);
> +       i40e_devlink_destroy_health_reporter(pf);
> +err_health_reporter:
>   err_sw_init:
>   err_adminq_setup:
>   err_pf_reset:
> @@ -16209,6 +16221,8 @@ static void i40e_remove(struct pci_dev *pdev)
> 
>          i40e_devlink_unregister(pf);
> 
> +       i40e_devlink_destroy_health_reporter(pf);
> +
>          i40e_dbg_pf_exit(pf);
> 
>          i40e_ptp_stop(pf);
> --
> 2.45.2
> 
>
Keller, Jacob E July 23, 2024, 9:17 p.m. UTC | #4
> -----Original Message-----
> From: Brett Creeley <bcreeley@amd.com>
> Sent: Tuesday, July 23, 2024 1:41 PM
> To: Kamal Heib <kheib@redhat.com>; intel-wired-lan@lists.osuosl.org
> Cc: netdev@vger.kernel.org; Nguyen, Anthony L <anthony.l.nguyen@intel.com>;
> Kitszel, Przemyslaw <przemyslaw.kitszel@intel.com>; ivecera
> <ivecera@redhat.com>; Jakub Kicinski <kuba@kernel.org>; David S . Miller
> <davem@davemloft.net>; Paolo Abeni <pabeni@redhat.com>
> Subject: Re: [PATCH iwl-next v2] i40e: Add support for fw health report
> 
> 
> 
> On 7/18/2024 11:13 AM, Kamal Heib wrote:
> > Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
> >
> >
> > Add support for reporting fw status via the devlink health report.
> >
> > Example:
> >   # devlink health show pci/0000:02:00.0 reporter fw
> >   pci/0000:02:00.0:
> >     reporter fw
> >       state healthy error 0 recover 0
> >   # devlink health diagnose pci/0000:02:00.0 reporter fw
> >   Mode: normal
> >
> > Signed-off-by: Kamal Heib <kheib@redhat.com>
> > ---
> > v2:
> > - Address comments from Jiri.
> > - Move the creation of the health report.
> > ---
> >   drivers/net/ethernet/intel/i40e/i40e.h        |  1 +
> >   .../net/ethernet/intel/i40e/i40e_devlink.c    | 57 +++++++++++++++++++
> >   .../net/ethernet/intel/i40e/i40e_devlink.h    |  2 +
> >   drivers/net/ethernet/intel/i40e/i40e_main.c   | 14 +++++
> >   4 files changed, 74 insertions(+)
> >
> 
> <snip>
> 
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c
> b/drivers/net/ethernet/intel/i40e/i40e_main.c
> > index cbcfada7b357..b6b3ae299b28 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> > @@ -15370,6 +15370,9 @@ static bool i40e_check_recovery_mode(struct
> i40e_pf *pf)
> >                  dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting
> functionality.\n");
> >                  dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and
> Devices User Guide for details on firmware recovery mode.\n");
> >                  set_bit(__I40E_RECOVERY_MODE, pf->state);
> > +               if (pf->fw_health_report)
> > +                       devlink_health_report(pf->fw_health_report,
> > +                                             "recovery mode detected", pf);
> >
> >                  return true;
> >          }
> > @@ -15810,6 +15813,13 @@ static int i40e_probe(struct pci_dev *pdev, const
> struct pci_device_id *ent)
> >          if (test_bit(__I40E_RECOVERY_MODE, pf->state))
> >                  return i40e_init_recovery_mode(pf, hw);
> >
> > +       err = i40e_devlink_create_health_reporter(pf);
> > +       if (err) {
> > +               dev_err(&pdev->dev,
> > +                       "Failed to create health reporter %d\n", err);
> > +               goto err_health_reporter;
> 
> Do you really want to fail probe if creating this simple health reporter
> fails?
> 
> Thanks,
> 
> Brett

I agree. I would make this a dev_warn and continue without failure.

Thanks,
Jake
Przemek Kitszel July 29, 2024, 12:04 p.m. UTC | #5
On 7/18/24 20:13, Kamal Heib wrote:
> Add support for reporting fw status via the devlink health report.
> 
> Example:
>   # devlink health show pci/0000:02:00.0 reporter fw
>   pci/0000:02:00.0:
>     reporter fw
>       state healthy error 0 recover 0
>   # devlink health diagnose pci/0000:02:00.0 reporter fw
>   Mode: normal
> 
> Signed-off-by: Kamal Heib <kheib@redhat.com>
> ---
> v2:
> - Address comments from Jiri.
> - Move the creation of the health report.
> ---
>   drivers/net/ethernet/intel/i40e/i40e.h        |  1 +
>   .../net/ethernet/intel/i40e/i40e_devlink.c    | 57 +++++++++++++++++++
>   .../net/ethernet/intel/i40e/i40e_devlink.h    |  2 +
>   drivers/net/ethernet/intel/i40e/i40e_main.c   | 14 +++++
>   4 files changed, 74 insertions(+)
> 

[...]

> +int i40e_devlink_create_health_reporter(struct i40e_pf *pf)
> +{
> +	struct devlink *devlink = priv_to_devlink(pf);
> +	struct device *dev = &pf->pdev->dev;
> +	int rc = 0;
> +
> +	devl_lock(devlink);

if you are going to have just one health reporter, you could just use
devlink_health_reporter_create() and avoid lock+unlock calls here

> +	pf->fw_health_report =
> +		devl_health_reporter_create(devlink, &i40e_fw_reporter_ops, 0, pf);
> +	if (IS_ERR(pf->fw_health_report)) {
> +		rc = PTR_ERR(pf->fw_health_report);
> +		dev_err(dev, "Failed to create fw reporter, err = %d\n", rc);

you are not zeroing pf->fw_health_report here, so ...

> +	}
> +	devl_unlock(devlink);
> +
> +	return rc;
> +}

[snip]

> --- a/drivers/net/ethernet/intel/i40e/i40e_main.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
> @@ -15370,6 +15370,9 @@ static bool i40e_check_recovery_mode(struct i40e_pf *pf)
>   		dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
>   		dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
>   		set_bit(__I40E_RECOVERY_MODE, pf->state);
> +		if (pf->fw_health_report)
> +			devlink_health_report(pf->fw_health_report,

... you could possibly call devlink_health_report() with ERR_PTR as
the first argument

> +					      "recovery mode detected", pf);
>   
>   		return true;
>   	}
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index d546567e0286..f94671b6e7c6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -465,6 +465,7 @@  static inline const u8 *i40e_channel_mac(struct i40e_channel *ch)
 struct i40e_pf {
 	struct pci_dev *pdev;
 	struct devlink_port devlink_port;
+	struct devlink_health_reporter *fw_health_report;
 	struct i40e_hw hw;
 	DECLARE_BITMAP(state, __I40E_STATE_SIZE__);
 	struct msix_entry *msix_entries;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.c b/drivers/net/ethernet/intel/i40e/i40e_devlink.c
index cc4e9e2addb7..8fe64284e8d3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devlink.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.c
@@ -122,6 +122,25 @@  static int i40e_devlink_info_get(struct devlink *dl,
 	return err;
 }
 
+static int i40e_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+				     struct devlink_fmsg *fmsg,
+				     struct netlink_ext_ack *extack)
+{
+	struct i40e_pf *pf = devlink_health_reporter_priv(reporter);
+
+	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
+		devlink_fmsg_string_pair_put(fmsg, "Mode", "recovery");
+	else
+		devlink_fmsg_string_pair_put(fmsg, "Mode", "normal");
+
+	return 0;
+}
+
+static const struct devlink_health_reporter_ops i40e_fw_reporter_ops = {
+	.name = "fw",
+	.diagnose = i40e_fw_reporter_diagnose,
+};
+
 static const struct devlink_ops i40e_devlink_ops = {
 	.info_get = i40e_devlink_info_get,
 };
@@ -233,3 +252,41 @@  void i40e_devlink_destroy_port(struct i40e_pf *pf)
 {
 	devlink_port_unregister(&pf->devlink_port);
 }
+
+/**
+ * i40e_devlink_create_health_reporter - Create the health reporter for this PF
+ * @pf: the PF to create reporter for
+ *
+ * Create health reporter for this PF.
+ *
+ * Return: zero on success or an error code on failure.
+ **/
+int i40e_devlink_create_health_reporter(struct i40e_pf *pf)
+{
+	struct devlink *devlink = priv_to_devlink(pf);
+	struct device *dev = &pf->pdev->dev;
+	int rc = 0;
+
+	devl_lock(devlink);
+	pf->fw_health_report =
+		devl_health_reporter_create(devlink, &i40e_fw_reporter_ops, 0, pf);
+	if (IS_ERR(pf->fw_health_report)) {
+		rc = PTR_ERR(pf->fw_health_report);
+		dev_err(dev, "Failed to create fw reporter, err = %d\n", rc);
+	}
+	devl_unlock(devlink);
+
+	return rc;
+}
+
+/**
+ * i40e_devlink_destroy_health_reporter - Destroy the health reporter
+ * @pf: the PF to cleanup
+ *
+ * Destroy the health reporter
+ **/
+void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf)
+{
+	if (!IS_ERR_OR_NULL(pf->fw_health_report))
+		devlink_health_reporter_destroy(pf->fw_health_report);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devlink.h b/drivers/net/ethernet/intel/i40e/i40e_devlink.h
index 469fb3d2ee25..018679094bb5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devlink.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devlink.h
@@ -14,5 +14,7 @@  void i40e_devlink_register(struct i40e_pf *pf);
 void i40e_devlink_unregister(struct i40e_pf *pf);
 int i40e_devlink_create_port(struct i40e_pf *pf);
 void i40e_devlink_destroy_port(struct i40e_pf *pf);
+int i40e_devlink_create_health_reporter(struct i40e_pf *pf);
+void i40e_devlink_destroy_health_reporter(struct i40e_pf *pf);
 
 #endif /* _I40E_DEVLINK_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index cbcfada7b357..b6b3ae299b28 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -15370,6 +15370,9 @@  static bool i40e_check_recovery_mode(struct i40e_pf *pf)
 		dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
 		dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
 		set_bit(__I40E_RECOVERY_MODE, pf->state);
+		if (pf->fw_health_report)
+			devlink_health_report(pf->fw_health_report,
+					      "recovery mode detected", pf);
 
 		return true;
 	}
@@ -15810,6 +15813,13 @@  static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (test_bit(__I40E_RECOVERY_MODE, pf->state))
 		return i40e_init_recovery_mode(pf, hw);
 
+	err = i40e_devlink_create_health_reporter(pf);
+	if (err) {
+		dev_err(&pdev->dev,
+			"Failed to create health reporter %d\n", err);
+		goto err_health_reporter;
+	}
+
 	err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
 				hw->func_caps.num_rx_qp, 0, 0);
 	if (err) {
@@ -16175,6 +16185,8 @@  static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	(void)i40e_shutdown_lan_hmc(hw);
 err_init_lan_hmc:
 	kfree(pf->qp_pile);
+	i40e_devlink_destroy_health_reporter(pf);
+err_health_reporter:
 err_sw_init:
 err_adminq_setup:
 err_pf_reset:
@@ -16209,6 +16221,8 @@  static void i40e_remove(struct pci_dev *pdev)
 
 	i40e_devlink_unregister(pf);
 
+	i40e_devlink_destroy_health_reporter(pf);
+
 	i40e_dbg_pf_exit(pf);
 
 	i40e_ptp_stop(pf);