diff mbox series

[v5,net-next,04/14] pds_core: add devlink health facilities

Message ID 20230322185626.38758-5-shannon.nelson@amd.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series pds_core driver | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 18 this patch: 18
netdev/cc_maintainers warning 2 maintainers not CCed: edumazet@google.com pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 18 this patch: 18
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Nelson, Shannon March 22, 2023, 6:56 p.m. UTC
Add devlink health reporting on top of our fw watchdog.

Example:
  # devlink health show pci/0000:2b:00.0 reporter fw
  pci/0000:2b:00.0:
    reporter fw
      state healthy error 0 recover 0


Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
---
 drivers/net/ethernet/amd/pds_core/core.c    |  6 ++
 drivers/net/ethernet/amd/pds_core/core.h    |  2 +
 drivers/net/ethernet/amd/pds_core/devlink.c | 61 +++++++++++++++++++++
 3 files changed, 69 insertions(+)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 39e9a215f638..a9918c34018f 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -45,6 +45,8 @@  static void pdsc_fw_down(struct pdsc *pdsc)
 		return;
 	}
 
+	devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
+
 	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 
 	mutex_unlock(&pdsc->config_lock);
@@ -68,6 +70,10 @@  static void pdsc_fw_up(struct pdsc *pdsc)
 
 	mutex_unlock(&pdsc->config_lock);
 
+	pdsc->fw_recoveries++;
+	devlink_health_reporter_state_update(pdsc->fw_reporter,
+					     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+
 	return;
 
 err_out:
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index e73af422fae0..0cf7810ba9de 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -68,6 +68,8 @@  struct pdsc {
 	struct timer_list wdtimer;
 	unsigned int wdtimer_period;
 	struct work_struct health_work;
+	struct devlink_health_reporter *fw_reporter;
+	u32 fw_recoveries;
 
 	struct pdsc_devinfo dev_info;
 	struct pds_core_dev_identity dev_ident;
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index a9021bfe680a..a5a243bed5bc 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -14,6 +14,63 @@  static const struct devlink_ops pdsc_dl_ops = {
 static const struct devlink_ops pdsc_dl_vf_ops = {
 };
 
+/* "fw" reporter .diagnose: put status, state, generation, recoveries in fmsg */
+static int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+				     struct devlink_fmsg *fmsg,
+				     struct netlink_ext_ack *extack)
+{
+	struct pdsc *pdsc = devlink_health_reporter_priv(reporter);
+	const char *status = "healthy";
+	int ret;
+
+	/* The dead flag is checked first; only then is pdsc_is_fw_good() asked */
+	if (test_bit(PDSC_S_FW_DEAD, &pdsc->state))
+		status = "dead";
+	else if (!pdsc_is_fw_good(pdsc))
+		status = "unhealthy";
+
+	ret = devlink_fmsg_string_pair_put(fmsg, "Status", status);
+	if (ret)
+		return ret;
+
+	/* fw_status with the PDS_CORE_FW_STS_F_GENERATION bits cleared */
+	ret = devlink_fmsg_u32_pair_put(fmsg, "State",
+					pdsc->fw_status & ~PDS_CORE_FW_STS_F_GENERATION);
+	if (ret)
+		return ret;
+	ret = devlink_fmsg_u32_pair_put(fmsg, "Generation", pdsc->fw_generation >> 4);
+	if (ret)
+		return ret;
+	return devlink_fmsg_u32_pair_put(fmsg, "Recoveries", pdsc->fw_recoveries);
+}
+
+static const struct devlink_health_reporter_ops pdsc_fw_reporter_ops = {
+	.name = "fw",	/* listed as "reporter fw" by devlink health show */
+	.diagnose = pdsc_fw_reporter_diagnose,
+};
+
+/* Create the "fw" health reporter; a failure is only warned about */
+static void pdsc_dl_reporters_create(struct pdsc *pdsc)
+{
+	struct devlink *dl = priv_to_devlink(pdsc);
+	struct devlink_health_reporter *hr;
+
+	hr = devlink_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
+	if (IS_ERR(hr)) {	/* check the returned pointer, not the unset field */
+		dev_warn(pdsc->dev, "Failed to create fw reporter, err = %pe\n", hr);
+		return;
+	}
+	pdsc->fw_reporter = hr;
+}
+
+static void pdsc_dl_reporters_destroy(struct pdsc *pdsc)
+{
+	if (!pdsc->fw_reporter)	/* no reporter to tear down */
+		return;
+	devlink_health_reporter_destroy(pdsc->fw_reporter);
+	pdsc->fw_reporter = NULL;	/* so a repeated call is a no-op */
+}
+
 struct pdsc *pdsc_dl_alloc(struct device *dev, bool is_pf)
 {
 	const struct devlink_ops *ops;
@@ -38,6 +95,9 @@  int pdsc_dl_register(struct pdsc *pdsc)
 {
 	struct devlink *dl = priv_to_devlink(pdsc);
 
+	if (!pdsc->pdev->is_virtfn)
+		pdsc_dl_reporters_create(pdsc);
+
 	devlink_register(dl);
 
 	return 0;
@@ -48,4 +108,5 @@  void pdsc_dl_unregister(struct pdsc *pdsc)
 	struct devlink *dl = priv_to_devlink(pdsc);
 
 	devlink_unregister(dl);
+	pdsc_dl_reporters_destroy(pdsc);
 }