[ndctl] libndctl/papr_scm: Add support for reporting "life_used_percentage" metric
diff mbox series

Message ID 20200701140102.7795-1-vaibhav@linux.ibm.com
State Accepted
Commit 460c62054c59cf5e25797b5bf81dae4d436367e6
Headers show
Series
  • [ndctl] libndctl/papr_scm: Add support for reporting "life_used_percentage" metric
Related show

Commit Message

Vaibhav Jain July 1, 2020, 2:01 p.m. UTC
This adds support for reporting 'life_used_percentage' metric in
nvdimm health state output. It indicates an approximate share of
usable life consumed for a given NVDIMM. NDCTL output reported
for papr-scm based NVDIMMs is of the form below:

$ sudo ndctl list -DH
[
  {
    "dev":"nmem0",
    "health":{
      "health_state":"ok",
      "life_used_percentage":0,
      "shutdown_state":"clean"
    }
  }
]

For papr-scm based NVDIMMs this can be retrieved via an updated
PAPR_PDSM_HEALTH pdsm payload that adds a new field 'dimm_fuel_gauge'
at the end of 'struct nd_papr_pdsm_health'. Presence of this field is
indicated by the PDSM_DIMM_HEALTH_RUN_GAUGE_VALID flag being set in
field 'struct nd_papr_pdsm_health.extension_flags'. To calculate the
'life_used_percentage' metric we simply follow this identity:

"life_used_percentage = 100 - dimm_fuel_gauge"

The patch updates papr_smart_get_flags() to check for existence of
flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID in 'struct
nd_papr_pdsm_health.extension_flags' and, if found, also return the
ND_SMART_USED_VALID flag from the dimm-op callback.

A new dimm-op papr_smart_get_life_used() is introduced that calculates
the value of 'life_used_percentage' from the PAPR_PDSM_HEALTH pdsm
payload and returns it back to libndctl for output.

The patch is based on existing work to add support for reporting health
information for papr-scm based NVDIMMs [1][2]

[1] commit 880901b45cdf ("libndctl,papr_scm: Implement support for
PAPR_PDSM_HEALTH")

[2] https://github.com/pmem/ndctl/commits/vj/papr_health

Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
 ndctl/lib/papr.c      | 28 +++++++++++++++++++++++++++-
 ndctl/lib/papr_pdsm.h |  7 +++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

Patch
diff mbox series

diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c
index d9ce253369b3..8145412dec7d 100644
--- a/ndctl/lib/papr.c
+++ b/ndctl/lib/papr.c
@@ -141,6 +141,7 @@  static unsigned int papr_smart_get_flags(struct ndctl_cmd *cmd)
 {
 	struct nd_pkg_pdsm *pcmd;
 	struct nd_papr_pdsm_health health;
+	unsigned int flags;
 
 	if (!cmd_is_valid(cmd))
 		return 0;
@@ -160,7 +161,13 @@  static unsigned int papr_smart_get_flags(struct ndctl_cmd *cmd)
 	if (to_pdsm_cmd(cmd) == PAPR_PDSM_HEALTH) {
 		health = pcmd->payload.health;
 		update_dimm_flags(cmd->dimm, &health);
-		return ND_SMART_HEALTH_VALID | ND_SMART_SHUTDOWN_VALID;
+		flags = ND_SMART_HEALTH_VALID | ND_SMART_SHUTDOWN_VALID;
+
+		/* check for extension flags */
+		if (health.extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID)
+			flags |= ND_SMART_USED_VALID;
+
+		return flags;
 	}
 
 	/* Else return empty flags */
@@ -213,6 +220,24 @@  static unsigned int papr_smart_get_shutdown_state(struct ndctl_cmd *cmd)
 	return health.dimm_bad_shutdown;
 }
 
+static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd)
+{
+	struct nd_papr_pdsm_health health;
+
+	/* Report 0 unless cmd is a valid, successful PAPR_PDSM_HEALTH pdsm */
+	if (!cmd_is_valid(cmd) ||
+	    to_pdsm(cmd)->cmd_status != 0 ||
+	    to_pdsm_cmd(cmd) != PAPR_PDSM_HEALTH)
+		return 0;
+
+	/* get the payload from command */
+	health = to_payload(cmd)->health;
+
+	/* life used = 100 - remaining life (fuel gauge); 0 if gauge invalid */
+	return (health.extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID) ?
+		(100 - health.dimm_fuel_gauge) : 0;
+}
+
 struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
 	.cmd_is_supported = papr_cmd_is_supported,
 	.smart_get_flags = papr_smart_get_flags,
@@ -221,4 +246,5 @@  struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
 	.new_smart = papr_new_smart_health,
 	.smart_get_health = papr_smart_get_health,
 	.smart_get_shutdown_state = papr_smart_get_shutdown_state,
+	.smart_get_life_used = papr_smart_get_life_used,
 };
diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h
index 4c7c06757053..1bac8a7fc933 100644
--- a/ndctl/lib/papr_pdsm.h
+++ b/ndctl/lib/papr_pdsm.h
@@ -72,6 +72,9 @@ 
 #define PAPR_PDSM_DIMM_CRITICAL      2
 #define PAPR_PDSM_DIMM_FATAL         3
 
+/* Indicate that the 'dimm_fuel_gauge' field is valid */
+#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
+
 /*
  * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
  * Various flags indicate the health status of the dimm.
@@ -84,6 +87,7 @@ 
  * dimm_locked		: Contents of the dimm cant be modified until CEC reboot
  * dimm_encrypted	: Contents of dimm are encrypted.
  * dimm_health		: Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
+ * dimm_fuel_gauge	: Life remaining of DIMM as a percentage from 0-100
  */
 struct nd_papr_pdsm_health {
 	union {
@@ -96,6 +100,9 @@  struct nd_papr_pdsm_health {
 			__u8 dimm_locked;
 			__u8 dimm_encrypted;
 			__u16 dimm_health;
+
+			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
+			__u16 dimm_fuel_gauge;
 		};
 		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
 	};