diff mbox series

[ndctl,v5,6/6] libndctl,papr_scm: Implement support for PAPR_PDSM_HEALTH

Message ID 20200529220600.225320-7-vaibhav@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series Add support for reporting papr nvdimm health | expand

Commit Message

Vaibhav Jain May 29, 2020, 10:06 p.m. UTC
Add support for reporting DIMM health and shutdown state by issuing
PAPR_PDSM_HEALTH request to papr_scm module. It returns an
instance of 'struct nd_papr_pdsm_health' as defined in
'papr_pdsm.h'. The patch provides support for dimm-ops
'new_smart', 'smart_get_health' & 'smart_get_shutdown_state' as newly
introduced functions papr_new_smart_health(), papr_smart_get_health()
& papr_smart_get_shutdown_state() respectively. These callbacks should
enable ndctl to report DIMM health.

Also a new member 'struct dimm_priv.health' is introduced which holds
the current health status of the dimm. This member is set inside newly
added function 'update_dimm_health_v1()' which parses the v1 payload
returned by the kernel after servicing PAPR_PDSM_HEALTH. The
function will also update dimm-flags viz 'struct ndctl_dimm.flags.f_*'
based on the flags set in the returned payload.

Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
Changelog:

v4..v5:
* Updated patch description to reflect updated names of struct and
  defines that have the term 'scm' removed.

v3..v4:
* None

v2..v3:
* None

v1..v2:
* Squashed patch to report nvdimm bad shutdown state with this patch.
* Switched to new structs/enums as defined in papr_scm_pdsm.h
---
 ndctl/lib/papr.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 87 insertions(+), 3 deletions(-)

Comments

Dan Williams June 4, 2020, 1:26 a.m. UTC | #1
> -----Original Message-----
> From: Vaibhav Jain <vaibhav@linux.ibm.com>
> Sent: Friday, May 29, 2020 3:06 PM
> To: linux-nvdimm@lists.01.org
> Cc: Vaibhav Jain <vaibhav@linux.ibm.com>; Williams, Dan J
> <dan.j.williams@intel.com>; Verma, Vishal L <vishal.l.verma@intel.com>;
> Aneesh Kumar K . V <aneesh.kumar@linux.ibm.com>; Jeff Moyer
> <jmoyer@redhat.com>; Oliver O'Halloran <oohall@gmail.com>; Santosh
> Sivaraj <santosh@fossix.org>; Weiny, Ira <ira.weiny@intel.com>
> Subject: [ndctl PATCH v5 6/6] libndctl,papr_scm: Implement support for
> PAPR_PDSM_HEALTH
> 
> Add support for reporting DIMM health and shutdown state by issuing
> PAPR_PDSM_HEALTH request to papr_scm module. It returns an instance of
> 'struct nd_papr_pdsm_health' as defined in 'papr_pdsm.h'. The patch
> provides support for dimm-ops 'new_smart', 'smart_get_health' &
> 'smart_get_shutdown_state' as newly introduced functions
> papr_new_smart_health(), papr_smart_get_health() &
> papr_smart_get_shutdown_state() respectively. These callbacks should
> enable ndctl to report DIMM health.
> 
> Also a new member 'struct dimm_priv.health' is introduced which holds the
> current health status of the dimm. This member is set inside newly added
> function 'update_dimm_health_v1()' which parses the v1 payload returned
> by the kernel after servicing PAPR_PDSM_HEALTH. The function will also
> update dimm-flags viz 'struct ndctl_dimm.flags.f_*'
> based on the flags set in the returned payload.
> 
> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> ---
> Changelog:
> 
> v4..v5:
> * Updated patch description to reflect updated names of struct and
>   defines that have the term 'scm' removed.
> 
> v3..v4:
> * None
> 
> v2..v3:
> * None
> 
> v1..v2:
> * Squashed patch to report nvdimm bad shutdown state with this patch.
> * Switched to new structs/enums as defined in papr_scm_pdsm.h
> ---
>  ndctl/lib/papr.c | 90
> ++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 87 insertions(+), 3 deletions(-)
> 
> diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c index
> 1b7870beb631..cb7ff9e0d5bd 100644
> --- a/ndctl/lib/papr.c
> +++ b/ndctl/lib/papr.c
> @@ -42,7 +42,9 @@
> 
>  /* Per dimm data. Holds per-dimm data parsed from the cmd_pkgs */  struct
> dimm_priv {
> -	/* Empty for now */
> +
> +	/* Cache the dimm health status */
> +	struct nd_papr_pdsm_health health;

I don't understand this. The kernel is caching this, why does libndctl need to cache it?
Vaibhav Jain June 4, 2020, 9:55 p.m. UTC | #2
"Williams, Dan J" <dan.j.williams@intel.com> writes:

>> -----Original Message-----
>> From: Vaibhav Jain <vaibhav@linux.ibm.com>
>> Sent: Friday, May 29, 2020 3:06 PM
>> To: linux-nvdimm@lists.01.org
>> Cc: Vaibhav Jain <vaibhav@linux.ibm.com>; Williams, Dan J
>> <dan.j.williams@intel.com>; Verma, Vishal L <vishal.l.verma@intel.com>;
>> Aneesh Kumar K . V <aneesh.kumar@linux.ibm.com>; Jeff Moyer
>> <jmoyer@redhat.com>; Oliver O'Halloran <oohall@gmail.com>; Santosh
>> Sivaraj <santosh@fossix.org>; Weiny, Ira <ira.weiny@intel.com>
>> Subject: [ndctl PATCH v5 6/6] libndctl,papr_scm: Implement support for
>> PAPR_PDSM_HEALTH
>> 
>> Add support for reporting DIMM health and shutdown state by issuing
>> PAPR_PDSM_HEALTH request to papr_scm module. It returns an instance of
>> 'struct nd_papr_pdsm_health' as defined in 'papr_pdsm.h'. The patch
>> provides support for dimm-ops 'new_smart', 'smart_get_health' &
>> 'smart_get_shutdown_state' as newly introduced functions
>> papr_new_smart_health(), papr_smart_get_health() &
>> papr_smart_get_shutdown_state() respectively. These callbacks should
>> enable ndctl to report DIMM health.
>> 
>> Also a new member 'struct dimm_priv.health' is introduced which holds the
>> current health status of the dimm. This member is set inside newly added
>> function 'update_dimm_health_v1()' which parses the v1 payload returned
>> by the kernel after servicing PAPR_PDSM_HEALTH. The function will also
>> update dimm-flags viz 'struct ndctl_dimm.flags.f_*'
>> based on the flags set in the returned payload.
>> 
>> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
>> ---
>> Changelog:
>> 
>> v4..v5:
>> * Updated patch description to reflect updated names of struct and
>>   defines that have the term 'scm' removed.
>> 
>> v3..v4:
>> * None
>> 
>> v2..v3:
>> * None
>> 
>> v1..v2:
>> * Squashed patch to report nvdimm bad shutdown state with this patch.
>> * Switched to new structs/enums as defined in papr_scm_pdsm.h
>> ---
>>  ndctl/lib/papr.c | 90
>> ++++++++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 87 insertions(+), 3 deletions(-)
>> 
>> diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c index
>> 1b7870beb631..cb7ff9e0d5bd 100644
>> --- a/ndctl/lib/papr.c
>> +++ b/ndctl/lib/papr.c
>> @@ -42,7 +42,9 @@
>> 
>>  /* Per dimm data. Holds per-dimm data parsed from the cmd_pkgs */  struct
>> dimm_priv {
>> -	/* Empty for now */
>> +
>> +	/* Cache the dimm health status */
>> +	struct nd_papr_pdsm_health health;
>
[.]
> I don't understand this. The kernel is caching this, why does libndctl
> need to cache it?

I was caching it here because the nvdimm health payload returned from the
kernel might be of a different version than what was requested, and
reconstituting 'struct nd_papr_pdsm_health' from 'struct ndctl_cmd' in each
dimm-ops callback would have been costly.

However, with the payload versioning scheme no longer in use, this caching
won't be needed anymore.
diff mbox series

Patch

diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c
index 1b7870beb631..cb7ff9e0d5bd 100644
--- a/ndctl/lib/papr.c
+++ b/ndctl/lib/papr.c
@@ -42,7 +42,9 @@ 
 
 /* Per dimm data. Holds per-dimm data parsed from the cmd_pkgs */
 struct dimm_priv {
-	/* Empty for now */
+
+	/* Cache the dimm health status */
+	struct nd_papr_pdsm_health health;
 };
 
 static bool papr_cmd_is_supported(struct ndctl_dimm *dimm, int cmd)
@@ -97,6 +99,43 @@  static bool cmd_is_valid(struct ndctl_dimm *dimm, struct ndctl_cmd *cmd)
 	return true;
 }
 
+/*
+ * Parse the nd_papr_pdsm_health_v1 payload embedded in ndctl_cmd and
+ * update dimm health/flags
+ */
+static int update_dimm_health_v1(struct ndctl_dimm *dimm, struct ndctl_cmd *cmd)
+{
+	struct nd_pdsm_cmd_pkg *pcmd = nd_to_pdsm_cmd_pkg(cmd->pkg);
+	struct dimm_priv *p = dimm->dimm_user_data;
+	const struct nd_papr_pdsm_health_v1 *health =
+		pdsm_cmd_to_payload(pcmd);
+
+	/* Update the dimm flags */
+	dimm->flags.f_arm = health->dimm_unarmed;
+	dimm->flags.f_flush = health->dimm_bad_shutdown;
+	dimm->flags.f_restore = health->dimm_bad_restore;
+	dimm->flags.f_smart = (health->dimm_health != 0);
+
+	/* Cache the dimm health information */
+	memcpy(&p->health, health, sizeof(*health));
+	return 0;
+}
+
+/* Check payload version returned and pass the packet to appropriate handler */
+static int update_dimm_health(struct ndctl_dimm *dimm, struct ndctl_cmd *cmd)
+{
+	const struct nd_pdsm_cmd_pkg *pcmd = nd_to_pdsm_cmd_pkg(cmd->pkg);
+
+	if (pcmd->payload_version == 1)
+		return update_dimm_health_v1(dimm, cmd);
+
+	/* unknown version */
+	papr_err(dimm, "Unknown payload version for dimm_health.\n");
+	papr_dbg(dimm, "dimm_health payload Ver=%d, Supported=%d\n",
+		 pcmd->payload_version, ND_PAPR_PDSM_HEALTH_VERSION);
+	return -EINVAL;
+}
+
 /* Parse a command payload and update dimm flags/private data */
 static int update_dimm_stats(struct ndctl_dimm *dimm, struct ndctl_cmd *cmd)
 {
@@ -122,6 +161,8 @@  static int update_dimm_stats(struct ndctl_dimm *dimm, struct ndctl_cmd *cmd)
 	/* Get the pdsm request and handle it */
 	pcmd = nd_to_pdsm_cmd_pkg(cmd->pkg);
 	switch (pcmd_to_pdsm(pcmd)) {
+	case PAPR_PDSM_HEALTH:
+		return update_dimm_health(dimm, cmd);
 	default:
 		papr_err(dimm, "Unhandled pdsm-request 0x%016llx\n",
 			 pcmd_to_pdsm(pcmd));
@@ -166,14 +207,54 @@  static struct ndctl_cmd *allocate_cmd(struct ndctl_dimm *dimm,
 	return cmd;
 }
 
+static struct ndctl_cmd *papr_new_smart_health(struct ndctl_dimm *dimm)
+{
+	struct ndctl_cmd *cmd_ret;
+
+	cmd_ret = allocate_cmd(dimm, PAPR_PDSM_HEALTH,
+			       sizeof(struct nd_papr_pdsm_health),
+			       ND_PAPR_PDSM_HEALTH_VERSION);
+	if (!cmd_ret) {
+		papr_err(dimm, "Unable to allocate smart_health command\n");
+		return NULL;
+	}
+
+	cmd_ret->pkg[0].nd_size_out = ND_PDSM_ENVELOPE_CONTENT_SIZE(
+		struct nd_papr_pdsm_health);
+
+	return cmd_ret;
+}
+
+static unsigned int papr_smart_get_health(struct ndctl_cmd *cmd)
+{
+	struct dimm_priv *p = cmd->dimm->dimm_user_data;
+
+	/*
+	 * Update the dimm stats and use some math to return one of
+	 * defined ND_SMART_*_HEALTH values
+	 */
+	if (update_dimm_stats(cmd->dimm, cmd) || !p->health.dimm_health)
+		return 0;
+	else
+		return 1 << (p->health.dimm_health - 1);
+}
+
+static unsigned int papr_smart_get_shutdown_state(struct ndctl_cmd *cmd)
+{
+	struct dimm_priv *p = cmd->dimm->dimm_user_data;
+
+	/* Update dimm state and return f_flush */
+	return update_dimm_stats(cmd->dimm, cmd) ?
+		0 : p->health.dimm_bad_shutdown;
+}
+
 static unsigned int papr_smart_get_flags(struct ndctl_cmd *cmd)
 {
 	/* In case of error return empty flags * */
 	if (update_dimm_stats(cmd->dimm, cmd))
 		return 0;
 
-	/* Return empty flags for now as no DSM support */
-	return 0;
+	return ND_SMART_HEALTH_VALID | ND_SMART_SHUTDOWN_VALID;
 }
 
 static int papr_dimm_init(struct ndctl_dimm *dimm)
@@ -214,4 +295,7 @@  struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
 	.dimm_uninit = papr_dimm_uninit,
 	.smart_get_flags = papr_smart_get_flags,
 	.get_firmware_status =  papr_get_firmware_status,
+	.new_smart = papr_new_smart_health,
+	.smart_get_health = papr_smart_get_health,
+	.smart_get_shutdown_state = papr_smart_get_shutdown_state,
 };