diff mbox series

[ndctl] libndctl/papr: Add support for reporting shutdown-count

Message ID 20210521112649.417210-1-vaibhav@linux.ibm.com (mailing list archive)
State New
Headers show
Series [ndctl] libndctl/papr: Add support for reporting shutdown-count | expand

Commit Message

Vaibhav Jain May 21, 2021, 11:26 a.m. UTC
Add support for reporting dirty-shutdown-count (DSC) for PAPR based
NVDIMMs. The sysfs attribute exposing this value is located at
nmemX/papr/dirty_shutdown.

This counter is also returned in payload for PAPR_PDSM_HEALTH as newly
introduced member 'dimm_dsc' in 'struct nd_papr_pdsm_health'. Presence
of 'DSC' is indicated by the PDSM_DIMM_DSC_VALID extension flag.

The patch implements 'ndctl_dimm_ops.smart_get_shutdown_count'
callback in implemented as papr_smart_get_shutdown_count().

Kernel side changes to support reporting DSC have been proposed at
[1]. With updated kernel 'ndctl list -DH' reports following output on
PPC64:

$ sudo ndctl list -DH
[
  {
    "dev":"nmem0",
    "health":{
      "health_state":"ok",
      "life_used_percentage":50,
      "shutdown_state":"clean",
      "shutdown_count":10
    }
  }
]

Link: https://lore.kernel.org/nvdimm/20210521111023.413732-1-vaibhav@linux.ibm.com
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
 ndctl/lib/libndctl.c  |  6 +++++-
 ndctl/lib/papr.c      | 23 +++++++++++++++++++++++
 ndctl/lib/papr_pdsm.h |  6 ++++++
 3 files changed, 34 insertions(+), 1 deletion(-)

Comments

Verma, Vishal L May 21, 2021, 7:29 p.m. UTC | #1
On Fri, 2021-05-21 at 16:56 +0530, Vaibhav Jain wrote:
> Add support for reporting dirty-shutdown-count (DSC) for PAPR based
> NVDIMMs. The sysfs attribute exposing this value is located at
> nmemX/papr/dirty_shutdown.
> 
> This counter is also returned in payload for PAPR_PDSM_HEALTH as newly
> introduced member 'dimm_dsc' in 'struct nd_papr_pdsm_health'. Presence
> of 'DSC' is indicated by the PDSM_DIMM_DSC_VALID extension flag.
> 
> The patch implements 'ndctl_dimm_ops.smart_get_shutdown_count'
> callback in implemented as papr_smart_get_shutdown_count().
> 
> Kernel side changes to support reporting DSC have been proposed at
> [1]. With updated kernel 'ndctl list -DH' reports following output on
> PPC64:
> 
> $ sudo ndctl list -DH
> [
>   {
>     "dev":"nmem0",
>     "health":{
>       "health_state":"ok",
>       "life_used_percentage":50,
>       "shutdown_state":"clean",
>       "shutdown_count":10
>     }
>   }
> ]
> 
> Link: https://lore.kernel.org/nvdimm/20210521111023.413732-1-vaibhav@linux.ibm.com

I'd suggest just using '[1]: <https://lore....'  for this. The Link:
trailer is added by 'b4' when I apply this patch, and points to this
patch on lore. It would be confusing to have two Link: trailers
pointing to different things.

> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> ---
>  ndctl/lib/libndctl.c  |  6 +++++-
>  ndctl/lib/papr.c      | 23 +++++++++++++++++++++++
>  ndctl/lib/papr_pdsm.h |  6 ++++++
>  3 files changed, 34 insertions(+), 1 deletion(-)

The patch looks okay to me - but I assume it depends on the kernel
interfaces not changing in the patch referenced above. Should I put
this on hold until the kernel side is accepted?

> 
> diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
> index aa36a3c87c57..6ee426ae30e1 100644
> --- a/ndctl/lib/libndctl.c
> +++ b/ndctl/lib/libndctl.c
> @@ -1795,8 +1795,12 @@ static int add_papr_dimm(struct ndctl_dimm *dimm, const char *dimm_base)
>  
>  		/* Allocate monitor mode fd */
>  		dimm->health_eventfd = open(path, O_RDONLY|O_CLOEXEC);
> -		rc = 0;
> +		/* Get the dirty shutdown counter value */
> +		sprintf(path, "%s/papr/dirty_shutdown", dimm_base);
> +		if (sysfs_read_attr(ctx, path, buf) == 0)
> +			dimm->dirty_shutdown = strtoll(buf, NULL, 0);
>  
> +		rc = 0;
>  	} else if (strcmp(buf, "nvdimm_test") == 0) {
>  		/* probe via common populate_dimm_attributes() */
>  		rc = populate_dimm_attributes(dimm, dimm_base, "papr");
> diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c
> index 9c6f2f045fc2..42ff200dc588 100644
> --- a/ndctl/lib/papr.c
> +++ b/ndctl/lib/papr.c
> @@ -165,6 +165,9 @@ static unsigned int papr_smart_get_flags(struct ndctl_cmd *cmd)
>  		if (health.extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID)
>  			flags |= ND_SMART_USED_VALID;
>  
> +		if (health.extension_flags &  PDSM_DIMM_DSC_VALID)
> +			flags |= ND_SMART_SHUTDOWN_COUNT_VALID;
> +
>  		return flags;
>  	}
>  
> @@ -236,6 +239,25 @@ static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd)
>  		(100 - health.dimm_fuel_gauge) : 0;
>  }
>  
> +static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd)
> +{
> +
> +	struct nd_papr_pdsm_health health;
> +
> +	/* Ignore in case of error or invalid pdsm */
> +	if (!cmd_is_valid(cmd) ||
> +	    to_pdsm(cmd)->cmd_status != 0 ||
> +	    to_pdsm_cmd(cmd) != PAPR_PDSM_HEALTH)
> +		return 0;
> +
> +	/* get the payload from command */
> +	health = to_payload(cmd)->health;
> +
> +	return (health.extension_flags & PDSM_DIMM_DSC_VALID) ?
> +		(health.dimm_dsc) : 0;
> +
> +}
> +
>  struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
>  	.cmd_is_supported = papr_cmd_is_supported,
>  	.smart_get_flags = papr_smart_get_flags,
> @@ -245,4 +267,5 @@ struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
>  	.smart_get_health = papr_smart_get_health,
>  	.smart_get_shutdown_state = papr_smart_get_shutdown_state,
>  	.smart_get_life_used = papr_smart_get_life_used,
> +	.smart_get_shutdown_count = papr_smart_get_shutdown_count,
>  };
> diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h
> index 1bac8a7fc933..f45b1e40c075 100644
> --- a/ndctl/lib/papr_pdsm.h
> +++ b/ndctl/lib/papr_pdsm.h
> @@ -75,6 +75,9 @@
>  /* Indicate that the 'dimm_fuel_gauge' field is valid */
>  #define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
>  
> +/* Indicate that the 'dimm_dsc' field is valid */
> +#define PDSM_DIMM_DSC_VALID 2
> +
>  /*
>   * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
>   * Various flags indicate the health status of the dimm.
> @@ -103,6 +106,9 @@ struct nd_papr_pdsm_health {
>  
>  			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
>  			__u16 dimm_fuel_gauge;
> +
> +			/* Extension flag PDSM_DIMM_DSC_VALID */
> +			__u64 dimm_dsc;
>  		};
>  		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
>  	};
Vaibhav Jain May 21, 2021, 8 p.m. UTC | #2
Hi Vishal,

Thanks for looking into this patch.

"Verma, Vishal L" <vishal.l.verma@intel.com> writes:

> On Fri, 2021-05-21 at 16:56 +0530, Vaibhav Jain wrote:
>> Add support for reporting dirty-shutdown-count (DSC) for PAPR based
>> NVDIMMs. The sysfs attribute exposing this value is located at
>> nmemX/papr/dirty_shutdown.
>> 
>> This counter is also returned in payload for PAPR_PDSM_HEALTH as newly
>> introduced member 'dimm_dsc' in 'struct nd_papr_pdsm_health'. Presence
>> of 'DSC' is indicated by the PDSM_DIMM_DSC_VALID extension flag.
>> 
>> The patch implements 'ndctl_dimm_ops.smart_get_shutdown_count'
>> callback in implemented as papr_smart_get_shutdown_count().
>> 
>> Kernel side changes to support reporting DSC have been proposed at
>> [1]. With updated kernel 'ndctl list -DH' reports following output on
>> PPC64:
>> 
>> $ sudo ndctl list -DH
>> [
>>   {
>>     "dev":"nmem0",
>>     "health":{
>>       "health_state":"ok",
>>       "life_used_percentage":50,
>>       "shutdown_state":"clean",
>>       "shutdown_count":10
>>     }
>>   }
>> ]
>> 
>> Link: https://lore.kernel.org/nvdimm/20210521111023.413732-1-vaibhav@linux.ibm.com
>
> I'd suggest just using '[1]: <https://lore....'  for this. The Link:
> trailer is added by 'b4' when I apply this patch, and points to this
> patch on lore. It would be confusing to have two Link: trailers
> pointing to different things.
>
Thanks for pointing this out. Will use your suggested format going
forward.

>> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
>> ---
>>  ndctl/lib/libndctl.c  |  6 +++++-
>>  ndctl/lib/papr.c      | 23 +++++++++++++++++++++++
>>  ndctl/lib/papr_pdsm.h |  6 ++++++
>>  3 files changed, 34 insertions(+), 1 deletion(-)
>
> The patch looks okay to me - but I assume it depends on the kernel
> interfaces not changing in the patch referenced above. Should I put
> this on hold until the kernel side is accepted?
>
Yes, it will be better to hold this until the corroponding kernel patch
is merged.

>> 
>> diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
>> index aa36a3c87c57..6ee426ae30e1 100644
>> --- a/ndctl/lib/libndctl.c
>> +++ b/ndctl/lib/libndctl.c
>> @@ -1795,8 +1795,12 @@ static int add_papr_dimm(struct ndctl_dimm *dimm, const char *dimm_base)
>>  
>>  		/* Allocate monitor mode fd */
>>  		dimm->health_eventfd = open(path, O_RDONLY|O_CLOEXEC);
>> -		rc = 0;
>> +		/* Get the dirty shutdown counter value */
>> +		sprintf(path, "%s/papr/dirty_shutdown", dimm_base);
>> +		if (sysfs_read_attr(ctx, path, buf) == 0)
>> +			dimm->dirty_shutdown = strtoll(buf, NULL, 0);
>>  
>> +		rc = 0;
>>  	} else if (strcmp(buf, "nvdimm_test") == 0) {
>>  		/* probe via common populate_dimm_attributes() */
>>  		rc = populate_dimm_attributes(dimm, dimm_base, "papr");
>> diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c
>> index 9c6f2f045fc2..42ff200dc588 100644
>> --- a/ndctl/lib/papr.c
>> +++ b/ndctl/lib/papr.c
>> @@ -165,6 +165,9 @@ static unsigned int papr_smart_get_flags(struct ndctl_cmd *cmd)
>>  		if (health.extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID)
>>  			flags |= ND_SMART_USED_VALID;
>>  
>> +		if (health.extension_flags &  PDSM_DIMM_DSC_VALID)
>> +			flags |= ND_SMART_SHUTDOWN_COUNT_VALID;
>> +
>>  		return flags;
>>  	}
>>  
>> @@ -236,6 +239,25 @@ static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd)
>>  		(100 - health.dimm_fuel_gauge) : 0;
>>  }
>>  
>> +static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd)
>> +{
>> +
>> +	struct nd_papr_pdsm_health health;
>> +
>> +	/* Ignore in case of error or invalid pdsm */
>> +	if (!cmd_is_valid(cmd) ||
>> +	    to_pdsm(cmd)->cmd_status != 0 ||
>> +	    to_pdsm_cmd(cmd) != PAPR_PDSM_HEALTH)
>> +		return 0;
>> +
>> +	/* get the payload from command */
>> +	health = to_payload(cmd)->health;
>> +
>> +	return (health.extension_flags & PDSM_DIMM_DSC_VALID) ?
>> +		(health.dimm_dsc) : 0;
>> +
>> +}
>> +
>>  struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
>>  	.cmd_is_supported = papr_cmd_is_supported,
>>  	.smart_get_flags = papr_smart_get_flags,
>> @@ -245,4 +267,5 @@ struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
>>  	.smart_get_health = papr_smart_get_health,
>>  	.smart_get_shutdown_state = papr_smart_get_shutdown_state,
>>  	.smart_get_life_used = papr_smart_get_life_used,
>> +	.smart_get_shutdown_count = papr_smart_get_shutdown_count,
>>  };
>> diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h
>> index 1bac8a7fc933..f45b1e40c075 100644
>> --- a/ndctl/lib/papr_pdsm.h
>> +++ b/ndctl/lib/papr_pdsm.h
>> @@ -75,6 +75,9 @@
>>  /* Indicate that the 'dimm_fuel_gauge' field is valid */
>>  #define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
>>  
>> +/* Indicate that the 'dimm_dsc' field is valid */
>> +#define PDSM_DIMM_DSC_VALID 2
>> +
>>  /*
>>   * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
>>   * Various flags indicate the health status of the dimm.
>> @@ -103,6 +106,9 @@ struct nd_papr_pdsm_health {
>>  
>>  			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
>>  			__u16 dimm_fuel_gauge;
>> +
>> +			/* Extension flag PDSM_DIMM_DSC_VALID */
>> +			__u64 dimm_dsc;
>>  		};
>>  		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
>>  	};
>
diff mbox series

Patch

diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
index aa36a3c87c57..6ee426ae30e1 100644
--- a/ndctl/lib/libndctl.c
+++ b/ndctl/lib/libndctl.c
@@ -1795,8 +1795,12 @@  static int add_papr_dimm(struct ndctl_dimm *dimm, const char *dimm_base)
 
 		/* Allocate monitor mode fd */
 		dimm->health_eventfd = open(path, O_RDONLY|O_CLOEXEC);
-		rc = 0;
+		/* Get the dirty shutdown counter value */
+		sprintf(path, "%s/papr/dirty_shutdown", dimm_base);
+		if (sysfs_read_attr(ctx, path, buf) == 0)
+			dimm->dirty_shutdown = strtoll(buf, NULL, 0);
 
+		rc = 0;
 	} else if (strcmp(buf, "nvdimm_test") == 0) {
 		/* probe via common populate_dimm_attributes() */
 		rc = populate_dimm_attributes(dimm, dimm_base, "papr");
diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c
index 9c6f2f045fc2..42ff200dc588 100644
--- a/ndctl/lib/papr.c
+++ b/ndctl/lib/papr.c
@@ -165,6 +165,9 @@  static unsigned int papr_smart_get_flags(struct ndctl_cmd *cmd)
 		if (health.extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID)
 			flags |= ND_SMART_USED_VALID;
 
+		if (health.extension_flags &  PDSM_DIMM_DSC_VALID)
+			flags |= ND_SMART_SHUTDOWN_COUNT_VALID;
+
 		return flags;
 	}
 
@@ -236,6 +239,25 @@  static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd)
 		(100 - health.dimm_fuel_gauge) : 0;
 }
 
+static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd)
+{
+
+	struct nd_papr_pdsm_health health;
+
+	/* Ignore in case of error or invalid pdsm */
+	if (!cmd_is_valid(cmd) ||
+	    to_pdsm(cmd)->cmd_status != 0 ||
+	    to_pdsm_cmd(cmd) != PAPR_PDSM_HEALTH)
+		return 0;
+
+	/* get the payload from command */
+	health = to_payload(cmd)->health;
+
+	return (health.extension_flags & PDSM_DIMM_DSC_VALID) ?
+		(health.dimm_dsc) : 0;
+
+}
+
 struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
 	.cmd_is_supported = papr_cmd_is_supported,
 	.smart_get_flags = papr_smart_get_flags,
@@ -245,4 +267,5 @@  struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
 	.smart_get_health = papr_smart_get_health,
 	.smart_get_shutdown_state = papr_smart_get_shutdown_state,
 	.smart_get_life_used = papr_smart_get_life_used,
+	.smart_get_shutdown_count = papr_smart_get_shutdown_count,
 };
diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h
index 1bac8a7fc933..f45b1e40c075 100644
--- a/ndctl/lib/papr_pdsm.h
+++ b/ndctl/lib/papr_pdsm.h
@@ -75,6 +75,9 @@ 
 /* Indicate that the 'dimm_fuel_gauge' field is valid */
 #define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
 
+/* Indicate that the 'dimm_dsc' field is valid */
+#define PDSM_DIMM_DSC_VALID 2
+
 /*
  * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
  * Various flags indicate the health status of the dimm.
@@ -103,6 +106,9 @@  struct nd_papr_pdsm_health {
 
 			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
 			__u16 dimm_fuel_gauge;
+
+			/* Extension flag PDSM_DIMM_DSC_VALID */
+			__u64 dimm_dsc;
 		};
 		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
 	};