diff mbox series

[ndctl,v4,17/17] cxl: add health information to cxl-list

Message ID 20211007082139.3088615-18-vishal.l.verma@intel.com (mailing list archive)
State Superseded
Headers show
Series Initial CXL support | expand

Commit Message

Verma, Vishal L Oct. 7, 2021, 8:21 a.m. UTC
Add JSON output for fields from the 'GET_HEALTH_INFO' mailbox command
to memory device listings.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 Documentation/cxl/cxl-list.txt |   4 +
 util/json.h                    |   1 +
 cxl/list.c                     |   5 +
 util/json.c                    | 189 +++++++++++++++++++++++++++++++++
 4 files changed, 199 insertions(+)

Comments

Verma, Vishal L Oct. 11, 2021, 10:07 p.m. UTC | #1
On Thu, 2021-10-07 at 02:21 -0600, Vishal Verma wrote:
> Add JSON output for fields from the 'GET_HEALTH_INFO' mailbox command
> to memory device listings.
> 
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> ---
>  Documentation/cxl/cxl-list.txt |   4 +
>  util/json.h                    |   1 +
>  cxl/list.c                     |   5 +
>  util/json.c                    | 189 +++++++++++++++++++++++++++++++++
>  4 files changed, 199 insertions(+)
> 
> diff --git a/Documentation/cxl/cxl-list.txt b/Documentation/cxl/cxl-list.txt
> index bd377b3..dc86651 100644
> --- a/Documentation/cxl/cxl-list.txt
> +++ b/Documentation/cxl/cxl-list.txt
> @@ -53,6 +53,10 @@ OPTIONS
>  --idle::
>  	Include idle (not enabled / zero-sized) devices in the listing
>  
> +-H::
> +--health::
> +	Include health information in the memdev listing
> +
>  include::human-option.txt[]
>  
>  include::verbose-option.txt[]
> diff --git a/util/json.h b/util/json.h
> index 91918c8..ce575e6 100644
> --- a/util/json.h
> +++ b/util/json.h
> @@ -19,6 +19,7 @@ enum util_json_flags {
>  	UTIL_JSON_CONFIGURED	= (1 << 7),
>  	UTIL_JSON_FIRMWARE	= (1 << 8),
>  	UTIL_JSON_DAX_MAPPINGS	= (1 << 9),
> +	UTIL_JSON_HEALTH	= (1 << 10),
>  };
>  
>  struct json_object;
> diff --git a/cxl/list.c b/cxl/list.c
> index 3dea73f..2fa155a 100644
> --- a/cxl/list.c
> +++ b/cxl/list.c
> @@ -16,6 +16,7 @@ static struct {
>  	bool memdevs;
>  	bool idle;
>  	bool human;
> +	bool health;
>  } list;
>  
>  static unsigned long listopts_to_flags(void)
> @@ -26,6 +27,8 @@ static unsigned long listopts_to_flags(void)
>  		flags |= UTIL_JSON_IDLE;
>  	if (list.human)
>  		flags |= UTIL_JSON_HUMAN;
> +	if (list.health)
> +		flags |= UTIL_JSON_HEALTH;
>  	return flags;
>  }
>  
> @@ -57,6 +60,8 @@ int cmd_list(int argc, const char **argv, struct cxl_ctx *ctx)
>  		OPT_BOOLEAN('i', "idle", &list.idle, "include idle devices"),
>  		OPT_BOOLEAN('u', "human", &list.human,
>  				"use human friendly number formats "),
> +		OPT_BOOLEAN('H', "health", &list.health,
> +				"include memory device health information "),
>  		OPT_END(),
>  	};
>  	const char * const u[] = {
> diff --git a/util/json.c b/util/json.c
> index 3be3a92..dfc7b8e 100644
> --- a/util/json.c
> +++ b/util/json.c
> @@ -1442,6 +1442,190 @@ struct json_object *util_badblock_rec_to_json(u64 block, u64 count,
>  	return NULL;
>  }
>  
> +static struct json_object *util_cxl_memdev_health_to_json(
> +		struct cxl_memdev *memdev, unsigned long flags)
> +{
> +	const char *devname = cxl_memdev_get_devname(memdev);
> +	struct json_object *jhealth;
> +	struct json_object *jobj;
> +	struct cxl_cmd *cmd;
> +	u32 field;
> +	int rc;
> +
> +	jhealth = json_object_new_object();
> +	if (!jhealth)
> +		return NULL;
> +	if (!memdev)
> +		goto err_jobj;
> +
> +	cmd = cxl_cmd_new_get_health_info(memdev);
> +	if (!cmd)
> +		goto err_jobj;
> +
> +	rc = cxl_cmd_submit(cmd);
> +	/* ENOTTY - command not supported by the memdev */
> +	if (rc == -ENOTTY)
> +		goto err_cmd;
> +	if (rc < 0) {
> +		fprintf(stderr, "%s: cmd submission failed: %s\n", devname,
> +		    strerror(-rc));
> +		goto err_cmd;
> +	}
> +	rc = cxl_cmd_get_mbox_status(cmd);
> +	if (rc != 0) {
> +		fprintf(stderr, "%s: firmware status: %d\n", devname, rc);
> +		rc = -ENXIO;
> +		goto err_cmd;
> +	}
> +
> +	/* health_status fields */
> +	rc = cxl_cmd_health_info_get_maintenance_needed(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "maintenance_needed", jobj);
> +
> +	rc = cxl_cmd_health_info_get_performance_degraded(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "performance_degraded", jobj);
> +
> +	rc = cxl_cmd_health_info_get_hw_replacement_needed(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "hw_replacement_needed", jobj);
> +
> +	/* media_status fields */
> +	rc = cxl_cmd_health_info_get_media_normal(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_normal", jobj);
> +
> +	rc = cxl_cmd_health_info_get_media_not_ready(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_not_ready", jobj);
> +
> +	rc = cxl_cmd_health_info_get_media_persistence_lost(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_persistence_lost", jobj);
> +
> +	rc = cxl_cmd_health_info_get_media_data_lost(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_data_lost", jobj);
> +
> +	rc = cxl_cmd_health_info_get_media_powerloss_persistence_loss(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_powerloss_persistence_loss", jobj);
> +
> +	rc = cxl_cmd_health_info_get_media_shutdown_persistence_loss(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_shutdown_persistence_loss", jobj);
> +
> +	rc = cxl_cmd_health_info_get_media_persistence_loss_imminent(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_persistence_loss_imminent", jobj);
> +
> +	rc = cxl_cmd_health_info_get_media_powerloss_data_loss(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_powerloss_data_loss", jobj);
> +
> +	rc = cxl_cmd_health_info_get_media_shutdown_data_loss(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_shutdown_data_loss", jobj);
> +
> +	rc = cxl_cmd_health_info_get_media_data_loss_imminent(cmd);
> +	jobj = json_object_new_boolean(rc);
> +	if (jobj)
> +		json_object_object_add(jhealth, "media_data_loss_imminent", jobj);
> +
> +	/* ext_status fields */
> +	if (cxl_cmd_health_info_get_ext_life_used_normal(cmd))
> +		jobj = json_object_new_string("normal");
> +	else if (cxl_cmd_health_info_get_ext_life_used_warning(cmd))
> +		jobj = json_object_new_string("warning");
> +	else if (cxl_cmd_health_info_get_ext_life_used_critical(cmd))
> +		jobj = json_object_new_string("critical");
> +	else
> +		jobj = json_object_new_string("unknown");
> +	if (jobj)
> +		json_object_object_add(jhealth, "ext_life_used", jobj);
> +
> +	if (cxl_cmd_health_info_get_ext_temperature_normal(cmd))
> +		jobj = json_object_new_string("normal");
> +	else if (cxl_cmd_health_info_get_ext_temperature_warning(cmd))
> +		jobj = json_object_new_string("warning");
> +	else if (cxl_cmd_health_info_get_ext_temperature_critical(cmd))
> +		jobj = json_object_new_string("critical");
> +	else
> +		jobj = json_object_new_string("unknown");
> +	if (jobj)
> +		json_object_object_add(jhealth, "ext_temperature", jobj);
> +
> +	if (cxl_cmd_health_info_get_ext_corrected_volatile_normal(cmd))
> +		jobj = json_object_new_string("normal");
> +	else if (cxl_cmd_health_info_get_ext_corrected_volatile_warning(cmd))
> +		jobj = json_object_new_string("warning");
> +	else
> +		jobj = json_object_new_string("unknown");
> +	if (jobj)
> +		json_object_object_add(jhealth, "ext_corrected_volatile", jobj);
> +
> +	if (cxl_cmd_health_info_get_ext_corrected_persistent_normal(cmd))
> +		jobj = json_object_new_string("normal");
> +	else if (cxl_cmd_health_info_get_ext_corrected_persistent_warning(cmd))
> +		jobj = json_object_new_string("warning");
> +	else
> +		jobj = json_object_new_string("unknown");
> +	if (jobj)
> +		json_object_object_add(jhealth, "ext_corrected_persistent", jobj);
> +
> +	/* other fields */
> +	field = cxl_cmd_health_info_get_life_used(cmd);
> +	if (field != 0xff) {
> +		jobj = json_object_new_int(field);
> +		if (jobj)
> +			json_object_object_add(jhealth, "life_used_percent", jobj);
> +	}
> +
> +	field = cxl_cmd_health_info_get_temperature(cmd);
> +	if (field != 0xffff) {
> +		jobj = json_object_new_int(field);
> +		if (jobj)
> +			json_object_object_add(jhealth, "temperature", jobj);
> +	}
> +
> +	field = cxl_cmd_health_info_get_dirty_shutdowns(cmd);
> +	jobj = json_object_new_uint64(field);
> +	if (jobj)
> +		json_object_object_add(jhealth, "dirty_shutdowns", jobj);
> +
> +	field = cxl_cmd_health_info_get_volatile_errors(cmd);
> +	jobj = json_object_new_uint64(field);
> +	if (jobj)
> +		json_object_object_add(jhealth, "volatile_errors", jobj);
> +
> +	field = cxl_cmd_health_info_get_pmem_errors(cmd);
> +	jobj = json_object_new_uint64(field);

json_object_new_uint64() seems to be a relatively new interface in
json-c - Some Ubuntu LTS and CentOS 8 distros dont' have a recent
enough json-c to have this.

I'm thinking I'll switch these three above to json_object_new_int64.
I'd originally chosen the uint64 variant because that seemsed to be the
only unsigned int option available. The fields we're trying to print
are all u32's. So I think using int64 should be sufficient that we
won't end up with spurious negative listings.

Thoughts?

> +	if (jobj)
> +		json_object_object_add(jhealth, "pmem_errors", jobj);
> +
> +	cxl_cmd_unref(cmd);
> +	return jhealth;
> +
> +err_cmd:
> +	cxl_cmd_unref(cmd);
> +err_jobj:
> +	json_object_put(jhealth);
> +	return NULL;
> +}
> +
>  struct json_object *util_cxl_memdev_to_json(struct cxl_memdev *memdev,
>  		unsigned long flags)
>  {
> @@ -1464,5 +1648,10 @@ struct json_object *util_cxl_memdev_to_json(struct cxl_memdev *memdev,
>  	if (jobj)
>  		json_object_object_add(jdev, "ram_size", jobj);
>  
> +	if (flags & UTIL_JSON_HEALTH) {
> +		jobj = util_cxl_memdev_health_to_json(memdev, flags);
> +		if (jobj)
> +			json_object_object_add(jdev, "health", jobj);
> +	}
>  	return jdev;
>  }
Verma, Vishal L Oct. 14, 2021, 11:42 p.m. UTC | #2
On Thu, 2021-10-07 at 02:21 -0600, Vishal Verma wrote:
> Add JSON output for fields from the 'GET_HEALTH_INFO' mailbox command
> to memory device listings.
> 
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> ---
>  Documentation/cxl/cxl-list.txt |   4 +
>  util/json.h                    |   1 +
>  cxl/list.c                     |   5 +
>  util/json.c                    | 189 +++++++++++++++++++++++++++++++++
>  4 files changed, 199 insertions(+)
> 
> diff --git a/Documentation/cxl/cxl-list.txt b/Documentation/cxl/cxl-list.txt
> index bd377b3..dc86651 100644
> --- a/Documentation/cxl/cxl-list.txt
> +++ b/Documentation/cxl/cxl-list.txt
> @@ -53,6 +53,10 @@ OPTIONS
>  --idle::
>  	Include idle (not enabled / zero-sized) devices in the listing
>  
> +-H::
> +--health::
> +	Include health information in the memdev listing
> +
>  include::human-option.txt[]
>  
>  include::verbose-option.txt[]
> diff --git a/util/json.h b/util/json.h
> index 91918c8..ce575e6 100644
> --- a/util/json.h
> +++ b/util/json.h
> @@ -19,6 +19,7 @@ enum util_json_flags {
>  	UTIL_JSON_CONFIGURED	= (1 << 7),
>  	UTIL_JSON_FIRMWARE	= (1 << 8),
>  	UTIL_JSON_DAX_MAPPINGS	= (1 << 9),
> +	UTIL_JSON_HEALTH	= (1 << 10),
>  };
>  
>  struct json_object;
> diff --git a/cxl/list.c b/cxl/list.c
> index 3dea73f..2fa155a 100644
> --- a/cxl/list.c
> +++ b/cxl/list.c
> @@ -16,6 +16,7 @@ static struct {
>  	bool memdevs;
>  	bool idle;
>  	bool human;
> +	bool health;
>  } list;
>  
>  static unsigned long listopts_to_flags(void)
> @@ -26,6 +27,8 @@ static unsigned long listopts_to_flags(void)
>  		flags |= UTIL_JSON_IDLE;
>  	if (list.human)
>  		flags |= UTIL_JSON_HUMAN;
> +	if (list.health)
> +		flags |= UTIL_JSON_HEALTH;
>  	return flags;
>  }
>  
> @@ -57,6 +60,8 @@ int cmd_list(int argc, const char **argv, struct cxl_ctx *ctx)
>  		OPT_BOOLEAN('i', "idle", &list.idle, "include idle devices"),
>  		OPT_BOOLEAN('u', "human", &list.human,
>  				"use human friendly number formats "),
> +		OPT_BOOLEAN('H', "health", &list.health,
> +				"include memory device health information "),
>  		OPT_END(),
>  	};
>  	const char * const u[] = {
> diff --git a/util/json.c b/util/json.c
> index 3be3a92..dfc7b8e 100644
> --- a/util/json.c
> +++ b/util/json.c
> @@ -1442,6 +1442,190 @@ struct json_object *util_badblock_rec_to_json(u64 block, u64 count,
>  	return NULL;
>  }
>  
> +static struct json_object *util_cxl_memdev_health_to_json(
> +		struct cxl_memdev *memdev, unsigned long flags)
> +{
> +	const char *devname = cxl_memdev_get_devname(memdev);
> +	struct json_object *jhealth;
> +	struct json_object *jobj;
> +	struct cxl_cmd *cmd;
> +	u32 field;
> +	int rc;
> +
> +	jhealth = json_object_new_object();
> +	if (!jhealth)
> +		return NULL;
> +	if (!memdev)
> +		goto err_jobj;
> +
> +	cmd = cxl_cmd_new_get_health_info(memdev);
> +	if (!cmd)
> +		goto err_jobj;
> +
> +	rc = cxl_cmd_submit(cmd);
> +	/* ENOTTY - command not supported by the memdev */
> +	if (rc == -ENOTTY)
> +		goto err_cmd;

I'll remove this special case, as well as the error prints below. The
cmd submission could fail for any number of reasons, including
unsupported by hardware - I think for any of those cases, we can just
silently skip printing the json fields here.

> +	if (rc < 0) {
> +		fprintf(stderr, "%s: cmd submission failed: %s\n", devname,
> +		    strerror(-rc));
> +		goto err_cmd;
> +	}
> +	rc = cxl_cmd_get_mbox_status(cmd);
> +	if (rc != 0) {
> +		fprintf(stderr, "%s: firmware status: %d\n", devname, rc);
> +		rc = -ENXIO;
> +		goto err_cmd;
> +	}
> +
>
Dan Williams Oct. 15, 2021, 12:09 a.m. UTC | #3
On Mon, Oct 11, 2021 at 3:07 PM Verma, Vishal L
<vishal.l.verma@intel.com> wrote:
>
> On Thu, 2021-10-07 at 02:21 -0600, Vishal Verma wrote:
> > Add JSON output for fields from the 'GET_HEALTH_INFO' mailbox command
> > to memory device listings.
> >
> > Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> > ---
> >  Documentation/cxl/cxl-list.txt |   4 +
> >  util/json.h                    |   1 +
> >  cxl/list.c                     |   5 +
> >  util/json.c                    | 189 +++++++++++++++++++++++++++++++++
> >  4 files changed, 199 insertions(+)
> >
> > diff --git a/Documentation/cxl/cxl-list.txt b/Documentation/cxl/cxl-list.txt
> > index bd377b3..dc86651 100644
> > --- a/Documentation/cxl/cxl-list.txt
> > +++ b/Documentation/cxl/cxl-list.txt
> > @@ -53,6 +53,10 @@ OPTIONS
> >  --idle::
> >       Include idle (not enabled / zero-sized) devices in the listing
> >
> > +-H::
> > +--health::
> > +     Include health information in the memdev listing
> > +
> >  include::human-option.txt[]
> >
> >  include::verbose-option.txt[]
> > diff --git a/util/json.h b/util/json.h
> > index 91918c8..ce575e6 100644
> > --- a/util/json.h
> > +++ b/util/json.h
> > @@ -19,6 +19,7 @@ enum util_json_flags {
> >       UTIL_JSON_CONFIGURED    = (1 << 7),
> >       UTIL_JSON_FIRMWARE      = (1 << 8),
> >       UTIL_JSON_DAX_MAPPINGS  = (1 << 9),
> > +     UTIL_JSON_HEALTH        = (1 << 10),
> >  };
> >
> >  struct json_object;
> > diff --git a/cxl/list.c b/cxl/list.c
> > index 3dea73f..2fa155a 100644
> > --- a/cxl/list.c
> > +++ b/cxl/list.c
> > @@ -16,6 +16,7 @@ static struct {
> >       bool memdevs;
> >       bool idle;
> >       bool human;
> > +     bool health;
> >  } list;
> >
> >  static unsigned long listopts_to_flags(void)
> > @@ -26,6 +27,8 @@ static unsigned long listopts_to_flags(void)
> >               flags |= UTIL_JSON_IDLE;
> >       if (list.human)
> >               flags |= UTIL_JSON_HUMAN;
> > +     if (list.health)
> > +             flags |= UTIL_JSON_HEALTH;
> >       return flags;
> >  }
> >
> > @@ -57,6 +60,8 @@ int cmd_list(int argc, const char **argv, struct cxl_ctx *ctx)
> >               OPT_BOOLEAN('i', "idle", &list.idle, "include idle devices"),
> >               OPT_BOOLEAN('u', "human", &list.human,
> >                               "use human friendly number formats "),
> > +             OPT_BOOLEAN('H', "health", &list.health,
> > +                             "include memory device health information "),
> >               OPT_END(),
> >       };
> >       const char * const u[] = {
> > diff --git a/util/json.c b/util/json.c
> > index 3be3a92..dfc7b8e 100644
> > --- a/util/json.c
> > +++ b/util/json.c
> > @@ -1442,6 +1442,190 @@ struct json_object *util_badblock_rec_to_json(u64 block, u64 count,
> >       return NULL;
> >  }
> >
> > +static struct json_object *util_cxl_memdev_health_to_json(
> > +             struct cxl_memdev *memdev, unsigned long flags)
> > +{
> > +     const char *devname = cxl_memdev_get_devname(memdev);
> > +     struct json_object *jhealth;
> > +     struct json_object *jobj;
> > +     struct cxl_cmd *cmd;
> > +     u32 field;
> > +     int rc;
> > +
> > +     jhealth = json_object_new_object();
> > +     if (!jhealth)
> > +             return NULL;
> > +     if (!memdev)
> > +             goto err_jobj;
> > +
> > +     cmd = cxl_cmd_new_get_health_info(memdev);
> > +     if (!cmd)
> > +             goto err_jobj;
> > +
> > +     rc = cxl_cmd_submit(cmd);
> > +     /* ENOTTY - command not supported by the memdev */
> > +     if (rc == -ENOTTY)
> > +             goto err_cmd;
> > +     if (rc < 0) {
> > +             fprintf(stderr, "%s: cmd submission failed: %s\n", devname,
> > +                 strerror(-rc));
> > +             goto err_cmd;
> > +     }
> > +     rc = cxl_cmd_get_mbox_status(cmd);
> > +     if (rc != 0) {
> > +             fprintf(stderr, "%s: firmware status: %d\n", devname, rc);
> > +             rc = -ENXIO;
> > +             goto err_cmd;
> > +     }
> > +
> > +     /* health_status fields */
> > +     rc = cxl_cmd_health_info_get_maintenance_needed(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "maintenance_needed", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_performance_degraded(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "performance_degraded", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_hw_replacement_needed(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "hw_replacement_needed", jobj);
> > +
> > +     /* media_status fields */
> > +     rc = cxl_cmd_health_info_get_media_normal(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_normal", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_media_not_ready(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_not_ready", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_media_persistence_lost(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_persistence_lost", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_media_data_lost(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_data_lost", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_media_powerloss_persistence_loss(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_powerloss_persistence_loss", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_media_shutdown_persistence_loss(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_shutdown_persistence_loss", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_media_persistence_loss_imminent(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_persistence_loss_imminent", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_media_powerloss_data_loss(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_powerloss_data_loss", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_media_shutdown_data_loss(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_shutdown_data_loss", jobj);
> > +
> > +     rc = cxl_cmd_health_info_get_media_data_loss_imminent(cmd);
> > +     jobj = json_object_new_boolean(rc);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "media_data_loss_imminent", jobj);
> > +
> > +     /* ext_status fields */
> > +     if (cxl_cmd_health_info_get_ext_life_used_normal(cmd))
> > +             jobj = json_object_new_string("normal");
> > +     else if (cxl_cmd_health_info_get_ext_life_used_warning(cmd))
> > +             jobj = json_object_new_string("warning");
> > +     else if (cxl_cmd_health_info_get_ext_life_used_critical(cmd))
> > +             jobj = json_object_new_string("critical");
> > +     else
> > +             jobj = json_object_new_string("unknown");
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "ext_life_used", jobj);
> > +
> > +     if (cxl_cmd_health_info_get_ext_temperature_normal(cmd))
> > +             jobj = json_object_new_string("normal");
> > +     else if (cxl_cmd_health_info_get_ext_temperature_warning(cmd))
> > +             jobj = json_object_new_string("warning");
> > +     else if (cxl_cmd_health_info_get_ext_temperature_critical(cmd))
> > +             jobj = json_object_new_string("critical");
> > +     else
> > +             jobj = json_object_new_string("unknown");
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "ext_temperature", jobj);
> > +
> > +     if (cxl_cmd_health_info_get_ext_corrected_volatile_normal(cmd))
> > +             jobj = json_object_new_string("normal");
> > +     else if (cxl_cmd_health_info_get_ext_corrected_volatile_warning(cmd))
> > +             jobj = json_object_new_string("warning");
> > +     else
> > +             jobj = json_object_new_string("unknown");
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "ext_corrected_volatile", jobj);
> > +
> > +     if (cxl_cmd_health_info_get_ext_corrected_persistent_normal(cmd))
> > +             jobj = json_object_new_string("normal");
> > +     else if (cxl_cmd_health_info_get_ext_corrected_persistent_warning(cmd))
> > +             jobj = json_object_new_string("warning");
> > +     else
> > +             jobj = json_object_new_string("unknown");
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "ext_corrected_persistent", jobj);
> > +
> > +     /* other fields */
> > +     field = cxl_cmd_health_info_get_life_used(cmd);
> > +     if (field != 0xff) {
> > +             jobj = json_object_new_int(field);
> > +             if (jobj)
> > +                     json_object_object_add(jhealth, "life_used_percent", jobj);
> > +     }
> > +
> > +     field = cxl_cmd_health_info_get_temperature(cmd);
> > +     if (field != 0xffff) {
> > +             jobj = json_object_new_int(field);
> > +             if (jobj)
> > +                     json_object_object_add(jhealth, "temperature", jobj);
> > +     }
> > +
> > +     field = cxl_cmd_health_info_get_dirty_shutdowns(cmd);
> > +     jobj = json_object_new_uint64(field);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "dirty_shutdowns", jobj);
> > +
> > +     field = cxl_cmd_health_info_get_volatile_errors(cmd);
> > +     jobj = json_object_new_uint64(field);
> > +     if (jobj)
> > +             json_object_object_add(jhealth, "volatile_errors", jobj);
> > +
> > +     field = cxl_cmd_health_info_get_pmem_errors(cmd);
> > +     jobj = json_object_new_uint64(field);
>
> json_object_new_uint64() seems to be a relatively new interface in
> json-c - Some Ubuntu LTS and CentOS 8 distros dont' have a recent
> enough json-c to have this.
>
> I'm thinking I'll switch these three above to json_object_new_int64.
> I'd originally chosen the uint64 variant because that seemsed to be the
> only unsigned int option available. The fields we're trying to print
> are all u32's. So I think using int64 should be sufficient that we
> won't end up with spurious negative listings.
>
> Thoughts?
>

We do have util_json_object_hex() to work around the earlier
unavailability of an unsigned 64-bit quantity. I think to move to
json_object_new_uint64() it would need to have build system
auto-detect support and likely a compat fallback for environments that
are already expecting that ndctl 64-bit integers might be negative. So
json_object_new_int64() in the near term sounds good to me.
Dan Williams Oct. 15, 2021, 9:15 p.m. UTC | #4
On Thu, Oct 14, 2021 at 4:42 PM Verma, Vishal L
<vishal.l.verma@intel.com> wrote:
>
> On Thu, 2021-10-07 at 02:21 -0600, Vishal Verma wrote:
> > Add JSON output for fields from the 'GET_HEALTH_INFO' mailbox command
> > to memory device listings.
> >
> > Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> > ---
> >  Documentation/cxl/cxl-list.txt |   4 +
> >  util/json.h                    |   1 +
> >  cxl/list.c                     |   5 +
> >  util/json.c                    | 189 +++++++++++++++++++++++++++++++++
> >  4 files changed, 199 insertions(+)
> >
> > diff --git a/Documentation/cxl/cxl-list.txt b/Documentation/cxl/cxl-list.txt
> > index bd377b3..dc86651 100644
> > --- a/Documentation/cxl/cxl-list.txt
> > +++ b/Documentation/cxl/cxl-list.txt
> > @@ -53,6 +53,10 @@ OPTIONS
> >  --idle::
> >       Include idle (not enabled / zero-sized) devices in the listing
> >
> > +-H::
> > +--health::
> > +     Include health information in the memdev listing
> > +
> >  include::human-option.txt[]
> >
> >  include::verbose-option.txt[]
> > diff --git a/util/json.h b/util/json.h
> > index 91918c8..ce575e6 100644
> > --- a/util/json.h
> > +++ b/util/json.h
> > @@ -19,6 +19,7 @@ enum util_json_flags {
> >       UTIL_JSON_CONFIGURED    = (1 << 7),
> >       UTIL_JSON_FIRMWARE      = (1 << 8),
> >       UTIL_JSON_DAX_MAPPINGS  = (1 << 9),
> > +     UTIL_JSON_HEALTH        = (1 << 10),
> >  };
> >
> >  struct json_object;
> > diff --git a/cxl/list.c b/cxl/list.c
> > index 3dea73f..2fa155a 100644
> > --- a/cxl/list.c
> > +++ b/cxl/list.c
> > @@ -16,6 +16,7 @@ static struct {
> >       bool memdevs;
> >       bool idle;
> >       bool human;
> > +     bool health;
> >  } list;
> >
> >  static unsigned long listopts_to_flags(void)
> > @@ -26,6 +27,8 @@ static unsigned long listopts_to_flags(void)
> >               flags |= UTIL_JSON_IDLE;
> >       if (list.human)
> >               flags |= UTIL_JSON_HUMAN;
> > +     if (list.health)
> > +             flags |= UTIL_JSON_HEALTH;
> >       return flags;
> >  }
> >
> > @@ -57,6 +60,8 @@ int cmd_list(int argc, const char **argv, struct cxl_ctx *ctx)
> >               OPT_BOOLEAN('i', "idle", &list.idle, "include idle devices"),
> >               OPT_BOOLEAN('u', "human", &list.human,
> >                               "use human friendly number formats "),
> > +             OPT_BOOLEAN('H', "health", &list.health,
> > +                             "include memory device health information "),
> >               OPT_END(),
> >       };
> >       const char * const u[] = {
> > diff --git a/util/json.c b/util/json.c
> > index 3be3a92..dfc7b8e 100644
> > --- a/util/json.c
> > +++ b/util/json.c
> > @@ -1442,6 +1442,190 @@ struct json_object *util_badblock_rec_to_json(u64 block, u64 count,
> >       return NULL;
> >  }
> >
> > +static struct json_object *util_cxl_memdev_health_to_json(
> > +             struct cxl_memdev *memdev, unsigned long flags)
> > +{
> > +     const char *devname = cxl_memdev_get_devname(memdev);
> > +     struct json_object *jhealth;
> > +     struct json_object *jobj;
> > +     struct cxl_cmd *cmd;
> > +     u32 field;
> > +     int rc;
> > +
> > +     jhealth = json_object_new_object();
> > +     if (!jhealth)
> > +             return NULL;
> > +     if (!memdev)
> > +             goto err_jobj;
> > +
> > +     cmd = cxl_cmd_new_get_health_info(memdev);
> > +     if (!cmd)
> > +             goto err_jobj;
> > +
> > +     rc = cxl_cmd_submit(cmd);
> > +     /* ENOTTY - command not supported by the memdev */
> > +     if (rc == -ENOTTY)
> > +             goto err_cmd;
>
> I'll remove this special case, as well as the error prints below. The
> cmd submission could fail for any number of reasons, including
> unsupported by hardware - I think for any of those cases, we can just
> silently skip printing the json fields here.

Sounds good. If someone really cares, the debug prints inside
cxl_cmd_submit() should be enough to indicate what went wrong.
diff mbox series

Patch

diff --git a/Documentation/cxl/cxl-list.txt b/Documentation/cxl/cxl-list.txt
index bd377b3..dc86651 100644
--- a/Documentation/cxl/cxl-list.txt
+++ b/Documentation/cxl/cxl-list.txt
@@ -53,6 +53,10 @@  OPTIONS
 --idle::
 	Include idle (not enabled / zero-sized) devices in the listing
 
+-H::
+--health::
+	Include health information in the memdev listing
+
 include::human-option.txt[]
 
 include::verbose-option.txt[]
diff --git a/util/json.h b/util/json.h
index 91918c8..ce575e6 100644
--- a/util/json.h
+++ b/util/json.h
@@ -19,6 +19,7 @@  enum util_json_flags {
 	UTIL_JSON_CONFIGURED	= (1 << 7),
 	UTIL_JSON_FIRMWARE	= (1 << 8),
 	UTIL_JSON_DAX_MAPPINGS	= (1 << 9),
+	UTIL_JSON_HEALTH	= (1 << 10),
 };
 
 struct json_object;
diff --git a/cxl/list.c b/cxl/list.c
index 3dea73f..2fa155a 100644
--- a/cxl/list.c
+++ b/cxl/list.c
@@ -16,6 +16,7 @@  static struct {
 	bool memdevs;
 	bool idle;
 	bool human;
+	bool health;
 } list;
 
 static unsigned long listopts_to_flags(void)
@@ -26,6 +27,8 @@  static unsigned long listopts_to_flags(void)
 		flags |= UTIL_JSON_IDLE;
 	if (list.human)
 		flags |= UTIL_JSON_HUMAN;
+	if (list.health)
+		flags |= UTIL_JSON_HEALTH;
 	return flags;
 }
 
@@ -57,6 +60,8 @@  int cmd_list(int argc, const char **argv, struct cxl_ctx *ctx)
 		OPT_BOOLEAN('i', "idle", &list.idle, "include idle devices"),
 		OPT_BOOLEAN('u', "human", &list.human,
 				"use human friendly number formats "),
+		OPT_BOOLEAN('H', "health", &list.health,
+				"include memory device health information "),
 		OPT_END(),
 	};
 	const char * const u[] = {
diff --git a/util/json.c b/util/json.c
index 3be3a92..dfc7b8e 100644
--- a/util/json.c
+++ b/util/json.c
@@ -1442,6 +1442,190 @@  struct json_object *util_badblock_rec_to_json(u64 block, u64 count,
 	return NULL;
 }
 
+static struct json_object *util_cxl_memdev_health_to_json(
+		struct cxl_memdev *memdev, unsigned long flags)
+{
+	const char *devname = cxl_memdev_get_devname(memdev);
+	struct json_object *jhealth;
+	struct json_object *jobj;
+	struct cxl_cmd *cmd;
+	u32 field;
+	int rc;
+
+	jhealth = json_object_new_object();
+	if (!jhealth)
+		return NULL;
+	if (!memdev)
+		goto err_jobj;
+
+	cmd = cxl_cmd_new_get_health_info(memdev);
+	if (!cmd)
+		goto err_jobj;
+
+	rc = cxl_cmd_submit(cmd);
+	/* ENOTTY - command not supported by the memdev */
+	if (rc == -ENOTTY)
+		goto err_cmd;
+	if (rc < 0) {
+		fprintf(stderr, "%s: cmd submission failed: %s\n", devname,
+		    strerror(-rc));
+		goto err_cmd;
+	}
+	rc = cxl_cmd_get_mbox_status(cmd);
+	if (rc != 0) {
+		fprintf(stderr, "%s: firmware status: %d\n", devname, rc);
+		rc = -ENXIO;
+		goto err_cmd;
+	}
+
+	/* health_status fields */
+	rc = cxl_cmd_health_info_get_maintenance_needed(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "maintenance_needed", jobj);
+
+	rc = cxl_cmd_health_info_get_performance_degraded(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "performance_degraded", jobj);
+
+	rc = cxl_cmd_health_info_get_hw_replacement_needed(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "hw_replacement_needed", jobj);
+
+	/* media_status fields */
+	rc = cxl_cmd_health_info_get_media_normal(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_normal", jobj);
+
+	rc = cxl_cmd_health_info_get_media_not_ready(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_not_ready", jobj);
+
+	rc = cxl_cmd_health_info_get_media_persistence_lost(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_persistence_lost", jobj);
+
+	rc = cxl_cmd_health_info_get_media_data_lost(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_data_lost", jobj);
+
+	rc = cxl_cmd_health_info_get_media_powerloss_persistence_loss(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_powerloss_persistence_loss", jobj);
+
+	rc = cxl_cmd_health_info_get_media_shutdown_persistence_loss(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_shutdown_persistence_loss", jobj);
+
+	rc = cxl_cmd_health_info_get_media_persistence_loss_imminent(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_persistence_loss_imminent", jobj);
+
+	rc = cxl_cmd_health_info_get_media_powerloss_data_loss(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_powerloss_data_loss", jobj);
+
+	rc = cxl_cmd_health_info_get_media_shutdown_data_loss(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_shutdown_data_loss", jobj);
+
+	rc = cxl_cmd_health_info_get_media_data_loss_imminent(cmd);
+	jobj = json_object_new_boolean(rc);
+	if (jobj)
+		json_object_object_add(jhealth, "media_data_loss_imminent", jobj);
+
+	/* ext_status fields */
+	if (cxl_cmd_health_info_get_ext_life_used_normal(cmd))
+		jobj = json_object_new_string("normal");
+	else if (cxl_cmd_health_info_get_ext_life_used_warning(cmd))
+		jobj = json_object_new_string("warning");
+	else if (cxl_cmd_health_info_get_ext_life_used_critical(cmd))
+		jobj = json_object_new_string("critical");
+	else
+		jobj = json_object_new_string("unknown");
+	if (jobj)
+		json_object_object_add(jhealth, "ext_life_used", jobj);
+
+	if (cxl_cmd_health_info_get_ext_temperature_normal(cmd))
+		jobj = json_object_new_string("normal");
+	else if (cxl_cmd_health_info_get_ext_temperature_warning(cmd))
+		jobj = json_object_new_string("warning");
+	else if (cxl_cmd_health_info_get_ext_temperature_critical(cmd))
+		jobj = json_object_new_string("critical");
+	else
+		jobj = json_object_new_string("unknown");
+	if (jobj)
+		json_object_object_add(jhealth, "ext_temperature", jobj);
+
+	if (cxl_cmd_health_info_get_ext_corrected_volatile_normal(cmd))
+		jobj = json_object_new_string("normal");
+	else if (cxl_cmd_health_info_get_ext_corrected_volatile_warning(cmd))
+		jobj = json_object_new_string("warning");
+	else
+		jobj = json_object_new_string("unknown");
+	if (jobj)
+		json_object_object_add(jhealth, "ext_corrected_volatile", jobj);
+
+	if (cxl_cmd_health_info_get_ext_corrected_persistent_normal(cmd))
+		jobj = json_object_new_string("normal");
+	else if (cxl_cmd_health_info_get_ext_corrected_persistent_warning(cmd))
+		jobj = json_object_new_string("warning");
+	else
+		jobj = json_object_new_string("unknown");
+	if (jobj)
+		json_object_object_add(jhealth, "ext_corrected_persistent", jobj);
+
+	/* other fields */
+	field = cxl_cmd_health_info_get_life_used(cmd);
+	if (field != 0xff) {
+		jobj = json_object_new_int(field);
+		if (jobj)
+			json_object_object_add(jhealth, "life_used_percent", jobj);
+	}
+
+	field = cxl_cmd_health_info_get_temperature(cmd);
+	if (field != 0xffff) {
+		jobj = json_object_new_int(field);
+		if (jobj)
+			json_object_object_add(jhealth, "temperature", jobj);
+	}
+
+	field = cxl_cmd_health_info_get_dirty_shutdowns(cmd);
+	jobj = json_object_new_uint64(field);
+	if (jobj)
+		json_object_object_add(jhealth, "dirty_shutdowns", jobj);
+
+	field = cxl_cmd_health_info_get_volatile_errors(cmd);
+	jobj = json_object_new_uint64(field);
+	if (jobj)
+		json_object_object_add(jhealth, "volatile_errors", jobj);
+
+	field = cxl_cmd_health_info_get_pmem_errors(cmd);
+	jobj = json_object_new_uint64(field);
+	if (jobj)
+		json_object_object_add(jhealth, "pmem_errors", jobj);
+
+	cxl_cmd_unref(cmd);
+	return jhealth;
+
+err_cmd:
+	cxl_cmd_unref(cmd);
+err_jobj:
+	json_object_put(jhealth);
+	return NULL;
+}
+
 struct json_object *util_cxl_memdev_to_json(struct cxl_memdev *memdev,
 		unsigned long flags)
 {
@@ -1464,5 +1648,10 @@  struct json_object *util_cxl_memdev_to_json(struct cxl_memdev *memdev,
 	if (jobj)
 		json_object_object_add(jdev, "ram_size", jobj);
 
+	if (flags & UTIL_JSON_HEALTH) {
+		jobj = util_cxl_memdev_health_to_json(memdev, flags);
+		if (jobj)
+			json_object_object_add(jdev, "health", jobj);
+	}
 	return jdev;
 }