diff mbox

[v2] libndctl: add support for the MSFT family of DSM functions

Message ID 20170326201753.1522-1-lijunpan2000@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lijun Pan March 26, 2017, 8:17 p.m. UTC
From: Lijun Pan <Lijun.Pan@dell.com>

This patch retrieves the health data from NVDIMM-N via
the MSFT _DSM function[1], following JESD245A[2] standards.
Now 'ndctl list --dimms --health --idle' could work
on MSFT type NVDIMM-N, but limited to health_state,
temperature_celsius, and life_used_percentage.

[1]. https://msdn.microsoft.com/library/windows/hardware/mt604741
[2]. https://www.jedec.org/sites/default/files/docs/JESD245A.pdf

Cc: Stuart Hayes <Stuart.Hayes@dell.com>
Signed-off-by: Lijun Pan <Lijun.Pan@dell.com>
---
v2:
 - v2 combines v1's 1/2 and 2/2
 - reuse the existing libndctl abstraction (suggested by Dan)
 - move the MSFT _DSM code under ndctl/lib/
 - the closet common field we can have between MSFT _DSM and smart is
   health_state, temperature_celsius, and life_used_percentage.


 ndctl/lib/Makefile.am        |   1 +
 ndctl/lib/libndctl-msft.c    | 143 +++++++++++++++++++++++++++++++++++++++++++
 ndctl/lib/libndctl-private.h |   4 ++
 ndctl/lib/libndctl.c         |   2 +
 ndctl/lib/ndctl-msft.h       |  63 +++++++++++++++++++
 5 files changed, 213 insertions(+)
 create mode 100644 ndctl/lib/libndctl-msft.c
 create mode 100644 ndctl/lib/ndctl-msft.h

Comments

Linda Knippers March 28, 2017, 10:01 p.m. UTC | #1
On 03/26/2017 04:17 PM, Lijun Pan wrote:
> From: Lijun Pan <Lijun.Pan@dell.com>
> 
> This patch retrieves the health data from NVDIMM-N via
> the MSFT _DSM function[1], following JESD245A[2] standards.
> Now 'ndctl list --dimms --health --idle' could work
> on MSFT type NVDIMM-N, but limited to health_state,
> temperature_celsius, and life_used_percentage.
> 
> [1]. https://msdn.microsoft.com/library/windows/hardware/mt604741
> [2]. https://www.jedec.org/sites/default/files/docs/JESD245A.pdf

Is there a public version of the JEDEC spec?  When I go there, I get
a login screen.  If it's not public, then perhaps you can extract whatever
the  interesting parts are.  Or is the reference even needed if the
MSFT DSM spec describes the fields you're exposing?
> 
> Cc: Stuart Hayes <Stuart.Hayes@dell.com>
> Signed-off-by: Lijun Pan <Lijun.Pan@dell.com>
> ---
> v2:
>  - v2 combines v1's 1/2 and 2/2
>  - reuse the existing libndctl abstraction (suggested by Dan)
>  - move the MSFT _DSM code under ndctl/lib/
>  - the closet common field we can have between MSFT _DSM and smart is
>    health_state, temperature_celsius, and life_used_percentage.
> 
> 
>  ndctl/lib/Makefile.am        |   1 +
>  ndctl/lib/libndctl-msft.c    | 143 +++++++++++++++++++++++++++++++++++++++++++
>  ndctl/lib/libndctl-private.h |   4 ++
>  ndctl/lib/libndctl.c         |   2 +
>  ndctl/lib/ndctl-msft.h       |  63 +++++++++++++++++++
>  5 files changed, 213 insertions(+)
>  create mode 100644 ndctl/lib/libndctl-msft.c
>  create mode 100644 ndctl/lib/ndctl-msft.h
> 
> diff --git a/ndctl/lib/Makefile.am b/ndctl/lib/Makefile.am
> index 58a0bb3..7a446be 100644
> --- a/ndctl/lib/Makefile.am
> +++ b/ndctl/lib/Makefile.am
> @@ -32,6 +32,7 @@ endif
>  if ENABLE_SMART
>  libndctl_la_SOURCES += libndctl-smart.c
>  libndctl_la_SOURCES += libndctl-hpe1.c
> +libndctl_la_SOURCES += libndctl-msft.c
>  endif
>  
>  EXTRA_DIST += libndctl.sym
> diff --git a/ndctl/lib/libndctl-msft.c b/ndctl/lib/libndctl-msft.c
> new file mode 100644
> index 0000000..868e5c0
> --- /dev/null
> +++ b/ndctl/lib/libndctl-msft.c
> @@ -0,0 +1,143 @@
> +/*
> + * Copyright (C) 2016-2017 Dell, Inc.
> + * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
> + * Copyright (c) 2016, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU Lesser General Public License,
> + * version 2.1, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT ANY
> + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> + * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
> + * more details.
> + */
> +#include <stdlib.h>
> +#include <limits.h>
> +#include <util/log.h>
> +#include <ndctl/libndctl.h>
> +#include "libndctl-private.h"
> +#include "ndctl-msft.h"
> +
> +#define CMD_MSFT(_c) ((_c)->msft)
> +#define CMD_MSFT_SMART(_c) (CMD_MSFT(_c)->u.smart.data)
> +
> +static struct ndctl_cmd *msft_dimm_cmd_new_smart(struct ndctl_dimm *dimm)
> +{
> +	struct ndctl_bus *bus = ndctl_dimm_get_bus(dimm);
> +	struct ndctl_ctx *ctx = ndctl_bus_get_ctx(bus);
> +	struct ndctl_cmd *cmd;
> +	size_t size;
> +	struct ndn_pkg_msft *msft;
> +
> +	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) {
> +		dbg(ctx, "unsupported cmd\n");
> +		return NULL;
> +	}
> +
> +	size = sizeof(*cmd) + sizeof(struct ndn_pkg_msft);
> +	cmd = calloc(1, size);
> +	if (!cmd)
> +		return NULL;
> +
> +	cmd->dimm = dimm;
> +	ndctl_cmd_ref(cmd);
> +	cmd->type = ND_CMD_CALL;
> +	cmd->size = size;
> +	cmd->status = 1;
> +
> +	msft = CMD_MSFT(cmd);
> +	msft->gen.nd_family = NVDIMM_FAMILY_MSFT;
> +	msft->gen.nd_command = NDN_MSFT_CMD_SMART;
> +	msft->gen.nd_fw_size = 0;
> +	msft->gen.nd_size_in = offsetof(struct ndn_msft_smart, status);
> +	msft->gen.nd_size_out = sizeof(msft->u.smart);
> +	msft->u.smart.status = 0;
> +
> +	cmd->firmware_status = &msft->u.smart.status;
> +
> +	return cmd;
> +}
> +
> +static int msft_smart_valid(struct ndctl_cmd *cmd)
> +{
> +	if (cmd->type != ND_CMD_CALL ||
> +	    cmd->size != sizeof(*cmd) + sizeof(struct ndn_pkg_msft) ||
> +	    CMD_MSFT(cmd)->gen.nd_family != NVDIMM_FAMILY_MSFT ||
> +	    CMD_MSFT(cmd)->gen.nd_command != NDN_MSFT_CMD_SMART ||
> +	    cmd->status != 0)
> +		return cmd->status < 0 ? cmd->status : -EINVAL;
> +	return 0;
> +}
> +
> +static unsigned int msft_cmd_smart_get_flags(struct ndctl_cmd *cmd)
> +{
> +	if (msft_smart_valid(cmd) < 0)
> +		return UINT_MAX;
> +
> +	/* below health data can be retrieved via MSFT _DSM function 11 */
> +	return NDN_MSFT_SMART_HEALTH_VALID |
> +		NDN_MSFT_SMART_TEMP_VALID |
> +		NDN_MSFT_SMART_USED_VALID;
> +}
> +
> +static unsigned int num_set_bit_health(__u16 num)
> +{
> +	int i;
> +	__u16 n = num & 0x7FFF;
> +	unsigned int count = 0;
> +
> +	for (i = 0; i < 15; i++)
> +		if (!!(n & (1 << i)))
> +			count++;
> +
> +	return count;
> +}
> +
> +static unsigned int msft_cmd_smart_get_health(struct ndctl_cmd *cmd)
> +{
> +	unsigned int health;
> +	unsigned int num;
> +
> +	if (msft_smart_valid(cmd) < 0)
> +		return UINT_MAX;
> +
> +	num = num_set_bit_health(CMD_MSFT_SMART(cmd)->health);
> +	if (num == 0)
> +		health = 0;
> +	else if (num < 2)
> +		health = ND_SMART_NON_CRITICAL_HEALTH;
> +	else if (num < 3)
> +		health = ND_SMART_CRITICAL_HEALTH;
> +	else
> +		health = ND_SMART_FATAL_HEALTH;
> +
> +	return health;
> +}
> +
> +static unsigned int msft_cmd_smart_get_temperature(struct ndctl_cmd *cmd)
> +{
> +	if (msft_smart_valid(cmd) < 0)
> +		return UINT_MAX;
> +	/*
> +	 * refer to JESD245 spec section 7.8 to calculate the temperature
> +	 * then convert to 1/16 Celsius granularity.
> +	 */

I'm confused by this comment because the Microsoft DSM spec says this value
is in degrees C.  Regardless of what the JEDAC spec says, wouldn't the
platform FW convert to degrees C?  And then this function needs to convert
to 1/16th of a degree?

> +	return CMD_MSFT_SMART(cmd)->temp & 0x0FFC;

> +}
> +
> +static unsigned int msft_cmd_smart_get_life_used(struct ndctl_cmd *cmd)
> +{
> +	if (msft_smart_valid(cmd) < 0)
> +		return UINT_MAX;
> +
> +	return 100 - CMD_MSFT_SMART(cmd)->nvm_lifetime;
> +}
> +
> +struct ndctl_smart_ops * const msft_smart_ops = &(struct ndctl_smart_ops) {
> +	.new_smart = msft_dimm_cmd_new_smart,
> +	.smart_get_flags = msft_cmd_smart_get_flags,
> +	.smart_get_health = msft_cmd_smart_get_health,
> +	.smart_get_temperature = msft_cmd_smart_get_temperature,
> +	.smart_get_life_used = msft_cmd_smart_get_life_used,
> +};
> diff --git a/ndctl/lib/libndctl-private.h b/ndctl/lib/libndctl-private.h
> index 3e67db0..8f10fbc 100644
> --- a/ndctl/lib/libndctl-private.h
> +++ b/ndctl/lib/libndctl-private.h
> @@ -32,6 +32,7 @@
>  #include <ccan/endian/endian.h>
>  #include <ccan/short_types/short_types.h>
>  #include "ndctl-hpe1.h"
> +#include "ndctl-msft.h"
>  
>  #define SZ_16M 0x01000000
>  
> @@ -196,6 +197,7 @@ struct ndctl_cmd {
>  		struct nd_cmd_clear_error clear_err[0];
>  #endif
>  		struct ndn_pkg_hpe1 hpe1[0];
> +		struct ndn_pkg_msft msft[0];
>  		struct nd_cmd_smart smart[0];
>  		struct nd_cmd_smart_threshold smart_t[0];
>  		struct nd_cmd_get_config_size get_size[0];
> @@ -226,9 +228,11 @@ struct ndctl_smart_ops {
>  #if HAS_SMART == 1
>  struct ndctl_smart_ops * const intel_smart_ops;
>  struct ndctl_smart_ops * const hpe1_smart_ops;
> +struct ndctl_smart_ops * const msft_smart_ops;
>  #else
>  static struct ndctl_smart_ops * const intel_smart_ops = NULL;
>  static struct ndctl_smart_ops * const hpe1_smart_ops = NULL;
> +static struct ndctl_smart_ops * const msft_smart_ops = NULL;
>  #endif
>  
>  /* internal library helpers for conditionally defined command numbers */
> diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
> index 565c969..e5e027a 100644
> --- a/ndctl/lib/libndctl.c
> +++ b/ndctl/lib/libndctl.c
> @@ -1254,6 +1254,8 @@ static void *add_dimm(void *parent, int id, const char *dimm_base)
>  		dimm->dsm_family = strtoul(buf, NULL, 0);
>  	if (dimm->dsm_family == NVDIMM_FAMILY_HPE1)
>  		dimm->smart_ops = hpe1_smart_ops;
> +	if (dimm->dsm_family == NVDIMM_FAMILY_MSFT)
> +		dimm->smart_ops = msft_smart_ops;
>  
>  	dimm->formats = formats;
>  	sprintf(path, "%s/nfit/format", dimm_base);
> diff --git a/ndctl/lib/ndctl-msft.h b/ndctl/lib/ndctl-msft.h
> new file mode 100644
> index 0000000..0a1c7c6
> --- /dev/null
> +++ b/ndctl/lib/ndctl-msft.h
> @@ -0,0 +1,63 @@
> +/*
> + * Copyright (C) 2016-2017 Dell, Inc.
> + * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
> + * Copyright (c) 2014-2015, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU Lesser General Public License,
> + * version 2.1, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT ANY
> + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> + * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
> + * more details.
> + */
> +#ifndef __NDCTL_MSFT_H__
> +#define __NDCTL_MSFT_H__
> +
> +enum {
> +	NDN_MSFT_CMD_QUERY = 0,
> +
> +	/* non-root commands */
> +	NDN_MSFT_CMD_SMART = 11,
> +};
> +
> +/* NDN_MSFT_CMD_SMART */
> +#define NDN_MSFT_SMART_HEALTH_VALID	ND_SMART_HEALTH_VALID
> +#define NDN_MSFT_SMART_TEMP_VALID	ND_SMART_TEMP_VALID
> +#define NDN_MSFT_SMART_USED_VALID	ND_SMART_USED_VALID
> +
> +/*
> + * This is actually function 11 data,
> + * This is the closest I can find to match smart
> + * Microsoft _DSM does not have smart function
> + */
> +struct ndn_msft_smart_data {
> +	__u16	health;
> +	__u16	temp;
> +	__u8	err_thresh_stat;
> +	__u8	warn_thresh_stat;
> +	__u8	nvm_lifetime;
> +	__u8	count_dram_uncorr_err;
> +	__u8	count_dram_corr_err;
> +} __attribute__((packed));
> +
> +struct ndn_msft_smart {
> +	__u32	status;
> +	union {
> +		__u8 buf[9];
> +		struct ndn_msft_smart_data data[0];
> +	};
> +} __attribute__((packed));
> +
> +union ndn_msft_cmd {
> +	__u32			query;
> +	struct ndn_msft_smart	smart;
> +} __attribute__((packed));
> +
> +struct ndn_pkg_msft {
> +	struct nd_cmd_pkg	gen;
> +	union ndn_msft_cmd	u;
> +} __attribute__((packed));
> +
> +#endif /* __NDCTL_MSFT_H__ */
>
Lijun.Pan@dell.com March 28, 2017, 11:14 p.m. UTC | #2
Dell - Internal Use - Confidential  



> -----Original Message-----
> From: Linda Knippers [mailto:linda.knippers@hpe.com]
> Sent: Tuesday, March 28, 2017 5:02 PM
> To: Lijun Pan <lijunpan2000@gmail.com>; linux-nvdimm@lists.01.org
> Cc: Pan, Lijun <Lijun_Pan@Dell.com>; Hayes, Stuart
> <Stuart_Hayes@Dell.com>
> Subject: Re: [PATCH v2] libndctl: add support for the MSFT family of DSM
> functions
> 
> [EXTERNAL EMAIL]
> 
> On 03/26/2017 04:17 PM, Lijun Pan wrote:
> > From: Lijun Pan <Lijun.Pan@dell.com>
> >
> > This patch retrieves the health data from NVDIMM-N via
> > the MSFT _DSM function[1], following JESD245A[2] standards.
> > Now 'ndctl list --dimms --health --idle' could work
> > on MSFT type NVDIMM-N, but limited to health_state,
> > temperature_celsius, and life_used_percentage.
> >
> > [1]. https://msdn.microsoft.com/library/windows/hardware/mt604741
> > [2]. https://www.jedec.org/sites/default/files/docs/JESD245A.pdf
> 
> Is there a public version of the JEDEC spec?  When I go there, I get
> a login screen.  If it's not public, then perhaps you can extract whatever
> the  interesting parts are.  Or is the reference even needed if the
> MSFT DSM spec describes the fields you're exposing?

You need to login to download a free version. You need a JEDEC spec to understand
the meaning of the fields in MSFT DSM spec.

Lijun

> >
> > Cc: Stuart Hayes <Stuart.Hayes@dell.com>
> > Signed-off-by: Lijun Pan <Lijun.Pan@dell.com>
> > ---
> > v2:
> >  - v2 combines v1's 1/2 and 2/2
> >  - reuse the existing libndctl abstraction (suggested by Dan)
> >  - move the MSFT _DSM code under ndctl/lib/
> >  - the closet common field we can have between MSFT _DSM and smart is
> >    health_state, temperature_celsius, and life_used_percentage.
> >
> >
> >  ndctl/lib/Makefile.am        |   1 +
> >  ndctl/lib/libndctl-msft.c    | 143
> +++++++++++++++++++++++++++++++++++++++++++
> >  ndctl/lib/libndctl-private.h |   4 ++
> >  ndctl/lib/libndctl.c         |   2 +
> >  ndctl/lib/ndctl-msft.h       |  63 +++++++++++++++++++
> >  5 files changed, 213 insertions(+)
> >  create mode 100644 ndctl/lib/libndctl-msft.c
> >  create mode 100644 ndctl/lib/ndctl-msft.h
> >
> > diff --git a/ndctl/lib/Makefile.am b/ndctl/lib/Makefile.am
> > index 58a0bb3..7a446be 100644
> > --- a/ndctl/lib/Makefile.am
> > +++ b/ndctl/lib/Makefile.am
> > @@ -32,6 +32,7 @@ endif
> >  if ENABLE_SMART
> >  libndctl_la_SOURCES += libndctl-smart.c
> >  libndctl_la_SOURCES += libndctl-hpe1.c
> > +libndctl_la_SOURCES += libndctl-msft.c
> >  endif
> >
> >  EXTRA_DIST += libndctl.sym
> > diff --git a/ndctl/lib/libndctl-msft.c b/ndctl/lib/libndctl-msft.c
> > new file mode 100644
> > index 0000000..868e5c0
> > --- /dev/null
> > +++ b/ndctl/lib/libndctl-msft.c
> > @@ -0,0 +1,143 @@
> > +/*
> > + * Copyright (C) 2016-2017 Dell, Inc.
> > + * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
> > + * Copyright (c) 2016, Intel Corporation.
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms and conditions of the GNU Lesser General Public
> License,
> > + * version 2.1, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but WITHOUT
> ANY
> > + * WARRANTY; without even the implied warranty of MERCHANTABILITY
> or FITNESS
> > + * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
> License for
> > + * more details.
> > + */
> > +#include <stdlib.h>
> > +#include <limits.h>
> > +#include <util/log.h>
> > +#include <ndctl/libndctl.h>
> > +#include "libndctl-private.h"
> > +#include "ndctl-msft.h"
> > +
> > +#define CMD_MSFT(_c) ((_c)->msft)
> > +#define CMD_MSFT_SMART(_c) (CMD_MSFT(_c)->u.smart.data)
> > +
> > +static struct ndctl_cmd *msft_dimm_cmd_new_smart(struct ndctl_dimm
> *dimm)
> > +{
> > +	struct ndctl_bus *bus = ndctl_dimm_get_bus(dimm);
> > +	struct ndctl_ctx *ctx = ndctl_bus_get_ctx(bus);
> > +	struct ndctl_cmd *cmd;
> > +	size_t size;
> > +	struct ndn_pkg_msft *msft;
> > +
> > +	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) {
> > +		dbg(ctx, "unsupported cmd\n");
> > +		return NULL;
> > +	}
> > +
> > +	size = sizeof(*cmd) + sizeof(struct ndn_pkg_msft);
> > +	cmd = calloc(1, size);
> > +	if (!cmd)
> > +		return NULL;
> > +
> > +	cmd->dimm = dimm;
> > +	ndctl_cmd_ref(cmd);
> > +	cmd->type = ND_CMD_CALL;
> > +	cmd->size = size;
> > +	cmd->status = 1;
> > +
> > +	msft = CMD_MSFT(cmd);
> > +	msft->gen.nd_family = NVDIMM_FAMILY_MSFT;
> > +	msft->gen.nd_command = NDN_MSFT_CMD_SMART;
> > +	msft->gen.nd_fw_size = 0;
> > +	msft->gen.nd_size_in = offsetof(struct ndn_msft_smart, status);
> > +	msft->gen.nd_size_out = sizeof(msft->u.smart);
> > +	msft->u.smart.status = 0;
> > +
> > +	cmd->firmware_status = &msft->u.smart.status;
> > +
> > +	return cmd;
> > +}
> > +
> > +static int msft_smart_valid(struct ndctl_cmd *cmd)
> > +{
> > +	if (cmd->type != ND_CMD_CALL ||
> > +	    cmd->size != sizeof(*cmd) + sizeof(struct ndn_pkg_msft) ||
> > +	    CMD_MSFT(cmd)->gen.nd_family != NVDIMM_FAMILY_MSFT ||
> > +	    CMD_MSFT(cmd)->gen.nd_command !=
> NDN_MSFT_CMD_SMART ||
> > +	    cmd->status != 0)
> > +		return cmd->status < 0 ? cmd->status : -EINVAL;
> > +	return 0;
> > +}
> > +
> > +static unsigned int msft_cmd_smart_get_flags(struct ndctl_cmd *cmd)
> > +{
> > +	if (msft_smart_valid(cmd) < 0)
> > +		return UINT_MAX;
> > +
> > +	/* below health data can be retrieved via MSFT _DSM function 11 */
> > +	return NDN_MSFT_SMART_HEALTH_VALID |
> > +		NDN_MSFT_SMART_TEMP_VALID |
> > +		NDN_MSFT_SMART_USED_VALID;
> > +}
> > +
> > +static unsigned int num_set_bit_health(__u16 num)
> > +{
> > +	int i;
> > +	__u16 n = num & 0x7FFF;
> > +	unsigned int count = 0;
> > +
> > +	for (i = 0; i < 15; i++)
> > +		if (!!(n & (1 << i)))
> > +			count++;
> > +
> > +	return count;
> > +}
> > +
> > +static unsigned int msft_cmd_smart_get_health(struct ndctl_cmd *cmd)
> > +{
> > +	unsigned int health;
> > +	unsigned int num;
> > +
> > +	if (msft_smart_valid(cmd) < 0)
> > +		return UINT_MAX;
> > +
> > +	num = num_set_bit_health(CMD_MSFT_SMART(cmd)->health);
> > +	if (num == 0)
> > +		health = 0;
> > +	else if (num < 2)
> > +		health = ND_SMART_NON_CRITICAL_HEALTH;
> > +	else if (num < 3)
> > +		health = ND_SMART_CRITICAL_HEALTH;
> > +	else
> > +		health = ND_SMART_FATAL_HEALTH;
> > +
> > +	return health;
> > +}
> > +
> > +static unsigned int msft_cmd_smart_get_temperature(struct ndctl_cmd
> *cmd)
> > +{
> > +	if (msft_smart_valid(cmd) < 0)
> > +		return UINT_MAX;
> > +	/*
> > +	 * refer to JESD245 spec section 7.8 to calculate the temperature
> > +	 * then convert to 1/16 Celsius granularity.
> > +	 */
> 
> I'm confused by this comment because the Microsoft DSM spec says this
> value
> is in degrees C.  Regardless of what the JEDAC spec says, wouldn't the
> platform FW convert to degrees C?  And then this function needs to convert
> to 1/16th of a degree?
> 
> > +	return CMD_MSFT_SMART(cmd)->temp & 0x0FFC;
> 
> > +}
> > +
> > +static unsigned int msft_cmd_smart_get_life_used(struct ndctl_cmd
> *cmd)
> > +{
> > +	if (msft_smart_valid(cmd) < 0)
> > +		return UINT_MAX;
> > +
> > +	return 100 - CMD_MSFT_SMART(cmd)->nvm_lifetime;
> > +}
> > +
> > +struct ndctl_smart_ops * const msft_smart_ops = &(struct
> ndctl_smart_ops) {
> > +	.new_smart = msft_dimm_cmd_new_smart,
> > +	.smart_get_flags = msft_cmd_smart_get_flags,
> > +	.smart_get_health = msft_cmd_smart_get_health,
> > +	.smart_get_temperature = msft_cmd_smart_get_temperature,
> > +	.smart_get_life_used = msft_cmd_smart_get_life_used,
> > +};
> > diff --git a/ndctl/lib/libndctl-private.h b/ndctl/lib/libndctl-private.h
> > index 3e67db0..8f10fbc 100644
> > --- a/ndctl/lib/libndctl-private.h
> > +++ b/ndctl/lib/libndctl-private.h
> > @@ -32,6 +32,7 @@
> >  #include <ccan/endian/endian.h>
> >  #include <ccan/short_types/short_types.h>
> >  #include "ndctl-hpe1.h"
> > +#include "ndctl-msft.h"
> >
> >  #define SZ_16M 0x01000000
> >
> > @@ -196,6 +197,7 @@ struct ndctl_cmd {
> >  		struct nd_cmd_clear_error clear_err[0];
> >  #endif
> >  		struct ndn_pkg_hpe1 hpe1[0];
> > +		struct ndn_pkg_msft msft[0];
> >  		struct nd_cmd_smart smart[0];
> >  		struct nd_cmd_smart_threshold smart_t[0];
> >  		struct nd_cmd_get_config_size get_size[0];
> > @@ -226,9 +228,11 @@ struct ndctl_smart_ops {
> >  #if HAS_SMART == 1
> >  struct ndctl_smart_ops * const intel_smart_ops;
> >  struct ndctl_smart_ops * const hpe1_smart_ops;
> > +struct ndctl_smart_ops * const msft_smart_ops;
> >  #else
> >  static struct ndctl_smart_ops * const intel_smart_ops = NULL;
> >  static struct ndctl_smart_ops * const hpe1_smart_ops = NULL;
> > +static struct ndctl_smart_ops * const msft_smart_ops = NULL;
> >  #endif
> >
> >  /* internal library helpers for conditionally defined command numbers */
> > diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
> > index 565c969..e5e027a 100644
> > --- a/ndctl/lib/libndctl.c
> > +++ b/ndctl/lib/libndctl.c
> > @@ -1254,6 +1254,8 @@ static void *add_dimm(void *parent, int id, const
> char *dimm_base)
> >  		dimm->dsm_family = strtoul(buf, NULL, 0);
> >  	if (dimm->dsm_family == NVDIMM_FAMILY_HPE1)
> >  		dimm->smart_ops = hpe1_smart_ops;
> > +	if (dimm->dsm_family == NVDIMM_FAMILY_MSFT)
> > +		dimm->smart_ops = msft_smart_ops;
> >
> >  	dimm->formats = formats;
> >  	sprintf(path, "%s/nfit/format", dimm_base);
> > diff --git a/ndctl/lib/ndctl-msft.h b/ndctl/lib/ndctl-msft.h
> > new file mode 100644
> > index 0000000..0a1c7c6
> > --- /dev/null
> > +++ b/ndctl/lib/ndctl-msft.h
> > @@ -0,0 +1,63 @@
> > +/*
> > + * Copyright (C) 2016-2017 Dell, Inc.
> > + * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
> > + * Copyright (c) 2014-2015, Intel Corporation.
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms and conditions of the GNU Lesser General Public
> License,
> > + * version 2.1, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but WITHOUT
> ANY
> > + * WARRANTY; without even the implied warranty of MERCHANTABILITY
> or FITNESS
> > + * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
> License for
> > + * more details.
> > + */
> > +#ifndef __NDCTL_MSFT_H__
> > +#define __NDCTL_MSFT_H__
> > +
> > +enum {
> > +	NDN_MSFT_CMD_QUERY = 0,
> > +
> > +	/* non-root commands */
> > +	NDN_MSFT_CMD_SMART = 11,
> > +};
> > +
> > +/* NDN_MSFT_CMD_SMART */
> > +#define NDN_MSFT_SMART_HEALTH_VALID
> 	ND_SMART_HEALTH_VALID
> > +#define NDN_MSFT_SMART_TEMP_VALID	ND_SMART_TEMP_VALID
> > +#define NDN_MSFT_SMART_USED_VALID	ND_SMART_USED_VALID
> > +
> > +/*
> > + * This is actually function 11 data,
> > + * This is the closest I can find to match smart
> > + * Microsoft _DSM does not have smart function
> > + */
> > +struct ndn_msft_smart_data {
> > +	__u16	health;
> > +	__u16	temp;
> > +	__u8	err_thresh_stat;
> > +	__u8	warn_thresh_stat;
> > +	__u8	nvm_lifetime;
> > +	__u8	count_dram_uncorr_err;
> > +	__u8	count_dram_corr_err;
> > +} __attribute__((packed));
> > +
> > +struct ndn_msft_smart {
> > +	__u32	status;
> > +	union {
> > +		__u8 buf[9];
> > +		struct ndn_msft_smart_data data[0];
> > +	};
> > +} __attribute__((packed));
> > +
> > +union ndn_msft_cmd {
> > +	__u32			query;
> > +	struct ndn_msft_smart	smart;
> > +} __attribute__((packed));
> > +
> > +struct ndn_pkg_msft {
> > +	struct nd_cmd_pkg	gen;
> > +	union ndn_msft_cmd	u;
> > +} __attribute__((packed));
> > +
> > +#endif /* __NDCTL_MSFT_H__ */
> >
>
Linda Knippers March 28, 2017, 11:37 p.m. UTC | #3
On 03/28/2017 07:14 PM, Lijun_Pan@Dell.com wrote:
> Dell - Internal Use - Confidential  

Um, but you just sent this to a public mailing list.
More below...
> 
> 
> 
>> -----Original Message-----
>> From: Linda Knippers [mailto:linda.knippers@hpe.com]
>> Sent: Tuesday, March 28, 2017 5:02 PM
>> To: Lijun Pan <lijunpan2000@gmail.com>; linux-nvdimm@lists.01.org
>> Cc: Pan, Lijun <Lijun_Pan@Dell.com>; Hayes, Stuart
>> <Stuart_Hayes@Dell.com>
>> Subject: Re: [PATCH v2] libndctl: add support for the MSFT family of DSM
>> functions
>>
>> [EXTERNAL EMAIL]
>>
>> On 03/26/2017 04:17 PM, Lijun Pan wrote:
>>> From: Lijun Pan <Lijun.Pan@dell.com>
>>>
>>> This patch retrieves the health data from NVDIMM-N via
>>> the MSFT _DSM function[1], following JESD245A[2] standards.
>>> Now 'ndctl list --dimms --health --idle' could work
>>> on MSFT type NVDIMM-N, but limited to health_state,
>>> temperature_celsius, and life_used_percentage.
>>>
>>> [1]. https://msdn.microsoft.com/library/windows/hardware/mt604741
>>> [2]. https://www.jedec.org/sites/default/files/docs/JESD245A.pdf
>>
>> Is there a public version of the JEDEC spec?  When I go there, I get
>> a login screen.  If it's not public, then perhaps you can extract whatever
>> the  interesting parts are.  Or is the reference even needed if the
>> MSFT DSM spec describes the fields you're exposing?
> 
> You need to login to download a free version. You need a JEDEC spec to understand
> the meaning of the fields in MSFT DSM spec.

I don't think I do for the fields that you're exposing.

There was another comment further down about msft_cmd_smart_get_temperature() that you missed so
keep going.


> 
> Lijun
> 
>>>
>>> Cc: Stuart Hayes <Stuart.Hayes@dell.com>
>>> Signed-off-by: Lijun Pan <Lijun.Pan@dell.com>
>>> ---
>>> v2:
>>>  - v2 combines v1's 1/2 and 2/2
>>>  - reuse the existing libndctl abstraction (suggested by Dan)
>>>  - move the MSFT _DSM code under ndctl/lib/
>>>  - the closet common field we can have between MSFT _DSM and smart is
>>>    health_state, temperature_celsius, and life_used_percentage.
>>>
>>>
>>>  ndctl/lib/Makefile.am        |   1 +
>>>  ndctl/lib/libndctl-msft.c    | 143
>> +++++++++++++++++++++++++++++++++++++++++++
>>>  ndctl/lib/libndctl-private.h |   4 ++
>>>  ndctl/lib/libndctl.c         |   2 +
>>>  ndctl/lib/ndctl-msft.h       |  63 +++++++++++++++++++
>>>  5 files changed, 213 insertions(+)
>>>  create mode 100644 ndctl/lib/libndctl-msft.c
>>>  create mode 100644 ndctl/lib/ndctl-msft.h
>>>
>>> diff --git a/ndctl/lib/Makefile.am b/ndctl/lib/Makefile.am
>>> index 58a0bb3..7a446be 100644
>>> --- a/ndctl/lib/Makefile.am
>>> +++ b/ndctl/lib/Makefile.am
>>> @@ -32,6 +32,7 @@ endif
>>>  if ENABLE_SMART
>>>  libndctl_la_SOURCES += libndctl-smart.c
>>>  libndctl_la_SOURCES += libndctl-hpe1.c
>>> +libndctl_la_SOURCES += libndctl-msft.c
>>>  endif
>>>
>>>  EXTRA_DIST += libndctl.sym
>>> diff --git a/ndctl/lib/libndctl-msft.c b/ndctl/lib/libndctl-msft.c
>>> new file mode 100644
>>> index 0000000..868e5c0
>>> --- /dev/null
>>> +++ b/ndctl/lib/libndctl-msft.c
>>> @@ -0,0 +1,143 @@
>>> +/*
>>> + * Copyright (C) 2016-2017 Dell, Inc.
>>> + * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
>>> + * Copyright (c) 2016, Intel Corporation.
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify it
>>> + * under the terms and conditions of the GNU Lesser General Public
>> License,
>>> + * version 2.1, as published by the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> ANY
>>> + * WARRANTY; without even the implied warranty of MERCHANTABILITY
>> or FITNESS
>>> + * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
>> License for
>>> + * more details.
>>> + */
>>> +#include <stdlib.h>
>>> +#include <limits.h>
>>> +#include <util/log.h>
>>> +#include <ndctl/libndctl.h>
>>> +#include "libndctl-private.h"
>>> +#include "ndctl-msft.h"
>>> +
>>> +#define CMD_MSFT(_c) ((_c)->msft)
>>> +#define CMD_MSFT_SMART(_c) (CMD_MSFT(_c)->u.smart.data)
>>> +
>>> +static struct ndctl_cmd *msft_dimm_cmd_new_smart(struct ndctl_dimm
>> *dimm)
>>> +{
>>> +	struct ndctl_bus *bus = ndctl_dimm_get_bus(dimm);
>>> +	struct ndctl_ctx *ctx = ndctl_bus_get_ctx(bus);
>>> +	struct ndctl_cmd *cmd;
>>> +	size_t size;
>>> +	struct ndn_pkg_msft *msft;
>>> +
>>> +	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) {
>>> +		dbg(ctx, "unsupported cmd\n");
>>> +		return NULL;
>>> +	}
>>> +
>>> +	size = sizeof(*cmd) + sizeof(struct ndn_pkg_msft);
>>> +	cmd = calloc(1, size);
>>> +	if (!cmd)
>>> +		return NULL;
>>> +
>>> +	cmd->dimm = dimm;
>>> +	ndctl_cmd_ref(cmd);
>>> +	cmd->type = ND_CMD_CALL;
>>> +	cmd->size = size;
>>> +	cmd->status = 1;
>>> +
>>> +	msft = CMD_MSFT(cmd);
>>> +	msft->gen.nd_family = NVDIMM_FAMILY_MSFT;
>>> +	msft->gen.nd_command = NDN_MSFT_CMD_SMART;
>>> +	msft->gen.nd_fw_size = 0;
>>> +	msft->gen.nd_size_in = offsetof(struct ndn_msft_smart, status);
>>> +	msft->gen.nd_size_out = sizeof(msft->u.smart);
>>> +	msft->u.smart.status = 0;
>>> +
>>> +	cmd->firmware_status = &msft->u.smart.status;
>>> +
>>> +	return cmd;
>>> +}
>>> +
>>> +static int msft_smart_valid(struct ndctl_cmd *cmd)
>>> +{
>>> +	if (cmd->type != ND_CMD_CALL ||
>>> +	    cmd->size != sizeof(*cmd) + sizeof(struct ndn_pkg_msft) ||
>>> +	    CMD_MSFT(cmd)->gen.nd_family != NVDIMM_FAMILY_MSFT ||
>>> +	    CMD_MSFT(cmd)->gen.nd_command !=
>> NDN_MSFT_CMD_SMART ||
>>> +	    cmd->status != 0)
>>> +		return cmd->status < 0 ? cmd->status : -EINVAL;
>>> +	return 0;
>>> +}
>>> +
>>> +static unsigned int msft_cmd_smart_get_flags(struct ndctl_cmd *cmd)
>>> +{
>>> +	if (msft_smart_valid(cmd) < 0)
>>> +		return UINT_MAX;
>>> +
>>> +	/* below health data can be retrieved via MSFT _DSM function 11 */
>>> +	return NDN_MSFT_SMART_HEALTH_VALID |
>>> +		NDN_MSFT_SMART_TEMP_VALID |
>>> +		NDN_MSFT_SMART_USED_VALID;
>>> +}
>>> +
>>> +static unsigned int num_set_bit_health(__u16 num)
>>> +{
>>> +	int i;
>>> +	__u16 n = num & 0x7FFF;
>>> +	unsigned int count = 0;
>>> +
>>> +	for (i = 0; i < 15; i++)
>>> +		if (!!(n & (1 << i)))
>>> +			count++;
>>> +
>>> +	return count;
>>> +}
>>> +
>>> +static unsigned int msft_cmd_smart_get_health(struct ndctl_cmd *cmd)
>>> +{
>>> +	unsigned int health;
>>> +	unsigned int num;
>>> +
>>> +	if (msft_smart_valid(cmd) < 0)
>>> +		return UINT_MAX;
>>> +
>>> +	num = num_set_bit_health(CMD_MSFT_SMART(cmd)->health);
>>> +	if (num == 0)
>>> +		health = 0;
>>> +	else if (num < 2)
>>> +		health = ND_SMART_NON_CRITICAL_HEALTH;
>>> +	else if (num < 3)
>>> +		health = ND_SMART_CRITICAL_HEALTH;
>>> +	else
>>> +		health = ND_SMART_FATAL_HEALTH;
>>> +
>>> +	return health;
>>> +}
>>> +
>>> +static unsigned int msft_cmd_smart_get_temperature(struct ndctl_cmd
>> *cmd)
>>> +{
>>> +	if (msft_smart_valid(cmd) < 0)
>>> +		return UINT_MAX;
>>> +	/*
>>> +	 * refer to JESD245 spec section 7.8 to calculate the temperature
>>> +	 * then convert to 1/16 Celsius granularity.
>>> +	 */
>>
>> I'm confused by this comment because the Microsoft DSM spec says this
>> value
>> is in degrees C.  Regardless of what the JEDAC spec says, wouldn't the
>> platform FW convert to degrees C?  And then this function needs to convert
>> to 1/16th of a degree?
>>
>>> +	return CMD_MSFT_SMART(cmd)->temp & 0x0FFC;
>>
>>> +}
>>> +
>>> +static unsigned int msft_cmd_smart_get_life_used(struct ndctl_cmd
>> *cmd)
>>> +{
>>> +	if (msft_smart_valid(cmd) < 0)
>>> +		return UINT_MAX;
>>> +
>>> +	return 100 - CMD_MSFT_SMART(cmd)->nvm_lifetime;
>>> +}
>>> +
>>> +struct ndctl_smart_ops * const msft_smart_ops = &(struct
>> ndctl_smart_ops) {
>>> +	.new_smart = msft_dimm_cmd_new_smart,
>>> +	.smart_get_flags = msft_cmd_smart_get_flags,
>>> +	.smart_get_health = msft_cmd_smart_get_health,
>>> +	.smart_get_temperature = msft_cmd_smart_get_temperature,
>>> +	.smart_get_life_used = msft_cmd_smart_get_life_used,
>>> +};
>>> diff --git a/ndctl/lib/libndctl-private.h b/ndctl/lib/libndctl-private.h
>>> index 3e67db0..8f10fbc 100644
>>> --- a/ndctl/lib/libndctl-private.h
>>> +++ b/ndctl/lib/libndctl-private.h
>>> @@ -32,6 +32,7 @@
>>>  #include <ccan/endian/endian.h>
>>>  #include <ccan/short_types/short_types.h>
>>>  #include "ndctl-hpe1.h"
>>> +#include "ndctl-msft.h"
>>>
>>>  #define SZ_16M 0x01000000
>>>
>>> @@ -196,6 +197,7 @@ struct ndctl_cmd {
>>>  		struct nd_cmd_clear_error clear_err[0];
>>>  #endif
>>>  		struct ndn_pkg_hpe1 hpe1[0];
>>> +		struct ndn_pkg_msft msft[0];
>>>  		struct nd_cmd_smart smart[0];
>>>  		struct nd_cmd_smart_threshold smart_t[0];
>>>  		struct nd_cmd_get_config_size get_size[0];
>>> @@ -226,9 +228,11 @@ struct ndctl_smart_ops {
>>>  #if HAS_SMART == 1
>>>  struct ndctl_smart_ops * const intel_smart_ops;
>>>  struct ndctl_smart_ops * const hpe1_smart_ops;
>>> +struct ndctl_smart_ops * const msft_smart_ops;
>>>  #else
>>>  static struct ndctl_smart_ops * const intel_smart_ops = NULL;
>>>  static struct ndctl_smart_ops * const hpe1_smart_ops = NULL;
>>> +static struct ndctl_smart_ops * const msft_smart_ops = NULL;
>>>  #endif
>>>
>>>  /* internal library helpers for conditionally defined command numbers */
>>> diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
>>> index 565c969..e5e027a 100644
>>> --- a/ndctl/lib/libndctl.c
>>> +++ b/ndctl/lib/libndctl.c
>>> @@ -1254,6 +1254,8 @@ static void *add_dimm(void *parent, int id, const
>> char *dimm_base)
>>>  		dimm->dsm_family = strtoul(buf, NULL, 0);
>>>  	if (dimm->dsm_family == NVDIMM_FAMILY_HPE1)
>>>  		dimm->smart_ops = hpe1_smart_ops;
>>> +	if (dimm->dsm_family == NVDIMM_FAMILY_MSFT)
>>> +		dimm->smart_ops = msft_smart_ops;
>>>
>>>  	dimm->formats = formats;
>>>  	sprintf(path, "%s/nfit/format", dimm_base);
>>> diff --git a/ndctl/lib/ndctl-msft.h b/ndctl/lib/ndctl-msft.h
>>> new file mode 100644
>>> index 0000000..0a1c7c6
>>> --- /dev/null
>>> +++ b/ndctl/lib/ndctl-msft.h
>>> @@ -0,0 +1,63 @@
>>> +/*
>>> + * Copyright (C) 2016-2017 Dell, Inc.
>>> + * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
>>> + * Copyright (c) 2014-2015, Intel Corporation.
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify it
>>> + * under the terms and conditions of the GNU Lesser General Public
>> License,
>>> + * version 2.1, as published by the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope it will be useful, but WITHOUT
>> ANY
>>> + * WARRANTY; without even the implied warranty of MERCHANTABILITY
>> or FITNESS
>>> + * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
>> License for
>>> + * more details.
>>> + */
>>> +#ifndef __NDCTL_MSFT_H__
>>> +#define __NDCTL_MSFT_H__
>>> +
>>> +enum {
>>> +	NDN_MSFT_CMD_QUERY = 0,
>>> +
>>> +	/* non-root commands */
>>> +	NDN_MSFT_CMD_SMART = 11,
>>> +};
>>> +
>>> +/* NDN_MSFT_CMD_SMART */
>>> +#define NDN_MSFT_SMART_HEALTH_VALID
>> 	ND_SMART_HEALTH_VALID
>>> +#define NDN_MSFT_SMART_TEMP_VALID	ND_SMART_TEMP_VALID
>>> +#define NDN_MSFT_SMART_USED_VALID	ND_SMART_USED_VALID
>>> +
>>> +/*
>>> + * This is actually function 11 data,
>>> + * This is the closest I can find to match smart
>>> + * Microsoft _DSM does not have smart function
>>> + */
>>> +struct ndn_msft_smart_data {
>>> +	__u16	health;
>>> +	__u16	temp;
>>> +	__u8	err_thresh_stat;
>>> +	__u8	warn_thresh_stat;
>>> +	__u8	nvm_lifetime;
>>> +	__u8	count_dram_uncorr_err;
>>> +	__u8	count_dram_corr_err;
>>> +} __attribute__((packed));
>>> +
>>> +struct ndn_msft_smart {
>>> +	__u32	status;
>>> +	union {
>>> +		__u8 buf[9];
>>> +		struct ndn_msft_smart_data data[0];
>>> +	};
>>> +} __attribute__((packed));
>>> +
>>> +union ndn_msft_cmd {
>>> +	__u32			query;
>>> +	struct ndn_msft_smart	smart;
>>> +} __attribute__((packed));
>>> +
>>> +struct ndn_pkg_msft {
>>> +	struct nd_cmd_pkg	gen;
>>> +	union ndn_msft_cmd	u;
>>> +} __attribute__((packed));
>>> +
>>> +#endif /* __NDCTL_MSFT_H__ */
>>>
>>
>
Lijun.Pan@dell.com March 29, 2017, 3:08 p.m. UTC | #4
Dell - Internal Use - Confidential  



> -----Original Message-----
> From: Linda Knippers [mailto:linda.knippers@hpe.com]
> Sent: Tuesday, March 28, 2017 5:02 PM
> To: Lijun Pan <lijunpan2000@gmail.com>; linux-nvdimm@lists.01.org
> Cc: Pan, Lijun <Lijun_Pan@Dell.com>; Hayes, Stuart
> <Stuart_Hayes@Dell.com>
> Subject: Re: [PATCH v2] libndctl: add support for the MSFT family of DSM
> functions
> 
> [EXTERNAL EMAIL]
> 
> On 03/26/2017 04:17 PM, Lijun Pan wrote:
> > From: Lijun Pan <Lijun.Pan@dell.com>
> >
> > This patch retrieves the health data from NVDIMM-N via
> > the MSFT _DSM function[1], following JESD245A[2] standards.
> > Now 'ndctl list --dimms --health --idle' could work
> > on MSFT type NVDIMM-N, but limited to health_state,
> > temperature_celsius, and life_used_percentage.
> >
> > [1]. https://msdn.microsoft.com/library/windows/hardware/mt604741
> > [2]. https://www.jedec.org/sites/default/files/docs/JESD245A.pdf
> 
> Is there a public version of the JEDEC spec?  When I go there, I get
> a login screen.  If it's not public, then perhaps you can extract whatever
> the  interesting parts are.  Or is the reference even needed if the
> MSFT DSM spec describes the fields you're exposing?
> >
> > Cc: Stuart Hayes <Stuart.Hayes@dell.com>
> > Signed-off-by: Lijun Pan <Lijun.Pan@dell.com>
> > ---
> > v2:
> >  - v2 combines v1's 1/2 and 2/2
> >  - reuse the existing libndctl abstraction (suggested by Dan)
> >  - move the MSFT _DSM code under ndctl/lib/
> >  - the closet common field we can have between MSFT _DSM and smart is
> >    health_state, temperature_celsius, and life_used_percentage.
> >
> >
> >  ndctl/lib/Makefile.am        |   1 +
> >  ndctl/lib/libndctl-msft.c    | 143
> +++++++++++++++++++++++++++++++++++++++++++
> >  ndctl/lib/libndctl-private.h |   4 ++
> >  ndctl/lib/libndctl.c         |   2 +
> >  ndctl/lib/ndctl-msft.h       |  63 +++++++++++++++++++
> >  5 files changed, 213 insertions(+)
> >  create mode 100644 ndctl/lib/libndctl-msft.c
> >  create mode 100644 ndctl/lib/ndctl-msft.h
> >
> > diff --git a/ndctl/lib/Makefile.am b/ndctl/lib/Makefile.am
> > index 58a0bb3..7a446be 100644
> > --- a/ndctl/lib/Makefile.am
> > +++ b/ndctl/lib/Makefile.am
> > @@ -32,6 +32,7 @@ endif
> >  if ENABLE_SMART
> >  libndctl_la_SOURCES += libndctl-smart.c
> >  libndctl_la_SOURCES += libndctl-hpe1.c
> > +libndctl_la_SOURCES += libndctl-msft.c
> >  endif
> >
> >  EXTRA_DIST += libndctl.sym
> > diff --git a/ndctl/lib/libndctl-msft.c b/ndctl/lib/libndctl-msft.c
> > new file mode 100644
> > index 0000000..868e5c0
> > --- /dev/null
> > +++ b/ndctl/lib/libndctl-msft.c
> > @@ -0,0 +1,143 @@
> > +/*
> > + * Copyright (C) 2016-2017 Dell, Inc.
> > + * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
> > + * Copyright (c) 2016, Intel Corporation.
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms and conditions of the GNU Lesser General Public
> License,
> > + * version 2.1, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but WITHOUT
> ANY
> > + * WARRANTY; without even the implied warranty of MERCHANTABILITY
> or FITNESS
> > + * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
> License for
> > + * more details.
> > + */
> > +#include <stdlib.h>
> > +#include <limits.h>
> > +#include <util/log.h>
> > +#include <ndctl/libndctl.h>
> > +#include "libndctl-private.h"
> > +#include "ndctl-msft.h"
> > +
> > +#define CMD_MSFT(_c) ((_c)->msft)
> > +#define CMD_MSFT_SMART(_c) (CMD_MSFT(_c)->u.smart.data)
> > +
> > +static struct ndctl_cmd *msft_dimm_cmd_new_smart(struct ndctl_dimm
> *dimm)
> > +{
> > +	struct ndctl_bus *bus = ndctl_dimm_get_bus(dimm);
> > +	struct ndctl_ctx *ctx = ndctl_bus_get_ctx(bus);
> > +	struct ndctl_cmd *cmd;
> > +	size_t size;
> > +	struct ndn_pkg_msft *msft;
> > +
> > +	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) {
> > +		dbg(ctx, "unsupported cmd\n");
> > +		return NULL;
> > +	}
> > +
> > +	size = sizeof(*cmd) + sizeof(struct ndn_pkg_msft);
> > +	cmd = calloc(1, size);
> > +	if (!cmd)
> > +		return NULL;
> > +
> > +	cmd->dimm = dimm;
> > +	ndctl_cmd_ref(cmd);
> > +	cmd->type = ND_CMD_CALL;
> > +	cmd->size = size;
> > +	cmd->status = 1;
> > +
> > +	msft = CMD_MSFT(cmd);
> > +	msft->gen.nd_family = NVDIMM_FAMILY_MSFT;
> > +	msft->gen.nd_command = NDN_MSFT_CMD_SMART;
> > +	msft->gen.nd_fw_size = 0;
> > +	msft->gen.nd_size_in = offsetof(struct ndn_msft_smart, status);
> > +	msft->gen.nd_size_out = sizeof(msft->u.smart);
> > +	msft->u.smart.status = 0;
> > +
> > +	cmd->firmware_status = &msft->u.smart.status;
> > +
> > +	return cmd;
> > +}
> > +
> > +static int msft_smart_valid(struct ndctl_cmd *cmd)
> > +{
> > +	if (cmd->type != ND_CMD_CALL ||
> > +	    cmd->size != sizeof(*cmd) + sizeof(struct ndn_pkg_msft) ||
> > +	    CMD_MSFT(cmd)->gen.nd_family != NVDIMM_FAMILY_MSFT ||
> > +	    CMD_MSFT(cmd)->gen.nd_command !=
> NDN_MSFT_CMD_SMART ||
> > +	    cmd->status != 0)
> > +		return cmd->status < 0 ? cmd->status : -EINVAL;
> > +	return 0;
> > +}
> > +
> > +static unsigned int msft_cmd_smart_get_flags(struct ndctl_cmd *cmd)
> > +{
> > +	if (msft_smart_valid(cmd) < 0)
> > +		return UINT_MAX;
> > +
> > +	/* below health data can be retrieved via MSFT _DSM function 11 */
> > +	return NDN_MSFT_SMART_HEALTH_VALID |
> > +		NDN_MSFT_SMART_TEMP_VALID |
> > +		NDN_MSFT_SMART_USED_VALID;
> > +}
> > +
> > +static unsigned int num_set_bit_health(__u16 num)
> > +{
> > +	int i;
> > +	__u16 n = num & 0x7FFF;
> > +	unsigned int count = 0;
> > +
> > +	for (i = 0; i < 15; i++)
> > +		if (!!(n & (1 << i)))
> > +			count++;
> > +
> > +	return count;
> > +}
> > +
> > +static unsigned int msft_cmd_smart_get_health(struct ndctl_cmd *cmd)
> > +{
> > +	unsigned int health;
> > +	unsigned int num;
> > +
> > +	if (msft_smart_valid(cmd) < 0)
> > +		return UINT_MAX;
> > +
> > +	num = num_set_bit_health(CMD_MSFT_SMART(cmd)->health);
> > +	if (num == 0)
> > +		health = 0;
> > +	else if (num < 2)
> > +		health = ND_SMART_NON_CRITICAL_HEALTH;
> > +	else if (num < 3)
> > +		health = ND_SMART_CRITICAL_HEALTH;
> > +	else
> > +		health = ND_SMART_FATAL_HEALTH;
> > +
> > +	return health;
> > +}
> > +
> > +static unsigned int msft_cmd_smart_get_temperature(struct ndctl_cmd
> *cmd)
> > +{
> > +	if (msft_smart_valid(cmd) < 0)
> > +		return UINT_MAX;
> > +	/*
> > +	 * refer to JESD245 spec section 7.8 to calculate the temperature
> > +	 * then convert to 1/16 Celsius granularity.
> > +	 */
> 
> I'm confused by this comment because the Microsoft DSM spec says this
> value
> is in degrees C.  Regardless of what the JEDAC spec says, wouldn't the
> platform FW convert to degrees C?  And then this function needs to convert
> to 1/16th of a degree?

MSFT DSM function 11 has a field called "Module Current Temperature".
JESD245.pdf section 7.8 p. 29 table 3 explains the bit definition.
Bit 3 to bit 0 is 1/2 1/4 (1/8) (1/16) granularity.
&0FFC will get it correctly.
No negative temperature is defined.
 
Lijun

> 
> > +	return CMD_MSFT_SMART(cmd)->temp & 0x0FFC;
> 
> > +}
> > +
> > +static unsigned int msft_cmd_smart_get_life_used(struct ndctl_cmd
> *cmd)
> > +{
> > +	if (msft_smart_valid(cmd) < 0)
> > +		return UINT_MAX;
> > +
> > +	return 100 - CMD_MSFT_SMART(cmd)->nvm_lifetime;
> > +}
> > +
> > +struct ndctl_smart_ops * const msft_smart_ops = &(struct
> ndctl_smart_ops) {
> > +	.new_smart = msft_dimm_cmd_new_smart,
> > +	.smart_get_flags = msft_cmd_smart_get_flags,
> > +	.smart_get_health = msft_cmd_smart_get_health,
> > +	.smart_get_temperature = msft_cmd_smart_get_temperature,
> > +	.smart_get_life_used = msft_cmd_smart_get_life_used,
> > +};
> > diff --git a/ndctl/lib/libndctl-private.h b/ndctl/lib/libndctl-private.h
> > index 3e67db0..8f10fbc 100644
> > --- a/ndctl/lib/libndctl-private.h
> > +++ b/ndctl/lib/libndctl-private.h
> > @@ -32,6 +32,7 @@
> >  #include <ccan/endian/endian.h>
> >  #include <ccan/short_types/short_types.h>
> >  #include "ndctl-hpe1.h"
> > +#include "ndctl-msft.h"
> >
> >  #define SZ_16M 0x01000000
> >
> > @@ -196,6 +197,7 @@ struct ndctl_cmd {
> >  		struct nd_cmd_clear_error clear_err[0];
> >  #endif
> >  		struct ndn_pkg_hpe1 hpe1[0];
> > +		struct ndn_pkg_msft msft[0];
> >  		struct nd_cmd_smart smart[0];
> >  		struct nd_cmd_smart_threshold smart_t[0];
> >  		struct nd_cmd_get_config_size get_size[0];
> > @@ -226,9 +228,11 @@ struct ndctl_smart_ops {
> >  #if HAS_SMART == 1
> >  struct ndctl_smart_ops * const intel_smart_ops;
> >  struct ndctl_smart_ops * const hpe1_smart_ops;
> > +struct ndctl_smart_ops * const msft_smart_ops;
> >  #else
> >  static struct ndctl_smart_ops * const intel_smart_ops = NULL;
> >  static struct ndctl_smart_ops * const hpe1_smart_ops = NULL;
> > +static struct ndctl_smart_ops * const msft_smart_ops = NULL;
> >  #endif
> >
> >  /* internal library helpers for conditionally defined command numbers */
> > diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
> > index 565c969..e5e027a 100644
> > --- a/ndctl/lib/libndctl.c
> > +++ b/ndctl/lib/libndctl.c
> > @@ -1254,6 +1254,8 @@ static void *add_dimm(void *parent, int id, const
> char *dimm_base)
> >  		dimm->dsm_family = strtoul(buf, NULL, 0);
> >  	if (dimm->dsm_family == NVDIMM_FAMILY_HPE1)
> >  		dimm->smart_ops = hpe1_smart_ops;
> > +	if (dimm->dsm_family == NVDIMM_FAMILY_MSFT)
> > +		dimm->smart_ops = msft_smart_ops;
> >
> >  	dimm->formats = formats;
> >  	sprintf(path, "%s/nfit/format", dimm_base);
> > diff --git a/ndctl/lib/ndctl-msft.h b/ndctl/lib/ndctl-msft.h
> > new file mode 100644
> > index 0000000..0a1c7c6
> > --- /dev/null
> > +++ b/ndctl/lib/ndctl-msft.h
> > @@ -0,0 +1,63 @@
> > +/*
> > + * Copyright (C) 2016-2017 Dell, Inc.
> > + * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
> > + * Copyright (c) 2014-2015, Intel Corporation.
> > + *
> > + * This program is free software; you can redistribute it and/or modify it
> > + * under the terms and conditions of the GNU Lesser General Public
> License,
> > + * version 2.1, as published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope it will be useful, but WITHOUT
> ANY
> > + * WARRANTY; without even the implied warranty of MERCHANTABILITY
> or FITNESS
> > + * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
> License for
> > + * more details.
> > + */
> > +#ifndef __NDCTL_MSFT_H__
> > +#define __NDCTL_MSFT_H__
> > +
> > +enum {
> > +	NDN_MSFT_CMD_QUERY = 0,
> > +
> > +	/* non-root commands */
> > +	NDN_MSFT_CMD_SMART = 11,
> > +};
> > +
> > +/* NDN_MSFT_CMD_SMART */
> > +#define NDN_MSFT_SMART_HEALTH_VALID
> 	ND_SMART_HEALTH_VALID
> > +#define NDN_MSFT_SMART_TEMP_VALID	ND_SMART_TEMP_VALID
> > +#define NDN_MSFT_SMART_USED_VALID	ND_SMART_USED_VALID
> > +
> > +/*
> > + * This is actually function 11 data,
> > + * This is the closest I can find to match smart
> > + * Microsoft _DSM does not have smart function
> > + */
> > +struct ndn_msft_smart_data {
> > +	__u16	health;
> > +	__u16	temp;
> > +	__u8	err_thresh_stat;
> > +	__u8	warn_thresh_stat;
> > +	__u8	nvm_lifetime;
> > +	__u8	count_dram_uncorr_err;
> > +	__u8	count_dram_corr_err;
> > +} __attribute__((packed));
> > +
> > +struct ndn_msft_smart {
> > +	__u32	status;
> > +	union {
> > +		__u8 buf[9];
> > +		struct ndn_msft_smart_data data[0];
> > +	};
> > +} __attribute__((packed));
> > +
> > +union ndn_msft_cmd {
> > +	__u32			query;
> > +	struct ndn_msft_smart	smart;
> > +} __attribute__((packed));
> > +
> > +struct ndn_pkg_msft {
> > +	struct nd_cmd_pkg	gen;
> > +	union ndn_msft_cmd	u;
> > +} __attribute__((packed));
> > +
> > +#endif /* __NDCTL_MSFT_H__ */
> >
>
Linda Knippers March 29, 2017, 3:29 p.m. UTC | #5
On 03/29/2017 11:08 AM, Lijun_Pan@Dell.com wrote:
<snip>

>>> +static unsigned int msft_cmd_smart_get_temperature(struct ndctl_cmd
>> *cmd)
>>> +{
>>> +	if (msft_smart_valid(cmd) < 0)
>>> +		return UINT_MAX;
>>> +	/*
>>> +	 * refer to JESD245 spec section 7.8 to calculate the temperature
>>> +	 * then convert to 1/16 Celsius granularity.
>>> +	 */
>>
>> I'm confused by this comment because the Microsoft DSM spec says this
>> value
>> is in degrees C.  Regardless of what the JEDAC spec says, wouldn't the
>> platform FW convert to degrees C?  And then this function needs to convert
>> to 1/16th of a degree?
> 
> MSFT DSM function 11 has a field called "Module Current Temperature".

When I look here:
https://msdn.microsoft.com/en-us/library/windows/hardware/mt604717
at the definition of function 11, it says that Module Current Temperature
 is 2 bytes at offset 6 and that the value is "The module temperature
in degrees Celsius. The minimum value is 0".

> JESD245.pdf section 7.8 p. 29 table 3 explains the bit definition.
> Bit 3 to bit 0 is 1/2 1/4 (1/8) (1/16) granularity.
> &0FFC will get it correctly.

That might be what the BIOS had to get from the device, but the BIOS
should then be converting it to what the DSM says it returns.

And since the rest of ndctl expects temperatures in 16ths of a degree C,
your function here will need to multiply the value from the DSM by 16.

> No negative temperature is defined.
>  
> Lijun
> 
>>
>>> +	return CMD_MSFT_SMART(cmd)->temp & 0x0FFC;

return CMD_MSFT_SMART(cmd)->temp * 16;

-- ljk
Dan Williams March 29, 2017, 6:13 p.m. UTC | #6
On Sun, Mar 26, 2017 at 1:17 PM, Lijun Pan <lijunpan2000@gmail.com> wrote:
> From: Lijun Pan <Lijun.Pan@dell.com>
>
> This patch retrieves the health data from NVDIMM-N via
> the MSFT _DSM function[1], following JESD245A[2] standards.
> Now 'ndctl list --dimms --health --idle' could work
> on MSFT type NVDIMM-N, but limited to health_state,
> temperature_celsius, and life_used_percentage.

Looks good, can you add sample output of:

    ndctl list --dimms --health --idle

...so users know what to expect. With that change and addressing
Linda's comment about the temperature multiplier I think we're good to
go.

Also, if you want to add Microsoft-only health attributes we'll need
to add new "valid" flags beyond the current ND_SMART_*_VALID set. If
this goes beyond the current 32 "valid" flags that that
ndctl_cmd_smart_get_flags() returns we might need a new
ndctl_cmd_smart_get_flags2() call that returns an arbitrary bitmap of
valid flags.

We'd also need to move those definitions to an ndctl local header.
Currently where they are defined now in ndctl.h is owned by the
kernel. We can cross this bridge later in a follow-on patch.
Lijun.Pan@dell.com March 29, 2017, 9:41 p.m. UTC | #7
Dell - Internal Use - Confidential  



> -----Original Message-----
> From: Dan Williams [mailto:dan.j.williams@intel.com]
> Sent: Wednesday, March 29, 2017 1:13 PM
> To: Lijun Pan <lijunpan2000@gmail.com>
> Cc: linux-nvdimm@lists.01.org; Pan, Lijun <Lijun_Pan@Dell.com>; Hayes,
> Stuart <Stuart_Hayes@Dell.com>
> Subject: Re: [PATCH v2] libndctl: add support for the MSFT family of DSM
> functions
> 
> On Sun, Mar 26, 2017 at 1:17 PM, Lijun Pan <lijunpan2000@gmail.com> wrote:
> > From: Lijun Pan <Lijun.Pan@dell.com>
> >
> > This patch retrieves the health data from NVDIMM-N via
> > the MSFT _DSM function[1], following JESD245A[2] standards.
> > Now 'ndctl list --dimms --health --idle' could work
> > on MSFT type NVDIMM-N, but limited to health_state,
> > temperature_celsius, and life_used_percentage.
> 
> Looks good, can you add sample output of:
> 
>     ndctl list --dimms --health --idle
> 


Dan,

Do you want me to put this sample output in the v3 patch's commit message?

Output is something like,

  "health":{
    "health_state":"ok",
    "temperature_celsius":193.750000,
    "life_used_percentage":1
  }

If the BIOS returns a value say 45.75, how can it be represented in (__u16) 
instead of using a 'double' type? I think the ((__u16) temp & 0x0FFC) already 
represents a temperature in 1/16 Celsius granularity. No need to multiple it by
16 again.

Below I quote the section 7.8 of JESD245.pdf
Bit 3 has a resolution of  1/2 Celsius, 
Bit 2 has a resolution of 1/4 Celsius,
Bit 1 has 1/8 Celsius, but is reserved, so I assume it be zero,
Bit 0 has 1/16 Celsius, but is reserved, so I assume it be zero.

((__u16) temp & 0x0FFC) will only get the bit11 - bit 0. This presents the 
1/16 Celsius resolution, kind of left shifted 4 bits.
So we don't need to do
'return CMD_MSFT_SMART(cmd)->temp * 16;' again.
 
==== = quotation starts  =====
"
Temperature measurement shall have a minimum resolution of 0.25 Celsius. Registers containing measured temperature value shall be 16-bits and report temperature as a 10-bit value based on the following definition.

Table 3: Temperature value bit definition
Bit15	Bit14	Bit13	Bit12	Bit11	Bit10	Bit9	Bit8
Reserved	Reserved	Reserved	Reserved	128	64	32	16
Bit7	Bit6	Bit5	Bit4	Bit3	Bit2	Bit1	Bit0
8	4	2	1	0.5	0.25	Reserved	Reserved

Examples:

A temperature value of 10.5 Celsius is represented as 0000000010101000b.

A temperature value of 64.75 Celsius is represented as 0000010000001100b.
"

==== quotation ends ========


Lijun


> ...so users know what to expect. With that change and addressing
> Linda's comment about the temperature multiplier I think we're good to
> go.
> 
> Also, if you want to add Microsoft-only health attributes we'll need
> to add new "valid" flags beyond the current ND_SMART_*_VALID set. If
> this goes beyond the current 32 "valid" flags that that
> ndctl_cmd_smart_get_flags() returns we might need a new
> ndctl_cmd_smart_get_flags2() call that returns an arbitrary bitmap of
> valid flags.
> 
> We'd also need to move those definitions to an ndctl local header.
> Currently where they are defined now in ndctl.h is owned by the
> kernel. We can cross this bridge later in a follow-on patch.

I will do it on a follow-up patch later.

Lijun
Elliott, Robert (Servers) March 29, 2017, 10:26 p.m. UTC | #8
> -----Original Message-----
> From: Linux-nvdimm [mailto:linux-nvdimm-bounces@lists.01.org] On Behalf
> Of Lijun.Pan@dell.com
> Sent: Wednesday, March 29, 2017 4:41 PM
> To: dan.j.williams@intel.com; lijunpan2000@gmail.com
> Subject: RE: [PATCH v2] libndctl: add support for the MSFT family of DSM
> functions

> > -----Original Message-----
> > From: Dan Williams [mailto:dan.j.williams@intel.com]

> > > From: Lijun Pan <Lijun.Pan@dell.com>
> > >
> > > This patch retrieves the health data from NVDIMM-N via
> > > the MSFT _DSM function[1], following JESD245A[2] standards.
> > > Now 'ndctl list --dimms --health --idle' could work
> > > on MSFT type NVDIMM-N, but limited to health_state,
> > > temperature_celsius, and life_used_percentage.
> >
...
> Output is something like,
> 
>   "health":{
>     "health_state":"ok",
>     "temperature_celsius":193.750000,
>     "life_used_percentage":1
>   }
> 
> If the BIOS returns a value say 45.75, how can it be represented in (__u16)
> instead of using a 'double' type? I think the ((__u16) temp & 0x0FFC) already
> represents a temperature in 1/16 Celsius granularity. No need to multiple it
> by
> 16 again.
> 
> Below I quote the section 7.8 of JESD245.pdf
> Bit 3 has a resolution of  1/2 Celsius,
> Bit 2 has a resolution of 1/4 Celsius,
> Bit 1 has 1/8 Celsius, but is reserved, so I assume it be zero,
> Bit 0 has 1/16 Celsius, but is reserved, so I assume it be zero.
> 
> ((__u16) temp & 0x0FFC) will only get the bit11 - bit 0. This presents the
> 1/16 Celsius resolution, kind of left shifted 4 bits.
> So we don't need to do
> 'return CMD_MSFT_SMART(cmd)->temp * 16;' again.
> 
> ==== = quotation starts  =====
> "
> Temperature measurement shall have a minimum resolution of 0.25 Celsius.
> Registers containing measured temperature value shall be 16-bits and report
> temperature as a 10-bit value based on the following definition.
> 
> Table 3: Temperature value bit definition
> Bit15	Bit14	Bit13	Bit12	Bit11	Bit10	Bit9	Bit8
> Reserved	Reserved	Reserved	Reserved	128	64
> 	32	16
> Bit7	Bit6	Bit5	Bit4	Bit3	Bit2	Bit1	Bit0
> 8	4	2	1	0.5	0.25	Reserved	Reserved
> 
> Examples:
> 
> A temperature value of 10.5 Celsius is represented as 0000000010101000b.
> 
> A temperature value of 64.75 Celsius is represented as 0000010000001100b.
> "
> 
> ==== quotation ends ========
> 
> 
> Lijun
> 
> 
> > ...so users know what to expect. With that change and addressing
> > Linda's comment about the temperature multiplier I think we're good to
> > go.
> >

You should plan to support temperatures compliant with JESD21C 
Page 4.1.6-2 (TSE2004 SPD EEPROM with Temperature Sensor), which include:
* a sign bit to support negative temperatures (in bit 12)
* 0.125 and 0.0625 degrees C precision (i.e., bits 1 and 0 are defined)

Industrial Grade and Automotive Grade components operate down to
-40 degrees C, so negative values will be required.

The importance of extra precision is a but fuzzy given that sensor
accuracy varies from +/- 1 to +/- 3 degrees C (depending on the
temperature range).  A software API should pass along all the
available information, though.


---
Robert Elliott, HPE Persistent Memory
Lijun.Pan@dell.com March 29, 2017, 11:35 p.m. UTC | #9
Dell - Internal Use - Confidential  



> -----Original Message-----
> From: Elliott, Robert (Persistent Memory) [mailto:elliott@hpe.com]
> Sent: Wednesday, March 29, 2017 5:27 PM
> To: Pan, Lijun <Lijun_Pan@Dell.com>; dan.j.williams@intel.com;
> lijunpan2000@gmail.com
> Cc: Hayes, Stuart <Stuart_Hayes@Dell.com>; linux-nvdimm@lists.01.org
> Subject: RE: [PATCH v2] libndctl: add support for the MSFT family of DSM
> functions
> 
> > -----Original Message-----
> > From: Linux-nvdimm [mailto:linux-nvdimm-bounces@lists.01.org] On
> Behalf
> > Of Lijun.Pan@dell.com
> > Sent: Wednesday, March 29, 2017 4:41 PM
> > To: dan.j.williams@intel.com; lijunpan2000@gmail.com
> > Subject: RE: [PATCH v2] libndctl: add support for the MSFT family of DSM
> > functions
> 
> > > -----Original Message-----
> > > From: Dan Williams [mailto:dan.j.williams@intel.com]
> 
> > > > From: Lijun Pan <Lijun.Pan@dell.com>
> > > >
> > > > This patch retrieves the health data from NVDIMM-N via
> > > > the MSFT _DSM function[1], following JESD245A[2] standards.
> > > > Now 'ndctl list --dimms --health --idle' could work
> > > > on MSFT type NVDIMM-N, but limited to health_state,
> > > > temperature_celsius, and life_used_percentage.
> > >
> ...
> > Output is something like,
> >
> >   "health":{
> >     "health_state":"ok",
> >     "temperature_celsius":193.750000,
> >     "life_used_percentage":1
> >   }
> >
> > If the BIOS returns a value say 45.75, how can it be represented in (__u16)
> > instead of using a 'double' type? I think the ((__u16) temp & 0x0FFC)
> already
> > represents a temperature in 1/16 Celsius granularity. No need to multiple it
> > by
> > 16 again.
> >
> > Below I quote the section 7.8 of JESD245.pdf
> > Bit 3 has a resolution of  1/2 Celsius,
> > Bit 2 has a resolution of 1/4 Celsius,
> > Bit 1 has 1/8 Celsius, but is reserved, so I assume it be zero,
> > Bit 0 has 1/16 Celsius, but is reserved, so I assume it be zero.
> >
> > ((__u16) temp & 0x0FFC) will only get the bit11 - bit 0. This presents the
> > 1/16 Celsius resolution, kind of left shifted 4 bits.
> > So we don't need to do
> > 'return CMD_MSFT_SMART(cmd)->temp * 16;' again.
> >
> > ==== = quotation starts  =====
> > "
> > Temperature measurement shall have a minimum resolution of 0.25 Celsius.
> > Registers containing measured temperature value shall be 16-bits and
> report
> > temperature as a 10-bit value based on the following definition.
> >
> > Table 3: Temperature value bit definition
> > Bit15	Bit14	Bit13	Bit12	Bit11	Bit10	Bit9	Bit8
> > Reserved	Reserved	Reserved	Reserved	128	64
> > 	32	16
> > Bit7	Bit6	Bit5	Bit4	Bit3	Bit2	Bit1	Bit0
> > 8	4	2	1	0.5	0.25	Reserved	Reserved
> >
> > Examples:
> >
> > A temperature value of 10.5 Celsius is represented as 0000000010101000b.
> >
> > A temperature value of 64.75 Celsius is represented as 0000010000001100b.
> > "
> >
> > ==== quotation ends ========
> >
> >
> > Lijun
> >
> >
> > > ...so users know what to expect. With that change and addressing
> > > Linda's comment about the temperature multiplier I think we're good to
> > > go.
> > >
> 
> You should plan to support temperatures compliant with JESD21C
> Page 4.1.6-2 (TSE2004 SPD EEPROM with Temperature Sensor), which include:
> * a sign bit to support negative temperatures (in bit 12)
> * 0.125 and 0.0625 degrees C precision (i.e., bits 1 and 0 are defined)

Thanks for the suggestions.
I agree JESD21C looks better.
For now we need to bear with the JESD245A.

Lijun

> 
> Industrial Grade and Automotive Grade components operate down to
> -40 degrees C, so negative values will be required.
> 
> The importance of extra precision is a but fuzzy given that sensor
> accuracy varies from +/- 1 to +/- 3 degrees C (depending on the
> temperature range).  A software API should pass along all the
> available information, though.
> 
> 
> ---
> Robert Elliott, HPE Persistent Memory
>
Linda Knippers March 30, 2017, 3:19 p.m. UTC | #10
On 3/29/2017 5:41 PM, Lijun.Pan@dell.com wrote:
> Dell - Internal Use - Confidential
>
>
>
>> -----Original Message-----
>> From: Dan Williams [mailto:dan.j.williams@intel.com]
>> Sent: Wednesday, March 29, 2017 1:13 PM
>> To: Lijun Pan <lijunpan2000@gmail.com>
>> Cc: linux-nvdimm@lists.01.org; Pan, Lijun <Lijun_Pan@Dell.com>; Hayes,
>> Stuart <Stuart_Hayes@Dell.com>
>> Subject: Re: [PATCH v2] libndctl: add support for the MSFT family of DSM
>> functions
>>
>> On Sun, Mar 26, 2017 at 1:17 PM, Lijun Pan <lijunpan2000@gmail.com> wrote:
>>> From: Lijun Pan <Lijun.Pan@dell.com>
>>>
>>> This patch retrieves the health data from NVDIMM-N via
>>> the MSFT _DSM function[1], following JESD245A[2] standards.
>>> Now 'ndctl list --dimms --health --idle' could work
>>> on MSFT type NVDIMM-N, but limited to health_state,
>>> temperature_celsius, and life_used_percentage.
>>
>> Looks good, can you add sample output of:
>>
>>     ndctl list --dimms --health --idle
>>
>
>
> Dan,
>
> Do you want me to put this sample output in the v3 patch's commit message?
>
> Output is something like,
>
>   "health":{
>     "health_state":"ok",
>     "temperature_celsius":193.750000,

Is that a real example from a running system?
If so, something is clearly wrong.

>     "life_used_percentage":1
>   }
>
> If the BIOS returns a value say 45.75, how can it be represented in (__u16)
> instead of using a 'double' type? I think the ((__u16) temp & 0x0FFC) already
> represents a temperature in 1/16 Celsius granularity. No need to multiple it by
> 16 again.
>
> Below I quote the section 7.8 of JESD245.pdf

Why are you quoting that spec?  It doesn't matter what the JEDEC
spec says. What matters is what the Microsoft DSM spec says.
The Microsoft DSM spec says that the temperature is returned in degrees C.

If you're a FW developer, then you care very much what the JEDEC
spec says but the FW should convert the temperature to the units
that the DSM says it returns to the OS, which is degrees C.

> Bit 3 has a resolution of  1/2 Celsius,
> Bit 2 has a resolution of 1/4 Celsius,
> Bit 1 has 1/8 Celsius, but is reserved, so I assume it be zero,
> Bit 0 has 1/16 Celsius, but is reserved, so I assume it be zero.
>
> ((__u16) temp & 0x0FFC) will only get the bit11 - bit 0. This presents the
> 1/16 Celsius resolution, kind of left shifted 4 bits.
> So we don't need to do
> 'return CMD_MSFT_SMART(cmd)->temp * 16;' again.

You're looking at the wrong spec.  This is what you should
be looking at.
https://msdn.microsoft.com/en-us/library/windows/hardware/mt604717(v=vs.85).aspx

-- ljk

>
> ==== = quotation starts  =====
> "
> Temperature measurement shall have a minimum resolution of 0.25 Celsius. Registers containing measured temperature value shall be 16-bits and report temperature as a 10-bit value based on the following definition.
>
> Table 3: Temperature value bit definition
> Bit15	Bit14	Bit13	Bit12	Bit11	Bit10	Bit9	Bit8
> Reserved	Reserved	Reserved	Reserved	128	64	32	16
> Bit7	Bit6	Bit5	Bit4	Bit3	Bit2	Bit1	Bit0
> 8	4	2	1	0.5	0.25	Reserved	Reserved
>
> Examples:
>
> A temperature value of 10.5 Celsius is represented as 0000000010101000b.
>
> A temperature value of 64.75 Celsius is represented as 0000010000001100b.
> "
>
> ==== quotation ends ========
>
>
> Lijun
>
>
>> ...so users know what to expect. With that change and addressing
>> Linda's comment about the temperature multiplier I think we're good to
>> go.
>>
>> Also, if you want to add Microsoft-only health attributes we'll need
>> to add new "valid" flags beyond the current ND_SMART_*_VALID set. If
>> this goes beyond the current 32 "valid" flags that that
>> ndctl_cmd_smart_get_flags() returns we might need a new
>> ndctl_cmd_smart_get_flags2() call that returns an arbitrary bitmap of
>> valid flags.
>>
>> We'd also need to move those definitions to an ndctl local header.
>> Currently where they are defined now in ndctl.h is owned by the
>> kernel. We can cross this bridge later in a follow-on patch.
>
> I will do it on a follow-up patch later.
>
> Lijun
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
>
Linda Knippers March 30, 2017, 3:20 p.m. UTC | #11
On 3/29/2017 6:26 PM, Elliott, Robert (Persistent Memory) wrote:
>> -----Original Message-----
>> From: Linux-nvdimm [mailto:linux-nvdimm-bounces@lists.01.org] On Behalf
>> Of Lijun.Pan@dell.com
>> Sent: Wednesday, March 29, 2017 4:41 PM
>> To: dan.j.williams@intel.com; lijunpan2000@gmail.com
>> Subject: RE: [PATCH v2] libndctl: add support for the MSFT family of DSM
>> functions
>
>>> -----Original Message-----
>>> From: Dan Williams [mailto:dan.j.williams@intel.com]
>
>>>> From: Lijun Pan <Lijun.Pan@dell.com>
>>>>
>>>> This patch retrieves the health data from NVDIMM-N via
>>>> the MSFT _DSM function[1], following JESD245A[2] standards.
>>>> Now 'ndctl list --dimms --health --idle' could work
>>>> on MSFT type NVDIMM-N, but limited to health_state,
>>>> temperature_celsius, and life_used_percentage.
>>>
> ...
>> Output is something like,
>>
>>   "health":{
>>     "health_state":"ok",
>>     "temperature_celsius":193.750000,
>>     "life_used_percentage":1
>>   }
>>
>> If the BIOS returns a value say 45.75, how can it be represented in (__u16)
>> instead of using a 'double' type? I think the ((__u16) temp & 0x0FFC) already
>> represents a temperature in 1/16 Celsius granularity. No need to multiple it
>> by
>> 16 again.
>>
>> Below I quote the section 7.8 of JESD245.pdf
>> Bit 3 has a resolution of  1/2 Celsius,
>> Bit 2 has a resolution of 1/4 Celsius,
>> Bit 1 has 1/8 Celsius, but is reserved, so I assume it be zero,
>> Bit 0 has 1/16 Celsius, but is reserved, so I assume it be zero.
>>
>> ((__u16) temp & 0x0FFC) will only get the bit11 - bit 0. This presents the
>> 1/16 Celsius resolution, kind of left shifted 4 bits.
>> So we don't need to do
>> 'return CMD_MSFT_SMART(cmd)->temp * 16;' again.
>>
>> ==== = quotation starts  =====
>> "
>> Temperature measurement shall have a minimum resolution of 0.25 Celsius.
>> Registers containing measured temperature value shall be 16-bits and report
>> temperature as a 10-bit value based on the following definition.
>>
>> Table 3: Temperature value bit definition
>> Bit15	Bit14	Bit13	Bit12	Bit11	Bit10	Bit9	Bit8
>> Reserved	Reserved	Reserved	Reserved	128	64
>> 	32	16
>> Bit7	Bit6	Bit5	Bit4	Bit3	Bit2	Bit1	Bit0
>> 8	4	2	1	0.5	0.25	Reserved	Reserved
>>
>> Examples:
>>
>> A temperature value of 10.5 Celsius is represented as 0000000010101000b.
>>
>> A temperature value of 64.75 Celsius is represented as 0000010000001100b.
>> "
>>
>> ==== quotation ends ========
>>
>>
>> Lijun
>>
>>
>>> ...so users know what to expect. With that change and addressing
>>> Linda's comment about the temperature multiplier I think we're good to
>>> go.
>>>
>
> You should plan to support temperatures compliant with JESD21C
> Page 4.1.6-2 (TSE2004 SPD EEPROM with Temperature Sensor), which include:
> * a sign bit to support negative temperatures (in bit 12)
> * 0.125 and 0.0625 degrees C precision (i.e., bits 1 and 0 are defined)
>
> Industrial Grade and Automotive Grade components operate down to
> -40 degrees C, so negative values will be required.
>
> The importance of extra precision is a but fuzzy given that sensor
> accuracy varies from +/- 1 to +/- 3 degrees C (depending on the
> temperature range).  A software API should pass along all the
> available information, though.

Interesting, but until someone changes their DSM spec, none of that matters.
This patch is supposed to be implementing the MSFT DSM spec.

-- ljk
>
>
> ---
> Robert Elliott, HPE Persistent Memory
>
>
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm
>
diff mbox

Patch

diff --git a/ndctl/lib/Makefile.am b/ndctl/lib/Makefile.am
index 58a0bb3..7a446be 100644
--- a/ndctl/lib/Makefile.am
+++ b/ndctl/lib/Makefile.am
@@ -32,6 +32,7 @@  endif
 if ENABLE_SMART
 libndctl_la_SOURCES += libndctl-smart.c
 libndctl_la_SOURCES += libndctl-hpe1.c
+libndctl_la_SOURCES += libndctl-msft.c
 endif
 
 EXTRA_DIST += libndctl.sym
diff --git a/ndctl/lib/libndctl-msft.c b/ndctl/lib/libndctl-msft.c
new file mode 100644
index 0000000..868e5c0
--- /dev/null
+++ b/ndctl/lib/libndctl-msft.c
@@ -0,0 +1,143 @@ 
+/*
+ * Copyright (C) 2016-2017 Dell, Inc.
+ * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ */
+#include <stdlib.h>
+#include <limits.h>
+#include <util/log.h>
+#include <ndctl/libndctl.h>
+#include "libndctl-private.h"
+#include "ndctl-msft.h"
+
+#define CMD_MSFT(_c) ((_c)->msft)
+#define CMD_MSFT_SMART(_c) (CMD_MSFT(_c)->u.smart.data)
+
+static struct ndctl_cmd *msft_dimm_cmd_new_smart(struct ndctl_dimm *dimm)
+{
+	struct ndctl_bus *bus = ndctl_dimm_get_bus(dimm);
+	struct ndctl_ctx *ctx = ndctl_bus_get_ctx(bus);
+	struct ndctl_cmd *cmd;
+	size_t size;
+	struct ndn_pkg_msft *msft;
+
+	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL)) {
+		dbg(ctx, "unsupported cmd\n");
+		return NULL;
+	}
+
+	size = sizeof(*cmd) + sizeof(struct ndn_pkg_msft);
+	cmd = calloc(1, size);
+	if (!cmd)
+		return NULL;
+
+	cmd->dimm = dimm;
+	ndctl_cmd_ref(cmd);
+	cmd->type = ND_CMD_CALL;
+	cmd->size = size;
+	cmd->status = 1;
+
+	msft = CMD_MSFT(cmd);
+	msft->gen.nd_family = NVDIMM_FAMILY_MSFT;
+	msft->gen.nd_command = NDN_MSFT_CMD_SMART;
+	msft->gen.nd_fw_size = 0;
+	msft->gen.nd_size_in = offsetof(struct ndn_msft_smart, status);
+	msft->gen.nd_size_out = sizeof(msft->u.smart);
+	msft->u.smart.status = 0;
+
+	cmd->firmware_status = &msft->u.smart.status;
+
+	return cmd;
+}
+
+static int msft_smart_valid(struct ndctl_cmd *cmd)
+{
+	if (cmd->type != ND_CMD_CALL ||
+	    cmd->size != sizeof(*cmd) + sizeof(struct ndn_pkg_msft) ||
+	    CMD_MSFT(cmd)->gen.nd_family != NVDIMM_FAMILY_MSFT ||
+	    CMD_MSFT(cmd)->gen.nd_command != NDN_MSFT_CMD_SMART ||
+	    cmd->status != 0)
+		return cmd->status < 0 ? cmd->status : -EINVAL;
+	return 0;
+}
+
+static unsigned int msft_cmd_smart_get_flags(struct ndctl_cmd *cmd)
+{
+	if (msft_smart_valid(cmd) < 0)
+		return UINT_MAX;
+
+	/* below health data can be retrieved via MSFT _DSM function 11 */
+	return NDN_MSFT_SMART_HEALTH_VALID |
+		NDN_MSFT_SMART_TEMP_VALID |
+		NDN_MSFT_SMART_USED_VALID;
+}
+
+static unsigned int num_set_bit_health(__u16 num)
+{
+	int i;
+	__u16 n = num & 0x7FFF;
+	unsigned int count = 0;
+
+	for (i = 0; i < 15; i++)
+		if (!!(n & (1 << i)))
+			count++;
+
+	return count;
+}
+
+static unsigned int msft_cmd_smart_get_health(struct ndctl_cmd *cmd)
+{
+	unsigned int health;
+	unsigned int num;
+
+	if (msft_smart_valid(cmd) < 0)
+		return UINT_MAX;
+
+	num = num_set_bit_health(CMD_MSFT_SMART(cmd)->health);
+	if (num == 0)
+		health = 0;
+	else if (num < 2)
+		health = ND_SMART_NON_CRITICAL_HEALTH;
+	else if (num < 3)
+		health = ND_SMART_CRITICAL_HEALTH;
+	else
+		health = ND_SMART_FATAL_HEALTH;
+
+	return health;
+}
+
+static unsigned int msft_cmd_smart_get_temperature(struct ndctl_cmd *cmd)
+{
+	if (msft_smart_valid(cmd) < 0)
+		return UINT_MAX;
+	/*
+	 * refer to JESD245 spec section 7.8 to calculate the temperature
+	 * then convert to 1/16 Celsius granularity.
+	 */
+	return CMD_MSFT_SMART(cmd)->temp & 0x0FFC;
+}
+
+static unsigned int msft_cmd_smart_get_life_used(struct ndctl_cmd *cmd)
+{
+	if (msft_smart_valid(cmd) < 0)
+		return UINT_MAX;
+
+	return 100 - CMD_MSFT_SMART(cmd)->nvm_lifetime;
+}
+
+struct ndctl_smart_ops * const msft_smart_ops = &(struct ndctl_smart_ops) {
+	.new_smart = msft_dimm_cmd_new_smart,
+	.smart_get_flags = msft_cmd_smart_get_flags,
+	.smart_get_health = msft_cmd_smart_get_health,
+	.smart_get_temperature = msft_cmd_smart_get_temperature,
+	.smart_get_life_used = msft_cmd_smart_get_life_used,
+};
diff --git a/ndctl/lib/libndctl-private.h b/ndctl/lib/libndctl-private.h
index 3e67db0..8f10fbc 100644
--- a/ndctl/lib/libndctl-private.h
+++ b/ndctl/lib/libndctl-private.h
@@ -32,6 +32,7 @@ 
 #include <ccan/endian/endian.h>
 #include <ccan/short_types/short_types.h>
 #include "ndctl-hpe1.h"
+#include "ndctl-msft.h"
 
 #define SZ_16M 0x01000000
 
@@ -196,6 +197,7 @@  struct ndctl_cmd {
 		struct nd_cmd_clear_error clear_err[0];
 #endif
 		struct ndn_pkg_hpe1 hpe1[0];
+		struct ndn_pkg_msft msft[0];
 		struct nd_cmd_smart smart[0];
 		struct nd_cmd_smart_threshold smart_t[0];
 		struct nd_cmd_get_config_size get_size[0];
@@ -226,9 +228,11 @@  struct ndctl_smart_ops {
 #if HAS_SMART == 1
 struct ndctl_smart_ops * const intel_smart_ops;
 struct ndctl_smart_ops * const hpe1_smart_ops;
+struct ndctl_smart_ops * const msft_smart_ops;
 #else
 static struct ndctl_smart_ops * const intel_smart_ops = NULL;
 static struct ndctl_smart_ops * const hpe1_smart_ops = NULL;
+static struct ndctl_smart_ops * const msft_smart_ops = NULL;
 #endif
 
 /* internal library helpers for conditionally defined command numbers */
diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
index 565c969..e5e027a 100644
--- a/ndctl/lib/libndctl.c
+++ b/ndctl/lib/libndctl.c
@@ -1254,6 +1254,8 @@  static void *add_dimm(void *parent, int id, const char *dimm_base)
 		dimm->dsm_family = strtoul(buf, NULL, 0);
 	if (dimm->dsm_family == NVDIMM_FAMILY_HPE1)
 		dimm->smart_ops = hpe1_smart_ops;
+	if (dimm->dsm_family == NVDIMM_FAMILY_MSFT)
+		dimm->smart_ops = msft_smart_ops;
 
 	dimm->formats = formats;
 	sprintf(path, "%s/nfit/format", dimm_base);
diff --git a/ndctl/lib/ndctl-msft.h b/ndctl/lib/ndctl-msft.h
new file mode 100644
index 0000000..0a1c7c6
--- /dev/null
+++ b/ndctl/lib/ndctl-msft.h
@@ -0,0 +1,63 @@ 
+/*
+ * Copyright (C) 2016-2017 Dell, Inc.
+ * Copyright (C) 2016 Hewlett Packard Enterprise Development LP
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ */
+#ifndef __NDCTL_MSFT_H__
+#define __NDCTL_MSFT_H__
+
+enum {
+	NDN_MSFT_CMD_QUERY = 0,
+
+	/* non-root commands */
+	NDN_MSFT_CMD_SMART = 11,
+};
+
+/* NDN_MSFT_CMD_SMART */
+#define NDN_MSFT_SMART_HEALTH_VALID	ND_SMART_HEALTH_VALID
+#define NDN_MSFT_SMART_TEMP_VALID	ND_SMART_TEMP_VALID
+#define NDN_MSFT_SMART_USED_VALID	ND_SMART_USED_VALID
+
+/*
+ * This is actually function 11 data,
+ * This is the closest I can find to match smart
+ * Microsoft _DSM does not have smart function
+ */
+struct ndn_msft_smart_data {
+	__u16	health;
+	__u16	temp;
+	__u8	err_thresh_stat;
+	__u8	warn_thresh_stat;
+	__u8	nvm_lifetime;
+	__u8	count_dram_uncorr_err;
+	__u8	count_dram_corr_err;
+} __attribute__((packed));
+
+struct ndn_msft_smart {
+	__u32	status;
+	union {
+		__u8 buf[9];
+		struct ndn_msft_smart_data data[0];
+	};
+} __attribute__((packed));
+
+union ndn_msft_cmd {
+	__u32			query;
+	struct ndn_msft_smart	smart;
+} __attribute__((packed));
+
+struct ndn_pkg_msft {
+	struct nd_cmd_pkg	gen;
+	union ndn_msft_cmd	u;
+} __attribute__((packed));
+
+#endif /* __NDCTL_MSFT_H__ */