diff mbox series

[ndctl,2/2] libndctl/papr: Add limited support for inject-smart

Message ID 20210712173132.1205192-3-vaibhav@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series papr: Implement initial support for injecting smart errors | expand

Commit Message

Vaibhav Jain July 12, 2021, 5:31 p.m. UTC
Implements support for ndctl inject-smart command by providing an
implementation of 'smart_inject*' dimm-ops callbacks. Presently only
support for injecting unsafe-shutdown and fatal-health states is
available.

The patch also introduces various PAPR PDSM structures that are used to
communicate the inject-smart errors to the papr_scm kernel
module. This is done via SMART_INJECT PDSM which sends a payload of
type 'struct nd_papr_pdsm_smart_inject'.

The patch depends on the kernel PAPR PDSM implementation for
PDSM_SMART_INJECT posted at [1].

[1] : https://lore.kernel.org/nvdimm/20210712084819.1150350-1-vaibhav@linux.ibm.com
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
 ndctl/lib/papr.c      | 61 +++++++++++++++++++++++++++++++++++++++++++
 ndctl/lib/papr_pdsm.h | 17 ++++++++++++
 2 files changed, 78 insertions(+)

Comments

Aneesh Kumar K.V July 13, 2021, 4:43 a.m. UTC | #1
On 7/12/21 11:01 PM, Vaibhav Jain wrote:
> Implements support for ndctl inject-smart command by providing an
> implementation of 'smart_inject*' dimm-ops callbacks. Presently only
> support for injecting unsafe-shutdown and fatal-health states is
> available.
> 
> The patch also introduce various PAPR PDSM structures that are used to
> communicate the inject-smart errors to the papr_scm kernel
> module. This is done via SMART_INJECT PDSM which sends a payload of
> type 'struct nd_papr_pdsm_smart_inject'.
> 
> The patch depends on the kernel PAPR PDSM implementation for
> PDSM_SMART_INJECT posted at [1].
> 
> [1] : https://lore.kernel.org/nvdimm/20210712084819.1150350-1-vaibhav@linux.ibm.com
> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> ---
>   ndctl/lib/papr.c      | 61 +++++++++++++++++++++++++++++++++++++++++++
>   ndctl/lib/papr_pdsm.h | 17 ++++++++++++
>   2 files changed, 78 insertions(+)
> 
> diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c
> index 42ff200dc588..b797e1e5fe8b 100644
> --- a/ndctl/lib/papr.c
> +++ b/ndctl/lib/papr.c
> @@ -221,6 +221,41 @@ static unsigned int papr_smart_get_shutdown_state(struct ndctl_cmd *cmd)
>   	return health.dimm_bad_shutdown;
>   }
>   
> +static int papr_smart_inject_supported(struct ndctl_dimm *dimm)
> +{
> +	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL))
> +		return -EOPNOTSUPP;
> +
> +	if (!test_dimm_dsm(dimm, PAPR_PDSM_SMART_INJECT))
> +		return -EIO;
> +
> +	return ND_SMART_INJECT_HEALTH_STATE | ND_SMART_INJECT_UNCLEAN_SHUTDOWN;
> +}
> +

With the ndtest PAPR_SCM_FAMILY driver, should we test more inject types? If
so, should the supported inject types be fetched from the driver?

> +static int papr_smart_inject_valid(struct ndctl_cmd *cmd)
> +{
> +	if (cmd->type != ND_CMD_CALL ||
> +	    to_pdsm(cmd)->cmd_status != 0 ||
> +	    to_pdsm_cmd(cmd) != PAPR_PDSM_SMART_INJECT)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +static struct ndctl_cmd *papr_new_smart_inject(struct ndctl_dimm *dimm)
> +{
> +	struct ndctl_cmd *cmd;
> +
> +	cmd = allocate_cmd(dimm, PAPR_PDSM_SMART_INJECT,
> +			sizeof(struct nd_papr_pdsm_smart_inject));
> +	if (!cmd)
> +		return NULL;
> +	/* Set the input payload size */
> +	to_ndcmd(cmd)->nd_size_in = ND_PDSM_HDR_SIZE +
> +		sizeof(struct nd_papr_pdsm_smart_inject);
> +	return cmd;
> +}
> +
>   static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd)
>   {
>   	struct nd_papr_pdsm_health health;
> @@ -255,11 +290,37 @@ static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd)
>   
>   	return (health.extension_flags & PDSM_DIMM_DSC_VALID) ?
>   		(health.dimm_dsc) : 0;
> +}
> +
> +static int papr_cmd_smart_inject_fatal(struct ndctl_cmd *cmd, bool enable)
> +{
> +	if (papr_smart_inject_valid(cmd) < 0)
> +		return -EINVAL;
> +
> +	to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
> +	to_payload(cmd)->inject.fatal_enable = enable;
>   
> +	return 0;
> +}
> +
> +static int papr_cmd_smart_inject_unsafe_shutdown(struct ndctl_cmd *cmd,
> +						 bool enable)
> +{
> +	if (papr_smart_inject_valid(cmd) < 0)
> +		return -EINVAL;
> +
> +	to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
> +	to_payload(cmd)->inject.unsafe_shutdown_enable = enable;
> +
> +	return 0;
>   }
>   
>   struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
>   	.cmd_is_supported = papr_cmd_is_supported,
> +	.new_smart_inject = papr_new_smart_inject,
> +	.smart_inject_supported = papr_smart_inject_supported,
> +	.smart_inject_fatal = papr_cmd_smart_inject_fatal,
> +	.smart_inject_unsafe_shutdown = papr_cmd_smart_inject_unsafe_shutdown,
>   	.smart_get_flags = papr_smart_get_flags,
>   	.get_firmware_status =  papr_get_firmware_status,
>   	.xlat_firmware_status = papr_xlat_firmware_status,
> diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h
> index f45b1e40c075..20ac20f89acd 100644
> --- a/ndctl/lib/papr_pdsm.h
> +++ b/ndctl/lib/papr_pdsm.h
> @@ -121,12 +121,29 @@ struct nd_papr_pdsm_health {
>   enum papr_pdsm {
>   	PAPR_PDSM_MIN = 0x0,
>   	PAPR_PDSM_HEALTH,
> +	PAPR_PDSM_SMART_INJECT,
>   	PAPR_PDSM_MAX,
>   };
> +/* Flags for injecting specific smart errors */
> +#define PDSM_SMART_INJECT_HEALTH_FATAL		(1 << 0)
> +#define PDSM_SMART_INJECT_BAD_SHUTDOWN		(1 << 1)
> +
> +struct nd_papr_pdsm_smart_inject {
> +	union {
> +		struct {
> +			/* One or more of PDSM_SMART_INJECT_ */
> +			__u32 flags;
> +			__u8 fatal_enable;
> +			__u8 unsafe_shutdown_enable;
> +		};
> +		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
> +	};
> +};
>   
>   /* Maximal union that can hold all possible payload types */
>   union nd_pdsm_payload {
>   	struct nd_papr_pdsm_health health;
> +	struct nd_papr_pdsm_smart_inject inject;
>   	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
>   } __attribute__((packed));
>   
> 


-aneesh
Vaibhav Jain July 13, 2021, 7:51 a.m. UTC | #2
Thanks Aneesh for looking into this patch

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> On 7/12/21 11:01 PM, Vaibhav Jain wrote:
>> Implements support for ndctl inject-smart command by providing an
>> implementation of 'smart_inject*' dimm-ops callbacks. Presently only
>> support for injecting unsafe-shutdown and fatal-health states is
>> available.
>> 
>> The patch also introduce various PAPR PDSM structures that are used to
>> communicate the inject-smart errors to the papr_scm kernel
>> module. This is done via SMART_INJECT PDSM which sends a payload of
>> type 'struct nd_papr_pdsm_smart_inject'.
>> 
>> The patch depends on the kernel PAPR PDSM implementation for
>> PDSM_SMART_INJECT posted at [1].
>> 
>> [1] : https://lore.kernel.org/nvdimm/20210712084819.1150350-1-vaibhav@linux.ibm.com
>> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
>> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
>> ---
>>   ndctl/lib/papr.c      | 61 +++++++++++++++++++++++++++++++++++++++++++
>>   ndctl/lib/papr_pdsm.h | 17 ++++++++++++
>>   2 files changed, 78 insertions(+)
>> 
>> diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c
>> index 42ff200dc588..b797e1e5fe8b 100644
>> --- a/ndctl/lib/papr.c
>> +++ b/ndctl/lib/papr.c
>> @@ -221,6 +221,41 @@ static unsigned int papr_smart_get_shutdown_state(struct ndctl_cmd *cmd)
>>   	return health.dimm_bad_shutdown;
>>   }
>>   
>> +static int papr_smart_inject_supported(struct ndctl_dimm *dimm)
>> +{
>> +	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL))
>> +		return -EOPNOTSUPP;
>> +
>> +	if (!test_dimm_dsm(dimm, PAPR_PDSM_SMART_INJECT))
>> +		return -EIO;
>> +
>> +	return ND_SMART_INJECT_HEALTH_STATE | ND_SMART_INJECT_UNCLEAN_SHUTDOWN;
>> +}
>> +
>
> with ndtest PAPR_SCM_FAMILY driver, should we test more inject types?

Presently a common PDSM structure 'struct nd_papr_pdsm_smart_inject' is
used between ndtest and papr_scm. If we want to add support for more
inject types in ndtest then that structure would need to be extended.

However, even with that, libndctl still shares common dimm-ops
callbacks for papr_scm & ndtest, which only support injecting smart fatal
health and dirty-shutdown at the moment. So with only ndtest supporting an inject
type, for example temperature-threshold, I am not sure which libndctl code
path we would be testing.

> if 
> so should the supported inject types be fetched from the driver?
>
Good suggestion.
Surely that can be implemented in the future once papr_scm and ndtest
start supporting more smart inject types.

>> +static int papr_smart_inject_valid(struct ndctl_cmd *cmd)
>> +{
>> +	if (cmd->type != ND_CMD_CALL ||
>> +	    to_pdsm(cmd)->cmd_status != 0 ||
>> +	    to_pdsm_cmd(cmd) != PAPR_PDSM_SMART_INJECT)
>> +		return -EINVAL;
>> +
>> +	return 0;
>> +}
>> +
>> +static struct ndctl_cmd *papr_new_smart_inject(struct ndctl_dimm *dimm)
>> +{
>> +	struct ndctl_cmd *cmd;
>> +
>> +	cmd = allocate_cmd(dimm, PAPR_PDSM_SMART_INJECT,
>> +			sizeof(struct nd_papr_pdsm_smart_inject));
>> +	if (!cmd)
>> +		return NULL;
>> +	/* Set the input payload size */
>> +	to_ndcmd(cmd)->nd_size_in = ND_PDSM_HDR_SIZE +
>> +		sizeof(struct nd_papr_pdsm_smart_inject);
>> +	return cmd;
>> +}
>> +
>>   static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd)
>>   {
>>   	struct nd_papr_pdsm_health health;
>> @@ -255,11 +290,37 @@ static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd)
>>   
>>   	return (health.extension_flags & PDSM_DIMM_DSC_VALID) ?
>>   		(health.dimm_dsc) : 0;
>> +}
>> +
>> +static int papr_cmd_smart_inject_fatal(struct ndctl_cmd *cmd, bool enable)
>> +{
>> +	if (papr_smart_inject_valid(cmd) < 0)
>> +		return -EINVAL;
>> +
>> +	to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
>> +	to_payload(cmd)->inject.fatal_enable = enable;
>>   
>> +	return 0;
>> +}
>> +
>> +static int papr_cmd_smart_inject_unsafe_shutdown(struct ndctl_cmd *cmd,
>> +						 bool enable)
>> +{
>> +	if (papr_smart_inject_valid(cmd) < 0)
>> +		return -EINVAL;
>> +
>> +	to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
>> +	to_payload(cmd)->inject.unsafe_shutdown_enable = enable;
>> +
>> +	return 0;
>>   }
>>   
>>   struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
>>   	.cmd_is_supported = papr_cmd_is_supported,
>> +	.new_smart_inject = papr_new_smart_inject,
>> +	.smart_inject_supported = papr_smart_inject_supported,
>> +	.smart_inject_fatal = papr_cmd_smart_inject_fatal,
>> +	.smart_inject_unsafe_shutdown = papr_cmd_smart_inject_unsafe_shutdown,
>>   	.smart_get_flags = papr_smart_get_flags,
>>   	.get_firmware_status =  papr_get_firmware_status,
>>   	.xlat_firmware_status = papr_xlat_firmware_status,
>> diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h
>> index f45b1e40c075..20ac20f89acd 100644
>> --- a/ndctl/lib/papr_pdsm.h
>> +++ b/ndctl/lib/papr_pdsm.h
>> @@ -121,12 +121,29 @@ struct nd_papr_pdsm_health {
>>   enum papr_pdsm {
>>   	PAPR_PDSM_MIN = 0x0,
>>   	PAPR_PDSM_HEALTH,
>> +	PAPR_PDSM_SMART_INJECT,
>>   	PAPR_PDSM_MAX,
>>   };
>> +/* Flags for injecting specific smart errors */
>> +#define PDSM_SMART_INJECT_HEALTH_FATAL		(1 << 0)
>> +#define PDSM_SMART_INJECT_BAD_SHUTDOWN		(1 << 1)
>> +
>> +struct nd_papr_pdsm_smart_inject {
>> +	union {
>> +		struct {
>> +			/* One or more of PDSM_SMART_INJECT_ */
>> +			__u32 flags;
>> +			__u8 fatal_enable;
>> +			__u8 unsafe_shutdown_enable;
>> +		};
>> +		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
>> +	};
>> +};
>>   
>>   /* Maximal union that can hold all possible payload types */
>>   union nd_pdsm_payload {
>>   	struct nd_papr_pdsm_health health;
>> +	struct nd_papr_pdsm_smart_inject inject;
>>   	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
>>   } __attribute__((packed));
>>   
>> 
>
>
> -aneesh
>
>
diff mbox series

Patch

diff --git a/ndctl/lib/papr.c b/ndctl/lib/papr.c
index 42ff200dc588..b797e1e5fe8b 100644
--- a/ndctl/lib/papr.c
+++ b/ndctl/lib/papr.c
@@ -221,6 +221,41 @@  static unsigned int papr_smart_get_shutdown_state(struct ndctl_cmd *cmd)
 	return health.dimm_bad_shutdown;
 }
 
+static int papr_smart_inject_supported(struct ndctl_dimm *dimm)
+{
+	if (!ndctl_dimm_is_cmd_supported(dimm, ND_CMD_CALL))
+		return -EOPNOTSUPP;
+
+	if (!test_dimm_dsm(dimm, PAPR_PDSM_SMART_INJECT))
+		return -EIO;
+
+	return ND_SMART_INJECT_HEALTH_STATE | ND_SMART_INJECT_UNCLEAN_SHUTDOWN;
+}
+
+static int papr_smart_inject_valid(struct ndctl_cmd *cmd)
+{
+	if (cmd->type != ND_CMD_CALL ||
+	    to_pdsm(cmd)->cmd_status != 0 ||
+	    to_pdsm_cmd(cmd) != PAPR_PDSM_SMART_INJECT)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct ndctl_cmd *papr_new_smart_inject(struct ndctl_dimm *dimm)
+{
+	struct ndctl_cmd *cmd;
+
+	cmd = allocate_cmd(dimm, PAPR_PDSM_SMART_INJECT,
+			sizeof(struct nd_papr_pdsm_smart_inject));
+	if (!cmd)
+		return NULL;
+	/* Set the input payload size */
+	to_ndcmd(cmd)->nd_size_in = ND_PDSM_HDR_SIZE +
+		sizeof(struct nd_papr_pdsm_smart_inject);
+	return cmd;
+}
+
 static unsigned int papr_smart_get_life_used(struct ndctl_cmd *cmd)
 {
 	struct nd_papr_pdsm_health health;
@@ -255,11 +290,37 @@  static unsigned int papr_smart_get_shutdown_count(struct ndctl_cmd *cmd)
 
 	return (health.extension_flags & PDSM_DIMM_DSC_VALID) ?
 		(health.dimm_dsc) : 0;
+}
+
+static int papr_cmd_smart_inject_fatal(struct ndctl_cmd *cmd, bool enable)
+{
+	if (papr_smart_inject_valid(cmd) < 0)
+		return -EINVAL;
+
+	to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_HEALTH_FATAL;
+	to_payload(cmd)->inject.fatal_enable = enable;
 
+	return 0;
+}
+
+static int papr_cmd_smart_inject_unsafe_shutdown(struct ndctl_cmd *cmd,
+						 bool enable)
+{
+	if (papr_smart_inject_valid(cmd) < 0)
+		return -EINVAL;
+
+	to_payload(cmd)->inject.flags |= PDSM_SMART_INJECT_BAD_SHUTDOWN;
+	to_payload(cmd)->inject.unsafe_shutdown_enable = enable;
+
+	return 0;
 }
 
 struct ndctl_dimm_ops * const papr_dimm_ops = &(struct ndctl_dimm_ops) {
 	.cmd_is_supported = papr_cmd_is_supported,
+	.new_smart_inject = papr_new_smart_inject,
+	.smart_inject_supported = papr_smart_inject_supported,
+	.smart_inject_fatal = papr_cmd_smart_inject_fatal,
+	.smart_inject_unsafe_shutdown = papr_cmd_smart_inject_unsafe_shutdown,
 	.smart_get_flags = papr_smart_get_flags,
 	.get_firmware_status =  papr_get_firmware_status,
 	.xlat_firmware_status = papr_xlat_firmware_status,
diff --git a/ndctl/lib/papr_pdsm.h b/ndctl/lib/papr_pdsm.h
index f45b1e40c075..20ac20f89acd 100644
--- a/ndctl/lib/papr_pdsm.h
+++ b/ndctl/lib/papr_pdsm.h
@@ -121,12 +121,29 @@  struct nd_papr_pdsm_health {
 enum papr_pdsm {
 	PAPR_PDSM_MIN = 0x0,
 	PAPR_PDSM_HEALTH,
+	PAPR_PDSM_SMART_INJECT,
 	PAPR_PDSM_MAX,
 };
+/* Flags for injecting specific smart errors */
+#define PDSM_SMART_INJECT_HEALTH_FATAL		(1 << 0)
+#define PDSM_SMART_INJECT_BAD_SHUTDOWN		(1 << 1)
+
+struct nd_papr_pdsm_smart_inject {
+	union {
+		struct {
+			/* One or more of PDSM_SMART_INJECT_ */
+			__u32 flags;
+			__u8 fatal_enable;
+			__u8 unsafe_shutdown_enable;
+		};
+		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
+	};
+};
 
 /* Maximal union that can hold all possible payload types */
 union nd_pdsm_payload {
 	struct nd_papr_pdsm_health health;
+	struct nd_papr_pdsm_smart_inject inject;
 	__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
 } __attribute__((packed));