diff mbox series

[ndctl] libndctl: Update nvdimm flags after inject-smart

Message ID 20210713202523.190113-1-vaibhav@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series [ndctl] libndctl: Update nvdimm flags after inject-smart | expand

Commit Message

Vaibhav Jain July 13, 2021, 8:25 p.m. UTC
Presently after performing a inject-smart the nvdimm flags reported are out
of date as shown below where no 'smart_notify' or 'flush_fail' flags were
reported even though they are set after injecting the smart error:

$ sudo inject-smart -fU nmem0
[
  {
    "dev":"nmem0",
    "health":{
      "health_state":"fatal",
      "shutdown_state":"dirty",
      "shutdown_count":0
    }
  }
]
$ sudo cat /sys/class/nd/ndctl0/device/nmem0/papr/flags
flush_fail smart_notify

This happens because nvdimm flags are only parsed once during its probe and
not refreshed even after a inject-smart operation makes them out of
date. To fix this the patch adds a new export from libndctl named as
ndctl_refresh_dimm_flags() that can be called after inject-smart that
forces a refresh of nvdimm flags. This ensures that correct nvdimm flags
are reported after the inject-smart operation as shown below:

$ sudo ndctl inject-smart -fU nmem0
[
  {
    "dev":"nmem0",
    "flag_failed_flush":true,
    "flag_smart_event":true,
    "health":{
      "health_state":"fatal",
      "shutdown_state":"dirty",
      "shutdown_count":0
    }
  }
]

The patch refactors populate_dimm_attributes() to move the nvdimm flags
parsing code to the newly introduced ndctl_refresh_dimm_flags()
export. Since reading nvdimm flags requires constructing path using
'bus_prefix' which is only available during add_dimm(), the patch
introduces a new member 'struct ndctl_dimm.bus_prefix' to cache its
value. During ndctl_refresh_dimm_flags() the cached bus_prefix is used to
read the contents of the nvdimm flag file and pass it on to the appropriate
flag parsing function.

Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
---
 ndctl/inject-smart.c   |  3 +++
 ndctl/lib/libndctl.c   | 45 +++++++++++++++++++++++++++++-------------
 ndctl/lib/libndctl.sym |  4 ++++
 ndctl/lib/private.h    |  1 +
 ndctl/libndctl.h       |  1 +
 5 files changed, 40 insertions(+), 14 deletions(-)

Comments

Dan Williams July 13, 2021, 9:06 p.m. UTC | #1
On Tue, Jul 13, 2021 at 1:25 PM Vaibhav Jain <vaibhav@linux.ibm.com> wrote:
>
> Presently after performing a inject-smart the nvdimm flags reported are out
> of date as shown below where no 'smart_notify' or 'flush_fail' flags were
> reported even though they are set after injecting the smart error:
>
> $ sudo inject-smart -fU nmem0
> [
>   {
>     "dev":"nmem0",
>     "health":{
>       "health_state":"fatal",
>       "shutdown_state":"dirty",
>       "shutdown_count":0
>     }
>   }
> ]
> $ sudo cat /sys/class/nd/ndctl0/device/nmem0/papr/flags
> flush_fail smart_notify
>
> This happens because nvdimm flags are only parsed once during its probe and
> not refreshed even after a inject-smart operation makes them out of
> date. To fix this the patch adds a new export from libndctl named as
> ndctl_refresh_dimm_flags() that can be called after inject-smart that
> forces a refresh of nvdimm flags. This ensures that correct nvdimm flags
> are reported after the inject-smart operation as shown below:
>
> $ sudo ndctl inject-smart -fU nmem0
> [
>   {
>     "dev":"nmem0",
>     "flag_failed_flush":true,
>     "flag_smart_event":true,
>     "health":{
>       "health_state":"fatal",
>       "shutdown_state":"dirty",
>       "shutdown_count":0
>     }
>   }
> ]
>
> The patch refactors populate_dimm_attributes() to move the nvdimm flags
> parsing code to the newly introduced ndctl_refresh_dimm_flags()
> export. Since reading nvdimm flags requires constructing path using
> 'bus_prefix' which is only available during add_dimm(), the patch
> introduces a new member 'struct ndctl_dimm.bus_prefix' to cache its
> value. During ndctl_refresh_dimm_flags() the cached bus_prefix is used to
> read the contents of the nvdimm flag file and pass it on to the appropriate
> flag parsing function.

I think this can be handled without needing an explicit
ndctl_refresh_dimm_flags() api. Teach all the flag retrieval apis to
check if the cached value has been invalidated and re-parse the flags.
Then teach the inject-smart path to invalidate the cached copy of the
flags.
Vaibhav Jain July 14, 2021, 7:58 a.m. UTC | #2
Thanks for looking into this patch Dan

Dan Williams <dan.j.williams@intel.com> writes:

> On Tue, Jul 13, 2021 at 1:25 PM Vaibhav Jain <vaibhav@linux.ibm.com> wrote:
>>
>> Presently after performing a inject-smart the nvdimm flags reported are out
>> of date as shown below where no 'smart_notify' or 'flush_fail' flags were
>> reported even though they are set after injecting the smart error:
>>
>> $ sudo inject-smart -fU nmem0
>> [
>>   {
>>     "dev":"nmem0",
>>     "health":{
>>       "health_state":"fatal",
>>       "shutdown_state":"dirty",
>>       "shutdown_count":0
>>     }
>>   }
>> ]
>> $ sudo cat /sys/class/nd/ndctl0/device/nmem0/papr/flags
>> flush_fail smart_notify
>>
>> This happens because nvdimm flags are only parsed once during its probe and
>> not refreshed even after a inject-smart operation makes them out of
>> date. To fix this the patch adds a new export from libndctl named as
>> ndctl_refresh_dimm_flags() that can be called after inject-smart that
>> forces a refresh of nvdimm flags. This ensures that correct nvdimm flags
>> are reported after the inject-smart operation as shown below:
>>
>> $ sudo ndctl inject-smart -fU nmem0
>> [
>>   {
>>     "dev":"nmem0",
>>     "flag_failed_flush":true,
>>     "flag_smart_event":true,
>>     "health":{
>>       "health_state":"fatal",
>>       "shutdown_state":"dirty",
>>       "shutdown_count":0
>>     }
>>   }
>> ]
>>
>> The patch refactors populate_dimm_attributes() to move the nvdimm flags
>> parsing code to the newly introduced ndctl_refresh_dimm_flags()
>> export. Since reading nvdimm flags requires constructing path using
>> 'bus_prefix' which is only available during add_dimm(), the patch
>> introduces a new member 'struct ndctl_dimm.bus_prefix' to cache its
>> value. During ndctl_refresh_dimm_flags() the cached bus_prefix is used to
>> read the contents of the nvdimm flag file and pass it on to the appropriate
>> flag parsing function.
>
> I think this can be handled without needing an explicit
> ndctl_refresh_dimm_flags() api. Teach all the flag retrieval apis to
> check if the cached value has been invalidated and re-parse the flags.
> Then teach the inject-smart path to invalidate the cached copy of the
> flags.
>
Thanks for the suggestion. On the same line I think refreshing nvdimm
flags can also be done implicitly after a successful ndctl_cmd_submit()
call as that point we have interacted with the kernel_module which may
have triggered a nvdimm flags change.

Such a changeset would also be smaller compared to updating all flags
retrieval api's to invalidate and refresh nvdimm flags.
diff mbox series

Patch

diff --git a/ndctl/inject-smart.c b/ndctl/inject-smart.c
index ef0620f55531..6d9abe612e73 100644
--- a/ndctl/inject-smart.c
+++ b/ndctl/inject-smart.c
@@ -463,6 +463,9 @@  static int dimm_inject_smart(struct ndctl_dimm *dimm)
 			goto out;
 	}
 
+	/* Force update of dimm flags after smart-inject */
+	ndctl_refresh_dimm_flags(dimm);
+
 	if (rc == 0) {
 		jdimms = json_object_new_array();
 		if (!jdimms)
diff --git a/ndctl/lib/libndctl.c b/ndctl/lib/libndctl.c
index bb0ea094a153..59ea656e205d 100644
--- a/ndctl/lib/libndctl.c
+++ b/ndctl/lib/libndctl.c
@@ -588,6 +588,7 @@  static void free_dimm(struct ndctl_dimm *dimm)
 	free(dimm->unique_id);
 	free(dimm->dimm_buf);
 	free(dimm->dimm_path);
+	free(dimm->bus_prefix);
 	if (dimm->module)
 		kmod_module_unref(dimm->module);
 	if (dimm->health_eventfd > -1)
@@ -1650,14 +1651,34 @@  static int ndctl_bind(struct ndctl_ctx *ctx, struct kmod_module *module,
 static int ndctl_unbind(struct ndctl_ctx *ctx, const char *devpath);
 static struct kmod_module *to_module(struct ndctl_ctx *ctx, const char *alias);
 
+NDCTL_EXPORT void ndctl_refresh_dimm_flags(struct ndctl_dimm *dimm)
+{
+	struct ndctl_ctx *ctx = dimm->bus->ctx;
+	char *path = dimm->dimm_buf;
+	char buf[SYSFS_ATTR_SIZE];
+
+	/* Construct path to dimm flags sysfs file */
+	sprintf(path, "%s/%s/flags", dimm->dimm_path, dimm->bus_prefix);
+
+	if (sysfs_read_attr(ctx, path, buf) < 0)
+		return;
+
+	/* Reset the flags */
+	dimm->flags.flags = 0;
+	if (ndctl_bus_has_nfit(dimm->bus))
+		parse_nfit_mem_flags(dimm, buf);
+	else if (ndctl_bus_is_papr_scm(dimm->bus))
+		parse_papr_flags(dimm, buf);
+}
+
 static int populate_dimm_attributes(struct ndctl_dimm *dimm,
-				    const char *dimm_base,
-				    const char *bus_prefix)
+				    const char *dimm_base)
 {
 	int i, rc = -1;
 	char buf[SYSFS_ATTR_SIZE];
 	struct ndctl_ctx *ctx = dimm->bus->ctx;
 	char *path = calloc(1, strlen(dimm_base) + 100);
+	const char *bus_prefix = dimm->bus_prefix;
 
 	if (!path)
 		return -ENOMEM;
@@ -1740,15 +1761,7 @@  static int populate_dimm_attributes(struct ndctl_dimm *dimm,
 			dimm->format[i] = strtoul(buf, NULL, 0);
 	}
 
-	sprintf(path, "%s/%s/flags", dimm_base, bus_prefix);
-	if (sysfs_read_attr(ctx, path, buf) == 0) {
-		if (ndctl_bus_has_nfit(dimm->bus))
-			parse_nfit_mem_flags(dimm, buf);
-		else if (ndctl_bus_is_papr_scm(dimm->bus)) {
-			dimm->cmd_family = NVDIMM_FAMILY_PAPR;
-			parse_papr_flags(dimm, buf);
-		}
-	}
+	ndctl_refresh_dimm_flags(dimm);
 
 	dimm->health_eventfd = open(path, O_RDONLY|O_CLOEXEC);
 	rc = 0;
@@ -1807,7 +1820,7 @@  static int add_papr_dimm(struct ndctl_dimm *dimm, const char *dimm_base)
 		rc = 0;
 	} else if (strcmp(buf, "nvdimm_test") == 0) {
 		/* probe via common populate_dimm_attributes() */
-		rc = populate_dimm_attributes(dimm, dimm_base, "papr");
+		rc = populate_dimm_attributes(dimm, dimm_base);
 	}
 out:
 	free(path);
@@ -1904,9 +1917,13 @@  static void *add_dimm(void *parent, int id, const char *dimm_base)
 	dimm->formats = formats;
 	/* Check if the given dimm supports nfit */
 	if (ndctl_bus_has_nfit(bus)) {
-		rc = populate_dimm_attributes(dimm, dimm_base, "nfit");
+		dimm->bus_prefix = strdup("nfit");
+		rc = dimm->bus_prefix ?
+			populate_dimm_attributes(dimm, dimm_base) : -ENOMEM;
 	} else if (ndctl_bus_has_of_node(bus)) {
-		rc = add_papr_dimm(dimm, dimm_base);
+		dimm->bus_prefix = strdup("papr");
+		rc = dimm->bus_prefix ?
+			add_papr_dimm(dimm, dimm_base) : -ENOMEM;
 	}
 
 	if (rc == -ENODEV) {
diff --git a/ndctl/lib/libndctl.sym b/ndctl/lib/libndctl.sym
index 58afb74229fe..a498535af283 100644
--- a/ndctl/lib/libndctl.sym
+++ b/ndctl/lib/libndctl.sym
@@ -455,3 +455,7 @@  LIBNDCTL_25 {
 LIBNDCTL_26 {
 	ndctl_bus_nfit_translate_spa;
 } LIBNDCTL_25;
+
+LIBNDCTL_27 {
+	ndctl_refresh_dimm_flags;
+} LIBNDCTL_26;
diff --git a/ndctl/lib/private.h b/ndctl/lib/private.h
index 8f4510e562c4..a4b920ca0c94 100644
--- a/ndctl/lib/private.h
+++ b/ndctl/lib/private.h
@@ -75,6 +75,7 @@  struct ndctl_dimm {
 	char *unique_id;
 	char *dimm_path;
 	char *dimm_buf;
+	char *bus_prefix;
 	int health_eventfd;
 	int buf_len;
 	int id;
diff --git a/ndctl/libndctl.h b/ndctl/libndctl.h
index 3a5013007038..54539ac7a6c2 100644
--- a/ndctl/libndctl.h
+++ b/ndctl/libndctl.h
@@ -221,6 +221,7 @@  int ndctl_dimm_is_active(struct ndctl_dimm *dimm);
 int ndctl_dimm_is_enabled(struct ndctl_dimm *dimm);
 int ndctl_dimm_disable(struct ndctl_dimm *dimm);
 int ndctl_dimm_enable(struct ndctl_dimm *dimm);
+void ndctl_refresh_dimm_flags(struct ndctl_dimm *dimm);
 
 struct ndctl_cmd;
 struct ndctl_cmd *ndctl_bus_cmd_new_ars_cap(struct ndctl_bus *bus,