diff mbox

[v2] libnvdimm, pmem: Add sysfs notifications to badblocks

Message ID 20170612222511.22030-1-toshi.kani@hpe.com (mailing list archive)
State Accepted
Commit 975750a98c26
Headers show

Commit Message

Kani, Toshi June 12, 2017, 10:25 p.m. UTC
Sysfs "badblocks" information may be updated during run-time that:
 - MCE, SCI, and sysfs "scrub" may add new bad blocks
 - Writes and ioctl() may clear bad blocks

Add support to send sysfs notifications to sysfs "badblocks" file
under region and pmem directories when their badblocks information
is re-evaluated (but is not necessarily changed) during run-time.

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Linda Knippers <linda.knippers@hpe.com>
---
v2: Send notifications for the clearing case
---
 drivers/nvdimm/bus.c    |    3 +++
 drivers/nvdimm/nd.h     |    1 +
 drivers/nvdimm/pmem.c   |   14 ++++++++++++++
 drivers/nvdimm/pmem.h   |    1 +
 drivers/nvdimm/region.c |   12 ++++++++++--
 5 files changed, 29 insertions(+), 2 deletions(-)

Comments

Dan Williams June 15, 2017, 9:33 p.m. UTC | #1
On Mon, Jun 12, 2017 at 3:25 PM, Toshi Kani <toshi.kani@hpe.com> wrote:
> Sysfs "badblocks" information may be updated during run-time that:
>  - MCE, SCI, and sysfs "scrub" may add new bad blocks
>  - Writes and ioctl() may clear bad blocks
>
> Add support to send sysfs notifications to sysfs "badblocks" file
> under region and pmem directories when their badblocks information
> is re-evaluated (but is not necessarily changed) during run-time.
>
> Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Vishal Verma <vishal.l.verma@intel.com>
> Cc: Linda Knippers <linda.knippers@hpe.com>
> ---
> v2: Send notifications for the clearing case
> ---

This looks good to me, I've applied it, but I also want to extend the
ndctl unit tests to cover this mechanism.
Kani, Toshi June 17, 2017, 12:35 a.m. UTC | #2
> On Mon, Jun 12, 2017 at 3:25 PM, Toshi Kani <toshi.kani@hpe.com> wrote:

> > Sysfs "badblocks" information may be updated during run-time that:

> >  - MCE, SCI, and sysfs "scrub" may add new bad blocks

> >  - Writes and ioctl() may clear bad blocks

> >

> > Add support to send sysfs notifications to sysfs "badblocks" file

> > under region and pmem directories when their badblocks information

> > is re-evaluated (but is not necessarily changed) during run-time.

> >

> > Signed-off-by: Toshi Kani <toshi.kani@hpe.com>

> > Cc: Dan Williams <dan.j.williams@intel.com>

> > Cc: Vishal Verma <vishal.l.verma@intel.com>

> > Cc: Linda Knippers <linda.knippers@hpe.com>

> > ---

> > v2: Send notifications for the clearing case

> > ---

> 

> This looks good to me, I've applied it, but I also want to extend the

> ndctl unit tests to cover this mechanism.


Right.  For the time being, would you mind to use the attached test
program for your sanity tests?  It simply monitors sysfs notifications
and prints badblocks info...  Sorry for inconvenience.

Since I am not familiar with the ndctl unit tests and I am traveling for
the rest of the month, I may have to look into it after I am back.

Thanks!
-Toshi
/*
 * Copyright (C) 2017  Hewlett Packard Enterprise Development LP
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h> 
#include <sys/stat.h> 

int main(int argc, char **argv)
{
	int fd, ret;
	fd_set fds;
	char buf[256];

	if (argc != 2) {
		printf("USAGE: test_sysfs_notify badblocks-path\n");
		exit(1);
	}

	if ((fd = open(argv[1], O_RDONLY)) < 0) {
		printf("Unable to open %s\n", argv[1]);
		exit(1);
	}
	printf("Monitoring %s - ctl-c to stop\n", argv[1]);

	while (1) {
		memset(buf, 0, sizeof(buf));
		ret = lseek(fd, 0, SEEK_SET);
		ret = read(fd, buf, sizeof(buf));
		printf("%s\n", buf);

		FD_ZERO(&fds);
		FD_SET(fd, &fds);

		ret = select(fd + 1, NULL, NULL, &fds, NULL);
		if (ret <= 0) {
			printf("error (%d)\n", ret);
			exit(1);
		} else if (FD_ISSET(fd, &fds)) {
			printf("NOTIFIED!!\n");
		}
	}

	close(fd);
}
Dan Williams June 17, 2017, 12:46 a.m. UTC | #3
On Fri, Jun 16, 2017 at 5:35 PM, Kani, Toshimitsu <toshi.kani@hpe.com> wrote:
>> On Mon, Jun 12, 2017 at 3:25 PM, Toshi Kani <toshi.kani@hpe.com> wrote:
>> > Sysfs "badblocks" information may be updated during run-time that:
>> >  - MCE, SCI, and sysfs "scrub" may add new bad blocks
>> >  - Writes and ioctl() may clear bad blocks
>> >
>> > Add support to send sysfs notifications to sysfs "badblocks" file
>> > under region and pmem directories when their badblocks information
>> > is re-evaluated (but is not necessarily changed) during run-time.
>> >
>> > Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
>> > Cc: Dan Williams <dan.j.williams@intel.com>
>> > Cc: Vishal Verma <vishal.l.verma@intel.com>
>> > Cc: Linda Knippers <linda.knippers@hpe.com>
>> > ---
>> > v2: Send notifications for the clearing case
>> > ---
>>
>> This looks good to me, I've applied it, but I also want to extend the
>> ndctl unit tests to cover this mechanism.
>
> Right.  For the time being, would you mind to use the attached test
> program for your sanity tests?  It simply monitors sysfs notifications
> and prints badblocks info...  Sorry for inconvenience.
>
> Since I am not familiar with the ndctl unit tests and I am traveling for
> the rest of the month, I may have to look into it after I am back.
>

No worries, I'll take a look at integrating this. Have a nice trip!
Kani, Toshi June 17, 2017, 12:51 a.m. UTC | #4
> On Fri, Jun 16, 2017 at 5:35 PM, Kani, Toshimitsu <toshi.kani@hpe.com> wrote:
> >> On Mon, Jun 12, 2017 at 3:25 PM, Toshi Kani <toshi.kani@hpe.com> wrote:
> >> > Sysfs "badblocks" information may be updated during run-time that:
> >> >  - MCE, SCI, and sysfs "scrub" may add new bad blocks
> >> >  - Writes and ioctl() may clear bad blocks
> >> >
> >> > Add support to send sysfs notifications to sysfs "badblocks" file
> >> > under region and pmem directories when their badblocks information
> >> > is re-evaluated (but is not necessarily changed) during run-time.
> >> >
> >> > Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> >> > Cc: Dan Williams <dan.j.williams@intel.com>
> >> > Cc: Vishal Verma <vishal.l.verma@intel.com>
> >> > Cc: Linda Knippers <linda.knippers@hpe.com>
> >> > ---
> >> > v2: Send notifications for the clearing case
> >> > ---
> >>
> >> This looks good to me, I've applied it, but I also want to extend the
> >> ndctl unit tests to cover this mechanism.
> >
> > Right.  For the time being, would you mind to use the attached test
> > program for your sanity tests?  It simply monitors sysfs notifications
> > and prints badblocks info...  Sorry for inconvenience.
> >
> > Since I am not familiar with the ndctl unit tests and I am traveling for
> > the rest of the month, I may have to look into it after I am back.
> >
> 
> No worries, I'll take a look at integrating this. Have a nice trip!

Thanks Dan!!  I really appreciate it! 
-Toshi
Dan Williams June 30, 2017, 4:47 p.m. UTC | #5
On Mon, Jun 12, 2017 at 3:25 PM, Toshi Kani <toshi.kani@hpe.com> wrote:
> Sysfs "badblocks" information may be updated during run-time that:
>  - MCE, SCI, and sysfs "scrub" may add new bad blocks
>  - Writes and ioctl() may clear bad blocks
>
> Add support to send sysfs notifications to sysfs "badblocks" file
> under region and pmem directories when their badblocks information
> is re-evaluated (but is not necessarily changed) during run-time.
>
> Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Vishal Verma <vishal.l.verma@intel.com>
> Cc: Linda Knippers <linda.knippers@hpe.com>
> ---
> v2: Send notifications for the clearing case
> ---
>  drivers/nvdimm/bus.c    |    3 +++
>  drivers/nvdimm/nd.h     |    1 +
>  drivers/nvdimm/pmem.c   |   14 ++++++++++++++
>  drivers/nvdimm/pmem.h   |    1 +
>  drivers/nvdimm/region.c |   12 ++++++++++--
>  5 files changed, 29 insertions(+), 2 deletions(-)
>
[..]
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index c544d46..6c14c72 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
[..]
> @@ -377,6 +379,13 @@ static int pmem_attach_disk(struct device *dev,
>
>         revalidate_disk(disk);
>
> +       pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
> +                                         "badblocks");
> +       if (pmem->bb_state)
> +               sysfs_put(pmem->bb_state);

Sorry I missed this on the first review, but this looks broken. We
need to hold the reference for as long as we might trigger
notifications, so the sysfs_put() should wait until
pmem_release_disk().

[..]
> diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
> index 869a886..ca94029 100644
> --- a/drivers/nvdimm/region.c
> +++ b/drivers/nvdimm/region.c
> @@ -58,10 +58,16 @@ static int nd_region_probe(struct device *dev)
>
>                 if (devm_init_badblocks(dev, &nd_region->bb))
>                         return -ENODEV;
> +               nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd,
> +                                                      "badblocks");
> +               if (nd_region->bb_state)
> +                       sysfs_put(nd_region->bb_state);

...same here. This should wait until we tear down the region.

I'll take a look at an incremental fix patch.
Kani, Toshi July 1, 2017, 12:41 a.m. UTC | #6
> > Sysfs "badblocks" information may be updated during run-time that:
> >  - MCE, SCI, and sysfs "scrub" may add new bad blocks
> >  - Writes and ioctl() may clear bad blocks
> >
> > Add support to send sysfs notifications to sysfs "badblocks" file
> > under region and pmem directories when their badblocks information
> > is re-evaluated (but is not necessarily changed) during run-time.
> >
> > Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > Cc: Vishal Verma <vishal.l.verma@intel.com>
> > Cc: Linda Knippers <linda.knippers@hpe.com>
> > ---
> > v2: Send notifications for the clearing case
> > ---
> >  drivers/nvdimm/bus.c    |    3 +++
> >  drivers/nvdimm/nd.h     |    1 +
> >  drivers/nvdimm/pmem.c   |   14 ++++++++++++++
> >  drivers/nvdimm/pmem.h   |    1 +
> >  drivers/nvdimm/region.c |   12 ++++++++++--
> >  5 files changed, 29 insertions(+), 2 deletions(-)
> >
> [..]
> > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> > index c544d46..6c14c72 100644
> > --- a/drivers/nvdimm/pmem.c
> > +++ b/drivers/nvdimm/pmem.c
> [..]
> > @@ -377,6 +379,13 @@ static int pmem_attach_disk(struct device *dev,
> >
> >         revalidate_disk(disk);
> >
> > +       pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
> > +                                         "badblocks");
> > +       if (pmem->bb_state)
> > +               sysfs_put(pmem->bb_state);
> 
> Sorry I missed this on the first review, but this looks broken. We
> need to hold the reference for as long as we might trigger
> notifications, so the sysfs_put() should wait until
> pmem_release_disk().

I see.
 
> [..]
> > diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
> > index 869a886..ca94029 100644
> > --- a/drivers/nvdimm/region.c
> > +++ b/drivers/nvdimm/region.c
> > @@ -58,10 +58,16 @@ static int nd_region_probe(struct device *dev)
> >
> >                 if (devm_init_badblocks(dev, &nd_region->bb))
> >                         return -ENODEV;
> > +               nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd,
> > +                                                      "badblocks");
> > +               if (nd_region->bb_state)
> > +                       sysfs_put(nd_region->bb_state);
> 
> ...same here. This should wait until we tear down the region.
> 
> I'll take a look at an incremental fix patch.

Thanks Dan!
-Toshi
diff mbox

Patch

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index e9361bf..63ce50d 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -198,6 +198,9 @@  static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
 	sector = (ctx->phys - nd_region->ndr_start) / 512;
 	badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512);
 
+	if (nd_region->bb_state)
+		sysfs_notify_dirent(nd_region->bb_state);
+
 	return 0;
 }
 
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 03852d7..4bb57ff 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -155,6 +155,7 @@  struct nd_region {
 	u64 ndr_start;
 	int id, num_lanes, ro, numa_node;
 	void *provider_data;
+	struct kernfs_node *bb_state;
 	struct badblocks bb;
 	struct nd_interleave_set *nd_set;
 	struct nd_percpu_lane __percpu *lane;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d46..6c14c72 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -68,6 +68,8 @@  static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
 				(unsigned long long) sector, cleared,
 				cleared > 1 ? "s" : "");
 		badblocks_clear(&pmem->bb, sector, cleared);
+		if (pmem->bb_state)
+			sysfs_notify_dirent(pmem->bb_state);
 	}
 
 	invalidate_pmem(pmem->virt_addr + offset, len);
@@ -377,6 +379,13 @@  static int pmem_attach_disk(struct device *dev,
 
 	revalidate_disk(disk);
 
+	pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
+					  "badblocks");
+	if (pmem->bb_state)
+		sysfs_put(pmem->bb_state);
+	else
+		dev_warn(dev, "sysfs_get_dirent 'badblocks' failed\n");
+
 	return 0;
 }
 
@@ -428,6 +437,7 @@  static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 	struct nd_namespace_io *nsio;
 	struct resource res;
 	struct badblocks *bb;
+	struct kernfs_node *bb_state;
 
 	if (event != NVDIMM_REVALIDATE_POISON)
 		return;
@@ -439,11 +449,13 @@  static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 		nd_region = to_nd_region(ndns->dev.parent);
 		nsio = to_nd_namespace_io(&ndns->dev);
 		bb = &nsio->bb;
+		bb_state = NULL;
 	} else {
 		struct pmem_device *pmem = dev_get_drvdata(dev);
 
 		nd_region = to_region(pmem);
 		bb = &pmem->bb;
+		bb_state = pmem->bb_state;
 
 		if (is_nd_pfn(dev)) {
 			struct nd_pfn *nd_pfn = to_nd_pfn(dev);
@@ -463,6 +475,8 @@  static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 	res.start = nsio->res.start + offset;
 	res.end = nsio->res.end - end_trunc;
 	nvdimm_badblocks_populate(nd_region, bb, &res);
+	if (bb_state)
+		sysfs_notify_dirent(bb_state);
 }
 
 MODULE_ALIAS("pmem");
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 7f4dbd7..c5917f0 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -17,6 +17,7 @@  struct pmem_device {
 	size_t			size;
 	/* trim size when namespace capacity has been section aligned */
 	u32			pfn_pad;
+	struct kernfs_node	*bb_state;
 	struct badblocks	bb;
 	struct dax_device	*dax_dev;
 	struct gendisk		*disk;
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 869a886..ca94029 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -58,10 +58,16 @@  static int nd_region_probe(struct device *dev)
 
 		if (devm_init_badblocks(dev, &nd_region->bb))
 			return -ENODEV;
+		nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd,
+						       "badblocks");
+		if (nd_region->bb_state)
+			sysfs_put(nd_region->bb_state);
+		else
+			dev_warn(&nd_region->dev,
+				"sysfs_get_dirent 'badblocks' failed\n");
 		ndr_res.start = nd_region->ndr_start;
 		ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1;
-		nvdimm_badblocks_populate(nd_region,
-				&nd_region->bb, &ndr_res);
+		nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
 	}
 
 	nd_region->btt_seed = nd_btt_create(nd_region);
@@ -126,6 +132,8 @@  static void nd_region_notify(struct device *dev, enum nvdimm_event event)
 				nd_region->ndr_size - 1;
 			nvdimm_badblocks_populate(nd_region,
 					&nd_region->bb, &res);
+			if (nd_region->bb_state)
+				sysfs_notify_dirent(nd_region->bb_state);
 		}
 	}
 	device_for_each_child(dev, &event, child_notify);