diff mbox

[v6,2/3] ndctl: add list-errors support

Message ID 149393454673.2642.10490569511477839936.stgit@djiang5-desk3.ch.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Jiang May 4, 2017, 9:49 p.m. UTC
Adding list-errors command to support displaying of all badblocks in
relation to the device rather than the region. This allows the user to
know what badblocks to pass in when doing clear-error calls.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 Documentation/ndctl-list-errors.txt |   26 +++++++
 builtin.h                           |    1 
 ndctl/clear-error.c                 |  133 ++++++++++++++++++++++++++++++++---
 ndctl/ndctl.c                       |    1 
 4 files changed, 151 insertions(+), 10 deletions(-)
 create mode 100644 Documentation/ndctl-list-errors.txt

Comments

Kani, Toshi May 5, 2017, 5:07 p.m. UTC | #1
On Thu, 2017-05-04 at 14:49 -0700, Dave Jiang wrote:
> Adding list-errors command to support displaying of all badblocks in

> relation to the device rather than the region. This allows the user

> to know what badblocks to pass in when doing clear-error calls.

> 

> Signed-off-by: Dave Jiang <dave.jiang@intel.com>

> ---

>  Documentation/ndctl-list-errors.txt |   26 +++++++

>  builtin.h                           |    1 

>  ndctl/clear-error.c                 |  133

> ++++++++++++++++++++++++++++++++---

>  ndctl/ndctl.c                       |    1 

>  4 files changed, 151 insertions(+), 10 deletions(-)

>  create mode 100644 Documentation/ndctl-list-errors.txt

> 

> diff --git a/Documentation/ndctl-list-errors.txt

> b/Documentation/ndctl-list-errors.txt

> new file mode 100644

> index 0000000..f831ba0

> --- /dev/null

> +++ b/Documentation/ndctl-list-errors.txt

> @@ -0,0 +1,26 @@

> +ndctl-list-errors(1)

> +====================

> +

> +NAME

> +----

> +ndctl-list-errors - list badblocks specifically in relation to a

> device

> +

> +SYNOPSIS

> +--------

> +[verse]

> +'ndctl list-errors' [<options>]

> +

> +EXAMPLES

> +--------

> +

> +List bad blocks for the provided device

> +[verse]

> +ndctl list-errors -f /dev/dax0.0

> +

> +List all badblocks for device /dev/dax0.0.


Hi Dave,

I am not getting sensible values from list-errors.  Also, please
describe the output format of this command in the document.

# cat /sys/bus/nd/devices/region0/size
17179869184

# cat /sys/class/dax/dax0.0/size
16909336576

# cat /sys/bus/nd/devices/region0/badblocks
1048576 1
1572864 1

# ndctl list-errors -f /dev/dax0.0
0 3145729
0 3670017

Thanks,
-Toshi
Dave Jiang May 5, 2017, 5:14 p.m. UTC | #2
On 05/05/2017 10:07 AM, Kani, Toshimitsu wrote:
> On Thu, 2017-05-04 at 14:49 -0700, Dave Jiang wrote:
>> Adding list-errors command to support displaying of all badblocks in
>> relation to the device rather than the region. This allows the user
>> to know what badblocks to pass in when doing clear-error calls.
>>
>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>> ---
>>  Documentation/ndctl-list-errors.txt |   26 +++++++
>>  builtin.h                           |    1
>>  ndctl/clear-error.c                 |  133
>> ++++++++++++++++++++++++++++++++---
>>  ndctl/ndctl.c                       |    1
>>  4 files changed, 151 insertions(+), 10 deletions(-)
>>  create mode 100644 Documentation/ndctl-list-errors.txt
>>
>> diff --git a/Documentation/ndctl-list-errors.txt
>> b/Documentation/ndctl-list-errors.txt
>> new file mode 100644
>> index 0000000..f831ba0
>> --- /dev/null
>> +++ b/Documentation/ndctl-list-errors.txt
>> @@ -0,0 +1,26 @@
>> +ndctl-list-errors(1)
>> +====================
>> +
>> +NAME
>> +----
>> +ndctl-list-errors - list badblocks specifically in relation to a
>> device
>> +
>> +SYNOPSIS
>> +--------
>> +[verse]
>> +'ndctl list-errors' [<options>]
>> +
>> +EXAMPLES
>> +--------
>> +
>> +List bad blocks for the provided device
>> +[verse]
>> +ndctl list-errors -f /dev/dax0.0
>> +
>> +List all badblocks for device /dev/dax0.0.
> 
> Hi Dave,
> 
> I am not getting sensible values from list-errors.  Also, please
> describe the output format of this command in the document.
> 
> # cat /sys/bus/nd/devices/region0/size
> 17179869184
> 
> # cat /sys/class/dax/dax0.0/size
> 16909336576
> 
> # cat /sys/bus/nd/devices/region0/badblocks
> 1048576 1
> 1572864 1
> 
> # ndctl list-errors -f /dev/dax0.0
> 0 3145729
> 0 3670017

That is very strange. It looks correct with nfit_test for me. How do I
reproduce your case or do I actually need actual hardware?


> 
> Thanks,
> -Toshi
>
Kani, Toshi May 5, 2017, 5:21 p.m. UTC | #3
On Fri, 2017-05-05 at 10:14 -0700, Dave Jiang wrote:
> 

> On 05/05/2017 10:07 AM, Kani, Toshimitsu wrote:

> > On Thu, 2017-05-04 at 14:49 -0700, Dave Jiang wrote:

> > > Adding list-errors command to support displaying of all badblocks

> > > in relation to the device rather than the region. This allows the

> > > user to know what badblocks to pass in when doing clear-error

> > > calls.

> > > 

> > > Signed-off-by: Dave Jiang <dave.jiang@intel.com>

> > > ---

> > >  Documentation/ndctl-list-errors.txt |   26 +++++++

> > >  builtin.h                           |    1

> > >  ndctl/clear-error.c                 |  133

> > > ++++++++++++++++++++++++++++++++---

> > >  ndctl/ndctl.c                       |    1

> > >  4 files changed, 151 insertions(+), 10 deletions(-)

> > >  create mode 100644 Documentation/ndctl-list-errors.txt

> > > 

> > > diff --git a/Documentation/ndctl-list-errors.txt

> > > b/Documentation/ndctl-list-errors.txt

> > > new file mode 100644

> > > index 0000000..f831ba0

> > > --- /dev/null

> > > +++ b/Documentation/ndctl-list-errors.txt

> > > @@ -0,0 +1,26 @@

> > > +ndctl-list-errors(1)

> > > +====================

> > > +

> > > +NAME

> > > +----

> > > +ndctl-list-errors - list badblocks specifically in relation to a

> > > device

> > > +

> > > +SYNOPSIS

> > > +--------

> > > +[verse]

> > > +'ndctl list-errors' [<options>]

> > > +

> > > +EXAMPLES

> > > +--------

> > > +

> > > +List bad blocks for the provided device

> > > +[verse]

> > > +ndctl list-errors -f /dev/dax0.0

> > > +

> > > +List all badblocks for device /dev/dax0.0.

> > 

> > Hi Dave,

> > 

> > I am not getting sensible values from list-errors.  Also, please

> > describe the output format of this command in the document.

> > 

> > # cat /sys/bus/nd/devices/region0/size

> > 17179869184

> > 

> > # cat /sys/class/dax/dax0.0/size

> > 16909336576

> > 

> > # cat /sys/bus/nd/devices/region0/badblocks

> > 1048576 1

> > 1572864 1

> > 

> > # ndctl list-errors -f /dev/dax0.0

> > 0 3145729

> > 0 3670017

> 

> That is very strange. It looks correct with nfit_test for me. How do

> I reproduce your case or do I actually need actual hardware?


Yes, my steps need a hardware, but this badblocks handling itself
should be platform-independent.  I will check to see why I got these
values.

Thanks,
-Toshi
Dave Jiang May 5, 2017, 5:27 p.m. UTC | #4
On 05/05/2017 10:21 AM, Kani, Toshimitsu wrote:
> On Fri, 2017-05-05 at 10:14 -0700, Dave Jiang wrote:
>>
>> On 05/05/2017 10:07 AM, Kani, Toshimitsu wrote:
>>> On Thu, 2017-05-04 at 14:49 -0700, Dave Jiang wrote:
>>>> Adding list-errors command to support displaying of all badblocks
>>>> in relation to the device rather than the region. This allows the
>>>> user to know what badblocks to pass in when doing clear-error
>>>> calls.
>>>>
>>>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>>>> ---
>>>>  Documentation/ndctl-list-errors.txt |   26 +++++++
>>>>  builtin.h                           |    1
>>>>  ndctl/clear-error.c                 |  133
>>>> ++++++++++++++++++++++++++++++++---
>>>>  ndctl/ndctl.c                       |    1
>>>>  4 files changed, 151 insertions(+), 10 deletions(-)
>>>>  create mode 100644 Documentation/ndctl-list-errors.txt
>>>>
>>>> diff --git a/Documentation/ndctl-list-errors.txt
>>>> b/Documentation/ndctl-list-errors.txt
>>>> new file mode 100644
>>>> index 0000000..f831ba0
>>>> --- /dev/null
>>>> +++ b/Documentation/ndctl-list-errors.txt
>>>> @@ -0,0 +1,26 @@
>>>> +ndctl-list-errors(1)
>>>> +====================
>>>> +
>>>> +NAME
>>>> +----
>>>> +ndctl-list-errors - list badblocks specifically in relation to a
>>>> device
>>>> +
>>>> +SYNOPSIS
>>>> +--------
>>>> +[verse]
>>>> +'ndctl list-errors' [<options>]
>>>> +
>>>> +EXAMPLES
>>>> +--------
>>>> +
>>>> +List bad blocks for the provided device
>>>> +[verse]
>>>> +ndctl list-errors -f /dev/dax0.0
>>>> +
>>>> +List all badblocks for device /dev/dax0.0.
>>>
>>> Hi Dave,
>>>
>>> I am not getting sensible values from list-errors.  Also, please
>>> describe the output format of this command in the document.
>>>
>>> # cat /sys/bus/nd/devices/region0/size
>>> 17179869184
>>>
>>> # cat /sys/class/dax/dax0.0/size
>>> 16909336576
>>>
>>> # cat /sys/bus/nd/devices/region0/badblocks
>>> 1048576 1
>>> 1572864 1
>>>
>>> # ndctl list-errors -f /dev/dax0.0
>>> 0 3145729
>>> 0 3670017
>>
>> That is very strange. It looks correct with nfit_test for me. How do
>> I reproduce your case or do I actually need actual hardware?
> 
> Yes, my steps need a hardware, but this badblocks handling itself
> should be platform-independent.  I will check to see why I got these
> values.

Thanks! what is your region/resource and dax/resource?
Kani, Toshi May 5, 2017, 5:35 p.m. UTC | #5
On Fri, 2017-05-05 at 10:27 -0700, Dave Jiang wrote:
> 

> On 05/05/2017 10:21 AM, Kani, Toshimitsu wrote:

> > On Fri, 2017-05-05 at 10:14 -0700, Dave Jiang wrote:

> > > 

> > > On 05/05/2017 10:07 AM, Kani, Toshimitsu wrote:

> > > > On Thu, 2017-05-04 at 14:49 -0700, Dave Jiang wrote:

> > > > > Adding list-errors command to support displaying of all

> > > > > badblocks in relation to the device rather than the region.

> > > > > This allows the user to know what badblocks to pass in when

> > > > > doing clear-error calls.

> > > > > 

> > > > > Signed-off-by: Dave Jiang <dave.jiang@intel.com>

> > > > > ---

> > > > >  Documentation/ndctl-list-errors.txt |   26 +++++++

> > > > >  builtin.h                           |    1

> > > > >  ndctl/clear-error.c                 |  133

> > > > > ++++++++++++++++++++++++++++++++---

> > > > >  ndctl/ndctl.c                       |    1

> > > > >  4 files changed, 151 insertions(+), 10 deletions(-)

> > > > >  create mode 100644 Documentation/ndctl-list-errors.txt

> > > > > 

> > > > > diff --git a/Documentation/ndctl-list-errors.txt

> > > > > b/Documentation/ndctl-list-errors.txt

> > > > > new file mode 100644

> > > > > index 0000000..f831ba0

> > > > > --- /dev/null

> > > > > +++ b/Documentation/ndctl-list-errors.txt

> > > > > @@ -0,0 +1,26 @@

> > > > > +ndctl-list-errors(1)

> > > > > +====================

> > > > > +

> > > > > +NAME

> > > > > +----

> > > > > +ndctl-list-errors - list badblocks specifically in relation

> > > > > to a

> > > > > device

> > > > > +

> > > > > +SYNOPSIS

> > > > > +--------

> > > > > +[verse]

> > > > > +'ndctl list-errors' [<options>]

> > > > > +

> > > > > +EXAMPLES

> > > > > +--------

> > > > > +

> > > > > +List bad blocks for the provided device

> > > > > +[verse]

> > > > > +ndctl list-errors -f /dev/dax0.0

> > > > > +

> > > > > +List all badblocks for device /dev/dax0.0.

> > > > 

> > > > Hi Dave,

> > > > 

> > > > I am not getting sensible values from list-errors.  Also,

> > > > please describe the output format of this command in the

> > > > document.

> > > > 

> > > > # cat /sys/bus/nd/devices/region0/size

> > > > 17179869184

> > > > 

> > > > # cat /sys/class/dax/dax0.0/size

> > > > 16909336576

> > > > 

> > > > # cat /sys/bus/nd/devices/region0/badblocks

> > > > 1048576 1

> > > > 1572864 1

> > > > 

> > > > # ndctl list-errors -f /dev/dax0.0

> > > > 0 3145729

> > > > 0 3670017

> > > 

> > > That is very strange. It looks correct with nfit_test for me. How

> > > do I reproduce your case or do I actually need actual hardware?

> > 

> > Yes, my steps need a hardware, but this badblocks handling itself

> > should be platform-independent.  I will check to see why I got

> > these values.

> 

> Thanks! what is your region/resource and dax/resource?


See attached. Let me know if you need more info.

Thanks,
-Toshi
Kani, Toshi May 5, 2017, 5:49 p.m. UTC | #6
On Fri, 2017-05-05 at 11:35 -0600, Toshi Kani wrote:
> On Fri, 2017-05-05 at 10:27 -0700, Dave Jiang wrote:

> > 

> > On 05/05/2017 10:21 AM, Kani, Toshimitsu wrote:

> > > On Fri, 2017-05-05 at 10:14 -0700, Dave Jiang wrote:

> > > > 

> > > > On 05/05/2017 10:07 AM, Kani, Toshimitsu wrote:

> > > > > On Thu, 2017-05-04 at 14:49 -0700, Dave Jiang wrote:

> > > > > > Adding list-errors command to support displaying of all

> > > > > > badblocks in relation to the device rather than the region.

> > > > > > This allows the user to know what badblocks to pass in when

> > > > > > doing clear-error calls.

> > > > > > 

> > > > > > Signed-off-by: Dave Jiang <dave.jiang@intel.com>

> > > > > > ---

> > > > > >  Documentation/ndctl-list-errors.txt |   26 +++++++

> > > > > >  builtin.h                           |    1

> > > > > >  ndctl/clear-error.c                 |  133

> > > > > > ++++++++++++++++++++++++++++++++---

> > > > > >  ndctl/ndctl.c                       |    1

> > > > > >  4 files changed, 151 insertions(+), 10 deletions(-)

> > > > > >  create mode 100644 Documentation/ndctl-list-errors.txt

> > > > > > 

> > > > > > diff --git a/Documentation/ndctl-list-errors.txt

> > > > > > b/Documentation/ndctl-list-errors.txt

> > > > > > new file mode 100644

> > > > > > index 0000000..f831ba0

> > > > > > --- /dev/null

> > > > > > +++ b/Documentation/ndctl-list-errors.txt

> > > > > > @@ -0,0 +1,26 @@

> > > > > > +ndctl-list-errors(1)

> > > > > > +====================

> > > > > > +

> > > > > > +NAME

> > > > > > +----

> > > > > > +ndctl-list-errors - list badblocks specifically in

> > > > > > relation

> > > > > > to a

> > > > > > device

> > > > > > +

> > > > > > +SYNOPSIS

> > > > > > +--------

> > > > > > +[verse]

> > > > > > +'ndctl list-errors' [<options>]

> > > > > > +

> > > > > > +EXAMPLES

> > > > > > +--------

> > > > > > +

> > > > > > +List bad blocks for the provided device

> > > > > > +[verse]

> > > > > > +ndctl list-errors -f /dev/dax0.0

> > > > > > +

> > > > > > +List all badblocks for device /dev/dax0.0.

> > > > > 

> > > > > Hi Dave,

> > > > > 

> > > > > I am not getting sensible values from list-errors.  Also,

> > > > > please describe the output format of this command in the

> > > > > document.

> > > > > 

> > > > > # cat /sys/bus/nd/devices/region0/size

> > > > > 17179869184

> > > > > 

> > > > > # cat /sys/class/dax/dax0.0/size

> > > > > 16909336576

> > > > > 

> > > > > # cat /sys/bus/nd/devices/region0/badblocks

> > > > > 1048576 1

> > > > > 1572864 1

> > > > > 

> > > > > # ndctl list-errors -f /dev/dax0.0

> > > > > 0 3145729

> > > > > 0 3670017

> > > > 

> > > > That is very strange. It looks correct with nfit_test for me.

> > > > How do I reproduce your case or do I actually need actual

> > > > hardware?

> > > 

> > > Yes, my steps need a hardware, but this badblocks handling itself

> > > should be platform-independent.  I will check to see why I got

> > > these values.

> > 

> > Thanks! what is your region/resource and dax/resource?

> 

> See attached. Let me know if you need more info.


dax_begin and dax_end get -1 in print_dax_badblocks() because
ndctl_dax_get_resource() and ndctl_dax_get_size() failed.  I think this
is due to the numbering of sysfs dax files, which Dan an I talked about
yesterday.

Thanks,
-Toshi
diff mbox

Patch

diff --git a/Documentation/ndctl-list-errors.txt b/Documentation/ndctl-list-errors.txt
new file mode 100644
index 0000000..f831ba0
--- /dev/null
+++ b/Documentation/ndctl-list-errors.txt
@@ -0,0 +1,26 @@ 
+ndctl-list-errors(1)
+====================
+
+NAME
+----
+ndctl-list-errors - list badblocks specifically in relation to a device
+
+SYNOPSIS
+--------
+[verse]
+'ndctl list-errors' [<options>]
+
+EXAMPLES
+--------
+
+List bad blocks for the provided device
+[verse]
+ndctl list-errors -f /dev/dax0.0
+
+List all badblocks for device /dev/dax0.0.
+
+OPTIONS
+-------
+-f::
+--file::
+	The device/file to show the badblocks.
diff --git a/builtin.h b/builtin.h
index f522d00..655904f 100644
--- a/builtin.h
+++ b/builtin.h
@@ -31,4 +31,5 @@  int cmd_test(int argc, const char **argv, void *ctx);
 int cmd_bat(int argc, const char **argv, void *ctx);
 #endif
 int cmd_clear_error(int argc, const char **argv, void *ctx);
+int cmd_list_errors(int argc, const char **argv, void *ctx);
 #endif /* _NDCTL_BUILTIN_H_ */
diff --git a/ndctl/clear-error.c b/ndctl/clear-error.c
index 29cd471..e15e821 100644
--- a/ndctl/clear-error.c
+++ b/ndctl/clear-error.c
@@ -131,29 +131,37 @@  static int check_user_input_range(struct ndctl_dax *dax,
 	return 0;
 }
 
-static int clear_error(struct clear_err *ce)
+static int check_dax_dev(const char *dev_name)
 {
-	struct stat stats;
 	int rc;
-	char dev_name[256];
-	uint64_t dev_base;
-	unsigned long long start;
-	unsigned int len;
-
-	strncpy(dev_name, ce->dev_name, 256);
+	struct stat stats;
 
 	rc = stat(dev_name, &stats);
 	if (rc < 0) {
+		rc = -errno;
 		perror("stat failed");
 		error("Unable to stat %s\n", dev_name);
-		return -1;
+		return rc;
 	}
 
 	if (!S_ISCHR(stats.st_mode)) {
 		error("%s not DAX device\n", dev_name);
-		return -1;
+		return -ENODEV;
 	}
 
+	return 0;
+}
+
+static int clear_error(struct clear_err *ce)
+{
+	int rc;
+	char dev_name[256];
+	uint64_t dev_base;
+	unsigned long long start;
+	unsigned int len;
+
+	strncpy(dev_name, ce->dev_name, 256);
+
 	rc = ndctl_new(&ce->ctx);
 	if (rc)
 		return rc;
@@ -231,9 +239,114 @@  int cmd_clear_error(int argc, const char **argv, void *ctx)
 		return -EINVAL;
 	}
 
+	if ((rc = check_dax_dev(clear_err.dev_name)) < 0)
+		return rc;
+
 	rc = clear_error(&clear_err);
 	if (rc)
 		return rc;
 
 	return 0;
 }
+
+static int print_dax_badblocks(struct ndctl_dax *dax)
+{
+	struct ndctl_region *region = ndctl_dax_get_region(dax);
+	struct badblock *bb;
+	uint64_t region_begin;
+	uint64_t dax_begin, dax_end;
+
+	region_begin = ndctl_region_get_resource(region);
+	dax_begin = ndctl_dax_get_resource(dax);
+	dax_end = dax_begin + ndctl_dax_get_size(dax) - 1;
+
+	/* check region */
+	ndctl_region_badblock_foreach(region, bb) {
+		uint64_t bb_begin, bb_end;
+		uint64_t begin, end;
+
+		bb_begin = region_begin + (bb->offset << 9);
+		bb_end = bb_begin + (bb->len << 9) - 1;
+
+		if (bb_begin <= dax_begin)
+			begin = dax_begin;
+		else if (bb_begin < dax_end)
+			begin = bb_begin;
+		else
+			begin = 0;
+
+		if (begin) {
+			if (bb_end <= dax_end)
+				end = bb_end;
+			else
+				end = dax_end;
+		} else
+			continue;
+
+		printf("%llu %u\n",
+			(unsigned long long)(begin - dax_begin) >> 9,
+			(unsigned int)(end - begin + 1) >> 9);
+	}
+
+	return 0;
+}
+
+static int list_errors(struct clear_err *ce)
+{
+	int rc;
+	char dev_name[256];
+
+	strncpy(dev_name, ce->dev_name, 256);
+
+	rc = ndctl_new(&ce->ctx);
+	if (rc)
+		return rc;
+
+	if ((rc = match_dev(ce, dev_name)) < 0)
+		goto cleanup;
+
+	if ((rc = print_dax_badblocks(ce->dax)) < 0)
+		goto cleanup;
+
+	rc = 0;
+
+cleanup:
+	ndctl_unref(ce->ctx);
+	return rc;
+}
+
+int cmd_list_errors(int argc, const char **argv, void *ctx)
+{
+	int i, rc;
+	const char * const u[] = {
+		"ndctl clear-error [<options>]",
+		NULL
+	};
+	const struct option options[] = {
+		OPT_STRING('f', "file", &clear_err.dev_name, "device-name",
+			"device/file name to be operated on"),
+		OPT_END(),
+	};
+
+	argc = parse_options(argc, argv, options, u, 0);
+
+	for (i = 0; i < argc; i++)
+		error("Unknown parameter \"%s\"\n", argv[i]);
+
+	if (argc)
+		usage_with_options(u, options);
+
+	if (!clear_err.dev_name) {
+		error("Missing device/file name passed in\n");
+		usage_with_options(u, options);
+		return -EINVAL;
+	}
+
+	if ((rc = check_dax_dev(clear_err.dev_name)) < 0)
+		return rc;
+
+	if ((rc = list_errors(&clear_err)) < 0)
+		return rc;
+
+	return 0;
+}
diff --git a/ndctl/ndctl.c b/ndctl/ndctl.c
index 144dc23..a698096 100644
--- a/ndctl/ndctl.c
+++ b/ndctl/ndctl.c
@@ -68,6 +68,7 @@  static struct cmd_struct commands[] = {
 	{ "init-labels", cmd_init_labels },
 	{ "check-labels", cmd_check_labels },
 	{ "clear-error", cmd_clear_error },
+	{ "list-errors", cmd_list_errors },
 	{ "list", cmd_list },
 	{ "help", cmd_help },
 	#ifdef ENABLE_TEST