diff mbox

libnvdimm: check and clear poison before writing to pmem

Message ID 147873364251.45471.9613527969861188296.stgit@djiang5-desk3.ch.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Jiang Nov. 9, 2016, 11:20 p.m. UTC
We need to clear any poison when we are writing to pmem. The granularity
will be sector size. If it's less then we can't do anything about it
barring corruption.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/nvdimm/claim.c |   20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

Comments

Dan Williams Nov. 9, 2016, 11:39 p.m. UTC | #1
On Wed, Nov 9, 2016 at 3:20 PM, Dave Jiang <dave.jiang@intel.com> wrote:
> We need to clear any poison when we are writing to pmem. The granularity
> will be sector size. If it's less then we can't do anything about it
> barring corruption.
>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/nvdimm/claim.c |   20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
> index d5dc80c..2078d25 100644
> --- a/drivers/nvdimm/claim.c
> +++ b/drivers/nvdimm/claim.c
> @@ -226,6 +226,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
>                 resource_size_t offset, void *buf, size_t size, int rw)
>  {
>         struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
> +       unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
>
>         if (unlikely(offset + size > nsio->size)) {
>                 dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
> @@ -233,12 +234,27 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
>         }
>
>         if (rw == READ) {
> -               unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
> -
>                 if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align)))
>                         return -EIO;
>                 return memcpy_from_pmem(buf, nsio->addr + offset, size);
>         } else {
> +               sector_t sector = offset / 512;
> +
> +               if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
> +                       if (IS_ALIGNED(offset, 512) &&
> +                           IS_ALIGNED(size, 512) && (size >= 512)) {
> +                               int rc;
> +
> +                               rc = nvdimm_clear_poison(&ndns->dev, offset,
> +                                                        size);
> +                               if (rc != size)
> +                                       return -EIO;
> +
> +                               badblocks_clear(&nsio->bb, sector, rc / 512);
> +                       } else
> +                               return -EIO;
> +               }
> +
>                 memcpy_to_pmem(nsio->addr + offset, buf, size);

Looks good.

After this the last piece remaining to fully close the "badblocks in
metadata" problem is to go update __nvdimm_setup_pfn().  Teach it to
walk and clear the badblocks in the range from the end of the 'pfn'
infoblock (8K offset from the start of the namespace) to the start of
data (pfn_sb->dataoff).
Verma, Vishal L Nov. 10, 2016, 12:14 a.m. UTC | #2
On 11/09, Dave Jiang wrote:
> We need to clear any poison when we are writing to pmem. The granularity
> will be sector size. If it's less then we can't do anything about it
> barring corruption.
> 
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/nvdimm/claim.c |   20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
> index d5dc80c..2078d25 100644
> --- a/drivers/nvdimm/claim.c
> +++ b/drivers/nvdimm/claim.c
> @@ -226,6 +226,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
>  		resource_size_t offset, void *buf, size_t size, int rw)
>  {
>  	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
> +	unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
>  
>  	if (unlikely(offset + size > nsio->size)) {
>  		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
> @@ -233,12 +234,27 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
>  	}
>  
>  	if (rw == READ) {
> -		unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
> -
>  		if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align)))
>  			return -EIO;
>  		return memcpy_from_pmem(buf, nsio->addr + offset, size);
>  	} else {
> +		sector_t sector = offset / 512;

Small nit, but if you move the above line to the top of the function,
you can reuse 'sector' int he rw == READ case's is_bad_pmem call too,
making it read the same as the one below.

> +
> +		if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
> +			if (IS_ALIGNED(offset, 512) &&
> +			    IS_ALIGNED(size, 512) && (size >= 512)) {

Having the size >= 512 check here feels a bit odd.. If you can refactor
this to have something near the top to simply exit out for size == 0
(that should never happen anyway), then you can simply remove this last
part of the condition. If size > 0, and if it is aligned to 512
automatically implies it will at least be 512.

> +				int rc;
> +
> +				rc = nvdimm_clear_poison(&ndns->dev, offset,
> +							 size);

nvdimm_clear_poison returns a long, rc is an int.. While at it, since
this is the only palce where 'rc' is being used, perhaps rename it to
something like 'cleared' (like in pmem_clear_poison), so that the usage
is better described.

> +				if (rc != size)
> +					return -EIO;

I'm not sure of this myself - perhaps a question to Dan - 
Is it safe to return EIO here? We have potentially cleared some poison,
and lost some data, but subsequent reads will not indicate any errors,
returning either 0s or something unpredictable. Would it be better to
simply go ahead with the write even if we weren't able to clear the
whole range?

> +
> +				badblocks_clear(&nsio->bb, sector, rc / 512);
> +			} else
> +				return -EIO;
> +		}
> +
>  		memcpy_to_pmem(nsio->addr + offset, buf, size);
>  		nvdimm_flush(to_nd_region(ndns->dev.parent));
>  	}
>
Dan Williams Nov. 10, 2016, 12:54 a.m. UTC | #3
On Wed, Nov 9, 2016 at 4:14 PM, Vishal Verma <vishal.l.verma@intel.com> wrote:
> On 11/09, Dave Jiang wrote:
>> We need to clear any poison when we are writing to pmem. The granularity
>> will be sector size. If it's less then we can't do anything about it
>> barring corruption.
>>
>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>> ---
>>  drivers/nvdimm/claim.c |   20 ++++++++++++++++++--
>>  1 file changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
>> index d5dc80c..2078d25 100644
>> --- a/drivers/nvdimm/claim.c
>> +++ b/drivers/nvdimm/claim.c
>> @@ -226,6 +226,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
>>               resource_size_t offset, void *buf, size_t size, int rw)
>>  {
>>       struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
>> +     unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
>>
>>       if (unlikely(offset + size > nsio->size)) {
>>               dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
>> @@ -233,12 +234,27 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
>>       }
>>
>>       if (rw == READ) {
>> -             unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
>> -
>>               if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align)))
>>                       return -EIO;
>>               return memcpy_from_pmem(buf, nsio->addr + offset, size);
>>       } else {
>> +             sector_t sector = offset / 512;
>
> Small nit, but if you move the above line to the top of the function,
> you can reuse 'sector' int he rw == READ case's is_bad_pmem call too,
> making it read the same as the one below.
>
>> +
>> +             if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
>> +                     if (IS_ALIGNED(offset, 512) &&
>> +                         IS_ALIGNED(size, 512) && (size >= 512)) {
>
> Having the size >= 512 check here feels a bit odd.. If you can refactor
> this to have something near the top to simply exit out for size == 0
> (that should never happen anyway), then you can simply remove this last
> part of the condition. If size > 0, and if it is aligned to 512
> automatically implies it will at least be 512.
>
>> +                             int rc;
>> +
>> +                             rc = nvdimm_clear_poison(&ndns->dev, offset,
>> +                                                      size);
>
> nvdimm_clear_poison returns a long, rc is an int.. While at it, since
> this is the only palce where 'rc' is being used, perhaps rename it to
> something like 'cleared' (like in pmem_clear_poison), so that the usage
> is better described.
>
>> +                             if (rc != size)
>> +                                     return -EIO;
>
> I'm not sure of this myself - perhaps a question to Dan -
> Is it safe to return EIO here? We have potentially cleared some poison,
> and lost some data, but subsequent reads will not indicate any errors,
> returning either 0s or something unpredictable. Would it be better to
> simply go ahead with the write even if we weren't able to clear the
> whole range?

Good point, the interface allows partial completions, so it's
reasonable to go write what we were able to clear.  However that only
shrinks the exposure not eliminate it since we could immediately crash
for some other reason before the write completes.

We would an error clearing interface that had deterministic data after
clearing to do something better, but I can get on board with writing
new data after a partial completion.
Elliott, Robert (Servers) Nov. 10, 2016, 5:43 a.m. UTC | #4
> -----Original Message-----
> From: Linux-nvdimm [mailto:linux-nvdimm-bounces@lists.01.org] On
> Behalf Of Dan Williams
> Sent: Wednesday, November 09, 2016 6:55 PM
> To: Vishal Verma <vishal.l.verma@intel.com>
> Cc: linux-nvdimm@lists.01.org
> Subject: Re: [PATCH] libnvdimm: check and clear poison before writing
> to pmem
...
> >> @@ -233,12 +234,27 @@ static int nsio_rw_bytes(struct
> nd_namespace_common *ndns,
> >
> >> +                             int rc;
> >> +
> >> +                             rc = nvdimm_clear_poison(&ndns->dev, offset,
> >> +                                                      size);
> >> +                             if (rc != size)
> >> +                                     return -EIO;
> >
> > I'm not sure of this myself - perhaps a question to Dan -
> > Is it safe to return EIO here? We have potentially cleared some
> > poison, and lost some data, but subsequent reads will not indicate
> > any errors, returning either 0s or something unpredictable. Would
> > it be better to simply go ahead with the write even if we weren't
> > able to clear the whole range?
> 
> Good point, the interface allows partial completions, so it's
> reasonable to go write what we were able to clear.  However that only
> shrinks the exposure not eliminate it since we could immediately
> crash for some other reason before the write completes.
> 
> We would [like] an error clearing interface that had deterministic data
> after clearing to do something better, but I can get on board with
> writing new data after a partial completion.

You should call badblocks_clear for the blocks for which uncorrectable
errors were successfully cleared.  Future writes constrained to that
area will work fine.

nsio_rw_bytes doesn't return how many bytes were successfully written,
so I don't think doing a partial memcpy_to_pmem would be helpful.


---
Robert Elliott, HPE Persistent Memory
diff mbox

Patch

diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index d5dc80c..2078d25 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -226,6 +226,7 @@  static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 		resource_size_t offset, void *buf, size_t size, int rw)
 {
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+	unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
 
 	if (unlikely(offset + size > nsio->size)) {
 		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
@@ -233,12 +234,27 @@  static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	}
 
 	if (rw == READ) {
-		unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
-
 		if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align)))
 			return -EIO;
 		return memcpy_from_pmem(buf, nsio->addr + offset, size);
 	} else {
+		sector_t sector = offset / 512;
+
+		if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
+			if (IS_ALIGNED(offset, 512) &&
+			    IS_ALIGNED(size, 512) && (size >= 512)) {
+				int rc;
+
+				rc = nvdimm_clear_poison(&ndns->dev, offset,
+							 size);
+				if (rc != size)
+					return -EIO;
+
+				badblocks_clear(&nsio->bb, sector, rc / 512);
+			} else
+				return -EIO;
+		}
+
 		memcpy_to_pmem(nsio->addr + offset, buf, size);
 		nvdimm_flush(to_nd_region(ndns->dev.parent));
 	}