diff mbox

pmem: fix a NULL pointer BUG in nd_pmem_notify

Message ID 20170425230413.363-1-toshi.kani@hpe.com (mailing list archive)
State Accepted
Commit b2518c78ce76
Headers show

Commit Message

Kani, Toshi April 25, 2017, 11:04 p.m. UTC
The following BUG was observed when nd_pmem_notify() was called
for a BTT device.  The use of a pmem_device pointer is not valid
with BTT.

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
 IP: nd_pmem_notify+0x30/0xf0 [nd_pmem]
 Call Trace:
  nd_device_notify+0x40/0x50
  child_notify+0x10/0x20
  device_for_each_child+0x50/0x90
  nd_region_notify+0x20/0x30
  nd_device_notify+0x40/0x50
  nvdimm_region_notify+0x27/0x30
  acpi_nfit_scrub+0x341/0x590 [nfit]
  process_one_work+0x197/0x450
  worker_thread+0x4e/0x4a0
  kthread+0x109/0x140

Fix nd_pmem_notify() by setting nd_region and badblocks pointers
properly for BTT.

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
---
 drivers/nvdimm/pmem.c |   37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

Comments

Dan Williams April 25, 2017, 11:07 p.m. UTC | #1
On Tue, Apr 25, 2017 at 4:04 PM, Toshi Kani <toshi.kani@hpe.com> wrote:
> The following BUG was observed when nd_pmem_notify() was called
> for a BTT device.  The use of a pmem_device pointer is not valid
> with BTT.
>
>  BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
>  IP: nd_pmem_notify+0x30/0xf0 [nd_pmem]
>  Call Trace:
>   nd_device_notify+0x40/0x50
>   child_notify+0x10/0x20
>   device_for_each_child+0x50/0x90
>   nd_region_notify+0x20/0x30
>   nd_device_notify+0x40/0x50
>   nvdimm_region_notify+0x27/0x30
>   acpi_nfit_scrub+0x341/0x590 [nfit]
>   process_one_work+0x197/0x450
>   worker_thread+0x4e/0x4a0
>   kthread+0x109/0x140
>
> Fix nd_pmem_notify() by setting nd_region and badblocks pointers
> properly for BTT.
>
> Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Vishal Verma <vishal.l.verma@intel.com>


Hi Toshi, how did you trigger this? I'd like to get your test into the
regression suite.


> ---
>  drivers/nvdimm/pmem.c |   37 +++++++++++++++++++++++++------------
>  1 file changed, 25 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index 5b536be..0fc1826 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -388,12 +388,12 @@ static void nd_pmem_shutdown(struct device *dev)
>
>  static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
>  {
> -       struct pmem_device *pmem = dev_get_drvdata(dev);
> -       struct nd_region *nd_region = to_region(pmem);
> +       struct nd_region *nd_region;
>         resource_size_t offset = 0, end_trunc = 0;
>         struct nd_namespace_common *ndns;
>         struct nd_namespace_io *nsio;
>         struct resource res;
> +       struct badblocks *bb;
>
>         if (event != NVDIMM_REVALIDATE_POISON)
>                 return;
> @@ -402,20 +402,33 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
>                 struct nd_btt *nd_btt = to_nd_btt(dev);
>
>                 ndns = nd_btt->ndns;
> -       } else if (is_nd_pfn(dev)) {
> -               struct nd_pfn *nd_pfn = to_nd_pfn(dev);
> -               struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
> +               nd_region = to_nd_region(ndns->dev.parent);
> +               nsio = to_nd_namespace_io(&ndns->dev);
> +               bb = &nsio->bb;
> +       } else {
> +               struct pmem_device *pmem = dev_get_drvdata(dev);
>
> -               ndns = nd_pfn->ndns;
> -               offset = pmem->data_offset + __le32_to_cpu(pfn_sb->start_pad);
> -               end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
> -       } else
> -               ndns = to_ndns(dev);
> +               nd_region = to_region(pmem);
> +               bb = &pmem->bb;
> +
> +               if (is_nd_pfn(dev)) {
> +                       struct nd_pfn *nd_pfn = to_nd_pfn(dev);
> +                       struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
> +
> +                       ndns = nd_pfn->ndns;
> +                       offset = pmem->data_offset +
> +                                       __le32_to_cpu(pfn_sb->start_pad);
> +                       end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
> +               } else {
> +                       ndns = to_ndns(dev);
> +               }
> +
> +               nsio = to_nd_namespace_io(&ndns->dev);
> +       }
>
> -       nsio = to_nd_namespace_io(&ndns->dev);
>         res.start = nsio->res.start + offset;
>         res.end = nsio->res.end - end_trunc;
> -       nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
> +       nvdimm_badblocks_populate(nd_region, bb, &res);
>  }
>
>  MODULE_ALIAS("pmem");
Kani, Toshi April 25, 2017, 11:40 p.m. UTC | #2
On Tue, 2017-04-25 at 16:07 -0700, Dan Williams wrote:
> On Tue, Apr 25, 2017 at 4:04 PM, Toshi Kani <toshi.kani@hpe.com>
> wrote:
> > The following BUG was observed when nd_pmem_notify() was called
> > for a BTT device.  The use of a pmem_device pointer is not valid
> > with BTT.
> >
> >  BUG: unable to handle kernel NULL pointer dereference at
> > 0000000000000030
> >  IP: nd_pmem_notify+0x30/0xf0 [nd_pmem]
> >  Call Trace:
> >   nd_device_notify+0x40/0x50
> >   child_notify+0x10/0x20
> >   device_for_each_child+0x50/0x90
> >   nd_region_notify+0x20/0x30
> >   nd_device_notify+0x40/0x50
> >   nvdimm_region_notify+0x27/0x30
> >   acpi_nfit_scrub+0x341/0x590 [nfit]
> >   process_one_work+0x197/0x450
> >   worker_thread+0x4e/0x4a0
> >   kthread+0x109/0x140
> >
> > Fix nd_pmem_notify() by setting nd_region and badblocks pointers
> > properly for BTT.
> >
> > Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > Cc: Vishal Verma <vishal.l.verma@intel.com>
>
>
> Hi Toshi, how did you trigger this? I'd like to get your test into
> the regression suite.


Hi Dan,

I injected an error and started an ARS scan.  Unfortunately, my test
steps need to run on our platforms.  I think these error injection
features can be emulated, though.

Thanks,
-Toshi
Dan Williams April 26, 2017, 2:05 p.m. UTC | #3
On Tue, Apr 25, 2017 at 4:40 PM, Kani, Toshimitsu <toshi.kani@hpe.com> wrote:
> On Tue, 2017-04-25 at 16:07 -0700, Dan Williams wrote:
>> On Tue, Apr 25, 2017 at 4:04 PM, Toshi Kani <toshi.kani@hpe.com>
>> wrote:
>> > The following BUG was observed when nd_pmem_notify() was called
>> > for a BTT device.  The use of a pmem_device pointer is not valid
>> > with BTT.
>> >
>> >  BUG: unable to handle kernel NULL pointer dereference at
>> > 0000000000000030
>> >  IP: nd_pmem_notify+0x30/0xf0 [nd_pmem]
>> >  Call Trace:
>> >   nd_device_notify+0x40/0x50
>> >   child_notify+0x10/0x20
>> >   device_for_each_child+0x50/0x90
>> >   nd_region_notify+0x20/0x30
>> >   nd_device_notify+0x40/0x50
>> >   nvdimm_region_notify+0x27/0x30
>> >   acpi_nfit_scrub+0x341/0x590 [nfit]
>> >   process_one_work+0x197/0x450
>> >   worker_thread+0x4e/0x4a0
>> >   kthread+0x109/0x140
>> >
>> > Fix nd_pmem_notify() by setting nd_region and badblocks pointers
>> > properly for BTT.
>> >
>> > Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
>> > Cc: Dan Williams <dan.j.williams@intel.com>
>> > Cc: Vishal Verma <vishal.l.verma@intel.com>
>>
>>
>> Hi Toshi, how did you trigger this? I'd like to get your test into
>> the regression suite.
>
> Hi Dan,
>
> I injected an error and started an ARS scan.  Unfortunately, my test
> steps need to run on our platforms.  I think these error injection
> features can be emulated, though.

Yes, I'll try to come up with something for nfit_test.

Thanks Toshi!
diff mbox

Patch

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 5b536be..0fc1826 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -388,12 +388,12 @@  static void nd_pmem_shutdown(struct device *dev)
 
 static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 {
-	struct pmem_device *pmem = dev_get_drvdata(dev);
-	struct nd_region *nd_region = to_region(pmem);
+	struct nd_region *nd_region;
 	resource_size_t offset = 0, end_trunc = 0;
 	struct nd_namespace_common *ndns;
 	struct nd_namespace_io *nsio;
 	struct resource res;
+	struct badblocks *bb;
 
 	if (event != NVDIMM_REVALIDATE_POISON)
 		return;
@@ -402,20 +402,33 @@  static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 		struct nd_btt *nd_btt = to_nd_btt(dev);
 
 		ndns = nd_btt->ndns;
-	} else if (is_nd_pfn(dev)) {
-		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
-		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+		nd_region = to_nd_region(ndns->dev.parent);
+		nsio = to_nd_namespace_io(&ndns->dev);
+		bb = &nsio->bb;
+	} else {
+		struct pmem_device *pmem = dev_get_drvdata(dev);
 
-		ndns = nd_pfn->ndns;
-		offset = pmem->data_offset + __le32_to_cpu(pfn_sb->start_pad);
-		end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
-	} else
-		ndns = to_ndns(dev);
+		nd_region = to_region(pmem);
+		bb = &pmem->bb;
+
+		if (is_nd_pfn(dev)) {
+			struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+			struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+
+			ndns = nd_pfn->ndns;
+			offset = pmem->data_offset +
+					__le32_to_cpu(pfn_sb->start_pad);
+			end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
+		} else {
+			ndns = to_ndns(dev);
+		}
+
+		nsio = to_nd_namespace_io(&ndns->dev);
+	}
 
-	nsio = to_nd_namespace_io(&ndns->dev);
 	res.start = nsio->res.start + offset;
 	res.end = nsio->res.end - end_trunc;
-	nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
+	nvdimm_badblocks_populate(nd_region, bb, &res);
 }
 
 MODULE_ALIAS("pmem");