[v3,1/6] libnvdimm/namespace: Make namespace size validation arch dependent
diff mbox series

Message ID 20200108065219.171221-1-aneesh.kumar@linux.ibm.com
State New
Headers show
Series
  • [v3,1/6] libnvdimm/namespace: Make namespace size validation arch dependent
Related show

Commit Message

Aneesh Kumar K.V Jan. 8, 2020, 6:52 a.m. UTC
The page size used to map the namespace is arch dependent. For example
architectures like ppc64 use 16MB page size for direct-mapping. If the namespace
size is not aligned to the mapping page size, we can observe kernel crash
during namespace init and destroy.

This is due to kernel doing partial map/unmap of the resource range

BUG: Unable to handle kernel data access at 0xc001000406000000
Faulting instruction address: 0xc000000000090790
NIP [c000000000090790] arch_add_memory+0xc0/0x130
LR [c000000000090744] arch_add_memory+0x74/0x130
Call Trace:
 arch_add_memory+0x74/0x130 (unreliable)
 memremap_pages+0x74c/0xa30
 devm_memremap_pages+0x3c/0xa0
 pmem_attach_disk+0x188/0x770
 nvdimm_bus_probe+0xd8/0x470
 really_probe+0x148/0x570
 driver_probe_device+0x19c/0x1d0
 device_driver_attach+0xcc/0x100
 bind_store+0x134/0x1c0
 drv_attr_store+0x44/0x60
 sysfs_kf_write+0x74/0xc0
 kernfs_fop_write+0x1b4/0x290
 __vfs_write+0x3c/0x70
 vfs_write+0xd0/0x260
 ksys_write+0xdc/0x130
 system_call+0x5c/0x68

Kernel should also ensure that namespace size is also mulitple of subsection size.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
Changes from v2:
* Use SUBSECTION_SIZE instead of PAGE_SIZE. Namespace size should be multiple
of SUBSECTION size.

 arch/arm64/mm/flush.c     |  6 ++++++
 arch/powerpc/lib/pmem.c   | 11 +++++++++++
 arch/x86/mm/pageattr.c    |  7 +++++++
 include/linux/libnvdimm.h |  1 +
 4 files changed, 25 insertions(+)

Comments

Jeff Moyer Jan. 10, 2020, 8:38 p.m. UTC | #1
Hi, Aneesh,

After applying this patch series, several of my namespaces no longer
enumerate:

Before:

# ndctl list
[
  {
    "dev":"namespace0.2",
    "mode":"sector",
    "size":106541672960,
    "uuid":"ea1122b2-c219-424c-b09c-38a6e94a1042",
    "sector_size":512,
    "blockdev":"pmem0.2s"
  },
  {
    "dev":"namespace0.1",
    "mode":"fsdax",
    "map":"dev",
    "size":10567548928,
    "uuid":"68b6746f-481a-4ae6-80b5-71d62176606c",
    "sector_size":512,
    "align":4096,
    "blockdev":"pmem0.1"
  },
  {
    "dev":"namespace0.0",
    "mode":"fsdax",
    "map":"dev",
    "size":52850327552,
    "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
    "sector_size":512,
    "align":2097152,
    "blockdev":"pmem0"
  }
]

After:

# ndctl list
[
  {
    "dev":"namespace0.0",
    "mode":"fsdax",
    "map":"dev",
    "size":52850327552,
    "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
    "sector_size":512,
    "align":2097152,
    "blockdev":"pmem0"
  }
]

I won't have time to dig into it this week, but I wanted to mention it
before Dan merged these patches.

I'll follow up next week with more information.

Cheers,
Jeff
Dan Williams Jan. 10, 2020, 9:03 p.m. UTC | #2
On Fri, Jan 10, 2020 at 12:39 PM Jeff Moyer <jmoyer@redhat.com> wrote:
>
> Hi, Aneesh,
>
> After applying this patch series, several of my namespaces no longer
> enumerate:
>
> Before:
>
> # ndctl list
> [
>   {
>     "dev":"namespace0.2",
>     "mode":"sector",
>     "size":106541672960,
>     "uuid":"ea1122b2-c219-424c-b09c-38a6e94a1042",
>     "sector_size":512,
>     "blockdev":"pmem0.2s"
>   },
>   {
>     "dev":"namespace0.1",
>     "mode":"fsdax",
>     "map":"dev",
>     "size":10567548928,
>     "uuid":"68b6746f-481a-4ae6-80b5-71d62176606c",
>     "sector_size":512,
>     "align":4096,
>     "blockdev":"pmem0.1"
>   },
>   {
>     "dev":"namespace0.0",
>     "mode":"fsdax",
>     "map":"dev",
>     "size":52850327552,
>     "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
>     "sector_size":512,
>     "align":2097152,
>     "blockdev":"pmem0"
>   }
> ]
>
> After:
>
> # ndctl list
> [
>   {
>     "dev":"namespace0.0",
>     "mode":"fsdax",
>     "map":"dev",
>     "size":52850327552,
>     "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
>     "sector_size":512,
>     "align":2097152,
>     "blockdev":"pmem0"
>   }
> ]
>
> I won't have time to dig into it this week, but I wanted to mention it
> before Dan merged these patches.
>
> I'll follow up next week with more information.

Thanks Jeff, I hadn't had a chance to dig in on these yet.

Aneesh are you able to run the ndctl unit tests? Even if they don't
run on powerpc you should be able to get them to run on x86 qemu just
to catch the basics.
Aneesh Kumar K.V Jan. 11, 2020, 4:29 a.m. UTC | #3
On 1/11/20 2:33 AM, Dan Williams wrote:
> On Fri, Jan 10, 2020 at 12:39 PM Jeff Moyer <jmoyer@redhat.com> wrote:
>>
>> Hi, Aneesh,
>>
>> After applying this patch series, several of my namespaces no longer
>> enumerate:
>>
>> Before:
>>
>> # ndctl list
>> [
>>    {
>>      "dev":"namespace0.2",
>>      "mode":"sector",
>>      "size":106541672960,
>>      "uuid":"ea1122b2-c219-424c-b09c-38a6e94a1042",
>>      "sector_size":512,
>>      "blockdev":"pmem0.2s"
>>    },
>>    {
>>      "dev":"namespace0.1",
>>      "mode":"fsdax",
>>      "map":"dev",
>>      "size":

>>      "uuid":"68b6746f-481a-4ae6-80b5-71d62176606c",
>>      "sector_size":512,
>>      "align":4096,
>>      "blockdev":"pmem0.1"
>>    },
>>    {
>>      "dev":"namespace0.0",
>>      "mode":"fsdax",
>>      "map":"dev",
>>      "size":52850327552,
>>      "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
>>      "sector_size":512,
>>      "align":2097152,
>>      "blockdev":"pmem0"
>>    }
>> ]
>>
>> After:
>>
>> # ndctl list
>> [
>>    {
>>      "dev":"namespace0.0",
>>      "mode":"fsdax",
>>      "map":"dev",
>>      "size":52850327552,
>>      "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
>>      "sector_size":512,
>>      "align":2097152,
>>      "blockdev":"pmem0"
>>    }
>> ]
>>
>> I won't have time to dig into it this week, but I wanted to mention it
>> before Dan merged these patches.
>>
>> I'll follow up next week with more information.
> 
> Thanks Jeff, I hadn't had a chance to dig in on these yet.
> 
> Aneesh are you able to run the ndctl unit tests? Even if they don't
> run on powerpc you should be able to get them to run on x86 qemu just
> to catch the basics.
> 

Thanks for the feedback. I will test the series with qemu.

-aneesh
Aneesh Kumar K.V Jan. 11, 2020, 4:33 a.m. UTC | #4
On 1/11/20 2:08 AM, Jeff Moyer wrote:
> Hi, Aneesh,
> 
> After applying this patch series, several of my namespaces no longer
> enumerate:
> 
> Before:
> 
> # ndctl list
> [
>    {
>      "dev":"namespace0.2",
>      "mode":"sector",
>      "size":106541672960,
>      "uuid":"ea1122b2-c219-424c-b09c-38a6e94a1042",
>      "sector_size":512,
>      "blockdev":"pmem0.2s"
>    },
>    {
>      "dev":"namespace0.1",
>      "mode":"fsdax",
>      "map":"dev",
>      "size":10567548928,
>      "uuid":"68b6746f-481a-4ae6-80b5-71d62176606c",
>      "sector_size":512,
>      "align":4096,
>      "blockdev":"pmem0.1"
>    },
>    {
>      "dev":"namespace0.0",
>      "mode":"fsdax",
>      "map":"dev",
>      "size":52850327552,
>      "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
>      "sector_size":512,
>      "align":2097152,
>      "blockdev":"pmem0"
>    }
> ]
> 
> After:
> 
> # ndctl list
> [
>    {
>      "dev":"namespace0.0",
>      "mode":"fsdax",
>      "map":"dev",
>      "size":52850327552,
>      "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
>      "sector_size":512,
>      "align":2097152,
>      "blockdev":"pmem0"
>    }
> ]
> 
> I won't have time to dig into it this week, but I wanted to mention it
> before Dan merged these patches.
> 
> I'll follow up next week with more information.
> 

dmesg should contain details  like

[    5.810939] nd_pmem namespace0.1: invalid size/SPA
[    5.810969] nd_pmem: probe of namespace0.1 failed with error -95

This is mostly due to the namespace start address not aligned to 
subsection size.

"namespace0.2" not having a 2MB aligned size which cause namespace 0.1 
start addr to be not aligned. Hence both the namespace are marked disabled.

-aneesh
Dan Williams Jan. 11, 2020, 4:55 a.m. UTC | #5
On Fri, Jan 10, 2020 at 8:33 PM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> On 1/11/20 2:08 AM, Jeff Moyer wrote:
> > Hi, Aneesh,
> >
> > After applying this patch series, several of my namespaces no longer
> > enumerate:
> >
> > Before:
> >
> > # ndctl list
> > [
> >    {
> >      "dev":"namespace0.2",
> >      "mode":"sector",
> >      "size":106541672960,
> >      "uuid":"ea1122b2-c219-424c-b09c-38a6e94a1042",
> >      "sector_size":512,
> >      "blockdev":"pmem0.2s"
> >    },
> >    {
> >      "dev":"namespace0.1",
> >      "mode":"fsdax",
> >      "map":"dev",
> >      "size":10567548928,
> >      "uuid":"68b6746f-481a-4ae6-80b5-71d62176606c",
> >      "sector_size":512,
> >      "align":4096,
> >      "blockdev":"pmem0.1"
> >    },
> >    {
> >      "dev":"namespace0.0",
> >      "mode":"fsdax",
> >      "map":"dev",
> >      "size":52850327552,
> >      "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
> >      "sector_size":512,
> >      "align":2097152,
> >      "blockdev":"pmem0"
> >    }
> > ]
> >
> > After:
> >
> > # ndctl list
> > [
> >    {
> >      "dev":"namespace0.0",
> >      "mode":"fsdax",
> >      "map":"dev",
> >      "size":52850327552,
> >      "uuid":"6d3a0199-5d9a-4fed-830d-e25249b70571",
> >      "sector_size":512,
> >      "align":2097152,
> >      "blockdev":"pmem0"
> >    }
> > ]
> >
> > I won't have time to dig into it this week, but I wanted to mention it
> > before Dan merged these patches.
> >
> > I'll follow up next week with more information.
> >
>
> dmesg should contain details  like
>
> [    5.810939] nd_pmem namespace0.1: invalid size/SPA
> [    5.810969] nd_pmem: probe of namespace0.1 failed with error -95
>
> This is mostly due to the namespace start address not aligned to
> subsection size.
>
> "namespace0.2" not having a 2MB aligned size which cause namespace 0.1
> start addr to be not aligned. Hence both the namespace are marked disabled.
>

2 observations:

- It's ok if the namespace start address is not subsection aligned as
long as the mapped portion for data access is subsection aligned, at
least on x86.

- "sector" mode namespaces are not mapped by devm_memremap_pages() so
there should be no restriction there. If powerpc can't map them that's
a separate concern.

So, cross arch compatible namespaces is a goal, but not regressing
existing namespaces takes precedence. I'd be happy if newly created
namespaces tried to account for all the arch quirks, but if libnvdimm
can enable a namespace it should try.
Aneesh Kumar K.V Jan. 13, 2020, 3:08 p.m. UTC | #6
Dan Williams <dan.j.williams@intel.com> writes:

> On Fri, Jan 10, 2020 at 8:33 PM Aneesh Kumar K.V
> <aneesh.kumar@linux.ibm.com> wrote:
>>
>> On 1/11/20 2:08 AM, Jeff Moyer wrote:
>> > Hi, Aneesh,
>
>> "namespace0.2" not having a 2MB aligned size which cause namespace 0.1
>> start addr to be not aligned. Hence both the namespace are marked disabled.
>>
>
> 2 observations:
>
> - It's ok if the namespace start address is not subsection aligned as
> long as the mapped portion for data access is subsection aligned, at
> least on x86.
>
> - "sector" mode namespaces are not mapped by devm_memremap_pages() so
> there should be no restriction there. If powerpc can't map them that's
> a separate concern.

Does that mean the `supported_size_align` attribute should be a property
of pfn and dax seed device? Considering we don't want to apply this
restrictions for blk, raw namespace, and btt mode namespace should we
make the attribute a seed device property rather than a namespace
property?


>
> So, cross arch compatible namespaces is a goal, but not regressing
> existing namespaces takes precedence. I'd be happy if newly created
> namespaces tried to account for all the arch quirks, but if libnvdimm
> can enable a namespace it should try.

Ok. So that means we apply the alignment rules when creating new
namespaces irrespective of its type/mode. But when initializing via scan
label we only apply them for devdax and fsdax namespace?

Something like

static bool nvdimm_valid_namespace(struct device *dev,
		struct nd_namespace_common *ndns, resource_size_t size)
{
	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
	unsigned long align_size = arch_namespace_align_size();
	struct resource *res;
	u32 remainder;

	/*
	 * Don't validate the start and size for blk namespace type
	 */
	if (is_namespace_blk(&ndns->dev))
		return true;

	/*
	 * For btt and raw namespace we use ioremap. Assume both can work
	 * with PAGE_SIZE alignment.
	 */
	if (is_nd_btt(dev) || is_namespace_io(dev))
		return true;

	div_u64_rem(size, align_size * nd_region->ndr_mappings, &remainder);
	if (remainder)
		return false;

	if (is_namespace_pmem(&ndns->dev)) {
		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(&ndns->dev);

		res = &nspm->nsio.res;
	} else
		/* cannot reach */
		return false;

	div_u64_rem(res->start, align_size * nd_region->ndr_mappings, &remainder);
	if (remainder)
		return false;

	return true;
}

-aneesh

Patch
diff mbox series

diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index ac485163a4a7..5d82484ac8ca 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -91,4 +91,10 @@  void arch_invalidate_pmem(void *addr, size_t size)
 	__inval_dcache_area(addr, size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
+unsigned long arch_namespace_align_size(void)
+{
+	return (1UL << SUBSECTION_SHIFT);
+}
+EXPORT_SYMBOL_GPL(arch_namespace_align_size);
 #endif
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 0666a8d29596..b94e7d4876d1 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -26,6 +26,17 @@  void arch_invalidate_pmem(void *addr, size_t size)
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 
+unsigned long arch_namespace_align_size(void)
+{
+	unsigned long sub_section_size = (1UL << SUBSECTION_SHIFT);
+
+	if (radix_enabled())
+		return sub_section_size;
+	return max(sub_section_size, (1UL << mmu_psize_defs[mmu_linear_psize].shift));
+
+}
+EXPORT_SYMBOL_GPL(arch_namespace_align_size);
+
 /*
  * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
  */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 1b99ad05b117..0bcd22e11dd0 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -310,6 +310,13 @@  void arch_invalidate_pmem(void *addr, size_t size)
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 
+unsigned long arch_namespace_align_size(void)
+{
+	return (1UL << SUBSECTION_SHIFT);
+}
+EXPORT_SYMBOL_GPL(arch_namespace_align_size);
+
+
 static void __cpa_flush_all(void *arg)
 {
 	unsigned long cache = (unsigned long)arg;
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 9df091bd30ba..f2a33f2e3ba8 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -284,4 +284,5 @@  static inline void arch_invalidate_pmem(void *addr, size_t size)
 }
 #endif
 
+unsigned long arch_namespace_align_size(void);
 #endif /* __LIBNVDIMM_H__ */