diff mbox series

[v2] fs/dax: deposit pagetable even when installing zero page

Message ID 20190309120721.21416-1-aneesh.kumar@linux.ibm.com (mailing list archive)
State Mainlined
Commit 11cf9d863dcb583345723b0ed72173348761e9c0
Headers show
Series [v2] fs/dax: deposit pagetable even when installing zero page | expand

Commit Message

Aneesh Kumar K.V March 9, 2019, 12:07 p.m. UTC
Architectures like ppc64 use the deposited page table to store hardware
page table slot information. Make sure we deposit a page table when
installing the zero page at the pmd level with the hash MMU.

Without this we hit

Unable to handle kernel paging request for data at address 0x00000000
Faulting instruction address: 0xc000000000082a74
Oops: Kernel access of bad area, sig: 11 [#1]
....

NIP [c000000000082a74] __hash_page_thp+0x224/0x5b0
LR [c0000000000829a4] __hash_page_thp+0x154/0x5b0
Call Trace:
 hash_page_mm+0x43c/0x740
 do_hash_page+0x2c/0x3c
 copy_from_iter_flushcache+0xa4/0x4a0
 pmem_copy_from_iter+0x2c/0x50 [nd_pmem]
 dax_copy_from_iter+0x40/0x70
 dax_iomap_actor+0x134/0x360
 iomap_apply+0xfc/0x1b0
 dax_iomap_rw+0xac/0x130
 ext4_file_write_iter+0x254/0x460 [ext4]
 __vfs_write+0x120/0x1e0
 vfs_write+0xd8/0x220
 SyS_write+0x6c/0x110
 system_call+0x3c/0x130

Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions")
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
Changes from v1:
* Add reviewed-by:
* Add Fixes:

 fs/dax.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

Comments

Aneesh Kumar K.V March 13, 2019, 4:47 a.m. UTC | #1
Hi Dan/Andrew/Jan,

"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:

> Architectures like ppc64 use the deposited page table to store hardware
> page table slot information. Make sure we deposit a page table when
> using zero page at the pmd level for hash.
>
> Without this we hit
>
> Unable to handle kernel paging request for data at address 0x00000000
> Faulting instruction address: 0xc000000000082a74
> Oops: Kernel access of bad area, sig: 11 [#1]
> ....
>
> NIP [c000000000082a74] __hash_page_thp+0x224/0x5b0
> LR [c0000000000829a4] __hash_page_thp+0x154/0x5b0
> Call Trace:
>  hash_page_mm+0x43c/0x740
>  do_hash_page+0x2c/0x3c
>  copy_from_iter_flushcache+0xa4/0x4a0
>  pmem_copy_from_iter+0x2c/0x50 [nd_pmem]
>  dax_copy_from_iter+0x40/0x70
>  dax_iomap_actor+0x134/0x360
>  iomap_apply+0xfc/0x1b0
>  dax_iomap_rw+0xac/0x130
>  ext4_file_write_iter+0x254/0x460 [ext4]
>  __vfs_write+0x120/0x1e0
>  vfs_write+0xd8/0x220
>  SyS_write+0x6c/0x110
>  system_call+0x3c/0x130
>
> Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions")
> Reviewed-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>

Any suggestions on which tree this patch should go to? Also, since this
fixes a kernel crash, we may want to get this into 5.1?

> ---
> Changes from v1:
> * Add reviewed-by:
> * Add Fixes:
>
>  fs/dax.c | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 6959837cc465..01bfb2ac34f9 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -33,6 +33,7 @@
>  #include <linux/sizes.h>
>  #include <linux/mmu_notifier.h>
>  #include <linux/iomap.h>
> +#include <asm/pgalloc.h>
>  #include "internal.h"
>  
>  #define CREATE_TRACE_POINTS
> @@ -1410,7 +1411,9 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
>  {
>  	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
>  	unsigned long pmd_addr = vmf->address & PMD_MASK;
> +	struct vm_area_struct *vma = vmf->vma;
>  	struct inode *inode = mapping->host;
> +	pgtable_t pgtable = NULL;
>  	struct page *zero_page;
>  	spinlock_t *ptl;
>  	pmd_t pmd_entry;
> @@ -1425,12 +1428,22 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
>  	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
>  			DAX_PMD | DAX_ZERO_PAGE, false);
>  
> +	if (arch_needs_pgtable_deposit()) {
> +		pgtable = pte_alloc_one(vma->vm_mm);
> +		if (!pgtable)
> +			return VM_FAULT_OOM;
> +	}
> +
>  	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
>  	if (!pmd_none(*(vmf->pmd))) {
>  		spin_unlock(ptl);
>  		goto fallback;
>  	}
>  
> +	if (pgtable) {
> +		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
> +		mm_inc_nr_ptes(vma->vm_mm);
> +	}
>  	pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
>  	pmd_entry = pmd_mkhuge(pmd_entry);
>  	set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
> @@ -1439,6 +1452,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
>  	return VM_FAULT_NOPAGE;
>  
>  fallback:
> +	if (pgtable)
> +		pte_free(vma->vm_mm, pgtable);
>  	trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
>  	return VM_FAULT_FALLBACK;
>  }
> -- 
> 2.20.1

-aneesh
Jan Kara March 13, 2019, 9:58 a.m. UTC | #2
On Wed 13-03-19 10:17:17, Aneesh Kumar K.V wrote:
> 
> Hi Dan/Andrew/Jan,
> 
> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> 
> > Architectures like ppc64 use the deposited page table to store hardware
> > page table slot information. Make sure we deposit a page table when
> > using zero page at the pmd level for hash.
> >
> > Without this we hit
> >
> > Unable to handle kernel paging request for data at address 0x00000000
> > Faulting instruction address: 0xc000000000082a74
> > Oops: Kernel access of bad area, sig: 11 [#1]
> > ....
> >
> > NIP [c000000000082a74] __hash_page_thp+0x224/0x5b0
> > LR [c0000000000829a4] __hash_page_thp+0x154/0x5b0
> > Call Trace:
> >  hash_page_mm+0x43c/0x740
> >  do_hash_page+0x2c/0x3c
> >  copy_from_iter_flushcache+0xa4/0x4a0
> >  pmem_copy_from_iter+0x2c/0x50 [nd_pmem]
> >  dax_copy_from_iter+0x40/0x70
> >  dax_iomap_actor+0x134/0x360
> >  iomap_apply+0xfc/0x1b0
> >  dax_iomap_rw+0xac/0x130
> >  ext4_file_write_iter+0x254/0x460 [ext4]
> >  __vfs_write+0x120/0x1e0
> >  vfs_write+0xd8/0x220
> >  SyS_write+0x6c/0x110
> >  system_call+0x3c/0x130
> >
> > Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions")
> > Reviewed-by: Jan Kara <jack@suse.cz>
> > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> 
> Any suggestions on which tree this patch should go to? Also, since this
> fixes a kernel crash, we may want to get this into 5.1?

I think this should go through Dan's tree...

								Honza

> > ---
> > Changes from v1:
> > * Add reviewed-by:
> > * Add Fixes:
> >
> >  fs/dax.c | 15 +++++++++++++++
> >  1 file changed, 15 insertions(+)
> >
> > diff --git a/fs/dax.c b/fs/dax.c
> > index 6959837cc465..01bfb2ac34f9 100644
> > --- a/fs/dax.c
> > +++ b/fs/dax.c
> > @@ -33,6 +33,7 @@
> >  #include <linux/sizes.h>
> >  #include <linux/mmu_notifier.h>
> >  #include <linux/iomap.h>
> > +#include <asm/pgalloc.h>
> >  #include "internal.h"
> >  
> >  #define CREATE_TRACE_POINTS
> > @@ -1410,7 +1411,9 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
> >  {
> >  	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
> >  	unsigned long pmd_addr = vmf->address & PMD_MASK;
> > +	struct vm_area_struct *vma = vmf->vma;
> >  	struct inode *inode = mapping->host;
> > +	pgtable_t pgtable = NULL;
> >  	struct page *zero_page;
> >  	spinlock_t *ptl;
> >  	pmd_t pmd_entry;
> > @@ -1425,12 +1428,22 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
> >  	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
> >  			DAX_PMD | DAX_ZERO_PAGE, false);
> >  
> > +	if (arch_needs_pgtable_deposit()) {
> > +		pgtable = pte_alloc_one(vma->vm_mm);
> > +		if (!pgtable)
> > +			return VM_FAULT_OOM;
> > +	}
> > +
> >  	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
> >  	if (!pmd_none(*(vmf->pmd))) {
> >  		spin_unlock(ptl);
> >  		goto fallback;
> >  	}
> >  
> > +	if (pgtable) {
> > +		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
> > +		mm_inc_nr_ptes(vma->vm_mm);
> > +	}
> >  	pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
> >  	pmd_entry = pmd_mkhuge(pmd_entry);
> >  	set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
> > @@ -1439,6 +1452,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
> >  	return VM_FAULT_NOPAGE;
> >  
> >  fallback:
> > +	if (pgtable)
> > +		pte_free(vma->vm_mm, pgtable);
> >  	trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
> >  	return VM_FAULT_FALLBACK;
> >  }
> > -- 
> > 2.20.1
> 
> -aneesh
>
Dan Williams March 13, 2019, 3:46 p.m. UTC | #3
On Wed, Mar 13, 2019 at 2:58 AM Jan Kara <jack@suse.cz> wrote:
>
> On Wed 13-03-19 10:17:17, Aneesh Kumar K.V wrote:
> >
> > Hi Dan/Andrew/Jan,
> >
> > "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> >
> > > Architectures like ppc64 use the deposited page table to store hardware
> > > page table slot information. Make sure we deposit a page table when
> > > using zero page at the pmd level for hash.
> > >
> > > Without this we hit
> > >
> > > Unable to handle kernel paging request for data at address 0x00000000
> > > Faulting instruction address: 0xc000000000082a74
> > > Oops: Kernel access of bad area, sig: 11 [#1]
> > > ....
> > >
> > > NIP [c000000000082a74] __hash_page_thp+0x224/0x5b0
> > > LR [c0000000000829a4] __hash_page_thp+0x154/0x5b0
> > > Call Trace:
> > >  hash_page_mm+0x43c/0x740
> > >  do_hash_page+0x2c/0x3c
> > >  copy_from_iter_flushcache+0xa4/0x4a0
> > >  pmem_copy_from_iter+0x2c/0x50 [nd_pmem]
> > >  dax_copy_from_iter+0x40/0x70
> > >  dax_iomap_actor+0x134/0x360
> > >  iomap_apply+0xfc/0x1b0
> > >  dax_iomap_rw+0xac/0x130
> > >  ext4_file_write_iter+0x254/0x460 [ext4]
> > >  __vfs_write+0x120/0x1e0
> > >  vfs_write+0xd8/0x220
> > >  SyS_write+0x6c/0x110
> > >  system_call+0x3c/0x130
> > >
> > > Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions")
> > > Reviewed-by: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> >
> > Any suggestions on which tree this patch should go to? Also, since this
> > fixes a kernel crash, we may want to get this into 5.1?
>
> I think this should go through Dan's tree...

I'll merge this and let it soak in -next for a week and then submit for 5.1-rc2.
Aneesh Kumar K.V April 8, 2019, 9:38 a.m. UTC | #4
Hi Dan,

Dan Williams <dan.j.williams@intel.com> writes:

> On Wed, Mar 13, 2019 at 2:58 AM Jan Kara <jack@suse.cz> wrote:
>>
>> On Wed 13-03-19 10:17:17, Aneesh Kumar K.V wrote:
>> >
>> > Hi Dan/Andrew/Jan,
>> >
>> > "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>> >
>> > > Architectures like ppc64 use the deposited page table to store hardware
>> > > page table slot information. Make sure we deposit a page table when
>> > > using zero page at the pmd level for hash.
>> > >
>> > > Without this we hit
>> > >
>> > > Unable to handle kernel paging request for data at address 0x00000000
>> > > Faulting instruction address: 0xc000000000082a74
>> > > Oops: Kernel access of bad area, sig: 11 [#1]
>> > > ....
>> > >
>> > > NIP [c000000000082a74] __hash_page_thp+0x224/0x5b0
>> > > LR [c0000000000829a4] __hash_page_thp+0x154/0x5b0
>> > > Call Trace:
>> > >  hash_page_mm+0x43c/0x740
>> > >  do_hash_page+0x2c/0x3c
>> > >  copy_from_iter_flushcache+0xa4/0x4a0
>> > >  pmem_copy_from_iter+0x2c/0x50 [nd_pmem]
>> > >  dax_copy_from_iter+0x40/0x70
>> > >  dax_iomap_actor+0x134/0x360
>> > >  iomap_apply+0xfc/0x1b0
>> > >  dax_iomap_rw+0xac/0x130
>> > >  ext4_file_write_iter+0x254/0x460 [ext4]
>> > >  __vfs_write+0x120/0x1e0
>> > >  vfs_write+0xd8/0x220
>> > >  SyS_write+0x6c/0x110
>> > >  system_call+0x3c/0x130
>> > >
>> > > Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions")
>> > > Reviewed-by: Jan Kara <jack@suse.cz>
>> > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> >
>> > Any suggestions on which tree this patch should go to? Also, since this
>> > fixes a kernel crash, we may want to get this into 5.1?
>>
>> I think this should go through Dan's tree...
>
> I'll merge this and let it soak in -next for a week and then submit for 5.1-rc2.

Any update on this? Did you get to merge this?

-aneesh
Dan Williams April 8, 2019, 3:54 p.m. UTC | #5
On Mon, Apr 8, 2019 at 2:39 AM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
>
>  Hi Dan,
>
> Dan Williams <dan.j.williams@intel.com> writes:
>
> > On Wed, Mar 13, 2019 at 2:58 AM Jan Kara <jack@suse.cz> wrote:
> >>
> >> On Wed 13-03-19 10:17:17, Aneesh Kumar K.V wrote:
> >> >
> >> > Hi Dan/Andrew/Jan,
> >> >
> >> > "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> >> >
> >> > > Architectures like ppc64 use the deposited page table to store hardware
> >> > > page table slot information. Make sure we deposit a page table when
> >> > > using zero page at the pmd level for hash.
> >> > >
> >> > > Without this we hit
> >> > >
> >> > > Unable to handle kernel paging request for data at address 0x00000000
> >> > > Faulting instruction address: 0xc000000000082a74
> >> > > Oops: Kernel access of bad area, sig: 11 [#1]
> >> > > ....
> >> > >
> >> > > NIP [c000000000082a74] __hash_page_thp+0x224/0x5b0
> >> > > LR [c0000000000829a4] __hash_page_thp+0x154/0x5b0
> >> > > Call Trace:
> >> > >  hash_page_mm+0x43c/0x740
> >> > >  do_hash_page+0x2c/0x3c
> >> > >  copy_from_iter_flushcache+0xa4/0x4a0
> >> > >  pmem_copy_from_iter+0x2c/0x50 [nd_pmem]
> >> > >  dax_copy_from_iter+0x40/0x70
> >> > >  dax_iomap_actor+0x134/0x360
> >> > >  iomap_apply+0xfc/0x1b0
> >> > >  dax_iomap_rw+0xac/0x130
> >> > >  ext4_file_write_iter+0x254/0x460 [ext4]
> >> > >  __vfs_write+0x120/0x1e0
> >> > >  vfs_write+0xd8/0x220
> >> > >  SyS_write+0x6c/0x110
> >> > >  system_call+0x3c/0x130
> >> > >
> >> > > Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions")
> >> > > Reviewed-by: Jan Kara <jack@suse.cz>
> >> > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> >> >
> >> > Any suggestions on which tree this patch should go to? Also, since this
> >> > fixes a kernel crash, we may want to get this into 5.1?
> >>
> >> I think this should go through Dan's tree...
> >
> > I'll merge this and let it soak in -next for a week and then submit for 5.1-rc2.
>
> Any update on this? Did you get to merge this?

Thanks for the reminder. I will send this out this week along with some
other libnvdimm-related fixes.
diff mbox series

Patch

diff --git a/fs/dax.c b/fs/dax.c
index 6959837cc465..01bfb2ac34f9 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -33,6 +33,7 @@ 
 #include <linux/sizes.h>
 #include <linux/mmu_notifier.h>
 #include <linux/iomap.h>
+#include <asm/pgalloc.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -1410,7 +1411,9 @@  static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	unsigned long pmd_addr = vmf->address & PMD_MASK;
+	struct vm_area_struct *vma = vmf->vma;
 	struct inode *inode = mapping->host;
+	pgtable_t pgtable = NULL;
 	struct page *zero_page;
 	spinlock_t *ptl;
 	pmd_t pmd_entry;
@@ -1425,12 +1428,22 @@  static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
 			DAX_PMD | DAX_ZERO_PAGE, false);
 
+	if (arch_needs_pgtable_deposit()) {
+		pgtable = pte_alloc_one(vma->vm_mm);
+		if (!pgtable)
+			return VM_FAULT_OOM;
+	}
+
 	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (!pmd_none(*(vmf->pmd))) {
 		spin_unlock(ptl);
 		goto fallback;
 	}
 
+	if (pgtable) {
+		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+		mm_inc_nr_ptes(vma->vm_mm);
+	}
 	pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
 	pmd_entry = pmd_mkhuge(pmd_entry);
 	set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
@@ -1439,6 +1452,8 @@  static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 	return VM_FAULT_NOPAGE;
 
 fallback:
+	if (pgtable)
+		pte_free(vma->vm_mm, pgtable);
 	trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, *entry);
 	return VM_FAULT_FALLBACK;
 }