[1/7] Revert "mm: take i_mmap_lock in unmap_mapping_range() for DAX"
diff mbox

Message ID 1443685599-4843-2-git-send-email-david@fromorbit.com
State Superseded
Headers show

Commit Message

Dave Chinner Oct. 1, 2015, 7:46 a.m. UTC
This reverts commit 46c043ede4711e8d598b9d63c5616c1fedb0605e.
---
 fs/dax.c    | 36 ++++++++++++++++--------------------
 mm/memory.c | 11 +++++++++--
 2 files changed, 25 insertions(+), 22 deletions(-)

Comments

kernel test robot Oct. 1, 2015, 8:35 a.m. UTC | #1
Hi Dave,

[auto build test results on v4.3-rc3 -- if it's an inappropriate base, please ignore]

reproduce:
        # apt-get install sparse
        make ARCH=x86_64 allmodconfig
        make C=1 CF=-D__CHECK_ENDIAN__


sparse warnings: (new ones prefixed by >>)

>> fs/dax.c:626:50: sparse: incorrect type in argument 1 (different address spaces)
   fs/dax.c:626:50:    expected void *page
   fs/dax.c:626:50:    got void [noderef] <asn:5>*

vim +626 fs/dax.c

   610			result = VM_FAULT_NOPAGE;
   611			spin_unlock(ptl);
   612		} else {
   613			sector = bh.b_blocknr << (blkbits - 9);
   614			length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
   615							bh.b_size);
   616			if (length < 0) {
   617				result = VM_FAULT_SIGBUS;
   618				goto out;
   619			}
   620			if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
   621				goto fallback;
   622	
   623			if (buffer_unwritten(&bh) || buffer_new(&bh)) {
   624				int i;
   625				for (i = 0; i < PTRS_PER_PMD; i++)
 > 626					clear_page(kaddr + i * PAGE_SIZE);
   627				count_vm_event(PGMAJFAULT);
   628				mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
   629				result |= VM_FAULT_MAJOR;
   630			}
   631	
   632			result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
   633		}
   634	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Ross Zwisler Oct. 1, 2015, 8:27 p.m. UTC | #2
On Thu, Oct 01, 2015 at 05:46:33PM +1000, Dave Chinner wrote:
> This reverts commit 46c043ede4711e8d598b9d63c5616c1fedb0605e.
> ---
>  fs/dax.c    | 36 ++++++++++++++++--------------------
>  mm/memory.c | 11 +++++++++--
>  2 files changed, 25 insertions(+), 22 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 7ae6df7..400fe95 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -569,26 +569,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
>  	if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
>  		goto fallback;
>  
> -	if (buffer_unwritten(&bh) || buffer_new(&bh)) {
> -		int i;
> -		for (i = 0; i < PTRS_PER_PMD; i++)
> -			clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
> -		wmb_pmem();

The above two lines were updated to use the PMEM API with this commit:

commit d77e92e270ed ("dax: update PMD fault handler with PMEM API")

but they aren't updated in the reverted version here: 

> @@ -633,6 +620,15 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
>  		if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
>  			goto fallback;
>  
> +		if (buffer_unwritten(&bh) || buffer_new(&bh)) {
> +			int i;
> +			for (i = 0; i < PTRS_PER_PMD; i++)
> +				clear_page(kaddr + i * PAGE_SIZE);
> +			count_vm_event(PGMAJFAULT);
> +			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> +			result |= VM_FAULT_MAJOR;
> +		}
> +
>  		result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
>  	}

This is the source of the follow-up sparse warning from the kbuild robot.


Also, if I understood your previous mails correctly you were targeting the
first two revert patches for v4.3 so we get back to v4.2 level locking, and
the rest of the series will target v4.4, correct?  How does this work?  Do the
patches need to be split into two series and tested separately?
Dave Chinner Oct. 1, 2015, 10:32 p.m. UTC | #3
On Thu, Oct 01, 2015 at 02:27:29PM -0600, Ross Zwisler wrote:
> On Thu, Oct 01, 2015 at 05:46:33PM +1000, Dave Chinner wrote:
> > This reverts commit 46c043ede4711e8d598b9d63c5616c1fedb0605e.
> > ---
> >  fs/dax.c    | 36 ++++++++++++++++--------------------
> >  mm/memory.c | 11 +++++++++--
> >  2 files changed, 25 insertions(+), 22 deletions(-)
> > 
> > diff --git a/fs/dax.c b/fs/dax.c
> > index 7ae6df7..400fe95 100644
> > --- a/fs/dax.c
> > +++ b/fs/dax.c
> > @@ -569,26 +569,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> >  	if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
> >  		goto fallback;
> >  
> > -	if (buffer_unwritten(&bh) || buffer_new(&bh)) {
> > -		int i;
> > -		for (i = 0; i < PTRS_PER_PMD; i++)
> > -			clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
> > -		wmb_pmem();
> 
> The above two lines were updated to use the PMEM API with this commit:
> 
> commit d77e92e270ed ("dax: update PMD fault handler with PMEM API")
> 
> but they aren't updated in the reverted version here: 
> 
> > @@ -633,6 +620,15 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> >  		if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
> >  			goto fallback;
> >  
> > +		if (buffer_unwritten(&bh) || buffer_new(&bh)) {
> > +			int i;
> > +			for (i = 0; i < PTRS_PER_PMD; i++)
> > +				clear_page(kaddr + i * PAGE_SIZE);
> > +			count_vm_event(PGMAJFAULT);
> > +			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
> > +			result |= VM_FAULT_MAJOR;
> > +		}
> > +
> >  		result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
> >  	}
> 
> This is the source of the follow-up sparse warning from the kbuild robot.

I couldn't work out what set of commits I needed to revert to get a
clean revert, so I just reverted the commits and hacked out the
revert failures to what looked ok. Feel free to send me a clean set
of reverts, and I'll replace these patches with them... :)

> Also, if I understood your previous mails correctly you were targeting the
> first two revert patches for v4.3 so we get back to v4.2 level locking, and
> the rest of the series will target v4.4, correct?  How does this work?  Do the
> patches need to be split into two series and tested separately?

Test it and push the reverts however you like. I don't care how the
reverts get to 4.3 - I'll be carrying them locally in my trees from
now on, and so my development and testing is now unaffected by the bugs
that are in the 4.3 code. If you aren't going to push them for 4.3
then I'd suggest that they go to Linus along with the rest of the
XFS changes in this series.

FWIW, I'm quite happy to host all the pending DAX changes in a
public git tree and ask for it to be included in linux-next. It's
probably a good idea to do this because it makes it much easier to
co-ordinate merges when we are touching multiple subsystems (ext4,
xfs, dax, mm, etc). And it will help prevent the "patches molder on
the list until Andrew hoovers them up" problem and so prevent this
situation from happening in the future...

Cheers,

Dave.
Ross Zwisler Oct. 1, 2015, 10:47 p.m. UTC | #4
On Fri, Oct 02, 2015 at 08:32:40AM +1000, Dave Chinner wrote:
> I couldn't work out what set of commits I needed to revert to get a
> clean revert, so I just reverted the commits and hacked out the
> revert failures to what looked ok. Feel free to send me a clean set
> of reverts, and I'll replace these patches with them... :)

Will do.  I will queue the reverts in my external tree & ask Linus to pull
them into v4.3 so we don't ship with deadlocks.

> > Also, if I understood your previous mails correctly you were targeting the
> > first two revert patches for v4.3 so we get back to v4.2 level locking, and
> > the rest of the series will target v4.4, correct?  How does this work?  Do the
> > patches need to be split into two series and tested separately?
> 
> Test it and push the reverts however you like. I don't care how the
> reverts get to 4.3 - I'll be carrying them locally in my trees from
> now on, and so my development and testing is now unaffected by the bugs
> that are in the 4.3 code. If you aren't going to push them for 4.3
> then I'd suggest that they go to Linus along with the rest of the
> XFS changes in this series.
> 
> FWIW, I'm quite happy to host all the pending DAX changes in a
> public git tree and ask for it to be included in linux-next. It's
> probably a good idea to do this because it makes it much easier to
> co-ordinate merges when we are touching multiple subsystems (ext4,
> xfs, dax, mm, etc). And it will help prevent the "patches molder on
> the list until Andrew hoovers them up" problem and so prevent this
> situation from happening in the future...

No objections from me. :)  I agree that it would be nice to have a central
home for all the DAX patches.

Patch
diff mbox

diff --git a/fs/dax.c b/fs/dax.c
index 7ae6df7..400fe95 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -569,26 +569,6 @@  int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
 		goto fallback;
 
-	if (buffer_unwritten(&bh) || buffer_new(&bh)) {
-		int i;
-		for (i = 0; i < PTRS_PER_PMD; i++)
-			clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
-		wmb_pmem();
-		count_vm_event(PGMAJFAULT);
-		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
-		result |= VM_FAULT_MAJOR;
-	}
-
-	/*
-	 * If we allocated new storage, make sure no process has any
-	 * zero pages covering this hole
-	 */
-	if (buffer_new(&bh)) {
-		i_mmap_unlock_write(mapping);
-		unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
-		i_mmap_lock_write(mapping);
-	}
-
 	/*
 	 * If a truncate happened while we were allocating blocks, we may
 	 * leave blocks allocated to the file that are beyond EOF.  We can't
@@ -603,6 +583,13 @@  int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 	if ((pgoff | PG_PMD_COLOUR) >= size)
 		goto fallback;
 
+	/*
+	 * If we allocated new storage, make sure no process has any
+	 * zero pages covering this hole
+	 */
+	if (buffer_new(&bh))
+		unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
+
 	if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) {
 		spinlock_t *ptl;
 		pmd_t entry;
@@ -633,6 +620,15 @@  int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
 		if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
 			goto fallback;
 
+		if (buffer_unwritten(&bh) || buffer_new(&bh)) {
+			int i;
+			for (i = 0; i < PTRS_PER_PMD; i++)
+				clear_page(kaddr + i * PAGE_SIZE);
+			count_vm_event(PGMAJFAULT);
+			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+			result |= VM_FAULT_MAJOR;
+		}
+
 		result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
 	}
 
diff --git a/mm/memory.c b/mm/memory.c
index 9cb2747..5ec066f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2426,10 +2426,17 @@  void unmap_mapping_range(struct address_space *mapping,
 	if (details.last_index < details.first_index)
 		details.last_index = ULONG_MAX;
 
-	i_mmap_lock_write(mapping);
+
+	/*
+	 * DAX already holds i_mmap_lock to serialise file truncate vs
+	 * page fault and page fault vs page fault.
+	 */
+	if (!IS_DAX(mapping->host))
+		i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
-	i_mmap_unlock_write(mapping);
+	if (!IS_DAX(mapping->host))
+		i_mmap_unlock_write(mapping);
 }
 EXPORT_SYMBOL(unmap_mapping_range);