diff mbox series

vfs: fix page locking deadlocks when deduping files

Message ID 20190807145114.GP7138@magnolia (mailing list archive)
State New, archived
Headers show
Series vfs: fix page locking deadlocks when deduping files | expand

Commit Message

Darrick J. Wong Aug. 7, 2019, 2:51 p.m. UTC
From: Darrick J. Wong <darrick.wong@oracle.com>

When dedupe wants to use the page cache to compare parts of two files
for dedupe, we must be very careful to handle locking correctly.  The
current code doesn't do this.  It must lock and unlock the page only
once if the two pages are the same, since the overlapping range check
doesn't catch this when blocksize < pagesize.  If the pages are distinct
but from the same file, we must observe page locking order and lock them
in order of increasing offset to avoid clashing with writeback locking.

Fixes: 876bec6f9bbfcb3 ("vfs: refactor clone/dedupe_file_range common functions")
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/read_write.c |   31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

Comments

Dave Chinner Aug. 7, 2019, 10:38 p.m. UTC | #1
On Wed, Aug 07, 2019 at 07:51:14AM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@oracle.com>
> +/*
> + * Lock two pages, ensuring that we lock in offset order if the pages are from
> + * the same file.
> + */
> +static void vfs_lock_two_pages(struct page *page1, struct page *page2)
> +{
> +	if (page1 == page2) {
> +		lock_page(page1);
> +		return;
> +	}
> +
> +	if (page1->mapping == page2->mapping && page1->index > page2->index)
> +		swap(page1, page2);

I would do this even if the pages are on different mappings. That
way we don't expose a landmine if some other code locks two pages
from the same mappings in a different order...

> +	lock_page(page1);
> +	lock_page(page2);
> +}
> +
>  /*
>   * Compare extents of two files to see if they are the same.
>   * Caller must have locked both inodes to prevent write races.
> @@ -1867,10 +1881,12 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
>  		dest_page = vfs_dedupe_get_page(dest, destoff);
>  		if (IS_ERR(dest_page)) {
>  			error = PTR_ERR(dest_page);
> -			unlock_page(src_page);
>  			put_page(src_page);
>  			goto out_error;
>  		}
> +
> +		vfs_lock_two_pages(src_page, dest_page);
> +
>  		src_addr = kmap_atomic(src_page);
>  		dest_addr = kmap_atomic(dest_page);
>  
> @@ -1882,7 +1898,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
>  
>  		kunmap_atomic(dest_addr);
>  		kunmap_atomic(src_addr);
> -		unlock_page(dest_page);
> +		if (dest_page != src_page)
> +			unlock_page(dest_page);
>  		unlock_page(src_page);

Would it make sense for symmetry to wrap these in
vfs_unlock_two_pages()?

-Dave.
Darrick J. Wong Aug. 8, 2019, 2:10 a.m. UTC | #2
On Thu, Aug 08, 2019 at 08:38:50AM +1000, Dave Chinner wrote:
> On Wed, Aug 07, 2019 at 07:51:14AM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@oracle.com>
> > +/*
> > + * Lock two pages, ensuring that we lock in offset order if the pages are from
> > + * the same file.
> > + */
> > +static void vfs_lock_two_pages(struct page *page1, struct page *page2)
> > +{
> > +	if (page1 == page2) {
> > +		lock_page(page1);
> > +		return;
> > +	}
> > +
> > +	if (page1->mapping == page2->mapping && page1->index > page2->index)
> > +		swap(page1, page2);
> 
> I would do this even if the pages are on different mappings. That
> way we don't expose a landmine if some other code locks two pages
> from the same mappings in a different order...

Sure.

> > +	lock_page(page1);
> > +	lock_page(page2);
> > +}
> > +
> >  /*
> >   * Compare extents of two files to see if they are the same.
> >   * Caller must have locked both inodes to prevent write races.
> > @@ -1867,10 +1881,12 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
> >  		dest_page = vfs_dedupe_get_page(dest, destoff);
> >  		if (IS_ERR(dest_page)) {
> >  			error = PTR_ERR(dest_page);
> > -			unlock_page(src_page);
> >  			put_page(src_page);
> >  			goto out_error;
> >  		}
> > +
> > +		vfs_lock_two_pages(src_page, dest_page);
> > +
> >  		src_addr = kmap_atomic(src_page);
> >  		dest_addr = kmap_atomic(dest_page);
> >  
> > @@ -1882,7 +1898,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
> >  
> >  		kunmap_atomic(dest_addr);
> >  		kunmap_atomic(src_addr);
> > -		unlock_page(dest_page);
> > +		if (dest_page != src_page)
> > +			unlock_page(dest_page);
> >  		unlock_page(src_page);
> 
> Would it make sense for symmetry to wrap these in
> vfs_unlock_two_pages()?

Sure.

--D

> -Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
diff mbox series

Patch

diff --git a/fs/read_write.c b/fs/read_write.c
index 1f5088dec566..7bf9cc009f04 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1811,10 +1811,7 @@  static int generic_remap_check_len(struct inode *inode_in,
 	return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
 }
 
-/*
- * Read a page's worth of file data into the page cache.  Return the page
- * locked.
- */
+/* Read a page's worth of file data into the page cache. */
 static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
 {
 	struct page *page;
@@ -1826,10 +1823,27 @@  static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
 		put_page(page);
 		return ERR_PTR(-EIO);
 	}
-	lock_page(page);
 	return page;
 }
 
+/*
+ * Lock two pages, ensuring that we lock in offset order if the pages are from
+ * the same file.
+ */
+static void vfs_lock_two_pages(struct page *page1, struct page *page2)
+{
+	if (page1 == page2) {
+		lock_page(page1);
+		return;
+	}
+
+	if (page1->mapping == page2->mapping && page1->index > page2->index)
+		swap(page1, page2);
+
+	lock_page(page1);
+	lock_page(page2);
+}
+
 /*
  * Compare extents of two files to see if they are the same.
  * Caller must have locked both inodes to prevent write races.
@@ -1867,10 +1881,12 @@  static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		dest_page = vfs_dedupe_get_page(dest, destoff);
 		if (IS_ERR(dest_page)) {
 			error = PTR_ERR(dest_page);
-			unlock_page(src_page);
 			put_page(src_page);
 			goto out_error;
 		}
+
+		vfs_lock_two_pages(src_page, dest_page);
+
 		src_addr = kmap_atomic(src_page);
 		dest_addr = kmap_atomic(dest_page);
 
@@ -1882,7 +1898,8 @@  static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 
 		kunmap_atomic(dest_addr);
 		kunmap_atomic(src_addr);
-		unlock_page(dest_page);
+		if (dest_page != src_page)
+			unlock_page(dest_page);
 		unlock_page(src_page);
 		put_page(dest_page);
 		put_page(src_page);