diff mbox series

[04/17] mm/filemap: Support readpage splitting a page

Message ID 20201102184312.25926-5-willy@infradead.org (mailing list archive)
State New, archived
Headers show
Series Refactor generic_file_buffered_read | expand

Commit Message

Matthew Wilcox Nov. 2, 2020, 6:42 p.m. UTC
For page splitting to succeed, the thread asking to split the
page has to be the only one with a reference to the page.  Calling
wait_on_page_locked() while holding a reference to the page will
effectively prevent this from happening with sufficient threads waiting
on the same page.  Use put_and_wait_on_page_locked() to sleep without
holding a reference to the page, then retry the page lookup after the
page is unlocked.

Since we now get the page lock a little earlier in filemap_update_page(),
we can eliminate a number of duplicate checks.  The original intent
(commit ebded02788b5 ("avoid unnecessary calls to lock_page when waiting
for IO to complete during a read")) behind getting the page lock later
was to avoid re-locking the page after it has been brought uptodate by
another thread.  We still avoid that because we go through the normal
lookup path again after the winning thread has brought the page uptodate.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 76 ++++++++++++++++------------------------------------
 1 file changed, 23 insertions(+), 53 deletions(-)

Comments

Kent Overstreet Nov. 2, 2020, 7 p.m. UTC | #1
On Mon, Nov 02, 2020 at 06:42:59PM +0000, Matthew Wilcox (Oracle) wrote:
> For page splitting to succeed, the thread asking to split the
> page has to be the only one with a reference to the page.  Calling
> wait_on_page_locked() while holding a reference to the page will
> effectively prevent this from happening with sufficient threads waiting
> on the same page.  Use put_and_wait_on_page_locked() to sleep without
> holding a reference to the page, then retry the page lookup after the
> page is unlocked.
> 
> Since we now get the page lock a little earlier in filemap_update_page(),
> we can eliminate a number of duplicate checks.  The original intent
> (commit ebded02788b5 ("avoid unnecessary calls to lock_page when waiting
> for IO to complete during a read")) behind getting the page lock later
> was to avoid re-locking the page after it has been brought uptodate by
> another thread.  We still avoid that because we go through the normal
> lookup path again after the winning thread has brought the page uptodate.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/filemap.c | 76 ++++++++++++++++------------------------------------
>  1 file changed, 23 insertions(+), 53 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 709774a60379..550e023abb52 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1347,14 +1347,6 @@ static int __wait_on_page_locked_async(struct page *page,
>  	return ret;
>  }
>  
> -static int wait_on_page_locked_async(struct page *page,
> -				     struct wait_page_queue *wait)
> -{
> -	if (!PageLocked(page))
> -		return 0;
> -	return __wait_on_page_locked_async(compound_head(page), wait, false);
> -}
> -
>  /**
>   * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
>   * @page: The page to wait for.
> @@ -2281,64 +2273,42 @@ static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
>  	struct inode *inode = mapping->host;
>  	int error;
>  
> -	/*
> -	 * See comment in do_read_cache_page on why
> -	 * wait_on_page_locked is used to avoid unnecessarily
> -	 * serialisations and why it's safe.
> -	 */
>  	if (iocb->ki_flags & IOCB_WAITQ) {
> -		error = wait_on_page_locked_async(page,
> -						iocb->ki_waitq);
> +		error = lock_page_async(page, iocb->ki_waitq);
> +		if (error) {
> +			put_page(page);
> +			return ERR_PTR(error);
> +		}
>  	} else {
> -		error = wait_on_page_locked_killable(page);
> -	}
> -	if (unlikely(error)) {
> -		put_page(page);
> -		return ERR_PTR(error);
> +		if (!trylock_page(page)) {
> +			put_and_wait_on_page_locked(page, TASK_KILLABLE);
> +			return NULL;
> +		}
>  	}
> -	if (PageUptodate(page))
> -		return page;
>  
> +	if (!page->mapping)
> +		goto truncated;

Since we're dropping our ref to the page, it could potentially be truncated and
then reused, no? So we should be checking page->mapping == mapping &&
page->index == index (and stashing page->index before dropping our ref, or
passing it in).

> +	if (PageUptodate(page))
> +		goto uptodate;
>  	if (inode->i_blkbits == PAGE_SHIFT ||
>  			!mapping->a_ops->is_partially_uptodate)
> -		goto page_not_up_to_date;
> +		goto readpage;
>  	/* pipes can't handle partially uptodate pages */
>  	if (unlikely(iov_iter_is_pipe(iter)))
> -		goto page_not_up_to_date;
> -	if (!trylock_page(page))
> -		goto page_not_up_to_date;
> -	/* Did it get truncated before we got the lock? */
> -	if (!page->mapping)
> -		goto page_not_up_to_date_locked;
> +		goto readpage;
>  	if (!mapping->a_ops->is_partially_uptodate(page,
> -				pos & ~PAGE_MASK, count))
> -		goto page_not_up_to_date_locked;
> +				pos & (thp_size(page) - 1), count))
> +		goto readpage;
> +uptodate:
>  	unlock_page(page);
>  	return page;
>  
> -page_not_up_to_date:
> -	/* Get exclusive access to the page ... */
> -	error = lock_page_for_iocb(iocb, page);
> -	if (unlikely(error)) {
> -		put_page(page);
> -		return ERR_PTR(error);
> -	}
> -
> -page_not_up_to_date_locked:
> -	/* Did it get truncated before we got the lock? */
> -	if (!page->mapping) {
> -		unlock_page(page);
> -		put_page(page);
> -		return NULL;
> -	}
> -
> -	/* Did somebody else fill it already? */
> -	if (PageUptodate(page)) {
> -		unlock_page(page);
> -		return page;
> -	}
> -
> +readpage:
>  	return filemap_read_page(iocb, filp, mapping, page);
> +truncated:
> +	unlock_page(page);
> +	put_page(page);
> +	return NULL;
>  }
>  
>  static struct page *filemap_create_page(struct kiocb *iocb,
> -- 
> 2.28.0
>
Matthew Wilcox Nov. 2, 2020, 7:03 p.m. UTC | #2
On Mon, Nov 02, 2020 at 02:00:08PM -0500, Kent Overstreet wrote:
(snipped the deleted lines for clarity)
> >  	if (iocb->ki_flags & IOCB_WAITQ) {
> > +		error = lock_page_async(page, iocb->ki_waitq);
> > +		if (error) {
> > +			put_page(page);
> > +			return ERR_PTR(error);
> > +		}
> >  	} else {
> > +		if (!trylock_page(page)) {
> > +			put_and_wait_on_page_locked(page, TASK_KILLABLE);
> > +			return NULL;
> > +		}
> >  	}
> >  
> > +	if (!page->mapping)
> > +		goto truncated;
> 
> Since we're dropping our ref to the page, it could potentially be truncated and
> then reused, no? So we should be checking page->mapping == mapping &&
> page->index == index (and stashing page->index before dropping our ref, or
> passing it in).

If we get to this point, then we _didn't_ drop our ref to the page.
All the paths above that call put_page() then return.
Kent Overstreet Nov. 2, 2020, 7:12 p.m. UTC | #3
On Mon, Nov 02, 2020 at 06:42:59PM +0000, Matthew Wilcox (Oracle) wrote:
> For page splitting to succeed, the thread asking to split the
> page has to be the only one with a reference to the page.  Calling
> wait_on_page_locked() while holding a reference to the page will
> effectively prevent this from happening with sufficient threads waiting
> on the same page.  Use put_and_wait_on_page_locked() to sleep without
> holding a reference to the page, then retry the page lookup after the
> page is unlocked.
> 
> Since we now get the page lock a little earlier in filemap_update_page(),
> we can eliminate a number of duplicate checks.  The original intent
> (commit ebded02788b5 ("avoid unnecessary calls to lock_page when waiting
> for IO to complete during a read")) behind getting the page lock later
> was to avoid re-locking the page after it has been brought uptodate by
> another thread.  We still avoid that because we go through the normal
> lookup path again after the winning thread has brought the page uptodate.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Kent Overstreet <kent.overstreet@gmail.com>
Christoph Hellwig Nov. 3, 2020, 7:31 a.m. UTC | #4
On Mon, Nov 02, 2020 at 06:42:59PM +0000, Matthew Wilcox (Oracle) wrote:
> For page splitting to succeed, the thread asking to split the
> page has to be the only one with a reference to the page.  Calling
> wait_on_page_locked() while holding a reference to the page will
> effectively prevent this from happening with sufficient threads waiting
> on the same page.  Use put_and_wait_on_page_locked() to sleep without
> holding a reference to the page, then retry the page lookup after the
> page is unlocked.
> 
> Since we now get the page lock a little earlier in filemap_update_page(),
> we can eliminate a number of duplicate checks.  The original intent
> (commit ebded02788b5 ("avoid unnecessary calls to lock_page when waiting
> for IO to complete during a read")) behind getting the page lock later
> was to avoid re-locking the page after it has been brought uptodate by
> another thread.  We still avoid that because we go through the normal
> lookup path again after the winning thread has brought the page uptodate.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index 709774a60379..550e023abb52 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1347,14 +1347,6 @@  static int __wait_on_page_locked_async(struct page *page,
 	return ret;
 }
 
-static int wait_on_page_locked_async(struct page *page,
-				     struct wait_page_queue *wait)
-{
-	if (!PageLocked(page))
-		return 0;
-	return __wait_on_page_locked_async(compound_head(page), wait, false);
-}
-
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -2281,64 +2273,42 @@  static struct page *filemap_update_page(struct kiocb *iocb, struct file *filp,
 	struct inode *inode = mapping->host;
 	int error;
 
-	/*
-	 * See comment in do_read_cache_page on why
-	 * wait_on_page_locked is used to avoid unnecessarily
-	 * serialisations and why it's safe.
-	 */
 	if (iocb->ki_flags & IOCB_WAITQ) {
-		error = wait_on_page_locked_async(page,
-						iocb->ki_waitq);
+		error = lock_page_async(page, iocb->ki_waitq);
+		if (error) {
+			put_page(page);
+			return ERR_PTR(error);
+		}
 	} else {
-		error = wait_on_page_locked_killable(page);
-	}
-	if (unlikely(error)) {
-		put_page(page);
-		return ERR_PTR(error);
+		if (!trylock_page(page)) {
+			put_and_wait_on_page_locked(page, TASK_KILLABLE);
+			return NULL;
+		}
 	}
-	if (PageUptodate(page))
-		return page;
 
+	if (!page->mapping)
+		goto truncated;
+	if (PageUptodate(page))
+		goto uptodate;
 	if (inode->i_blkbits == PAGE_SHIFT ||
 			!mapping->a_ops->is_partially_uptodate)
-		goto page_not_up_to_date;
+		goto readpage;
 	/* pipes can't handle partially uptodate pages */
 	if (unlikely(iov_iter_is_pipe(iter)))
-		goto page_not_up_to_date;
-	if (!trylock_page(page))
-		goto page_not_up_to_date;
-	/* Did it get truncated before we got the lock? */
-	if (!page->mapping)
-		goto page_not_up_to_date_locked;
+		goto readpage;
 	if (!mapping->a_ops->is_partially_uptodate(page,
-				pos & ~PAGE_MASK, count))
-		goto page_not_up_to_date_locked;
+				pos & (thp_size(page) - 1), count))
+		goto readpage;
+uptodate:
 	unlock_page(page);
 	return page;
 
-page_not_up_to_date:
-	/* Get exclusive access to the page ... */
-	error = lock_page_for_iocb(iocb, page);
-	if (unlikely(error)) {
-		put_page(page);
-		return ERR_PTR(error);
-	}
-
-page_not_up_to_date_locked:
-	/* Did it get truncated before we got the lock? */
-	if (!page->mapping) {
-		unlock_page(page);
-		put_page(page);
-		return NULL;
-	}
-
-	/* Did somebody else fill it already? */
-	if (PageUptodate(page)) {
-		unlock_page(page);
-		return page;
-	}
-
+readpage:
 	return filemap_read_page(iocb, filp, mapping, page);
+truncated:
+	unlock_page(page);
+	put_page(page);
+	return NULL;
 }
 
 static struct page *filemap_create_page(struct kiocb *iocb,