diff mbox series

[2/3] mm: Don't hold a page reference while waiting for unlock

Message ID 20201013030008.27219-3-willy@infradead.org (mailing list archive)
State New, archived
Headers show
Series Wait for I/O without holding a page reference | expand

Commit Message

Matthew Wilcox Oct. 13, 2020, 3 a.m. UTC
In the upcoming THP patch series, if we find a !Uptodate page, it
is because of a read error.  In this case, we want to split the THP
into smaller pages so we can handle the error in as granular a fashion
as possible.  But xfstests generic/273 defeats this strategy by having
500 threads all sleeping on the same page, each waiting for their turn
to split the page.  None of them will ever succeed because splitting a
page requires that you hold the only reference to it.

To fix this, use put_and_wait_on_page_locked() to sleep without holding
a reference.  Each of the readers will then go back and retry the
page lookup.

This requires a few changes since we now get the page lock a little
earlier in generic_file_buffered_read().  This is unlikely to affect any
normal workloads as pages in the page cache are generally uptodate and
will not hit this path.  With the THP patch set and the readahead error
injector, I see about a 25% performance improvement with this patch over
an alternate approach which moves the page locking down.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/filemap.c | 51 ++++++++++++++-------------------------------------
 1 file changed, 14 insertions(+), 37 deletions(-)

Comments

Matthew Wilcox Oct. 13, 2020, 1:32 p.m. UTC | #1
This patch was inadvertently included.  I blame my tools.  The actual
patch 2/3 is the other one.

On Tue, Oct 13, 2020 at 04:00:06AM +0100, Matthew Wilcox (Oracle) wrote:
> In the upcoming THP patch series, if we find a !Uptodate page, it
> is because of a read error.  In this case, we want to split the THP
> into smaller pages so we can handle the error in as granular a fashion
> as possible.  But xfstests generic/273 defeats this strategy by having
> 500 threads all sleeping on the same page, each waiting for their turn
> to split the page.  None of them will ever succeed because splitting a
> page requires that you hold the only reference to it.
> 
> To fix this, use put_and_wait_on_page_locked() to sleep without holding
> a reference.  Each of the readers will then go back and retry the
> page lookup.
> 
> This requires a few changes since we now get the page lock a little
> earlier in generic_file_buffered_read().  This is unlikely to affect any
> normal workloads as pages in the page cache are generally uptodate and
> will not hit this path.  With the THP patch set and the readahead error
> injector, I see about a 25% performance improvement with this patch over
> an alternate approach which moves the page locking down.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/filemap.c | 51 ++++++++++++++-------------------------------------
>  1 file changed, 14 insertions(+), 37 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index f70227941627..9916353f0f0d 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1254,14 +1254,6 @@ static int __wait_on_page_locked_async(struct page *page,
>  	return ret;
>  }
>  
> -static int wait_on_page_locked_async(struct page *page,
> -				     struct wait_page_queue *wait)
> -{
> -	if (!PageLocked(page))
> -		return 0;
> -	return __wait_on_page_locked_async(compound_head(page), wait, false);
> -}
> -
>  /**
>   * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
>   * @page: The page to wait for.
> @@ -2128,19 +2120,21 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
>  					put_page(page);
>  					goto out;
>  				}
> -				error = wait_on_page_locked_async(page,
> -								iocb->ki_waitq);
> +				error = lock_page_async(page, iocb->ki_waitq);
> +				if (error)
> +					goto readpage_error;
> +			} else if (iocb->ki_flags & IOCB_NOWAIT) {
> +				put_page(page);
> +				goto would_block;
>  			} else {
> -				if (iocb->ki_flags & IOCB_NOWAIT) {
> -					put_page(page);
> -					goto would_block;
> +				if (!trylock_page(page)) {
> +					put_and_wait_on_page_locked(page,
> +							TASK_KILLABLE);
> +					goto find_page;
>  				}
> -				error = wait_on_page_locked_killable(page);
>  			}
> -			if (unlikely(error))
> -				goto readpage_error;
>  			if (PageUptodate(page))
> -				goto page_ok;
> +				goto uptodate;
>  
>  			if (inode->i_blkbits == PAGE_SHIFT ||
>  					!mapping->a_ops->is_partially_uptodate)
> @@ -2148,14 +2142,13 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
>  			/* pipes can't handle partially uptodate pages */
>  			if (unlikely(iov_iter_is_pipe(iter)))
>  				goto page_not_up_to_date;
> -			if (!trylock_page(page))
> -				goto page_not_up_to_date;
>  			/* Did it get truncated before we got the lock? */
>  			if (!page->mapping)
> -				goto page_not_up_to_date_locked;
> +				goto page_not_up_to_date;
>  			if (!mapping->a_ops->is_partially_uptodate(page,
>  							offset, iter->count))
> -				goto page_not_up_to_date_locked;
> +				goto page_not_up_to_date;
> +uptodate:
>  			unlock_page(page);
>  		}
>  page_ok:
> @@ -2223,28 +2216,12 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
>  		continue;
>  
>  page_not_up_to_date:
> -		/* Get exclusive access to the page ... */
> -		if (iocb->ki_flags & IOCB_WAITQ)
> -			error = lock_page_async(page, iocb->ki_waitq);
> -		else
> -			error = lock_page_killable(page);
> -		if (unlikely(error))
> -			goto readpage_error;
> -
> -page_not_up_to_date_locked:
>  		/* Did it get truncated before we got the lock? */
>  		if (!page->mapping) {
>  			unlock_page(page);
>  			put_page(page);
>  			continue;
>  		}
> -
> -		/* Did somebody else fill it already? */
> -		if (PageUptodate(page)) {
> -			unlock_page(page);
> -			goto page_ok;
> -		}
> -
>  readpage:
>  		if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
>  			unlock_page(page);
> -- 
> 2.28.0
>
diff mbox series

Patch

diff --git a/mm/filemap.c b/mm/filemap.c
index f70227941627..9916353f0f0d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1254,14 +1254,6 @@  static int __wait_on_page_locked_async(struct page *page,
 	return ret;
 }
 
-static int wait_on_page_locked_async(struct page *page,
-				     struct wait_page_queue *wait)
-{
-	if (!PageLocked(page))
-		return 0;
-	return __wait_on_page_locked_async(compound_head(page), wait, false);
-}
-
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
@@ -2128,19 +2120,21 @@  ssize_t generic_file_buffered_read(struct kiocb *iocb,
 					put_page(page);
 					goto out;
 				}
-				error = wait_on_page_locked_async(page,
-								iocb->ki_waitq);
+				error = lock_page_async(page, iocb->ki_waitq);
+				if (error)
+					goto readpage_error;
+			} else if (iocb->ki_flags & IOCB_NOWAIT) {
+				put_page(page);
+				goto would_block;
 			} else {
-				if (iocb->ki_flags & IOCB_NOWAIT) {
-					put_page(page);
-					goto would_block;
+				if (!trylock_page(page)) {
+					put_and_wait_on_page_locked(page,
+							TASK_KILLABLE);
+					goto find_page;
 				}
-				error = wait_on_page_locked_killable(page);
 			}
-			if (unlikely(error))
-				goto readpage_error;
 			if (PageUptodate(page))
-				goto page_ok;
+				goto uptodate;
 
 			if (inode->i_blkbits == PAGE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
@@ -2148,14 +2142,13 @@  ssize_t generic_file_buffered_read(struct kiocb *iocb,
 			/* pipes can't handle partially uptodate pages */
 			if (unlikely(iov_iter_is_pipe(iter)))
 				goto page_not_up_to_date;
-			if (!trylock_page(page))
-				goto page_not_up_to_date;
 			/* Did it get truncated before we got the lock? */
 			if (!page->mapping)
-				goto page_not_up_to_date_locked;
+				goto page_not_up_to_date;
 			if (!mapping->a_ops->is_partially_uptodate(page,
 							offset, iter->count))
-				goto page_not_up_to_date_locked;
+				goto page_not_up_to_date;
+uptodate:
 			unlock_page(page);
 		}
 page_ok:
@@ -2223,28 +2216,12 @@  ssize_t generic_file_buffered_read(struct kiocb *iocb,
 		continue;
 
 page_not_up_to_date:
-		/* Get exclusive access to the page ... */
-		if (iocb->ki_flags & IOCB_WAITQ)
-			error = lock_page_async(page, iocb->ki_waitq);
-		else
-			error = lock_page_killable(page);
-		if (unlikely(error))
-			goto readpage_error;
-
-page_not_up_to_date_locked:
 		/* Did it get truncated before we got the lock? */
 		if (!page->mapping) {
 			unlock_page(page);
 			put_page(page);
 			continue;
 		}
-
-		/* Did somebody else fill it already? */
-		if (PageUptodate(page)) {
-			unlock_page(page);
-			goto page_ok;
-		}
-
 readpage:
 		if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
 			unlock_page(page);