[RFC,v2,5/5] mm: Split move_pages_to_lru into 3 separate passes

Message ID: 20200819042738.23414.60815.stgit@localhost.localdomain (mailing list archive)
State: New, archived
Series: Minor cleanups and performance optimizations for LRU rework

Commit Message

Alexander Duyck Aug. 19, 2020, 4:27 a.m. UTC
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>

The current code for move_pages_to_lru is meant to release the LRU lock
every time it encounters an unevictable page or a compound page that must
be freed. This results in a fair amount of code bulk, because the lruvec
has to be reacquired every time the lock is dropped.

Instead of doing this, I believe we can break the code up into 3 passes. The
first pass will identify the pages we can move to the LRU and move them. In
addition, it will sort the list, leaving the unevictable pages in place and
moving pages whose reference count has dropped to zero to pages_to_free.
The second pass will return the unevictable pages to the LRU. The final
pass will merge pages_to_free back into the original list and free any
compound pages it contains before returning from the function.

The advantage of doing it this way is that we only have to release the lock
between pass 1 and pass 2, and then reacquire it once pass 3 has finished
freeing the compound pages. As such, we release the lock at most once in an
entire call instead of having to test whether we need to relock for each
page.
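
Putting the hunks together, the reworked function ends up structured like
the sketch below. The unchanged middle of the function is paraphrased
rather than copied, so treat the accounting lines as approximate:

static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
						     struct list_head *list)
{
	int nr_pages, nr_moved = 0;
	LIST_HEAD(pages_to_free);
	struct page *page, *next;
	enum lru_list lru;

	/* Pass 1: move evictable pages to the LRU, sort out the rest */
	list_for_each_entry_safe(page, next, list, lru) {
		if (unlikely(!page_evictable(page)))
			continue;		/* left on @list for pass 2 */

		list_del(&page->lru);
		SetPageLRU(page);

		if (unlikely(put_page_testzero(page))) {
			__ClearPageLRU(page);
			__ClearPageActive(page);
			list_add(&page->lru, &pages_to_free);	/* pass 3 */
			continue;
		}

		/* paraphrased: place the page on its LRU and account for it */
		lru = page_lru(page);
		nr_pages = hpage_nr_pages(page);
		add_page_to_lru_list(page, lruvec, lru);
		nr_moved += nr_pages;
		if (PageActive(page))
			workingset_age_nonresident(lruvec, nr_pages);
	}

	/* Passes 2 and 3 only run if there is cleanup work to do */
	if (unlikely(!list_empty(list) || !list_empty(&pages_to_free))) {
		spin_unlock_irq(&lruvec->lru_lock);

		/* Pass 2: return the unevictable pages to the LRU */
		while (!list_empty(list)) {
			page = lru_to_page(list);
			list_del(&page->lru);
			putback_lru_page(page);
		}

		/* Pass 3: splice pages_to_free back, free compound pages */
		list_splice(&pages_to_free, list);
		list_for_each_entry_safe(page, next, list, lru) {
			if (unlikely(PageCompound(page))) {
				list_del(&page->lru);
				destroy_compound_page(page);
			}
		}

		spin_lock_irq(&lruvec->lru_lock);
	}

	return nr_moved;
}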

Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
---
 mm/vmscan.c |   68 ++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 39 insertions(+), 29 deletions(-)

Comments

Alex Shi Aug. 19, 2020, 7:56 a.m. UTC | #1
On 2020/8/19 12:27 PM, Alexander Duyck wrote:
> From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> 
> The current code for move_pages_to_lru is meant to release the LRU lock
> every time it encounters an unevictable page or a compound page that must
> be freed. This results in a fair amount of code bulk, because the lruvec
> has to be reacquired every time the lock is dropped.
> 
> Instead of doing this, I believe we can break the code up into 3 passes. The
> first pass will identify the pages we can move to the LRU and move them. In
> addition, it will sort the list, leaving the unevictable pages in place and
> moving pages whose reference count has dropped to zero to pages_to_free.
> The second pass will return the unevictable pages to the LRU. The final
> pass will merge pages_to_free back into the original list and free any
> compound pages it contains before returning from the function.
> 
> The advantage of doing it this way is that we only have to release the lock
> between pass 1 and pass 2, and then reacquire it once pass 3 has finished
> freeing the compound pages. As such, we release the lock at most once in an
> entire call instead of having to test whether we need to relock for each
> page.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> ---
>  mm/vmscan.c |   68 ++++++++++++++++++++++++++++++++++-------------------------
>  1 file changed, 39 insertions(+), 29 deletions(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 3ebe3f9b653b..6a2bdbc1a9eb 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1850,22 +1850,21 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
>  {
>  	int nr_pages, nr_moved = 0;
>  	LIST_HEAD(pages_to_free);
> -	struct page *page;
> -	struct lruvec *orig_lruvec = lruvec;
> +	struct page *page, *next;
>  	enum lru_list lru;
>  
> -	while (!list_empty(list)) {
> -		page = lru_to_page(list);
> +	list_for_each_entry_safe(page, next, list, lru) {
>  		VM_BUG_ON_PAGE(PageLRU(page), page);
> -		list_del(&page->lru);
> -		if (unlikely(!page_evictable(page))) {
> -			if (lruvec) {
> -				spin_unlock_irq(&lruvec->lru_lock);
> -				lruvec = NULL;
> -			}
> -			putback_lru_page(page);
> +
> +		/*
> +		 * if page is unevictable leave it on the list to be returned
> +		 * to the LRU after we have finished processing the other
> +		 * entries in the list.
> +		 */
> +		if (unlikely(!page_evictable(page)))
>  			continue;
> -		}
> +
> +		list_del(&page->lru);
>  
>  		/*
> 		 * The SetPageLRU needs to be kept here for list integrity.
> @@ -1878,20 +1877,14 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
>  		 *     list_add(&page->lru,)
>  		 *                                        list_add(&page->lru,)
>  		 */
> -		lruvec = relock_page_lruvec_irq(page, lruvec);

It actually changed the meaning from the current func: I had seen a bug here when there was no relock.
But after moving to the 5.9 kernel, I can not reproduce the bug any more. I am not sure whether 5.9
fixed the problem and the relock is no longer needed here.

For the rest of this patch:
Reviewed-by: Alex Shi <alex.shi@linux.alibaba.com>


>  		SetPageLRU(page);
>  
>  		if (unlikely(put_page_testzero(page))) {
>  			__ClearPageLRU(page);
>  			__ClearPageActive(page);
>  
> -			if (unlikely(PageCompound(page))) {
> -				spin_unlock_irq(&lruvec->lru_lock);
> -				lruvec = NULL;
> -				destroy_compound_page(page);
> -			} else
> -				list_add(&page->lru, &pages_to_free);
> -
> +			/* defer freeing until we can release lru_lock */
> +			list_add(&page->lru, &pages_to_free);
>  			continue;
>  		}
>  
> @@ -1904,16 +1897,33 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
>  		if (PageActive(page))
>  			workingset_age_nonresident(lruvec, nr_pages);
>  	}
> -	if (orig_lruvec != lruvec) {
> -		if (lruvec)
> -			spin_unlock_irq(&lruvec->lru_lock);
> -		spin_lock_irq(&orig_lruvec->lru_lock);
> -	}
>  
> -	/*
> -	 * To save our caller's stack, now use input list for pages to free.
> -	 */
> -	list_splice(&pages_to_free, list);
> +	if (unlikely(!list_empty(list) || !list_empty(&pages_to_free))) {
> +		spin_unlock_irq(&lruvec->lru_lock);
> +
> +		/* return any unevictable pages to the LRU list */
> +		while (!list_empty(list)) {
> +			page = lru_to_page(list);
> +			list_del(&page->lru);
> +			putback_lru_page(page);
> +		}
> +
> +		/*
> +		 * To save our caller's stack use input
> +		 * list for pages to free.
> +		 */
> +		list_splice(&pages_to_free, list);
> +
> +		/* free any compound pages we have in the list */
> +		list_for_each_entry_safe(page, next, list, lru) {
> +			if (likely(!PageCompound(page)))
> +				continue;
> +			list_del(&page->lru);
> +			destroy_compound_page(page);
> +		}
> +
> +		spin_lock_irq(&lruvec->lru_lock);
> +	}
>  
>  	return nr_moved;
>  }
>
Alexander Duyck Aug. 19, 2020, 2:42 p.m. UTC | #2
On Wed, Aug 19, 2020 at 12:58 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>
>
>
> On 2020/8/19 12:27 PM, Alexander Duyck wrote:
> > From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> >
> > The current code for move_pages_to_lru is meant to release the LRU lock
> > every time it encounters an unevictable page or a compound page that must
> > be freed. This results in a fair amount of code bulk, because the lruvec
> > has to be reacquired every time the lock is dropped.
> >
> > Instead of doing this, I believe we can break the code up into 3 passes. The
> > first pass will identify the pages we can move to the LRU and move them. In
> > addition, it will sort the list, leaving the unevictable pages in place and
> > moving pages whose reference count has dropped to zero to pages_to_free.
> > The second pass will return the unevictable pages to the LRU. The final
> > pass will merge pages_to_free back into the original list and free any
> > compound pages it contains before returning from the function.
> >
> > The advantage of doing it this way is that we only have to release the lock
> > between pass 1 and pass 2, and then reacquire it once pass 3 has finished
> > freeing the compound pages. As such, we release the lock at most once in an
> > entire call instead of having to test whether we need to relock for each
> > page.
> >
> > Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
> > ---
> >  mm/vmscan.c |   68 ++++++++++++++++++++++++++++++++++-------------------------
> >  1 file changed, 39 insertions(+), 29 deletions(-)
> >
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > index 3ebe3f9b653b..6a2bdbc1a9eb 100644
> > --- a/mm/vmscan.c
> > +++ b/mm/vmscan.c
> > @@ -1850,22 +1850,21 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
> >  {
> >       int nr_pages, nr_moved = 0;
> >       LIST_HEAD(pages_to_free);
> > -     struct page *page;
> > -     struct lruvec *orig_lruvec = lruvec;
> > +     struct page *page, *next;
> >       enum lru_list lru;
> >
> > -     while (!list_empty(list)) {
> > -             page = lru_to_page(list);
> > +     list_for_each_entry_safe(page, next, list, lru) {
> >               VM_BUG_ON_PAGE(PageLRU(page), page);
> > -             list_del(&page->lru);
> > -             if (unlikely(!page_evictable(page))) {
> > -                     if (lruvec) {
> > -                             spin_unlock_irq(&lruvec->lru_lock);
> > -                             lruvec = NULL;
> > -                     }
> > -                     putback_lru_page(page);
> > +
> > +             /*
> > +              * if page is unevictable leave it on the list to be returned
> > +              * to the LRU after we have finished processing the other
> > +              * entries in the list.
> > +              */
> > +             if (unlikely(!page_evictable(page)))
> >                       continue;
> > -             }
> > +
> > +             list_del(&page->lru);
> >
> >               /*
> >               * The SetPageLRU needs to be kept here for list integrity.
> > @@ -1878,20 +1877,14 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
> >                *     list_add(&page->lru,)
> >                *                                        list_add(&page->lru,)
> >                */
> > -             lruvec = relock_page_lruvec_irq(page, lruvec);
>
> It actually changed the meaning from the current func: I had seen a bug here when there was no relock.
> But after moving to the 5.9 kernel, I can not reproduce the bug any more. I am not sure whether 5.9
> fixed the problem and the relock is no longer needed here.

So I am not sure what you mean here about "changed the meaning from
the current func". Which function are you referring to and what
changed?

From what I can tell, the pages cannot change memcg because they were
isolated and had the LRU flag stripped. They shouldn't be able to
change destination LRU vector as a result. Assuming that, they can
all be processed under the same LRU lock, and we can avoid having to
release it until we are forced to do so to call putback_lru_page or
destroy the compound pages that were freed while we were shrinking the
LRU lists.
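
For context, the relock_page_lruvec_irq() call being removed is,
roughly, the following (a simplified sketch, not the exact helper from
the lruvec lock series):

static struct lruvec *relock_page_lruvec_irq(struct page *page,
					     struct lruvec *locked)
{
	struct lruvec *lruvec = mem_cgroup_page_lruvec(page,
						       page_pgdat(page));

	/* already holding the right lruvec lock, nothing to do */
	if (locked == lruvec)
		return locked;

	/* switch lru_lock ownership over to the page's lruvec */
	if (locked)
		spin_unlock_irq(&locked->lru_lock);
	spin_lock_irq(&lruvec->lru_lock);

	return lruvec;
}

If the isolated pages really cannot switch lruvec, this always takes
the early return, which is why holding the one lock across pass 1
should be equivalent.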

> For the rest of this patch:
> Reviewed-by: Alex Shi <alex.shi@linux.alibaba.com>

Thanks for the review.

- Alex
Alex Shi Aug. 20, 2020, 9:56 a.m. UTC | #3
On 2020/8/19 10:42 PM, Alexander Duyck wrote:
>> It actually changed the meaning from the current func: I had seen a bug here when there was no relock.
>> But after moving to the 5.9 kernel, I can not reproduce the bug any more. I am not sure whether 5.9
>> fixed the problem and the relock is no longer needed here.
> So I am not sure what you mean here about "changed the meaning from
> the current func". Which function are you referring to and what
> changed?
> 
> From what I can tell, the pages cannot change memcg because they were
> isolated and had the LRU flag stripped. They shouldn't be able to
> change destination LRU vector as a result. Assuming that, they can
> all be processed under the same LRU lock, and we can avoid having to
> release it until we are forced to do so to call putback_lru_page or
> destroy the compound pages that were freed while we were shrinking the
> LRU lists.
> 

I had reported a bug based on the 5.8 kernel:
https://lkml.org/lkml/2020/7/28/465

I am not sure whether it was fixed in a newer kernel. The original line was introduced by Hugh Dickins;
I believe it would be great if you could get comments from him.

Thanks
Alex
Alexander Duyck Aug. 20, 2020, 5:15 p.m. UTC | #4
On Thu, Aug 20, 2020 at 2:58 AM Alex Shi <alex.shi@linux.alibaba.com> wrote:
>
>
>
> On 2020/8/19 10:42 PM, Alexander Duyck wrote:
> >> It actually changed the meaning from the current func: I had seen a bug here when there was no relock.
> >> But after moving to the 5.9 kernel, I can not reproduce the bug any more. I am not sure whether 5.9
> >> fixed the problem and the relock is no longer needed here.
> > So I am not sure what you mean here about "changed the meaning from
> > the current func". Which function are you referring to and what
> > changed?
> >
> > From what I can tell, the pages cannot change memcg because they were
> > isolated and had the LRU flag stripped. They shouldn't be able to
> > change destination LRU vector as a result. Assuming that, they can
> > all be processed under the same LRU lock, and we can avoid having to
> > release it until we are forced to do so to call putback_lru_page or
> > destroy the compound pages that were freed while we were shrinking the
> > LRU lists.
> >
>
> I had reported a bug based on the 5.8 kernel:
> https://lkml.org/lkml/2020/7/28/465
>
> I am not sure whether it was fixed in a newer kernel. The original line was introduced by Hugh Dickins;
> I believe it would be great if you could get comments from him.

When I brought this up before, you had pointed to the relocking being
due to the function reacquiring the lruvec for some reason. I wonder
if the LRU bit stripping serializing things has made the check for the
lruvec after releasing the lock redundant.
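
To be concrete, the serialization I mean is the isolation-side pattern
from the series, roughly (a sketch from memory, not a verbatim quote):

	if (TestClearPageLRU(page)) {
		get_page(page);
		lruvec = lock_page_lruvec_irq(page);
		del_page_from_lru_list(page, lruvec, page_lru(page));
		unlock_page_lruvec_irq(lruvec);
	}

Anyone wanting to touch a page's LRU state has to win the PageLRU bit
first, so once we have isolated the pages they should stay put.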

- Alex

Patch

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3ebe3f9b653b..6a2bdbc1a9eb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1850,22 +1850,21 @@  static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
 {
 	int nr_pages, nr_moved = 0;
 	LIST_HEAD(pages_to_free);
-	struct page *page;
-	struct lruvec *orig_lruvec = lruvec;
+	struct page *page, *next;
 	enum lru_list lru;
 
-	while (!list_empty(list)) {
-		page = lru_to_page(list);
+	list_for_each_entry_safe(page, next, list, lru) {
 		VM_BUG_ON_PAGE(PageLRU(page), page);
-		list_del(&page->lru);
-		if (unlikely(!page_evictable(page))) {
-			if (lruvec) {
-				spin_unlock_irq(&lruvec->lru_lock);
-				lruvec = NULL;
-			}
-			putback_lru_page(page);
+
+		/*
+		 * if page is unevictable leave it on the list to be returned
+		 * to the LRU after we have finished processing the other
+		 * entries in the list.
+		 */
+		if (unlikely(!page_evictable(page)))
 			continue;
-		}
+
+		list_del(&page->lru);
 
 		/*
 		 * The SetPageLRU needs to be kept here for list integrity.
@@ -1878,20 +1877,14 @@  static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
 		 *     list_add(&page->lru,)
 		 *                                        list_add(&page->lru,)
 		 */
-		lruvec = relock_page_lruvec_irq(page, lruvec);
 		SetPageLRU(page);
 
 		if (unlikely(put_page_testzero(page))) {
 			__ClearPageLRU(page);
 			__ClearPageActive(page);
 
-			if (unlikely(PageCompound(page))) {
-				spin_unlock_irq(&lruvec->lru_lock);
-				lruvec = NULL;
-				destroy_compound_page(page);
-			} else
-				list_add(&page->lru, &pages_to_free);
-
+			/* defer freeing until we can release lru_lock */
+			list_add(&page->lru, &pages_to_free);
 			continue;
 		}
 
@@ -1904,16 +1897,33 @@  static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
 		if (PageActive(page))
 			workingset_age_nonresident(lruvec, nr_pages);
 	}
-	if (orig_lruvec != lruvec) {
-		if (lruvec)
-			spin_unlock_irq(&lruvec->lru_lock);
-		spin_lock_irq(&orig_lruvec->lru_lock);
-	}
 
-	/*
-	 * To save our caller's stack, now use input list for pages to free.
-	 */
-	list_splice(&pages_to_free, list);
+	if (unlikely(!list_empty(list) || !list_empty(&pages_to_free))) {
+		spin_unlock_irq(&lruvec->lru_lock);
+
+		/* return any unevictable pages to the LRU list */
+		while (!list_empty(list)) {
+			page = lru_to_page(list);
+			list_del(&page->lru);
+			putback_lru_page(page);
+		}
+
+		/*
+		 * To save our caller's stack use input
+		 * list for pages to free.
+		 */
+		list_splice(&pages_to_free, list);
+
+		/* free any compound pages we have in the list */
+		list_for_each_entry_safe(page, next, list, lru) {
+			if (likely(!PageCompound(page)))
+				continue;
+			list_del(&page->lru);
+			destroy_compound_page(page);
+		}
+
+		spin_lock_irq(&lruvec->lru_lock);
+	}
 
 	return nr_moved;
 }