diff mbox series

[2/5] mm: migrate: simplify the logic for handling permanent failure

Message ID 20201103130334.13468-3-shy828301@gmail.com (mailing list archive)
State New, archived
Headers show
Series mm: misc migrate cleanup and improvement | expand

Commit Message

Yang Shi Nov. 3, 2020, 1:03 p.m. UTC
When unmap_and_move{_huge_page}() returns neither -EAGAIN nor MIGRATEPAGE_SUCCESS,
the page would be put back to the LRU or the proper list if it is a non-LRU
movable page.  But the callers always call putback_movable_pages() to put the
failed pages back later on anyway, so it is not very efficient to put every
single page back immediately, and the code looks convoluted.

Put the failed page on a separate list, then splice the list to migrate
list when all pages are tried.  It is the caller's responsibility to
call putback_movable_pages() to handle failures.  This also makes the
code simpler and more readable.

After the change the rules are:
    * Success: non hugetlb page will be freed, hugetlb page will be put
               back
    * -EAGAIN: stay on the from list
    * -ENOMEM: stay on the from list
    * Other errno: put on ret_pages list then splice to from list

The from list would be empty if and only if all pages are migrated
successfully; it was not so before.  This has no impact on existing callsites.

Signed-off-by: Yang Shi <shy828301@gmail.com>
---
 mm/migrate.c | 58 ++++++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

Comments

Zi Yan Nov. 6, 2020, 8:03 p.m. UTC | #1
On 3 Nov 2020, at 8:03, Yang Shi wrote:

> When unmap_and_move{_huge_page}() returns !-EAGAIN and !MIGRATEPAGE_SUCCESS,
> the page would be put back to LRU or proper list if it is non-LRU movable
> page.  But, the callers always call putback_movable_pages() to put the
> failed pages back later on, so it seems not very efficient to put every
> single page back immediately, and the code looks convoluted.
>
> Put the failed page on a separate list, then splice the list to migrate
> list when all pages are tried.  It is the caller's responsibility to
> call putback_movable_pages() to handle failures.  This also makes the
> code simpler and more readable.
>
> After the change the rules are:
>     * Success: non hugetlb page will be freed, hugetlb page will be put
>                back
>     * -EAGAIN: stay on the from list
>     * -ENOMEM: stay on the from list
>     * Other errno: put on ret_pages list then splice to from list

Can you put this before the switch case in the migrate_pages? That will
be very helpful to understand the code.
>
> The from list would be empty iff all pages are migrated successfully, it

s/iff/if unless you really mean if and only if. :)


Everything else looks good to me. Thanks for making the code cleaner.
With the changes above, you can add Reviewed-by: Zi Yan <ziy@nvidia.com>.

> was not so before.  This has no impact to current existing callsites.
>
> Signed-off-by: Yang Shi <shy828301@gmail.com>
> ---
>  mm/migrate.c | 58 ++++++++++++++++++++++++++--------------------------
>  1 file changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 8a2e7e19e27b..c33c92495ead 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1169,7 +1169,8 @@ static int unmap_and_move(new_page_t get_new_page,
>  				   free_page_t put_new_page,
>  				   unsigned long private, struct page *page,
>  				   int force, enum migrate_mode mode,
> -				   enum migrate_reason reason)
> +				   enum migrate_reason reason,
> +				   struct list_head *ret)
>  {
>  	int rc = MIGRATEPAGE_SUCCESS;
>  	struct page *newpage = NULL;
> @@ -1206,7 +1207,14 @@ static int unmap_and_move(new_page_t get_new_page,
>  		 * migrated will have kept its references and be restored.
>  		 */
>  		list_del(&page->lru);
> +	}
>
> +	/*
> +	 * If migration is successful, releases reference grabbed during
> +	 * isolation. Otherwise, restore the page to right list unless
> +	 * we want to retry.
> +	 */
> +	if (rc == MIGRATEPAGE_SUCCESS) {
>  		/*
>  		 * Compaction can migrate also non-LRU pages which are
>  		 * not accounted to NR_ISOLATED_*. They can be recognized
> @@ -1215,35 +1223,16 @@ static int unmap_and_move(new_page_t get_new_page,
>  		if (likely(!__PageMovable(page)))
>  			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
>  					page_is_file_lru(page), -thp_nr_pages(page));
> -	}
>
> -	/*
> -	 * If migration is successful, releases reference grabbed during
> -	 * isolation. Otherwise, restore the page to right list unless
> -	 * we want to retry.
> -	 */
> -	if (rc == MIGRATEPAGE_SUCCESS) {
>  		if (reason != MR_MEMORY_FAILURE)
>  			/*
>  			 * We release the page in page_handle_poison.
>  			 */
>  			put_page(page);
>  	} else {
> -		if (rc != -EAGAIN) {
> -			if (likely(!__PageMovable(page))) {
> -				putback_lru_page(page);
> -				goto put_new;
> -			}
> +		if (rc != -EAGAIN)
> +			list_add_tail(&page->lru, ret);
>
> -			lock_page(page);
> -			if (PageMovable(page))
> -				putback_movable_page(page);
> -			else
> -				__ClearPageIsolated(page);
> -			unlock_page(page);
> -			put_page(page);
> -		}
> -put_new:
>  		if (put_new_page)
>  			put_new_page(newpage, private);
>  		else
> @@ -1274,7 +1263,8 @@ static int unmap_and_move(new_page_t get_new_page,
>  static int unmap_and_move_huge_page(new_page_t get_new_page,
>  				free_page_t put_new_page, unsigned long private,
>  				struct page *hpage, int force,
> -				enum migrate_mode mode, int reason)
> +				enum migrate_mode mode, int reason,
> +				struct list_head *ret)
>  {
>  	int rc = -EAGAIN;
>  	int page_was_mapped = 0;
> @@ -1290,7 +1280,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
>  	 * kicking migration.
>  	 */
>  	if (!hugepage_migration_supported(page_hstate(hpage))) {
> -		putback_active_hugepage(hpage);
> +		list_move_tail(&hpage->lru, ret);
>  		return -ENOSYS;
>  	}
>
> @@ -1372,8 +1362,10 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
>  out_unlock:
>  	unlock_page(hpage);
>  out:
> -	if (rc != -EAGAIN)
> +	if (rc == MIGRATEPAGE_SUCCESS)
>  		putback_active_hugepage(hpage);
> +	else if (rc != -EAGAIN && rc != MIGRATEPAGE_SUCCESS)
> +		list_move_tail(&hpage->lru, ret);
>
>  	/*
>  	 * If migration was not successful and there's a freeing callback, use
> @@ -1404,8 +1396,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
>   *
>   * The function returns after 10 attempts or if no pages are movable any more
>   * because the list has become empty or no retryable pages exist any more.
> - * The caller should call putback_movable_pages() to return pages to the LRU
> - * or free list only if ret != 0.
> + * It is caller's responsibility to call putback_movable_pages() to return pages
> + * to the LRU or free list only if ret != 0.
>   *
>   * Returns the number of pages that were not migrated, or an error code.
>   */
> @@ -1426,6 +1418,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>  	struct page *page2;
>  	int swapwrite = current->flags & PF_SWAPWRITE;
>  	int rc, nr_subpages;
> +	LIST_HEAD(ret_pages);
>
>  	if (!swapwrite)
>  		current->flags |= PF_SWAPWRITE;
> @@ -1448,11 +1441,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>  			if (PageHuge(page))
>  				rc = unmap_and_move_huge_page(get_new_page,
>  						put_new_page, private, page,
> -						pass > 2, mode, reason);
> +						pass > 2, mode, reason,
> +						&ret_pages);
>  			else
>  				rc = unmap_and_move(get_new_page, put_new_page,
>  						private, page, pass > 2, mode,
> -						reason);
> +						reason, &ret_pages);
>
>  			switch(rc) {
>  			case -ENOMEM:
> @@ -1519,6 +1513,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
>  	nr_thp_failed += thp_retry;
>  	rc = nr_failed;
>  out:
> +	/*
> +	 * Put the permanent failure page back to migration list, they
> +	 * will be put back to the right list by the caller.
> +	 */
> +	list_splice(&ret_pages, from);
> +
>  	count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
>  	count_vm_events(PGMIGRATE_FAIL, nr_failed);
>  	count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
> -- 
> 2.26.2


—
Best Regards,
Yan Zi
Yang Shi Nov. 6, 2020, 9:34 p.m. UTC | #2
On Fri, Nov 6, 2020 at 12:03 PM Zi Yan <ziy@nvidia.com> wrote:
>
> On 3 Nov 2020, at 8:03, Yang Shi wrote:
>
> > When unmap_and_move{_huge_page}() returns !-EAGAIN and !MIGRATEPAGE_SUCCESS,
> > the page would be put back to LRU or proper list if it is non-LRU movable
> > page.  But, the callers always call putback_movable_pages() to put the
> > failed pages back later on, so it seems not very efficient to put every
> > single page back immediately, and the code looks convoluted.
> >
> > Put the failed page on a separate list, then splice the list to migrate
> > list when all pages are tried.  It is the caller's responsibility to
> > call putback_movable_pages() to handle failures.  This also makes the
> > code simpler and more readable.
> >
> > After the change the rules are:
> >     * Success: non hugetlb page will be freed, hugetlb page will be put
> >                back
> >     * -EAGAIN: stay on the from list
> >     * -ENOMEM: stay on the from list
> >     * Other errno: put on ret_pages list then splice to from list
>
> Can you put this before the switch case in the migrate_pages? That will
> be very helpful to understand the code.

Sure, I agree the switch case deserves some comments.

> >
> > The from list would be empty iff all pages are migrated successfully, it
>
> s/iff/if unless you really mean if and only if. :)

Yes, I mean if and only if.

>
>
> Everything else looks good to me. Thanks for making the code cleaner.
> With the changes above, you can add Reviewed-by: Zi Yan <ziy@nvidia.com>.

Thanks.

>
> > was not so before.  This has no impact to current existing callsites.
> >
> > Signed-off-by: Yang Shi <shy828301@gmail.com>
> > ---
> >  mm/migrate.c | 58 ++++++++++++++++++++++++++--------------------------
> >  1 file changed, 29 insertions(+), 29 deletions(-)
> >
> > diff --git a/mm/migrate.c b/mm/migrate.c
> > index 8a2e7e19e27b..c33c92495ead 100644
> > --- a/mm/migrate.c
> > +++ b/mm/migrate.c
> > @@ -1169,7 +1169,8 @@ static int unmap_and_move(new_page_t get_new_page,
> >                                  free_page_t put_new_page,
> >                                  unsigned long private, struct page *page,
> >                                  int force, enum migrate_mode mode,
> > -                                enum migrate_reason reason)
> > +                                enum migrate_reason reason,
> > +                                struct list_head *ret)
> >  {
> >       int rc = MIGRATEPAGE_SUCCESS;
> >       struct page *newpage = NULL;
> > @@ -1206,7 +1207,14 @@ static int unmap_and_move(new_page_t get_new_page,
> >                * migrated will have kept its references and be restored.
> >                */
> >               list_del(&page->lru);
> > +     }
> >
> > +     /*
> > +      * If migration is successful, releases reference grabbed during
> > +      * isolation. Otherwise, restore the page to right list unless
> > +      * we want to retry.
> > +      */
> > +     if (rc == MIGRATEPAGE_SUCCESS) {
> >               /*
> >                * Compaction can migrate also non-LRU pages which are
> >                * not accounted to NR_ISOLATED_*. They can be recognized
> > @@ -1215,35 +1223,16 @@ static int unmap_and_move(new_page_t get_new_page,
> >               if (likely(!__PageMovable(page)))
> >                       mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
> >                                       page_is_file_lru(page), -thp_nr_pages(page));
> > -     }
> >
> > -     /*
> > -      * If migration is successful, releases reference grabbed during
> > -      * isolation. Otherwise, restore the page to right list unless
> > -      * we want to retry.
> > -      */
> > -     if (rc == MIGRATEPAGE_SUCCESS) {
> >               if (reason != MR_MEMORY_FAILURE)
> >                       /*
> >                        * We release the page in page_handle_poison.
> >                        */
> >                       put_page(page);
> >       } else {
> > -             if (rc != -EAGAIN) {
> > -                     if (likely(!__PageMovable(page))) {
> > -                             putback_lru_page(page);
> > -                             goto put_new;
> > -                     }
> > +             if (rc != -EAGAIN)
> > +                     list_add_tail(&page->lru, ret);
> >
> > -                     lock_page(page);
> > -                     if (PageMovable(page))
> > -                             putback_movable_page(page);
> > -                     else
> > -                             __ClearPageIsolated(page);
> > -                     unlock_page(page);
> > -                     put_page(page);
> > -             }
> > -put_new:
> >               if (put_new_page)
> >                       put_new_page(newpage, private);
> >               else
> > @@ -1274,7 +1263,8 @@ static int unmap_and_move(new_page_t get_new_page,
> >  static int unmap_and_move_huge_page(new_page_t get_new_page,
> >                               free_page_t put_new_page, unsigned long private,
> >                               struct page *hpage, int force,
> > -                             enum migrate_mode mode, int reason)
> > +                             enum migrate_mode mode, int reason,
> > +                             struct list_head *ret)
> >  {
> >       int rc = -EAGAIN;
> >       int page_was_mapped = 0;
> > @@ -1290,7 +1280,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
> >        * kicking migration.
> >        */
> >       if (!hugepage_migration_supported(page_hstate(hpage))) {
> > -             putback_active_hugepage(hpage);
> > +             list_move_tail(&hpage->lru, ret);
> >               return -ENOSYS;
> >       }
> >
> > @@ -1372,8 +1362,10 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
> >  out_unlock:
> >       unlock_page(hpage);
> >  out:
> > -     if (rc != -EAGAIN)
> > +     if (rc == MIGRATEPAGE_SUCCESS)
> >               putback_active_hugepage(hpage);
> > +     else if (rc != -EAGAIN && rc != MIGRATEPAGE_SUCCESS)
> > +             list_move_tail(&hpage->lru, ret);
> >
> >       /*
> >        * If migration was not successful and there's a freeing callback, use
> > @@ -1404,8 +1396,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
> >   *
> >   * The function returns after 10 attempts or if no pages are movable any more
> >   * because the list has become empty or no retryable pages exist any more.
> > - * The caller should call putback_movable_pages() to return pages to the LRU
> > - * or free list only if ret != 0.
> > + * It is caller's responsibility to call putback_movable_pages() to return pages
> > + * to the LRU or free list only if ret != 0.
> >   *
> >   * Returns the number of pages that were not migrated, or an error code.
> >   */
> > @@ -1426,6 +1418,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
> >       struct page *page2;
> >       int swapwrite = current->flags & PF_SWAPWRITE;
> >       int rc, nr_subpages;
> > +     LIST_HEAD(ret_pages);
> >
> >       if (!swapwrite)
> >               current->flags |= PF_SWAPWRITE;
> > @@ -1448,11 +1441,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
> >                       if (PageHuge(page))
> >                               rc = unmap_and_move_huge_page(get_new_page,
> >                                               put_new_page, private, page,
> > -                                             pass > 2, mode, reason);
> > +                                             pass > 2, mode, reason,
> > +                                             &ret_pages);
> >                       else
> >                               rc = unmap_and_move(get_new_page, put_new_page,
> >                                               private, page, pass > 2, mode,
> > -                                             reason);
> > +                                             reason, &ret_pages);
> >
> >                       switch(rc) {
> >                       case -ENOMEM:
> > @@ -1519,6 +1513,12 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
> >       nr_thp_failed += thp_retry;
> >       rc = nr_failed;
> >  out:
> > +     /*
> > +      * Put the permanent failure page back to migration list, they
> > +      * will be put back to the right list by the caller.
> > +      */
> > +     list_splice(&ret_pages, from);
> > +
> >       count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
> >       count_vm_events(PGMIGRATE_FAIL, nr_failed);
> >       count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
> > --
> > 2.26.2
>
>
> —
> Best Regards,
> Yan Zi
diff mbox series

Patch

diff --git a/mm/migrate.c b/mm/migrate.c
index 8a2e7e19e27b..c33c92495ead 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1169,7 +1169,8 @@  static int unmap_and_move(new_page_t get_new_page,
 				   free_page_t put_new_page,
 				   unsigned long private, struct page *page,
 				   int force, enum migrate_mode mode,
-				   enum migrate_reason reason)
+				   enum migrate_reason reason,
+				   struct list_head *ret)
 {
 	int rc = MIGRATEPAGE_SUCCESS;
 	struct page *newpage = NULL;
@@ -1206,7 +1207,14 @@  static int unmap_and_move(new_page_t get_new_page,
 		 * migrated will have kept its references and be restored.
 		 */
 		list_del(&page->lru);
+	}
 
+	/*
+	 * If migration is successful, releases reference grabbed during
+	 * isolation. Otherwise, restore the page to right list unless
+	 * we want to retry.
+	 */
+	if (rc == MIGRATEPAGE_SUCCESS) {
 		/*
 		 * Compaction can migrate also non-LRU pages which are
 		 * not accounted to NR_ISOLATED_*. They can be recognized
@@ -1215,35 +1223,16 @@  static int unmap_and_move(new_page_t get_new_page,
 		if (likely(!__PageMovable(page)))
 			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
 					page_is_file_lru(page), -thp_nr_pages(page));
-	}
 
-	/*
-	 * If migration is successful, releases reference grabbed during
-	 * isolation. Otherwise, restore the page to right list unless
-	 * we want to retry.
-	 */
-	if (rc == MIGRATEPAGE_SUCCESS) {
 		if (reason != MR_MEMORY_FAILURE)
 			/*
 			 * We release the page in page_handle_poison.
 			 */
 			put_page(page);
 	} else {
-		if (rc != -EAGAIN) {
-			if (likely(!__PageMovable(page))) {
-				putback_lru_page(page);
-				goto put_new;
-			}
+		if (rc != -EAGAIN)
+			list_add_tail(&page->lru, ret);
 
-			lock_page(page);
-			if (PageMovable(page))
-				putback_movable_page(page);
-			else
-				__ClearPageIsolated(page);
-			unlock_page(page);
-			put_page(page);
-		}
-put_new:
 		if (put_new_page)
 			put_new_page(newpage, private);
 		else
@@ -1274,7 +1263,8 @@  static int unmap_and_move(new_page_t get_new_page,
 static int unmap_and_move_huge_page(new_page_t get_new_page,
 				free_page_t put_new_page, unsigned long private,
 				struct page *hpage, int force,
-				enum migrate_mode mode, int reason)
+				enum migrate_mode mode, int reason,
+				struct list_head *ret)
 {
 	int rc = -EAGAIN;
 	int page_was_mapped = 0;
@@ -1290,7 +1280,7 @@  static int unmap_and_move_huge_page(new_page_t get_new_page,
 	 * kicking migration.
 	 */
 	if (!hugepage_migration_supported(page_hstate(hpage))) {
-		putback_active_hugepage(hpage);
+		list_move_tail(&hpage->lru, ret);
 		return -ENOSYS;
 	}
 
@@ -1372,8 +1362,10 @@  static int unmap_and_move_huge_page(new_page_t get_new_page,
 out_unlock:
 	unlock_page(hpage);
 out:
-	if (rc != -EAGAIN)
+	if (rc == MIGRATEPAGE_SUCCESS)
 		putback_active_hugepage(hpage);
+	else if (rc != -EAGAIN && rc != MIGRATEPAGE_SUCCESS)
+		list_move_tail(&hpage->lru, ret);
 
 	/*
 	 * If migration was not successful and there's a freeing callback, use
@@ -1404,8 +1396,8 @@  static int unmap_and_move_huge_page(new_page_t get_new_page,
  *
  * The function returns after 10 attempts or if no pages are movable any more
  * because the list has become empty or no retryable pages exist any more.
- * The caller should call putback_movable_pages() to return pages to the LRU
- * or free list only if ret != 0.
+ * It is caller's responsibility to call putback_movable_pages() to return pages
+ * to the LRU or free list only if ret != 0.
  *
  * Returns the number of pages that were not migrated, or an error code.
  */
@@ -1426,6 +1418,7 @@  int migrate_pages(struct list_head *from, new_page_t get_new_page,
 	struct page *page2;
 	int swapwrite = current->flags & PF_SWAPWRITE;
 	int rc, nr_subpages;
+	LIST_HEAD(ret_pages);
 
 	if (!swapwrite)
 		current->flags |= PF_SWAPWRITE;
@@ -1448,11 +1441,12 @@  int migrate_pages(struct list_head *from, new_page_t get_new_page,
 			if (PageHuge(page))
 				rc = unmap_and_move_huge_page(get_new_page,
 						put_new_page, private, page,
-						pass > 2, mode, reason);
+						pass > 2, mode, reason,
+						&ret_pages);
 			else
 				rc = unmap_and_move(get_new_page, put_new_page,
 						private, page, pass > 2, mode,
-						reason);
+						reason, &ret_pages);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1519,6 +1513,12 @@  int migrate_pages(struct list_head *from, new_page_t get_new_page,
 	nr_thp_failed += thp_retry;
 	rc = nr_failed;
 out:
+	/*
+	 * Put the permanent failure page back to migration list, they
+	 * will be put back to the right list by the caller.
+	 */
+	list_splice(&ret_pages, from);
+
 	count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
 	count_vm_events(PGMIGRATE_FAIL, nr_failed);
 	count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);