diff mbox series

mm/gup.c: Simplify and fix check_and_migrate_movable_pages() return codes

Message ID 20220729024645.764366-1-apopple@nvidia.com (mailing list archive)
State New
Headers show
Series mm/gup.c: Simplify and fix check_and_migrate_movable_pages() return codes | expand

Commit Message

Alistair Popple July 29, 2022, 2:46 a.m. UTC
When pinning pages with FOLL_LONGTERM check_and_migrate_movable_pages()
is called to migrate pages out of zones which should not contain any
longterm pinned pages.

When migration succeeds all pages will have been unpinned so pinning
needs to be retried. This is indicated by returning zero. When all pages
are in the correct zone the number of pinned pages is returned.

However migration can also fail, in which case pages are unpinned and
-ENOMEM is returned. However if the failure was due to not being able
to isolate a page, zero is returned. This leads to indefinite looping in
__gup_longterm_locked().

Fix this by simplifying the return codes such that zero indicates all
pages were successfully pinned in the correct zone while errors indicate
either pages were migrated and pinning should be retried or that
migration has failed and therefore the pinning operation should fail.

Signed-off-by: Alistair Popple <apopple@nvidia.com>
---
 mm/gup.c | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

Comments

Jason Gunthorpe July 29, 2022, 7:44 p.m. UTC | #1
On Fri, Jul 29, 2022 at 12:46:45PM +1000, Alistair Popple wrote:
> When pinning pages with FOLL_LONGTERM check_and_migrate_movable_pages()
> is called to migrate pages out of zones which should not contain any
> longterm pinned pages.
> 
> When migration succeeds all pages will have been unpinned so pinning
> needs to be retried. This is indicated by returning zero. When all pages
> are in the correct zone the number of pinned pages is returned.
> 
> However migration can also fail, in which case pages are unpinned and
> -ENOMEM is returned. However if the failure was due to not being unable
> to isolate a page zero is returned. This leads to indefinite looping in
> __gup_longterm_locked().
> 
> Fix this by simplifying the return codes such that zero indicates all
> pages were successfully pinned in the correct zone while errors indicate
> either pages were migrated and pinning should be retried or that
> migration has failed and therefore the pinning operation should fail.
> 
> Signed-off-by: Alistair Popple <apopple@nvidia.com>
> ---
>  mm/gup.c | 46 +++++++++++++++++++++++-----------------------
>  1 file changed, 23 insertions(+), 23 deletions(-)

I have to say I prefer the usual style where all the places that error
exit do 'goto error' instead of trying to keep track in 'ret'

AFAICT there is no reason to 'continue' in most of these paths since
we intend to return to userspace with an error anyhow? Why try to
isolate more pages?

> @@ -1980,19 +1980,18 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>  				    folio_nr_pages(folio));
>  	}
>  
> -	if (!list_empty(&movable_page_list) || isolation_error_count
> -		|| coherent_pages)
> -		goto unpin_pages;
> -
>  	/*
>  	 * If list is empty, and no isolation errors, means that all pages are
> -	 * in the correct zone.
> +	 * in the correct zone. If there were device coherent pages some pages
> +	 * have been unpinned.
>  	 */

That comment is a bit confusing.. I guess it is trying to explain what
coherent_pages is doing?

Maybe just:

All the given pages are fine, nothing was done

> +	if (list_empty(&movable_page_list) && !ret && !coherent_pages)
> +		return 0;
>  
> -unpin_pages:

Now that this label is removed this if following it

	if (!list_empty(&movable_page_list)) {

is also now unneeded because the above 'return 0' already checked it

I came up with this ontop:

diff --git a/mm/gup.c b/mm/gup.c
index 9e7c76d1e4ee3c..eddcf3c0eba727 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1912,11 +1912,15 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
 					    struct page **pages,
 					    unsigned int gup_flags)
 {
+	struct migration_target_control mtc = {
+		.nid = NUMA_NO_NODE,
+		.gfp_mask = GFP_USER | __GFP_NOWARN,
+	};
 	unsigned long i;
 	struct folio *prev_folio = NULL;
 	LIST_HEAD(movable_page_list);
 	bool drain_allow = true, coherent_pages = false;
-	int ret = 0;
+	int ret = -EBUSY;
 
 	for (i = 0; i < nr_pages; i++) {
 		struct folio *folio = page_folio(pages[i]);
@@ -1948,10 +1952,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
 				unpin_user_page(&folio->page);
 			}
 
-			if (migrate_device_coherent_page(&folio->page)) {
-				ret = -EBUSY;
-				break;
-			}
+			if (migrate_device_coherent_page(&folio->page))
+				goto error;
 			continue;
 		}
 
@@ -1963,7 +1965,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
 		if (folio_test_hugetlb(folio)) {
 			if (isolate_hugetlb(&folio->page,
 						&movable_page_list))
-				ret = -EBUSY;
+				goto error;
 			continue;
 		}
 
@@ -1972,10 +1974,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
 			drain_allow = false;
 		}
 
-		if (folio_isolate_lru(folio)) {
-			ret = -EBUSY;
-			continue;
-		}
+		if (folio_isolate_lru(folio))
+			goto error;
 		list_add_tail(&folio->lru, &movable_page_list);
 		node_stat_mod_folio(folio,
 				    NR_ISOLATED_ANON + folio_is_file_lru(folio),
@@ -1987,7 +1987,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
 	 * in the correct zone. If there were device coherent pages some pages
 	 * have been unpinned.
 	 */
-	if (list_empty(&movable_page_list) && !ret && !coherent_pages)
+	if (list_empty(&movable_page_list) && !coherent_pages)
 		return 0;
 
 	/*
@@ -2005,23 +2005,19 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
 			put_page(pages[i]);
 	}
 
-	if (!list_empty(&movable_page_list)) {
-		struct migration_target_control mtc = {
-			.nid = NUMA_NO_NODE,
-			.gfp_mask = GFP_USER | __GFP_NOWARN,
-		};
-
-		ret = migrate_pages(&movable_page_list, alloc_migration_target,
-				    NULL, (unsigned long)&mtc, MIGRATE_SYNC,
-				    MR_LONGTERM_PIN, NULL);
-		if (ret > 0) /* number of pages not migrated */
-			ret = -ENOMEM;
+	not_migrated = migrate_pages(&movable_page_list, alloc_migration_target,
+				     NULL, (unsigned long)&mtc, MIGRATE_SYNC,
+				     MR_LONGTERM_PIN, NULL);
+	if (not_migrated > 0) {
+		ret = -ENOMEM;
+		goto error;
 	}
+	return -EAGAIN;
 
-	if (ret && !list_empty(&movable_page_list))
+error:
+	if (!list_empty(&movable_page_list))
 		putback_movable_pages(&movable_page_list);
-
-	return ret ? ret : -EAGAIN;
+	return ret;
 }
 #else
 static long check_and_migrate_movable_pages(unsigned long nr_pages,
John Hubbard July 29, 2022, 9:22 p.m. UTC | #2
On 7/29/22 12:44, Jason Gunthorpe wrote:

> I came up with this ontop:
> 
That cleans it up even more, looks nice. I'd go just a touch further,
and also (unless there is some odd reason?) stay with -EAGAIN rather
than -EBUSY, because otherwise both the function's comment header, and
the caller, should change from -EBUSY to -EAGAIN just for consistency.

And also because the way it's used: the caller is literally "trying
again".

So on top of the ontop:

diff --git a/mm/gup.c b/mm/gup.c
index 43c1fc532842..5f04033ee0ed 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1901,10 +1901,12 @@ struct page *get_dump_page(unsigned long addr)

  #ifdef CONFIG_MIGRATION
  /*
- * Check whether all pages are pinnable. If some pages are not pinnable migrate
- * them and unpin all the pages. Returns -EAGAIN if pages were unpinned or zero
- * if all pages are pinnable and in the right zone. Other errors indicate
- * migration failure.
+ * Check whether all pages are pinnable. If some pages are not pinnable, migrate
+ * them and unpin all the pages.
+ * Return values:
+ *       0:      all pages are already pinnable and in the right zone
+ *      -EAGAIN: some pages were unpinned or were zero
+ *      -ENOMEM: migration of some pages failed
   */
  static long check_and_migrate_movable_pages(unsigned long nr_pages,
  					    struct page **pages,
@@ -1914,11 +1916,11 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
  		.nid = NUMA_NO_NODE,
  		.gfp_mask = GFP_USER | __GFP_NOWARN,
  	};
-	unsigned long i;
+	unsigned long i, not_migrated;
  	struct folio *prev_folio = NULL;
  	LIST_HEAD(movable_page_list);
  	bool drain_allow = true, coherent_pages = false;
-	int ret = -EBUSY;
+	int ret = -EAGAIN;

  	for (i = 0; i < nr_pages; i++) {
  		struct folio *folio = page_folio(pages[i]);
@@ -1990,7 +1992,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,

  	/*
  	 * Unpin all pages. If device coherent pages were found
-	 * migrate_deivce_coherent_page() will have dropped the pin and set
+	 * migrate_device_coherent_page() will have dropped the pin and set
  	 * pages[i] == NULL.
  	 */
  	for (i = 0; i < nr_pages; i++) {


thanks,
Alistair Popple Aug. 1, 2022, 2:18 a.m. UTC | #3
Jason Gunthorpe <jgg@nvidia.com> writes:

> On Fri, Jul 29, 2022 at 12:46:45PM +1000, Alistair Popple wrote:

[...]

> I have to say I prefer the usual style where all the places that error
> exit do 'goto error' instead of trying to keep track in 'ret'

Ok. Part of the complexity was my understanding from the documentation
for migrate_pages() is that putback_movable_pages() should only be
called if migrate_pages() != 0:

 * It is caller's responsibility to call putback_movable_pages() to return pages
 * to the LRU or free list only if ret != 0.

But I think it should be fine to do regardless, because on success the
pages will be deleted from movable_page_list. Eg. From unmap_and_move():

	if (rc != -EAGAIN) {
		/*
		 * A page that has been migrated has all references
		 * removed and will be freed. A page that has not been
		 * migrated will have kept its references and be restored.
		 */
		list_del(&page->lru);
	}

So will post a v2 doing this.

> AFAICT there is no reason to 'continue' in most of these paths since
> we intend to return to userspace with an error anyhow? Why try to
> isolate more pages?

The main reason would be if callers want to retry the operation. AFAIK
folio_isolate_lru() can have transient failures, so if callers want to
retry it makes sense to isolate and migrate as many pages as possible
rather than one page at a time as subsequent retries may find different
pages that can't be isolated.

Actually I should have called this out more clearly - the previous
behaviour on isolation failure was to retry indefinitely which is what
lead to looping in the kernel. This patch turns isolation failure into
an error and doesn't retry. I wonder though if we need to maintain a
retry count similar to what migrate_pages() does if there are unexpected
page refs?

>> @@ -1980,19 +1980,18 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>>  				    folio_nr_pages(folio));
>>  	}
>>
>> -	if (!list_empty(&movable_page_list) || isolation_error_count
>> -		|| coherent_pages)
>> -		goto unpin_pages;
>> -
>>  	/*
>>  	 * If list is empty, and no isolation errors, means that all pages are
>> -	 * in the correct zone.
>> +	 * in the correct zone. If there were device coherent pages some pages
>> +	 * have been unpinned.
>>  	 */
>
> That comment is a bit confusing.. I guess it is trying to explain why
> coherent_pages is doing?
>
> Maybe just:
>
> All the given pages are fine, nothing was done

Ok.

>> +	if (list_empty(&movable_page_list) && !ret && !coherent_pages)

Actually I think we can drop the coherent_pages variable too. At this
point coherent_pages will either be in the correct zone or we will have
jumped to the error label.

>> +		return 0;
>>
>> -unpin_pages:
>
> Now that this label is removed this if following it
>
> 	if (!list_empty(&movable_page_list)) {
>
> is also now unneeded because the above 'return 0' already checked it
>
> I came up with this ontop:

Thanks for the suggestions.

 - Alistair

> diff --git a/mm/gup.c b/mm/gup.c
> index 9e7c76d1e4ee3c..eddcf3c0eba727 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1912,11 +1912,15 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>  					    struct page **pages,
>  					    unsigned int gup_flags)
>  {
> +	struct migration_target_control mtc = {
> +		.nid = NUMA_NO_NODE,
> +		.gfp_mask = GFP_USER | __GFP_NOWARN,
> +	};
>  	unsigned long i;
>  	struct folio *prev_folio = NULL;
>  	LIST_HEAD(movable_page_list);
>  	bool drain_allow = true, coherent_pages = false;
> -	int ret = 0;
> +	int ret = -EBUSY;
>
>  	for (i = 0; i < nr_pages; i++) {
>  		struct folio *folio = page_folio(pages[i]);
> @@ -1948,10 +1952,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>  				unpin_user_page(&folio->page);
>  			}
>
> -			if (migrate_device_coherent_page(&folio->page)) {
> -				ret = -EBUSY;
> -				break;
> -			}
> +			if (migrate_device_coherent_page(&folio->page))
> +				goto error;
>  			continue;
>  		}
>
> @@ -1963,7 +1965,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>  		if (folio_test_hugetlb(folio)) {
>  			if (isolate_hugetlb(&folio->page,
>  						&movable_page_list))
> -				ret = -EBUSY;
> +				goto error;
>  			continue;
>  		}
>
> @@ -1972,10 +1974,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>  			drain_allow = false;
>  		}
>
> -		if (folio_isolate_lru(folio)) {
> -			ret = -EBUSY;
> -			continue;
> -		}
> +		if (folio_isolate_lru(folio))
> +			goto error;
>  		list_add_tail(&folio->lru, &movable_page_list);
>  		node_stat_mod_folio(folio,
>  				    NR_ISOLATED_ANON + folio_is_file_lru(folio),
> @@ -1987,7 +1987,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>  	 * in the correct zone. If there were device coherent pages some pages
>  	 * have been unpinned.
>  	 */
> -	if (list_empty(&movable_page_list) && !ret && !coherent_pages)
> +	if (list_empty(&movable_page_list) && !coherent_pages)
>  		return 0;
>
>  	/*
> @@ -2005,23 +2005,19 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>  			put_page(pages[i]);
>  	}
>
> -	if (!list_empty(&movable_page_list)) {
> -		struct migration_target_control mtc = {
> -			.nid = NUMA_NO_NODE,
> -			.gfp_mask = GFP_USER | __GFP_NOWARN,
> -		};
> -
> -		ret = migrate_pages(&movable_page_list, alloc_migration_target,
> -				    NULL, (unsigned long)&mtc, MIGRATE_SYNC,
> -				    MR_LONGTERM_PIN, NULL);
> -		if (ret > 0) /* number of pages not migrated */
> -			ret = -ENOMEM;
> +	not_migrated = migrate_pages(&movable_page_list, alloc_migration_target,
> +				     NULL, (unsigned long)&mtc, MIGRATE_SYNC,
> +				     MR_LONGTERM_PIN, NULL);
> +	if (not_migrated > 0) {
> +		ret = -ENOMEM;
> +		goto error;
>  	}
> +	return -EAGAIN;
>
> -	if (ret && !list_empty(&movable_page_list))
> +error:
> +	if (!list_empty(&movable_page_list))
>  		putback_movable_pages(&movable_page_list);
> -
> -	return ret ? ret : -EAGAIN;
> +	return ret;
>  }
>  #else
>  static long check_and_migrate_movable_pages(unsigned long nr_pages,
Alistair Popple Aug. 1, 2022, 2:38 a.m. UTC | #4
John Hubbard <jhubbard@nvidia.com> writes:

> On 7/29/22 12:44, Jason Gunthorpe wrote:
>
>> I came up with this ontop:
>>
> That cleans it up even more, looks nice. I'd go just a touch further,
> and also (unless there is some odd reason?) stay with -EAGAIN rather
> than -EBUSY, because otherwise both the function's comment header, and
> the caller, should change from -EBUSY to -EAGAIN just for consistency.

We return both:

-EBUSY to indicate that we should fail the entire PUP operation. -EAGAIN
 to indicate to __gup_longterm_locked() that the pages have been
 unpinned and it should repin them and check again.

> And also because the way it's used: the caller is literally "trying
> again".
>
> So on top of the ontop:
>
> diff --git a/mm/gup.c b/mm/gup.c
> index 43c1fc532842..5f04033ee0ed 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1901,10 +1901,12 @@ struct page *get_dump_page(unsigned long addr)
>
>  #ifdef CONFIG_MIGRATION
>  /*
> - * Check whether all pages are pinnable. If some pages are not pinnable migrate
> - * them and unpin all the pages. Returns -EAGAIN if pages were unpinned or zero
> - * if all pages are pinnable and in the right zone. Other errors indicate
> - * migration failure.
> + * Check whether all pages are pinnable. If some pages are not pinnable, migrate
> + * them and unpin all the pages.
> + * Return values:
> + *       0:      all pages are already pinnable and in the right zone
> + *      -EAGAIN: some pages were unpinned or were zero
> + *      -ENOMEM: migration of some pages failed
>   */
>  static long check_and_migrate_movable_pages(unsigned long nr_pages,
>  					    struct page **pages,
> @@ -1914,11 +1916,11 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>  		.nid = NUMA_NO_NODE,
>  		.gfp_mask = GFP_USER | __GFP_NOWARN,
>  	};
> -	unsigned long i;
> +	unsigned long i, not_migrated;
>  	struct folio *prev_folio = NULL;
>  	LIST_HEAD(movable_page_list);
>  	bool drain_allow = true, coherent_pages = false;
> -	int ret = -EBUSY;
> +	int ret = -EAGAIN;
>
>  	for (i = 0; i < nr_pages; i++) {
>  		struct folio *folio = page_folio(pages[i]);
> @@ -1990,7 +1992,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>
>  	/*
>  	 * Unpin all pages. If device coherent pages were found
> -	 * migrate_deivce_coherent_page() will have dropped the pin and set
> +	 * migrate_device_coherent_page() will have dropped the pin and set
>  	 * pages[i] == NULL.
>  	 */
>  	for (i = 0; i < nr_pages; i++) {
>
>
> thanks,
Alistair Popple Aug. 1, 2022, 2:46 a.m. UTC | #5
Alistair Popple <apopple@nvidia.com> writes:

> Jason Gunthorpe <jgg@nvidia.com> writes:
>
>> On Fri, Jul 29, 2022 at 12:46:45PM +1000, Alistair Popple wrote:
>
> [...]
>
>> I have to say I prefer the usual style where all the places that error
>> exit do 'goto error' instead of trying to keep track in 'ret'
>
> Ok. Part of the complexity was my understanding from the documentation
> for migrate_pages() is that putback_movable_pages() should only be
> called if migrate_pages() != 0:
>
>  * It is caller's responsibility to call putback_movable_pages() to return pages
>  * to the LRU or free list only if ret != 0.
>
> But I think it should be fine to do regardless, because on success the
> pages will be deleted from movable_page_list. Eg. From unmap_and_move():
>
> 	if (rc != -EAGAIN) {
> 		/*
> 		 * A page that has been migrated has all references
> 		 * removed and will be freed. A page that has not been
> 		 * migrated will have kept its references and be restored.
> 		 */
> 		list_del(&page->lru);
> 	}
>
> So will post a v2 doing this.

Actually sorry, ignore the above. I'd missed the return -EAGAIN after
the migrate_pages() call in your proposed patch.

>> AFAICT there is no reason to 'continue' in most of these paths since
>> we intend to return to userspace with an error anyhow? Why try to
>> isolate more pages?
>
> The main reason would be if callers want to retry the operation. AFAIK
> isolate_folio_lru() can have transient failures, so if callers want to
> retry it makes sense to isolate and migrate as many pages as possible
> rather than one page at a time as subsequent retries may find different
> pages that can't be isolated.
>
> Actually I should have called this out more clearly - the previous
> behaviour on isolation failure was to retry indefinitely which is what
> lead to looping in the kernel. This patch turns isolation failure into
> an error and doesn't retry. I wonder though if we need to maintain a
> retry count similar to what migrate_pages() does if there are unexpected
> page refs?
>
>>> @@ -1980,19 +1980,18 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>>>  				    folio_nr_pages(folio));
>>>  	}
>>>
>>> -	if (!list_empty(&movable_page_list) || isolation_error_count
>>> -		|| coherent_pages)
>>> -		goto unpin_pages;
>>> -
>>>  	/*
>>>  	 * If list is empty, and no isolation errors, means that all pages are
>>> -	 * in the correct zone.
>>> +	 * in the correct zone. If there were device coherent pages some pages
>>> +	 * have been unpinned.
>>>  	 */
>>
>> That comment is a bit confusing.. I guess it is trying to explain why
>> coherent_pages is doing?
>>
>> Maybe just:
>>
>> All the given pages are fine, nothing was done
>
> Ok.
>
>>> +	if (list_empty(&movable_page_list) && !ret && !coherent_pages)
>
> Actually I think we can drop the coherent_pages variable too. At this
> point coherent_pages will either be in the correct zone or we will have
> jumped to the error label.
>
>>> +		return 0;
>>>
>>> -unpin_pages:
>>
>> Now that this label is removed this if following it
>>
>> 	if (!list_empty(&movable_page_list)) {
>>
>> is also now unneeded because the above 'return 0' already checked it
>>
>> I came up with this ontop:
>
> Thanks for the suggestions.
>
>  - Alistair
>
>> diff --git a/mm/gup.c b/mm/gup.c
>> index 9e7c76d1e4ee3c..eddcf3c0eba727 100644
>> --- a/mm/gup.c
>> +++ b/mm/gup.c
>> @@ -1912,11 +1912,15 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>>  					    struct page **pages,
>>  					    unsigned int gup_flags)
>>  {
>> +	struct migration_target_control mtc = {
>> +		.nid = NUMA_NO_NODE,
>> +		.gfp_mask = GFP_USER | __GFP_NOWARN,
>> +	};
>>  	unsigned long i;
>>  	struct folio *prev_folio = NULL;
>>  	LIST_HEAD(movable_page_list);
>>  	bool drain_allow = true, coherent_pages = false;
>> -	int ret = 0;
>> +	int ret = -EBUSY;
>>
>>  	for (i = 0; i < nr_pages; i++) {
>>  		struct folio *folio = page_folio(pages[i]);
>> @@ -1948,10 +1952,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>>  				unpin_user_page(&folio->page);
>>  			}
>>
>> -			if (migrate_device_coherent_page(&folio->page)) {
>> -				ret = -EBUSY;
>> -				break;
>> -			}
>> +			if (migrate_device_coherent_page(&folio->page))
>> +				goto error;
>>  			continue;
>>  		}
>>
>> @@ -1963,7 +1965,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>>  		if (folio_test_hugetlb(folio)) {
>>  			if (isolate_hugetlb(&folio->page,
>>  						&movable_page_list))
>> -				ret = -EBUSY;
>> +				goto error;
>>  			continue;
>>  		}
>>
>> @@ -1972,10 +1974,8 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>>  			drain_allow = false;
>>  		}
>>
>> -		if (folio_isolate_lru(folio)) {
>> -			ret = -EBUSY;
>> -			continue;
>> -		}
>> +		if (folio_isolate_lru(folio))
>> +			goto error;
>>  		list_add_tail(&folio->lru, &movable_page_list);
>>  		node_stat_mod_folio(folio,
>>  				    NR_ISOLATED_ANON + folio_is_file_lru(folio),
>> @@ -1987,7 +1987,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>>  	 * in the correct zone. If there were device coherent pages some pages
>>  	 * have been unpinned.
>>  	 */
>> -	if (list_empty(&movable_page_list) && !ret && !coherent_pages)
>> +	if (list_empty(&movable_page_list) && !coherent_pages)
>>  		return 0;
>>
>>  	/*
>> @@ -2005,23 +2005,19 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
>>  			put_page(pages[i]);
>>  	}
>>
>> -	if (!list_empty(&movable_page_list)) {
>> -		struct migration_target_control mtc = {
>> -			.nid = NUMA_NO_NODE,
>> -			.gfp_mask = GFP_USER | __GFP_NOWARN,
>> -		};
>> -
>> -		ret = migrate_pages(&movable_page_list, alloc_migration_target,
>> -				    NULL, (unsigned long)&mtc, MIGRATE_SYNC,
>> -				    MR_LONGTERM_PIN, NULL);
>> -		if (ret > 0) /* number of pages not migrated */
>> -			ret = -ENOMEM;
>> +	not_migrated = migrate_pages(&movable_page_list, alloc_migration_target,
>> +				     NULL, (unsigned long)&mtc, MIGRATE_SYNC,
>> +				     MR_LONGTERM_PIN, NULL);
>> +	if (not_migrated > 0) {
>> +		ret = -ENOMEM;
>> +		goto error;
>>  	}
>> +	return -EAGAIN;
>>
>> -	if (ret && !list_empty(&movable_page_list))
>> +error:
>> +	if (!list_empty(&movable_page_list))
>>  		putback_movable_pages(&movable_page_list);
>> -
>> -	return ret ? ret : -EAGAIN;
>> +	return ret;
>>  }
>>  #else
>>  static long check_and_migrate_movable_pages(unsigned long nr_pages,
Jason Gunthorpe Aug. 2, 2022, 12:21 p.m. UTC | #6
On Mon, Aug 01, 2022 at 12:18:53PM +1000, Alistair Popple wrote:

> > AFAICT there is no reason to 'continue' in most of these paths since
> > we intend to return to userspace with an error anyhow? Why try to
> > isolate more pages?
> 
> The main reason would be if callers want to retry the operation. AFAIK
> isolate_folio_lru() can have transient failures, so if callers want to
> retry it makes sense to isolate and migrate as many pages as possible
> rather than one page at a time as subsequent retries may find different
> pages that can't be isolated.

Except we don't try to do the migrate, we just isolate and then
unisolate and return failure.

> Actually I should have called this out more clearly - the previous
> behaviour on isolation failure was to retry indefinitely which is what
> lead to looping in the kernel. This patch turns isolation failure into
> an error and doesn't retry. I wonder though if we need to maintain a
> retry count similar to what migrate_pages() does if there are unexpected
> page refs?

This makes more sense, exporting this mess to the caller and hoping
they retry (they won't) doesn't make sense..

Jason
Alistair Popple Aug. 2, 2022, 12:52 p.m. UTC | #7
Jason Gunthorpe <jgg@nvidia.com> writes:

> On Mon, Aug 01, 2022 at 12:18:53PM +1000, Alistair Popple wrote:
>
>> > AFAICT there is no reason to 'continue' in most of these paths since
>> > we intend to return to userspace with an error anyhow? Why try to
>> > isolate more pages?
>>
>> The main reason would be if callers want to retry the operation. AFAIK
>> isolate_folio_lru() can have transient failures, so if callers want to
>> retry it makes sense to isolate and migrate as many pages as possible
>> rather than one page at a time as subsequent retries may find different
>> pages that can't be isolated.
>
> Except we don't try to do the migrate, we just isolate and then
> unisolate and return failure.

Unless I'm missing something any pages successfully isolated are still
added to movable_page_list then migrated if we 'continue' (at least in
the original code and this patch version, but not v2). Obviously pages
that couldn't be isolated can't be migrated, but subsequent retries
should only need to deal with those pages as the rest should already be
in the correct zone.

>> Actually I should have called this out more clearly - the previous
>> behaviour on isolation failure was to retry indefinitely which is what
>> lead to looping in the kernel. This patch turns isolation failure into
>> an error and doesn't retry. I wonder though if we need to maintain a
>> retry count similar to what migrate_pages() does if there are unexpected
>> page refs?
>
> This makes more sense, exporting this mess to the caller and hoping
> they retry (they won't) doesn't make sense..

Ok, sounds reasonable. Will post a v3 that does this instead.

> Jason
diff mbox series

Patch

diff --git a/mm/gup.c b/mm/gup.c
index 364b274a10c2..8b8ce8b7719c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1901,16 +1901,16 @@  struct page *get_dump_page(unsigned long addr)
 
 #ifdef CONFIG_MIGRATION
 /*
- * Check whether all pages are pinnable, if so return number of pages.  If some
- * pages are not pinnable, migrate them, and unpin all pages. Return zero if
- * pages were migrated, or if some pages were not successfully isolated.
- * Return negative error if migration fails.
+ * Check whether all pages are pinnable. If some pages are not pinnable migrate
+ * them and unpin all the pages. Returns -EAGAIN if pages were unpinned or zero
+ * if all pages are pinnable and in the right zone. Other errors indicate
+ * migration failure.
  */
 static long check_and_migrate_movable_pages(unsigned long nr_pages,
 					    struct page **pages,
 					    unsigned int gup_flags)
 {
-	unsigned long isolation_error_count = 0, i;
+	unsigned long i;
 	struct folio *prev_folio = NULL;
 	LIST_HEAD(movable_page_list);
 	bool drain_allow = true, coherent_pages = false;
@@ -1946,10 +1946,10 @@  static long check_and_migrate_movable_pages(unsigned long nr_pages,
 				unpin_user_page(&folio->page);
 			}
 
-			ret = migrate_device_coherent_page(&folio->page);
-			if (ret)
-				goto unpin_pages;
-
+			if (migrate_device_coherent_page(&folio->page)) {
+				ret = -EBUSY;
+				break;
+			}
 			continue;
 		}
 
@@ -1961,7 +1961,7 @@  static long check_and_migrate_movable_pages(unsigned long nr_pages,
 		if (folio_test_hugetlb(folio)) {
 			if (isolate_hugetlb(&folio->page,
 						&movable_page_list))
-				isolation_error_count++;
+				ret = -EBUSY;
 			continue;
 		}
 
@@ -1971,7 +1971,7 @@  static long check_and_migrate_movable_pages(unsigned long nr_pages,
 		}
 
 		if (folio_isolate_lru(folio)) {
-			isolation_error_count++;
+			ret = -EBUSY;
 			continue;
 		}
 		list_add_tail(&folio->lru, &movable_page_list);
@@ -1980,19 +1980,18 @@  static long check_and_migrate_movable_pages(unsigned long nr_pages,
 				    folio_nr_pages(folio));
 	}
 
-	if (!list_empty(&movable_page_list) || isolation_error_count
-		|| coherent_pages)
-		goto unpin_pages;
-
 	/*
 	 * If list is empty, and no isolation errors, means that all pages are
-	 * in the correct zone.
+	 * in the correct zone. If there were device coherent pages some pages
+	 * have been unpinned.
 	 */
-	return nr_pages;
+	if (list_empty(&movable_page_list) && !ret && !coherent_pages)
+		return 0;
 
-unpin_pages:
 	/*
-	 * pages[i] might be NULL if any device coherent pages were found.
+	 * Unpin all pages. If device coherent pages were found
+	 * migrate_deivce_coherent_page() will have dropped the pin and set
+	 * pages[i] == NULL.
 	 */
 	for (i = 0; i < nr_pages; i++) {
 		if (!pages[i])
@@ -2019,14 +2018,15 @@  static long check_and_migrate_movable_pages(unsigned long nr_pages,
 
 	if (ret && !list_empty(&movable_page_list))
 		putback_movable_pages(&movable_page_list);
-	return ret;
+
+	return ret ? ret : -EAGAIN;
 }
 #else
 static long check_and_migrate_movable_pages(unsigned long nr_pages,
 					    struct page **pages,
 					    unsigned int gup_flags)
 {
-	return nr_pages;
+	return 0;
 }
 #endif /* CONFIG_MIGRATION */
 
@@ -2054,10 +2054,10 @@  static long __gup_longterm_locked(struct mm_struct *mm,
 		if (rc <= 0)
 			break;
 		rc = check_and_migrate_movable_pages(rc, pages, gup_flags);
-	} while (!rc);
+	} while (rc == -EAGAIN);
 	memalloc_pin_restore(flags);
 
-	return rc;
+	return rc ? rc : nr_pages;
 }
 
 static bool is_valid_gup_flags(unsigned int gup_flags)