
[v2,5/5] mm: memory_hotplug: unify Huge/LRU/non-LRU movable folio isolation

Message ID 20240817084941.2375713-6-wangkefeng.wang@huawei.com (mailing list archive)
State New
Series mm: memory_hotplug: improve do_migrate_range()

Commit Message

Kefeng Wang Aug. 17, 2024, 8:49 a.m. UTC
Use isolate_folio_to_list() to unify hugetlb/LRU/non-LRU
folio isolation, which cleans up the code a bit and saves a few
calls to compound_head().

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/memory_hotplug.c | 45 +++++++++++++++++----------------------------
 1 file changed, 17 insertions(+), 28 deletions(-)

Comments

Miaohe Lin Aug. 22, 2024, 7:20 a.m. UTC | #1
On 2024/8/17 16:49, Kefeng Wang wrote:
> Use the isolate_folio_to_list() to unify hugetlb/LRU/non-LRU
> folio isolation, which cleanup code a bit and save a few calls
> to compound_head().
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  mm/memory_hotplug.c | 45 +++++++++++++++++----------------------------
>  1 file changed, 17 insertions(+), 28 deletions(-)
> 
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 02a0d4fbc3fe..cc9c16db2f8c 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1773,14 +1773,14 @@ static int scan_movable_pages(unsigned long start, unsigned long end,
>  static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
>  {
>  	unsigned long pfn;
> -	struct page *page;
>  	LIST_HEAD(source);
> +	struct folio *folio;
>  	static DEFINE_RATELIMIT_STATE(migrate_rs, DEFAULT_RATELIMIT_INTERVAL,
>  				      DEFAULT_RATELIMIT_BURST);
>  
>  	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
> -		struct folio *folio;
> -		bool isolated;
> +		struct page *page;
> +		bool huge;
>  
>  		if (!pfn_valid(pfn))
>  			continue;
> @@ -1812,34 +1812,22 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
>  			continue;
>  		}
>  
> -		if (folio_test_hugetlb(folio)) {
> -			isolate_hugetlb(folio, &source);
> -			continue;
> +		huge = folio_test_hugetlb(folio);
> +		if (!huge) {
> +			folio = folio_get_nontail_page(page);
> +			if (!folio)
> +				continue;
>  		}
>  
> -		if (!get_page_unless_zero(page))

Does page always equal head? page is used in most cases in this function, while head is only used to calculate the pfn.
If page is not the head, can folio really simply replace page here?

> -			continue;
> -		/*
> -		 * We can skip free pages. And we can deal with pages on
> -		 * LRU and non-lru movable pages.
> -		 */
> -		if (PageLRU(page))

I think this check is incorrect. We should check __PageMovable(page) instead. So this patch
fixes this issue too.

Thanks.
.
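For context, the isolate_folio_to_list() helper this patch builds on
(added earlier in this series) is roughly shaped like the sketch below:
a simplified paraphrase for illustration, not the exact patch 4/5 code.
Note that it keys off __folio_test_movable() rather than the LRU flag,
which is why it also addresses the check questioned above.

bool isolate_folio_to_list(struct folio *folio, struct list_head *list)
{
	bool isolated, lru;

	/* hugetlb takes its own reference under hugetlb_lock */
	if (folio_test_hugetlb(folio))
		return isolate_hugetlb(folio, list);

	/* treat everything that is not non-LRU movable as LRU */
	lru = !__folio_test_movable(folio);
	if (lru)
		isolated = folio_isolate_lru(folio);
	else
		isolated = isolate_movable_page(&folio->page,
						ISOLATE_UNEVICTABLE);
	if (!isolated)
		return false;

	list_add(&folio->lru, list);
	if (lru)
		node_stat_add_folio(folio, NR_ISOLATED_ANON +
				    folio_is_file_lru(folio));
	return true;
}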
Kefeng Wang Aug. 22, 2024, 12:08 p.m. UTC | #2
On 2024/8/22 15:20, Miaohe Lin wrote:
> On 2024/8/17 16:49, Kefeng Wang wrote:
>> Use the isolate_folio_to_list() to unify hugetlb/LRU/non-LRU
>> folio isolation, which cleanup code a bit and save a few calls
>> to compound_head().
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   mm/memory_hotplug.c | 45 +++++++++++++++++----------------------------
>>   1 file changed, 17 insertions(+), 28 deletions(-)
>>
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index 02a0d4fbc3fe..cc9c16db2f8c 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -1773,14 +1773,14 @@ static int scan_movable_pages(unsigned long start, unsigned long end,
>>   static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
>>   {
>>   	unsigned long pfn;
>> -	struct page *page;
>>   	LIST_HEAD(source);
>> +	struct folio *folio;
>>   	static DEFINE_RATELIMIT_STATE(migrate_rs, DEFAULT_RATELIMIT_INTERVAL,
>>   				      DEFAULT_RATELIMIT_BURST);
>>   
>>   	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
>> -		struct folio *folio;
>> -		bool isolated;
>> +		struct page *page;
>> +		bool huge;
>>   
>>   		if (!pfn_valid(pfn))
>>   			continue;
>> @@ -1812,34 +1812,22 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
>>   			continue;
>>   		}
>>   
>> -		if (folio_test_hugetlb(folio)) {
>> -			isolate_hugetlb(folio, &source);
>> -			continue;
>> +		huge = folio_test_hugetlb(folio);
>> +		if (!huge) {
>> +			folio = folio_get_nontail_page(page);
>> +			if (!folio)
>> +				continue;
>>   		}
>>   
>> -		if (!get_page_unless_zero(page))
> 
> Does page always equal to head? Page is used in most cases in this function and head is only used to calculate pfn.
> If not, folio can not simply replace page?

I'm not sure what you mean. There is no guarantee that page == head,
which is why we need get_page_unless_zero() or folio_get_nontail_page()
to take a reference on the folio and then try to isolate it. Since we
only ever migrate the whole folio, I don't see why we can't convert page
to folio here.
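To make the page vs. head point concrete: folio_get_nontail_page()
behaves roughly like the sketch below (paraphrased, not the exact
upstream definition). A tail page of a compound page has a zero
refcount, so both get_page_unless_zero() and this helper fail on tail
pages; only a head (or order-0) page yields a folio reference.

static inline struct folio *folio_get_nontail_page(struct page *page)
{
	/* fails for free pages and for tail pages (refcount is zero) */
	if (unlikely(!get_page_unless_zero(page)))
		return NULL;
	return (struct folio *)page;
}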

> 
>> -			continue;
>> -		/*
>> -		 * We can skip free pages. And we can deal with pages on
>> -		 * LRU and non-lru movable pages.
>> -		 */
>> -		if (PageLRU(page))
> 
> I think this check is incorrect. We should check __PageMovable(page) instead. So this patch
> fixes this issue too.
> 
> Thanks.
> .
David Hildenbrand Aug. 26, 2024, 2:55 p.m. UTC | #3
On 17.08.24 10:49, Kefeng Wang wrote:
> Use the isolate_folio_to_list() to unify hugetlb/LRU/non-LRU
> folio isolation, which cleanup code a bit and save a few calls
> to compound_head().
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   mm/memory_hotplug.c | 45 +++++++++++++++++----------------------------
>   1 file changed, 17 insertions(+), 28 deletions(-)
> 
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 02a0d4fbc3fe..cc9c16db2f8c 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1773,14 +1773,14 @@ static int scan_movable_pages(unsigned long start, unsigned long end,
>   static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
>   {
>   	unsigned long pfn;
> -	struct page *page;
>   	LIST_HEAD(source);
> +	struct folio *folio;
>   	static DEFINE_RATELIMIT_STATE(migrate_rs, DEFAULT_RATELIMIT_INTERVAL,
>   				      DEFAULT_RATELIMIT_BURST);
>   
>   	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
> -		struct folio *folio;
> -		bool isolated;
> +		struct page *page;
> +		bool huge;

Please use "hugetlb" if you mean hugetlb :)

>   
>   		if (!pfn_valid(pfn))
>   			continue;
> @@ -1812,34 +1812,22 @@ static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
>   			continue;
>   		}
>   
> -		if (folio_test_hugetlb(folio)) {
> -			isolate_hugetlb(folio, &source);
> -			continue;
> +		huge = folio_test_hugetlb(folio);
> +		if (!huge) {
> +			folio = folio_get_nontail_page(page);
> +			if (!folio)
> +				continue;
>   		}

Hm, remind me why we are changing the hugetlb code to not take a 
reference here? It does look odd.
Kefeng Wang Aug. 27, 2024, 1:26 a.m. UTC | #4
On 2024/8/26 22:55, David Hildenbrand wrote:
> On 17.08.24 10:49, Kefeng Wang wrote:
>> Use the isolate_folio_to_list() to unify hugetlb/LRU/non-LRU
>> folio isolation, which cleanup code a bit and save a few calls
>> to compound_head().
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>   mm/memory_hotplug.c | 45 +++++++++++++++++----------------------------
>>   1 file changed, 17 insertions(+), 28 deletions(-)
>>
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index 02a0d4fbc3fe..cc9c16db2f8c 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -1773,14 +1773,14 @@ static int scan_movable_pages(unsigned long 
>> start, unsigned long end,
>>   static void do_migrate_range(unsigned long start_pfn, unsigned long 
>> end_pfn)
>>   {
>>       unsigned long pfn;
>> -    struct page *page;
>>       LIST_HEAD(source);
>> +    struct folio *folio;
>>       static DEFINE_RATELIMIT_STATE(migrate_rs, 
>> DEFAULT_RATELIMIT_INTERVAL,
>>                         DEFAULT_RATELIMIT_BURST);
>>       for (pfn = start_pfn; pfn < end_pfn; pfn++) {
>> -        struct folio *folio;
>> -        bool isolated;
>> +        struct page *page;
>> +        bool huge;
> 
> Please use "hugetlb" if you mean hugetlb :)

OK.
> 
>>           if (!pfn_valid(pfn))
>>               continue;
>> @@ -1812,34 +1812,22 @@ static void do_migrate_range(unsigned long 
>> start_pfn, unsigned long end_pfn)
>>               continue;
>>           }
>> -        if (folio_test_hugetlb(folio)) {
>> -            isolate_hugetlb(folio, &source);
>> -            continue;
>> +        huge = folio_test_hugetlb(folio);
>> +        if (!huge) {
>> +            folio = folio_get_nontail_page(page);
>> +            if (!folio)
>> +                continue;
>>           }
> 
> Hm, remind me why we are changing the hugetlb code to not take a 
> reference here? It does look odd.

Unlike folio_isolate_lru(), isolate_hugetlb() checks the folio and tries
to take a reference on it after acquiring hugetlb_lock, and the other
hugetlb operations are protected by that big lock too, so there is no
need to take a reference here.
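
For reference, isolate_hugetlb() is roughly structured as below (a
simplified sketch; the real code lives in mm/hugetlb.c). The reference
is taken with folio_try_get() only once hugetlb_lock is held, which is
why the caller does not need to pin the folio first:

bool isolate_hugetlb(struct folio *folio, struct list_head *list)
{
	bool ret = true;

	spin_lock_irq(&hugetlb_lock);
	/* recheck under the lock and take the reference there */
	if (!folio_test_hugetlb(folio) ||
	    !folio_test_hugetlb_migratable(folio) ||
	    !folio_try_get(folio)) {
		ret = false;
		goto unlock;
	}
	folio_clear_hugetlb_migratable(folio);
	list_move_tail(&folio->lru, list);
unlock:
	spin_unlock_irq(&hugetlb_lock);
	return ret;
}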
David Hildenbrand Aug. 27, 2024, 3:10 p.m. UTC | #5
On 27.08.24 03:26, Kefeng Wang wrote:
> 
> 
> On 2024/8/26 22:55, David Hildenbrand wrote:
>> On 17.08.24 10:49, Kefeng Wang wrote:
>>> Use the isolate_folio_to_list() to unify hugetlb/LRU/non-LRU
>>> folio isolation, which cleanup code a bit and save a few calls
>>> to compound_head().
>>>
>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>> ---
>>>    mm/memory_hotplug.c | 45 +++++++++++++++++----------------------------
>>>    1 file changed, 17 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>>> index 02a0d4fbc3fe..cc9c16db2f8c 100644
>>> --- a/mm/memory_hotplug.c
>>> +++ b/mm/memory_hotplug.c
>>> @@ -1773,14 +1773,14 @@ static int scan_movable_pages(unsigned long
>>> start, unsigned long end,
>>>    static void do_migrate_range(unsigned long start_pfn, unsigned long
>>> end_pfn)
>>>    {
>>>        unsigned long pfn;
>>> -    struct page *page;
>>>        LIST_HEAD(source);
>>> +    struct folio *folio;
>>>        static DEFINE_RATELIMIT_STATE(migrate_rs,
>>> DEFAULT_RATELIMIT_INTERVAL,
>>>                          DEFAULT_RATELIMIT_BURST);
>>>        for (pfn = start_pfn; pfn < end_pfn; pfn++) {
>>> -        struct folio *folio;
>>> -        bool isolated;
>>> +        struct page *page;
>>> +        bool huge;
>>
>> Please use "hugetlb" if you mean hugetlb :)
> 
> OK.
>>
>>>            if (!pfn_valid(pfn))
>>>                continue;
>>> @@ -1812,34 +1812,22 @@ static void do_migrate_range(unsigned long
>>> start_pfn, unsigned long end_pfn)
>>>                continue;
>>>            }
>>> -        if (folio_test_hugetlb(folio)) {
>>> -            isolate_hugetlb(folio, &source);
>>> -            continue;
>>> +        huge = folio_test_hugetlb(folio);
>>> +        if (!huge) {
>>> +            folio = folio_get_nontail_page(page);
>>> +            if (!folio)
>>> +                continue;
>>>            }
>>
>> Hm, remind me why we are changing the hugetlb code to not take a
>> reference here? It does look odd.
> 
> Different from folio_isolate_lru(), isolate_hugetlb() will check folio
> and try get a reference of folio after get hugetlb_lock, other hugetlb
> operation protected by this big lock too, so no need to take a reference
> here.

But this hugetlb-special casing looks quite ... special TBH.

Is there no way to avoid it?

I'd prefer something like the following, with a comment

if (hugetlb)
	/*
	 * We also want to migrate hugetlb folios that span multiple
	 * memory blocks. So use whatever head page we identified.
	 */
	folio = folio_try_get(folio);
else
	folio = folio_get_nontail_page(page);

if (!folio)
	continue;


And then just dropping the reference unconditionally.

Is there a problem with that? Optimizing for hugetlb references during 
migration is not worth the trouble.


But now I wonder why we don't simply do a folio_try_get() unconditionally ...
Kefeng Wang Aug. 27, 2024, 3:35 p.m. UTC | #6
On 2024/8/27 23:10, David Hildenbrand wrote:
> On 27.08.24 03:26, Kefeng Wang wrote:
>>
>>
>> On 2024/8/26 22:55, David Hildenbrand wrote:
>>> On 17.08.24 10:49, Kefeng Wang wrote:
>>>> Use the isolate_folio_to_list() to unify hugetlb/LRU/non-LRU
>>>> folio isolation, which cleanup code a bit and save a few calls
>>>> to compound_head().
>>>>
>>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>>> ---

...
>>> Hm, remind me why we are changing the hugetlb code to not take a
>>> reference here? It does look odd.
>>
>> Different from folio_isolate_lru(), isolate_hugetlb() will check folio
>> and try get a reference of folio after get hugetlb_lock, other hugetlb
>> operation protected by this big lock too, so no need to take a reference
>> here.
> 
> But this hugetlb-special casing looks quite ... special TBH.
> 
> Is there no way to avoid it?
> 
> I'd prefer something like the following, with a comment
> 
> if (hugetlb)
>      /*
>       * We also want to migrate hugetlb folios that span multiple
>           * memory blocks. So use whatever head page we identified.
>       */
>      folio = folio_try_get(folio);
> else
>      folio = folio_get_nontail_page(page);
> 
> if (!folio)
>      continue;
> 
> 
> And then just dropping the reference unconditionally.
> 
> Is there a problem with that? Optimizing for hugetlb references during 
> migration is not worth the trouble.
> 
> 
> But now I wonder, why we not simply unconditionally do a folio_try_get() 

Yes, I used folio_get_nontail_page() and folio_put() unconditionally in
v1, but that skips tail pages, which is not consistent with the previous
behavior for hugetlb, so I changed to the current approach. For
migration, though, folio_try_get()/folio_put() is enough since we always
migrate the whole folio, and it is simpler. I will push an additional
fix on top of v3, which I just sent a couple of hours ago [1].

[1] 
https://lore.kernel.org/linux-mm/20240827114728.3212578-1-wangkefeng.wang@huawei.com/T/#t
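
For illustration only, the unconditional folio_try_get() direction
mentioned above would make the loop body look roughly like this; it is a
sketch of the idea, not the actual v3 fixup:

	folio = page_folio(page);
	/*
	 * Take a reference on the whole folio, whichever of its pages the
	 * pfn walk hit; a zero refcount (e.g. a free page) means there is
	 * nothing to isolate here.
	 */
	if (!folio_try_get(folio))
		continue;

	if (!isolate_folio_to_list(folio, &source)) {
		if (__ratelimit(&migrate_rs)) {
			pr_warn("failed to isolate pfn %lx\n", pfn);
			dump_page(page, "isolation failed");
		}
	}

	folio_put(folio);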
David Hildenbrand Aug. 27, 2024, 3:38 p.m. UTC | #7
On 27.08.24 17:35, Kefeng Wang wrote:
> 
> 
> On 2024/8/27 23:10, David Hildenbrand wrote:
>> On 27.08.24 03:26, Kefeng Wang wrote:
>>>
>>>
>>> On 2024/8/26 22:55, David Hildenbrand wrote:
>>>> On 17.08.24 10:49, Kefeng Wang wrote:
>>>>> Use the isolate_folio_to_list() to unify hugetlb/LRU/non-LRU
>>>>> folio isolation, which cleanup code a bit and save a few calls
>>>>> to compound_head().
>>>>>
>>>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>>>> ---
> 
> ...
>>>> Hm, remind me why we are changing the hugetlb code to not take a
>>>> reference here? It does look odd.
>>>
>>> Different from folio_isolate_lru(), isolate_hugetlb() will check folio
>>> and try get a reference of folio after get hugetlb_lock, other hugetlb
>>> operation protected by this big lock too, so no need to take a reference
>>> here.
>>
>> But this hugetlb-special casing looks quite ... special TBH.
>>
>> Is there no way to avoid it?
>>
>> I'd prefer something like the following, with a comment
>>
>> if (hugetlb)
>>       /*
>>        * We also want to migrate hugetlb folios that span multiple
>>            * memory blocks. So use whatever head page we identified.
>>        */
>>       folio = folio_try_get(folio);
>> else
>>       folio = folio_get_nontail_page(page);
>>
>> if (!folio)
>>       continue;
>>
>>
>> And then just dropping the reference unconditionally.
>>
>> Is there a problem with that? Optimizing for hugetlb references during
>> migration is not worth the trouble.
>>
>>
>> But now I wonder, why we not simply unconditionally do a folio_try_get()
> 
> Yes,  I use folio_get_nontail_page() and folio_put() unconditionally in v1,

Yes, that part I remember.

> this will skip tail page, not consistent with previous behavior for
> hugetlb, so I change to current way, but for migration, use
> folio_try_get()/folio_put() is enough since we always migrate the whole
> folio, it will be more simple.

I'm wondering if anything relies on the folio_get_nontail_page() part,
but I cannot really think of what that would be. Using folio_try_get()
would be much simpler: we have a page, we want to migrate it away, and
to do that we have to migrate the folio (wherever it starts or ends).

Also, make sure to test with free hugetlb folios.

> I will push an additional fix to v3,
> which just sent a couple of hours ago.

Feel free to send a fixup for v2 instead.

Patch

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 02a0d4fbc3fe..cc9c16db2f8c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1773,14 +1773,14 @@  static int scan_movable_pages(unsigned long start, unsigned long end,
 static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long pfn;
-	struct page *page;
 	LIST_HEAD(source);
+	struct folio *folio;
 	static DEFINE_RATELIMIT_STATE(migrate_rs, DEFAULT_RATELIMIT_INTERVAL,
 				      DEFAULT_RATELIMIT_BURST);
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-		struct folio *folio;
-		bool isolated;
+		struct page *page;
+		bool huge;
 
 		if (!pfn_valid(pfn))
 			continue;
@@ -1812,34 +1812,22 @@  static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 			continue;
 		}
 
-		if (folio_test_hugetlb(folio)) {
-			isolate_hugetlb(folio, &source);
-			continue;
+		huge = folio_test_hugetlb(folio);
+		if (!huge) {
+			folio = folio_get_nontail_page(page);
+			if (!folio)
+				continue;
 		}
 
-		if (!get_page_unless_zero(page))
-			continue;
-		/*
-		 * We can skip free pages. And we can deal with pages on
-		 * LRU and non-lru movable pages.
-		 */
-		if (PageLRU(page))
-			isolated = isolate_lru_page(page);
-		else
-			isolated = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
-		if (isolated) {
-			list_add_tail(&page->lru, &source);
-			if (!__PageMovable(page))
-				inc_node_page_state(page, NR_ISOLATED_ANON +
-						    page_is_file_lru(page));
-
-		} else {
+		if (!isolate_folio_to_list(folio, &source)) {
 			if (__ratelimit(&migrate_rs)) {
 				pr_warn("failed to isolate pfn %lx\n", pfn);
 				dump_page(page, "isolation failed");
 			}
 		}
-		put_page(page);
+
+		if (!huge)
+			folio_put(folio);
 	}
 	if (!list_empty(&source)) {
 		nodemask_t nmask = node_states[N_MEMORY];
@@ -1854,7 +1842,7 @@  static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 * We have checked that migration range is on a single zone so
 		 * we can use the nid of the first page to all the others.
 		 */
-		mtc.nid = page_to_nid(list_first_entry(&source, struct page, lru));
+		mtc.nid = folio_nid(list_first_entry(&source, struct folio, lru));
 
 		/*
 		 * try to allocate from a different node but reuse this node
@@ -1867,11 +1855,12 @@  static void do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		ret = migrate_pages(&source, alloc_migration_target, NULL,
 			(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG, NULL);
 		if (ret) {
-			list_for_each_entry(page, &source, lru) {
+			list_for_each_entry(folio, &source, lru) {
 				if (__ratelimit(&migrate_rs)) {
 					pr_warn("migrating pfn %lx failed ret:%d\n",
-						page_to_pfn(page), ret);
-					dump_page(page, "migration failure");
+						folio_pfn(folio), ret);
+					dump_page(&folio->page,
+						  "migration failure");
 				}
 			}
 			putback_movable_pages(&source);