mm/migrate: put dest folio on deferred split list if source was there.

Message ID 20240311193641.133981-1-zi.yan@sent.com (mailing list archive)
State New
Series mm/migrate: put dest folio on deferred split list if source was there.

Commit Message

Zi Yan March 11, 2024, 7:36 p.m. UTC
From: Zi Yan <ziy@nvidia.com>

Commit 616b8371539a6 ("mm: thp: enable thp migration in generic path")
did not check whether a THP was on the deferred split list before
migration, so the destination THP is never put on the deferred split
list even when the source THP was on it. The opportunity to reclaim
free pages from a partially mapped THP during deferred split list
scanning is lost, but there is no other harmful consequence[1]. Check
the source folio's deferred split list status before it is unmapped,
and add the destination folio to the list after migration if the
source folio was on it.

[1]: https://lore.kernel.org/linux-mm/03CE3A00-917C-48CC-8E1C-6A98713C817C@nvidia.com/

Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 mm/huge_memory.c | 22 ----------------------
 mm/internal.h    | 23 +++++++++++++++++++++++
 mm/migrate.c     | 26 +++++++++++++++++++++++++-
 3 files changed, 48 insertions(+), 23 deletions(-)
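
For orientation before the diff: the unmap-side change boils down to the
sketch below. The helper name is invented here purely for illustration
(the posted patch open-codes this logic inside migrate_folio_unmap());
the move side then re-queues the destination folio, and a corrected
version of that hunk is sketched later in the thread.

/* Illustrative sketch only; this helper does not exist in the patch. */
static bool src_was_on_deferred_split_list(struct folio *src)
{
	struct deferred_split *ds_queue;

	/* Only large rmappable folios have a usable _deferred_list. */
	if (!folio_test_large_rmappable(src) ||
	    list_empty(&src->_deferred_list))
		return false;

	ds_queue = get_deferred_split_queue(src);

	/* Take src off its deferred split queue before it is unmapped. */
	spin_lock(&ds_queue->split_queue_lock);
	ds_queue->split_queue_len--;
	list_del_init(&src->_deferred_list);
	spin_unlock(&ds_queue->split_queue_lock);

	return true;	/* caller records PAGE_WAS_ON_DEFERRED_LIST */
}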

Comments

Ryan Roberts March 11, 2024, 7:53 p.m. UTC | #1
On 11/03/2024 19:36, Zi Yan wrote:
> From: Zi Yan <ziy@nvidia.com>
> 
> Commit 616b8371539a6 ("mm: thp: enable thp migration in generic path")
> did not check whether a THP was on the deferred split list before
> migration, so the destination THP is never put on the deferred split
> list even when the source THP was on it. The opportunity to reclaim
> free pages from a partially mapped THP during deferred split list
> scanning is lost, but there is no other harmful consequence[1]. Check
> the source folio's deferred split list status before it is unmapped,
> and add the destination folio to the list after migration if the
> source folio was on it.
> 
> [1]: https://lore.kernel.org/linux-mm/03CE3A00-917C-48CC-8E1C-6A98713C817C@nvidia.com/
> 
> Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
>  mm/huge_memory.c | 22 ----------------------
>  mm/internal.h    | 23 +++++++++++++++++++++++
>  mm/migrate.c     | 26 +++++++++++++++++++++++++-
>  3 files changed, 48 insertions(+), 23 deletions(-)
> 
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 9859aa4f7553..c6d4d0cdf4b3 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -766,28 +766,6 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
>  	return pmd;
>  }
>  
> -#ifdef CONFIG_MEMCG
> -static inline
> -struct deferred_split *get_deferred_split_queue(struct folio *folio)
> -{
> -	struct mem_cgroup *memcg = folio_memcg(folio);
> -	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
> -
> -	if (memcg)
> -		return &memcg->deferred_split_queue;
> -	else
> -		return &pgdat->deferred_split_queue;
> -}
> -#else
> -static inline
> -struct deferred_split *get_deferred_split_queue(struct folio *folio)
> -{
> -	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
> -
> -	return &pgdat->deferred_split_queue;
> -}
> -#endif
> -
>  void folio_prep_large_rmappable(struct folio *folio)
>  {
>  	if (!folio || !folio_test_large(folio))
> diff --git a/mm/internal.h b/mm/internal.h
> index d1c69119b24f..8fa36e84463a 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1107,6 +1107,29 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
>  				   unsigned long addr, pmd_t *pmd,
>  				   unsigned int flags);
>  
> +#ifdef CONFIG_MEMCG
> +static inline
> +struct deferred_split *get_deferred_split_queue(struct folio *folio)
> +{
> +	struct mem_cgroup *memcg = folio_memcg(folio);
> +	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
> +
> +	if (memcg)
> +		return &memcg->deferred_split_queue;
> +	else
> +		return &pgdat->deferred_split_queue;
> +}
> +#else
> +static inline
> +struct deferred_split *get_deferred_split_queue(struct folio *folio)
> +{
> +	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
> +
> +	return &pgdat->deferred_split_queue;
> +}
> +#endif
> +
> +
>  /*
>   * mm/mmap.c
>   */
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 73a052a382f1..84ba1c65d20d 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -20,6 +20,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/buffer_head.h>
>  #include <linux/mm_inline.h>
> +#include <linux/mmzone.h>
>  #include <linux/nsproxy.h>
>  #include <linux/ksm.h>
>  #include <linux/rmap.h>
> @@ -1037,7 +1038,10 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
>  enum {
>  	PAGE_WAS_MAPPED = BIT(0),
>  	PAGE_WAS_MLOCKED = BIT(1),
> -	PAGE_OLD_STATES = PAGE_WAS_MAPPED | PAGE_WAS_MLOCKED,
> +	PAGE_WAS_ON_DEFERRED_LIST = BIT(2),
> +	PAGE_OLD_STATES = PAGE_WAS_MAPPED |
> +			  PAGE_WAS_MLOCKED |
> +			  PAGE_WAS_ON_DEFERRED_LIST,
>  };
>  
>  static void __migrate_folio_record(struct folio *dst,
> @@ -1168,6 +1172,17 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>  		folio_lock(src);
>  	}
>  	locked = true;
> +	if (folio_test_large_rmappable(src) &&
> +		!list_empty(&src->_deferred_list)) {
> +		struct deferred_split *ds_queue = get_deferred_split_queue(src);
> +
> +		spin_lock(&ds_queue->split_queue_lock);
> +		ds_queue->split_queue_len--;
> +		list_del_init(&src->_deferred_list);
> +		spin_unlock(&ds_queue->split_queue_lock);
> +		old_page_state |= PAGE_WAS_ON_DEFERRED_LIST;
> +	}
> +
>  	if (folio_test_mlocked(src))
>  		old_page_state |= PAGE_WAS_MLOCKED;
>  
> @@ -1307,6 +1322,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>  	if (old_page_state & PAGE_WAS_MAPPED)
>  		remove_migration_ptes(src, dst, false);
>  
> +	if (old_page_state & PAGE_WAS_ON_DEFERRED_LIST) {
> +		struct deferred_split *ds_queue = get_deferred_split_queue(src);

do you mean dst here?

> +
> +		spin_lock(&ds_queue->split_queue_lock);
> +		ds_queue->split_queue_len++;
> +		list_add(&dst->_deferred_list, &ds_queue->split_queue);
> +		spin_unlock(&ds_queue->split_queue_lock);
> +	}
> +
>  out_unlock_both:
>  	folio_unlock(dst);
>  	set_page_owner_migrate_reason(&dst->page, reason);
Zi Yan March 11, 2024, 7:56 p.m. UTC | #2
On 11 Mar 2024, at 15:53, Ryan Roberts wrote:

> On 11/03/2024 19:36, Zi Yan wrote:
>> From: Zi Yan <ziy@nvidia.com>
>>
>> Commit 616b8371539a6 ("mm: thp: enable thp migration in generic path")
>> did not check whether a THP was on the deferred split list before
>> migration, so the destination THP is never put on the deferred split
>> list even when the source THP was on it. The opportunity to reclaim
>> free pages from a partially mapped THP during deferred split list
>> scanning is lost, but there is no other harmful consequence[1]. Check
>> the source folio's deferred split list status before it is unmapped,
>> and add the destination folio to the list after migration if the
>> source folio was on it.
>>
>> [1]: https://lore.kernel.org/linux-mm/03CE3A00-917C-48CC-8E1C-6A98713C817C@nvidia.com/
>>
>> Fixes: 616b8371539a ("mm: thp: enable thp migration in generic path")
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>>  mm/huge_memory.c | 22 ----------------------
>>  mm/internal.h    | 23 +++++++++++++++++++++++
>>  mm/migrate.c     | 26 +++++++++++++++++++++++++-
>>  3 files changed, 48 insertions(+), 23 deletions(-)
>>
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 9859aa4f7553..c6d4d0cdf4b3 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -766,28 +766,6 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
>>  	return pmd;
>>  }
>>
>> -#ifdef CONFIG_MEMCG
>> -static inline
>> -struct deferred_split *get_deferred_split_queue(struct folio *folio)
>> -{
>> -	struct mem_cgroup *memcg = folio_memcg(folio);
>> -	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
>> -
>> -	if (memcg)
>> -		return &memcg->deferred_split_queue;
>> -	else
>> -		return &pgdat->deferred_split_queue;
>> -}
>> -#else
>> -static inline
>> -struct deferred_split *get_deferred_split_queue(struct folio *folio)
>> -{
>> -	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
>> -
>> -	return &pgdat->deferred_split_queue;
>> -}
>> -#endif
>> -
>>  void folio_prep_large_rmappable(struct folio *folio)
>>  {
>>  	if (!folio || !folio_test_large(folio))
>> diff --git a/mm/internal.h b/mm/internal.h
>> index d1c69119b24f..8fa36e84463a 100644
>> --- a/mm/internal.h
>> +++ b/mm/internal.h
>> @@ -1107,6 +1107,29 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
>>  				   unsigned long addr, pmd_t *pmd,
>>  				   unsigned int flags);
>>
>> +#ifdef CONFIG_MEMCG
>> +static inline
>> +struct deferred_split *get_deferred_split_queue(struct folio *folio)
>> +{
>> +	struct mem_cgroup *memcg = folio_memcg(folio);
>> +	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
>> +
>> +	if (memcg)
>> +		return &memcg->deferred_split_queue;
>> +	else
>> +		return &pgdat->deferred_split_queue;
>> +}
>> +#else
>> +static inline
>> +struct deferred_split *get_deferred_split_queue(struct folio *folio)
>> +{
>> +	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
>> +
>> +	return &pgdat->deferred_split_queue;
>> +}
>> +#endif
>> +
>> +
>>  /*
>>   * mm/mmap.c
>>   */
>> diff --git a/mm/migrate.c b/mm/migrate.c
>> index 73a052a382f1..84ba1c65d20d 100644
>> --- a/mm/migrate.c
>> +++ b/mm/migrate.c
>> @@ -20,6 +20,7 @@
>>  #include <linux/pagemap.h>
>>  #include <linux/buffer_head.h>
>>  #include <linux/mm_inline.h>
>> +#include <linux/mmzone.h>
>>  #include <linux/nsproxy.h>
>>  #include <linux/ksm.h>
>>  #include <linux/rmap.h>
>> @@ -1037,7 +1038,10 @@ static int move_to_new_folio(struct folio *dst, struct folio *src,
>>  enum {
>>  	PAGE_WAS_MAPPED = BIT(0),
>>  	PAGE_WAS_MLOCKED = BIT(1),
>> -	PAGE_OLD_STATES = PAGE_WAS_MAPPED | PAGE_WAS_MLOCKED,
>> +	PAGE_WAS_ON_DEFERRED_LIST = BIT(2),
>> +	PAGE_OLD_STATES = PAGE_WAS_MAPPED |
>> +			  PAGE_WAS_MLOCKED |
>> +			  PAGE_WAS_ON_DEFERRED_LIST,
>>  };
>>
>>  static void __migrate_folio_record(struct folio *dst,
>> @@ -1168,6 +1172,17 @@ static int migrate_folio_unmap(new_folio_t get_new_folio,
>>  		folio_lock(src);
>>  	}
>>  	locked = true;
>> +	if (folio_test_large_rmappable(src) &&
>> +		!list_empty(&src->_deferred_list)) {
>> +		struct deferred_split *ds_queue = get_deferred_split_queue(src);
>> +
>> +		spin_lock(&ds_queue->split_queue_lock);
>> +		ds_queue->split_queue_len--;
>> +		list_del_init(&src->_deferred_list);
>> +		spin_unlock(&ds_queue->split_queue_lock);
>> +		old_page_state |= PAGE_WAS_ON_DEFERRED_LIST;
>> +	}
>> +
>>  	if (folio_test_mlocked(src))
>>  		old_page_state |= PAGE_WAS_MLOCKED;
>>
>> @@ -1307,6 +1322,15 @@ static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
>>  	if (old_page_state & PAGE_WAS_MAPPED)
>>  		remove_migration_ptes(src, dst, false);
>>
>> +	if (old_page_state & PAGE_WAS_ON_DEFERRED_LIST) {
>> +		struct deferred_split *ds_queue = get_deferred_split_queue(src);
>
> do you mean dst here?

Right. I forgot about the case of migrating from one node to another and
thought either src or dst would work. Thanks.
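
Assuming the fix is simply to look the queue up from the destination
folio, the corrected move-side hunk would read roughly as below (a
sketch of what a v2 might carry, not a posted patch):

	if (old_page_state & PAGE_WAS_ON_DEFERRED_LIST) {
		/*
		 * Look the queue up from dst: after migrating to a different
		 * node, dst's deferred split queue can differ from the one
		 * src was removed from.
		 */
		struct deferred_split *ds_queue = get_deferred_split_queue(dst);

		spin_lock(&ds_queue->split_queue_lock);
		ds_queue->split_queue_len++;
		list_add(&dst->_deferred_list, &ds_queue->split_queue);
		spin_unlock(&ds_queue->split_queue_lock);
	}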
>
>> +
>> +		spin_lock(&ds_queue->split_queue_lock);
>> +		ds_queue->split_queue_len++;
>> +		list_add(&dst->_deferred_list, &ds_queue->split_queue);
>> +		spin_unlock(&ds_queue->split_queue_lock);
>> +	}
>> +
>>  out_unlock_both:
>>  	folio_unlock(dst);
>>  	set_page_owner_migrate_reason(&dst->page, reason);


--
Best Regards,
Yan, Zi

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9859aa4f7553..c6d4d0cdf4b3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -766,28 +766,6 @@  pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 	return pmd;
 }
 
-#ifdef CONFIG_MEMCG
-static inline
-struct deferred_split *get_deferred_split_queue(struct folio *folio)
-{
-	struct mem_cgroup *memcg = folio_memcg(folio);
-	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
-
-	if (memcg)
-		return &memcg->deferred_split_queue;
-	else
-		return &pgdat->deferred_split_queue;
-}
-#else
-static inline
-struct deferred_split *get_deferred_split_queue(struct folio *folio)
-{
-	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
-
-	return &pgdat->deferred_split_queue;
-}
-#endif
-
 void folio_prep_large_rmappable(struct folio *folio)
 {
 	if (!folio || !folio_test_large(folio))
diff --git a/mm/internal.h b/mm/internal.h
index d1c69119b24f..8fa36e84463a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1107,6 +1107,29 @@  struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 				   unsigned long addr, pmd_t *pmd,
 				   unsigned int flags);
 
+#ifdef CONFIG_MEMCG
+static inline
+struct deferred_split *get_deferred_split_queue(struct folio *folio)
+{
+	struct mem_cgroup *memcg = folio_memcg(folio);
+	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
+
+	if (memcg)
+		return &memcg->deferred_split_queue;
+	else
+		return &pgdat->deferred_split_queue;
+}
+#else
+static inline
+struct deferred_split *get_deferred_split_queue(struct folio *folio)
+{
+	struct pglist_data *pgdat = NODE_DATA(folio_nid(folio));
+
+	return &pgdat->deferred_split_queue;
+}
+#endif
+
+
 /*
  * mm/mmap.c
  */
diff --git a/mm/migrate.c b/mm/migrate.c
index 73a052a382f1..84ba1c65d20d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -20,6 +20,7 @@ 
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/mm_inline.h>
+#include <linux/mmzone.h>
 #include <linux/nsproxy.h>
 #include <linux/ksm.h>
 #include <linux/rmap.h>
@@ -1037,7 +1038,10 @@  static int move_to_new_folio(struct folio *dst, struct folio *src,
 enum {
 	PAGE_WAS_MAPPED = BIT(0),
 	PAGE_WAS_MLOCKED = BIT(1),
-	PAGE_OLD_STATES = PAGE_WAS_MAPPED | PAGE_WAS_MLOCKED,
+	PAGE_WAS_ON_DEFERRED_LIST = BIT(2),
+	PAGE_OLD_STATES = PAGE_WAS_MAPPED |
+			  PAGE_WAS_MLOCKED |
+			  PAGE_WAS_ON_DEFERRED_LIST,
 };
 
 static void __migrate_folio_record(struct folio *dst,
@@ -1168,6 +1172,17 @@  static int migrate_folio_unmap(new_folio_t get_new_folio,
 		folio_lock(src);
 	}
 	locked = true;
+	if (folio_test_large_rmappable(src) &&
+		!list_empty(&src->_deferred_list)) {
+		struct deferred_split *ds_queue = get_deferred_split_queue(src);
+
+		spin_lock(&ds_queue->split_queue_lock);
+		ds_queue->split_queue_len--;
+		list_del_init(&src->_deferred_list);
+		spin_unlock(&ds_queue->split_queue_lock);
+		old_page_state |= PAGE_WAS_ON_DEFERRED_LIST;
+	}
+
 	if (folio_test_mlocked(src))
 		old_page_state |= PAGE_WAS_MLOCKED;
 
@@ -1307,6 +1322,15 @@  static int migrate_folio_move(free_folio_t put_new_folio, unsigned long private,
 	if (old_page_state & PAGE_WAS_MAPPED)
 		remove_migration_ptes(src, dst, false);
 
+	if (old_page_state & PAGE_WAS_ON_DEFERRED_LIST) {
+		struct deferred_split *ds_queue = get_deferred_split_queue(src);
+
+		spin_lock(&ds_queue->split_queue_lock);
+		ds_queue->split_queue_len++;
+		list_add(&dst->_deferred_list, &ds_queue->split_queue);
+		spin_unlock(&ds_queue->split_queue_lock);
+	}
+
 out_unlock_both:
 	folio_unlock(dst);
 	set_page_owner_migrate_reason(&dst->page, reason);