diff mbox series

[v2,1/5] mm: Separate folio_split_memcg_refs() from split_page_memcg()

Message ID 20250314133617.138071-2-willy@infradead.org (mailing list archive)
State New
Headers show
Series Minor memcg cleanups & prep for memdescs | expand

Commit Message

Matthew Wilcox March 14, 2025, 1:36 p.m. UTC
Folios always use memcg_data to refer to the mem_cgroup while pages
allocated with GFP_ACCOUNT have a pointer to the obj_cgroup.  Since the
caller already knows what it has, split the function into two and then
we don't need to check.

Move the assignment of split folio memcg_data to the point where we set
up the other parts of the new folio.  That leaves folio_split_memcg_refs()
just handling the memcg accounting.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Zi Yan <ziy@nvidia.com>
---
 include/linux/memcontrol.h |  7 +++++++
 mm/huge_memory.c           | 16 ++++------------
 mm/memcontrol.c            | 17 +++++++++++++----
 3 files changed, 24 insertions(+), 16 deletions(-)

Comments

David Hildenbrand March 14, 2025, 9:49 p.m. UTC | #1
On 14.03.25 14:36, Matthew Wilcox (Oracle) wrote:
> Folios always use memcg_data to refer to the mem_cgroup while pages
> allocated with GFP_ACCOUNT have a pointer to the obj_cgroup.  Since the
> caller already knows what it has, split the function into two and then
> we don't need to check.
> 
> Move the assignment of split folio memcg_data to the point where we set
> up the other parts of the new folio.  That leaves folio_split_memcg_refs()
> just handling the memcg accounting.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Acked-by: Zi Yan <ziy@nvidia.com>
> ---
>   include/linux/memcontrol.h |  7 +++++++
>   mm/huge_memory.c           | 16 ++++------------
>   mm/memcontrol.c            | 17 +++++++++++++----
>   3 files changed, 24 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 57664e2a8fb7..d090089c5497 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -1039,6 +1039,8 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
>   }
>   
>   void split_page_memcg(struct page *head, int old_order, int new_order);
> +void folio_split_memcg_refs(struct folio *folio, unsigned old_order,
> +		unsigned new_order);
>   
>   static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
>   {
> @@ -1463,6 +1465,11 @@ static inline void split_page_memcg(struct page *head, int old_order, int new_or
>   {
>   }
>   
> +static inline void folio_split_memcg_refs(struct folio *folio,
> +		unsigned old_order, unsigned new_order)
> +{
> +}
> +
>   static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
>   {
>   	return 0;
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 14b1963898a7..3e5ecc8f3d13 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3394,6 +3394,9 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
>   			folio_set_young(new_folio);
>   		if (folio_test_idle(folio))
>   			folio_set_idle(new_folio);
> +#ifdef CONFIG_MEMCG
> +		new_folio->memcg_data = folio->memcg_data;
> +#endif
>   
>   		folio_xchg_last_cpupid(new_folio, folio_last_cpupid(folio));
>   	}
> @@ -3525,18 +3528,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
>   			}
>   		}
>   
> -		/*
> -		 * Reset any memcg data overlay in the tail pages.
> -		 * folio_nr_pages() is unreliable until prep_compound_page()
> -		 * was called again.
> -		 */
> -#ifdef NR_PAGES_IN_LARGE_FOLIO
> -		folio->_nr_pages = 0;
> -#endif


I remember that we could trigger a warning without that, but I don't 
immediately find where that warning was. IIRC, if we'd split to order-0, 
page[1] would have indicated that it had a memcg set, and something 
bailed out.

Maybe Zi Yan recalls where that check fired.

In any case, if that warning no longer fires this is a very nice cleanup!
Zi Yan March 14, 2025, 11:15 p.m. UTC | #2
On 14 Mar 2025, at 17:49, David Hildenbrand wrote:

> On 14.03.25 14:36, Matthew Wilcox (Oracle) wrote:
>> Folios always use memcg_data to refer to the mem_cgroup while pages
>> allocated with GFP_ACCOUNT have a pointer to the obj_cgroup.  Since the
>> caller already knows what it has, split the function into two and then
>> we don't need to check.
>>
>> Move the assignment of split folio memcg_data to the point where we set
>> up the other parts of the new folio.  That leaves folio_split_memcg_refs()
>> just handling the memcg accounting.
>>
>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
>> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
>> Acked-by: Zi Yan <ziy@nvidia.com>
>> ---
>>   include/linux/memcontrol.h |  7 +++++++
>>   mm/huge_memory.c           | 16 ++++------------
>>   mm/memcontrol.c            | 17 +++++++++++++----
>>   3 files changed, 24 insertions(+), 16 deletions(-)
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index 57664e2a8fb7..d090089c5497 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -1039,6 +1039,8 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
>>   }
>>    void split_page_memcg(struct page *head, int old_order, int new_order);
>> +void folio_split_memcg_refs(struct folio *folio, unsigned old_order,
>> +		unsigned new_order);
>>    static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
>>   {
>> @@ -1463,6 +1465,11 @@ static inline void split_page_memcg(struct page *head, int old_order, int new_or
>>   {
>>   }
>>  +static inline void folio_split_memcg_refs(struct folio *folio,
>> +		unsigned old_order, unsigned new_order)
>> +{
>> +}
>> +
>>   static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
>>   {
>>   	return 0;
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 14b1963898a7..3e5ecc8f3d13 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -3394,6 +3394,9 @@ static void __split_folio_to_order(struct folio *folio, int old_order,
>>   			folio_set_young(new_folio);
>>   		if (folio_test_idle(folio))
>>   			folio_set_idle(new_folio);
>> +#ifdef CONFIG_MEMCG
>> +		new_folio->memcg_data = folio->memcg_data;
>> +#endif
>>    		folio_xchg_last_cpupid(new_folio, folio_last_cpupid(folio));
>>   	}
>> @@ -3525,18 +3528,7 @@ static int __split_unmapped_folio(struct folio *folio, int new_order,
>>   			}
>>   		}
>>  -		/*
>> -		 * Reset any memcg data overlay in the tail pages.
>> -		 * folio_nr_pages() is unreliable until prep_compound_page()
>> -		 * was called again.
>> -		 */
>> -#ifdef NR_PAGES_IN_LARGE_FOLIO
>> -		folio->_nr_pages = 0;
>> -#endif
>
>
> I remember that we could trigger a warning without that, but I don't immediately find where that warning was. IIRC, if we'd split to order-0, page[1] would have indicated that it had a memcg set, and something bailed out.
>
> Maybe Zi Yan recalls where that check fired.

The error I encountered is different. When I rebase my folio_split()
on top of David’s mapcount patchset, my original patch used folio_nr_pages()
after memcg split. Since memcg overlays with _nr_pages, when splitting
to order-0, folio->_nr_pages is overwritten with memcg_data, causing
folio_nr_pages() to return a bogus value. With Matthew’s patch,
memcg_data of page[1] is written inside __split_folio_to_order(),
so in theory __split_folio_to_order() can call folio_nr_pages() like
my original patch.

For folio->_nr_pages = 0, I suppose it is trying to suppress any
page[1]->memcg_data != NULL check in the following code. But I could
not find any such check.

>
> In any case, if that warning no longer fires this is a very nice cleanup!

Yeah, if we see any warning on memcg later, we know how to fix it. :)


Best Regards,
Yan, Zi
diff mbox series

Patch

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 57664e2a8fb7..d090089c5497 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1039,6 +1039,8 @@  static inline void memcg_memory_event_mm(struct mm_struct *mm,
 }
 
 void split_page_memcg(struct page *head, int old_order, int new_order);
+void folio_split_memcg_refs(struct folio *folio, unsigned old_order,
+		unsigned new_order);
 
 static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
 {
@@ -1463,6 +1465,11 @@  static inline void split_page_memcg(struct page *head, int old_order, int new_or
 {
 }
 
+static inline void folio_split_memcg_refs(struct folio *folio,
+		unsigned old_order, unsigned new_order)
+{
+}
+
 static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
 {
 	return 0;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 14b1963898a7..3e5ecc8f3d13 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3394,6 +3394,9 @@  static void __split_folio_to_order(struct folio *folio, int old_order,
 			folio_set_young(new_folio);
 		if (folio_test_idle(folio))
 			folio_set_idle(new_folio);
+#ifdef CONFIG_MEMCG
+		new_folio->memcg_data = folio->memcg_data;
+#endif
 
 		folio_xchg_last_cpupid(new_folio, folio_last_cpupid(folio));
 	}
@@ -3525,18 +3528,7 @@  static int __split_unmapped_folio(struct folio *folio, int new_order,
 			}
 		}
 
-		/*
-		 * Reset any memcg data overlay in the tail pages.
-		 * folio_nr_pages() is unreliable until prep_compound_page()
-		 * was called again.
-		 */
-#ifdef NR_PAGES_IN_LARGE_FOLIO
-		folio->_nr_pages = 0;
-#endif
-
-
-		/* complete memcg works before add pages to LRU */
-		split_page_memcg(&folio->page, old_order, split_order);
+		folio_split_memcg_refs(folio, old_order, split_order);
 		split_page_owner(&folio->page, old_order, split_order);
 		pgalloc_tag_split(folio, old_order, split_order);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 87544df4c3b8..4674d9815a50 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3101,10 +3101,19 @@  void split_page_memcg(struct page *head, int old_order, int new_order)
 	for (i = new_nr; i < old_nr; i += new_nr)
 		folio_page(folio, i)->memcg_data = folio->memcg_data;
 
-	if (folio_memcg_kmem(folio))
-		obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1);
-	else
-		css_get_many(&folio_memcg(folio)->css, old_nr / new_nr - 1);
+	obj_cgroup_get_many(__folio_objcg(folio), old_nr / new_nr - 1);
+}
+
+void folio_split_memcg_refs(struct folio *folio, unsigned old_order,
+		unsigned new_order)
+{
+	unsigned new_refs;
+
+	if (mem_cgroup_disabled() || !folio_memcg_charged(folio))
+		return;
+
+	new_refs = (1 << (old_order - new_order)) - 1;
+	css_get_many(&__folio_memcg(folio)->css, new_refs);
 }
 
 unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)