diff mbox series

[v2] mm: do not update memcg stats for NR_{FILE/SHMEM}_PMDMAPPED

Message ID 20240506192924.271999-1-yosryahmed@google.com (mailing list archive)
State New
Headers show
Series [v2] mm: do not update memcg stats for NR_{FILE/SHMEM}_PMDMAPPED | expand

Commit Message

Yosry Ahmed May 6, 2024, 7:29 p.m. UTC
Previously, all NR_VM_EVENT_ITEMS stats were maintained per-memcg,
although some of those fields are not exposed anywhere. Commit
14e0f6c957e39 ("memcg: reduce memory for the lruvec and memcg stats")
changed this such that we only maintain the stats we actually expose
per-memcg via a translation table.

Additionally, commit 514462bbe927b ("memcg: warn for unexpected events
and stats") added a warning if a per-memcg stat update is attempted for
a stat that is not in the translation table. The warning started firing
for the NR_{FILE/SHMEM}_PMDMAPPED stat updates in the rmap code. These
stats are not maintained per-memcg, and hence are not in the translation
table.

Do not use __lruvec_stat_mod_folio() when updating NR_FILE_PMDMAPPED and
NR_SHMEM_PMDMAPPED. Use __mod_node_page_state() instead, which updates
the global per-node stats only.

Reported-by: syzbot+9319a4268a640e26b72b@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/lkml/0000000000001b9d500617c8b23c@google.com
Fixes: 514462bbe927 ("memcg: warn for unexpected events and stats")
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 mm/rmap.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

Comments

David Hildenbrand May 6, 2024, 7:50 p.m. UTC | #1
On 06.05.24 21:29, Yosry Ahmed wrote:
> Previously, all NR_VM_EVENT_ITEMS stats were maintained per-memcg,
> although some of those fields are not exposed anywhere. Commit
> 14e0f6c957e39 ("memcg: reduce memory for the lruvec and memcg stats")
> changed this such that we only maintain the stats we actually expose
> per-memcg via a translation table.
> 
> Additionally, commit 514462bbe927b ("memcg: warn for unexpected events
> and stats") added a warning if a per-memcg stat update is attempted for
> a stat that is not in the translation table. The warning started firing
> for the NR_{FILE/SHMEM}_PMDMAPPED stat updates in the rmap code. These
> stats are not maintained per-memcg, and hence are not in the translation
> table.
> 
> Do not use __lruvec_stat_mod_folio() when updating NR_FILE_PMDMAPPED and
> NR_SHMEM_PMDMAPPED. Use __mod_node_page_state() instead, which updates
> the global per-node stats only.
> 
> Reported-by: syzbot+9319a4268a640e26b72b@syzkaller.appspotmail.com
> Closes: https://lore.kernel.org/lkml/0000000000001b9d500617c8b23c@google.com
> Fixes: 514462bbe927 ("memcg: warn for unexpected events and stats")
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> ---
>   mm/rmap.c | 15 +++++++++------
>   1 file changed, 9 insertions(+), 6 deletions(-)
> 
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 12be4241474ab..ed7f820369864 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1435,13 +1435,14 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
>   		struct page *page, int nr_pages, struct vm_area_struct *vma,
>   		enum rmap_level level)
>   {
> +	pg_data_t *pgdat = folio_pgdat(folio);
>   	int nr, nr_pmdmapped = 0;
>   
>   	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
>   
>   	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
>   	if (nr_pmdmapped)
> -		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
> +		__mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
>   			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
>   	if (nr)
>   		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
> @@ -1493,6 +1494,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
>   		enum rmap_level level)
>   {
>   	atomic_t *mapped = &folio->_nr_pages_mapped;
> +	pg_data_t *pgdat = folio_pgdat(folio);
>   	int last, nr = 0, nr_pmdmapped = 0;
>   	bool partially_mapped = false;
>   	enum node_stat_item idx;
> @@ -1540,13 +1542,14 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
>   	}
>   
>   	if (nr_pmdmapped) {
> +		/* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
>   		if (folio_test_anon(folio))
> -			idx = NR_ANON_THPS;
> -		else if (folio_test_swapbacked(folio))
> -			idx = NR_SHMEM_PMDMAPPED;
> +			__lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
>   		else
> -			idx = NR_FILE_PMDMAPPED;
> -		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
> +			__mod_node_page_state(pgdat,

folio_pgdat(folio) should fit here easily. :)

But I would actually suggest something like the following in mm/rmap.c

static void __folio_mod_node_file_state(folio, int nr_pages)
{
	enum node_stat_item idx = NR_FILE_PMDMAPPED;

	if (folio_test_swapbacked(folio))
		idx = NR_SHMEM_PMDMAPPED;

	__mod_node_page_state(folio_pgdat(folio), idx, nr_pages);
}

And then simply calling here

__folio_mod_node_file_state(folio, -nr_pmdmapped);

And likewise in __folio_add_file_rmap()


... will be cleaner.

In any case

Acked-by: David Hildenbrand <david@redhat.com>
Yosry Ahmed May 6, 2024, 8:18 p.m. UTC | #2
On Mon, May 06, 2024 at 09:50:10PM +0200, David Hildenbrand wrote:
> On 06.05.24 21:29, Yosry Ahmed wrote:
> > Previously, all NR_VM_EVENT_ITEMS stats were maintained per-memcg,
> > although some of those fields are not exposed anywhere. Commit
> > 14e0f6c957e39 ("memcg: reduce memory for the lruvec and memcg stats")
> > changed this such that we only maintain the stats we actually expose
> > per-memcg via a translation table.
> > 
> > Additionally, commit 514462bbe927b ("memcg: warn for unexpected events
> > and stats") added a warning if a per-memcg stat update is attempted for
> > a stat that is not in the translation table. The warning started firing
> > for the NR_{FILE/SHMEM}_PMDMAPPED stat updates in the rmap code. These
> > stats are not maintained per-memcg, and hence are not in the translation
> > table.
> > 
> > Do not use __lruvec_stat_mod_folio() when updating NR_FILE_PMDMAPPED and
> > NR_SHMEM_PMDMAPPED. Use __mod_node_page_state() instead, which updates
> > the global per-node stats only.
> > 
> > Reported-by: syzbot+9319a4268a640e26b72b@syzkaller.appspotmail.com
> > Closes: https://lore.kernel.org/lkml/0000000000001b9d500617c8b23c@google.com
> > Fixes: 514462bbe927 ("memcg: warn for unexpected events and stats")
> > Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> > ---
> >   mm/rmap.c | 15 +++++++++------
> >   1 file changed, 9 insertions(+), 6 deletions(-)
> > 
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index 12be4241474ab..ed7f820369864 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -1435,13 +1435,14 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
> >   		struct page *page, int nr_pages, struct vm_area_struct *vma,
> >   		enum rmap_level level)
> >   {
> > +	pg_data_t *pgdat = folio_pgdat(folio);
> >   	int nr, nr_pmdmapped = 0;
> >   	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
> >   	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
> >   	if (nr_pmdmapped)
> > -		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
> > +		__mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
> >   			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
> >   	if (nr)
> >   		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
> > @@ -1493,6 +1494,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
> >   		enum rmap_level level)
> >   {
> >   	atomic_t *mapped = &folio->_nr_pages_mapped;
> > +	pg_data_t *pgdat = folio_pgdat(folio);
> >   	int last, nr = 0, nr_pmdmapped = 0;
> >   	bool partially_mapped = false;
> >   	enum node_stat_item idx;
> > @@ -1540,13 +1542,14 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
> >   	}
> >   	if (nr_pmdmapped) {
> > +		/* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
> >   		if (folio_test_anon(folio))
> > -			idx = NR_ANON_THPS;
> > -		else if (folio_test_swapbacked(folio))
> > -			idx = NR_SHMEM_PMDMAPPED;
> > +			__lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
> >   		else
> > -			idx = NR_FILE_PMDMAPPED;
> > -		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
> > +			__mod_node_page_state(pgdat,
> 
> folio_pgdat(folio) should fit here easily. :)
> 
> But I would actually suggest something like the following in mm/rmap.c

I am not a big fan of this. Not because I don't like the abstraction,
but because I think it doesn't go all the way. It abstracts a very
certain case: updating nr_pmdmapped for file folios.

I think if we opt for abstracting the stats updates in mm/rmap.c, we
should go all the way with something like the following (probably split
as two patches: refactoring then bug fix). WDYT about the below?

diff --git a/mm/rmap.c b/mm/rmap.c
index 12be4241474ab..70d6f6309da01 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1269,6 +1269,28 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
 		       page);
 }
 
+static void __foio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
+{
+	int idx;
+
+	if (nr) {
+		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
+		__lruvec_stat_mod_folio(folio, idx, nr);
+	}
+	if (nr_pmdmapped) {
+		if (folio_test_anon(folio)) {
+			idx = NR_ANON_THPS;
+			__lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
+		} else {
+			/* NR_*_PMDMAPPED are not maintained per-memcg */
+			idx = folio_test_swapbacked(folio) ?
+				NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
+			__mod_node_page_state(folio_pgdat(folio), idx,
+					      nr_pmdmapped);
+		}
+	}
+}
+
 static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
 		unsigned long address, rmap_t flags, enum rmap_level level)
@@ -1276,10 +1298,6 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 	int i, nr, nr_pmdmapped = 0;
 
 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
-	if (nr_pmdmapped)
-		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped);
-	if (nr)
-		__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
 
 	if (unlikely(!folio_test_anon(folio))) {
 		VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -1297,6 +1315,8 @@ static __always_inline void __folio_add_anon_rmap(struct folio *folio,
 		__page_check_anon_rmap(folio, page, vma, address);
 	}
 
+	__folio_mod_stat(folio, nr, nr_pmdmapped);
+
 	if (flags & RMAP_EXCLUSIVE) {
 		switch (level) {
 		case RMAP_LEVEL_PTE:
@@ -1393,6 +1413,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		unsigned long address)
 {
 	int nr = folio_nr_pages(folio);
+	int nr_pmdmapped = 0;
 
 	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
 	VM_BUG_ON_VMA(address < vma->vm_start ||
@@ -1425,10 +1446,10 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 		atomic_set(&folio->_large_mapcount, 0);
 		atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
 		SetPageAnonExclusive(&folio->page);
-		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
+		nr_pmdmapped = nr;
 	}
 
-	__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
+	__folio_mod_stat(folio, nr, nr_pmdmapped);
 }
 
 static __always_inline void __folio_add_file_rmap(struct folio *folio,
@@ -1440,11 +1461,7 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
 	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
 
 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
-	if (nr_pmdmapped)
-		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
-			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
-	if (nr)
-		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
+	__folio_mod_stat(folio, nr, nr_pmdmapped);
 
 	/* See comments in folio_add_anon_rmap_*() */
 	if (!folio_test_large(folio))
@@ -1539,19 +1556,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		break;
 	}
 
-	if (nr_pmdmapped) {
-		if (folio_test_anon(folio))
-			idx = NR_ANON_THPS;
-		else if (folio_test_swapbacked(folio))
-			idx = NR_SHMEM_PMDMAPPED;
-		else
-			idx = NR_FILE_PMDMAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
-	}
 	if (nr) {
-		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr);
-
 		/*
 		 * Queue anon large folio for deferred split if at least one
 		 * page of the folio is unmapped and at least one page
@@ -1563,6 +1568,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		    list_empty(&folio->_deferred_list))
 			deferred_split_folio(folio);
 	}
+	__foio_mod_stat(folio, nr, nr_pmdmapped);
 
 	/*
 	 * It would be tidy to reset folio_test_anon mapping when fully

> 
> static void __folio_mod_node_file_state(folio, int nr_pages)
> {
> 	enum node_stat_item idx = NR_FILE_PMDMAPPED;
> 
> 	if (folio_test_swapbacked(folio))
> 		idx = NR_SHMEM_PMDMAPPED;
> 
> 	__mod_node_page_state(folio_pgdat(folio), idx, nr_pages);
> }
> 
> And then simply calling here
> 
> __folio_mod_node_file_state(folio, -nr_pmdmapped);
> 
> And likewise in __folio_add_file_rmap()
> 
> 
> ... will be cleaner.
> 
> In any case
> 
> Acked-by: David Hildenbrand <david@redhat.com>
> 
> -- 
> Cheers,
> 
> David / dhildenb
>
Roman Gushchin May 6, 2024, 8:30 p.m. UTC | #3
On Mon, May 06, 2024 at 07:29:24PM +0000, Yosry Ahmed wrote:
> Previously, all NR_VM_EVENT_ITEMS stats were maintained per-memcg,
> although some of those fields are not exposed anywhere. Commit
> 14e0f6c957e39 ("memcg: reduce memory for the lruvec and memcg stats")
> changed this such that we only maintain the stats we actually expose
> per-memcg via a translation table.
> 
> Additionally, commit 514462bbe927b ("memcg: warn for unexpected events
> and stats") added a warning if a per-memcg stat update is attempted for
> a stat that is not in the translation table. The warning started firing
> for the NR_{FILE/SHMEM}_PMDMAPPED stat updates in the rmap code. These
> stats are not maintained per-memcg, and hence are not in the translation
> table.
> 
> Do not use __lruvec_stat_mod_folio() when updating NR_FILE_PMDMAPPED and
> NR_SHMEM_PMDMAPPED. Use __mod_node_page_state() instead, which updates
> the global per-node stats only.
> 
> Reported-by: syzbot+9319a4268a640e26b72b@syzkaller.appspotmail.com
> Closes: https://lore.kernel.org/lkml/0000000000001b9d500617c8b23c@google.com
> Fixes: 514462bbe927 ("memcg: warn for unexpected events and stats")
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>

Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>

Thanks!
David Hildenbrand May 6, 2024, 8:31 p.m. UTC | #4
On 06.05.24 22:18, Yosry Ahmed wrote:
> On Mon, May 06, 2024 at 09:50:10PM +0200, David Hildenbrand wrote:
>> On 06.05.24 21:29, Yosry Ahmed wrote:
>>> Previously, all NR_VM_EVENT_ITEMS stats were maintained per-memcg,
>>> although some of those fields are not exposed anywhere. Commit
>>> 14e0f6c957e39 ("memcg: reduce memory for the lruvec and memcg stats")
>>> changed this such that we only maintain the stats we actually expose
>>> per-memcg via a translation table.
>>>
>>> Additionally, commit 514462bbe927b ("memcg: warn for unexpected events
>>> and stats") added a warning if a per-memcg stat update is attempted for
>>> a stat that is not in the translation table. The warning started firing
>>> for the NR_{FILE/SHMEM}_PMDMAPPED stat updates in the rmap code. These
>>> stats are not maintained per-memcg, and hence are not in the translation
>>> table.
>>>
>>> Do not use __lruvec_stat_mod_folio() when updating NR_FILE_PMDMAPPED and
>>> NR_SHMEM_PMDMAPPED. Use __mod_node_page_state() instead, which updates
>>> the global per-node stats only.
>>>
>>> Reported-by: syzbot+9319a4268a640e26b72b@syzkaller.appspotmail.com
>>> Closes: https://lore.kernel.org/lkml/0000000000001b9d500617c8b23c@google.com
>>> Fixes: 514462bbe927 ("memcg: warn for unexpected events and stats")
>>> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
>>> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
>>> ---
>>>    mm/rmap.c | 15 +++++++++------
>>>    1 file changed, 9 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/mm/rmap.c b/mm/rmap.c
>>> index 12be4241474ab..ed7f820369864 100644
>>> --- a/mm/rmap.c
>>> +++ b/mm/rmap.c
>>> @@ -1435,13 +1435,14 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
>>>    		struct page *page, int nr_pages, struct vm_area_struct *vma,
>>>    		enum rmap_level level)
>>>    {
>>> +	pg_data_t *pgdat = folio_pgdat(folio);
>>>    	int nr, nr_pmdmapped = 0;
>>>    	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
>>>    	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
>>>    	if (nr_pmdmapped)
>>> -		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
>>> +		__mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
>>>    			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
>>>    	if (nr)
>>>    		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
>>> @@ -1493,6 +1494,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
>>>    		enum rmap_level level)
>>>    {
>>>    	atomic_t *mapped = &folio->_nr_pages_mapped;
>>> +	pg_data_t *pgdat = folio_pgdat(folio);
>>>    	int last, nr = 0, nr_pmdmapped = 0;
>>>    	bool partially_mapped = false;
>>>    	enum node_stat_item idx;
>>> @@ -1540,13 +1542,14 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
>>>    	}
>>>    	if (nr_pmdmapped) {
>>> +		/* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
>>>    		if (folio_test_anon(folio))
>>> -			idx = NR_ANON_THPS;
>>> -		else if (folio_test_swapbacked(folio))
>>> -			idx = NR_SHMEM_PMDMAPPED;
>>> +			__lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
>>>    		else
>>> -			idx = NR_FILE_PMDMAPPED;
>>> -		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
>>> +			__mod_node_page_state(pgdat,
>>
>> folio_pgdat(folio) should fit here easily. :)
>>
>> But I would actually suggest something like the following in mm/rmap.c
> 
> I am not a big fan of this. Not because I don't like the abstraction,
> but because I think it doesn't go all the way. It abstracts a very
> certain case: updating nr_pmdmapped for file folios.
> 

Right. It only removes some of the ugliness ;)

> I think if we opt for abstracting the stats updates in mm/rmap.c, we
> should go all the way with something like the following (probably split
> as two patches: refactoring then bug fix). WDYT about the below?
> 
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 12be4241474ab..70d6f6309da01 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1269,6 +1269,28 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
>   		       page);
>   }
>   
> +static void __foio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
> +{
> +	int idx;
> +
> +	if (nr) {
> +		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
> +		__lruvec_stat_mod_folio(folio, idx, nr);
> +	}
> +	if (nr_pmdmapped) {
> +		if (folio_test_anon(folio)) {
> +			idx = NR_ANON_THPS;
> +			__lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
> +		} else {
> +			/* NR_*_PMDMAPPED are not maintained per-memcg */
> +			idx = folio_test_swapbacked(folio) ?
> +				NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
> +			__mod_node_page_state(folio_pgdat(folio), idx,
> +					      nr_pmdmapped);
> +		}
> +	}
> +}
> +

I didn't suggest that, because in the _anon and _file functions we'll 
end up introducing unnecessary folio_test_anon() checks that the 
compiler cannot optimize out.

But at least in the removal path it's a clear win.
Yosry Ahmed May 6, 2024, 8:41 p.m. UTC | #5
On Mon, May 6, 2024 at 1:31 PM David Hildenbrand <david@redhat.com> wrote:
>
> On 06.05.24 22:18, Yosry Ahmed wrote:
> > On Mon, May 06, 2024 at 09:50:10PM +0200, David Hildenbrand wrote:
> >> On 06.05.24 21:29, Yosry Ahmed wrote:
> >>> Previously, all NR_VM_EVENT_ITEMS stats were maintained per-memcg,
> >>> although some of those fields are not exposed anywhere. Commit
> >>> 14e0f6c957e39 ("memcg: reduce memory for the lruvec and memcg stats")
> >>> changed this such that we only maintain the stats we actually expose
> >>> per-memcg via a translation table.
> >>>
> >>> Additionally, commit 514462bbe927b ("memcg: warn for unexpected events
> >>> and stats") added a warning if a per-memcg stat update is attempted for
> >>> a stat that is not in the translation table. The warning started firing
> >>> for the NR_{FILE/SHMEM}_PMDMAPPED stat updates in the rmap code. These
> >>> stats are not maintained per-memcg, and hence are not in the translation
> >>> table.
> >>>
> >>> Do not use __lruvec_stat_mod_folio() when updating NR_FILE_PMDMAPPED and
> >>> NR_SHMEM_PMDMAPPED. Use __mod_node_page_state() instead, which updates
> >>> the global per-node stats only.
> >>>
> >>> Reported-by: syzbot+9319a4268a640e26b72b@syzkaller.appspotmail.com
> >>> Closes: https://lore.kernel.org/lkml/0000000000001b9d500617c8b23c@google.com
> >>> Fixes: 514462bbe927 ("memcg: warn for unexpected events and stats")
> >>> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> >>> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> >>> ---
> >>>    mm/rmap.c | 15 +++++++++------
> >>>    1 file changed, 9 insertions(+), 6 deletions(-)
> >>>
> >>> diff --git a/mm/rmap.c b/mm/rmap.c
> >>> index 12be4241474ab..ed7f820369864 100644
> >>> --- a/mm/rmap.c
> >>> +++ b/mm/rmap.c
> >>> @@ -1435,13 +1435,14 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
> >>>             struct page *page, int nr_pages, struct vm_area_struct *vma,
> >>>             enum rmap_level level)
> >>>    {
> >>> +   pg_data_t *pgdat = folio_pgdat(folio);
> >>>     int nr, nr_pmdmapped = 0;
> >>>     VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
> >>>     nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
> >>>     if (nr_pmdmapped)
> >>> -           __lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
> >>> +           __mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
> >>>                     NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
> >>>     if (nr)
> >>>             __lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
> >>> @@ -1493,6 +1494,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
> >>>             enum rmap_level level)
> >>>    {
> >>>     atomic_t *mapped = &folio->_nr_pages_mapped;
> >>> +   pg_data_t *pgdat = folio_pgdat(folio);
> >>>     int last, nr = 0, nr_pmdmapped = 0;
> >>>     bool partially_mapped = false;
> >>>     enum node_stat_item idx;
> >>> @@ -1540,13 +1542,14 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
> >>>     }
> >>>     if (nr_pmdmapped) {
> >>> +           /* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
> >>>             if (folio_test_anon(folio))
> >>> -                   idx = NR_ANON_THPS;
> >>> -           else if (folio_test_swapbacked(folio))
> >>> -                   idx = NR_SHMEM_PMDMAPPED;
> >>> +                   __lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
> >>>             else
> >>> -                   idx = NR_FILE_PMDMAPPED;
> >>> -           __lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
> >>> +                   __mod_node_page_state(pgdat,
> >>
> >> folio_pgdat(folio) should fit here easily. :)
> >>
> >> But I would actually suggest something like the following in mm/rmap.c
> >
> > I am not a big fan of this. Not because I don't like the abstraction,
> > but because I think it doesn't go all the way. It abstracts a very
> > certain case: updating nr_pmdmapped for file folios.
> >
>
> Right. It only removes some of the ugliness ;)

I think if we do this we just add one unnecessary layer of indirection
to one case. If anything people will wonder why we have a helper only
for this case. Just my 2c :)

>
> > I think if we opt for abstracting the stats updates in mm/rmap.c, we
> > should go all the way with something like the following (probably split
> > as two patches: refactoring then bug fix). WDYT about the below?
> >
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index 12be4241474ab..70d6f6309da01 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -1269,6 +1269,28 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
> >                      page);
> >   }
> >
> > +static void __foio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
> > +{
> > +     int idx;
> > +
> > +     if (nr) {
> > +             idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
> > +             __lruvec_stat_mod_folio(folio, idx, nr);
> > +     }
> > +     if (nr_pmdmapped) {
> > +             if (folio_test_anon(folio)) {
> > +                     idx = NR_ANON_THPS;
> > +                     __lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
> > +             } else {
> > +                     /* NR_*_PMDMAPPED are not maintained per-memcg */
> > +                     idx = folio_test_swapbacked(folio) ?
> > +                             NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
> > +                     __mod_node_page_state(folio_pgdat(folio), idx,
> > +                                           nr_pmdmapped);
> > +             }
> > +     }
> > +}
> > +
>
> I didn't suggest that, because in the _anon and _file functions we'll
> end up introducing unnecessary folio_test_anon() checks that the
> compiler cannot optimize out.

I convinced myself that the folio_test_anon() will be #free because
the struct folio should be already in the cache at this point, of
course I may be delusional :)

We can pass in an @anon boolean parameter, but it becomes an ugliness
tradeoff at this point :)

Anyway, I don't feel strongly either way. I am fine with keeping the
patch as-is, the diff I proposed above, or the diff I proposed with an
@anon parameter of folio_test_anon(). The only option I don't really
like is adding a helper just for the file pmdmapped case.

>
> But at least in the removal path it's a clear win.
>
> --
> Cheers,
>
> David / dhildenb
>
David Hildenbrand May 6, 2024, 8:53 p.m. UTC | #6
On 06.05.24 22:41, Yosry Ahmed wrote:
> On Mon, May 6, 2024 at 1:31 PM David Hildenbrand <david@redhat.com> wrote:
>>
>> On 06.05.24 22:18, Yosry Ahmed wrote:
>>> On Mon, May 06, 2024 at 09:50:10PM +0200, David Hildenbrand wrote:
>>>> On 06.05.24 21:29, Yosry Ahmed wrote:
>>>>> Previously, all NR_VM_EVENT_ITEMS stats were maintained per-memcg,
>>>>> although some of those fields are not exposed anywhere. Commit
>>>>> 14e0f6c957e39 ("memcg: reduce memory for the lruvec and memcg stats")
>>>>> changed this such that we only maintain the stats we actually expose
>>>>> per-memcg via a translation table.
>>>>>
>>>>> Additionally, commit 514462bbe927b ("memcg: warn for unexpected events
>>>>> and stats") added a warning if a per-memcg stat update is attempted for
>>>>> a stat that is not in the translation table. The warning started firing
>>>>> for the NR_{FILE/SHMEM}_PMDMAPPED stat updates in the rmap code. These
>>>>> stats are not maintained per-memcg, and hence are not in the translation
>>>>> table.
>>>>>
>>>>> Do not use __lruvec_stat_mod_folio() when updating NR_FILE_PMDMAPPED and
>>>>> NR_SHMEM_PMDMAPPED. Use __mod_node_page_state() instead, which updates
>>>>> the global per-node stats only.
>>>>>
>>>>> Reported-by: syzbot+9319a4268a640e26b72b@syzkaller.appspotmail.com
>>>>> Closes: https://lore.kernel.org/lkml/0000000000001b9d500617c8b23c@google.com
>>>>> Fixes: 514462bbe927 ("memcg: warn for unexpected events and stats")
>>>>> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
>>>>> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
>>>>> ---
>>>>>     mm/rmap.c | 15 +++++++++------
>>>>>     1 file changed, 9 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/mm/rmap.c b/mm/rmap.c
>>>>> index 12be4241474ab..ed7f820369864 100644
>>>>> --- a/mm/rmap.c
>>>>> +++ b/mm/rmap.c
>>>>> @@ -1435,13 +1435,14 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
>>>>>              struct page *page, int nr_pages, struct vm_area_struct *vma,
>>>>>              enum rmap_level level)
>>>>>     {
>>>>> +   pg_data_t *pgdat = folio_pgdat(folio);
>>>>>      int nr, nr_pmdmapped = 0;
>>>>>      VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
>>>>>      nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
>>>>>      if (nr_pmdmapped)
>>>>> -           __lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
>>>>> +           __mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
>>>>>                      NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
>>>>>      if (nr)
>>>>>              __lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
>>>>> @@ -1493,6 +1494,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
>>>>>              enum rmap_level level)
>>>>>     {
>>>>>      atomic_t *mapped = &folio->_nr_pages_mapped;
>>>>> +   pg_data_t *pgdat = folio_pgdat(folio);
>>>>>      int last, nr = 0, nr_pmdmapped = 0;
>>>>>      bool partially_mapped = false;
>>>>>      enum node_stat_item idx;
>>>>> @@ -1540,13 +1542,14 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
>>>>>      }
>>>>>      if (nr_pmdmapped) {
>>>>> +           /* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
>>>>>              if (folio_test_anon(folio))
>>>>> -                   idx = NR_ANON_THPS;
>>>>> -           else if (folio_test_swapbacked(folio))
>>>>> -                   idx = NR_SHMEM_PMDMAPPED;
>>>>> +                   __lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
>>>>>              else
>>>>> -                   idx = NR_FILE_PMDMAPPED;
>>>>> -           __lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
>>>>> +                   __mod_node_page_state(pgdat,
>>>>
>>>> folio_pgdat(folio) should fit here easily. :)
>>>>
>>>> But I would actually suggest something like the following in mm/rmap.c
>>>
>>> I am not a big fan of this. Not because I don't like the abstraction,
>>> but because I think it doesn't go all the way. It abstracts a very
>>> certain case: updating nr_pmdmapped for file folios.
>>>
>>
>> Right. It only removes some of the ugliness ;)
> 
> I think if we do this we just add one unnecessary layer of indirection
> to one case. If anything people will wonder why we have a helper only
> for this case. Just my 2c :)
> 
>>
>>> I think if we opt for abstracting the stats updates in mm/rmap.c, we
>>> should go all the way with something like the following (probably split
>>> as two patches: refactoring then bug fix). WDYT about the below?
>>>
>>> diff --git a/mm/rmap.c b/mm/rmap.c
>>> index 12be4241474ab..70d6f6309da01 100644
>>> --- a/mm/rmap.c
>>> +++ b/mm/rmap.c
>>> @@ -1269,6 +1269,28 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
>>>                       page);
>>>    }
>>>
>>> +static void __foio_mod_stat(struct folio *folio, int nr, int nr_pmdmapped)
>>> +{
>>> +     int idx;
>>> +
>>> +     if (nr) {
>>> +             idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
>>> +             __lruvec_stat_mod_folio(folio, idx, nr);
>>> +     }
>>> +     if (nr_pmdmapped) {
>>> +             if (folio_test_anon(folio)) {
>>> +                     idx = NR_ANON_THPS;
>>> +                     __lruvec_stat_mod_folio(folio, idx, nr_pmdmapped);
>>> +             } else {
>>> +                     /* NR_*_PMDMAPPED are not maintained per-memcg */
>>> +                     idx = folio_test_swapbacked(folio) ?
>>> +                             NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED;
>>> +                     __mod_node_page_state(folio_pgdat(folio), idx,
>>> +                                           nr_pmdmapped);
>>> +             }
>>> +     }
>>> +}
>>> +
>>
>> I didn't suggest that, because in the _anon and _file functions we'll
>> end up introducing unnecessary folio_test_anon() checks that the
>> compiler cannot optimize out.
> 
> I convinced myself that the folio_test_anon() will be #free because
> the struct folio should be already in the cache at this point, of
> course I may be delusional :)

Well, nothing is free :) But yes, likely it won't matter much.

> 
> We can pass in an @anon boolean parameter, but it becomes an ugliness
> tradeoff at this point :)

Agreed.

> 
> Anyway, I don't feel strongly either way. I am fine with keeping the
> patch as-is, the diff I proposed above, or the diff I proposed with an
> @anon parameter of folio_test_anon(). The only option I don't really
> like is adding a helper just for the file pmdmapped case.

Let's go with your cleanup here (could do as separate cleanup on top of 
v2, whatever you prefer).
diff mbox series

Patch

diff --git a/mm/rmap.c b/mm/rmap.c
index 12be4241474ab..ed7f820369864 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1435,13 +1435,14 @@  static __always_inline void __folio_add_file_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
 		enum rmap_level level)
 {
+	pg_data_t *pgdat = folio_pgdat(folio);
 	int nr, nr_pmdmapped = 0;
 
 	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
 
 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
 	if (nr_pmdmapped)
-		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
+		__mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
 			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
 	if (nr)
 		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
@@ -1493,6 +1494,7 @@  static __always_inline void __folio_remove_rmap(struct folio *folio,
 		enum rmap_level level)
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
+	pg_data_t *pgdat = folio_pgdat(folio);
 	int last, nr = 0, nr_pmdmapped = 0;
 	bool partially_mapped = false;
 	enum node_stat_item idx;
@@ -1540,13 +1542,14 @@  static __always_inline void __folio_remove_rmap(struct folio *folio,
 	}
 
 	if (nr_pmdmapped) {
+		/* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
 		if (folio_test_anon(folio))
-			idx = NR_ANON_THPS;
-		else if (folio_test_swapbacked(folio))
-			idx = NR_SHMEM_PMDMAPPED;
+			__lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
 		else
-			idx = NR_FILE_PMDMAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
+			__mod_node_page_state(pgdat,
+					folio_test_swapbacked(folio) ?
+					NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED,
+					-nr_pmdmapped);
 	}
 	if (nr) {
 		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;