diff mbox series

memcg: vmalloc: simplify MEMCG_VMALLOC updates

Message ID 20250403053326.26860-1-shakeel.butt@linux.dev (mailing list archive)
State New
Headers show
Series memcg: vmalloc: simplify MEMCG_VMALLOC updates | expand

Commit Message

Shakeel Butt April 3, 2025, 5:33 a.m. UTC
The vmalloc region can either be charged to a single memcg or none. At
the moment kernel traverses all the pages backing the vmalloc region to
update the MEMCG_VMALLOC stat. However there is no need to look at all
the pages as all those pages will be charged to a single memcg or none.
Simplify the MEMCG_VMALLOC update by just looking at the first page of
the vmalloc region.

Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
 mm/vmalloc.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

Comments

Michal Hocko April 3, 2025, 7:45 a.m. UTC | #1
On Wed 02-04-25 22:33:26, Shakeel Butt wrote:
> The vmalloc region can either be charged to a single memcg or none. At
> the moment kernel traverses all the pages backing the vmalloc region to
> update the MEMCG_VMALLOC stat. However there is no need to look at all
> the pages as all those pages will be charged to a single memcg or none.
> Simplify the MEMCG_VMALLOC update by just looking at the first page of
> the vmalloc region.

I do not remember why this was done page by page, but I suspect that
originally we could have mixed more than one memcg in a single vm area.

The patch makes sense.

> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>

Acked-by: Michal Hocko <mhocko@suse.com>

Thanks!

> ---
>  mm/vmalloc.c | 13 +++++--------
>  1 file changed, 5 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 3ed720a787ec..cdae76994488 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -3370,12 +3370,12 @@ void vfree(const void *addr)
>  
>  	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
>  		vm_reset_perms(vm);
> +	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
> +		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
>  	for (i = 0; i < vm->nr_pages; i++) {
>  		struct page *page = vm->pages[i];
>  
>  		BUG_ON(!page);
> -		if (!(vm->flags & VM_MAP_PUT_PAGES))
> -			mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
>  		/*
>  		 * High-order allocs for huge vmallocs are split, so
>  		 * can be freed as an array of order-0 allocations
> @@ -3671,12 +3671,9 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
>  		node, page_order, nr_small_pages, area->pages);
>  
>  	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
> -	if (gfp_mask & __GFP_ACCOUNT) {
> -		int i;
> -
> -		for (i = 0; i < area->nr_pages; i++)
> -			mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
> -	}
> +	if (gfp_mask & __GFP_ACCOUNT && area->nr_pages)
> +		mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
> +				     area->nr_pages);
>  
>  	/*
>  	 * If not enough pages were obtained to accomplish an
> -- 
> 2.47.1
Uladzislau Rezki April 3, 2025, 11:17 a.m. UTC | #2
On Wed, Apr 02, 2025 at 10:33:26PM -0700, Shakeel Butt wrote:
> The vmalloc region can either be charged to a single memcg or none. At
> the moment kernel traverses all the pages backing the vmalloc region to
> update the MEMCG_VMALLOC stat. However there is no need to look at all
> the pages as all those pages will be charged to a single memcg or none.
> Simplify the MEMCG_VMALLOC update by just looking at the first page of
> the vmalloc region.
> 
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> ---
>  mm/vmalloc.c | 13 +++++--------
>  1 file changed, 5 insertions(+), 8 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 3ed720a787ec..cdae76994488 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -3370,12 +3370,12 @@ void vfree(const void *addr)
>  
>  	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
>  		vm_reset_perms(vm);
> +	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
> +		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
>
Could you please add a comment stating that the first page should be
modified?

Yes, the commit message is clear, but git blame/log takes time.

Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>

--
Uladzislau Rezki
Johannes Weiner April 3, 2025, 4:47 p.m. UTC | #3
On Wed, Apr 02, 2025 at 10:33:26PM -0700, Shakeel Butt wrote:
> The vmalloc region can either be charged to a single memcg or none. At
> the moment kernel traverses all the pages backing the vmalloc region to
> update the MEMCG_VMALLOC stat. However there is no need to look at all
> the pages as all those pages will be charged to a single memcg or none.
> Simplify the MEMCG_VMALLOC update by just looking at the first page of
> the vmalloc region.
> 
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>

It's definitely pointless to handle each page with the stat being
per-cgroup only. But I do wonder why it's not a regular vmstat item.

There is no real reason it *should* be a private memcg stat, is there?
Shakeel Butt April 3, 2025, 6:20 p.m. UTC | #4
On Thu, Apr 03, 2025 at 01:17:22PM +0200, Uladzislau Rezki wrote:
> On Wed, Apr 02, 2025 at 10:33:26PM -0700, Shakeel Butt wrote:
> > The vmalloc region can either be charged to a single memcg or none. At
> > the moment kernel traverses all the pages backing the vmalloc region to
> > update the MEMCG_VMALLOC stat. However there is no need to look at all
> > the pages as all those pages will be charged to a single memcg or none.
> > Simplify the MEMCG_VMALLOC update by just looking at the first page of
> > the vmalloc region.
> > 
> > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > ---
> >  mm/vmalloc.c | 13 +++++--------
> >  1 file changed, 5 insertions(+), 8 deletions(-)
> > 
> > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > index 3ed720a787ec..cdae76994488 100644
> > --- a/mm/vmalloc.c
> > +++ b/mm/vmalloc.c
> > @@ -3370,12 +3370,12 @@ void vfree(const void *addr)
> >  
> >  	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
> >  		vm_reset_perms(vm);
> > +	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
> > +		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
> >
> Could you please add a comment stating that the first page should be
> modified?
> 

Sorry, what do you mean by first page should be modified?
mod_memcg_page_state() will not modify the page but extract memcg from
it and modify its vmalloc stat.


> Yes, the comment is clear, but git blame/log takes time.
> 
> Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>

Thanks.

> 
> --
> Uladzislau Rezki
Shakeel Butt April 3, 2025, 6:23 p.m. UTC | #5
On Thu, Apr 03, 2025 at 12:47:41PM -0400, Johannes Weiner wrote:
> On Wed, Apr 02, 2025 at 10:33:26PM -0700, Shakeel Butt wrote:
> > The vmalloc region can either be charged to a single memcg or none. At
> > the moment kernel traverses all the pages backing the vmalloc region to
> > update the MEMCG_VMALLOC stat. However there is no need to look at all
> > the pages as all those pages will be charged to a single memcg or none.
> > Simplify the MEMCG_VMALLOC update by just looking at the first page of
> > the vmalloc region.
> > 
> > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> 
> It's definitely pointless to handle each page with the stat being
> per-cgroup only. But I do wonder why it's not a regular vmstat item.
> 
> There is no real reason it *should* be a private memcg stat, is there?

Yes, it can be a regular vmstat item (enum node_stat_item). However, then
we would have to go over each page, as node_stat_item counters are per-node
and a vmalloc region can have pages from different nodes (I think, but let
me check).
Uladzislau Rezki April 4, 2025, 10:34 a.m. UTC | #6
On Thu, Apr 03, 2025 at 11:20:18AM -0700, Shakeel Butt wrote:
> On Thu, Apr 03, 2025 at 01:17:22PM +0200, Uladzislau Rezki wrote:
> > On Wed, Apr 02, 2025 at 10:33:26PM -0700, Shakeel Butt wrote:
> > > The vmalloc region can either be charged to a single memcg or none. At
> > > the moment kernel traverses all the pages backing the vmalloc region to
> > > update the MEMCG_VMALLOC stat. However there is no need to look at all
> > > the pages as all those pages will be charged to a single memcg or none.
> > > Simplify the MEMCG_VMALLOC update by just looking at the first page of
> > > the vmalloc region.
> > > 
> > > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > > ---
> > >  mm/vmalloc.c | 13 +++++--------
> > >  1 file changed, 5 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > > index 3ed720a787ec..cdae76994488 100644
> > > --- a/mm/vmalloc.c
> > > +++ b/mm/vmalloc.c
> > > @@ -3370,12 +3370,12 @@ void vfree(const void *addr)
> > >  
> > >  	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
> > >  		vm_reset_perms(vm);
> > > +	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
> > > +		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
> > >
> > Could you please add a comment stating that the first page should be
> > modified?
> > 
> 
> Sorry, what do you mean by first page should be modified?
> mod_memcg_page_state() will not modify the page but extract memcg from
> it and modify its vmalloc stat.
> 
I meant what you wrote in the commit message. mod_memcg_page_state() needs
to be invoked only on the first page within a mapped range, because the rest
of the pages are associated with the same mem_cgroup struct anyway.

Just add a comment that we do not need to check all pages. Can you add it?

--
Uladzislau Rezki
Shakeel Butt April 4, 2025, 5:44 p.m. UTC | #7
On Fri, Apr 04, 2025 at 12:34:33PM +0200, Uladzislau Rezki wrote:
> On Thu, Apr 03, 2025 at 11:20:18AM -0700, Shakeel Butt wrote:
> > On Thu, Apr 03, 2025 at 01:17:22PM +0200, Uladzislau Rezki wrote:
> > > On Wed, Apr 02, 2025 at 10:33:26PM -0700, Shakeel Butt wrote:
> > > > The vmalloc region can either be charged to a single memcg or none. At
> > > > the moment kernel traverses all the pages backing the vmalloc region to
> > > > update the MEMCG_VMALLOC stat. However there is no need to look at all
> > > > the pages as all those pages will be charged to a single memcg or none.
> > > > Simplify the MEMCG_VMALLOC update by just looking at the first page of
> > > > the vmalloc region.
> > > > 
> > > > Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> > > > ---
> > > >  mm/vmalloc.c | 13 +++++--------
> > > >  1 file changed, 5 insertions(+), 8 deletions(-)
> > > > 
> > > > diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> > > > index 3ed720a787ec..cdae76994488 100644
> > > > --- a/mm/vmalloc.c
> > > > +++ b/mm/vmalloc.c
> > > > @@ -3370,12 +3370,12 @@ void vfree(const void *addr)
> > > >  
> > > >  	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
> > > >  		vm_reset_perms(vm);
> > > > +	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
> > > > +		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
> > > >
> > > Could you please add a comment stating that the first page should be
> > > modified?
> > > 
> > 
> > Sorry, what do you mean by first page should be modified?
> > mod_memcg_page_state() will not modify the page but extract memcg from
> > it and modify its vmalloc stat.
> > 
> I meant what you wrote in the commit message. A mod_memcg_page_state() can
> be invoked only on a first page within a mapped range, because the rest is
> anyway is associated with the same mem_cgroup struct.
> 
> Just add a comment that we do not need to check all pages. Can you add it?

Ack. Andrew, please squash the following into the patch.


From 982971062e6bd04feabf4f6a745469cb9bddef03 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeel.butt@linux.dev>
Date: Fri, 4 Apr 2025 10:41:52 -0700
Subject: [PATCH] memcg : simplify MEMCG_VMALLOC updates - fix

Add comment

Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
 mm/vmalloc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index cdae76994488..bcc90d4357e4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3370,6 +3370,7 @@ void vfree(const void *addr)
 
 	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
 		vm_reset_perms(vm);
+	/* All pages of vm should be charged to same memcg, so use first one. */
 	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
 		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
 	for (i = 0; i < vm->nr_pages; i++) {
@@ -3671,6 +3672,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		node, page_order, nr_small_pages, area->pages);
 
 	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
+	/* All pages of vm should be charged to same memcg, so use first one. */
 	if (gfp_mask & __GFP_ACCOUNT && area->nr_pages)
 		mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
 				     area->nr_pages);
diff mbox series

Patch

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 3ed720a787ec..cdae76994488 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3370,12 +3370,12 @@  void vfree(const void *addr)
 
 	if (unlikely(vm->flags & VM_FLUSH_RESET_PERMS))
 		vm_reset_perms(vm);
+	if (vm->nr_pages && !(vm->flags & VM_MAP_PUT_PAGES))
+		mod_memcg_page_state(vm->pages[0], MEMCG_VMALLOC, -vm->nr_pages);
 	for (i = 0; i < vm->nr_pages; i++) {
 		struct page *page = vm->pages[i];
 
 		BUG_ON(!page);
-		if (!(vm->flags & VM_MAP_PUT_PAGES))
-			mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
 		/*
 		 * High-order allocs for huge vmallocs are split, so
 		 * can be freed as an array of order-0 allocations
@@ -3671,12 +3671,9 @@  static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		node, page_order, nr_small_pages, area->pages);
 
 	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
-	if (gfp_mask & __GFP_ACCOUNT) {
-		int i;
-
-		for (i = 0; i < area->nr_pages; i++)
-			mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
-	}
+	if (gfp_mask & __GFP_ACCOUNT && area->nr_pages)
+		mod_memcg_page_state(area->pages[0], MEMCG_VMALLOC,
+				     area->nr_pages);
 
 	/*
 	 * If not enough pages were obtained to accomplish an