diff mbox series

mm: memcontrol: fix swap counter leak from offline cgroup

Message ID 20250306023133.44838-1-songmuchun@bytedance.com (mailing list archive)
State New
Headers show
Series mm: memcontrol: fix swap counter leak from offline cgroup | expand

Commit Message

Muchun Song March 6, 2025, 2:31 a.m. UTC
Commit 6769183166b3 removed the id parameter from
swap_cgroup_record() and made it derive the memcg ID from
mem_cgroup_id(folio_memcg(folio)). However, its callers may
update a different memcg's counter than that of
folio_memcg(folio). E.g. in mem_cgroup_swapout(), one of its
callers, @swap_memcg can differ from @memcg; the counter of
@swap_memcg is updated, but swap_cgroup_record() records the wrong
memcg's ID. When the swap entry is later uncharged in
__mem_cgroup_uncharge_swap(), the swap counter leaks because of
the wrong recorded ID. Fix it by bringing the id parameter back.

Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing")
Cc: <stable@vger.kernel.org>
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 include/linux/swap_cgroup.h | 4 ++--
 mm/memcontrol.c             | 4 ++--
 mm/swap_cgroup.c            | 7 ++++---
 3 files changed, 8 insertions(+), 7 deletions(-)

Comments

Kairui Song March 6, 2025, 2:54 a.m. UTC | #1
On Thu, Mar 6, 2025 at 10:32 AM Muchun Song <songmuchun@bytedance.com> wrote:
>
> The commit 6769183166b3 has removed the parameter of id from
> swap_cgroup_record() and get the memcg id from
> mem_cgroup_id(folio_memcg(folio)). However, the caller of it
> may update a different memcg's counter instead of
> folio_memcg(folio). E.g. in the caller of mem_cgroup_swapout(),
> @swap_memcg could be different with @memcg and update the counter
> of @swap_memcg, but swap_cgroup_record() records the wrong memcg's
> ID. When it is uncharged from __mem_cgroup_uncharge_swap(), the
> swap counter will leak since the wrong recorded ID. Fix it by
> bring the parameter of id back.
>
> Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing")
> Cc: <stable@vger.kernel.org>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
>  include/linux/swap_cgroup.h | 4 ++--
>  mm/memcontrol.c             | 4 ++--
>  mm/swap_cgroup.c            | 7 ++++---
>  3 files changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h
> index b5ec038069dab..91cdf12190a03 100644
> --- a/include/linux/swap_cgroup.h
> +++ b/include/linux/swap_cgroup.h
> @@ -6,7 +6,7 @@
>
>  #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
>
> -extern void swap_cgroup_record(struct folio *folio, swp_entry_t ent);
> +extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent);
>  extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents);
>  extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
>  extern int swap_cgroup_swapon(int type, unsigned long max_pages);
> @@ -15,7 +15,7 @@ extern void swap_cgroup_swapoff(int type);
>  #else
>
>  static inline
> -void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
> +void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent)
>  {
>  }
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index a5d870fbb4321..a5ab603806fbb 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4988,7 +4988,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
>                 mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
>         mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
>
> -       swap_cgroup_record(folio, entry);
> +       swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry);
>
>         folio_unqueue_deferred_split(folio);
>         folio->memcg_data = 0;
> @@ -5050,7 +5050,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
>                 mem_cgroup_id_get_many(memcg, nr_pages - 1);
>         mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
>
> -       swap_cgroup_record(folio, entry);
> +       swap_cgroup_record(folio, mem_cgroup_id(memcg), entry);
>
>         return 0;
>  }
> diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
> index be39078f255be..1007c30f12e2c 100644
> --- a/mm/swap_cgroup.c
> +++ b/mm/swap_cgroup.c
> @@ -58,9 +58,11 @@ static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map,
>   * entries must not have been charged
>   *
>   * @folio: the folio that the swap entry belongs to
> + * @id: mem_cgroup ID to be recorded
>   * @ent: the first swap entry to be recorded
>   */
> -void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
> +void swap_cgroup_record(struct folio *folio, unsigned short id,
> +                       swp_entry_t ent)
>  {
>         unsigned int nr_ents = folio_nr_pages(folio);
>         struct swap_cgroup *map;
> @@ -72,8 +74,7 @@ void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
>         map = swap_cgroup_ctrl[swp_type(ent)].map;
>
>         do {
> -               old = __swap_cgroup_id_xchg(map, offset,
> -                                           mem_cgroup_id(folio_memcg(folio)));
> +               old = __swap_cgroup_id_xchg(map, offset, id);
>                 VM_BUG_ON(old);
>         } while (++offset != end);
>  }
> --
> 2.20.1
>

Good catch, Thanks!

Reviewed-by: Kairui Song <kasong@tencent.com>
Kairui Song March 6, 2025, 3:04 a.m. UTC | #2
On Thu, Mar 6, 2025 at 10:54 AM Kairui Song <ryncsn@gmail.com> wrote:
>
> On Thu, Mar 6, 2025 at 10:32 AM Muchun Song <songmuchun@bytedance.com> wrote:
> >
> > The commit 6769183166b3 has removed the parameter of id from
> > swap_cgroup_record() and get the memcg id from
> > mem_cgroup_id(folio_memcg(folio)). However, the caller of it
> > may update a different memcg's counter instead of
> > folio_memcg(folio). E.g. in the caller of mem_cgroup_swapout(),
> > @swap_memcg could be different with @memcg and update the counter
> > of @swap_memcg, but swap_cgroup_record() records the wrong memcg's
> > ID. When it is uncharged from __mem_cgroup_uncharge_swap(), the
> > swap counter will leak since the wrong recorded ID. Fix it by
> > bring the parameter of id back.
> >
> > Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing")
> > Cc: <stable@vger.kernel.org>
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > ---
> >  include/linux/swap_cgroup.h | 4 ++--
> >  mm/memcontrol.c             | 4 ++--
> >  mm/swap_cgroup.c            | 7 ++++---
> >  3 files changed, 8 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h
> > index b5ec038069dab..91cdf12190a03 100644
> > --- a/include/linux/swap_cgroup.h
> > +++ b/include/linux/swap_cgroup.h
> > @@ -6,7 +6,7 @@
> >
> >  #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
> >
> > -extern void swap_cgroup_record(struct folio *folio, swp_entry_t ent);
> > +extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent);
> >  extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents);
> >  extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
> >  extern int swap_cgroup_swapon(int type, unsigned long max_pages);
> > @@ -15,7 +15,7 @@ extern void swap_cgroup_swapoff(int type);
> >  #else
> >
> >  static inline
> > -void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
> > +void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent)
> >  {
> >  }
> >
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index a5d870fbb4321..a5ab603806fbb 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -4988,7 +4988,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
> >                 mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
> >         mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
> >
> > -       swap_cgroup_record(folio, entry);
> > +       swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry);
> >
> >         folio_unqueue_deferred_split(folio);
> >         folio->memcg_data = 0;
> > @@ -5050,7 +5050,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
> >                 mem_cgroup_id_get_many(memcg, nr_pages - 1);
> >         mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
> >
> > -       swap_cgroup_record(folio, entry);
> > +       swap_cgroup_record(folio, mem_cgroup_id(memcg), entry);
> >
> >         return 0;
> >  }
> > diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
> > index be39078f255be..1007c30f12e2c 100644
> > --- a/mm/swap_cgroup.c
> > +++ b/mm/swap_cgroup.c
> > @@ -58,9 +58,11 @@ static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map,
> >   * entries must not have been charged
> >   *
> >   * @folio: the folio that the swap entry belongs to
> > + * @id: mem_cgroup ID to be recorded
> >   * @ent: the first swap entry to be recorded
> >   */
> > -void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
> > +void swap_cgroup_record(struct folio *folio, unsigned short id,
> > +                       swp_entry_t ent)
> >  {
> >         unsigned int nr_ents = folio_nr_pages(folio);
> >         struct swap_cgroup *map;
> > @@ -72,8 +74,7 @@ void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
> >         map = swap_cgroup_ctrl[swp_type(ent)].map;
> >
> >         do {
> > -               old = __swap_cgroup_id_xchg(map, offset,
> > -                                           mem_cgroup_id(folio_memcg(folio)));
> > +               old = __swap_cgroup_id_xchg(map, offset, id);
> >                 VM_BUG_ON(old);
> >         } while (++offset != end);
> >  }
> > --
> > 2.20.1
> >
>
> Good catch, Thanks!
>
> Reviewed-by: Kairui Song <kasong@tencent.com>

BTW, this needs to be fixed in 6.14. No stable backport is needed: I just
double-checked, and the commit is not in 6.13.
Johannes Weiner March 6, 2025, 2:37 p.m. UTC | #3
On Thu, Mar 06, 2025 at 10:54:12AM +0800, Kairui Song wrote:
> On Thu, Mar 6, 2025 at 10:32 AM Muchun Song <songmuchun@bytedance.com> wrote:
> >
> > The commit 6769183166b3 has removed the parameter of id from
> > swap_cgroup_record() and get the memcg id from
> > mem_cgroup_id(folio_memcg(folio)). However, the caller of it
> > may update a different memcg's counter instead of
> > folio_memcg(folio). E.g. in the caller of mem_cgroup_swapout(),
> > @swap_memcg could be different with @memcg and update the counter
> > of @swap_memcg, but swap_cgroup_record() records the wrong memcg's
> > ID. When it is uncharged from __mem_cgroup_uncharge_swap(), the
> > swap counter will leak since the wrong recorded ID. Fix it by
> > bring the parameter of id back.
> >
> > Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing")
> > Cc: <stable@vger.kernel.org>
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

Looking at the original commit again, we also should do this:

---

From 2685ca87d73d0c2b91cfd6959e381a40db235119 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 6 Mar 2025 09:31:42 -0500
Subject: [PATCH] mm: swap_cgroup: remove double initialization of locals

Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 mm/swap_cgroup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index 1007c30f12e2..de779fed8c21 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -92,8 +92,7 @@ void swap_cgroup_record(struct folio *folio, unsigned short id,
  */
 unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents)
 {
-	pgoff_t offset = swp_offset(ent);
-	pgoff_t end = offset + nr_ents;
+	pgoff_t offset, end;
 	struct swap_cgroup *map;
 	unsigned short old, iter = 0;
Muchun Song March 7, 2025, 2:28 a.m. UTC | #4
> On Mar 6, 2025, at 22:37, Johannes Weiner <hannes@cmpxchg.org> wrote:
> 
> On Thu, Mar 06, 2025 at 10:54:12AM +0800, Kairui Song wrote:
>> On Thu, Mar 6, 2025 at 10:32 AM Muchun Song <songmuchun@bytedance.com> wrote:
>>> 
>>> The commit 6769183166b3 has removed the parameter of id from
>>> swap_cgroup_record() and get the memcg id from
>>> mem_cgroup_id(folio_memcg(folio)). However, the caller of it
>>> may update a different memcg's counter instead of
>>> folio_memcg(folio). E.g. in the caller of mem_cgroup_swapout(),
>>> @swap_memcg could be different with @memcg and update the counter
>>> of @swap_memcg, but swap_cgroup_record() records the wrong memcg's
>>> ID. When it is uncharged from __mem_cgroup_uncharge_swap(), the
>>> swap counter will leak since the wrong recorded ID. Fix it by
>>> bring the parameter of id back.
>>> 
>>> Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing")
>>> Cc: <stable@vger.kernel.org>
>>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> 
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> 
> Looking at the original commit again, we also should do this:
> 
> ---
> 
> From 2685ca87d73d0c2b91cfd6959e381a40db235119 Mon Sep 17 00:00:00 2001
> From: Johannes Weiner <hannes@cmpxchg.org>
> Date: Thu, 6 Mar 2025 09:31:42 -0500
> Subject: [PATCH] mm: swap_cgroup: remove double initialization of locals
> 
> Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing")
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>

Yes.

Reviewed-by: Muchun Song <muchun.song@linux.dev>

Thanks.

> ---
> mm/swap_cgroup.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
> index 1007c30f12e2..de779fed8c21 100644
> --- a/mm/swap_cgroup.c
> +++ b/mm/swap_cgroup.c
> @@ -92,8 +92,7 @@ void swap_cgroup_record(struct folio *folio, unsigned short id,
>  */
> unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents)
> {
> - 	pgoff_t offset = swp_offset(ent);
> - 	pgoff_t end = offset + nr_ents;
> + 	pgoff_t offset, end;
> 	struct swap_cgroup *map;
> 	unsigned short old, iter = 0;
> 
> -- 
> 2.48.1
diff mbox series

Patch

diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h
index b5ec038069dab..91cdf12190a03 100644
--- a/include/linux/swap_cgroup.h
+++ b/include/linux/swap_cgroup.h
@@ -6,7 +6,7 @@ 
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
 
-extern void swap_cgroup_record(struct folio *folio, swp_entry_t ent);
+extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent);
 extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents);
 extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
 extern int swap_cgroup_swapon(int type, unsigned long max_pages);
@@ -15,7 +15,7 @@  extern void swap_cgroup_swapoff(int type);
 #else
 
 static inline
-void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
+void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a5d870fbb4321..a5ab603806fbb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4988,7 +4988,7 @@  void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
 		mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
 	mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
 
-	swap_cgroup_record(folio, entry);
+	swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry);
 
 	folio_unqueue_deferred_split(folio);
 	folio->memcg_data = 0;
@@ -5050,7 +5050,7 @@  int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
 		mem_cgroup_id_get_many(memcg, nr_pages - 1);
 	mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
 
-	swap_cgroup_record(folio, entry);
+	swap_cgroup_record(folio, mem_cgroup_id(memcg), entry);
 
 	return 0;
 }
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index be39078f255be..1007c30f12e2c 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -58,9 +58,11 @@  static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map,
  * entries must not have been charged
  *
  * @folio: the folio that the swap entry belongs to
+ * @id: mem_cgroup ID to be recorded
  * @ent: the first swap entry to be recorded
  */
-void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
+void swap_cgroup_record(struct folio *folio, unsigned short id,
+			swp_entry_t ent)
 {
 	unsigned int nr_ents = folio_nr_pages(folio);
 	struct swap_cgroup *map;
@@ -72,8 +74,7 @@  void swap_cgroup_record(struct folio *folio, swp_entry_t ent)
 	map = swap_cgroup_ctrl[swp_type(ent)].map;
 
 	do {
-		old = __swap_cgroup_id_xchg(map, offset,
-					    mem_cgroup_id(folio_memcg(folio)));
+		old = __swap_cgroup_id_xchg(map, offset, id);
 		VM_BUG_ON(old);
 	} while (++offset != end);
 }