Message ID | 20250306023133.44838-1-songmuchun@bytedance.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm: memcontrol: fix swap counter leak from offline cgroup | expand |
On Thu, Mar 6, 2025 at 10:32 AM Muchun Song <songmuchun@bytedance.com> wrote: > > The commit 6769183166b3 has removed the parameter of id from > swap_cgroup_record() and get the memcg id from > mem_cgroup_id(folio_memcg(folio)). However, the caller of it > may update a different memcg's counter instead of > folio_memcg(folio). E.g. in the caller of mem_cgroup_swapout(), > @swap_memcg could be different with @memcg and update the counter > of @swap_memcg, but swap_cgroup_record() records the wrong memcg's > ID. When it is uncharged from __mem_cgroup_uncharge_swap(), the > swap counter will leak since the wrong recorded ID. Fix it by > bring the parameter of id back. > > Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing") > Cc: <stable@vger.kernel.org> > Signed-off-by: Muchun Song <songmuchun@bytedance.com> > --- > include/linux/swap_cgroup.h | 4 ++-- > mm/memcontrol.c | 4 ++-- > mm/swap_cgroup.c | 7 ++++--- > 3 files changed, 8 insertions(+), 7 deletions(-) > > diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h > index b5ec038069dab..91cdf12190a03 100644 > --- a/include/linux/swap_cgroup.h > +++ b/include/linux/swap_cgroup.h > @@ -6,7 +6,7 @@ > > #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) > > -extern void swap_cgroup_record(struct folio *folio, swp_entry_t ent); > +extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent); > extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents); > extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); > extern int swap_cgroup_swapon(int type, unsigned long max_pages); > @@ -15,7 +15,7 @@ extern void swap_cgroup_swapoff(int type); > #else > > static inline > -void swap_cgroup_record(struct folio *folio, swp_entry_t ent) > +void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent) > { > } > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index a5d870fbb4321..a5ab603806fbb 100644 > --- 
a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -4988,7 +4988,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) > mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); > mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); > > - swap_cgroup_record(folio, entry); > + swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry); > > folio_unqueue_deferred_split(folio); > folio->memcg_data = 0; > @@ -5050,7 +5050,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) > mem_cgroup_id_get_many(memcg, nr_pages - 1); > mod_memcg_state(memcg, MEMCG_SWAP, nr_pages); > > - swap_cgroup_record(folio, entry); > + swap_cgroup_record(folio, mem_cgroup_id(memcg), entry); > > return 0; > } > diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c > index be39078f255be..1007c30f12e2c 100644 > --- a/mm/swap_cgroup.c > +++ b/mm/swap_cgroup.c > @@ -58,9 +58,11 @@ static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map, > * entries must not have been charged > * > * @folio: the folio that the swap entry belongs to > + * @id: mem_cgroup ID to be recorded > * @ent: the first swap entry to be recorded > */ > -void swap_cgroup_record(struct folio *folio, swp_entry_t ent) > +void swap_cgroup_record(struct folio *folio, unsigned short id, > + swp_entry_t ent) > { > unsigned int nr_ents = folio_nr_pages(folio); > struct swap_cgroup *map; > @@ -72,8 +74,7 @@ void swap_cgroup_record(struct folio *folio, swp_entry_t ent) > map = swap_cgroup_ctrl[swp_type(ent)].map; > > do { > - old = __swap_cgroup_id_xchg(map, offset, > - mem_cgroup_id(folio_memcg(folio))); > + old = __swap_cgroup_id_xchg(map, offset, id); > VM_BUG_ON(old); > } while (++offset != end); > } > -- > 2.20.1 > Good catch, Thanks! Reviewed-by: Kairui Song <kasong@tencent.com>
On Thu, Mar 6, 2025 at 10:54 AM Kairui Song <ryncsn@gmail.com> wrote: > > On Thu, Mar 6, 2025 at 10:32 AM Muchun Song <songmuchun@bytedance.com> wrote: > > > > The commit 6769183166b3 has removed the parameter of id from > > swap_cgroup_record() and get the memcg id from > > mem_cgroup_id(folio_memcg(folio)). However, the caller of it > > may update a different memcg's counter instead of > > folio_memcg(folio). E.g. in the caller of mem_cgroup_swapout(), > > @swap_memcg could be different with @memcg and update the counter > > of @swap_memcg, but swap_cgroup_record() records the wrong memcg's > > ID. When it is uncharged from __mem_cgroup_uncharge_swap(), the > > swap counter will leak since the wrong recorded ID. Fix it by > > bring the parameter of id back. > > > > Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing") > > Cc: <stable@vger.kernel.org> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com> > > --- > > include/linux/swap_cgroup.h | 4 ++-- > > mm/memcontrol.c | 4 ++-- > > mm/swap_cgroup.c | 7 ++++--- > > 3 files changed, 8 insertions(+), 7 deletions(-) > > > > diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h > > index b5ec038069dab..91cdf12190a03 100644 > > --- a/include/linux/swap_cgroup.h > > +++ b/include/linux/swap_cgroup.h > > @@ -6,7 +6,7 @@ > > > > #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) > > > > -extern void swap_cgroup_record(struct folio *folio, swp_entry_t ent); > > +extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent); > > extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents); > > extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); > > extern int swap_cgroup_swapon(int type, unsigned long max_pages); > > @@ -15,7 +15,7 @@ extern void swap_cgroup_swapoff(int type); > > #else > > > > static inline > > -void swap_cgroup_record(struct folio *folio, swp_entry_t ent) > > +void swap_cgroup_record(struct folio 
*folio, unsigned short id, swp_entry_t ent) > > { > > } > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > index a5d870fbb4321..a5ab603806fbb 100644 > > --- a/mm/memcontrol.c > > +++ b/mm/memcontrol.c > > @@ -4988,7 +4988,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) > > mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); > > mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); > > > > - swap_cgroup_record(folio, entry); > > + swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry); > > > > folio_unqueue_deferred_split(folio); > > folio->memcg_data = 0; > > @@ -5050,7 +5050,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) > > mem_cgroup_id_get_many(memcg, nr_pages - 1); > > mod_memcg_state(memcg, MEMCG_SWAP, nr_pages); > > > > - swap_cgroup_record(folio, entry); > > + swap_cgroup_record(folio, mem_cgroup_id(memcg), entry); > > > > return 0; > > } > > diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c > > index be39078f255be..1007c30f12e2c 100644 > > --- a/mm/swap_cgroup.c > > +++ b/mm/swap_cgroup.c > > @@ -58,9 +58,11 @@ static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map, > > * entries must not have been charged > > * > > * @folio: the folio that the swap entry belongs to > > + * @id: mem_cgroup ID to be recorded > > * @ent: the first swap entry to be recorded > > */ > > -void swap_cgroup_record(struct folio *folio, swp_entry_t ent) > > +void swap_cgroup_record(struct folio *folio, unsigned short id, > > + swp_entry_t ent) > > { > > unsigned int nr_ents = folio_nr_pages(folio); > > struct swap_cgroup *map; > > @@ -72,8 +74,7 @@ void swap_cgroup_record(struct folio *folio, swp_entry_t ent) > > map = swap_cgroup_ctrl[swp_type(ent)].map; > > > > do { > > - old = __swap_cgroup_id_xchg(map, offset, > > - mem_cgroup_id(folio_memcg(folio))); > > + old = __swap_cgroup_id_xchg(map, offset, id); > > VM_BUG_ON(old); > > } while (++offset != end); > > } > > -- > > 2.20.1 > > > > Good catch, Thanks! 
> > Reviewed-by: Kairui Song <kasong@tencent.com> BTW, it needs to be fixed in 6.14; no stable fix is needed. I just double-checked that the commit is not in 6.13.
On Thu, Mar 06, 2025 at 10:54:12AM +0800, Kairui Song wrote: > On Thu, Mar 6, 2025 at 10:32 AM Muchun Song <songmuchun@bytedance.com> wrote: > > > > The commit 6769183166b3 has removed the parameter of id from > > swap_cgroup_record() and get the memcg id from > > mem_cgroup_id(folio_memcg(folio)). However, the caller of it > > may update a different memcg's counter instead of > > folio_memcg(folio). E.g. in the caller of mem_cgroup_swapout(), > > @swap_memcg could be different with @memcg and update the counter > > of @swap_memcg, but swap_cgroup_record() records the wrong memcg's > > ID. When it is uncharged from __mem_cgroup_uncharge_swap(), the > > swap counter will leak since the wrong recorded ID. Fix it by > > bring the parameter of id back. > > > > Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing") > > Cc: <stable@vger.kernel.org> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Looking at the original commit again, we also should do this: --- From 2685ca87d73d0c2b91cfd6959e381a40db235119 Mon Sep 17 00:00:00 2001 From: Johannes Weiner <hannes@cmpxchg.org> Date: Thu, 6 Mar 2025 09:31:42 -0500 Subject: [PATCH] mm: swap_cgroup: remove double initialization of locals Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing") Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> --- mm/swap_cgroup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c index 1007c30f12e2..de779fed8c21 100644 --- a/mm/swap_cgroup.c +++ b/mm/swap_cgroup.c @@ -92,8 +92,7 @@ void swap_cgroup_record(struct folio *folio, unsigned short id, */ unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents) { - pgoff_t offset = swp_offset(ent); - pgoff_t end = offset + nr_ents; + pgoff_t offset, end; struct swap_cgroup *map; unsigned short old, iter = 0;
> On Mar 6, 2025, at 22:37, Johannes Weiner <hannes@cmpxchg.org> wrote: > > On Thu, Mar 06, 2025 at 10:54:12AM +0800, Kairui Song wrote: >> On Thu, Mar 6, 2025 at 10:32 AM Muchun Song <songmuchun@bytedance.com> wrote: >>> >>> The commit 6769183166b3 has removed the parameter of id from >>> swap_cgroup_record() and get the memcg id from >>> mem_cgroup_id(folio_memcg(folio)). However, the caller of it >>> may update a different memcg's counter instead of >>> folio_memcg(folio). E.g. in the caller of mem_cgroup_swapout(), >>> @swap_memcg could be different with @memcg and update the counter >>> of @swap_memcg, but swap_cgroup_record() records the wrong memcg's >>> ID. When it is uncharged from __mem_cgroup_uncharge_swap(), the >>> swap counter will leak since the wrong recorded ID. Fix it by >>> bring the parameter of id back. >>> >>> Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing") >>> Cc: <stable@vger.kernel.org> >>> Signed-off-by: Muchun Song <songmuchun@bytedance.com> > > Acked-by: Johannes Weiner <hannes@cmpxchg.org> > > Looking at the original commit again, we also should do this: > > --- > > From 2685ca87d73d0c2b91cfd6959e381a40db235119 Mon Sep 17 00:00:00 2001 > From: Johannes Weiner <hannes@cmpxchg.org> > Date: Thu, 6 Mar 2025 09:31:42 -0500 > Subject: [PATCH] mm: swap_cgroup: remove double initialization of locals > > Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing") > Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Yes. Reviewed-by: Muchun Song <muchun.song@linux.dev> Thanks. 
> --- > mm/swap_cgroup.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c > index 1007c30f12e2..de779fed8c21 100644 > --- a/mm/swap_cgroup.c > +++ b/mm/swap_cgroup.c > @@ -92,8 +92,7 @@ void swap_cgroup_record(struct folio *folio, unsigned short id, > */ > unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents) > { > - pgoff_t offset = swp_offset(ent); > - pgoff_t end = offset + nr_ents; > + pgoff_t offset, end; > struct swap_cgroup *map; > unsigned short old, iter = 0; > > -- > 2.48.1
diff --git a/include/linux/swap_cgroup.h b/include/linux/swap_cgroup.h index b5ec038069dab..91cdf12190a03 100644 --- a/include/linux/swap_cgroup.h +++ b/include/linux/swap_cgroup.h @@ -6,7 +6,7 @@ #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP) -extern void swap_cgroup_record(struct folio *folio, swp_entry_t ent); +extern void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent); extern unsigned short swap_cgroup_clear(swp_entry_t ent, unsigned int nr_ents); extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent); extern int swap_cgroup_swapon(int type, unsigned long max_pages); @@ -15,7 +15,7 @@ extern void swap_cgroup_swapoff(int type); #else static inline -void swap_cgroup_record(struct folio *folio, swp_entry_t ent) +void swap_cgroup_record(struct folio *folio, unsigned short id, swp_entry_t ent) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a5d870fbb4321..a5ab603806fbb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4988,7 +4988,7 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry) mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); - swap_cgroup_record(folio, entry); + swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry); folio_unqueue_deferred_split(folio); folio->memcg_data = 0; @@ -5050,7 +5050,7 @@ int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry) mem_cgroup_id_get_many(memcg, nr_pages - 1); mod_memcg_state(memcg, MEMCG_SWAP, nr_pages); - swap_cgroup_record(folio, entry); + swap_cgroup_record(folio, mem_cgroup_id(memcg), entry); return 0; } diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c index be39078f255be..1007c30f12e2c 100644 --- a/mm/swap_cgroup.c +++ b/mm/swap_cgroup.c @@ -58,9 +58,11 @@ static unsigned short __swap_cgroup_id_xchg(struct swap_cgroup *map, * entries must not have been charged * * @folio: the folio that the swap entry belongs to + * @id: mem_cgroup ID to be recorded * @ent: the first swap 
entry to be recorded */ -void swap_cgroup_record(struct folio *folio, swp_entry_t ent) +void swap_cgroup_record(struct folio *folio, unsigned short id, + swp_entry_t ent) { unsigned int nr_ents = folio_nr_pages(folio); struct swap_cgroup *map; @@ -72,8 +74,7 @@ void swap_cgroup_record(struct folio *folio, swp_entry_t ent) map = swap_cgroup_ctrl[swp_type(ent)].map; do { - old = __swap_cgroup_id_xchg(map, offset, - mem_cgroup_id(folio_memcg(folio))); + old = __swap_cgroup_id_xchg(map, offset, id); VM_BUG_ON(old); } while (++offset != end); }
Commit 6769183166b3 removed the id parameter from swap_cgroup_record() and made it derive the memcg ID from mem_cgroup_id(folio_memcg(folio)). However, the caller may update a different memcg's counter than that of folio_memcg(folio). E.g. in mem_cgroup_swapout(), a caller of swap_cgroup_record(), @swap_memcg can differ from @memcg: the counter of @swap_memcg is updated, but swap_cgroup_record() records the wrong memcg's ID. When the entry is later uncharged in __mem_cgroup_uncharge_swap(), the swap counter leaks because the wrong ID was recorded. Fix it by bringing the id parameter back. Fixes: 6769183166b3 ("mm/swap_cgroup: decouple swap cgroup recording and clearing") Cc: <stable@vger.kernel.org> Signed-off-by: Muchun Song <songmuchun@bytedance.com> --- include/linux/swap_cgroup.h | 4 ++-- mm/memcontrol.c | 4 ++-- mm/swap_cgroup.c | 7 ++++--- 3 files changed, 8 insertions(+), 7 deletions(-)