diff mbox series

[V2] mm/memcontrol: add per-memcg pgpgin/pswpin counter

Message ID 20240913084453.3605621-1-jingxiangzeng.cas@gmail.com (mailing list archive)
State New
Headers show
Series [V2] mm/memcontrol: add per-memcg pgpgin/pswpin counter | expand

Commit Message

Jingxiang Zeng Sept. 13, 2024, 8:44 a.m. UTC
From: Jingxiang Zeng <linuszeng@tencent.com>

In proactive memory reclamation scenarios, it is necessary to estimate the
pswpin and pswpout metrics of the cgroup to determine whether to continue
reclaiming anonymous pages in the current batch.  This patch will collect
these metrics and expose them.

Link: https://lkml.kernel.org/r/20240830082244.156923-1-jingxiangzeng.cas@gmail.com
Signed-off-by: Jingxiang Zeng <linuszeng@tencent.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memcontrol.c | 2 ++
 mm/page_io.c    | 4 ++++
 2 files changed, 6 insertions(+)

Comments

Andrew Morton Sept. 16, 2024, 7:41 a.m. UTC | #1
On Fri, 13 Sep 2024 16:44:53 +0800 Jingxiang Zeng <jingxiangzeng.cas@gmail.com> wrote:

> From: Jingxiang Zeng <linuszeng@tencent.com>
> 
> In proactive memory reclamation scenarios, it is necessary to estimate the
> pswpin and pswpout metrics of the cgroup to determine whether to continue
> reclaiming anonymous pages in the current batch.  This patch will collect
> these metrics and expose them.
> 

Please explain the differences between v1 and v2:

--- a/mm/memcontrol-v1.c~mm-memcontrol-add-per-memcg-pgpgin-pswpin-counter-v2
+++ a/mm/memcontrol-v1.c
@@ -2729,8 +2729,6 @@ static const char *const memcg1_stat_nam
 static const unsigned int memcg1_events[] = {
 	PGPGIN,
 	PGPGOUT,
-	PSWPIN,
-	PSWPOUT,
 	PGFAULT,
 	PGMAJFAULT,
 };
Jingxiang Zeng Sept. 21, 2024, 7:51 a.m. UTC | #2
On Mon, 16 Sept 2024 at 15:41, Andrew Morton <akpm@linux-foundation.org> wrote:
>
> On Fri, 13 Sep 2024 16:44:53 +0800 Jingxiang Zeng <jingxiangzeng.cas@gmail.com> wrote:
>
> > From: Jingxiang Zeng <linuszeng@tencent.com>
> >
> > In proactive memory reclamation scenarios, it is necessary to estimate the
> > pswpin and pswpout metrics of the cgroup to determine whether to continue
> > reclaiming anonymous pages in the current batch.  This patch will collect
> > these metrics and expose them.
> >
>
> Please explain the differences between v1 and v2:

Currently, the PSWPIN and PSWPOUT fields are only used in proactive memory
reclamation scenarios, but memory.reclaim is only exposed in cgroup v2, so
here we simply delete these fields in cgroup v1.
>
> --- a/mm/memcontrol-v1.c~mm-memcontrol-add-per-memcg-pgpgin-pswpin-counter-v2
> +++ a/mm/memcontrol-v1.c
> @@ -2729,8 +2729,6 @@ static const char *const memcg1_stat_nam
>  static const unsigned int memcg1_events[] = {
>         PGPGIN,
>         PGPGOUT,
> -       PSWPIN,
> -       PSWPOUT,
>         PGFAULT,
>         PGMAJFAULT,
>  };
> _
>
Nhat Pham Oct. 11, 2024, 8:01 p.m. UTC | #3
On Fri, Sep 13, 2024 at 8:21 AM Jingxiang Zeng
<jingxiangzeng.cas@gmail.com> wrote:
>
> From: Jingxiang Zeng <linuszeng@tencent.com>
>
> In proactive memory reclamation scenarios, it is necessary to estimate the
> pswpin and pswpout metrics of the cgroup to determine whether to continue
> reclaiming anonymous pages in the current batch.  This patch will collect
> these metrics and expose them.

+1 - this is also useful for zswap shrinker enablement, after which an
anon page can be loaded back in either from swap or zswap.
Differentiating these two situations helps a lot with performance
regression diagnostics.

We have host level metrics, but they become less useful when we
combine workloads with different characteristics in the same host.

>
> Link: https://lkml.kernel.org/r/20240830082244.156923-1-jingxiangzeng.cas@gmail.com
> Signed-off-by: Jingxiang Zeng <linuszeng@tencent.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Muchun Song <muchun.song@linux.dev>
> Cc: Roman Gushchin <roman.gushchin@linux.dev>
> Cc: Shakeel Butt <shakeel.butt@linux.dev>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---
>  mm/memcontrol.c | 2 ++
>  mm/page_io.c    | 4 ++++
>  2 files changed, 6 insertions(+)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 6efbfc9399d0..dbc1d43a5c4c 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -418,6 +418,8 @@ static const unsigned int memcg_vm_event_stat[] = {
>         PGPGIN,
>         PGPGOUT,
>  #endif
> +       PSWPIN,
> +       PSWPOUT,
>         PGSCAN_KSWAPD,
>         PGSCAN_DIRECT,
>         PGSCAN_KHUGEPAGED,
> diff --git a/mm/page_io.c b/mm/page_io.c
> index b6f1519d63b0..4bc77d1c6bfa 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -310,6 +310,7 @@ static inline void count_swpout_vm_event(struct folio *folio)
>         }
>         count_mthp_stat(folio_order(folio), MTHP_STAT_SWPOUT);
>  #endif
> +       count_memcg_folio_events(folio, PSWPOUT, folio_nr_pages(folio));
>         count_vm_events(PSWPOUT, folio_nr_pages(folio));
>  }
>
> @@ -505,6 +506,7 @@ static void sio_read_complete(struct kiocb *iocb, long ret)
>                 for (p = 0; p < sio->pages; p++) {
>                         struct folio *folio = page_folio(sio->bvec[p].bv_page);
>
> +                       count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
>                         folio_mark_uptodate(folio);
>                         folio_unlock(folio);
>                 }
> @@ -588,6 +590,7 @@ static void swap_read_folio_bdev_sync(struct folio *folio,
>          * attempt to access it in the page fault retry time check.
>          */
>         get_task_struct(current);
> +       count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
>         count_vm_event(PSWPIN);
>         submit_bio_wait(&bio);
>         __end_swap_bio_read(&bio);
> @@ -603,6 +606,7 @@ static void swap_read_folio_bdev_async(struct folio *folio,
>         bio->bi_iter.bi_sector = swap_folio_sector(folio);
>         bio->bi_end_io = end_swap_bio_read;
>         bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
> +       count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
>         count_vm_event(PSWPIN);

Not related to this patch, but why does the global stats not take into
account large folios here... `count_vm_event(PSWPIN);`?

Acked-by: Nhat Pham <nphamcs@gmail.com>
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6efbfc9399d0..dbc1d43a5c4c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -418,6 +418,8 @@  static const unsigned int memcg_vm_event_stat[] = {
 	PGPGIN,
 	PGPGOUT,
 #endif
+	PSWPIN,
+	PSWPOUT,
 	PGSCAN_KSWAPD,
 	PGSCAN_DIRECT,
 	PGSCAN_KHUGEPAGED,
diff --git a/mm/page_io.c b/mm/page_io.c
index b6f1519d63b0..4bc77d1c6bfa 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -310,6 +310,7 @@  static inline void count_swpout_vm_event(struct folio *folio)
 	}
 	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPOUT);
 #endif
+	count_memcg_folio_events(folio, PSWPOUT, folio_nr_pages(folio));
 	count_vm_events(PSWPOUT, folio_nr_pages(folio));
 }
 
@@ -505,6 +506,7 @@  static void sio_read_complete(struct kiocb *iocb, long ret)
 		for (p = 0; p < sio->pages; p++) {
 			struct folio *folio = page_folio(sio->bvec[p].bv_page);
 
+			count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
 			folio_mark_uptodate(folio);
 			folio_unlock(folio);
 		}
@@ -588,6 +590,7 @@  static void swap_read_folio_bdev_sync(struct folio *folio,
 	 * attempt to access it in the page fault retry time check.
 	 */
 	get_task_struct(current);
+	count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
 	count_vm_event(PSWPIN);
 	submit_bio_wait(&bio);
 	__end_swap_bio_read(&bio);
@@ -603,6 +606,7 @@  static void swap_read_folio_bdev_async(struct folio *folio,
 	bio->bi_iter.bi_sector = swap_folio_sector(folio);
 	bio->bi_end_io = end_swap_bio_read;
 	bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
+	count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
 	count_vm_event(PSWPIN);
 	submit_bio(bio);
 }