diff mbox series

[v2,linux-next] delayacct: track delays from memory compact

Message ID 1638619795-71451-1-git-send-email-wang.yong12@zte.com.cn (mailing list archive)
State New
Headers show
Series [v2,linux-next] delayacct: track delays from memory compact | expand

Commit Message

yong w Dec. 4, 2021, 12:09 p.m. UTC
From: wangyong <wang.yong12@zte.com.cn>

Delay accounting does not track the delay of memory compact.
When there is not enough free memory, tasks can spend
a amount of their time waiting for compact.

To get the impact of tasks in direct memory compact, measure
the delay when allocating memory through memory compact.

Also update tools/accounting/getdelays.c:
/ # ./getdelays_next  -di -p 304
print delayacct stats ON
printing IO accounting
PID     304

CPU             count     real total  virtual total    delay total  delay average
                  277      780000000      849039485       18877296          0.068ms
IO              count    delay total  delay average
                    0              0              0ms
SWAP            count    delay total  delay average
                    0              0              0ms
RECLAIM         count    delay total  delay average
                    5    11088812685           2217ms
THRASHING       count    delay total  delay average
                    0              0              0ms
COMPACT         count    delay total  delay average
                    3          72758              0ms
watch: read=0, write=0, cancelled_write=0

Reported-by: Zeal Robot <zealci@zte.com.cn>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: wangyong <wang.yong12@zte.com.cn>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Reviewed-by: Zhang Wenya <zhang.wenya1@zte.com.cn>
Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
---

Changes since v1:
-fixed duplicate member freepages_start.

 include/linux/delayacct.h      | 28 ++++++++++++++++++++++++++++
 include/uapi/linux/taskstats.h |  6 +++++-
 kernel/delayacct.c             | 15 +++++++++++++++
 mm/page_alloc.c                |  3 +++
 tools/accounting/getdelays.c   |  8 +++++++-
 5 files changed, 58 insertions(+), 2 deletions(-)

Comments

Education Directorate Dec. 5, 2021, 8:17 a.m. UTC | #1
On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote:
> From: wangyong <wang.yong12@zte.com.cn>
> 
> Delay accounting does not track the delay of memory compact.
> When there is not enough free memory, tasks can spend
> a amount of their time waiting for compact.
> 
> To get the impact of tasks in direct memory compact, measure
> the delay when allocating memory through memory compact.
>

Should we call this DIRECT_COMPACT and through documentation
or name change imply that this won't work for kcompactd the
kernel thread - based on my reading of the patches.

> Also update tools/accounting/getdelays.c:
> / # ./getdelays_next  -di -p 304
> print delayacct stats ON
> printing IO accounting
> PID     304
> 
> CPU             count     real total  virtual total    delay total  delay average
>                   277      780000000      849039485       18877296          0.068ms
> IO              count    delay total  delay average
>                     0              0              0ms
> SWAP            count    delay total  delay average
>                     0              0              0ms
> RECLAIM         count    delay total  delay average
>                     5    11088812685           2217ms
> THRASHING       count    delay total  delay average
>                     0              0              0ms
> COMPACT         count    delay total  delay average
>                     3          72758              0ms
> watch: read=0, write=0, cancelled_write=0
> 
> Reported-by: Zeal Robot <zealci@zte.com.cn>
> Reported-by: kernel test robot <lkp@intel.com>
> Signed-off-by: wangyong <wang.yong12@zte.com.cn>
> Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
> Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn>
> Reviewed-by: Zhang Wenya <zhang.wenya1@zte.com.cn>
> Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
> ---
> 
> Changes since v1:
> -fixed duplicate member freepages_start.
> 
>  include/linux/delayacct.h      | 28 ++++++++++++++++++++++++++++
>  include/uapi/linux/taskstats.h |  6 +++++-
>  kernel/delayacct.c             | 15 +++++++++++++++
>  mm/page_alloc.c                |  3 +++
>  tools/accounting/getdelays.c   |  8 +++++++-
>  5 files changed, 58 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
> index 435c365..3e03d01 100644
> --- a/include/linux/delayacct.h
> +++ b/include/linux/delayacct.h
> @@ -42,8 +42,12 @@ struct task_delay_info {
>  	u64 thrashing_start;
>  	u64 thrashing_delay;	/* wait for thrashing page */
>  
> +	u64 compact_start;
> +	u64 compact_delay;	/* wait for memory compact */
> +
>  	u32 freepages_count;	/* total count of memory reclaim */
>  	u32 thrashing_count;	/* total count of thrash waits */
> +	u32 compact_count;	/* total count of memory compact */
>  };
>  #endif
>  
> @@ -72,6 +76,8 @@ extern void __delayacct_thrashing_start(void);
>  extern void __delayacct_thrashing_end(void);
>  extern void __delayacct_swapin_start(void);
>  extern void __delayacct_swapin_end(void);
> +extern void __delayacct_compact_start(void);
> +extern void __delayacct_compact_end(void);
>  
>  static inline void delayacct_tsk_init(struct task_struct *tsk)
>  {
> @@ -170,6 +176,24 @@ static inline void delayacct_swapin_end(void)
>  		__delayacct_swapin_end();
>  }
>  
> +static inline void delayacct_compact_start(void)
> +{
> +	if (!static_branch_unlikely(&delayacct_key))
> +		return;
> +
> +	if (current->delays)
> +		__delayacct_compact_start();
> +}
> +
> +static inline void delayacct_compact_end(void)
> +{
> +	if (!static_branch_unlikely(&delayacct_key))
> +		return;
> +
> +	if (current->delays)
> +		__delayacct_compact_end();
> +}
> +
>  #else
>  static inline void delayacct_init(void)
>  {}
> @@ -200,6 +224,10 @@ static inline void delayacct_swapin_start(void)
>  {}
>  static inline void delayacct_swapin_end(void)
>  {}
> +static inline void delayacct_compact_start(void)
> +{}
> +static inline void delayacct_compact_end(void)
> +{}
>  
>  #endif /* CONFIG_TASK_DELAY_ACCT */
>  
> diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
> index ccbd087..12327d3 100644
> --- a/include/uapi/linux/taskstats.h
> +++ b/include/uapi/linux/taskstats.h
> @@ -34,7 +34,7 @@
>   */
>  
>  
> -#define TASKSTATS_VERSION	10
> +#define TASKSTATS_VERSION	11
>  #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
>  					 * in linux/sched.h */
>  
> @@ -172,6 +172,10 @@ struct taskstats {
>  
>  	/* v10: 64-bit btime to avoid overflow */
>  	__u64	ac_btime64;		/* 64-bit begin time */
> +
> +	/* Delay waiting for memory compact */
> +	__u64	compact_count;
> +	__u64	compact_delay_total;
>  };
>  
>  
> diff --git a/kernel/delayacct.c b/kernel/delayacct.c
> index 11f3cd8..c5e8cea 100644
> --- a/kernel/delayacct.c
> +++ b/kernel/delayacct.c
> @@ -155,10 +155,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
>  	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
>  	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
>  	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
> +	tmp = d->compact_delay_total + tsk->delays->compact_delay;
> +	d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
>  	d->blkio_count += tsk->delays->blkio_count;
>  	d->swapin_count += tsk->delays->swapin_count;
>  	d->freepages_count += tsk->delays->freepages_count;
>  	d->thrashing_count += tsk->delays->thrashing_count;
> +	d->compact_count += tsk->delays->compact_count;
>  	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
>  
>  	return 0;
> @@ -214,3 +217,15 @@ void __delayacct_swapin_end(void)
>  		      &current->delays->swapin_count);
>  }
>  
> +void __delayacct_compact_start(void)
> +{
> +	current->delays->compact_start = local_clock();
> +}
> +
> +void __delayacct_compact_end(void)
> +{
> +	delayacct_end(&current->delays->lock,
> +		      &current->delays->compact_start,
> +		      &current->delays->compact_delay,
> +		      &current->delays->compact_count);
> +}
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index edfd6c8..6430226 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -74,6 +74,7 @@
>  #include <linux/padata.h>
>  #include <linux/khugepaged.h>
>  #include <linux/buffer_head.h>
> +#include <linux/delayacct.h>
>  #include <asm/sections.h>
>  #include <asm/tlbflush.h>
>  #include <asm/div64.h>
> @@ -4363,6 +4364,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
>  		return NULL;
>  
>  	psi_memstall_enter(&pflags);
> +	delayacct_compact_start();
>  	noreclaim_flag = memalloc_noreclaim_save();
>  
>  	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
> @@ -4370,6 +4372,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
>  
>  	memalloc_noreclaim_restore(noreclaim_flag);
>  	psi_memstall_leave(&pflags);
> +	delayacct_compact_end();
>  
>  	if (*compact_result == COMPACT_SKIPPED)
>  		return NULL;
> diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
> index 5ef1c15..11e8673 100644
> --- a/tools/accounting/getdelays.c
> +++ b/tools/accounting/getdelays.c
> @@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t)
>  	       "RECLAIM  %12s%15s%15s\n"
>  	       "      %15llu%15llu%15llums\n"
>  	       "THRASHING%12s%15s%15s\n"
> +	       "      %15llu%15llu%15llums\n"
> +	       "COMPACT  %12s%15s%15s\n"
>  	       "      %15llu%15llu%15llums\n",
>  	       "count", "real total", "virtual total",
>  	       "delay total", "delay average",
> @@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t)
>  	       "count", "delay total", "delay average",
>  	       (unsigned long long)t->thrashing_count,
>  	       (unsigned long long)t->thrashing_delay_total,
> -	       average_ms(t->thrashing_delay_total, t->thrashing_count));
> +	       average_ms(t->thrashing_delay_total, t->thrashing_count),
> +	       "count", "delay total", "delay average",
> +	       (unsigned long long)t->compact_count,
> +	       (unsigned long long)t->compact_delay_total,
> +	       average_ms(t->compact_delay_total, t->compact_count));
>  }
>  
>  static void task_context_switch_counts(struct taskstats *t)
> -- 
> 2.7.4
>

At some point we should make the user space tool version aware.

Reviewed-by: Balbir Singh <bsingharora@gmail.com>
yong w Dec. 5, 2021, 11:08 a.m. UTC | #2
Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道:
>
> On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote:
> > From: wangyong <wang.yong12@zte.com.cn>
> >
> > Delay accounting does not track the delay of memory compact.
> > When there is not enough free memory, tasks can spend
> > a amount of their time waiting for compact.
> >
> > To get the impact of tasks in direct memory compact, measure
> > the delay when allocating memory through memory compact.
> >
>
> Should we call this DIRECT_COMPACT and through documentation
> or name change imply that this won't work for kcompactd the
> kernel thread - based on my reading of the patches.
>
Using DIRECT_COMPACT is a little redundant,because the
delayacct stats of delay accounting is specific to tasks, it has
nothing to do with kcompactd, which is similar to the RECLAIM field.

As for the document, consider submitting a separate patch with
the thrashing delay description added.

> > Also update tools/accounting/getdelays.c:
> > / # ./getdelays_next  -di -p 304
> > print delayacct stats ON
> > printing IO accounting
> > PID     304
> >
> > CPU             count     real total  virtual total    delay total  delay average
> >                   277      780000000      849039485       18877296          0.068ms
> > IO              count    delay total  delay average
> >                     0              0              0ms
> > SWAP            count    delay total  delay average
> >                     0              0              0ms
> > RECLAIM         count    delay total  delay average
> >                     5    11088812685           2217ms
> > THRASHING       count    delay total  delay average
> >                     0              0              0ms
> > COMPACT         count    delay total  delay average
> >                     3          72758              0ms
> > watch: read=0, write=0, cancelled_write=0
> >
> > Reported-by: Zeal Robot <zealci@zte.com.cn>
> > Reported-by: kernel test robot <lkp@intel.com>
> > Signed-off-by: wangyong <wang.yong12@zte.com.cn>
> > Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
> > Reviewed-by: Jiang Xuexin <jiang.xuexin@zte.com.cn>
> > Reviewed-by: Zhang Wenya <zhang.wenya1@zte.com.cn>
> > Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
> > ---
> >
> > Changes since v1:
> > -fixed duplicate member freepages_start.
> >
> >  include/linux/delayacct.h      | 28 ++++++++++++++++++++++++++++
> >  include/uapi/linux/taskstats.h |  6 +++++-
> >  kernel/delayacct.c             | 15 +++++++++++++++
> >  mm/page_alloc.c                |  3 +++
> >  tools/accounting/getdelays.c   |  8 +++++++-
> >  5 files changed, 58 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
> > index 435c365..3e03d01 100644
> > --- a/include/linux/delayacct.h
> > +++ b/include/linux/delayacct.h
> > @@ -42,8 +42,12 @@ struct task_delay_info {
> >       u64 thrashing_start;
> >       u64 thrashing_delay;    /* wait for thrashing page */
> >
> > +     u64 compact_start;
> > +     u64 compact_delay;      /* wait for memory compact */
> > +
> >       u32 freepages_count;    /* total count of memory reclaim */
> >       u32 thrashing_count;    /* total count of thrash waits */
> > +     u32 compact_count;      /* total count of memory compact */
> >  };
> >  #endif
> >
> > @@ -72,6 +76,8 @@ extern void __delayacct_thrashing_start(void);
> >  extern void __delayacct_thrashing_end(void);
> >  extern void __delayacct_swapin_start(void);
> >  extern void __delayacct_swapin_end(void);
> > +extern void __delayacct_compact_start(void);
> > +extern void __delayacct_compact_end(void);
> >
> >  static inline void delayacct_tsk_init(struct task_struct *tsk)
> >  {
> > @@ -170,6 +176,24 @@ static inline void delayacct_swapin_end(void)
> >               __delayacct_swapin_end();
> >  }
> >
> > +static inline void delayacct_compact_start(void)
> > +{
> > +     if (!static_branch_unlikely(&delayacct_key))
> > +             return;
> > +
> > +     if (current->delays)
> > +             __delayacct_compact_start();
> > +}
> > +
> > +static inline void delayacct_compact_end(void)
> > +{
> > +     if (!static_branch_unlikely(&delayacct_key))
> > +             return;
> > +
> > +     if (current->delays)
> > +             __delayacct_compact_end();
> > +}
> > +
> >  #else
> >  static inline void delayacct_init(void)
> >  {}
> > @@ -200,6 +224,10 @@ static inline void delayacct_swapin_start(void)
> >  {}
> >  static inline void delayacct_swapin_end(void)
> >  {}
> > +static inline void delayacct_compact_start(void)
> > +{}
> > +static inline void delayacct_compact_end(void)
> > +{}
> >
> >  #endif /* CONFIG_TASK_DELAY_ACCT */
> >
> > diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
> > index ccbd087..12327d3 100644
> > --- a/include/uapi/linux/taskstats.h
> > +++ b/include/uapi/linux/taskstats.h
> > @@ -34,7 +34,7 @@
> >   */
> >
> >
> > -#define TASKSTATS_VERSION    10
> > +#define TASKSTATS_VERSION    11
> >  #define TS_COMM_LEN          32      /* should be >= TASK_COMM_LEN
> >                                        * in linux/sched.h */
> >
> > @@ -172,6 +172,10 @@ struct taskstats {
> >
> >       /* v10: 64-bit btime to avoid overflow */
> >       __u64   ac_btime64;             /* 64-bit begin time */
> > +
> > +     /* Delay waiting for memory compact */
> > +     __u64   compact_count;
> > +     __u64   compact_delay_total;
> >  };
> >
> >
> > diff --git a/kernel/delayacct.c b/kernel/delayacct.c
> > index 11f3cd8..c5e8cea 100644
> > --- a/kernel/delayacct.c
> > +++ b/kernel/delayacct.c
> > @@ -155,10 +155,13 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
> >       d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
> >       tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
> >       d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
> > +     tmp = d->compact_delay_total + tsk->delays->compact_delay;
> > +     d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
> >       d->blkio_count += tsk->delays->blkio_count;
> >       d->swapin_count += tsk->delays->swapin_count;
> >       d->freepages_count += tsk->delays->freepages_count;
> >       d->thrashing_count += tsk->delays->thrashing_count;
> > +     d->compact_count += tsk->delays->compact_count;
> >       raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
> >
> >       return 0;
> > @@ -214,3 +217,15 @@ void __delayacct_swapin_end(void)
> >                     &current->delays->swapin_count);
> >  }
> >
> > +void __delayacct_compact_start(void)
> > +{
> > +     current->delays->compact_start = local_clock();
> > +}
> > +
> > +void __delayacct_compact_end(void)
> > +{
> > +     delayacct_end(&current->delays->lock,
> > +                   &current->delays->compact_start,
> > +                   &current->delays->compact_delay,
> > +                   &current->delays->compact_count);
> > +}
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index edfd6c8..6430226 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -74,6 +74,7 @@
> >  #include <linux/padata.h>
> >  #include <linux/khugepaged.h>
> >  #include <linux/buffer_head.h>
> > +#include <linux/delayacct.h>
> >  #include <asm/sections.h>
> >  #include <asm/tlbflush.h>
> >  #include <asm/div64.h>
> > @@ -4363,6 +4364,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
> >               return NULL;
> >
> >       psi_memstall_enter(&pflags);
> > +     delayacct_compact_start();
> >       noreclaim_flag = memalloc_noreclaim_save();
> >
> >       *compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
> > @@ -4370,6 +4372,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
> >
> >       memalloc_noreclaim_restore(noreclaim_flag);
> >       psi_memstall_leave(&pflags);
> > +     delayacct_compact_end();
> >
> >       if (*compact_result == COMPACT_SKIPPED)
> >               return NULL;
> > diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
> > index 5ef1c15..11e8673 100644
> > --- a/tools/accounting/getdelays.c
> > +++ b/tools/accounting/getdelays.c
> > @@ -205,6 +205,8 @@ static void print_delayacct(struct taskstats *t)
> >              "RECLAIM  %12s%15s%15s\n"
> >              "      %15llu%15llu%15llums\n"
> >              "THRASHING%12s%15s%15s\n"
> > +            "      %15llu%15llu%15llums\n"
> > +            "COMPACT  %12s%15s%15s\n"
> >              "      %15llu%15llu%15llums\n",
> >              "count", "real total", "virtual total",
> >              "delay total", "delay average",
> > @@ -228,7 +230,11 @@ static void print_delayacct(struct taskstats *t)
> >              "count", "delay total", "delay average",
> >              (unsigned long long)t->thrashing_count,
> >              (unsigned long long)t->thrashing_delay_total,
> > -            average_ms(t->thrashing_delay_total, t->thrashing_count));
> > +            average_ms(t->thrashing_delay_total, t->thrashing_count),
> > +            "count", "delay total", "delay average",
> > +            (unsigned long long)t->compact_count,
> > +            (unsigned long long)t->compact_delay_total,
> > +            average_ms(t->compact_delay_total, t->compact_count));
> >  }
> >
> >  static void task_context_switch_counts(struct taskstats *t)
> > --
> > 2.7.4
> >
>
> At some point we should make the user space tool version aware.
>
I think this tool is generally used with the corresponding kernel, and
TASKSTATS_VERSION is used to indicate that the structure has
changed which has been modified in this patch.
> Reviewed-by: Balbir Singh <bsingharora@gmail.com>

Thanks for your reply!
Education Directorate Dec. 7, 2021, 5:16 a.m. UTC | #3
On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote:
> Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道:
> >
> > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote:
> > > From: wangyong <wang.yong12@zte.com.cn>
> > >
> > > Delay accounting does not track the delay of memory compact.
> > > When there is not enough free memory, tasks can spend
> > > a amount of their time waiting for compact.
> > >
> > > To get the impact of tasks in direct memory compact, measure
> > > the delay when allocating memory through memory compact.
> > >
> >
> > Should we call this DIRECT_COMPACT and through documentation
> > or name change imply that this won't work for kcompactd the
> > kernel thread - based on my reading of the patches.
> >
> Using DIRECT_COMPACT is a little redundant,because the
> delayacct stats of delay accounting is specific to tasks, it has
> nothing to do with kcompactd, which is similar to the RECLAIM field.
>

What would we expect when we call delayacct -p <pidof kcompactd>
to be output? Yes, I agree with your comment on the reclaim
field. Don't feel to strongly, but it can be confusing that kcompactd
has spent no time in compact'ing? Not that delayacct is used for
kernel threads, but I am not sure if that use case exists today.

<snip>

Balbir Singh.
yong w Dec. 7, 2021, 4:50 p.m. UTC | #4
Balbir Singh <bsingharora@gmail.com> 于2021年12月7日周二 13:16写道:
>
> On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote:
> > Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道:
> > >
> > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote:
> > > > From: wangyong <wang.yong12@zte.com.cn>
> > > >
> > > > Delay accounting does not track the delay of memory compact.
> > > > When there is not enough free memory, tasks can spend
> > > > a amount of their time waiting for compact.
> > > >
> > > > To get the impact of tasks in direct memory compact, measure
> > > > the delay when allocating memory through memory compact.
> > > >
> > >
> > > Should we call this DIRECT_COMPACT and through documentation
> > > or name change imply that this won't work for kcompactd the
> > > kernel thread - based on my reading of the patches.
> > >
> > Using DIRECT_COMPACT is a little redundant,because the
> > delayacct stats of delay accounting is specific to tasks, it has
> > nothing to do with kcompactd, which is similar to the RECLAIM field.
> >
>
> What would we expect when we call delayacct -p <pidof kcompactd>
> to be output?
If the slow path of memory allocation is invoked in the kcompacd process,
there may be delays being recorded.

> Don't feel to strongly, but it can be confusing that kcompactd
> has spent no time in compact'ing? Not that delayacct is used for
> kernel threads, but I am not sure if that use case exists today.
Yes, delayacct does not restrict the process of obtaining information,
but kcompactd is used for  compaction,  the compact delay of
kcompatd is not actually a delay.Maybe it can be added to the
document later to make it clearer.

Thanks for your reply!
yong w Dec. 13, 2021, 1:56 p.m. UTC | #5
Hello,  is this patch OK?

Thanks.

yong w <yongw.pur@gmail.com> 于2021年12月8日周三 00:50写道:
>
> Balbir Singh <bsingharora@gmail.com> 于2021年12月7日周二 13:16写道:
> >
> > On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote:
> > > Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道:
> > > >
> > > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote:
> > > > > From: wangyong <wang.yong12@zte.com.cn>
> > > > >
> > > > > Delay accounting does not track the delay of memory compact.
> > > > > When there is not enough free memory, tasks can spend
> > > > > a amount of their time waiting for compact.
> > > > >
> > > > > To get the impact of tasks in direct memory compact, measure
> > > > > the delay when allocating memory through memory compact.
> > > > >
> > > >
> > > > Should we call this DIRECT_COMPACT and through documentation
> > > > or name change imply that this won't work for kcompactd the
> > > > kernel thread - based on my reading of the patches.
> > > >
> > > Using DIRECT_COMPACT is a little redundant,because the
> > > delayacct stats of delay accounting is specific to tasks, it has
> > > nothing to do with kcompactd, which is similar to the RECLAIM field.
> > >
> >
> > What would we expect when we call delayacct -p <pidof kcompactd>
> > to be output?
> If the slow path of memory allocation is invoked in the kcompacd process,
> there may be delays being recorded.
>
> > Don't feel to strongly, but it can be confusing that kcompactd
> > has spent no time in compact'ing? Not that delayacct is used for
> > kernel threads, but I am not sure if that use case exists today.
> Yes, delayacct does not restrict the process of obtaining information,
> but kcompactd is used for  compaction,  the compact delay of
> kcompatd is not actually a delay.Maybe it can be added to the
> document later to make it clearer.
>
> Thanks for your reply!
Education Directorate Dec. 14, 2021, 5:49 a.m. UTC | #6
On Mon, Dec 13, 2021 at 09:56:08PM +0800, yong w wrote:
> Hello,  is this patch OK?
> 
> Thanks.
> 
> yong w <yongw.pur@gmail.com> 于2021年12月8日周三 00:50写道:
> >
> > Balbir Singh <bsingharora@gmail.com> 于2021年12月7日周二 13:16写道:
> > >
> > > On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote:
> > > > Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道:
> > > > >
> > > > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote:
> > > > > > From: wangyong <wang.yong12@zte.com.cn>
> > > > > >
> > > > > > Delay accounting does not track the delay of memory compact.
> > > > > > When there is not enough free memory, tasks can spend
> > > > > > a amount of their time waiting for compact.
> > > > > >
> > > > > > To get the impact of tasks in direct memory compact, measure
> > > > > > the delay when allocating memory through memory compact.
> > > > > >
> > > > >
> > > > > Should we call this DIRECT_COMPACT and through documentation
> > > > > or name change imply that this won't work for kcompactd the
> > > > > kernel thread - based on my reading of the patches.
> > > > >
> > > > Using DIRECT_COMPACT is a little redundant,because the
> > > > delayacct stats of delay accounting is specific to tasks, it has
> > > > nothing to do with kcompactd, which is similar to the RECLAIM field.
> > > >
> > >
> > > What would we expect when we call delayacct -p <pidof kcompactd>
> > > to be output?
> > If the slow path of memory allocation is invoked in the kcompacd process,
> > there may be delays being recorded.
> >
> > > Don't feel to strongly, but it can be confusing that kcompactd
> > > has spent no time in compact'ing? Not that delayacct is used for
> > > kernel threads, but I am not sure if that use case exists today.
> > Yes, delayacct does not restrict the process of obtaining information,
> > but kcompactd is used for  compaction,  the compact delay of
> > kcompatd is not actually a delay.Maybe it can be added to the
> > document later to make it clearer.
> >
> > Thanks for your reply!

Please avoid top posting, when you say added later, I presume more
patches for documentation are coming. I am OK with the patch in that
case.

Reviewed-by: Balbir Singh <bsingharora@gmail.com>

Balbir Singh
yong w Dec. 15, 2021, 12:40 p.m. UTC | #7
Balbir Singh <bsingharora@gmail.com> 于2021年12月14日周二 13:49写道:
>
> On Mon, Dec 13, 2021 at 09:56:08PM +0800, yong w wrote:
> > Hello,  is this patch OK?
> >
> > Thanks.
> >
> > yong w <yongw.pur@gmail.com> 于2021年12月8日周三 00:50写道:
> > >
> > > Balbir Singh <bsingharora@gmail.com> 于2021年12月7日周二 13:16写道:
> > > >
> > > > On Sun, Dec 05, 2021 at 07:08:02PM +0800, yong w wrote:
> > > > > Balbir Singh <bsingharora@gmail.com> 于2021年12月5日周日 16:17写道:
> > > > > >
> > > > > > On Sat, Dec 04, 2021 at 04:09:55AM -0800, yongw.pur@gmail.com wrote:
> > > > > > > From: wangyong <wang.yong12@zte.com.cn>
> > > > > > >
> > > > > > > Delay accounting does not track the delay of memory compact.
> > > > > > > When there is not enough free memory, tasks can spend
> > > > > > > a amount of their time waiting for compact.
> > > > > > >
> > > > > > > To get the impact of tasks in direct memory compact, measure
> > > > > > > the delay when allocating memory through memory compact.
> > > > > > >
> > > > > >
> > > > > > Should we call this DIRECT_COMPACT and through documentation
> > > > > > or name change imply that this won't work for kcompactd the
> > > > > > kernel thread - based on my reading of the patches.
> > > > > >
> > > > > Using DIRECT_COMPACT is a little redundant,because the
> > > > > delayacct stats of delay accounting is specific to tasks, it has
> > > > > nothing to do with kcompactd, which is similar to the RECLAIM field.
> > > > >
> > > >
> > > > What would we expect when we call delayacct -p <pidof kcompactd>
> > > > to be output?
> > > If the slow path of memory allocation is invoked in the kcompacd process,
> > > there may be delays being recorded.
> > >
> > > > Don't feel to strongly, but it can be confusing that kcompactd
> > > > has spent no time in compact'ing? Not that delayacct is used for
> > > > kernel threads, but I am not sure if that use case exists today.
> > > Yes, delayacct does not restrict the process of obtaining information,
> > > but kcompactd is used for  compaction,  the compact delay of
> > > kcompatd is not actually a delay.Maybe it can be added to the
> > > document later to make it clearer.
> > >
> > > Thanks for your reply!
>
> Please avoid top posting, when you say added later, I presume more
> patches for documentation are coming. I am OK with the patch in that
> case.
>
> Reviewed-by: Balbir Singh <bsingharora@gmail.com>
>
> Balbir Singh

OK,Thank you!
diff mbox series

Patch

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 435c365..3e03d01 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -42,8 +42,12 @@  struct task_delay_info {
 	u64 thrashing_start;
 	u64 thrashing_delay;	/* wait for thrashing page */
 
+	u64 compact_start;
+	u64 compact_delay;	/* wait for memory compact */
+
 	u32 freepages_count;	/* total count of memory reclaim */
 	u32 thrashing_count;	/* total count of thrash waits */
+	u32 compact_count;	/* total count of memory compact */
 };
 #endif
 
@@ -72,6 +76,8 @@  extern void __delayacct_thrashing_start(void);
 extern void __delayacct_thrashing_end(void);
 extern void __delayacct_swapin_start(void);
 extern void __delayacct_swapin_end(void);
+extern void __delayacct_compact_start(void);
+extern void __delayacct_compact_end(void);
 
 static inline void delayacct_tsk_init(struct task_struct *tsk)
 {
@@ -170,6 +176,24 @@  static inline void delayacct_swapin_end(void)
 		__delayacct_swapin_end();
 }
 
+static inline void delayacct_compact_start(void)
+{
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
+	if (current->delays)
+		__delayacct_compact_start();
+}
+
+static inline void delayacct_compact_end(void)
+{
+	if (!static_branch_unlikely(&delayacct_key))
+		return;
+
+	if (current->delays)
+		__delayacct_compact_end();
+}
+
 #else
 static inline void delayacct_init(void)
 {}
@@ -200,6 +224,10 @@  static inline void delayacct_swapin_start(void)
 {}
 static inline void delayacct_swapin_end(void)
 {}
+static inline void delayacct_compact_start(void)
+{}
+static inline void delayacct_compact_end(void)
+{}
 
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index ccbd087..12327d3 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@ 
  */
 
 
-#define TASKSTATS_VERSION	10
+#define TASKSTATS_VERSION	11
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -172,6 +172,10 @@  struct taskstats {
 
 	/* v10: 64-bit btime to avoid overflow */
 	__u64	ac_btime64;		/* 64-bit begin time */
+
+	/* Delay waiting for memory compact */
+	__u64	compact_count;
+	__u64	compact_delay_total;
 };
 
 
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 11f3cd8..c5e8cea 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -155,10 +155,13 @@  int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
 	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
 	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
+	tmp = d->compact_delay_total + tsk->delays->compact_delay;
+	d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
 	d->blkio_count += tsk->delays->blkio_count;
 	d->swapin_count += tsk->delays->swapin_count;
 	d->freepages_count += tsk->delays->freepages_count;
 	d->thrashing_count += tsk->delays->thrashing_count;
+	d->compact_count += tsk->delays->compact_count;
 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
 	return 0;
@@ -214,3 +217,15 @@  void __delayacct_swapin_end(void)
 		      &current->delays->swapin_count);
 }
 
+void __delayacct_compact_start(void)
+{
+	current->delays->compact_start = local_clock();
+}
+
+void __delayacct_compact_end(void)
+{
+	delayacct_end(&current->delays->lock,
+		      &current->delays->compact_start,
+		      &current->delays->compact_delay,
+		      &current->delays->compact_count);
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index edfd6c8..6430226 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -74,6 +74,7 @@ 
 #include <linux/padata.h>
 #include <linux/khugepaged.h>
 #include <linux/buffer_head.h>
+#include <linux/delayacct.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -4363,6 +4364,7 @@  __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		return NULL;
 
 	psi_memstall_enter(&pflags);
+	delayacct_compact_start();
 	noreclaim_flag = memalloc_noreclaim_save();
 
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
@@ -4370,6 +4372,7 @@  __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 
 	memalloc_noreclaim_restore(noreclaim_flag);
 	psi_memstall_leave(&pflags);
+	delayacct_compact_end();
 
 	if (*compact_result == COMPACT_SKIPPED)
 		return NULL;
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 5ef1c15..11e8673 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -205,6 +205,8 @@  static void print_delayacct(struct taskstats *t)
 	       "RECLAIM  %12s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n"
 	       "THRASHING%12s%15s%15s\n"
+	       "      %15llu%15llu%15llums\n"
+	       "COMPACT  %12s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n",
 	       "count", "real total", "virtual total",
 	       "delay total", "delay average",
@@ -228,7 +230,11 @@  static void print_delayacct(struct taskstats *t)
 	       "count", "delay total", "delay average",
 	       (unsigned long long)t->thrashing_count,
 	       (unsigned long long)t->thrashing_delay_total,
-	       average_ms(t->thrashing_delay_total, t->thrashing_count));
+	       average_ms(t->thrashing_delay_total, t->thrashing_count),
+	       "count", "delay total", "delay average",
+	       (unsigned long long)t->compact_count,
+	       (unsigned long long)t->compact_delay_total,
+	       average_ms(t->compact_delay_total, t->compact_count));
 }
 
 static void task_context_switch_counts(struct taskstats *t)