diff mbox series

[v8,4/5] tick/nohz_full: Ensure quiet_vmstat() is called on exit to user-mode when the idle tick is stopped

Message ID 20220924152227.819815-5-atomlin@redhat.com (mailing list archive)
State New
Headers show
Series Ensure quiet_vmstat() is called when the idle tick was stopped too | expand

Commit Message

Aaron Tomlin Sept. 24, 2022, 3:22 p.m. UTC
This patch ensures CPU-specific vmstat differentials do not remain
when the scheduling-tick is stopped and before exiting to user-mode
in the context of nohz_full only.

A trivial test program was used to determine the impact of the proposed
changes and under vanilla. The mlock(2) and munlock(2) system calls was
used solely to modify vmstat item 'NR_MLOCK'. The following is an average
count of CPU-cycles across the aforementioned system calls:

				  Vanilla                 Modified

  Cycles per syscall              8461                    8690    (+2.6%)

Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
---
 include/linux/tick.h     |  5 +++--
 kernel/time/tick-sched.c | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

Comments

Rafael Folco Sept. 27, 2022, 4:17 p.m. UTC | #1
Tested this patch w/ nohz_full setup and oslat 8h run on isolated cpus, max
latency is 7us versus 15us without the patch.
Thanks.

On Mon, Sep 26, 2022 at 8:02 PM Aaron Tomlin <atomlin@redhat.com> wrote:

> This patch ensures CPU-specific vmstat differentials do not remain
> when the scheduling-tick is stopped and before exiting to user-mode
> in the context of nohz_full only.
>
> A trivial test program was used to determine the impact of the proposed
> changes and under vanilla. The mlock(2) and munlock(2) system calls was
> used solely to modify vmstat item 'NR_MLOCK'. The following is an average
> count of CPU-cycles across the aforementioned system calls:
>
>                                   Vanilla                 Modified
>
>   Cycles per syscall              8461                    8690    (+2.6%)
>
> Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
> ---
>  include/linux/tick.h     |  5 +++--
>  kernel/time/tick-sched.c | 15 +++++++++++++++
>  2 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/tick.h b/include/linux/tick.h
> index bfd571f18cfd..a2bbd6d32e33 100644
> --- a/include/linux/tick.h
> +++ b/include/linux/tick.h
> @@ -11,7 +11,6 @@
>  #include <linux/context_tracking_state.h>
>  #include <linux/cpumask.h>
>  #include <linux/sched.h>
> -#include <linux/rcupdate.h>
>
>  #ifdef CONFIG_GENERIC_CLOCKEVENTS
>  extern void __init tick_init(void);
> @@ -272,6 +271,7 @@ static inline void tick_dep_clear_signal(struct
> signal_struct *signal,
>
>  extern void tick_nohz_full_kick_cpu(int cpu);
>  extern void __tick_nohz_task_switch(void);
> +void __tick_nohz_user_enter_prepare(void);
>  extern void __init tick_nohz_full_setup(cpumask_var_t cpumask);
>  #else
>  static inline bool tick_nohz_full_enabled(void) { return false; }
> @@ -296,6 +296,7 @@ static inline void tick_dep_clear_signal(struct
> signal_struct *signal,
>
>  static inline void tick_nohz_full_kick_cpu(int cpu) { }
>  static inline void __tick_nohz_task_switch(void) { }
> +static inline void __tick_nohz_user_enter_prepare(void) { }
>  static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { }
>  #endif
>
> @@ -308,7 +309,7 @@ static inline void tick_nohz_task_switch(void)
>  static inline void tick_nohz_user_enter_prepare(void)
>  {
>         if (tick_nohz_full_cpu(smp_processor_id()))
> -               rcu_nocb_flush_deferred_wakeup();
> +               __tick_nohz_user_enter_prepare();
>  }
>
>  #endif
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index b0e3c9205946..634cd0fac267 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -26,6 +26,7 @@
>  #include <linux/posix-timers.h>
>  #include <linux/context_tracking.h>
>  #include <linux/mm.h>
> +#include <linux/rcupdate.h>
>
>  #include <asm/irq_regs.h>
>
> @@ -519,6 +520,20 @@ void __tick_nohz_task_switch(void)
>         }
>  }
>
> +void __tick_nohz_user_enter_prepare(void)
> +{
> +       struct tick_sched *ts;
> +
> +       if (tick_nohz_full_cpu(smp_processor_id())) {
> +               ts = this_cpu_ptr(&tick_cpu_sched);
> +
> +               if (ts->tick_stopped)
> +                       quiet_vmstat();
> +               rcu_nocb_flush_deferred_wakeup();
> +       }
> +}
> +EXPORT_SYMBOL_GPL(__tick_nohz_user_enter_prepare);
> +
>  /* Get the boot-time nohz CPU list from the kernel parameters. */
>  void __init tick_nohz_full_setup(cpumask_var_t cpumask)
>  {
> --
> 2.37.1
>
>
>
Aaron Tomlin Sept. 29, 2022, 8:22 a.m. UTC | #2
On Tue 2022-09-27 13:17 -0300, Rafael Folco wrote:
> Tested this patch w/ nohz_full setup and oslat 8h run on isolated cpus, max
> latency is 7us versus 15us without the patch.
> Thanks.

Hi Rafael,

Good news. Thanks for the feedback.
May I add Tested-by: ?


Kind regards,
Rafael Folco Sept. 29, 2022, 12:49 p.m. UTC | #3
Yes, please do. Thanks!

On Thu, Sep 29, 2022 at 5:22 AM Aaron Tomlin <atomlin@redhat.com> wrote:
>
> On Tue 2022-09-27 13:17 -0300, Rafael Folco wrote:
> > Tested this patch w/ nohz_full setup and oslat 8h run on isolated cpus, max
> > latency is 7us versus 15us without the patch.
> > Thanks.
>
> Hi Rafael,
>
> Good news. Thanks for the feedback.
> May I add Tested-by: ?
>
>
> Kind regards,
>
> --
> Aaron Tomlin
>
Frederic Weisbecker Oct. 21, 2022, 2:50 p.m. UTC | #4
On Tue, Sep 27, 2022 at 01:17:02PM -0300, Rafael Folco wrote:
> Tested this patch w/ nohz_full setup and oslat 8h run on isolated cpus, max
> latency is 7us versus 15us without the patch.
> Thanks.

What about the added overhead upon user enter?
That is my main worry. For people who want nohz_full for lowest latency
(or rather lowest noise) then it's a good add. But what if some people want
to use nohz_full for HPC and prefer faster syscalls over avoiding an interrupt
once in a while? (although arguably I never heard from users of such
workloads...)

Thanks.


> 
> On Mon, Sep 26, 2022 at 8:02 PM Aaron Tomlin <atomlin@redhat.com> wrote:
> 
> > This patch ensures CPU-specific vmstat differentials do not remain
> > when the scheduling-tick is stopped and before exiting to user-mode
> > in the context of nohz_full only.
> >
> > A trivial test program was used to determine the impact of the proposed
> > changes and under vanilla. The mlock(2) and munlock(2) system calls was
> > used solely to modify vmstat item 'NR_MLOCK'. The following is an average
> > count of CPU-cycles across the aforementioned system calls:
> >
> >                                   Vanilla                 Modified
> >
> >   Cycles per syscall              8461                    8690    (+2.6%)
> >
> > Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
> > ---
> >  include/linux/tick.h     |  5 +++--
> >  kernel/time/tick-sched.c | 15 +++++++++++++++
> >  2 files changed, 18 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/tick.h b/include/linux/tick.h
> > index bfd571f18cfd..a2bbd6d32e33 100644
> > --- a/include/linux/tick.h
> > +++ b/include/linux/tick.h
> > @@ -11,7 +11,6 @@
> >  #include <linux/context_tracking_state.h>
> >  #include <linux/cpumask.h>
> >  #include <linux/sched.h>
> > -#include <linux/rcupdate.h>
> >
> >  #ifdef CONFIG_GENERIC_CLOCKEVENTS
> >  extern void __init tick_init(void);
> > @@ -272,6 +271,7 @@ static inline void tick_dep_clear_signal(struct
> > signal_struct *signal,
> >
> >  extern void tick_nohz_full_kick_cpu(int cpu);
> >  extern void __tick_nohz_task_switch(void);
> > +void __tick_nohz_user_enter_prepare(void);
> >  extern void __init tick_nohz_full_setup(cpumask_var_t cpumask);
> >  #else
> >  static inline bool tick_nohz_full_enabled(void) { return false; }
> > @@ -296,6 +296,7 @@ static inline void tick_dep_clear_signal(struct
> > signal_struct *signal,
> >
> >  static inline void tick_nohz_full_kick_cpu(int cpu) { }
> >  static inline void __tick_nohz_task_switch(void) { }
> > +static inline void __tick_nohz_user_enter_prepare(void) { }
> >  static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { }
> >  #endif
> >
> > @@ -308,7 +309,7 @@ static inline void tick_nohz_task_switch(void)
> >  static inline void tick_nohz_user_enter_prepare(void)
> >  {
> >         if (tick_nohz_full_cpu(smp_processor_id()))
> > -               rcu_nocb_flush_deferred_wakeup();
> > +               __tick_nohz_user_enter_prepare();
> >  }
> >
> >  #endif
> > diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> > index b0e3c9205946..634cd0fac267 100644
> > --- a/kernel/time/tick-sched.c
> > +++ b/kernel/time/tick-sched.c
> > @@ -26,6 +26,7 @@
> >  #include <linux/posix-timers.h>
> >  #include <linux/context_tracking.h>
> >  #include <linux/mm.h>
> > +#include <linux/rcupdate.h>
> >
> >  #include <asm/irq_regs.h>
> >
> > @@ -519,6 +520,20 @@ void __tick_nohz_task_switch(void)
> >         }
> >  }
> >
> > +void __tick_nohz_user_enter_prepare(void)
> > +{
> > +       struct tick_sched *ts;
> > +
> > +       if (tick_nohz_full_cpu(smp_processor_id())) {
> > +               ts = this_cpu_ptr(&tick_cpu_sched);
> > +
> > +               if (ts->tick_stopped)
> > +                       quiet_vmstat();
> > +               rcu_nocb_flush_deferred_wakeup();
> > +       }
> > +}
> > +EXPORT_SYMBOL_GPL(__tick_nohz_user_enter_prepare);
> > +
> >  /* Get the boot-time nohz CPU list from the kernel parameters. */
> >  void __init tick_nohz_full_setup(cpumask_var_t cpumask)
> >  {
> > --
> > 2.37.1
> >
> >
> >
> 
> -- 
> Folco
Marcelo Tosatti Nov. 10, 2022, 7:14 p.m. UTC | #5
On Fri, Oct 21, 2022 at 04:50:17PM +0200, Frederic Weisbecker wrote:
> On Tue, Sep 27, 2022 at 01:17:02PM -0300, Rafael Folco wrote:
> > Tested this patch w/ nohz_full setup and oslat 8h run on isolated cpus, max
> > latency is 7us versus 15us without the patch.
> > Thanks.
> 
> What about the added overhead upon user enter?
> That is my main worry. For people who want nohz_full for lowest latency
> (or rather lowest noise) then it's a good add. But what if some people want
> to use nohz_full for HPC and prefer faster syscalls over avoiding an interrupt
> once in a while? (although arguably I never heard from users of such
> workloads...)
> 
> Thanks.

The performance reduction on the mlock test below is only 2.6% (see below).

HPC programs would likely be heavy on accesses to userspace memory,
and not so much on syscall performance at this scale?

Anyway, if this overhead turns out to be a problem for some HPC application
(which you consider unlikely) a knob can be added to control the behaviour.

> > On Mon, Sep 26, 2022 at 8:02 PM Aaron Tomlin <atomlin@redhat.com> wrote:
> > 
> > > This patch ensures CPU-specific vmstat differentials do not remain
> > > when the scheduling-tick is stopped and before exiting to user-mode
> > > in the context of nohz_full only.
> > >
> > > A trivial test program was used to determine the impact of the proposed
> > > changes and under vanilla. The mlock(2) and munlock(2) system calls was
> > > used solely to modify vmstat item 'NR_MLOCK'. The following is an average
> > > count of CPU-cycles across the aforementioned system calls:
> > >
> > >                                   Vanilla                 Modified
> > >
> > >   Cycles per syscall              8461                    8690    (+2.6%)
> > >
> > > Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
> > > ---
> > >  include/linux/tick.h     |  5 +++--
> > >  kernel/time/tick-sched.c | 15 +++++++++++++++
> > >  2 files changed, 18 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/include/linux/tick.h b/include/linux/tick.h
> > > index bfd571f18cfd..a2bbd6d32e33 100644
> > > --- a/include/linux/tick.h
> > > +++ b/include/linux/tick.h
> > > @@ -11,7 +11,6 @@
> > >  #include <linux/context_tracking_state.h>
> > >  #include <linux/cpumask.h>
> > >  #include <linux/sched.h>
> > > -#include <linux/rcupdate.h>
> > >
> > >  #ifdef CONFIG_GENERIC_CLOCKEVENTS
> > >  extern void __init tick_init(void);
> > > @@ -272,6 +271,7 @@ static inline void tick_dep_clear_signal(struct
> > > signal_struct *signal,
> > >
> > >  extern void tick_nohz_full_kick_cpu(int cpu);
> > >  extern void __tick_nohz_task_switch(void);
> > > +void __tick_nohz_user_enter_prepare(void);
> > >  extern void __init tick_nohz_full_setup(cpumask_var_t cpumask);
> > >  #else
> > >  static inline bool tick_nohz_full_enabled(void) { return false; }
> > > @@ -296,6 +296,7 @@ static inline void tick_dep_clear_signal(struct
> > > signal_struct *signal,
> > >
> > >  static inline void tick_nohz_full_kick_cpu(int cpu) { }
> > >  static inline void __tick_nohz_task_switch(void) { }
> > > +static inline void __tick_nohz_user_enter_prepare(void) { }
> > >  static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { }
> > >  #endif
> > >
> > > @@ -308,7 +309,7 @@ static inline void tick_nohz_task_switch(void)
> > >  static inline void tick_nohz_user_enter_prepare(void)
> > >  {
> > >         if (tick_nohz_full_cpu(smp_processor_id()))
> > > -               rcu_nocb_flush_deferred_wakeup();
> > > +               __tick_nohz_user_enter_prepare();
> > >  }
> > >
> > >  #endif
> > > diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> > > index b0e3c9205946..634cd0fac267 100644
> > > --- a/kernel/time/tick-sched.c
> > > +++ b/kernel/time/tick-sched.c
> > > @@ -26,6 +26,7 @@
> > >  #include <linux/posix-timers.h>
> > >  #include <linux/context_tracking.h>
> > >  #include <linux/mm.h>
> > > +#include <linux/rcupdate.h>
> > >
> > >  #include <asm/irq_regs.h>
> > >
> > > @@ -519,6 +520,20 @@ void __tick_nohz_task_switch(void)
> > >         }
> > >  }
> > >
> > > +void __tick_nohz_user_enter_prepare(void)
> > > +{
> > > +       struct tick_sched *ts;
> > > +
> > > +       if (tick_nohz_full_cpu(smp_processor_id())) {
> > > +               ts = this_cpu_ptr(&tick_cpu_sched);
> > > +
> > > +               if (ts->tick_stopped)
> > > +                       quiet_vmstat();
> > > +               rcu_nocb_flush_deferred_wakeup();
> > > +       }
> > > +}
> > > +EXPORT_SYMBOL_GPL(__tick_nohz_user_enter_prepare);
> > > +
> > >  /* Get the boot-time nohz CPU list from the kernel parameters. */
> > >  void __init tick_nohz_full_setup(cpumask_var_t cpumask)
> > >  {
> > > --
> > > 2.37.1
> > >
> > >
> > >
> > 
> > -- 
> > Folco
> 
>
diff mbox series

Patch

diff --git a/include/linux/tick.h b/include/linux/tick.h
index bfd571f18cfd..a2bbd6d32e33 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -11,7 +11,6 @@ 
 #include <linux/context_tracking_state.h>
 #include <linux/cpumask.h>
 #include <linux/sched.h>
-#include <linux/rcupdate.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void __init tick_init(void);
@@ -272,6 +271,7 @@  static inline void tick_dep_clear_signal(struct signal_struct *signal,
 
 extern void tick_nohz_full_kick_cpu(int cpu);
 extern void __tick_nohz_task_switch(void);
+void __tick_nohz_user_enter_prepare(void);
 extern void __init tick_nohz_full_setup(cpumask_var_t cpumask);
 #else
 static inline bool tick_nohz_full_enabled(void) { return false; }
@@ -296,6 +296,7 @@  static inline void tick_dep_clear_signal(struct signal_struct *signal,
 
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void __tick_nohz_task_switch(void) { }
+static inline void __tick_nohz_user_enter_prepare(void) { }
 static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { }
 #endif
 
@@ -308,7 +309,7 @@  static inline void tick_nohz_task_switch(void)
 static inline void tick_nohz_user_enter_prepare(void)
 {
 	if (tick_nohz_full_cpu(smp_processor_id()))
-		rcu_nocb_flush_deferred_wakeup();
+		__tick_nohz_user_enter_prepare();
 }
 
 #endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b0e3c9205946..634cd0fac267 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -26,6 +26,7 @@ 
 #include <linux/posix-timers.h>
 #include <linux/context_tracking.h>
 #include <linux/mm.h>
+#include <linux/rcupdate.h>
 
 #include <asm/irq_regs.h>
 
@@ -519,6 +520,20 @@  void __tick_nohz_task_switch(void)
 	}
 }
 
+void __tick_nohz_user_enter_prepare(void)
+{
+	struct tick_sched *ts;
+
+	if (tick_nohz_full_cpu(smp_processor_id())) {
+		ts = this_cpu_ptr(&tick_cpu_sched);
+
+		if (ts->tick_stopped)
+			quiet_vmstat();
+		rcu_nocb_flush_deferred_wakeup();
+	}
+}
+EXPORT_SYMBOL_GPL(__tick_nohz_user_enter_prepare);
+
 /* Get the boot-time nohz CPU list from the kernel parameters. */
 void __init tick_nohz_full_setup(cpumask_var_t cpumask)
 {