diff mbox series

[v9,4/5] tick/nohz_full: Ensure quiet_vmstat() is called on exit to user-mode when the idle tick is stopped

Message ID 20221206162416.509808578@redhat.com (mailing list archive)
State New
Headers show
Series Ensure quiet_vmstat() is called when the idle tick was stopped too | expand

Commit Message

Marcelo Tosatti Dec. 6, 2022, 4:18 p.m. UTC
From: Aaron Tomlin <atomlin@redhat.com>

This patch ensures CPU-specific vmstat differentials do not remain
when the scheduling-tick is stopped and before exiting to user-mode
in the context of nohz_full only.

A trivial test program was used to determine the impact of the proposed
changes and under vanilla. The mlock(2) and munlock(2) system calls was
used solely to modify vmstat item 'NR_MLOCK'. The following is an average
count of CPU-cycles across the aforementioned system calls:

				  Vanilla                 Modified

  Cycles per syscall              8461                    8690    (+2.6%)

Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
---
 include/linux/tick.h     |    5 +++--
 kernel/time/tick-sched.c |   15 +++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

Comments

Frederic Weisbecker Dec. 14, 2022, 12:49 p.m. UTC | #1
On Tue, Dec 06, 2022 at 01:18:30PM -0300, Marcelo Tosatti wrote:
> From: Aaron Tomlin <atomlin@redhat.com>
> 
> This patch ensures CPU-specific vmstat differentials do not remain
> when the scheduling-tick is stopped and before exiting to user-mode
> in the context of nohz_full only.
> 
> A trivial test program was used to determine the impact of the proposed
> changes and under vanilla. The mlock(2) and munlock(2) system calls was
> used solely to modify vmstat item 'NR_MLOCK'. The following is an average
> count of CPU-cycles across the aforementioned system calls:
> 
> 				  Vanilla                 Modified
> 
>   Cycles per syscall              8461                    8690    (+2.6%)
> 
> Signed-off-by: Aaron Tomlin <atomlin@redhat.com>

This is missing your Signed-off-by:

Also can you make this (and also the conditional vmstat_shepherd ignore
nohz_full CPUs and also the in-place local arming) a Kconfig option perhaps?
Something like CONFIG_FLUSH_WORK_ON_RESUME_USER (depend on NO_HZ_FULL). I'm using
"WORK" instead of "VMSTAT" so that we can even add more stuff there later.

This way I'll stop worrying about the potential HPC users who may not care about
the occasional interrupt and prefer to have reasonably fast syscalls.

Then after some time if nobody complains, we can remove the Kconfig entry.

Thanks.

> ---
>  include/linux/tick.h     |    5 +++--
>  kernel/time/tick-sched.c |   15 +++++++++++++++
>  2 files changed, 18 insertions(+), 2 deletions(-)
> 
> Index: linux-2.6/include/linux/tick.h
> ===================================================================
> --- linux-2.6.orig/include/linux/tick.h
> +++ linux-2.6/include/linux/tick.h
> @@ -11,7 +11,6 @@
>  #include <linux/context_tracking_state.h>
>  #include <linux/cpumask.h>
>  #include <linux/sched.h>
> -#include <linux/rcupdate.h>
>  
>  #ifdef CONFIG_GENERIC_CLOCKEVENTS
>  extern void __init tick_init(void);
> @@ -272,6 +271,7 @@ static inline void tick_dep_clear_signal
>  
>  extern void tick_nohz_full_kick_cpu(int cpu);
>  extern void __tick_nohz_task_switch(void);
> +void __tick_nohz_user_enter_prepare(void);
>  extern void __init tick_nohz_full_setup(cpumask_var_t cpumask);
>  #else
>  static inline bool tick_nohz_full_enabled(void) { return false; }
> @@ -296,6 +296,7 @@ static inline void tick_dep_clear_signal
>  
>  static inline void tick_nohz_full_kick_cpu(int cpu) { }
>  static inline void __tick_nohz_task_switch(void) { }
> +static inline void __tick_nohz_user_enter_prepare(void) { }
>  static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { }
>  #endif
>  
> @@ -308,7 +309,7 @@ static inline void tick_nohz_task_switch
>  static inline void tick_nohz_user_enter_prepare(void)
>  {
>  	if (tick_nohz_full_cpu(smp_processor_id()))
> -		rcu_nocb_flush_deferred_wakeup();
> +		__tick_nohz_user_enter_prepare();
>  }
>  
>  #endif
> Index: linux-2.6/kernel/time/tick-sched.c
> ===================================================================
> --- linux-2.6.orig/kernel/time/tick-sched.c
> +++ linux-2.6/kernel/time/tick-sched.c
> @@ -26,6 +26,7 @@
>  #include <linux/posix-timers.h>
>  #include <linux/context_tracking.h>
>  #include <linux/mm.h>
> +#include <linux/rcupdate.h>
>  
>  #include <asm/irq_regs.h>
>  
> @@ -519,6 +520,20 @@ void __tick_nohz_task_switch(void)
>  	}
>  }
>  
> +void __tick_nohz_user_enter_prepare(void)
> +{
> +	struct tick_sched *ts;
> +
> +	if (tick_nohz_full_cpu(smp_processor_id())) {
> +		ts = this_cpu_ptr(&tick_cpu_sched);
> +
> +		if (ts->tick_stopped)
> +			quiet_vmstat(true);
> +		rcu_nocb_flush_deferred_wakeup();
> +	}
> +}
> +EXPORT_SYMBOL_GPL(__tick_nohz_user_enter_prepare);
> +
>  /* Get the boot-time nohz CPU list from the kernel parameters. */
>  void __init tick_nohz_full_setup(cpumask_var_t cpumask)
>  {
> 
>
Frederic Weisbecker Dec. 14, 2022, 1 p.m. UTC | #2
On Tue, Dec 06, 2022 at 01:18:30PM -0300, Marcelo Tosatti wrote:
> From: Aaron Tomlin <atomlin@redhat.com>
> 
> This patch ensures CPU-specific vmstat differentials do not remain
> when the scheduling-tick is stopped and before exiting to user-mode
> in the context of nohz_full only.

Please also don't forget to mention why this change is needed.

Thanks.
diff mbox series

Patch

Index: linux-2.6/include/linux/tick.h
===================================================================
--- linux-2.6.orig/include/linux/tick.h
+++ linux-2.6/include/linux/tick.h
@@ -11,7 +11,6 @@ 
 #include <linux/context_tracking_state.h>
 #include <linux/cpumask.h>
 #include <linux/sched.h>
-#include <linux/rcupdate.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void __init tick_init(void);
@@ -272,6 +271,7 @@  static inline void tick_dep_clear_signal
 
 extern void tick_nohz_full_kick_cpu(int cpu);
 extern void __tick_nohz_task_switch(void);
+void __tick_nohz_user_enter_prepare(void);
 extern void __init tick_nohz_full_setup(cpumask_var_t cpumask);
 #else
 static inline bool tick_nohz_full_enabled(void) { return false; }
@@ -296,6 +296,7 @@  static inline void tick_dep_clear_signal
 
 static inline void tick_nohz_full_kick_cpu(int cpu) { }
 static inline void __tick_nohz_task_switch(void) { }
+static inline void __tick_nohz_user_enter_prepare(void) { }
 static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { }
 #endif
 
@@ -308,7 +309,7 @@  static inline void tick_nohz_task_switch
 static inline void tick_nohz_user_enter_prepare(void)
 {
 	if (tick_nohz_full_cpu(smp_processor_id()))
-		rcu_nocb_flush_deferred_wakeup();
+		__tick_nohz_user_enter_prepare();
 }
 
 #endif
Index: linux-2.6/kernel/time/tick-sched.c
===================================================================
--- linux-2.6.orig/kernel/time/tick-sched.c
+++ linux-2.6/kernel/time/tick-sched.c
@@ -26,6 +26,7 @@ 
 #include <linux/posix-timers.h>
 #include <linux/context_tracking.h>
 #include <linux/mm.h>
+#include <linux/rcupdate.h>
 
 #include <asm/irq_regs.h>
 
@@ -519,6 +520,20 @@  void __tick_nohz_task_switch(void)
 	}
 }
 
+void __tick_nohz_user_enter_prepare(void)
+{
+	struct tick_sched *ts;
+
+	if (tick_nohz_full_cpu(smp_processor_id())) {
+		ts = this_cpu_ptr(&tick_cpu_sched);
+
+		if (ts->tick_stopped)
+			quiet_vmstat(true);
+		rcu_nocb_flush_deferred_wakeup();
+	}
+}
+EXPORT_SYMBOL_GPL(__tick_nohz_user_enter_prepare);
+
 /* Get the boot-time nohz CPU list from the kernel parameters. */
 void __init tick_nohz_full_setup(cpumask_var_t cpumask)
 {