diff mbox series

sched: Don't define sched_clock_irqtime as static key

Message ID 20250126065037.97935-1-laoar.shao@gmail.com (mailing list archive)
State New
Headers show
Series sched: Don't define sched_clock_irqtime as static key | expand

Commit Message

Yafang Shao Jan. 26, 2025, 6:50 a.m. UTC
The sched_clock_irqtime was defined as a static key in commit 8722903cbb8f
("sched: Define sched_clock_irqtime as static key"). However, this change
introduces a 'sleeping in atomic context' warning, as shown below:

	arch/x86/kernel/tsc.c:1214 mark_tsc_unstable()
	warn: sleeping in atomic context

As analyzed by Dan, the affected code path is as follows:

vcpu_load() <- disables preempt
-> kvm_arch_vcpu_load()
   -> mark_tsc_unstable() <- sleeps

virt/kvm/kvm_main.c
   166  void vcpu_load(struct kvm_vcpu *vcpu)
   167  {
   168          int cpu = get_cpu();
                          ^^^^^^^^^^
This get_cpu() disables preemption.

   169
   170          __this_cpu_write(kvm_running_vcpu, vcpu);
   171          preempt_notifier_register(&vcpu->preempt_notifier);
   172          kvm_arch_vcpu_load(vcpu, cpu);
   173          put_cpu();
   174  }

arch/x86/kvm/x86.c
  4979          if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
  4980                  s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
  4981                                  rdtsc() - vcpu->arch.last_host_tsc;
  4982                  if (tsc_delta < 0)
  4983                          mark_tsc_unstable("KVM discovered backwards TSC");

arch/x86/kernel/tsc.c
    1206 void mark_tsc_unstable(char *reason)
    1207 {
    1208         if (tsc_unstable)
    1209                 return;
    1210
    1211         tsc_unstable = 1;
    1212         if (using_native_sched_clock())
    1213                 clear_sched_clock_stable();
--> 1214         disable_sched_clock_irqtime();
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
kernel/jump_label.c
   245  void static_key_disable(struct static_key *key)
   246  {
   247          cpus_read_lock();
                ^^^^^^^^^^^^^^^^
This lock has a might_sleep() in it which triggers the static checker
warning.

   248          static_key_disable_cpuslocked(key);
   249          cpus_read_unlock();
   250  }

Let's revert this change for now, as {disable,enable}_sched_clock_irqtime
are used in many places, as pointed out by Sean, including the following:

The code path in clocksource_watchdog():

  clocksource_watchdog()
  |
  -> spin_lock(&watchdog_lock);
     |
     -> __clocksource_unstable()
        |
        -> clocksource.mark_unstable() == tsc_cs_mark_unstable()
           |
           -> disable_sched_clock_irqtime()

And the code path in sched_clock_register():

	/* Cannot register a sched_clock with interrupts on */
	local_irq_save(flags);

	...

	/* Enable IRQ time accounting if we have a fast enough sched_clock() */
	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
		enable_sched_clock_irqtime();

	local_irq_restore(flags);

Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
Closes: https://lore.kernel.org/kvm/37a79ba3-9ce0-479c-a5b0-2bd75d573ed3@stanley.mountain/
Debugged-by: Dan Carpenter <dan.carpenter@linaro.org>
Debugged-by: Sean Christopherson <seanjc@google.com>
Debugged-by: Michal Koutný <mkoutny@suse.com>
Fixes: 8722903cbb8f ("sched: Define sched_clock_irqtime as static key")
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/cputime.c | 8 ++++----
 kernel/sched/sched.h   | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

Comments

Vincent Guittot Jan. 27, 2025, 1:13 p.m. UTC | #1
On Sun, 26 Jan 2025 at 07:50, Yafang Shao <laoar.shao@gmail.com> wrote:
>
> The sched_clock_irqtime was defined as a static key in commit 8722903cbb8f
> ('sched: Define sched_clock_irqtime as static key'). However, this change
> introduces a 'sleeping in atomic context' warning, as shown below:
>
>         arch/x86/kernel/tsc.c:1214 mark_tsc_unstable()
>         warn: sleeping in atomic context
>
> As analyzed by Dan, the affected code path is as follows:
>
> vcpu_load() <- disables preempt
> -> kvm_arch_vcpu_load()
>    -> mark_tsc_unstable() <- sleeps
>
> virt/kvm/kvm_main.c
>    166  void vcpu_load(struct kvm_vcpu *vcpu)
>    167  {
>    168          int cpu = get_cpu();
>                           ^^^^^^^^^^
> This get_cpu() disables preemption.
>
>    169
>    170          __this_cpu_write(kvm_running_vcpu, vcpu);
>    171          preempt_notifier_register(&vcpu->preempt_notifier);
>    172          kvm_arch_vcpu_load(vcpu, cpu);
>    173          put_cpu();
>    174  }
>
> arch/x86/kvm/x86.c
>   4979          if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
>   4980                  s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
>   4981                                  rdtsc() - vcpu->arch.last_host_tsc;
>   4982                  if (tsc_delta < 0)
>   4983                          mark_tsc_unstable("KVM discovered backwards TSC");
>
> arch/x86/kernel/tsc.c
>     1206 void mark_tsc_unstable(char *reason)
>     1207 {
>     1208         if (tsc_unstable)
>     1209                 return;
>     1210
>     1211         tsc_unstable = 1;
>     1212         if (using_native_sched_clock())
>     1213                 clear_sched_clock_stable();
> --> 1214         disable_sched_clock_irqtime();
>                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> kernel/jump_label.c
>    245  void static_key_disable(struct static_key *key)
>    246  {
>    247          cpus_read_lock();
>                 ^^^^^^^^^^^^^^^^
> This lock has a might_sleep() in it which triggers the static checker
> warning.
>
>    248          static_key_disable_cpuslocked(key);
>    249          cpus_read_unlock();
>    250  }
>
> Let revert this change for now as {disable,enable}_sched_clock_irqtime
> are used in many places, as pointed out by Sean, including the following:
>
> The code path in clocksource_watchdog():
>
>   clocksource_watchdog()
>   |
>   -> spin_lock(&watchdog_lock);
>      |
>      -> __clocksource_unstable()
>         |
>         -> clocksource.mark_unstable() == tsc_cs_mark_unstable()
>            |
>            -> disable_sched_clock_irqtime()
>
> And the code path in sched_clock_register():
>
>         /* Cannot register a sched_clock with interrupts on */
>         local_irq_save(flags);
>
>         ...
>
>         /* Enable IRQ time accounting if we have a fast enough sched_clock() */
>         if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
>                 enable_sched_clock_irqtime();
>
>         local_irq_restore(flags);
>
> Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
> Closes: https://lore.kernel.org/kvm/37a79ba3-9ce0-479c-a5b0-2bd75d573ed3@stanley.mountain/
> Debugged-by: Dan Carpenter <dan.carpenter@linaro.org>
> Debugged-by: Sean Christopherson <seanjc@google.com>
> Debugged-by: Michal Koutný <mkoutny@suse.com>
> Fixes: 8722903cbb8f ("sched: Define sched_clock_irqtime as static key")
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> Cc: Dan Carpenter <dan.carpenter@linaro.org>
> Cc: Sean Christopherson <seanjc@google.com>
> Cc: Michal Koutný <mkoutny@suse.com>
> Cc: Peter Zijlstra" <peterz@infradead.org>
> Cc: Vincent Guittot" <vincent.guittot@linaro.org>

I overlooked that this was used under atomic context

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---
>  kernel/sched/cputime.c | 8 ++++----
>  kernel/sched/sched.h   | 4 ++--
>  2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index 5d9143dd0879..c7904ce2345a 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -9,8 +9,6 @@
>
>  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
>
> -DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
> -
>  /*
>   * There are no locks covering percpu hardirq/softirq time.
>   * They are only modified in vtime_account, on corresponding CPU
> @@ -24,14 +22,16 @@ DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
>   */
>  DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
>
> +static int sched_clock_irqtime;
> +
>  void enable_sched_clock_irqtime(void)
>  {
> -       static_branch_enable(&sched_clock_irqtime);
> +       sched_clock_irqtime = 1;
>  }
>
>  void disable_sched_clock_irqtime(void)
>  {
> -       static_branch_disable(&sched_clock_irqtime);
> +       sched_clock_irqtime = 0;
>  }
>
>  static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 38e0e323dda2..ab16d3d0e51c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -3259,11 +3259,11 @@ struct irqtime {
>  };
>
>  DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
> -DECLARE_STATIC_KEY_FALSE(sched_clock_irqtime);
> +extern int sched_clock_irqtime;
>
>  static inline int irqtime_enabled(void)
>  {
> -       return static_branch_likely(&sched_clock_irqtime);
> +       return sched_clock_irqtime;
>  }
>
>  /*
> --
> 2.43.5
>
diff mbox series

Patch

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5d9143dd0879..c7904ce2345a 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -9,8 +9,6 @@ 
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
-DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
-
 /*
  * There are no locks covering percpu hardirq/softirq time.
  * They are only modified in vtime_account, on corresponding CPU
@@ -24,14 +22,16 @@  DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
  */
 DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
 
+static int sched_clock_irqtime;
+
 void enable_sched_clock_irqtime(void)
 {
-	static_branch_enable(&sched_clock_irqtime);
+	sched_clock_irqtime = 1;
 }
 
 void disable_sched_clock_irqtime(void)
 {
-	static_branch_disable(&sched_clock_irqtime);
+	sched_clock_irqtime = 0;
 }
 
 static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 38e0e323dda2..ab16d3d0e51c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3259,11 +3259,11 @@  struct irqtime {
 };
 
 DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
-DECLARE_STATIC_KEY_FALSE(sched_clock_irqtime);
+extern int sched_clock_irqtime;
 
 static inline int irqtime_enabled(void)
 {
-	return static_branch_likely(&sched_clock_irqtime);
+	return sched_clock_irqtime;
 }
 
 /*