diff mbox

[2/3] x86: kvmclock: set scheduler clock stable

Message ID 20150528014929.457223842@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Marcelo Tosatti May 28, 2015, 1:46 a.m. UTC
From: Luiz Capitulino <lcapitulino@redhat.com>

If you try to enable NOHZ_FULL on a guest today, you'll get
the following error when the guest tries to deactivate the
scheduler tick:

 WARNING: CPU: 3 PID: 2182 at kernel/time/tick-sched.c:192 can_stop_full_tick+0xb9/0x290()
 NO_HZ FULL will not work with unstable sched clock
 CPU: 3 PID: 2182 Comm: kworker/3:1 Not tainted 4.0.0-10545-gb9bb6fb #204
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 Workqueue: events flush_to_ldisc
  ffffffff8162a0c7 ffff88011f583e88 ffffffff814e6ba0 0000000000000002
  ffff88011f583ed8 ffff88011f583ec8 ffffffff8104d095 ffff88011f583eb8
  0000000000000000 0000000000000003 0000000000000001 0000000000000001
 Call Trace:
  <IRQ>  [<ffffffff814e6ba0>] dump_stack+0x4f/0x7b
  [<ffffffff8104d095>] warn_slowpath_common+0x85/0xc0
  [<ffffffff8104d146>] warn_slowpath_fmt+0x46/0x50
  [<ffffffff810bd2a9>] can_stop_full_tick+0xb9/0x290
  [<ffffffff810bd9ed>] tick_nohz_irq_exit+0x8d/0xb0
  [<ffffffff810511c5>] irq_exit+0xc5/0x130
  [<ffffffff814f180a>] smp_apic_timer_interrupt+0x4a/0x60
  [<ffffffff814eff5e>] apic_timer_interrupt+0x6e/0x80
  <EOI>  [<ffffffff814ee5d1>] ? _raw_spin_unlock_irqrestore+0x31/0x60
  [<ffffffff8108bbc8>] __wake_up+0x48/0x60
  [<ffffffff8134836c>] n_tty_receive_buf_common+0x49c/0xba0
  [<ffffffff8134a6bf>] ? tty_ldisc_ref+0x1f/0x70
  [<ffffffff81348a84>] n_tty_receive_buf2+0x14/0x20
  [<ffffffff8134b390>] flush_to_ldisc+0xe0/0x120
  [<ffffffff81064d05>] process_one_work+0x1d5/0x540
  [<ffffffff81064c81>] ? process_one_work+0x151/0x540
  [<ffffffff81065191>] worker_thread+0x121/0x470
  [<ffffffff81065070>] ? process_one_work+0x540/0x540
  [<ffffffff8106b4df>] kthread+0xef/0x110
  [<ffffffff8106b3f0>] ? __kthread_parkme+0xa0/0xa0
  [<ffffffff814ef4f2>] ret_from_fork+0x42/0x70
  [<ffffffff8106b3f0>] ? __kthread_parkme+0xa0/0xa0
 ---[ end trace 06e3507544a38866 ]---

However, it turns out that kvmclock does provide a stable
sched_clock callback. So, let the scheduler know this which
in turn makes NOHZ_FULL work in the guest.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>

---
 arch/x86/kernel/kvmclock.c |   17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Paolo Bonzini May 28, 2015, 8:39 a.m. UTC | #1
On 28/05/2015 03:46, Marcelo Tosatti wrote:
> +	flags = PVCLOCK_COUNTS_FROM_ZERO;

If the KVM_FEATURE_CLOCKSOURCE_STABLE_BIT bit is not set, we cannot
trust flags at all.  So let's just do...

>  	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
> -		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
> +		flags |= PVCLOCK_TSC_STABLE_BIT;

-		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+		pvclock_set_flags(~0);

Otherwise looks good.  Shall I do the above change and apply?

Paolo

> +	pvclock_set_flags(flags);
> +
> +	cpu = get_cpu();
> +	vcpu_time = &hv_clock[cpu].pvti;
> +	flags = pvclock_read_flags(vcpu_time);
> +	if (flags & PVCLOCK_COUNTS_FROM_ZERO)
> +		set_sched_clock_stable();
> +	put_cpu();
>  }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: kvm/arch/x86/kernel/kvmclock.c
===================================================================
--- kvm.orig/arch/x86/kernel/kvmclock.c	2015-05-27 18:00:53.616391551 -0300
+++ kvm/arch/x86/kernel/kvmclock.c	2015-05-27 22:43:14.474432962 -0300
@@ -24,6 +24,7 @@ 
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/memblock.h>
+#include <linux/sched.h>
 
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
@@ -217,8 +218,10 @@ 
 
 void __init kvmclock_init(void)
 {
+	struct pvclock_vcpu_time_info *vcpu_time;
 	unsigned long mem;
-	int size;
+	int size, cpu;
+	u8 flags;
 
 	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
 
@@ -263,8 +266,18 @@ 
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
 	pv_info.name = "KVM";
 
+	flags = PVCLOCK_COUNTS_FROM_ZERO;
 	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
-		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+		flags |= PVCLOCK_TSC_STABLE_BIT;
+
+	pvclock_set_flags(flags);
+
+	cpu = get_cpu();
+	vcpu_time = &hv_clock[cpu].pvti;
+	flags = pvclock_read_flags(vcpu_time);
+	if (flags & PVCLOCK_COUNTS_FROM_ZERO)
+		set_sched_clock_stable();
+	put_cpu();
 }
 
 int __init kvm_setup_vsyscall_timeinfo(void)