diff mbox

KVM: lapic: remove timer spikes on target expectation

Message ID 20180421163042.14093-1-anthoine.bourgeois@blade-group.com (mailing list archive)
State New, archived
Headers show

Commit Message

Anthoine Bourgeois April 21, 2018, 4:30 p.m. UTC
Since the commit "8003c9ae204e: add APIC Timer periodic/oneshot mode VMX
preemption timer support", a Windows 10 guest shows erratic timer
spikes after a few hours.  As the uptime of the VM grows, the spikes get
larger.

Here are the results for 150000 iterations of a 1ms timer without any load:
	  Before 8003c9ae204e | After 8003c9ae204e
Max           1834us          |  86000us
Mean          1100us          |   1021us
Deviation       59us          |    149us
Here are the results for 150000 iterations of a 1ms timer with a cpu-z stress test:
	  Before 8003c9ae204e | After 8003c9ae204e
Max          32000us          | 140000us
Mean          1006us          |   1997us
Deviation      140us          |  11095us

This patch partially reverts that commit by removing the
target_expiration tracking and going back to plain hrtimer calls.  The
APIC Timer periodic/oneshot mode support is kept because it is required
by the new Windows Spring update.

Signed-off-by: Anthoine Bourgeois <anthoine.bourgeois@blade-group.com>
---
 arch/x86/kvm/lapic.c | 57 ++++++++++++++++++++++++----------------------------
 arch/x86/kvm/lapic.h |  1 -
 2 files changed, 26 insertions(+), 32 deletions(-)

Comments

Mika Penttilä April 21, 2018, 6:26 p.m. UTC | #1
On 21.04.2018 19:30, Anthoine Bourgeois wrote:
> Since the commit "8003c9ae204e: add APIC Timer periodic/oneshot mode VMX
> preemption timer support", a Windows 10 guest has some erratic timer
> spikes after few hours.  As the uptime of the VM grows the spikes are
> larger.
>
> Here the results on a 150000 times 1ms timer without any load:
> 	  Before 8003c9ae204e | After 8003c9ae204e
> Max           1834us          |  86000us
> Mean          1100us          |   1021us
> Deviation       59us          |    149us
> Here the results on a 150000 times 1ms timer with a cpu-z stress test:
> 	  Before 8003c9ae204e | After 8003c9ae204e
> Max          32000us          | 140000us
> Mean          1006us          |   1997us
> Deviation      140us          |  11095us
>
> The current patch partially revert the previous commit by removing the
> target timer expectation to go back to the straight hrtimer calls.  The
> APIC Timer periodic/oneshot mode support is kept because it is necessary
> on the new Windows Spring update.
>
> Signed-off-by: Anthoine Bourgeois <anthoine.bourgeois@blade-group.com>
> ---
>  arch/x86/kvm/lapic.c | 57 ++++++++++++++++++++++++----------------------------
>  arch/x86/kvm/lapic.h |  1 -
>  2 files changed, 26 insertions(+), 32 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 70dcb5548022..d88cf12259ef 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1173,7 +1173,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
>  
>  static u32 apic_get_tmcct(struct kvm_lapic *apic)
>  {
> -	ktime_t remaining, now;
> +	ktime_t remaining;
>  	s64 ns;
>  	u32 tmcct;
>  
> @@ -1184,8 +1184,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
>  		apic->lapic_timer.period == 0)
>  		return 0;
>  
> -	now = ktime_get();
> -	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
> +	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
>  	if (ktime_to_ns(remaining) < 0)
>  		remaining = 0;
>  
> @@ -1465,32 +1464,48 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
>  
>  static void start_sw_period(struct kvm_lapic *apic)
>  {
> +	ktime_t now;
> +
> +	/* lapic timer in oneshot or periodic mode */
> +	now = ktime_get();
> +	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
> +		    * APIC_BUS_CYCLE_NS * apic->divide_count;
> +
>  	if (!apic->lapic_timer.period)
>  		return;
>  
> -	if (apic_lvtt_oneshot(apic) &&
> -	    ktime_after(ktime_get(),
> -			apic->lapic_timer.target_expiration)) {
> +	limit_periodic_timer_frequency(apic);
> +
> +	if (apic_lvtt_oneshot(apic)) {
>  		apic_timer_expired(apic);
>  		return;
>  	}
>  

So in oneshot mode the hrtimer is never armed?





..Mika
Wanpeng Li April 22, 2018, 12:49 a.m. UTC | #2
2018-04-22 0:30 GMT+08:00 Anthoine Bourgeois
<anthoine.bourgeois@blade-group.com>:
> Since the commit "8003c9ae204e: add APIC Timer periodic/oneshot mode VMX
> preemption timer support", a Windows 10 guest has some erratic timer
> spikes after few hours.  As the uptime of the VM grows the spikes are
> larger.
>
> Here the results on a 150000 times 1ms timer without any load:
>           Before 8003c9ae204e | After 8003c9ae204e
> Max           1834us          |  86000us
> Mean          1100us          |   1021us
> Deviation       59us          |    149us
> Here the results on a 150000 times 1ms timer with a cpu-z stress test:
>           Before 8003c9ae204e | After 8003c9ae204e
> Max          32000us          | 140000us
> Mean          1006us          |   1997us
> Deviation      140us          |  11095us
>

In your testing, the preemption_timer kvm_intel parameter is N, right? In
addition, could you post the testcase?

Regards,
Wanpeng Li

> The current patch partially revert the previous commit by removing the
> target timer expectation to go back to the straight hrtimer calls.  The
> APIC Timer periodic/oneshot mode support is kept because it is necessary
> on the new Windows Spring update.
>
> Signed-off-by: Anthoine Bourgeois <anthoine.bourgeois@blade-group.com>
> ---
>  arch/x86/kvm/lapic.c | 57 ++++++++++++++++++++++++----------------------------
>  arch/x86/kvm/lapic.h |  1 -
>  2 files changed, 26 insertions(+), 32 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 70dcb5548022..d88cf12259ef 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1173,7 +1173,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
>
>  static u32 apic_get_tmcct(struct kvm_lapic *apic)
>  {
> -       ktime_t remaining, now;
> +       ktime_t remaining;
>         s64 ns;
>         u32 tmcct;
>
> @@ -1184,8 +1184,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
>                 apic->lapic_timer.period == 0)
>                 return 0;
>
> -       now = ktime_get();
> -       remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
> +       remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
>         if (ktime_to_ns(remaining) < 0)
>                 remaining = 0;
>
> @@ -1465,32 +1464,48 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
>
>  static void start_sw_period(struct kvm_lapic *apic)
>  {
> +       ktime_t now;
> +
> +       /* lapic timer in oneshot or periodic mode */
> +       now = ktime_get();
> +       apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
> +                   * APIC_BUS_CYCLE_NS * apic->divide_count;
> +
>         if (!apic->lapic_timer.period)
>                 return;
>
> -       if (apic_lvtt_oneshot(apic) &&
> -           ktime_after(ktime_get(),
> -                       apic->lapic_timer.target_expiration)) {
> +       limit_periodic_timer_frequency(apic);
> +
> +       if (apic_lvtt_oneshot(apic)) {
>                 apic_timer_expired(apic);
>                 return;
>         }
>
>         hrtimer_start(&apic->lapic_timer.timer,
> -               apic->lapic_timer.target_expiration,
> +               ktime_add_ns(now, apic->lapic_timer.period),
>                 HRTIMER_MODE_ABS_PINNED);
> +
> +       apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
> +                  PRIx64 ", "
> +                  "timer initial count 0x%x, period %lldns, "
> +                  "expire @ 0x%016" PRIx64 ".\n", __func__,
> +                  APIC_BUS_CYCLE_NS, ktime_to_ns(now),
> +                  kvm_lapic_get_reg(apic, APIC_TMICT),
> +                  apic->lapic_timer.period,
> +                  ktime_to_ns(ktime_add_ns(now,
> +                               apic->lapic_timer.period)));
>  }
>
>  static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
>  {
> -       ktime_t now, remaining;
> +       ktime_t remaining;
>         u64 ns_remaining_old, ns_remaining_new;
>
>         apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
>                 * APIC_BUS_CYCLE_NS * apic->divide_count;
>         limit_periodic_timer_frequency(apic);
>
> -       now = ktime_get();
> -       remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
> +       remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
>         if (ktime_to_ns(remaining) < 0)
>                 remaining = 0;
>
> @@ -1501,15 +1516,10 @@ static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_diviso
>         apic->lapic_timer.tscdeadline +=
>                 nsec_to_cycles(apic->vcpu, ns_remaining_new) -
>                 nsec_to_cycles(apic->vcpu, ns_remaining_old);
> -       apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
>  }
>
>  static bool set_target_expiration(struct kvm_lapic *apic)
>  {
> -       ktime_t now;
> -       u64 tscl = rdtsc();
> -
> -       now = ktime_get();
>         apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
>                 * APIC_BUS_CYCLE_NS * apic->divide_count;
>
> @@ -1520,19 +1530,8 @@ static bool set_target_expiration(struct kvm_lapic *apic)
>
>         limit_periodic_timer_frequency(apic);
>
> -       apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
> -                  PRIx64 ", "
> -                  "timer initial count 0x%x, period %lldns, "
> -                  "expire @ 0x%016" PRIx64 ".\n", __func__,
> -                  APIC_BUS_CYCLE_NS, ktime_to_ns(now),
> -                  kvm_lapic_get_reg(apic, APIC_TMICT),
> -                  apic->lapic_timer.period,
> -                  ktime_to_ns(ktime_add_ns(now,
> -                               apic->lapic_timer.period)));
> -
> -       apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
> +       apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, rdtsc()) +
>                 nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
> -       apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
>
>         return true;
>  }
> @@ -1541,9 +1540,6 @@ static void advance_periodic_target_expiration(struct kvm_lapic *apic)
>  {
>         apic->lapic_timer.tscdeadline +=
>                 nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
> -       apic->lapic_timer.target_expiration =
> -               ktime_add_ns(apic->lapic_timer.target_expiration,
> -                               apic->lapic_timer.period);
>  }
>
>  bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
> @@ -2216,7 +2212,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
>                         apic->lapic_timer.tscdeadline = 0;
>                 if (apic_lvtt_oneshot(apic)) {
>                         apic->lapic_timer.tscdeadline = 0;
> -                       apic->lapic_timer.target_expiration = 0;
>                 }
>                 atomic_set(&apic->lapic_timer.pending, 0);
>         }
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index edce055e9fd7..56823b159e9b 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -19,7 +19,6 @@
>  struct kvm_timer {
>         struct hrtimer timer;
>         s64 period;                             /* unit: ns */
> -       ktime_t target_expiration;
>         u32 timer_mode;
>         u32 timer_mode_mask;
>         u64 tscdeadline;
> --
> 2.11.0
>
Anthoine Bourgeois April 22, 2018, 1:15 a.m. UTC | #3
On Sun, Apr 22, 2018 at 08:49:06AM +0800, Wanpeng Li wrote:
>2018-04-22 0:30 GMT+08:00 Anthoine Bourgeois
><anthoine.bourgeois@blade-group.com>:
>> Since the commit "8003c9ae204e: add APIC Timer periodic/oneshot mode VMX
>> preemption timer support", a Windows 10 guest has some erratic timer
>> spikes after few hours.  As the uptime of the VM grows the spikes are
>> larger.
>>
>> Here the results on a 150000 times 1ms timer without any load:
>>           Before 8003c9ae204e | After 8003c9ae204e
>> Max           1834us          |  86000us
>> Mean          1100us          |   1021us
>> Deviation       59us          |    149us
>> Here the results on a 150000 times 1ms timer with a cpu-z stress test:
>>           Before 8003c9ae204e | After 8003c9ae204e
>> Max          32000us          | 140000us
>> Mean          1006us          |   1997us
>> Deviation      140us          |  11095us
>>
>
>In your testing, preemption_timer kvm_intel parameter is N, right? In
>addition, could you post the testcase?
>
The preemption_timer is set to Y.

The testcase is a Windows app that streams sound. The kernel upgrade
triggers a lot of sound crackling that gets worse over time. I suspect
that there are other side effects on the guest, but the sound was the most
obvious one in our workload.

The numbers in the commit message came from a simple test program that
loops on 1ms sleep calls to show the problem more formally.

Best regards,
Anthoine
Wanpeng Li April 22, 2018, 5:56 a.m. UTC | #4
2018-04-22 9:15 GMT+08:00 Anthoine Bourgeois
<anthoine.bourgeois@blade-group.com>:
> On Sun, Apr 22, 2018 at 08:49:06AM +0800, Wanpeng Li wrote:
>>
>> 2018-04-22 0:30 GMT+08:00 Anthoine Bourgeois
>> <anthoine.bourgeois@blade-group.com>:
>>>
>>> Since the commit "8003c9ae204e: add APIC Timer periodic/oneshot mode VMX
>>> preemption timer support", a Windows 10 guest has some erratic timer
>>> spikes after few hours.  As the uptime of the VM grows the spikes are
>>> larger.
>>>
>>> Here the results on a 150000 times 1ms timer without any load:
>>>           Before 8003c9ae204e | After 8003c9ae204e
>>> Max           1834us          |  86000us
>>> Mean          1100us          |   1021us
>>> Deviation       59us          |    149us
>>> Here the results on a 150000 times 1ms timer with a cpu-z stress test:
>>>           Before 8003c9ae204e | After 8003c9ae204e
>>> Max          32000us          | 140000us
>>> Mean          1006us          |   1997us
>>> Deviation      140us          |  11095us
>>>
>>
>> In your testing, preemption_timer kvm_intel parameter is N, right? In
>> addition, could you post the testcase?
>>
> The preemption_timer is set to Y.
>
> The testcase is an windows app that stream sound. The kernel upgrade
> trigger lots of sound crackling that get worst over the time. I suspect
> that there are other side effect on the guest but the sound was the most
> obvious on our workload.
>
> The numbers in the commit message came from a simple test program that
> loop on a 1ms sleep calls to show more formally the problem.

Could you please explain the root cause, i.e. why target_expiration results in spikes?

Regards,
Wanpeng Li
diff mbox

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 70dcb5548022..d88cf12259ef 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1173,7 +1173,7 @@  static void apic_send_ipi(struct kvm_lapic *apic)
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
 {
-	ktime_t remaining, now;
+	ktime_t remaining;
 	s64 ns;
 	u32 tmcct;
 
@@ -1184,8 +1184,7 @@  static u32 apic_get_tmcct(struct kvm_lapic *apic)
 		apic->lapic_timer.period == 0)
 		return 0;
 
-	now = ktime_get();
-	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
+	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
 	if (ktime_to_ns(remaining) < 0)
 		remaining = 0;
 
@@ -1465,32 +1464,48 @@  static void start_sw_tscdeadline(struct kvm_lapic *apic)
 
 static void start_sw_period(struct kvm_lapic *apic)
 {
+	ktime_t now;
+
+	/* lapic timer in oneshot or periodic mode */
+	now = ktime_get();
+	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
+		    * APIC_BUS_CYCLE_NS * apic->divide_count;
+
 	if (!apic->lapic_timer.period)
 		return;
 
-	if (apic_lvtt_oneshot(apic) &&
-	    ktime_after(ktime_get(),
-			apic->lapic_timer.target_expiration)) {
+	limit_periodic_timer_frequency(apic);
+
+	if (apic_lvtt_oneshot(apic)) {
 		apic_timer_expired(apic);
 		return;
 	}
 
 	hrtimer_start(&apic->lapic_timer.timer,
-		apic->lapic_timer.target_expiration,
+		ktime_add_ns(now, apic->lapic_timer.period),
 		HRTIMER_MODE_ABS_PINNED);
+
+	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
+		   PRIx64 ", "
+		   "timer initial count 0x%x, period %lldns, "
+		   "expire @ 0x%016" PRIx64 ".\n", __func__,
+		   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
+		   kvm_lapic_get_reg(apic, APIC_TMICT),
+		   apic->lapic_timer.period,
+		   ktime_to_ns(ktime_add_ns(now,
+				apic->lapic_timer.period)));
 }
 
 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
 {
-	ktime_t now, remaining;
+	ktime_t remaining;
 	u64 ns_remaining_old, ns_remaining_new;
 
 	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
 		* APIC_BUS_CYCLE_NS * apic->divide_count;
 	limit_periodic_timer_frequency(apic);
 
-	now = ktime_get();
-	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
+	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
 	if (ktime_to_ns(remaining) < 0)
 		remaining = 0;
 
@@ -1501,15 +1516,10 @@  static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_diviso
 	apic->lapic_timer.tscdeadline +=
 		nsec_to_cycles(apic->vcpu, ns_remaining_new) -
 		nsec_to_cycles(apic->vcpu, ns_remaining_old);
-	apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
 }
 
 static bool set_target_expiration(struct kvm_lapic *apic)
 {
-	ktime_t now;
-	u64 tscl = rdtsc();
-
-	now = ktime_get();
 	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
 		* APIC_BUS_CYCLE_NS * apic->divide_count;
 
@@ -1520,19 +1530,8 @@  static bool set_target_expiration(struct kvm_lapic *apic)
 
 	limit_periodic_timer_frequency(apic);
 
-	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
-		   PRIx64 ", "
-		   "timer initial count 0x%x, period %lldns, "
-		   "expire @ 0x%016" PRIx64 ".\n", __func__,
-		   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-		   kvm_lapic_get_reg(apic, APIC_TMICT),
-		   apic->lapic_timer.period,
-		   ktime_to_ns(ktime_add_ns(now,
-				apic->lapic_timer.period)));
-
-	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, rdtsc()) +
 		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
-	apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
 
 	return true;
 }
@@ -1541,9 +1540,6 @@  static void advance_periodic_target_expiration(struct kvm_lapic *apic)
 {
 	apic->lapic_timer.tscdeadline +=
 		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
-	apic->lapic_timer.target_expiration =
-		ktime_add_ns(apic->lapic_timer.target_expiration,
-				apic->lapic_timer.period);
 }
 
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -2216,7 +2212,6 @@  void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 			apic->lapic_timer.tscdeadline = 0;
 		if (apic_lvtt_oneshot(apic)) {
 			apic->lapic_timer.tscdeadline = 0;
-			apic->lapic_timer.target_expiration = 0;
 		}
 		atomic_set(&apic->lapic_timer.pending, 0);
 	}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index edce055e9fd7..56823b159e9b 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -19,7 +19,6 @@ 
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
-	ktime_t target_expiration;
 	u32 timer_mode;
 	u32 timer_mode_mask;
 	u64 tscdeadline;