diff mbox

[v3,5/5] KVM: LAPIC: add APIC Timer periodic/oneshot mode VMX preemption timer support

Message ID 1477304593-3453-6-git-send-email-wanpeng.li@hotmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wanpeng Li Oct. 24, 2016, 10:23 a.m. UTC
From: Wanpeng Li <wanpeng.li@hotmail.com>

Most Windows guests still utilize APIC Timer periodic/oneshot mode
instead of tsc-deadline mode, and the APIC Timer periodic/oneshot
mode is still emulated by a high-overhead hrtimer on the host. This
patch converts the expected expiration time of the periodic/oneshot
mode to a guest deadline tsc in order to leverage the VMX preemption
timer logic used for APIC Timer tsc-deadline mode. After each
preemption timer vmexit, the preemption timer is restarted to emulate
the LVTT current-count register being automatically reloaded from the
initial-count register when the count reaches 0. This patch saves
~5600 cycles for each virtualized APIC Timer periodic mode operation.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
 arch/x86/kvm/lapic.c | 117 ++++++++++++++++++++++++++++++++++++++-------------
 arch/x86/kvm/lapic.h |   1 +
 2 files changed, 89 insertions(+), 29 deletions(-)

Comments

Radim Krčmář Oct. 24, 2016, 2:50 p.m. UTC | #1
2016-10-24 18:23+0800, Wanpeng Li:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> Most windows guests still utilize APIC Timer periodic/oneshot mode
> instead of tsc-deadline mode, and the APIC Timer periodic/oneshot
> mode are still emulated by high overhead hrtimer on host. This patch
> converts the expected expire time of the periodic/oneshot mode to
> guest deadline tsc in order to leverage VMX preemption timer logic
> for APIC Timer tsc-deadline mode. After each preemption timer vmexit
> preemption timer is restarted to emulate LVTT current-count register
> is automatically reloaded from the initial-count register when the
> count reaches 0. This patch reduces ~5600 cycles for each APIC Timer
> periodic mode operation virtualization.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Yunhong Jiang <yunhong.jiang@intel.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
> ---
>  arch/x86/kvm/lapic.c | 117 ++++++++++++++++++++++++++++++++++++++-------------
>  arch/x86/kvm/lapic.h |   1 +
>  2 files changed, 89 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> @@ -2005,8 +2060,12 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
>  
>  	if (atomic_read(&apic->lapic_timer.pending) > 0) {
>  		kvm_apic_local_deliver(apic, APIC_LVTT);
> -		if (apic_lvtt_tscdeadline(apic))
> +		if (apic_lvtt_period(apic))

This should remain apic_lvtt_tscdeadline().  I can change that when
applying to kvm/queue.

> +			apic->lapic_timer.tscdeadline = 0;
> +		if (apic_lvtt_oneshot(apic)) {
>  			apic->lapic_timer.tscdeadline = 0;
> +			apic->lapic_timer.target_expiration = ktime_set(0, 0);
> +		}
>  		atomic_set(&apic->lapic_timer.pending, 0);
>  	}
>  }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li Oct. 24, 2016, 11:33 p.m. UTC | #2
2016-10-24 22:50 GMT+08:00 Radim Krčmář <rkrcmar@redhat.com>:
> 2016-10-24 18:23+0800, Wanpeng Li:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> Most windows guests still utilize APIC Timer periodic/oneshot mode
>> instead of tsc-deadline mode, and the APIC Timer periodic/oneshot
>> mode are still emulated by high overhead hrtimer on host. This patch
>> converts the expected expire time of the periodic/oneshot mode to
>> guest deadline tsc in order to leverage VMX preemption timer logic
>> for APIC Timer tsc-deadline mode. After each preemption timer vmexit
>> preemption timer is restarted to emulate LVTT current-count register
>> is automatically reloaded from the initial-count register when the
>> count reaches 0. This patch reduces ~5600 cycles for each APIC Timer
>> periodic mode operation virtualization.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Cc: Yunhong Jiang <yunhong.jiang@intel.com>
>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>> ---
>>  arch/x86/kvm/lapic.c | 117 ++++++++++++++++++++++++++++++++++++++-------------
>>  arch/x86/kvm/lapic.h |   1 +
>>  2 files changed, 89 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>> @@ -2005,8 +2060,12 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
>>
>>       if (atomic_read(&apic->lapic_timer.pending) > 0) {
>>               kvm_apic_local_deliver(apic, APIC_LVTT);
>> -             if (apic_lvtt_tscdeadline(apic))
>> +             if (apic_lvtt_period(apic))
>
> This should remain apic_lvtt_tscdeadline().  I can change that when
> applying to kvm/queue.

Thanks Radim, I made a mistake here.

Regards,
Wanpeng Li

>
>> +                     apic->lapic_timer.tscdeadline = 0;
>> +             if (apic_lvtt_oneshot(apic)) {
>>                       apic->lapic_timer.tscdeadline = 0;
>> +                     apic->lapic_timer.target_expiration = ktime_set(0, 0);
>> +             }
>>               atomic_set(&apic->lapic_timer.pending, 0);
>>       }
>>  }
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0354a79..827ef5d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1090,7 +1090,7 @@  static void apic_send_ipi(struct kvm_lapic *apic)
 
 static u32 apic_get_tmcct(struct kvm_lapic *apic)
 {
-	ktime_t remaining;
+	ktime_t remaining, now;
 	s64 ns;
 	u32 tmcct;
 
@@ -1101,7 +1101,8 @@  static u32 apic_get_tmcct(struct kvm_lapic *apic)
 		apic->lapic_timer.period == 0)
 		return 0;
 
-	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
+	now = apic->lapic_timer.timer.base->get_time();
+	remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
 	if (ktime_to_ns(remaining) < 0)
 		remaining = ktime_set(0, 0);
 
@@ -1351,13 +1352,31 @@  static void start_sw_period(struct kvm_lapic *apic)
 {
 	ktime_t now;
 
-	/* lapic timer in oneshot or periodic mode */
 	now = apic->lapic_timer.timer.base->get_time();
-	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
-		    * APIC_BUS_CYCLE_NS * apic->divide_count;
 
 	if (!apic->lapic_timer.period)
 		return;
+
+	if (likely(ktime_after(apic->lapic_timer.target_expiration, now)))
+		hrtimer_start(&apic->lapic_timer.timer,
+			apic->lapic_timer.target_expiration,
+			HRTIMER_MODE_ABS_PINNED);
+	else
+		apic_timer_expired(apic);
+}
+
+static bool set_target_expiration(struct kvm_lapic *apic)
+{
+	ktime_t now;
+	u64 tscl = rdtsc();
+
+	now = apic->lapic_timer.timer.base->get_time();
+	apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
+		* APIC_BUS_CYCLE_NS * apic->divide_count;
+
+	if (!apic->lapic_timer.period)
+		return false;
+
 	/*
 	 * Do not allow the guest to program periodic timers with small
 	 * interval, since the hrtimers are not throttled by the host
@@ -1376,10 +1395,6 @@  static void start_sw_period(struct kvm_lapic *apic)
 		}
 	}
 
-	hrtimer_start(&apic->lapic_timer.timer,
-		      ktime_add_ns(now, apic->lapic_timer.period),
-		      HRTIMER_MODE_ABS_PINNED);
-
 	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
 		   PRIx64 ", "
 		   "timer initial count 0x%x, period %lldns, "
@@ -1389,6 +1404,20 @@  static void start_sw_period(struct kvm_lapic *apic)
 		   apic->lapic_timer.period,
 		   ktime_to_ns(ktime_add_ns(now,
 				apic->lapic_timer.period)));
+
+	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+	apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
+
+	return true;
+}
+
+static void advance_periodic_target_expiration(struct kvm_lapic *apic)
+{
+	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, rdtsc()) +
+		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+	apic->lapic_timer.target_expiration = ktime_add_ns(apic->lapic_timer.timer.base->get_time(),
+		apic->lapic_timer.period);
 }
 
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -1406,22 +1435,12 @@  static void cancel_hv_timer(struct kvm_lapic *apic)
 	apic->lapic_timer.hv_timer_in_use = false;
 }
 
-void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
-	WARN_ON(swait_active(&vcpu->wq));
-	cancel_hv_timer(apic);
-	apic_timer_expired(apic);
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
-
 static bool start_hv_timer(struct kvm_lapic *apic)
 {
 	u64 tscdeadline = apic->lapic_timer.tscdeadline;
 
-	if (atomic_read(&apic->lapic_timer.pending) ||
+	if ((atomic_read(&apic->lapic_timer.pending) &&
+		!apic_lvtt_period(apic)) ||
 		kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
 		if (apic->lapic_timer.hv_timer_in_use)
 			cancel_hv_timer(apic);
@@ -1430,7 +1449,8 @@  static bool start_hv_timer(struct kvm_lapic *apic)
 		hrtimer_cancel(&apic->lapic_timer.timer);
 
 		/* In case the sw timer triggered in the window */
-		if (atomic_read(&apic->lapic_timer.pending))
+		if (atomic_read(&apic->lapic_timer.pending) &&
+			!apic_lvtt_period(apic))
 			cancel_hv_timer(apic);
 	}
 	trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
@@ -1438,14 +1458,43 @@  static bool start_hv_timer(struct kvm_lapic *apic)
 	return apic->lapic_timer.hv_timer_in_use;
 }
 
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
+	WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+	WARN_ON(swait_active(&vcpu->wq));
+	cancel_hv_timer(apic);
+	apic_timer_expired(apic);
+
+	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
+		advance_periodic_target_expiration(apic);
+		if (!start_hv_timer(apic))
+			start_sw_period(apic);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+
 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	WARN_ON(apic->lapic_timer.hv_timer_in_use);
 
-	if (apic_lvtt_tscdeadline(apic))
-		start_hv_timer(apic);
+	if (apic_lvtt_period(apic)) {
+		ktime_t remaining, now;
+		u64 tscl = rdtsc();
+
+		now = apic->lapic_timer.timer.base->get_time();
+		remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
+		if (ktime_to_ns(remaining) < 0)
+			remaining = ktime_set(0, 0);
+
+		apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+			nsec_to_cycles(apic->vcpu, ktime_to_ns(remaining));
+		apic->lapic_timer.target_expiration = ktime_add_ns(now, ktime_to_ns(remaining));
+	}
+	start_hv_timer(apic);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
 
@@ -1462,7 +1511,10 @@  void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
 	if (atomic_read(&apic->lapic_timer.pending))
 		return;
 
-	start_sw_tscdeadline(apic);
+	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
+		start_sw_period(apic);
+	else if (apic_lvtt_tscdeadline(apic))
+		start_sw_tscdeadline(apic);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
 
@@ -1470,9 +1522,11 @@  static void start_apic_timer(struct kvm_lapic *apic)
 {
 	atomic_set(&apic->lapic_timer.pending, 0);
 
-	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
-		start_sw_period(apic);
-	else if (apic_lvtt_tscdeadline(apic)) {
+	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
+		if (set_target_expiration(apic) &&
+			!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
+			start_sw_period(apic);
+	} else if (apic_lvtt_tscdeadline(apic)) {
 		if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
 			start_sw_tscdeadline(apic);
 	}
@@ -1923,6 +1977,7 @@  static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 	apic_timer_expired(apic);
 
 	if (lapic_is_periodic(apic)) {
+		advance_periodic_target_expiration(apic);
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
 		return HRTIMER_RESTART;
 	} else
@@ -2005,8 +2060,12 @@  void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 
 	if (atomic_read(&apic->lapic_timer.pending) > 0) {
 		kvm_apic_local_deliver(apic, APIC_LVTT);
-		if (apic_lvtt_tscdeadline(apic))
+		if (apic_lvtt_period(apic))
+			apic->lapic_timer.tscdeadline = 0;
+		if (apic_lvtt_oneshot(apic)) {
 			apic->lapic_timer.tscdeadline = 0;
+			apic->lapic_timer.target_expiration = ktime_set(0, 0);
+		}
 		atomic_set(&apic->lapic_timer.pending, 0);
 	}
 }
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 031db26..e0c8023 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -15,6 +15,7 @@ 
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
+	ktime_t target_expiration;
 	u32 timer_mode;
 	u32 timer_mode_mask;
 	u64 tscdeadline;