[v3,5/5] KVM: LAPIC: Optimize timer latency further

Message ID 1557975980-9875-6-git-send-email-wanpengli@tencent.com (mailing list archive)
State New, archived
Series KVM: LAPIC: Optimize timer latency further

Commit Message

Wanpeng Li May 16, 2019, 3:06 a.m. UTC
From: Wanpeng Li <wanpengli@tencent.com>

LAPIC timer advancement tries to hide the hypervisor overhead between the
host's emulated timer firing and the guest observing that the timer has
fired. However, it only hides the time between
apic_timer_fn/handle_preemption_timer -> wait_lapic_expire, not the gap up
to the real point of vmentry mentioned in the original commit d0659d946be0
("KVM: x86: add option to advance tscdeadline hrtimer expiration"). There
are 700+ CPU cycles between the end of wait_lapic_expire and the world
switch on my Haswell desktop.

This patch narrows that last gap (wait_lapic_expire -> world switch) by
taking the real overhead between apic_timer_fn/handle_preemption_timer and
the world switch into account when adaptively tuning the timer
advancement. The patch reduces latency by ~40% (from ~1600+ cycles to
~1000+ cycles on a Haswell desktop) for kvm-unit-tests/tscdeadline_latency
when testing busy waits.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/lapic.c   | 3 ++-
 arch/x86/kvm/lapic.h   | 2 +-
 arch/x86/kvm/svm.c     | 4 ++++
 arch/x86/kvm/vmx/vmx.c | 4 ++++
 arch/x86/kvm/x86.c     | 3 ---
 5 files changed, 11 insertions(+), 5 deletions(-)

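For readers skimming the archive, here is a minimal standalone sketch of
the adaptive advance idea being tuned in this series. It is not the KVM
implementation: the initial guess, the step divisor (8), and the simulated
100k-cycle deadlines are illustrative assumptions; only the shape of the
algorithm (fire early by the current advance, busy-wait out the remainder,
nudge the advance toward the measured error) follows the code under review.

/*
 * Standalone sketch of adaptive timer advancement, NOT the KVM code.
 * A "timer" fires advance_cycles early; the remainder is busy-waited
 * out, and the advance converges toward the measured overhead.
 */
#include <stdint.h>
#include <stdio.h>
#include <x86intrin.h>		/* __rdtsc() */

static uint64_t advance_cycles = 2000;	/* assumed initial guess */

static void wait_deadline_and_tune(uint64_t tsc_deadline)
{
	/* >0 means we arrived late, <0 means early. */
	int64_t delta = (int64_t)(__rdtsc() - tsc_deadline);

	/* Arrived early: burn the remaining cycles up to the deadline. */
	while (__rdtsc() < tsc_deadline)
		;

	/* Nudge the advance toward the observed error (divisor assumed). */
	if (delta < 0)
		advance_cycles -= (uint64_t)(-delta) / 8;	/* too early */
	else
		advance_cycles += (uint64_t)delta / 8;		/* too late  */
}

int main(void)
{
	for (int i = 0; i < 16; i++) {
		/* Simulate a timer armed 100k cycles out that "fires"
		 * advance_cycles before its deadline. */
		uint64_t deadline = __rdtsc() + 100000;

		while (__rdtsc() < deadline - advance_cycles)
			;
		wait_deadline_and_tune(deadline);
		printf("iter %2d: advance ~ %llu cycles\n",
		       i, (unsigned long long)advance_cycles);
	}
	return 0;
}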
Comments

Sean Christopherson May 17, 2019, 7:50 p.m. UTC | #1
On Thu, May 16, 2019 at 11:06:20AM +0800, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> LAPIC timer advancement tries to hide the hypervisor overhead between the
> host's emulated timer firing and the guest observing that the timer has
> fired. However, it only hides the time between
> apic_timer_fn/handle_preemption_timer -> wait_lapic_expire, not the gap up
> to the real point of vmentry mentioned in the original commit d0659d946be0
> ("KVM: x86: add option to advance tscdeadline hrtimer expiration"). There
> are 700+ CPU cycles between the end of wait_lapic_expire and the world
> switch on my Haswell desktop.
> 
> This patch narrows that last gap (wait_lapic_expire -> world switch) by
> taking the real overhead between apic_timer_fn/handle_preemption_timer and
> the world switch into account when adaptively tuning the timer
> advancement. The patch reduces latency by ~40% (from ~1600+ cycles to
> ~1000+ cycles on a Haswell desktop) for kvm-unit-tests/tscdeadline_latency
> when testing busy waits.
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Cc: Liran Alon <liran.alon@oracle.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kvm/lapic.c   | 3 ++-
>  arch/x86/kvm/lapic.h   | 2 +-
>  arch/x86/kvm/svm.c     | 4 ++++
>  arch/x86/kvm/vmx/vmx.c | 4 ++++
>  arch/x86/kvm/x86.c     | 3 ---
>  5 files changed, 11 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index af38ece..63513de 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1531,7 +1531,7 @@ static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
>  	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
>  }
>  
> -void wait_lapic_expire(struct kvm_vcpu *vcpu)
> +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  	u64 guest_tsc, tsc_deadline;
> @@ -1553,6 +1553,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
>  	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
>  		adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
>  }
> +EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
>  
>  static void start_sw_tscdeadline(struct kvm_lapic *apic)
>  {
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 3e72a25..f974a3d 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -220,7 +220,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
>  
>  bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>  
> -void wait_lapic_expire(struct kvm_vcpu *vcpu);
> +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
>  
>  bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
>  			struct kvm_vcpu **dest_vcpu);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 6b92eaf..955cfcb 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -5638,6 +5638,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
>  	clgi();
>  	kvm_load_guest_xcr0(vcpu);
>  
> +	if (lapic_in_kernel(vcpu) &&
> +		vcpu->arch.apic->lapic_timer.timer_advance_ns)

Nit: align the two lines of the if statement, doing so makes it easier to
     differentiate between the condition and execution, e.g.:

        if (lapic_in_kernel(vcpu) &&
            vcpu->arch.apic->lapic_timer.timer_advance_ns)
                kvm_wait_lapic_expire(vcpu);

> +		kvm_wait_lapic_expire(vcpu);
> +
>  	/*
>  	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
>  	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index e1fa935..771d3bf 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6423,6 +6423,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  
>  	vmx_update_hv_timer(vcpu);
>  
> +	if (lapic_in_kernel(vcpu) &&
> +		vcpu->arch.apic->lapic_timer.timer_advance_ns)
> +		kvm_wait_lapic_expire(vcpu);

Same comment as above.  With those fixed:

Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>

> +
>  	/*
>  	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
>  	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4a7b00c..e154f52 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7903,9 +7903,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	}
>  
>  	trace_kvm_entry(vcpu->vcpu_id);
> -	if (lapic_in_kernel(vcpu) &&
> -	    vcpu->arch.apic->lapic_timer.timer_advance_ns)
> -		wait_lapic_expire(vcpu);
>  	guest_enter_irqoff();
>  
>  	fpregs_assert_state_consistent();
> -- 
> 2.7.4
>
Wanpeng Li May 20, 2019, 8:19 a.m. UTC | #2
On Sat, 18 May 2019 at 03:50, Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Thu, May 16, 2019 at 11:06:20AM +0800, Wanpeng Li wrote:
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > LAPIC timer advancement tries to hide the hypervisor overhead between the
> > host's emulated timer firing and the guest observing that the timer has
> > fired. However, it only hides the time between
> > apic_timer_fn/handle_preemption_timer -> wait_lapic_expire, not the gap up
> > to the real point of vmentry mentioned in the original commit d0659d946be0
> > ("KVM: x86: add option to advance tscdeadline hrtimer expiration"). There
> > are 700+ CPU cycles between the end of wait_lapic_expire and the world
> > switch on my Haswell desktop.
> >
> > This patch narrows that last gap (wait_lapic_expire -> world switch) by
> > taking the real overhead between apic_timer_fn/handle_preemption_timer and
> > the world switch into account when adaptively tuning the timer
> > advancement. The patch reduces latency by ~40% (from ~1600+ cycles to
> > ~1000+ cycles on a Haswell desktop) for kvm-unit-tests/tscdeadline_latency
> > when testing busy waits.
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> > Cc: Liran Alon <liran.alon@oracle.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  arch/x86/kvm/lapic.c   | 3 ++-
> >  arch/x86/kvm/lapic.h   | 2 +-
> >  arch/x86/kvm/svm.c     | 4 ++++
> >  arch/x86/kvm/vmx/vmx.c | 4 ++++
> >  arch/x86/kvm/x86.c     | 3 ---
> >  5 files changed, 11 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index af38ece..63513de 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -1531,7 +1531,7 @@ static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
> >       apic->lapic_timer.timer_advance_ns = timer_advance_ns;
> >  }
> >
> > -void wait_lapic_expire(struct kvm_vcpu *vcpu)
> > +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
> >  {
> >       struct kvm_lapic *apic = vcpu->arch.apic;
> >       u64 guest_tsc, tsc_deadline;
> > @@ -1553,6 +1553,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
> >       if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
> >               adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
> >  }
> > +EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
> >
> >  static void start_sw_tscdeadline(struct kvm_lapic *apic)
> >  {
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 3e72a25..f974a3d 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -220,7 +220,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
> >
> >  bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
> >
> > -void wait_lapic_expire(struct kvm_vcpu *vcpu);
> > +void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
> >
> >  bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
> >                       struct kvm_vcpu **dest_vcpu);
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 6b92eaf..955cfcb 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -5638,6 +5638,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> >       clgi();
> >       kvm_load_guest_xcr0(vcpu);
> >
> > +     if (lapic_in_kernel(vcpu) &&
> > +             vcpu->arch.apic->lapic_timer.timer_advance_ns)
>
> Nit: align the two lines of the if statement, doing so makes it easier to
>      differentiate between the condition and execution, e.g.:
>
>         if (lapic_in_kernel(vcpu) &&
>             vcpu->arch.apic->lapic_timer.timer_advance_ns)
>                 kvm_wait_lapic_expire(vcpu);

This makes checkpatch.pl complain:

WARNING: suspect code indent for conditional statements (8, 24)
#94: FILE: arch/x86/kvm/vmx/vmx.c:6436:
+    if (lapic_in_kernel(vcpu) &&
[...]
+            kvm_wait_lapic_expire(vcpu);

Regards,
Wanpeng Li

>
> > +             kvm_wait_lapic_expire(vcpu);
> > +
> >       /*
> >        * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> >        * it's non-zero. Since vmentry is serialising on affected CPUs, there
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index e1fa935..771d3bf 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -6423,6 +6423,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> >
> >       vmx_update_hv_timer(vcpu);
> >
> > +     if (lapic_in_kernel(vcpu) &&
> > +             vcpu->arch.apic->lapic_timer.timer_advance_ns)
> > +             kvm_wait_lapic_expire(vcpu);
>
> Same comment as above.  With those fixed:
>
> Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
>
> > +
> >       /*
> >        * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> >        * it's non-zero. Since vmentry is serialising on affected CPUs, there
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 4a7b00c..e154f52 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -7903,9 +7903,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
> >       }
> >
> >       trace_kvm_entry(vcpu->vcpu_id);
> > -     if (lapic_in_kernel(vcpu) &&
> > -         vcpu->arch.apic->lapic_timer.timer_advance_ns)
> > -             wait_lapic_expire(vcpu);
> >       guest_enter_irqoff();
> >
> >       fpregs_assert_state_consistent();
> > --
> > 2.7.4
> >
Sean Christopherson May 20, 2019, 3:08 p.m. UTC | #3
On Mon, May 20, 2019 at 04:19:47PM +0800, Wanpeng Li wrote:
> On Sat, 18 May 2019 at 03:50, Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> >
> > On Thu, May 16, 2019 at 11:06:20AM +0800, Wanpeng Li wrote:
> > > From: Wanpeng Li <wanpengli@tencent.com>
> > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > > index 6b92eaf..955cfcb 100644
> > > --- a/arch/x86/kvm/svm.c
> > > +++ b/arch/x86/kvm/svm.c
> > > @@ -5638,6 +5638,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> > >       clgi();
> > >       kvm_load_guest_xcr0(vcpu);
> > >
> > > +     if (lapic_in_kernel(vcpu) &&
> > > +             vcpu->arch.apic->lapic_timer.timer_advance_ns)
> >
> > Nit: align the two lines of the if statement, doing so makes it easier to
> >      differentiate between the condition and execution, e.g.:
> >
> >         if (lapic_in_kernel(vcpu) &&
> >             vcpu->arch.apic->lapic_timer.timer_advance_ns)
> >                 kvm_wait_lapic_expire(vcpu);
> 
> This makes checkpatch.pl complain:
> 
> WARNING: suspect code indent for conditional statements (8, 24)
> #94: FILE: arch/x86/kvm/vmx/vmx.c:6436:
> +    if (lapic_in_kernel(vcpu) &&
> [...]
> +            kvm_wait_lapic_expire(vcpu);

That warning fires when the last line of the check and the code block of
the if statement are aligned (and the indent isn't a full tab stop, which
is why your original code isn't flagged).  Examples with explicit leading
whitespace:

Good:

\tif (lapic_in_kernel(vcpu) &&
\t\s\s\s\svcpu->arch.apic->lapic_timer.timer_advance_ns)
\t\tkvm_wait_lapic_expire(vcpu);

Bad:

\tif (lapic_in_kernel(vcpu) &&
\t\s\s\s\svcpu->arch.apic->lapic_timer.timer_advance_ns)
\t\s\s\s\skvm_wait_lapic_expire(vcpu);
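Rendered with literal whitespace (tab stops every 8 columns), the two
examples above come out as follows; checkpatch flags the second because the
continuation of the condition and the body land in the same column without
that indent being a full tab stop:

	/* Good: body one full tab deeper than the aligned continuation. */
	if (lapic_in_kernel(vcpu) &&
	    vcpu->arch.apic->lapic_timer.timer_advance_ns)
		kvm_wait_lapic_expire(vcpu);

	/* Bad: continuation and body both at one tab plus four spaces. */
	if (lapic_in_kernel(vcpu) &&
	    vcpu->arch.apic->lapic_timer.timer_advance_ns)
	    kvm_wait_lapic_expire(vcpu);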
Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index af38ece..63513de 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1531,7 +1531,7 @@ static inline void adaptive_tune_timer_advancement(struct kvm_vcpu *vcpu,
 	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
 
-void wait_lapic_expire(struct kvm_vcpu *vcpu)
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 guest_tsc, tsc_deadline;
@@ -1553,6 +1553,7 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
 	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
 		adaptive_tune_timer_advancement(vcpu, apic->lapic_timer.advance_expire_delta);
 }
+EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
 
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
 {
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 3e72a25..f974a3d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -220,7 +220,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
 
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
-void wait_lapic_expire(struct kvm_vcpu *vcpu);
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6b92eaf..955cfcb 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5638,6 +5638,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	clgi();
 	kvm_load_guest_xcr0(vcpu);
 
+	if (lapic_in_kernel(vcpu) &&
+		vcpu->arch.apic->lapic_timer.timer_advance_ns)
+		kvm_wait_lapic_expire(vcpu);
+
 	/*
 	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
 	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e1fa935..771d3bf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6423,6 +6423,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vmx_update_hv_timer(vcpu);
 
+	if (lapic_in_kernel(vcpu) &&
+		vcpu->arch.apic->lapic_timer.timer_advance_ns)
+		kvm_wait_lapic_expire(vcpu);
+
 	/*
 	 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
 	 * it's non-zero. Since vmentry is serialising on affected CPUs, there
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4a7b00c..e154f52 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7903,9 +7903,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	trace_kvm_entry(vcpu->vcpu_id);
-	if (lapic_in_kernel(vcpu) &&
-	    vcpu->arch.apic->lapic_timer.timer_advance_ns)
-		wait_lapic_expire(vcpu);
 	guest_enter_irqoff();
 
 	fpregs_assert_state_consistent();