diff mbox series

KVM: LAPIC: Reset timer_advance_ns if timer mode switch

Message ID 1598578508-14134-1-git-send-email-wanpengli@tencent.com (mailing list archive)
State New, archived
Headers show
Series KVM: LAPIC: Reset timer_advance_ns if timer mode switch | expand

Commit Message

Wanpeng Li Aug. 28, 2020, 1:35 a.m. UTC
From: Wanpeng Li <wanpengli@tencent.com>

per-vCPU timer_advance_ns should be set to 0 if the timer mode is not tscdeadline;
otherwise we waste CPU cycles in the function lapic_timer_int_injected(),
especially on AMD platforms, which don't support tscdeadline mode. We can
reset timer_advance_ns to the initial value when switching back to tscdeadline
timer mode.

Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/lapic.c | 6 ++++++
 1 file changed, 6 insertions(+)

Comments

Vitaly Kuznetsov Aug. 31, 2020, 12:48 p.m. UTC | #1
Wanpeng Li <kernellwp@gmail.com> writes:

> From: Wanpeng Li <wanpengli@tencent.com>
>
> per-vCPU timer_advance_ns should be set to 0 if timer mode is not tscdeadline 
> otherwise we waste cpu cycles in the function lapic_timer_int_injected(), 

lapic_timer_int_injected is just a test, kvm_wait_lapic_expire()
(__kvm_wait_lapic_expire()) maybe?

> especially on AMD platform which doesn't support tscdeadline mode. We can 
> reset timer_advance_ns to the initial value if switch back to
> tscdealine

'tscdeadline'

> timer mode.
>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kvm/lapic.c | 6 ++++++
>  1 file changed, 6 insertions(+)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 654649b..abc296d 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1499,10 +1499,16 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
>  			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
>  			apic->lapic_timer.period = 0;
>  			apic->lapic_timer.tscdeadline = 0;
> +			if (timer_mode == APIC_LVT_TIMER_TSCDEADLINE &&
> +				lapic_timer_advance_dynamic)
> +				apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
>  		}
>  		apic->lapic_timer.timer_mode = timer_mode;
>  		limit_periodic_timer_frequency(apic);
>  	}
> +	if (timer_mode != APIC_LVT_TIMER_TSCDEADLINE &&
> +		lapic_timer_advance_dynamic)
> +		apic->lapic_timer.timer_advance_ns = 0;
>  }
>  
>  /*
Wanpeng Li Sept. 1, 2020, 3:25 a.m. UTC | #2
On Mon, 31 Aug 2020 at 20:48, Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
> Wanpeng Li <kernellwp@gmail.com> writes:
>
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > per-vCPU timer_advance_ns should be set to 0 if timer mode is not tscdeadline
> > otherwise we waste cpu cycles in the function lapic_timer_int_injected(),
>
> lapic_timer_int_injected is just a test, kvm_wait_lapic_expire()
> (__kvm_wait_lapic_expire()) maybe?

Both: the check in lapic_timer_int_injected(), the check in
__kvm_wait_lapic_expire(), and the function calls themselves all cost
cycles. With this patch we observe a ~1.3% reduction in world switch time
in the kvm-unit-tests/vmexit.flat vmcall test on an AMD server. In addition,
I think we should set apic->lapic_timer.expired_tscdeadline to 0 when
switching between tscdeadline mode and other modes on Intel, so
that we do not waste CPU cycles tuning the advance value in
adjust_lapic_timer_advance() one more time.

Wanpeng
Sean Christopherson Sept. 2, 2020, 9:23 p.m. UTC | #3
On Fri, Aug 28, 2020 at 09:35:08AM +0800, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> per-vCPU timer_advance_ns should be set to 0 if timer mode is not tscdeadline 
> otherwise we waste cpu cycles in the function lapic_timer_int_injected(), 
> especially on AMD platform which doesn't support tscdeadline mode. We can 
> reset timer_advance_ns to the initial value if switch back to tscdealine 
> timer mode.
> 
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kvm/lapic.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 654649b..abc296d 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1499,10 +1499,16 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
>  			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
>  			apic->lapic_timer.period = 0;
>  			apic->lapic_timer.tscdeadline = 0;
> +			if (timer_mode == APIC_LVT_TIMER_TSCDEADLINE &&
> +				lapic_timer_advance_dynamic)

Bad indentation.

> +				apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;

Redoing the tuning seems odd.  Doubt it will matter, but it feels weird to
have to retune the advancement just because the guest toggled between modes.

Rather than clear timer_advance_ns, can we simply move the check against
apic->lapic_timer.expired_tscdeadline much earlier?  I think that would
solve this performance hiccup, and IMO would be a logical change in any
case.  E.g. with some refactoring to avoid more duplication between VMX and
SVM:

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 35cca2e0c8026..54222f0071547 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1571,12 +1571,12 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
        apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }

-static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
        u64 guest_tsc, tsc_deadline;

-       if (apic->lapic_timer.expired_tscdeadline == 0)
+       if (!lapic_timer_int_injected(vcpu))
                return;

        tsc_deadline = apic->lapic_timer.expired_tscdeadline;
@@ -1590,13 +1590,7 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
        if (lapic_timer_advance_dynamic)
                adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
 }
-
-void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
-{
-       if (lapic_timer_int_injected(vcpu))
-               __kvm_wait_lapic_expire(vcpu);
-}
-EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
+EXPORT_SYMBOL_GPL(__kvm_wait_lapic_expire);

 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
 {
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 754f29beb83e3..64be9d751196a 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -236,7 +236,14 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)

 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);

-void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
+void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
+static inline void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+       if (lapic_in_kernel(vcpu) &&
+           vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+           vcpu->arch.apic->lapic_timer.timer_advance_ns)
+               __kvm_wait_lapic_expire(vcpu);
+}

 void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
                              unsigned long *vcpu_bitmap);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index eee7edcbe7491..dfe505a7304a3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3456,9 +3456,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
        clgi();
        kvm_load_guest_xsave_state(vcpu);

-       if (lapic_in_kernel(vcpu) &&
-               vcpu->arch.apic->lapic_timer.timer_advance_ns)
-               kvm_wait_lapic_expire(vcpu);
+       kvm_wait_lapic_expire(vcpu);

        /*
         * If this vCPU has touched SPEC_CTRL, restore the guest's value if


>  		}
>  		apic->lapic_timer.timer_mode = timer_mode;
>  		limit_periodic_timer_frequency(apic);
>  	}
> +	if (timer_mode != APIC_LVT_TIMER_TSCDEADLINE &&
> +		lapic_timer_advance_dynamic)

Bad indentation.

> +		apic->lapic_timer.timer_advance_ns = 0;
>  }
>  
>  /*
> -- 
> 2.7.4
>
Wanpeng Li Sept. 3, 2020, 10:57 a.m. UTC | #4
On Thu, 3 Sep 2020 at 05:23, Sean Christopherson
<sean.j.christopherson@intel.com> wrote:
>
> On Fri, Aug 28, 2020 at 09:35:08AM +0800, Wanpeng Li wrote:
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > per-vCPU timer_advance_ns should be set to 0 if timer mode is not tscdeadline
> > otherwise we waste cpu cycles in the function lapic_timer_int_injected(),
> > especially on AMD platform which doesn't support tscdeadline mode. We can
> > reset timer_advance_ns to the initial value if switch back to tscdealine
> > timer mode.
> >
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  arch/x86/kvm/lapic.c | 6 ++++++
> >  1 file changed, 6 insertions(+)
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 654649b..abc296d 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -1499,10 +1499,16 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
> >                       kvm_lapic_set_reg(apic, APIC_TMICT, 0);
> >                       apic->lapic_timer.period = 0;
> >                       apic->lapic_timer.tscdeadline = 0;
> > +                     if (timer_mode == APIC_LVT_TIMER_TSCDEADLINE &&
> > +                             lapic_timer_advance_dynamic)
>
> Bad indentation.
>
> > +                             apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
>
> Redoing the tuning seems odd.  Doubt it will matter, but it feels weird to
> have to retune the advancement just because the guest toggled between modes.
>
> Rather than clear timer_advance_ns, can we simply move the check against
> apic->lapic_timer.expired_tscdeadline much earlier?  I think that would
> solve this performance hiccup, and IMO would be a logical change in any
> case.  E.g. with some refactoring to avoid more duplication between VMX and
> SVM

How about something like below:

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3b32d3b..51ed4f0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1582,9 +1582,6 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
     struct kvm_lapic *apic = vcpu->arch.apic;
     u64 guest_tsc, tsc_deadline;

-    if (apic->lapic_timer.expired_tscdeadline == 0)
-        return;
-
     tsc_deadline = apic->lapic_timer.expired_tscdeadline;
     apic->lapic_timer.expired_tscdeadline = 0;
     guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
@@ -1599,7 +1596,10 @@ static void __kvm_wait_lapic_expire(struct
kvm_vcpu *vcpu)

 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
-    if (lapic_timer_int_injected(vcpu))
+    if (lapic_in_kernel(vcpu) &&
+        vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+        vcpu->arch.apic->lapic_timer.timer_advance_ns &&
+        lapic_timer_int_injected(vcpu))
         __kvm_wait_lapic_expire(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
@@ -1635,8 +1635,7 @@ static void apic_timer_expired(struct kvm_lapic
*apic, bool from_timer_fn)
     }

     if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
-        if (apic->lapic_timer.timer_advance_ns)
-            __kvm_wait_lapic_expire(vcpu);
+        kvm_wait_lapic_expire(vcpu);
         kvm_apic_inject_pending_timer_irqs(apic);
         return;
     }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0194336..19e622a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3456,9 +3456,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct
kvm_vcpu *vcpu)
     clgi();
     kvm_load_guest_xsave_state(vcpu);

-    if (lapic_in_kernel(vcpu) &&
-        vcpu->arch.apic->lapic_timer.timer_advance_ns)
-        kvm_wait_lapic_expire(vcpu);
+    kvm_wait_lapic_expire(vcpu);

     /*
      * If this vCPU has touched SPEC_CTRL, restore the guest's value if
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a544351..d6e1656 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6800,9 +6800,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
     if (enable_preemption_timer)
         vmx_update_hv_timer(vcpu);

-    if (lapic_in_kernel(vcpu) &&
-        vcpu->arch.apic->lapic_timer.timer_advance_ns)
-        kvm_wait_lapic_expire(vcpu);
+    kvm_wait_lapic_expire(vcpu);

     /*
      * If this vCPU has touched SPEC_CTRL, restore the guest's value if
Sean Christopherson Sept. 4, 2020, 4:06 p.m. UTC | #5
On Thu, Sep 03, 2020 at 06:57:00PM +0800, Wanpeng Li wrote:
> On Thu, 3 Sep 2020 at 05:23, Sean Christopherson
> <sean.j.christopherson@intel.com> wrote:
> >
> > On Fri, Aug 28, 2020 at 09:35:08AM +0800, Wanpeng Li wrote:
> > > From: Wanpeng Li <wanpengli@tencent.com>
> > >
> > > per-vCPU timer_advance_ns should be set to 0 if timer mode is not tscdeadline
> > > otherwise we waste cpu cycles in the function lapic_timer_int_injected(),
> > > especially on AMD platform which doesn't support tscdeadline mode. We can
> > > reset timer_advance_ns to the initial value if switch back to tscdealine
> > > timer mode.
> > >
> > > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > > ---
> > >  arch/x86/kvm/lapic.c | 6 ++++++
> > >  1 file changed, 6 insertions(+)
> > >
> > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > > index 654649b..abc296d 100644
> > > --- a/arch/x86/kvm/lapic.c
> > > +++ b/arch/x86/kvm/lapic.c
> > > @@ -1499,10 +1499,16 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
> > >                       kvm_lapic_set_reg(apic, APIC_TMICT, 0);
> > >                       apic->lapic_timer.period = 0;
> > >                       apic->lapic_timer.tscdeadline = 0;
> > > +                     if (timer_mode == APIC_LVT_TIMER_TSCDEADLINE &&
> > > +                             lapic_timer_advance_dynamic)
> >
> > Bad indentation.
> >
> > > +                             apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
> >
> > Redoing the tuning seems odd.  Doubt it will matter, but it feels weird to
> > have to retune the advancement just because the guest toggled between modes.
> >
> > Rather than clear timer_advance_ns, can we simply move the check against
> > apic->lapic_timer.expired_tscdeadline much earlier?  I think that would
> > solve this performance hiccup, and IMO would be a logical change in any
> > case.  E.g. with some refactoring to avoid more duplication between VMX and
> > SVM
> 
> How about something like below:

That works too.  The only reason I used the inline shenanigans was to avoid
the CALL+RET in VM-Enter when the timer hasn't expired.

> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 3b32d3b..51ed4f0 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1582,9 +1582,6 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
>      struct kvm_lapic *apic = vcpu->arch.apic;
>      u64 guest_tsc, tsc_deadline;
> 
> -    if (apic->lapic_timer.expired_tscdeadline == 0)
> -        return;
> -
>      tsc_deadline = apic->lapic_timer.expired_tscdeadline;
>      apic->lapic_timer.expired_tscdeadline = 0;
>      guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> @@ -1599,7 +1596,10 @@ static void __kvm_wait_lapic_expire(struct
> kvm_vcpu *vcpu)
> 
>  void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
>  {
> -    if (lapic_timer_int_injected(vcpu))
> +    if (lapic_in_kernel(vcpu) &&
> +        vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
> +        vcpu->arch.apic->lapic_timer.timer_advance_ns &&
> +        lapic_timer_int_injected(vcpu))
>          __kvm_wait_lapic_expire(vcpu);
>  }
>  EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
> @@ -1635,8 +1635,7 @@ static void apic_timer_expired(struct kvm_lapic
> *apic, bool from_timer_fn)
>      }
> 
>      if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
> -        if (apic->lapic_timer.timer_advance_ns)
> -            __kvm_wait_lapic_expire(vcpu);
> +        kvm_wait_lapic_expire(vcpu);
>          kvm_apic_inject_pending_timer_irqs(apic);
>          return;
>      }
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 0194336..19e622a 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3456,9 +3456,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct
> kvm_vcpu *vcpu)
>      clgi();
>      kvm_load_guest_xsave_state(vcpu);
> 
> -    if (lapic_in_kernel(vcpu) &&
> -        vcpu->arch.apic->lapic_timer.timer_advance_ns)
> -        kvm_wait_lapic_expire(vcpu);
> +    kvm_wait_lapic_expire(vcpu);
> 
>      /*
>       * If this vCPU has touched SPEC_CTRL, restore the guest's value if
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index a544351..d6e1656 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6800,9 +6800,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>      if (enable_preemption_timer)
>          vmx_update_hv_timer(vcpu);
> 
> -    if (lapic_in_kernel(vcpu) &&
> -        vcpu->arch.apic->lapic_timer.timer_advance_ns)
> -        kvm_wait_lapic_expire(vcpu);
> +    kvm_wait_lapic_expire(vcpu);
> 
>      /*
>       * If this vCPU has touched SPEC_CTRL, restore the guest's value if
diff mbox series

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 654649b..abc296d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1499,10 +1499,16 @@  static void apic_update_lvtt(struct kvm_lapic *apic)
 			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
 			apic->lapic_timer.period = 0;
 			apic->lapic_timer.tscdeadline = 0;
+			if (timer_mode == APIC_LVT_TIMER_TSCDEADLINE &&
+				lapic_timer_advance_dynamic)
+				apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
 		}
 		apic->lapic_timer.timer_mode = timer_mode;
 		limit_periodic_timer_frequency(apic);
 	}
+	if (timer_mode != APIC_LVT_TIMER_TSCDEADLINE &&
+		lapic_timer_advance_dynamic)
+		apic->lapic_timer.timer_advance_ns = 0;
 }
 
 /*