diff mbox series

[v3,5/5] KVM: LAPIC: Narrow the timer latency between wait_lapic_expire and world switch

Message ID 1621260028-6467-5-git-send-email-wanpengli@tencent.com (mailing list archive)
State New, archived
Headers show
Series [v3,1/5] KVM: exit halt polling on need_resched() for both book3s and generic halt-polling | expand

Commit Message

Wanpeng Li May 17, 2021, 2 p.m. UTC
From: Wanpeng Li <wanpengli@tencent.com>

Treat the lapic_timer_advance_ns automatic tuning logic as hypervisor
overhead and move it before wait_lapic_expire, instead of leaving it
between wait_lapic_expire and the world switch. The wait duration should
be calculated from an up-to-date guest_tsc that is read after the overhead
of the automatic tuning logic. This patch reduces the latency by ~30+
cycles for kvm-unit-tests/tscdeadline-latency when testing busy waits.

Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kvm/lapic.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

Comments

Sean Christopherson May 17, 2021, 5:51 p.m. UTC | #1
On Mon, May 17, 2021, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> Let's treat lapic_timer_advance_ns automatically tune logic as hypervisor
> overhead, move it before wait_lapic_expire instead of between wait_lapic_expire 
> and the world switch, the wait duration should be calculated by the 
> up-to-date guest_tsc after the overhead of automatically tune logic. This 
> patch reduces ~30+ cycles for kvm-unit-tests/tscdeadline-latency when testing 
> busy waits.
> 
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kvm/lapic.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index c0ebef560bd1..552d2acf89ab 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1598,11 +1598,12 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
>  	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>  	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
>  
> -	if (guest_tsc < tsc_deadline)
> -		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
> -
>  	if (lapic_timer_advance_dynamic)
>  		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
> +
> +	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());

This is redundant and unnecessary if automatic tuning is disabled, or if the
timer did not arrive early.  A comment would also be helpful.  E.g. I think this
would micro-optimize all paths:

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c0ebef560bd1..5d91f2367c31 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1598,11 +1598,19 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
        apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;

+       if (lapic_timer_advance_dynamic) {
+               adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
+               /*
+                * If the timer fired early, reread the TSC to account for the
+                * overhead of the above adjustment to avoid waiting longer
+                * than is necessary.
+                */
+               if (guest_tsc < tsc_deadline)
+                       guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+       }
+
        if (guest_tsc < tsc_deadline)
                __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
-
-       if (lapic_timer_advance_dynamic)
-               adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
 }

 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)

> +	if (guest_tsc < tsc_deadline)
> +		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
>  }
>  
>  void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
> -- 
> 2.25.1
>
Wanpeng Li May 18, 2021, 12:03 p.m. UTC | #2
On Tue, 18 May 2021 at 01:51, Sean Christopherson <seanjc@google.com> wrote:
>
> On Mon, May 17, 2021, Wanpeng Li wrote:
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > Let's treat lapic_timer_advance_ns automatically tune logic as hypervisor
> > overhead, move it before wait_lapic_expire instead of between wait_lapic_expire
> > and the world switch, the wait duration should be calculated by the
> > up-to-date guest_tsc after the overhead of automatically tune logic. This
> > patch reduces ~30+ cycles for kvm-unit-tests/tscdeadline-latency when testing
> > busy waits.
> >
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  arch/x86/kvm/lapic.c | 7 ++++---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index c0ebef560bd1..552d2acf89ab 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -1598,11 +1598,12 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
> >       guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> >       apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
> >
> > -     if (guest_tsc < tsc_deadline)
> > -             __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
> > -
> >       if (lapic_timer_advance_dynamic)
> >               adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
> > +
> > +     guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>
> This is redundant and unnecessary if automatic tuning is disabled, or if the
> timer did not arrive early.  A comment would also be helpful.  E.g. I think this
> would micro-optimize all paths:

Will do it in v4, thanks.

    Wanpeng

>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index c0ebef560bd1..5d91f2367c31 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1598,11 +1598,19 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
>         guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
>         apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
>
> +       if (lapic_timer_advance_dynamic) {
> +               adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
> +               /*
> +                * If the timer fired early, reread the TSC to account for the
> +                * overhead of the above adjustment to avoid waiting longer
> +                * than is necessary.
> +                */
> +               if (guest_tsc < tsc_deadline)
> +                       guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
> +       }
> +
>         if (guest_tsc < tsc_deadline)
>                 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
> -
> -       if (lapic_timer_advance_dynamic)
> -               adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
>  }
>
>  void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
>
> > +     if (guest_tsc < tsc_deadline)
> > +             __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
> >  }
> >
> >  void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
> > --
> > 2.25.1
> >
diff mbox series

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c0ebef560bd1..552d2acf89ab 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1598,11 +1598,12 @@  static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
 	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
 
-	if (guest_tsc < tsc_deadline)
-		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
-
 	if (lapic_timer_advance_dynamic)
 		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
+
+	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+	if (guest_tsc < tsc_deadline)
+		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
 }
 
 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)