Message ID | 1560474949-20497-2-git-send-email-wanpengli@tencent.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v4,1/2] KVM: LAPIC: Optimize timer latency consider world switch time | expand |
ping, On Fri, 14 Jun 2019 at 09:15, Wanpeng Li <kernellwp@gmail.com> wrote: > > From: Wanpeng Li <wanpengli@tencent.com> > > Advance lapic timer tries to hidden the hypervisor overhead between the > host emulated timer fires and the guest awares the timer is fired. However, > even though after more sustaining optimizations, kvm-unit-tests/tscdeadline_latency > still awares ~1000 cycles latency since we lost the time between the end of > wait_lapic_expire and the guest awares the timer is fired. There are > codes between the end of wait_lapic_expire and the world switch, furthermore, > the world switch itself also has overhead. Actually the guest_tsc is equal > to the target deadline time in wait_lapic_expire is too late, guest will > aware the latency between the end of wait_lapic_expire() and after vmentry > to the guest. This patch takes this time into consideration. > > The vmentry_advance_ns module parameter is conservative 25ns by default(thanks > to Radim's kvm-unit-tests/vmentry_latency.flat), it can be tuned/reworked in > the future. 
> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Radim Krčmář <rkrcmar@redhat.com> > Cc: Sean Christopherson <sean.j.christopherson@intel.com> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com> > --- > v3 -> v4: > * default value is 25ns > * compute vmentry_advance_cycles in kvm_set_tsc_khz() path > v2 -> v3: > * read-only module parameter > * get_vmentry_advance_cycles() not inline > v1 -> v2: > * rename get_vmentry_advance_delta to get_vmentry_advance_cycles > * cache vmentry_advance_cycles by setting param bit 0 > * add param max limit > > arch/x86/kvm/lapic.c | 21 ++++++++++++++++++--- > arch/x86/kvm/lapic.h | 2 ++ > arch/x86/kvm/vmx/vmx.c | 3 ++- > arch/x86/kvm/x86.c | 12 ++++++++++-- > arch/x86/kvm/x86.h | 2 ++ > 5 files changed, 34 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index e82a18c..e92e4e5 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -1528,6 +1528,19 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, > apic->lapic_timer.timer_advance_ns = timer_advance_ns; > } > > +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu) > +{ > + u64 cycles; > + struct kvm_lapic *apic = vcpu->arch.apic; > + > + cycles = vmentry_advance_ns * vcpu->arch.virtual_tsc_khz; > + do_div(cycles, 1000000); > + > + apic->lapic_timer.vmentry_advance_cycles = cycles; > + > + return cycles; > +} > + > void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) > { > struct kvm_lapic *apic = vcpu->arch.apic; > @@ -1541,7 +1554,8 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) > > tsc_deadline = apic->lapic_timer.expired_tscdeadline; > apic->lapic_timer.expired_tscdeadline = 0; > - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); > + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + > + apic->lapic_timer.vmentry_advance_cycles; > apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; > > if (guest_tsc < tsc_deadline) > @@ -1569,7 +1583,8 @@ static void start_sw_tscdeadline(struct kvm_lapic 
*apic) > local_irq_save(flags); > > now = ktime_get(); > - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); > + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + > + apic->lapic_timer.vmentry_advance_cycles; > > ns = (tscdeadline - guest_tsc) * 1000000ULL; > do_div(ns, this_tsc_khz); > @@ -2326,7 +2341,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) > apic->lapic_timer.timer_advance_ns = timer_advance_ns; > apic->lapic_timer.timer_advance_adjust_done = true; > } > - > + apic->lapic_timer.vmentry_advance_cycles = 0; > > /* > * APIC is created enabled. This will prevent kvm_lapic_set_base from > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > index 3674717..7c38950 100644 > --- a/arch/x86/kvm/lapic.h > +++ b/arch/x86/kvm/lapic.h > @@ -33,6 +33,7 @@ struct kvm_timer { > u64 expired_tscdeadline; > u32 timer_advance_ns; > s64 advance_expire_delta; > + u64 vmentry_advance_cycles; > atomic_t pending; /* accumulated triggered timers */ > bool hv_timer_in_use; > bool timer_advance_adjust_done; > @@ -226,6 +227,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) > bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); > > void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); > +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu); > > bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, > struct kvm_vcpu **dest_vcpu); > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index 8fbea03..dc81c78 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -7064,7 +7064,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, > > vmx = to_vmx(vcpu); > tscl = rdtsc(); > - guest_tscl = kvm_read_l1_tsc(vcpu, tscl); > + guest_tscl = kvm_read_l1_tsc(vcpu, tscl) + > + vcpu->arch.apic->lapic_timer.vmentry_advance_cycles; > delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; > lapic_timer_advance_cycles = nsec_to_cycles(vcpu, > ktimer->timer_advance_ns); > 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 0a05a4e..5e79b6c 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -145,6 +145,12 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); > static int __read_mostly lapic_timer_advance_ns = -1; > module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); > > +/* > + * lapic timer vmentry advance (tscdeadline mode only) in nanoseconds. > + */ > +u32 __read_mostly vmentry_advance_ns = 25; > +module_param(vmentry_advance_ns, uint, S_IRUGO); > + > static bool __read_mostly vector_hashing = true; > module_param(vector_hashing, bool, S_IRUGO); > > @@ -1592,6 +1598,8 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) > kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC, > &vcpu->arch.virtual_tsc_shift, > &vcpu->arch.virtual_tsc_mult); > + if (user_tsc_khz != vcpu->arch.virtual_tsc_khz) > + compute_vmentry_advance_cycles(vcpu); > vcpu->arch.virtual_tsc_khz = user_tsc_khz; > > /* > @@ -9134,8 +9142,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > } > vcpu->arch.pio_data = page_address(page); > > - kvm_set_tsc_khz(vcpu, max_tsc_khz); > - > r = kvm_mmu_create(vcpu); > if (r < 0) > goto fail_free_pio_data; > @@ -9148,6 +9154,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > } else > static_key_slow_inc(&kvm_no_apic_vcpu); > > + kvm_set_tsc_khz(vcpu, max_tsc_khz); > + > vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, > GFP_KERNEL_ACCOUNT); > if (!vcpu->arch.mce_banks) { > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h > index e08a128..9998989 100644 > --- a/arch/x86/kvm/x86.h > +++ b/arch/x86/kvm/x86.h > @@ -299,6 +299,8 @@ extern u64 kvm_supported_xcr0(void); > > extern unsigned int min_timer_period_us; > > +extern unsigned int vmentry_advance_ns; > + > extern bool enable_vmware_backdoor; > > extern struct static_key kvm_no_apic_vcpu; > -- > 2.7.4 >
ping again, On Fri, 21 Jun 2019 at 17:44, Wanpeng Li <kernellwp@gmail.com> wrote: > > ping, > On Fri, 14 Jun 2019 at 09:15, Wanpeng Li <kernellwp@gmail.com> wrote: > > > > From: Wanpeng Li <wanpengli@tencent.com> > > > > Advance lapic timer tries to hidden the hypervisor overhead between the > > host emulated timer fires and the guest awares the timer is fired. However, > > even though after more sustaining optimizations, kvm-unit-tests/tscdeadline_latency > > still awares ~1000 cycles latency since we lost the time between the end of > > wait_lapic_expire and the guest awares the timer is fired. There are > > codes between the end of wait_lapic_expire and the world switch, furthermore, > > the world switch itself also has overhead. Actually the guest_tsc is equal > > to the target deadline time in wait_lapic_expire is too late, guest will > > aware the latency between the end of wait_lapic_expire() and after vmentry > > to the guest. This patch takes this time into consideration. > > > > The vmentry_advance_ns module parameter is conservative 25ns by default(thanks > > to Radim's kvm-unit-tests/vmentry_latency.flat), it can be tuned/reworked in > > the future. 
> > > > Cc: Paolo Bonzini <pbonzini@redhat.com> > > Cc: Radim Krčmář <rkrcmar@redhat.com> > > Cc: Sean Christopherson <sean.j.christopherson@intel.com> > > Signed-off-by: Wanpeng Li <wanpengli@tencent.com> > > --- > > v3 -> v4: > > * default value is 25ns > > * compute vmentry_advance_cycles in kvm_set_tsc_khz() path > > v2 -> v3: > > * read-only module parameter > > * get_vmentry_advance_cycles() not inline > > v1 -> v2: > > * rename get_vmentry_advance_delta to get_vmentry_advance_cycles > > * cache vmentry_advance_cycles by setting param bit 0 > > * add param max limit > > > > arch/x86/kvm/lapic.c | 21 ++++++++++++++++++--- > > arch/x86/kvm/lapic.h | 2 ++ > > arch/x86/kvm/vmx/vmx.c | 3 ++- > > arch/x86/kvm/x86.c | 12 ++++++++++-- > > arch/x86/kvm/x86.h | 2 ++ > > 5 files changed, 34 insertions(+), 6 deletions(-) > > > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > index e82a18c..e92e4e5 100644 > > --- a/arch/x86/kvm/lapic.c > > +++ b/arch/x86/kvm/lapic.c > > @@ -1528,6 +1528,19 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, > > apic->lapic_timer.timer_advance_ns = timer_advance_ns; > > } > > > > +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu) > > +{ > > + u64 cycles; > > + struct kvm_lapic *apic = vcpu->arch.apic; > > + > > + cycles = vmentry_advance_ns * vcpu->arch.virtual_tsc_khz; > > + do_div(cycles, 1000000); > > + > > + apic->lapic_timer.vmentry_advance_cycles = cycles; > > + > > + return cycles; > > +} > > + > > void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) > > { > > struct kvm_lapic *apic = vcpu->arch.apic; > > @@ -1541,7 +1554,8 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) > > > > tsc_deadline = apic->lapic_timer.expired_tscdeadline; > > apic->lapic_timer.expired_tscdeadline = 0; > > - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); > > + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + > > + apic->lapic_timer.vmentry_advance_cycles; > > apic->lapic_timer.advance_expire_delta = guest_tsc - 
tsc_deadline; > > > > if (guest_tsc < tsc_deadline) > > @@ -1569,7 +1583,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) > > local_irq_save(flags); > > > > now = ktime_get(); > > - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); > > + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + > > + apic->lapic_timer.vmentry_advance_cycles; > > > > ns = (tscdeadline - guest_tsc) * 1000000ULL; > > do_div(ns, this_tsc_khz); > > @@ -2326,7 +2341,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) > > apic->lapic_timer.timer_advance_ns = timer_advance_ns; > > apic->lapic_timer.timer_advance_adjust_done = true; > > } > > - > > + apic->lapic_timer.vmentry_advance_cycles = 0; > > > > /* > > * APIC is created enabled. This will prevent kvm_lapic_set_base from > > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > > index 3674717..7c38950 100644 > > --- a/arch/x86/kvm/lapic.h > > +++ b/arch/x86/kvm/lapic.h > > @@ -33,6 +33,7 @@ struct kvm_timer { > > u64 expired_tscdeadline; > > u32 timer_advance_ns; > > s64 advance_expire_delta; > > + u64 vmentry_advance_cycles; > > atomic_t pending; /* accumulated triggered timers */ > > bool hv_timer_in_use; > > bool timer_advance_adjust_done; > > @@ -226,6 +227,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) > > bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); > > > > void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); > > +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu); > > > > bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, > > struct kvm_vcpu **dest_vcpu); > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > > index 8fbea03..dc81c78 100644 > > --- a/arch/x86/kvm/vmx/vmx.c > > +++ b/arch/x86/kvm/vmx/vmx.c > > @@ -7064,7 +7064,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, > > > > vmx = to_vmx(vcpu); > > tscl = rdtsc(); > > - guest_tscl = kvm_read_l1_tsc(vcpu, tscl); > > + guest_tscl = 
kvm_read_l1_tsc(vcpu, tscl) + > > + vcpu->arch.apic->lapic_timer.vmentry_advance_cycles; > > delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; > > lapic_timer_advance_cycles = nsec_to_cycles(vcpu, > > ktimer->timer_advance_ns); > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > index 0a05a4e..5e79b6c 100644 > > --- a/arch/x86/kvm/x86.c > > +++ b/arch/x86/kvm/x86.c > > @@ -145,6 +145,12 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); > > static int __read_mostly lapic_timer_advance_ns = -1; > > module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); > > > > +/* > > + * lapic timer vmentry advance (tscdeadline mode only) in nanoseconds. > > + */ > > +u32 __read_mostly vmentry_advance_ns = 25; > > +module_param(vmentry_advance_ns, uint, S_IRUGO); > > + > > static bool __read_mostly vector_hashing = true; > > module_param(vector_hashing, bool, S_IRUGO); > > > > @@ -1592,6 +1598,8 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) > > kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC, > > &vcpu->arch.virtual_tsc_shift, > > &vcpu->arch.virtual_tsc_mult); > > + if (user_tsc_khz != vcpu->arch.virtual_tsc_khz) > > + compute_vmentry_advance_cycles(vcpu); > > vcpu->arch.virtual_tsc_khz = user_tsc_khz; > > > > /* > > @@ -9134,8 +9142,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > } > > vcpu->arch.pio_data = page_address(page); > > > > - kvm_set_tsc_khz(vcpu, max_tsc_khz); > > - > > r = kvm_mmu_create(vcpu); > > if (r < 0) > > goto fail_free_pio_data; > > @@ -9148,6 +9154,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > } else > > static_key_slow_inc(&kvm_no_apic_vcpu); > > > > + kvm_set_tsc_khz(vcpu, max_tsc_khz); > > + > > vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, > > GFP_KERNEL_ACCOUNT); > > if (!vcpu->arch.mce_banks) { > > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h > > index e08a128..9998989 100644 > > --- a/arch/x86/kvm/x86.h > > +++ b/arch/x86/kvm/x86.h > > 
@@ -299,6 +299,8 @@ extern u64 kvm_supported_xcr0(void); > > > > extern unsigned int min_timer_period_us; > > > > +extern unsigned int vmentry_advance_ns; > > + > > extern bool enable_vmware_backdoor; > > > > extern struct static_key kvm_no_apic_vcpu; > > -- > > 2.7.4 > >
Hi Paolo, how about this patchset? Patch 2/2 is easy to take, do you have more concern about patch 1/2? On Fri, 28 Jun 2019 at 16:29, Wanpeng Li <kernellwp@gmail.com> wrote: > > ping again, > On Fri, 21 Jun 2019 at 17:44, Wanpeng Li <kernellwp@gmail.com> wrote: > > > > ping, > > On Fri, 14 Jun 2019 at 09:15, Wanpeng Li <kernellwp@gmail.com> wrote: > > > > > > From: Wanpeng Li <wanpengli@tencent.com> > > > > > > Advance lapic timer tries to hidden the hypervisor overhead between the > > > host emulated timer fires and the guest awares the timer is fired. However, > > > even though after more sustaining optimizations, kvm-unit-tests/tscdeadline_latency > > > still awares ~1000 cycles latency since we lost the time between the end of > > > wait_lapic_expire and the guest awares the timer is fired. There are > > > codes between the end of wait_lapic_expire and the world switch, furthermore, > > > the world switch itself also has overhead. Actually the guest_tsc is equal > > > to the target deadline time in wait_lapic_expire is too late, guest will > > > aware the latency between the end of wait_lapic_expire() and after vmentry > > > to the guest. This patch takes this time into consideration. > > > > > > The vmentry_advance_ns module parameter is conservative 25ns by default(thanks > > > to Radim's kvm-unit-tests/vmentry_latency.flat), it can be tuned/reworked in > > > the future. 
> > > > > > Cc: Paolo Bonzini <pbonzini@redhat.com> > > > Cc: Radim Krčmář <rkrcmar@redhat.com> > > > Cc: Sean Christopherson <sean.j.christopherson@intel.com> > > > Signed-off-by: Wanpeng Li <wanpengli@tencent.com> > > > --- > > > v3 -> v4: > > > * default value is 25ns > > > * compute vmentry_advance_cycles in kvm_set_tsc_khz() path > > > v2 -> v3: > > > * read-only module parameter > > > * get_vmentry_advance_cycles() not inline > > > v1 -> v2: > > > * rename get_vmentry_advance_delta to get_vmentry_advance_cycles > > > * cache vmentry_advance_cycles by setting param bit 0 > > > * add param max limit > > > > > > arch/x86/kvm/lapic.c | 21 ++++++++++++++++++--- > > > arch/x86/kvm/lapic.h | 2 ++ > > > arch/x86/kvm/vmx/vmx.c | 3 ++- > > > arch/x86/kvm/x86.c | 12 ++++++++++-- > > > arch/x86/kvm/x86.h | 2 ++ > > > 5 files changed, 34 insertions(+), 6 deletions(-) > > > > > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > > index e82a18c..e92e4e5 100644 > > > --- a/arch/x86/kvm/lapic.c > > > +++ b/arch/x86/kvm/lapic.c > > > @@ -1528,6 +1528,19 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, > > > apic->lapic_timer.timer_advance_ns = timer_advance_ns; > > > } > > > > > > +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu) > > > +{ > > > + u64 cycles; > > > + struct kvm_lapic *apic = vcpu->arch.apic; > > > + > > > + cycles = vmentry_advance_ns * vcpu->arch.virtual_tsc_khz; > > > + do_div(cycles, 1000000); > > > + > > > + apic->lapic_timer.vmentry_advance_cycles = cycles; > > > + > > > + return cycles; > > > +} > > > + > > > void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) > > > { > > > struct kvm_lapic *apic = vcpu->arch.apic; > > > @@ -1541,7 +1554,8 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) > > > > > > tsc_deadline = apic->lapic_timer.expired_tscdeadline; > > > apic->lapic_timer.expired_tscdeadline = 0; > > > - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); > > > + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + > > > 
+ apic->lapic_timer.vmentry_advance_cycles; > > > apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; > > > > > > if (guest_tsc < tsc_deadline) > > > @@ -1569,7 +1583,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) > > > local_irq_save(flags); > > > > > > now = ktime_get(); > > > - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); > > > + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + > > > + apic->lapic_timer.vmentry_advance_cycles; > > > > > > ns = (tscdeadline - guest_tsc) * 1000000ULL; > > > do_div(ns, this_tsc_khz); > > > @@ -2326,7 +2341,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) > > > apic->lapic_timer.timer_advance_ns = timer_advance_ns; > > > apic->lapic_timer.timer_advance_adjust_done = true; > > > } > > > - > > > + apic->lapic_timer.vmentry_advance_cycles = 0; > > > > > > /* > > > * APIC is created enabled. This will prevent kvm_lapic_set_base from > > > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > > > index 3674717..7c38950 100644 > > > --- a/arch/x86/kvm/lapic.h > > > +++ b/arch/x86/kvm/lapic.h > > > @@ -33,6 +33,7 @@ struct kvm_timer { > > > u64 expired_tscdeadline; > > > u32 timer_advance_ns; > > > s64 advance_expire_delta; > > > + u64 vmentry_advance_cycles; > > > atomic_t pending; /* accumulated triggered timers */ > > > bool hv_timer_in_use; > > > bool timer_advance_adjust_done; > > > @@ -226,6 +227,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) > > > bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); > > > > > > void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); > > > +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu); > > > > > > bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, > > > struct kvm_vcpu **dest_vcpu); > > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > > > index 8fbea03..dc81c78 100644 > > > --- a/arch/x86/kvm/vmx/vmx.c > > > +++ b/arch/x86/kvm/vmx/vmx.c > > > @@ -7064,7 +7064,8 @@ 
static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, > > > > > > vmx = to_vmx(vcpu); > > > tscl = rdtsc(); > > > - guest_tscl = kvm_read_l1_tsc(vcpu, tscl); > > > + guest_tscl = kvm_read_l1_tsc(vcpu, tscl) + > > > + vcpu->arch.apic->lapic_timer.vmentry_advance_cycles; > > > delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; > > > lapic_timer_advance_cycles = nsec_to_cycles(vcpu, > > > ktimer->timer_advance_ns); > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > index 0a05a4e..5e79b6c 100644 > > > --- a/arch/x86/kvm/x86.c > > > +++ b/arch/x86/kvm/x86.c > > > @@ -145,6 +145,12 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); > > > static int __read_mostly lapic_timer_advance_ns = -1; > > > module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); > > > > > > +/* > > > + * lapic timer vmentry advance (tscdeadline mode only) in nanoseconds. > > > + */ > > > +u32 __read_mostly vmentry_advance_ns = 25; > > > +module_param(vmentry_advance_ns, uint, S_IRUGO); > > > + > > > static bool __read_mostly vector_hashing = true; > > > module_param(vector_hashing, bool, S_IRUGO); > > > > > > @@ -1592,6 +1598,8 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) > > > kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC, > > > &vcpu->arch.virtual_tsc_shift, > > > &vcpu->arch.virtual_tsc_mult); > > > + if (user_tsc_khz != vcpu->arch.virtual_tsc_khz) > > > + compute_vmentry_advance_cycles(vcpu); > > > vcpu->arch.virtual_tsc_khz = user_tsc_khz; > > > > > > /* > > > @@ -9134,8 +9142,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > > } > > > vcpu->arch.pio_data = page_address(page); > > > > > > - kvm_set_tsc_khz(vcpu, max_tsc_khz); > > > - > > > r = kvm_mmu_create(vcpu); > > > if (r < 0) > > > goto fail_free_pio_data; > > > @@ -9148,6 +9154,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) > > > } else > > > static_key_slow_inc(&kvm_no_apic_vcpu); > > > > > > + kvm_set_tsc_khz(vcpu, max_tsc_khz); > 
> > + > > > vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, > > > GFP_KERNEL_ACCOUNT); > > > if (!vcpu->arch.mce_banks) { > > > diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h > > > index e08a128..9998989 100644 > > > --- a/arch/x86/kvm/x86.h > > > +++ b/arch/x86/kvm/x86.h > > > @@ -299,6 +299,8 @@ extern u64 kvm_supported_xcr0(void); > > > > > > extern unsigned int min_timer_period_us; > > > > > > +extern unsigned int vmentry_advance_ns; > > > + > > > extern bool enable_vmware_backdoor; > > > > > > extern struct static_key kvm_no_apic_vcpu; > > > -- > > > 2.7.4 > > >
On 03/07/19 02:48, Wanpeng Li wrote:
> Hi Paolo, how about this patchset? Patch 2/2 is easy to take, do you
> have more concern about patch 1/2?

I don't know. It seems somewhat hard to tune and in cyclictest it only happens for preemption_timer=N. Are you using preemption_timer=N together with the LAPIC-timer-on-service-CPU patches?

Paolo
On Wed, 3 Jul 2019 at 22:13, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 03/07/19 02:48, Wanpeng Li wrote:
> > Hi Paolo, how about this patchset? Patch 2/2 is easy to take, do you
> > have more concern about patch 1/2?
>
> I don't know. It seems somewhat hard to tune and in cyclictest it only
> happens for preemption_timer=N. Are you using preemption_timer=N
> together with the LAPIC-timer-on-service-CPU patches?

A conservative 25ns value no longer provides any benefit for cyclictest, even when preemption_timer=N. Btw, maybe it is time to merge the LAPIC-timer-on-service-CPU patches now. :)

Regards,
Wanpeng Li
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e82a18c..e92e4e5 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1528,6 +1528,19 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu, apic->lapic_timer.timer_advance_ns = timer_advance_ns; } +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu) +{ + u64 cycles; + struct kvm_lapic *apic = vcpu->arch.apic; + + cycles = vmentry_advance_ns * vcpu->arch.virtual_tsc_khz; + do_div(cycles, 1000000); + + apic->lapic_timer.vmentry_advance_cycles = cycles; + + return cycles; +} + void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -1541,7 +1554,8 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu) tsc_deadline = apic->lapic_timer.expired_tscdeadline; apic->lapic_timer.expired_tscdeadline = 0; - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + + apic->lapic_timer.vmentry_advance_cycles; apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline; if (guest_tsc < tsc_deadline) @@ -1569,7 +1583,8 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) local_irq_save(flags); now = ktime_get(); - guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); + guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) + + apic->lapic_timer.vmentry_advance_cycles; ns = (tscdeadline - guest_tsc) * 1000000ULL; do_div(ns, this_tsc_khz); @@ -2326,7 +2341,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) apic->lapic_timer.timer_advance_ns = timer_advance_ns; apic->lapic_timer.timer_advance_adjust_done = true; } - + apic->lapic_timer.vmentry_advance_cycles = 0; /* * APIC is created enabled. 
This will prevent kvm_lapic_set_base from diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 3674717..7c38950 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -33,6 +33,7 @@ struct kvm_timer { u64 expired_tscdeadline; u32 timer_advance_ns; s64 advance_expire_delta; + u64 vmentry_advance_cycles; atomic_t pending; /* accumulated triggered timers */ bool hv_timer_in_use; bool timer_advance_adjust_done; @@ -226,6 +227,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu); +u64 compute_vmentry_advance_cycles(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8fbea03..dc81c78 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7064,7 +7064,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, vmx = to_vmx(vcpu); tscl = rdtsc(); - guest_tscl = kvm_read_l1_tsc(vcpu, tscl); + guest_tscl = kvm_read_l1_tsc(vcpu, tscl) + + vcpu->arch.apic->lapic_timer.vmentry_advance_cycles; delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl; lapic_timer_advance_cycles = nsec_to_cycles(vcpu, ktimer->timer_advance_ns); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a05a4e..5e79b6c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -145,6 +145,12 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); static int __read_mostly lapic_timer_advance_ns = -1; module_param(lapic_timer_advance_ns, int, S_IRUGO | S_IWUSR); +/* + * lapic timer vmentry advance (tscdeadline mode only) in nanoseconds. 
+ */ +u32 __read_mostly vmentry_advance_ns = 25; +module_param(vmentry_advance_ns, uint, S_IRUGO); + static bool __read_mostly vector_hashing = true; module_param(vector_hashing, bool, S_IRUGO); @@ -1592,6 +1598,8 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz) kvm_get_time_scale(user_tsc_khz * 1000LL, NSEC_PER_SEC, &vcpu->arch.virtual_tsc_shift, &vcpu->arch.virtual_tsc_mult); + if (user_tsc_khz != vcpu->arch.virtual_tsc_khz) + compute_vmentry_advance_cycles(vcpu); vcpu->arch.virtual_tsc_khz = user_tsc_khz; /* @@ -9134,8 +9142,6 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) } vcpu->arch.pio_data = page_address(page); - kvm_set_tsc_khz(vcpu, max_tsc_khz); - r = kvm_mmu_create(vcpu); if (r < 0) goto fail_free_pio_data; @@ -9148,6 +9154,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) } else static_key_slow_inc(&kvm_no_apic_vcpu); + kvm_set_tsc_khz(vcpu, max_tsc_khz); + vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4, GFP_KERNEL_ACCOUNT); if (!vcpu->arch.mce_banks) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e08a128..9998989 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -299,6 +299,8 @@ extern u64 kvm_supported_xcr0(void); extern unsigned int min_timer_period_us; +extern unsigned int vmentry_advance_ns; + extern bool enable_vmware_backdoor; extern struct static_key kvm_no_apic_vcpu;