Message ID | 1511935673-7371-1-git-send-email-wanpeng.li@hotmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 29/11/2017 07:07, Wanpeng Li wrote: > From: Wanpeng Li <wanpeng.li@hotmail.com> > > MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored > each time during world switch. Jim from Google pointed out that > when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time, > and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr(). > This patch caches the host IA32_DEBUGCTL MSR and saves/restores > the host IA32_DEBUGCTL msr when guest/host switches to avoid to > save/restore each time during world switch. > > Suggested-by: Jim Mattson <jmattson@google.com> > Cc: Jim Mattson <jmattson@google.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Radim Krčmář <rkrcmar@redhat.com> > Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> The update_debugctlmsr should stay in vmx_vcpu_run so that tracing features work correctly. However, the get_debugctlmsr indeed can be moved to vmx_vcpu_load. > --- > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/kvm/vmx.c | 11 +++++------ > 2 files changed, 6 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 63d34bc..c904250 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -619,6 +619,7 @@ struct kvm_vcpu_arch { > unsigned long dr7; > unsigned long eff_db[KVM_NR_DB_REGS]; > unsigned long guest_debug_dr7; > + unsigned long debugctlmsr; Please rename to host_debugctlmsr and place it in struct vcpu_vmx. 
Thanks, Paolo > u64 msr_platform_info; > u64 msr_misc_features_enables; > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 8c7e816..b167bba 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -2326,6 +2326,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > > vmx_vcpu_pi_load(vcpu, cpu); > vmx->host_pkru = read_pkru(); > + vcpu->arch.debugctlmsr = get_debugctlmsr(); > } > > static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) > @@ -2347,6 +2348,9 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) > vmx_vcpu_pi_put(vcpu); > > __vmx_load_host_state(to_vmx(vcpu)); > + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ > + if (vcpu->arch.debugctlmsr) > + update_debugctlmsr(vcpu->arch.debugctlmsr); > } > > static bool emulation_required(struct kvm_vcpu *vcpu) > @@ -9346,7 +9350,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) > static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > - unsigned long debugctlmsr, cr3, cr4; > + unsigned long cr3, cr4; > > /* Record the guest's net vcpu time for enforced NMI injections. */ > if (unlikely(!enable_vnmi && > @@ -9399,7 +9403,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > __write_pkru(vcpu->arch.pkru); > > atomic_switch_perf_msrs(vmx); > - debugctlmsr = get_debugctlmsr(); > > vmx_arm_hv_timer(vcpu); > > @@ -9509,10 +9512,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) > #endif > ); > > - /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ > - if (debugctlmsr) > - update_debugctlmsr(debugctlmsr); > - > #ifndef CONFIG_X86_64 > /* > * The sysexit path does not restore ds/es, so we must set them to >
2017-11-29 16:48 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>: > On 29/11/2017 07:07, Wanpeng Li wrote: >> From: Wanpeng Li <wanpeng.li@hotmail.com> >> >> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored >> each time during world switch. Jim from Google pointed out that >> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time, >> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr(). >> This patch caches the host IA32_DEBUGCTL MSR and saves/restores >> the host IA32_DEBUGCTL msr when guest/host switches to avoid to >> save/restore each time during world switch. >> >> Suggested-by: Jim Mattson <jmattson@google.com> >> Cc: Jim Mattson <jmattson@google.com> >> Cc: Paolo Bonzini <pbonzini@redhat.com> >> Cc: Radim Krčmář <rkrcmar@redhat.com> >> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> > > The update_debugctlmsr should stay in vmx_vcpu_run so that tracing > features work correctly. However, the get_debugctlmsr indeed can be The tracing can't run except vCPU is schedule out, so why update_debugctlmsr should stay in vmx_vcpu_run? Regards, Wanpeng Li > moved to vmx_vcpu_load. > >> --- >> arch/x86/include/asm/kvm_host.h | 1 + >> arch/x86/kvm/vmx.c | 11 +++++------ >> 2 files changed, 6 insertions(+), 6 deletions(-) >> >> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h >> index 63d34bc..c904250 100644 >> --- a/arch/x86/include/asm/kvm_host.h >> +++ b/arch/x86/include/asm/kvm_host.h >> @@ -619,6 +619,7 @@ struct kvm_vcpu_arch { >> unsigned long dr7; >> unsigned long eff_db[KVM_NR_DB_REGS]; >> unsigned long guest_debug_dr7; >> + unsigned long debugctlmsr; > > Please rename to host_debugctlmsr and place it in struct vcpu_vmx. 
> > Thanks, > > Paolo > >> u64 msr_platform_info; >> u64 msr_misc_features_enables; >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 8c7e816..b167bba 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -2326,6 +2326,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) >> >> vmx_vcpu_pi_load(vcpu, cpu); >> vmx->host_pkru = read_pkru(); >> + vcpu->arch.debugctlmsr = get_debugctlmsr(); >> } >> >> static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) >> @@ -2347,6 +2348,9 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) >> vmx_vcpu_pi_put(vcpu); >> >> __vmx_load_host_state(to_vmx(vcpu)); >> + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ >> + if (vcpu->arch.debugctlmsr) >> + update_debugctlmsr(vcpu->arch.debugctlmsr); >> } >> >> static bool emulation_required(struct kvm_vcpu *vcpu) >> @@ -9346,7 +9350,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) >> static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >> { >> struct vcpu_vmx *vmx = to_vmx(vcpu); >> - unsigned long debugctlmsr, cr3, cr4; >> + unsigned long cr3, cr4; >> >> /* Record the guest's net vcpu time for enforced NMI injections. */ >> if (unlikely(!enable_vnmi && >> @@ -9399,7 +9403,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >> __write_pkru(vcpu->arch.pkru); >> >> atomic_switch_perf_msrs(vmx); >> - debugctlmsr = get_debugctlmsr(); >> >> vmx_arm_hv_timer(vcpu); >> >> @@ -9509,10 +9512,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) >> #endif >> ); >> >> - /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ >> - if (debugctlmsr) >> - update_debugctlmsr(debugctlmsr); >> - >> #ifndef CONFIG_X86_64 >> /* >> * The sysexit path does not restore ds/es, so we must set them to >> >
On 29/11/2017 09:51, Wanpeng Li wrote: > 2017-11-29 16:48 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>: >> On 29/11/2017 07:07, Wanpeng Li wrote: >>> From: Wanpeng Li <wanpeng.li@hotmail.com> >>> >>> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored >>> each time during world switch. Jim from Google pointed out that >>> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time, >>> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr(). >>> This patch caches the host IA32_DEBUGCTL MSR and saves/restores >>> the host IA32_DEBUGCTL msr when guest/host switches to avoid to >>> save/restore each time during world switch. >>> >>> Suggested-by: Jim Mattson <jmattson@google.com> >>> Cc: Jim Mattson <jmattson@google.com> >>> Cc: Paolo Bonzini <pbonzini@redhat.com> >>> Cc: Radim Krčmář <rkrcmar@redhat.com> >>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> >> >> The update_debugctlmsr should stay in vmx_vcpu_run so that tracing >> features work correctly. However, the get_debugctlmsr indeed can be > > The tracing can't run except vCPU is schedule out, so why > update_debugctlmsr should stay in vmx_vcpu_run? For example your patch is disabling BTS (branch trace store) after the first vmexit, isn't it? Thanks, Paolo
2017-11-29 17:13 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>: > On 29/11/2017 09:51, Wanpeng Li wrote: >> 2017-11-29 16:48 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>: >>> On 29/11/2017 07:07, Wanpeng Li wrote: >>>> From: Wanpeng Li <wanpeng.li@hotmail.com> >>>> >>>> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored >>>> each time during world switch. Jim from Google pointed out that >>>> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time, >>>> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr(). >>>> This patch caches the host IA32_DEBUGCTL MSR and saves/restores >>>> the host IA32_DEBUGCTL msr when guest/host switches to avoid to >>>> save/restore each time during world switch. >>>> >>>> Suggested-by: Jim Mattson <jmattson@google.com> >>>> Cc: Jim Mattson <jmattson@google.com> >>>> Cc: Paolo Bonzini <pbonzini@redhat.com> >>>> Cc: Radim Krčmář <rkrcmar@redhat.com> >>>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> >>> >>> The update_debugctlmsr should stay in vmx_vcpu_run so that tracing >>> features work correctly. However, the get_debugctlmsr indeed can be >> >> The tracing can't run except vCPU is schedule out, so why >> update_debugctlmsr should stay in vmx_vcpu_run? > > For example your patch is disabling BTS (branch trace store) after the > first vmexit, isn't it? I see. Thanks for pointing out. :) Regards, Wanpeng Li
Wanpeng Li <kernellwp@gmail.com> writes: > From: Wanpeng Li <wanpeng.li@hotmail.com> > > MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored > each time during world switch. Jim from Google pointed out that > when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time, > and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr(). > This patch caches the host IA32_DEBUGCTL MSR and saves/restores > the host IA32_DEBUGCTL msr when guest/host switches to avoid to > save/restore each time during world switch. FWIW I've seen this too on L2 profiles. I haven't looked too closely, but I suspect you'll clobber global kernel debugger state this way. You would at least need some interface for KDB etc. to invalidate your cache. -Andi
An alternative is to give the L1 guest read permission for this MSR in the MSR permission bitmaps. It's still going to be ~80 cycles, but that's better than the cost of a VM-exit/VM-entry round-trip. On Wed, Nov 29, 2017 at 10:20 AM, Andi Kleen <ak@linux.intel.com> wrote: > Wanpeng Li <kernellwp@gmail.com> writes: > >> From: Wanpeng Li <wanpeng.li@hotmail.com> >> >> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored >> each time during world switch. Jim from Google pointed out that >> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time, >> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr(). >> This patch caches the host IA32_DEBUGCTL MSR and saves/restores >> the host IA32_DEBUGCTL msr when guest/host switches to avoid to >> save/restore each time during world switch. > > FWIW i've seen this too on L2 profiles. > > But I haven't looked too closely, but I suspect you'll clobber global > kernel debugger state this way. > > You would at least need some interface for KDB etc. to invalidate > your cache. > > -Andi
On Wed, Nov 29, 2017 at 11:05:46AM -0800, Jim Mattson wrote: > An alternative is to give the L1 guest read permission for this MSR in > the MSR permission bitmaps. It's still going to be ~80 cycles, but > that's better than the cost of a VM-exit/VM-entry round-trip. It's a useful optimization, 80 cycles is 80 cycles. The cache invalidation could likely be really simple, like: (1) have a global counter; (2) always check the counter before and after, and don't use the cache if they don't match; (3) change KDB etc. to increase the counter. -Andi
On 29/11/2017 19:20, Andi Kleen wrote: > But I haven't looked too closely, but I suspect you'll clobber global > kernel debugger state this way. I checked all callers of update_debugctlmsr, and couldn't find any that could run asynchronously while KVM is caching the value. For example __switch_to_xtra would always run before the sched_in notifier. Thanks, Paolo
On Wed, Nov 29, 2017 at 11:26:30PM +0100, Paolo Bonzini wrote: > On 29/11/2017 19:20, Andi Kleen wrote: > > But I haven't looked too closely, but I suspect you'll clobber global > > kernel debugger state this way. > > I checked all callers of update_debugctlmsr, and couldn't find any that > could run asynchronously while KVM is caching the value. For example > __switch_to_xtra would always run before the sched_in notifier. True. It would only be a problem if the debugger supported branch stepping or LBRs, which it doesn't seem to currently. -Andi
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 63d34bc..c904250 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -619,6 +619,7 @@ struct kvm_vcpu_arch { unsigned long dr7; unsigned long eff_db[KVM_NR_DB_REGS]; unsigned long guest_debug_dr7; + unsigned long debugctlmsr; u64 msr_platform_info; u64 msr_misc_features_enables; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8c7e816..b167bba 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2326,6 +2326,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vmx_vcpu_pi_load(vcpu, cpu); vmx->host_pkru = read_pkru(); + vcpu->arch.debugctlmsr = get_debugctlmsr(); } static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) @@ -2347,6 +2348,9 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) vmx_vcpu_pi_put(vcpu); __vmx_load_host_state(to_vmx(vcpu)); + /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ + if (vcpu->arch.debugctlmsr) + update_debugctlmsr(vcpu->arch.debugctlmsr); } static bool emulation_required(struct kvm_vcpu *vcpu) @@ -9346,7 +9350,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu) static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - unsigned long debugctlmsr, cr3, cr4; + unsigned long cr3, cr4; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && @@ -9399,7 +9403,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) __write_pkru(vcpu->arch.pkru); atomic_switch_perf_msrs(vmx); - debugctlmsr = get_debugctlmsr(); vmx_arm_hv_timer(vcpu); @@ -9509,10 +9512,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) #endif ); - /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */ - if (debugctlmsr) - update_debugctlmsr(debugctlmsr); - #ifndef CONFIG_X86_64 /* * The sysexit path does not restore ds/es, so we must set them to