diff mbox

KVM: VMX: Cache IA32_DEBUGCTL in memory

Message ID 1511935673-7371-1-git-send-email-wanpeng.li@hotmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wanpeng Li Nov. 29, 2017, 6:07 a.m. UTC
From: Wanpeng Li <wanpeng.li@hotmail.com>

MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored 
each time during world switch. Jim from Google pointed out that 
when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time, 
and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr(). 
This patch caches the host IA32_DEBUGCTL MSR and saves/restores 
the host IA32_DEBUGCTL msr when guest/host switches to avoid to 
save/restore each time during world switch.

Suggested-by: Jim Mattson <jmattson@google.com>
Cc: Jim Mattson <jmattson@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx.c              | 11 +++++------
 2 files changed, 6 insertions(+), 6 deletions(-)

Comments

Paolo Bonzini Nov. 29, 2017, 8:48 a.m. UTC | #1
On 29/11/2017 07:07, Wanpeng Li wrote:
> From: Wanpeng Li <wanpeng.li@hotmail.com>
> 
> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored 
> each time during world switch. Jim from Google pointed out that 
> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time, 
> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr(). 
> This patch caches the host IA32_DEBUGCTL MSR and saves/restores 
> the host IA32_DEBUGCTL msr when guest/host switches to avoid to 
> save/restore each time during world switch.
> 
> Suggested-by: Jim Mattson <jmattson@google.com>
> Cc: Jim Mattson <jmattson@google.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>

The update_debugctlmsr should stay in vmx_vcpu_run so that tracing
features work correctly.  However, the get_debugctlmsr indeed can be
moved to vmx_vcpu_load.

> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/vmx.c              | 11 +++++------
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 63d34bc..c904250 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -619,6 +619,7 @@ struct kvm_vcpu_arch {
>  	unsigned long dr7;
>  	unsigned long eff_db[KVM_NR_DB_REGS];
>  	unsigned long guest_debug_dr7;
> +	unsigned long debugctlmsr;

Please rename to host_debugctlmsr and place it in struct vcpu_vmx.

Thanks,

Paolo

>  	u64 msr_platform_info;
>  	u64 msr_misc_features_enables;
>  
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 8c7e816..b167bba 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2326,6 +2326,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>  
>  	vmx_vcpu_pi_load(vcpu, cpu);
>  	vmx->host_pkru = read_pkru();
> +	vcpu->arch.debugctlmsr = get_debugctlmsr();
>  }
>  
>  static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> @@ -2347,6 +2348,9 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
>  	vmx_vcpu_pi_put(vcpu);
>  
>  	__vmx_load_host_state(to_vmx(vcpu));
> +	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
> +	if (vcpu->arch.debugctlmsr)
> +		update_debugctlmsr(vcpu->arch.debugctlmsr);
>  }
>  
>  static bool emulation_required(struct kvm_vcpu *vcpu)
> @@ -9346,7 +9350,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
>  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> -	unsigned long debugctlmsr, cr3, cr4;
> +	unsigned long cr3, cr4;
>  
>  	/* Record the guest's net vcpu time for enforced NMI injections. */
>  	if (unlikely(!enable_vnmi &&
> @@ -9399,7 +9403,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  		__write_pkru(vcpu->arch.pkru);
>  
>  	atomic_switch_perf_msrs(vmx);
> -	debugctlmsr = get_debugctlmsr();
>  
>  	vmx_arm_hv_timer(vcpu);
>  
> @@ -9509,10 +9512,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  #endif
>  	      );
>  
> -	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
> -	if (debugctlmsr)
> -		update_debugctlmsr(debugctlmsr);
> -
>  #ifndef CONFIG_X86_64
>  	/*
>  	 * The sysexit path does not restore ds/es, so we must set them to
>
Wanpeng Li Nov. 29, 2017, 8:51 a.m. UTC | #2
2017-11-29 16:48 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
> On 29/11/2017 07:07, Wanpeng Li wrote:
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored
>> each time during world switch. Jim from Google pointed out that
>> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time,
>> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr().
>> This patch caches the host IA32_DEBUGCTL MSR and saves/restores
>> the host IA32_DEBUGCTL msr when guest/host switches to avoid to
>> save/restore each time during world switch.
>>
>> Suggested-by: Jim Mattson <jmattson@google.com>
>> Cc: Jim Mattson <jmattson@google.com>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>
> The update_debugctlmsr should stay in vmx_vcpu_run so that tracing
> features work correctly.  However, the get_debugctlmsr indeed can be

The tracing can't run except vCPU is schedule out, so why
update_debugctlmsr should stay in vmx_vcpu_run?

Regards,
Wanpeng Li

> moved to vmx_vcpu_load.
>
>> ---
>>  arch/x86/include/asm/kvm_host.h |  1 +
>>  arch/x86/kvm/vmx.c              | 11 +++++------
>>  2 files changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 63d34bc..c904250 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -619,6 +619,7 @@ struct kvm_vcpu_arch {
>>       unsigned long dr7;
>>       unsigned long eff_db[KVM_NR_DB_REGS];
>>       unsigned long guest_debug_dr7;
>> +     unsigned long debugctlmsr;
>
> Please rename to host_debugctlmsr and place it in struct vcpu_vmx.
>
> Thanks,
>
> Paolo
>
>>       u64 msr_platform_info;
>>       u64 msr_misc_features_enables;
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 8c7e816..b167bba 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -2326,6 +2326,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>>
>>       vmx_vcpu_pi_load(vcpu, cpu);
>>       vmx->host_pkru = read_pkru();
>> +     vcpu->arch.debugctlmsr = get_debugctlmsr();
>>  }
>>
>>  static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
>> @@ -2347,6 +2348,9 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
>>       vmx_vcpu_pi_put(vcpu);
>>
>>       __vmx_load_host_state(to_vmx(vcpu));
>> +     /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
>> +     if (vcpu->arch.debugctlmsr)
>> +             update_debugctlmsr(vcpu->arch.debugctlmsr);
>>  }
>>
>>  static bool emulation_required(struct kvm_vcpu *vcpu)
>> @@ -9346,7 +9350,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
>>  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>  {
>>       struct vcpu_vmx *vmx = to_vmx(vcpu);
>> -     unsigned long debugctlmsr, cr3, cr4;
>> +     unsigned long cr3, cr4;
>>
>>       /* Record the guest's net vcpu time for enforced NMI injections. */
>>       if (unlikely(!enable_vnmi &&
>> @@ -9399,7 +9403,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>               __write_pkru(vcpu->arch.pkru);
>>
>>       atomic_switch_perf_msrs(vmx);
>> -     debugctlmsr = get_debugctlmsr();
>>
>>       vmx_arm_hv_timer(vcpu);
>>
>> @@ -9509,10 +9512,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>  #endif
>>             );
>>
>> -     /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
>> -     if (debugctlmsr)
>> -             update_debugctlmsr(debugctlmsr);
>> -
>>  #ifndef CONFIG_X86_64
>>       /*
>>        * The sysexit path does not restore ds/es, so we must set them to
>>
>
Paolo Bonzini Nov. 29, 2017, 9:13 a.m. UTC | #3
On 29/11/2017 09:51, Wanpeng Li wrote:
> 2017-11-29 16:48 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>> On 29/11/2017 07:07, Wanpeng Li wrote:
>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored
>>> each time during world switch. Jim from Google pointed out that
>>> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time,
>>> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr().
>>> This patch caches the host IA32_DEBUGCTL MSR and saves/restores
>>> the host IA32_DEBUGCTL msr when guest/host switches to avoid to
>>> save/restore each time during world switch.
>>>
>>> Suggested-by: Jim Mattson <jmattson@google.com>
>>> Cc: Jim Mattson <jmattson@google.com>
>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> The update_debugctlmsr should stay in vmx_vcpu_run so that tracing
>> features work correctly.  However, the get_debugctlmsr indeed can be
> 
> The tracing can't run except vCPU is schedule out, so why
> update_debugctlmsr should stay in vmx_vcpu_run?

For example your patch is disabling BTS (branch trace store) after the
first vmexit, isn't it?

Thanks,

Paolo
Wanpeng Li Nov. 29, 2017, 9:20 a.m. UTC | #4
2017-11-29 17:13 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
> On 29/11/2017 09:51, Wanpeng Li wrote:
>> 2017-11-29 16:48 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
>>> On 29/11/2017 07:07, Wanpeng Li wrote:
>>>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>>>
>>>> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored
>>>> each time during world switch. Jim from Google pointed out that
>>>> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time,
>>>> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr().
>>>> This patch caches the host IA32_DEBUGCTL MSR and saves/restores
>>>> the host IA32_DEBUGCTL msr when guest/host switches to avoid to
>>>> save/restore each time during world switch.
>>>>
>>>> Suggested-by: Jim Mattson <jmattson@google.com>
>>>> Cc: Jim Mattson <jmattson@google.com>
>>>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>>>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>>>> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
>>>
>>> The update_debugctlmsr should stay in vmx_vcpu_run so that tracing
>>> features work correctly.  However, the get_debugctlmsr indeed can be
>>
>> The tracing can't run except vCPU is schedule out, so why
>> update_debugctlmsr should stay in vmx_vcpu_run?
>
> For example your patch is disabling BTS (branch trace store) after the
> first vmexit, isn't it?

I see. Thanks for pointing out. :)

Regards,
Wanpeng Li
Andi Kleen Nov. 29, 2017, 6:20 p.m. UTC | #5
Wanpeng Li <kernellwp@gmail.com> writes:

> From: Wanpeng Li <wanpeng.li@hotmail.com>
>
> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored 
> each time during world switch. Jim from Google pointed out that 
> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time, 
> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr(). 
> This patch caches the host IA32_DEBUGCTL MSR and saves/restores 
> the host IA32_DEBUGCTL msr when guest/host switches to avoid to 
> save/restore each time during world switch.

FWIW i've seen this too on L2 profiles.

But I haven't looked too closely, but I suspect you'll clobber global
kernel debugger state this way.

You would at least need some interface for KDB etc. to invalidate
your cache.

-Andi
Jim Mattson Nov. 29, 2017, 7:05 p.m. UTC | #6
An alternative is to give the L1 guest read permission for this MSR in
the MSR permission bitmaps. It's still going to be ~80 cycles, but
that's better than the cost of a VM-exit/VM-entry round-trip.

On Wed, Nov 29, 2017 at 10:20 AM, Andi Kleen <ak@linux.intel.com> wrote:
> Wanpeng Li <kernellwp@gmail.com> writes:
>
>> From: Wanpeng Li <wanpeng.li@hotmail.com>
>>
>> MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT, so it is saved/restored
>> each time during world switch. Jim from Google pointed out that
>> when running schbench in L2, vmx_vcpu_run will occupy 4% cpu time,
>> and the 25% of vmx_vcpu_run cpu time is occupied by get_debugctlmsr().
>> This patch caches the host IA32_DEBUGCTL MSR and saves/restores
>> the host IA32_DEBUGCTL msr when guest/host switches to avoid to
>> save/restore each time during world switch.
>
> FWIW i've seen this too on L2 profiles.
>
> But I haven't looked too closely, but I suspect you'll clobber global
> kernel debugger state this way.
>
> You would at least need some interface for KDB etc. to invalidate
> your cache.
>
> -Andi
Andi Kleen Nov. 29, 2017, 8:56 p.m. UTC | #7
On Wed, Nov 29, 2017 at 11:05:46AM -0800, Jim Mattson wrote:
> An alternative is to give the L1 guest read permission for this MSR in
> the MSR permission bitmaps. It's still going to be ~80 cycles, but
> that's better than the cost of a VM-exit/VM-entry round-trip.

It's a useful optimization, 80 cycles is 80 cycles.

The cache invalidation could likely be really simple, like:
have a global counter
always check the counter before and after and don't use the cache
if they don't match.
change KDB etc. to increase the counter.

-Andi
Paolo Bonzini Nov. 29, 2017, 10:26 p.m. UTC | #8
On 29/11/2017 19:20, Andi Kleen wrote:
> But I haven't looked too closely, but I suspect you'll clobber global
> kernel debugger state this way.

I checked all callers of update_debugctlmsr, and couldn't find any that
could run asynchronously while KVM is caching the value.  For example
__switch_to_xtra would always run before the sched_in notifier.

Thanks,

Paolo
Andi Kleen Nov. 29, 2017, 10:45 p.m. UTC | #9
On Wed, Nov 29, 2017 at 11:26:30PM +0100, Paolo Bonzini wrote:
> On 29/11/2017 19:20, Andi Kleen wrote:
> > But I haven't looked too closely, but I suspect you'll clobber global
> > kernel debugger state this way.
> 
> I checked all callers of update_debugctlmsr, and couldn't find any that
> could run asynchronously while KVM is caching the value.  For example
> __switch_to_xtra would always run before the sched_in notifier.

True. It would only be a problem if the debugger supported branch stepping
or LBRs, which it doesn't seem to currently.

-Andi
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 63d34bc..c904250 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -619,6 +619,7 @@  struct kvm_vcpu_arch {
 	unsigned long dr7;
 	unsigned long eff_db[KVM_NR_DB_REGS];
 	unsigned long guest_debug_dr7;
+	unsigned long debugctlmsr;
 	u64 msr_platform_info;
 	u64 msr_misc_features_enables;
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8c7e816..b167bba 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2326,6 +2326,7 @@  static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	vmx_vcpu_pi_load(vcpu, cpu);
 	vmx->host_pkru = read_pkru();
+	vcpu->arch.debugctlmsr = get_debugctlmsr();
 }
 
 static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -2347,6 +2348,9 @@  static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 	vmx_vcpu_pi_put(vcpu);
 
 	__vmx_load_host_state(to_vmx(vcpu));
+	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
+	if (vcpu->arch.debugctlmsr)
+		update_debugctlmsr(vcpu->arch.debugctlmsr);
 }
 
 static bool emulation_required(struct kvm_vcpu *vcpu)
@@ -9346,7 +9350,7 @@  static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long debugctlmsr, cr3, cr4;
+	unsigned long cr3, cr4;
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!enable_vnmi &&
@@ -9399,7 +9403,6 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		__write_pkru(vcpu->arch.pkru);
 
 	atomic_switch_perf_msrs(vmx);
-	debugctlmsr = get_debugctlmsr();
 
 	vmx_arm_hv_timer(vcpu);
 
@@ -9509,10 +9512,6 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 	      );
 
-	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
-	if (debugctlmsr)
-		update_debugctlmsr(debugctlmsr);
-
 #ifndef CONFIG_X86_64
 	/*
 	 * The sysexit path does not restore ds/es, so we must set them to