KVM/nVMX: Use __vmx_vcpu_run in nested_vmx_check_vmentry_hw

Message ID 20201029134145.107560-1-ubizjak@gmail.com (mailing list archive)
State New, archived
Series KVM/nVMX: Use __vmx_vcpu_run in nested_vmx_check_vmentry_hw

Commit Message

Uros Bizjak Oct. 29, 2020, 1:41 p.m. UTC
Replace the open-coded inline assembly in
nested_vmx_check_vmentry_hw() with a call to __vmx_vcpu_run().  The
function is not performance critical, so the (double) GPR save/restore
in __vmx_vcpu_run() can be tolerated.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
 arch/x86/kvm/vmx/nested.c | 32 +++-----------------------------
 arch/x86/kvm/vmx/vmx.c    |  2 --
 arch/x86/kvm/vmx/vmx.h    |  1 +
 3 files changed, 4 insertions(+), 31 deletions(-)
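
For context, __vmx_vcpu_run() is the assembly helper in
arch/x86/kvm/vmx/vmenter.S that the regular VM-Enter path already uses.
A rough C-level sketch of its contract (illustrative only -- the real
routine is hand-written asm, and load_guest_gprs()/save_guest_gprs()
are hypothetical placeholders for its register shuffling):

	/* Sketch of the vmenter.S routine, not compilable kernel code. */
	bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched)
	{
		bool vm_fail;

		/* VMWRITE HOST_RSP only if the cached value went stale. */
		vmx_update_host_rsp(vmx, current_stack_pointer);

		load_guest_gprs(regs);		/* hypothetical: GPRs <- regs[] */
		vm_fail = launched ? VMRESUME() : VMLAUNCH();
		save_guest_gprs(regs);		/* hypothetical: regs[] <- GPRs */

		return vm_fail;			/* true on VM-Fail */
	}

The "(double) GPR save/restore" refers to the unconditional round-trip
through vcpu->arch.regs above; the open-coded asm being removed skipped
it, since this early VM-Enter is expected to fail the hardware's
consistency checks without ever running the guest.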

Comments

Uros Bizjak Dec. 16, 2020, 9:24 a.m. UTC | #1
Ping.  This patch didn't receive any feedback.

Thanks,
Uros.

On Thu, Oct 29, 2020 at 2:41 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> Replace the open-coded inline assembly in
> nested_vmx_check_vmentry_hw() with a call to __vmx_vcpu_run().  The
> function is not performance critical, so the (double) GPR save/restore
> in __vmx_vcpu_run() can be tolerated.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> ---
>  arch/x86/kvm/vmx/nested.c | 32 +++-----------------------------
>  arch/x86/kvm/vmx/vmx.c    |  2 --
>  arch/x86/kvm/vmx/vmx.h    |  1 +
>  3 files changed, 4 insertions(+), 31 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 89af692deb7e..6ab62bf277c4 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -12,6 +12,7 @@
>  #include "nested.h"
>  #include "pmu.h"
>  #include "trace.h"
> +#include "vmx.h"
>  #include "x86.h"
>
>  static bool __read_mostly enable_shadow_vmcs = 1;
> @@ -3056,35 +3057,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
>                 vmx->loaded_vmcs->host_state.cr4 = cr4;
>         }
>
> -       asm(
> -               "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
> -               "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
> -               "je 1f \n\t"
> -               __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
> -               "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
> -               "1: \n\t"
> -               "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
> -
> -               /* Check if vmlaunch or vmresume is needed */
> -               "cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
> -
> -               /*
> -                * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
> -                * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail
> -                * Valid.  vmx_vmenter() directly "returns" RFLAGS, and so the
> -                * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail.
> -                */
> -               "call vmx_vmenter\n\t"
> -
> -               CC_SET(be)
> -             : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
> -             : [HOST_RSP]"r"((unsigned long)HOST_RSP),
> -               [loaded_vmcs]"r"(vmx->loaded_vmcs),
> -               [launched]"i"(offsetof(struct loaded_vmcs, launched)),
> -               [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
> -               [wordsize]"i"(sizeof(ulong))
> -             : "memory"
> -       );
> +       vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
> +                                vmx->loaded_vmcs->launched);
>
>         if (vmx->msr_autoload.host.nr)
>                 vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index d14c94d0aff1..0f390c748b18 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6591,8 +6591,6 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
>         }
>  }
>
> -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
> -
>  static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
>                                         struct vcpu_vmx *vmx)
>  {
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index f6f66e5c6510..32db3b033e9b 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -339,6 +339,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
>  struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
>  void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
>  void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
> +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
>  int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
>  void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
>
> --
> 2.26.2
>
Krish Sadhukhan Dec. 16, 2020, 8:15 p.m. UTC | #2
On 12/16/20 1:24 AM, Uros Bizjak wrote:
> Ping.  This patch didn't receive any feedback.
>
> Thanks,
> Uros.
>
> On Thu, Oct 29, 2020 at 2:41 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>> Replace the open-coded inline assembly in
>> nested_vmx_check_vmentry_hw() with a call to __vmx_vcpu_run().  The
>> function is not performance critical, so the (double) GPR save/restore
>> in __vmx_vcpu_run() can be tolerated.
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
>> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
>> ---
>>   arch/x86/kvm/vmx/nested.c | 32 +++-----------------------------
>>   arch/x86/kvm/vmx/vmx.c    |  2 --
>>   arch/x86/kvm/vmx/vmx.h    |  1 +
>>   3 files changed, 4 insertions(+), 31 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
>> index 89af692deb7e..6ab62bf277c4 100644
>> --- a/arch/x86/kvm/vmx/nested.c
>> +++ b/arch/x86/kvm/vmx/nested.c
>> @@ -12,6 +12,7 @@
>>   #include "nested.h"
>>   #include "pmu.h"
>>   #include "trace.h"
>> +#include "vmx.h"
>>   #include "x86.h"
>>
>>   static bool __read_mostly enable_shadow_vmcs = 1;
>> @@ -3056,35 +3057,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
>>                  vmx->loaded_vmcs->host_state.cr4 = cr4;
>>          }
>>
>> -       asm(
>> -               "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
>> -               "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
>> -               "je 1f \n\t"
>> -               __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
>> -               "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
>> -               "1: \n\t"
>> -               "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
>> -
>> -               /* Check if vmlaunch or vmresume is needed */
>> -               "cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
>> -
>> -               /*
>> -                * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
>> -                * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail
>> -                * Valid.  vmx_vmenter() directly "returns" RFLAGS, and so the
>> -                * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail.
>> -                */
>> -               "call vmx_vmenter\n\t"
>> -
>> -               CC_SET(be)
>> -             : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
>> -             : [HOST_RSP]"r"((unsigned long)HOST_RSP),
>> -               [loaded_vmcs]"r"(vmx->loaded_vmcs),
>> -               [launched]"i"(offsetof(struct loaded_vmcs, launched)),
>> -               [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
>> -               [wordsize]"i"(sizeof(ulong))
>> -             : "memory"
>> -       );
>> +       vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
>> +                                vmx->loaded_vmcs->launched);
>>
>>          if (vmx->msr_autoload.host.nr)
>>                  vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index d14c94d0aff1..0f390c748b18 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -6591,8 +6591,6 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
>>          }
>>   }
>>
>> -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
>> -
>>   static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
>>                                          struct vcpu_vmx *vmx)
>>   {
>> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
>> index f6f66e5c6510..32db3b033e9b 100644
>> --- a/arch/x86/kvm/vmx/vmx.h
>> +++ b/arch/x86/kvm/vmx/vmx.h
>> @@ -339,6 +339,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
>>   struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
>>   void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
>>   void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
>> +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
>>   int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
>>   void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
>>
>> --
>> 2.26.2
>>
Semantically, __vmx_vcpu_run() is called to enter guest mode, but
nested_vmx_check_vmentry_hw() does not enter guest mode.  Guest mode is
entered when nested_vmx_enter_non_root_mode() calls enter_guest_mode().

Secondly, why not just replace the first half of the assembly block with
a call to vmx_update_host_rsp() and leave the rest as is?
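
(For reference, a sketch of the suggested helper's logic, assuming it
still matches what vmx.c has at this point:

	void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
	{
		if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
			vmx->loaded_vmcs->host_state.rsp = host_rsp;
			vmcs_writel(HOST_RSP, host_rsp);
		}
	}

Note that the value written to HOST_RSP must match the actual RSP at the
moment of VMLAUNCH/VMRESUME, which is why the open-coded asm computes it
inside the asm block itself.)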
Sean Christopherson Dec. 16, 2020, 9:05 p.m. UTC | #3
On Wed, Dec 16, 2020, Krish Sadhukhan wrote:
> > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > > index d14c94d0aff1..0f390c748b18 100644
> > > --- a/arch/x86/kvm/vmx/vmx.c
> > > +++ b/arch/x86/kvm/vmx/vmx.c
> > > @@ -6591,8 +6591,6 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
> > >          }
> > >   }
> > > 
> > > -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
> > > -
> > >   static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
> > >                                          struct vcpu_vmx *vmx)
> > >   {
> > > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> > > index f6f66e5c6510..32db3b033e9b 100644
> > > --- a/arch/x86/kvm/vmx/vmx.h
> > > +++ b/arch/x86/kvm/vmx/vmx.h
> > > @@ -339,6 +339,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
> > >   struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
> > >   void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
> > >   void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
> > > +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
> > >   int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
> > >   void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
> > > 
> > > --
> > > 2.26.2
> > > 
> Semantically, __vmx_vcpu_run() is called to enter guest mode, but
> nested_vmx_check_vmentry_hw() does not enter guest mode.  Guest mode is
> entered when nested_vmx_enter_non_root_mode() calls enter_guest_mode().

Naming aside, this patch intentionally redefines the semantics to mean "execute
VM-Enter that may or may not succeed".  And as called out in the changelog, the
overhead of the GPR save/load/restore is tolerable; reusing code and avoiding
ugly inline asm is more important.

> Secondly, why not just replace the first half of the assembly block with
> a call to vmx_update_host_rsp() and leave the rest as is?

As above, though not called out in the changelog, the goal is to move away from
the inline asm without introducing another asm subroutine.

Uros, I'll try to double check and review this later today.
Sean Christopherson Dec. 17, 2020, 6:04 p.m. UTC | #4
For future patches, please Cc LKML (in addition to KVM) so that the automatic
archiving and patchwork stuff kicks in.  Thanks!

On Wed, Dec 16, 2020, Uros Bizjak wrote:
> Ping.  This patch didn't receive any feedback.
>
> On Thu, Oct 29, 2020 at 2:41 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > Replace the open-coded inline assembly in
> > nested_vmx_check_vmentry_hw() with a call to __vmx_vcpu_run().  The
> > function is not performance critical, so the (double) GPR save/restore
> > in __vmx_vcpu_run() can be tolerated.
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> > Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> > ---

vmx_vmenter() in vmx/vmenter.S can and should now use SYM_FUNC_START_LOCAL
instead of SYM_FUNC_START.  Other than that nit:

Reviewed-and-tested-by: Sean Christopherson <seanjc@google.com>
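
For the record, that nit amounts to a one-line vmenter.S change (sketch,
not the actual v2 diff):

	-SYM_FUNC_START(vmx_vmenter)
	+SYM_FUNC_START_LOCAL(vmx_vmenter)

With the "call vmx_vmenter" in nested.c gone, the symbol is referenced
only from within vmenter.S and no longer needs to be global.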
Uros Bizjak Dec. 17, 2020, 6:12 p.m. UTC | #5
On Thu, Dec 17, 2020 at 7:04 PM Sean Christopherson <seanjc@google.com> wrote:
>
> For future patches, please Cc LKML (in addition to KVM) so that the automatic
> archiving and patchwork stuff kicks in.  Thanks!
>
> On Wed, Dec 16, 2020, Uros Bizjak wrote:
> > Ping.  This patch didn't receive any feedback.
> >
> > On Thu, Oct 29, 2020 at 2:41 PM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > Replace the open-coded inline assembly in
> > > nested_vmx_check_vmentry_hw() with a call to __vmx_vcpu_run().  The
> > > function is not performance critical, so the (double) GPR save/restore
> > > in __vmx_vcpu_run() can be tolerated.
> > >
> > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> > > Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> > > ---
>
> vmx_vmenter() in vmx/vmenter.S can and should now use SYM_FUNC_START_LOCAL
> instead of SYM_FUNC_START.  Other than that nit:
>
> Reviewed-and-tested-by: Sean Christopherson <seanjc@google.com>

Thanks!

I'll prepare a v2 (and add LKML to Cc, as you suggested).

Uros.

Patch

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 89af692deb7e..6ab62bf277c4 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -12,6 +12,7 @@ 
 #include "nested.h"
 #include "pmu.h"
 #include "trace.h"
+#include "vmx.h"
 #include "x86.h"
 
 static bool __read_mostly enable_shadow_vmcs = 1;
@@ -3056,35 +3057,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 		vmx->loaded_vmcs->host_state.cr4 = cr4;
 	}
 
-	asm(
-		"sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */
-		"cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
-		"je 1f \n\t"
-		__ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t"
-		"mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t"
-		"1: \n\t"
-		"add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */
-
-		/* Check if vmlaunch or vmresume is needed */
-		"cmpb $0, %c[launched](%[loaded_vmcs])\n\t"
-
-		/*
-		 * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set
-		 * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail
-		 * Valid.  vmx_vmenter() directly "returns" RFLAGS, and so the
-		 * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail.
-		 */
-		"call vmx_vmenter\n\t"
-
-		CC_SET(be)
-	      : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail)
-	      :	[HOST_RSP]"r"((unsigned long)HOST_RSP),
-		[loaded_vmcs]"r"(vmx->loaded_vmcs),
-		[launched]"i"(offsetof(struct loaded_vmcs, launched)),
-		[host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)),
-		[wordsize]"i"(sizeof(ulong))
-	      : "memory"
-	);
+	vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+				 vmx->loaded_vmcs->launched);
 
 	if (vmx->msr_autoload.host.nr)
 		vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d14c94d0aff1..0f390c748b18 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6591,8 +6591,6 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 	}
 }
 
-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
-
 static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 					struct vcpu_vmx *vmx)
 {
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index f6f66e5c6510..32db3b033e9b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -339,6 +339,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
 void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
 void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
 int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);