Message ID | 20201029134145.107560-1-ubizjak@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM/nVMX: Use __vmx_vcpu_run in nested_vmx_check_vmentry_hw | expand |
Ping. This patch didn't receive any feedback. Thanks, Uros. On Thu, Oct 29, 2020 at 2:41 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > Replace inline assembly in nested_vmx_check_vmentry_hw > with a call to __vmx_vcpu_run. The function is not > performance critical, so (double) GPR save/restore > in __vmx_vcpu_run can be tolerated, as far as performance > effects are concerned. > > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Sean Christopherson <sean.j.christopherson@intel.com> > Signed-off-by: Uros Bizjak <ubizjak@gmail.com> > --- > arch/x86/kvm/vmx/nested.c | 32 +++----------------------------- > arch/x86/kvm/vmx/vmx.c | 2 -- > arch/x86/kvm/vmx/vmx.h | 1 + > 3 files changed, 4 insertions(+), 31 deletions(-) > > diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c > index 89af692deb7e..6ab62bf277c4 100644 > --- a/arch/x86/kvm/vmx/nested.c > +++ b/arch/x86/kvm/vmx/nested.c > @@ -12,6 +12,7 @@ > #include "nested.h" > #include "pmu.h" > #include "trace.h" > +#include "vmx.h" > #include "x86.h" > > static bool __read_mostly enable_shadow_vmcs = 1; > @@ -3056,35 +3057,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) > vmx->loaded_vmcs->host_state.cr4 = cr4; > } > > - asm( > - "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ > - "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" > - "je 1f \n\t" > - __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t" > - "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" > - "1: \n\t" > - "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ > - > - /* Check if vmlaunch or vmresume is needed */ > - "cmpb $0, %c[launched](%[loaded_vmcs])\n\t" > - > - /* > - * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set > - * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail > - * Valid. vmx_vmenter() directly "returns" RFLAGS, and so the > - * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail. 
> - */ > - "call vmx_vmenter\n\t" > - > - CC_SET(be) > - : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail) > - : [HOST_RSP]"r"((unsigned long)HOST_RSP), > - [loaded_vmcs]"r"(vmx->loaded_vmcs), > - [launched]"i"(offsetof(struct loaded_vmcs, launched)), > - [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), > - [wordsize]"i"(sizeof(ulong)) > - : "memory" > - ); > + vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, > + vmx->loaded_vmcs->launched); > > if (vmx->msr_autoload.host.nr) > vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > index d14c94d0aff1..0f390c748b18 100644 > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -6591,8 +6591,6 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) > } > } > > -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); > - > static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, > struct vcpu_vmx *vmx) > { > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h > index f6f66e5c6510..32db3b033e9b 100644 > --- a/arch/x86/kvm/vmx/vmx.h > +++ b/arch/x86/kvm/vmx/vmx.h > @@ -339,6 +339,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); > struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); > void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); > void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); > +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); > int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); > void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); > > -- > 2.26.2 >
On 12/16/20 1:24 AM, Uros Bizjak wrote: > Ping. This patch didn't receive any feedback. > > Thanks, > Uros. > > On Thu, Oct 29, 2020 at 2:41 PM Uros Bizjak <ubizjak@gmail.com> wrote: >> Replace inline assembly in nested_vmx_check_vmentry_hw >> with a call to __vmx_vcpu_run. The function is not >> performance critical, so (double) GPR save/restore >> in __vmx_vcpu_run can be tolerated, as far as performance >> effects are concerned. >> >> Cc: Paolo Bonzini <pbonzini@redhat.com> >> Cc: Sean Christopherson <sean.j.christopherson@intel.com> >> Signed-off-by: Uros Bizjak <ubizjak@gmail.com> >> --- >> arch/x86/kvm/vmx/nested.c | 32 +++----------------------------- >> arch/x86/kvm/vmx/vmx.c | 2 -- >> arch/x86/kvm/vmx/vmx.h | 1 + >> 3 files changed, 4 insertions(+), 31 deletions(-) >> >> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c >> index 89af692deb7e..6ab62bf277c4 100644 >> --- a/arch/x86/kvm/vmx/nested.c >> +++ b/arch/x86/kvm/vmx/nested.c >> @@ -12,6 +12,7 @@ >> #include "nested.h" >> #include "pmu.h" >> #include "trace.h" >> +#include "vmx.h" >> #include "x86.h" >> >> static bool __read_mostly enable_shadow_vmcs = 1; >> @@ -3056,35 +3057,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) >> vmx->loaded_vmcs->host_state.cr4 = cr4; >> } >> >> - asm( >> - "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ >> - "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" >> - "je 1f \n\t" >> - __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t" >> - "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" >> - "1: \n\t" >> - "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ >> - >> - /* Check if vmlaunch or vmresume is needed */ >> - "cmpb $0, %c[launched](%[loaded_vmcs])\n\t" >> - >> - /* >> - * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set >> - * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail >> - * Valid. 
vmx_vmenter() directly "returns" RFLAGS, and so the >> - * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail. >> - */ >> - "call vmx_vmenter\n\t" >> - >> - CC_SET(be) >> - : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail) >> - : [HOST_RSP]"r"((unsigned long)HOST_RSP), >> - [loaded_vmcs]"r"(vmx->loaded_vmcs), >> - [launched]"i"(offsetof(struct loaded_vmcs, launched)), >> - [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), >> - [wordsize]"i"(sizeof(ulong)) >> - : "memory" >> - ); >> + vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, >> + vmx->loaded_vmcs->launched); >> >> if (vmx->msr_autoload.host.nr) >> vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); >> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c >> index d14c94d0aff1..0f390c748b18 100644 >> --- a/arch/x86/kvm/vmx/vmx.c >> +++ b/arch/x86/kvm/vmx/vmx.c >> @@ -6591,8 +6591,6 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) >> } >> } >> >> -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); >> - >> static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, >> struct vcpu_vmx *vmx) >> { >> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h >> index f6f66e5c6510..32db3b033e9b 100644 >> --- a/arch/x86/kvm/vmx/vmx.h >> +++ b/arch/x86/kvm/vmx/vmx.h >> @@ -339,6 +339,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); >> struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); >> void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); >> void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); >> +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); >> int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); >> void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); >> >> -- >> 2.26.2 >> Semantically __vmx_vcpu_run() is called to enter guest mode. In nested_vmx_check_vmentry_hw(), we are not entering guest mode. 
Guest mode is entered when nested_vmx_enter_non_root_mode() calls enter_guest_mode(). Secondly, why not just replace the first half of the assembly block with a call to vmx_update_host_rsp() and leave the rest as is?
On Wed, Dec 16, 2020, Krish Sadhukhan wrote: > > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > > > index d14c94d0aff1..0f390c748b18 100644 > > > --- a/arch/x86/kvm/vmx/vmx.c > > > +++ b/arch/x86/kvm/vmx/vmx.c > > > @@ -6591,8 +6591,6 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) > > > } > > > } > > > > > > -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); > > > - > > > static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, > > > struct vcpu_vmx *vmx) > > > { > > > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h > > > index f6f66e5c6510..32db3b033e9b 100644 > > > --- a/arch/x86/kvm/vmx/vmx.h > > > +++ b/arch/x86/kvm/vmx/vmx.h > > > @@ -339,6 +339,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); > > > struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); > > > void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); > > > void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); > > > +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); > > > int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); > > > void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); > > > > > > -- > > > 2.26.2 > > > > Semantically __vmx_vcpu_run() is called to enter guest mode. In > nested_vmx_check_vmentry_hw(), we are not entering guest mode. Guest mode is > entered when nested_vmx_enter_non_root_mode() calls enter_guest_mode(). Naming aside, this patch intentionally redefines the semantics to mean "execute VM-Enter that may or may not succeed". And as called out in the changelog, the overhead of the GPR save/load/restore is tolerable; reusing code and avoiding ugly inline asm is more important. > Secondly, why not just replace the first half of the assembly block with a > call to vmx_update_host_rsp() and leave the rest as is ? 
As above, though not called out in the changelog, the goal is to move away from the inline asm without introducing another asm subroutine. Uros, I'll try to double check and review this later today.
For future patches, please Cc LKML (in addition to KVM) so that the automatic archiving and patchwork stuff kicks in. Thanks! On Wed, Dec 16, 2020, Uros Bizjak wrote: > Ping. This patch didn't receive any feedback. > > On Thu, Oct 29, 2020 at 2:41 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > Replace inline assembly in nested_vmx_check_vmentry_hw > > with a call to __vmx_vcpu_run. The function is not > > performance critical, so (double) GPR save/restore > > in __vmx_vcpu_run can be tolerated, as far as performance > > effects are concerned. > > > > Cc: Paolo Bonzini <pbonzini@redhat.com> > > Cc: Sean Christopherson <sean.j.christopherson@intel.com> > > Signed-off-by: Uros Bizjak <ubizjak@gmail.com> > > --- vmx_vmenter() in vmx/vmenter.S can and should now use SYM_FUNC_START_LOCAL instead of SYM_FUNC_START. Other than that nit: Reviewed-and-tested-by: Sean Christopherson <seanjc@google.com>
On Thu, Dec 17, 2020 at 7:04 PM Sean Christopherson <seanjc@google.com> wrote: > > For future patches, please Cc LKML (in additional to KVM) so that the automatic > archiving and patchwork stuff kicks in. Thanks! > > On Wed, Dec 16, 2020, Uros Bizjak wrote: > > Ping. This patch didn't receive any feedback. > > > > On Thu, Oct 29, 2020 at 2:41 PM Uros Bizjak <ubizjak@gmail.com> wrote: > > > > > > Replace inline assembly in nested_vmx_check_vmentry_hw > > > with a call to __vmx_vcpu_run. The function is not > > > performance critical, so (double) GPR save/restore > > > in __vmx_vcpu_run can be tolerated, as far as performance > > > effects are concerned. > > > > > > Cc: Paolo Bonzini <pbonzini@redhat.com> > > > Cc: Sean Christopherson <sean.j.christopherson@intel.com> > > > Signed-off-by: Uros Bizjak <ubizjak@gmail.com> > > > --- > > vmx_vmenter() in vmx/vmenter.S can and should now use SYM_FUNC_START_LOCAL > instead of SYM_FUNC_LOCAL. Other than that nit: > > Reviewed-and-tested-by: Sean Christopherson <seanjc@google.com> Thanks! I'll prepare a v2 (and added LKML, as you suggested). Uros.
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 89af692deb7e..6ab62bf277c4 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -12,6 +12,7 @@ #include "nested.h" #include "pmu.h" #include "trace.h" +#include "vmx.h" #include "x86.h" static bool __read_mostly enable_shadow_vmcs = 1; @@ -3056,35 +3057,8 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) vmx->loaded_vmcs->host_state.cr4 = cr4; } - asm( - "sub $%c[wordsize], %%" _ASM_SP "\n\t" /* temporarily adjust RSP for CALL */ - "cmp %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" - "je 1f \n\t" - __ex("vmwrite %%" _ASM_SP ", %[HOST_RSP]") "\n\t" - "mov %%" _ASM_SP ", %c[host_state_rsp](%[loaded_vmcs]) \n\t" - "1: \n\t" - "add $%c[wordsize], %%" _ASM_SP "\n\t" /* un-adjust RSP */ - - /* Check if vmlaunch or vmresume is needed */ - "cmpb $0, %c[launched](%[loaded_vmcs])\n\t" - - /* - * VMLAUNCH and VMRESUME clear RFLAGS.{CF,ZF} on VM-Exit, set - * RFLAGS.CF on VM-Fail Invalid and set RFLAGS.ZF on VM-Fail - * Valid. vmx_vmenter() directly "returns" RFLAGS, and so the - * results of VM-Enter is captured via CC_{SET,OUT} to vm_fail. 
- */ - "call vmx_vmenter\n\t" - - CC_SET(be) - : ASM_CALL_CONSTRAINT, CC_OUT(be) (vm_fail) - : [HOST_RSP]"r"((unsigned long)HOST_RSP), - [loaded_vmcs]"r"(vmx->loaded_vmcs), - [launched]"i"(offsetof(struct loaded_vmcs, launched)), - [host_state_rsp]"i"(offsetof(struct loaded_vmcs, host_state.rsp)), - [wordsize]"i"(sizeof(ulong)) - : "memory" - ); + vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, + vmx->loaded_vmcs->launched); if (vmx->msr_autoload.host.nr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d14c94d0aff1..0f390c748b18 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6591,8 +6591,6 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) } } -bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); - static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f6f66e5c6510..32db3b033e9b 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -339,6 +339,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
Replace inline assembly in nested_vmx_check_vmentry_hw with a call to __vmx_vcpu_run. The function is not performance critical, so (double) GPR save/restore in __vmx_vcpu_run can be tolerated, as far as performance effects are concerned. Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Sean Christopherson <sean.j.christopherson@intel.com> Signed-off-by: Uros Bizjak <ubizjak@gmail.com> --- arch/x86/kvm/vmx/nested.c | 32 +++----------------------------- arch/x86/kvm/vmx/vmx.c | 2 -- arch/x86/kvm/vmx/vmx.h | 1 + 3 files changed, 4 insertions(+), 31 deletions(-)