Message ID | 20190118212037.24412-29-sean.j.christopherson@intel.com (mailing list archive) |
---|---|
State | New, archived |
Series | KVM: VMX: Move vCPU-run to proper asm sub-routine |
On 18/01/19 22:20, Sean Christopherson wrote:
> ...and of course actually call it from C now that the assembly code is
> in a dedicated sub-routine and has been cleansed of any quirks that
> would break compliance with the kernel's x86 calling conventions.

I'm not sure about removing the clobbering. If the stack is outside L1,
could this lead to speculative execution with the guest values in the
registers?

No problem with patches 1-26 though.

Paolo
On Tue, Jan 22, 2019 at 01:35:41PM +0100, Paolo Bonzini wrote:
> On 18/01/19 22:20, Sean Christopherson wrote:
> > ...and of course actually call it from C now that the assembly code is
> > in a dedicated sub-routine and has been cleansed of any quirks that
> > would break compliance with the kernel's x86 calling conventions.
>
> I'm not sure about removing the clobbering. If the stack is outside L1,
> could this lead to speculative execution with the guest values in the
> registers?

The stack belongs to the L0 kernel. Or did I misunderstand the comment?

> No problem with patches 1-26 though.
>
> Paolo
On 22/01/19 15:59, Sean Christopherson wrote:
> On Tue, Jan 22, 2019 at 01:35:41PM +0100, Paolo Bonzini wrote:
>> On 18/01/19 22:20, Sean Christopherson wrote:
>>> ...and of course actually call it from C now that the assembly code is
>>> in a dedicated sub-routine and has been cleansed of any quirks that
>>> would break compliance with the kernel's x86 calling conventions.
>>
>> I'm not sure about removing the clobbering. If the stack is outside L1,
>> could this lead to speculative execution with the guest values in the
>> registers?
>
> The stack belongs to the L0 kernel. Or did I misunderstand the comment?

I meant outside L1 cache (overloaded terms... :)).

Paolo
On Thu, Jan 24, 2019 at 09:21:31PM +0100, Paolo Bonzini wrote:
> On 22/01/19 15:59, Sean Christopherson wrote:
> > On Tue, Jan 22, 2019 at 01:35:41PM +0100, Paolo Bonzini wrote:
> >> On 18/01/19 22:20, Sean Christopherson wrote:
> >>> ...and of course actually call it from C now that the assembly code is
> >>> in a dedicated sub-routine and has been cleansed of any quirks that
> >>> would break compliance with the kernel's x86 calling conventions.
> >>
> >> I'm not sure about removing the clobbering. If the stack is outside L1,
> >> could this lead to speculative execution with the guest values in the
> >> registers?
> >
> > The stack belongs to the L0 kernel. Or did I misunderstand the comment?
>
> I meant outside L1 cache (overloaded terms... :)).

Alternatively, what about zeroing out the callee-save registers prior to
restoring them? That'd allow the function to be called from C, and the
patch could be introduced earlier in the series, e.g. to apply the logic
to RBP, which is currently only saved/restored. A few extra zeroing
XORs is dirt cheap, especially on CPUs with move elimination.
On 24/01/19 22:23, Sean Christopherson wrote:
> On Thu, Jan 24, 2019 at 09:21:31PM +0100, Paolo Bonzini wrote:
>> On 22/01/19 15:59, Sean Christopherson wrote:
>>> On Tue, Jan 22, 2019 at 01:35:41PM +0100, Paolo Bonzini wrote:
>>>> On 18/01/19 22:20, Sean Christopherson wrote:
>>>>> ...and of course actually call it from C now that the assembly code is
>>>>> in a dedicated sub-routine and has been cleansed of any quirks that
>>>>> would break compliance with the kernel's x86 calling conventions.
>>>>
>>>> I'm not sure about removing the clobbering. If the stack is outside L1,
>>>> could this lead to speculative execution with the guest values in the
>>>> registers?
>>>
>>> The stack belongs to the L0 kernel. Or did I misunderstand the comment?
>>
>> I meant outside L1 cache (overloaded terms... :)).
>
> Alternatively, what about zeroing out the callee-save registers prior to
> restoring them? That'd allow the function to be called from C, and the
> patch could be introduced earlier in the series, e.g. to apply the logic
> to RBP, which is currently only saved/restored. A few extra zeroing
> XORs is dirt cheap, especially on CPUs with move elimination.

That's a good idea, it gets the best of both worlds.

Paolo
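For concreteness, the zero-before-restore idea being agreed on above would look
roughly like the sketch below when applied to the epilogue of the patch that
follows. This is only an illustration of the proposal, not code from the
series: each callee-saved register (plus RBX and RBP) is cleared with a
dependency-free XOR before the POP reloads the host value, so a POP whose load
misses the cache cannot leave a guest-controlled value live in the register
for speculative execution.

	/*
	 * Sketch of the proposal, not part of this patch: scrub guest values
	 * from the callee-saved registers with cheap XORs before the POPs
	 * reload the host values.  The XORs have no memory dependency, so a
	 * cache-missing POP cannot stall with a guest value still live.
	 */
	xor %ebx, %ebx
	xor %ebp, %ebp
#ifdef CONFIG_X86_64
	xor %r12d, %r12d
	xor %r13d, %r13d
	xor %r14d, %r14d
	xor %r15d, %r15d
#else
	xor %esi, %esi
	xor %edi, %edi
#endif

	pop %_ASM_BX
#ifdef CONFIG_X86_64
	pop %r12
	pop %r13
	pop %r14
	pop %r15
#else
	pop %esi
	pop %edi
#endif
	pop %_ASM_BP
	ret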
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 45e3e381d41d..28c9034773b8 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -87,17 +87,34 @@ ENDPROC(vmx_vmexit)
  * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
  * @vmx:	struct vcpu_vmx *
  * @regs:	unsigned long * (to guest registers)
- * %RBX:	VMCS launched status (non-zero indicates already launched)
+ * @launched:	%true if the VMCS has been launched
  *
  * Returns:
- *	%RBX is 0 on VM-Exit, 1 on VM-Fail
+ *	0 on VM-Exit, 1 on VM-Fail
  */
 ENTRY(__vmx_vcpu_run)
-	/* Create a stack frame and save @regs. */
 	push %_ASM_BP
-	mov  %_ASM_SP, %_ASM_BP
+	mov %_ASM_SP, %_ASM_BP
+#ifdef CONFIG_X86_64
+	push %r15
+	push %r14
+	push %r13
+	push %r12
+#else
+	push %edi
+	push %esi
+#endif
+	push %_ASM_BX
+
+	/*
+	 * Save @regs, its register may be modified by vmx_update_host_rsp()
+	 * and it's also needed after VM-Exit.
+	 */
 	push %_ASM_ARG2
 
+	/* Copy @launched to BL, _ASM_ARG3 is volatile. */
+	mov %_ASM_ARG3B, %bl
+
 	/* Adjust RSP to account for the CALL to vmx_vmenter(). */
 	lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
 	call vmx_update_host_rsp
@@ -159,8 +176,8 @@ ENTRY(__vmx_vcpu_run)
 	mov %r15, VCPU_R15(%_ASM_AX)
 #endif
 
-	/* Clear EBX to indicate VM-Exit (as opposed to VM-Fail). */
-	xor %ebx, %ebx
+	/* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
+	xor %eax, %eax
 
 	/*
 	 * Clear registers that contain guest values and will not be
@@ -172,10 +189,6 @@ ENTRY(__vmx_vcpu_run)
 	xor %r9d,  %r9d
 	xor %r10d, %r10d
 	xor %r11d, %r11d
-	xor %r12d, %r12d
-	xor %r13d, %r13d
-	xor %r14d, %r14d
-	xor %r15d, %r15d
 #endif
 	xor %ecx, %ecx
 	xor %edx, %edx
@@ -184,15 +197,21 @@ ENTRY(__vmx_vcpu_run)
 
 	/* "POP" @regs. */
 	add $WORD_SIZE, %_ASM_SP
-	pop %_ASM_BP
+	pop %_ASM_BX
+
+#ifdef CONFIG_X86_64
+	pop %r12
+	pop %r13
+	pop %r14
+	pop %r15
+#else
+	pop %esi
+	pop %edi
+#endif
+	pop %_ASM_BP
 	ret
 
 	/* VM-Fail.  Out-of-line to avoid a taken Jcc after VM-Exit. */
-2:	mov $1, %ebx
-	/*
-	 * RAX holds a guest value and it's not cleared in the common
-	 * exit path as VM-Exit reloads it with the vcpu_vmx pointer.
-	 */
-	xor %eax, %eax
+2:	mov $1, %eax
 	jmp 1b
 ENDPROC(__vmx_vcpu_run)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c4c6d6ccd53b..d840810ca91f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6370,6 +6370,8 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
 	}
 }
 
+bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
+
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6443,25 +6445,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.cr2 != read_cr2())
 		write_cr2(vcpu->arch.cr2);
 
-	asm(
-		"call __vmx_vcpu_run \n\t"
-	      : ASM_CALL_CONSTRAINT, "=ebx"(vmx->fail),
-#ifdef CONFIG_X86_64
-		"=D"((int){0}), "=S"((int){0})
-	      : "D"(vmx), "S"(&vcpu->arch.regs),
-#else
-		"=a"((int){0}), "=d"((int){0})
-	      : "a"(vmx), "d"(&vcpu->arch.regs),
-#endif
-		"bl"(vmx->loaded_vmcs->launched)
-	      : "cc", "memory"
-#ifdef CONFIG_X86_64
-		, "rax", "rcx", "rdx"
-		, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
-#else
-		, "ecx", "edi", "esi"
-#endif
-	);
+	vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
+				   vmx->loaded_vmcs->launched);
 
 	vcpu->arch.cr2 = read_cr2();
...and of course actually call it from C now that the assembly code is
in a dedicated sub-routine and has been cleansed of any quirks that
would break compliance with the kernel's x86 calling conventions.

Aside from saving/restoring registers instead of clobbering them, the
only real functional difference is that VM-Fail is propagated to EAX
instead of EBX.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/vmenter.S | 53 ++++++++++++++++++++++++++------------
 arch/x86/kvm/vmx/vmx.c     | 23 +++--------------
 2 files changed, 40 insertions(+), 36 deletions(-)
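As a self-contained illustration of the calling-convention point above, the toy
userspace program below (x86-64 SysV ABI, GCC or Clang) shows the same pattern
in miniature: an assembly sub-routine that honors the calling convention gets an
ordinary C prototype and an ordinary call, with its status read from the return
register rather than wired up through inline-asm output constraints and clobber
lists. The name asm_run_stub and its arguments are made up for the example; this
is not kernel code.

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy stand-in for an asm sub-routine such as __vmx_vcpu_run: it follows
 * the SysV x86-64 calling convention, so C code only needs a prototype.
 * For demonstration it simply returns its third argument as the status.
 */
__asm__(
	".text\n"
	".globl asm_run_stub\n"
	"asm_run_stub:\n"
	"        movzbl %dl, %eax\n"	/* status = (bool)third argument */
	"        ret\n"
);

bool asm_run_stub(void *ctx, unsigned long *regs, bool launched);

int main(void)
{
	unsigned long regs[16] = { 0 };
	bool fail = asm_run_stub(NULL, regs, true);

	printf("fail = %d\n", fail);	/* prints "fail = 1" */
	return 0;
}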