
KVM: x86: Expose the split lock detection feature to guest VM

Message ID 1526957507-123937-1-git-send-email-jingqi.liu@intel.com (mailing list archive)
State New, archived

Commit Message

Liu, Jingqi May 22, 2018, 2:51 a.m. UTC
A new control bit (bit 29) in the TEST_CTL MSR will be introduced
to enable detection of split locks.

When bit 29 of the TEST_CTL (33H) MSR is set, the processor causes
an #AC exception to be issued instead of suppressing LOCK on the
bus during a split lock access. An earlier control bit (bit 31) in
this MSR, when set, causes the processor to disable LOCK# assertion
for split locked accesses. When bits 29 and 31 are both set, bit 29
takes precedence.
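For illustration, here is a minimal user-space sketch (a hypothetical demo,
not part of the patch) of the kind of access the new bit turns into an #AC:
a LOCK-prefixed atomic operation on a value straddling a cache-line boundary.

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* 128 bytes, aligned to a 64-byte cache line. */
		char *buf = aligned_alloc(64, 128);
		/* Place a 4-byte counter across the boundary (bytes 62-65). */
		uint32_t *counter = (uint32_t *)(buf + 62);

		*counter = 0;
		/*
		 * LOCK XADD spanning two cache lines: a split lock. With
		 * TEST_CTL bit 29 set, the processor raises #AC here instead
		 * of asserting LOCK# on the bus.
		 */
		__sync_fetch_and_add(counter, 1);

		printf("split-locked increment done: %u\n", *counter);
		free(buf);
		return 0;
	}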

The feature is described in the Intel Architecture Instruction Set
Extensions Programming Reference:
https://software.intel.com/sites/default/files/managed/c5/15/\
architecture-instruction-set-extensions-programming-reference.pdf
This patch depends on https://lkml.org/lkml/2018/5/14/1157.

Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
---
 arch/x86/kvm/vmx.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

Comments

Paolo Bonzini May 21, 2018, 2:32 p.m. UTC | #1
On 22/05/2018 04:51, Jingqi Liu wrote:
> A new control bit(bit 29) in the TEST_CTL MSR will be introduced
> to enable detection of split locks.
> 
> When bit 29 of the TEST_CTL(33H) MSR is set, the processor
> causes an #AC exception to be issued instead of suppressing LOCK on
> bus(during split lock access). A previous control bit (bit 31)
> in this MSR causes the processor to disable LOCK# assertion for
> split locked accesses when set. When bits 29 and 31 are both set,
> bit 29 takes precedence.
> 
> The release document ref below link:
> https://software.intel.com/sites/default/files/managed/c5/15/\
> architecture-instruction-set-extensions-programming-reference.pdf
> This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157

Please follow the way spec_ctrl is implemented, so that there is no
wrmsr/rdmsr unless the guest is using the feature (and also no wrmsr
unless the guest and host settings don't match).
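A minimal sketch of that pattern, reusing the field names from this patch
(treating msr_write_intercepted() as the "guest has used the feature" test
is an assumption, mirroring the existing spec_ctrl code):

	static void vmx_switch_test_ctl_to_guest(struct vcpu_vmx *vmx)
	{
		/* Guest never wrote the MSR: intercept still set, zero cost. */
		if (msr_write_intercepted(&vmx->vcpu, MSR_TEST_CTL))
			return;

		vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
		/* Skip the wrmsr when guest and host already agree. */
		if (vmx->guest_split_lock_ctrl != vmx->host_split_lock_ctrl)
			native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
	}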

How should the guest detect that the bits are available?  Is there a
CPUID bit?

Paolo

> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
> ---
>  arch/x86/kvm/vmx.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 3f16965..07986e0 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -610,6 +610,8 @@ struct vcpu_vmx {
>  
>  	u64 		      arch_capabilities;
>  	u64 		      spec_ctrl;
> +	u64		      guest_split_lock_ctrl;
> +	u64		      host_split_lock_ctrl;
>  
>  	u32 vm_entry_controls_shadow;
>  	u32 vm_exit_controls_shadow;
> @@ -6013,6 +6015,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
>  	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
>  
> +	vmx->guest_split_lock_ctrl = 0;
> +
>  	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
>  		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
>  
> @@ -6062,6 +6066,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  
>  	vmx->rmode.vm86_active = 0;
>  	vmx->spec_ctrl = 0;
> +	vmx->guest_split_lock_ctrl = 0;
>  
>  	vcpu->arch.microcode_version = 0x100000000ULL;
>  	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
> @@ -9725,6 +9730,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	if (vmx->spec_ctrl)
>  		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>  
> +	vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
> +	native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
> +
>  	vmx->__launched = vmx->loaded_vmcs->launched;
>  
>  	evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
> @@ -9874,6 +9882,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	if (vmx->spec_ctrl)
>  		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>  
> +	vmx->guest_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
> +	native_wrmsrl(MSR_TEST_CTL, vmx->host_split_lock_ctrl);
> +
>  	/* Eliminate branch target predictions from guest mode */
>  	vmexit_fill_RSB();
>  
> @@ -10037,6 +10048,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
> +	vmx_disable_intercept_for_msr(msr_bitmap, MSR_TEST_CTL, MSR_TYPE_RW);
> +
>  	vmx->msr_bitmap_mode = 0;
>  
>  	vmx->loaded_vmcs = &vmx->vmcs01;
>
Konrad Rzeszutek Wilk May 21, 2018, 3:36 p.m. UTC | #2
On Tue, May 22, 2018 at 10:51:47AM +0800, Jingqi Liu wrote:
> A new control bit(bit 29) in the TEST_CTL MSR will be introduced
> to enable detection of split locks.
> 
> When bit 29 of the TEST_CTL(33H) MSR is set, the processor
> causes an #AC exception to be issued instead of suppressing LOCK on
> bus(during split lock access). A previous control bit (bit 31)
> in this MSR causes the processor to disable LOCK# assertion for
> split locked accesses when set. When bits 29 and 31 are both set,
> bit 29 takes precedence.

Migration?
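(For migration, the guest's TEST_CTL value would also have to be visible to
userspace. A sketch of what that could look like, hypothetical and not in
this patch, modeled on how other guest MSRs are saved and restored:)

	/* vmx_get_msr(): let userspace read the guest value out ... */
	case MSR_TEST_CTL:
		msr_info->data = to_vmx(vcpu)->guest_split_lock_ctrl;
		break;

	/* ... and vmx_set_msr(): let userspace restore it on the target. */
	case MSR_TEST_CTL:
		to_vmx(vcpu)->guest_split_lock_ctrl = msr_info->data;
		break;

	/* Plus an MSR_TEST_CTL entry in msrs_to_save[] in x86.c, so that
	 * KVM_GET_MSR_INDEX_LIST reports it to userspace. */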
> 
> The release document ref below link:
> https://software.intel.com/sites/default/files/managed/c5/15/\
> architecture-instruction-set-extensions-programming-reference.pdf
> This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157
> 
> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
> ---
>  arch/x86/kvm/vmx.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 3f16965..07986e0 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -610,6 +610,8 @@ struct vcpu_vmx {
>  
>  	u64 		      arch_capabilities;
>  	u64 		      spec_ctrl;
> +	u64		      guest_split_lock_ctrl;
> +	u64		      host_split_lock_ctrl;
>  
>  	u32 vm_entry_controls_shadow;
>  	u32 vm_exit_controls_shadow;
> @@ -6013,6 +6015,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
>  	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
>  
> +	vmx->guest_split_lock_ctrl = 0;
> +
>  	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
>  		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
>  
> @@ -6062,6 +6066,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  
>  	vmx->rmode.vm86_active = 0;
>  	vmx->spec_ctrl = 0;
> +	vmx->guest_split_lock_ctrl = 0;
>  
>  	vcpu->arch.microcode_version = 0x100000000ULL;
>  	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
> @@ -9725,6 +9730,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	if (vmx->spec_ctrl)
>  		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>  
> +	vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
> +	native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
> +
>  	vmx->__launched = vmx->loaded_vmcs->launched;
>  
>  	evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
> @@ -9874,6 +9882,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  	if (vmx->spec_ctrl)
>  		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>  
> +	vmx->guest_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
> +	native_wrmsrl(MSR_TEST_CTL, vmx->host_split_lock_ctrl);
> +
>  	/* Eliminate branch target predictions from guest mode */
>  	vmexit_fill_RSB();
>  
> @@ -10037,6 +10048,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
> +	vmx_disable_intercept_for_msr(msr_bitmap, MSR_TEST_CTL, MSR_TYPE_RW);
> +
>  	vmx->msr_bitmap_mode = 0;
>  
>  	vmx->loaded_vmcs = &vmx->vmcs01;
> -- 
> 1.8.3.1
>
kernel test robot May 21, 2018, 7:08 p.m. UTC | #3
Hi Jingqi,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.17-rc6 next-20180517]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Jingqi-Liu/KVM-x86-Expose-the-split-lock-detection-feature-to-guest-VM/20180522-004806
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: i386-randconfig-x010-201820 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   arch/x86/kvm/vmx.c: In function 'vmx_vcpu_run':
>> arch/x86/kvm/vmx.c:9756:46: error: 'MSR_TEST_CTL' undeclared (first use in this function); did you mean 'MSR_THERM2_CTL'?
     vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
                                                 ^~~~~~~~~~~~
                                                 MSR_THERM2_CTL
   arch/x86/kvm/vmx.c:9756:46: note: each undeclared identifier is reported only once for each function it appears in
   arch/x86/kvm/vmx.c: In function 'vmx_create_vcpu':
   arch/x86/kvm/vmx.c:10074:44: error: 'MSR_TEST_CTL' undeclared (first use in this function); did you mean 'MSR_THERM2_CTL'?
     vmx_disable_intercept_for_msr(msr_bitmap, MSR_TEST_CTL, MSR_TYPE_RW);
                                               ^~~~~~~~~~~~
                                               MSR_THERM2_CTL
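The missing identifier comes from the dependency patch
(https://lkml.org/lkml/2018/5/14/1157), which is not applied in this tree.
Roughly, the definitions it adds to arch/x86/include/asm/msr-index.h look
like the following (exact macro names are approximate):

	#define MSR_TEST_CTL				0x00000033
	#define TEST_CTL_ENABLE_SPLIT_LOCK_DETECT	(1UL << 29)
	#define TEST_CTL_DISABLE_LOCK_ASSERTION		(1UL << 31)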

vim +9756 arch/x86/kvm/vmx.c

  9687	
  9688	static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
  9689	{
  9690		struct vcpu_vmx *vmx = to_vmx(vcpu);
  9691		unsigned long cr3, cr4, evmcs_rsp;
  9692	
  9693		/* Record the guest's net vcpu time for enforced NMI injections. */
  9694		if (unlikely(!enable_vnmi &&
  9695			     vmx->loaded_vmcs->soft_vnmi_blocked))
  9696			vmx->loaded_vmcs->entry_time = ktime_get();
  9697	
  9698		/* Don't enter VMX if guest state is invalid, let the exit handler
  9699		   start emulation until we arrive back to a valid state */
  9700		if (vmx->emulation_required)
  9701			return;
  9702	
  9703		if (vmx->ple_window_dirty) {
  9704			vmx->ple_window_dirty = false;
  9705			vmcs_write32(PLE_WINDOW, vmx->ple_window);
  9706		}
  9707	
  9708		if (vmx->nested.sync_shadow_vmcs) {
  9709			copy_vmcs12_to_shadow(vmx);
  9710			vmx->nested.sync_shadow_vmcs = false;
  9711		}
  9712	
  9713		if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
  9714			vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
  9715		if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
  9716			vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
  9717	
  9718		cr3 = __get_current_cr3_fast();
  9719		if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) {
  9720			vmcs_writel(HOST_CR3, cr3);
  9721			vmx->loaded_vmcs->vmcs_host_cr3 = cr3;
  9722		}
  9723	
  9724		cr4 = cr4_read_shadow();
  9725		if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) {
  9726			vmcs_writel(HOST_CR4, cr4);
  9727			vmx->loaded_vmcs->vmcs_host_cr4 = cr4;
  9728		}
  9729	
  9730		/* When single-stepping over STI and MOV SS, we must clear the
  9731		 * corresponding interruptibility bits in the guest state. Otherwise
  9732		 * vmentry fails as it then expects bit 14 (BS) in pending debug
  9733		 * exceptions being set, but that's not correct for the guest debugging
  9734		 * case. */
  9735		if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
  9736			vmx_set_interrupt_shadow(vcpu, 0);
  9737	
  9738		if (static_cpu_has(X86_FEATURE_PKU) &&
  9739		    kvm_read_cr4_bits(vcpu, X86_CR4_PKE) &&
  9740		    vcpu->arch.pkru != vmx->host_pkru)
  9741			__write_pkru(vcpu->arch.pkru);
  9742	
  9743		atomic_switch_perf_msrs(vmx);
  9744	
  9745		vmx_arm_hv_timer(vcpu);
  9746	
  9747		/*
  9748		 * If this vCPU has touched SPEC_CTRL, restore the guest's value if
  9749		 * it's non-zero. Since vmentry is serialising on affected CPUs, there
  9750		 * is no need to worry about the conditional branch over the wrmsr
  9751		 * being speculatively taken.
  9752		 */
  9753		if (vmx->spec_ctrl)
  9754			native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
  9755	
> 9756		vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
  9757		native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
  9758	
  9759		vmx->__launched = vmx->loaded_vmcs->launched;
  9760	
  9761		evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
  9762			(unsigned long)&current_evmcs->host_rsp : 0;
  9763	
  9764		asm(
  9765			/* Store host registers */
  9766			"push %%" _ASM_DX "; push %%" _ASM_BP ";"
  9767			"push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
  9768			"push %%" _ASM_CX " \n\t"
  9769			"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
  9770			"je 1f \n\t"
  9771			"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
  9772			/* Avoid VMWRITE when Enlightened VMCS is in use */
  9773			"test %%" _ASM_SI ", %%" _ASM_SI " \n\t"
  9774			"jz 2f \n\t"
  9775			"mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t"
  9776			"jmp 1f \n\t"
  9777			"2: \n\t"
  9778			__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
  9779			"1: \n\t"
  9780			/* Reload cr2 if changed */
  9781			"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
  9782			"mov %%cr2, %%" _ASM_DX " \n\t"
  9783			"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
  9784			"je 3f \n\t"
  9785			"mov %%" _ASM_AX", %%cr2 \n\t"
  9786			"3: \n\t"
  9787			/* Check if vmlaunch of vmresume is needed */
  9788			"cmpl $0, %c[launched](%0) \n\t"
  9789			/* Load guest registers.  Don't clobber flags. */
  9790			"mov %c[rax](%0), %%" _ASM_AX " \n\t"
  9791			"mov %c[rbx](%0), %%" _ASM_BX " \n\t"
  9792			"mov %c[rdx](%0), %%" _ASM_DX " \n\t"
  9793			"mov %c[rsi](%0), %%" _ASM_SI " \n\t"
  9794			"mov %c[rdi](%0), %%" _ASM_DI " \n\t"
  9795			"mov %c[rbp](%0), %%" _ASM_BP " \n\t"
  9796	#ifdef CONFIG_X86_64
  9797			"mov %c[r8](%0),  %%r8  \n\t"
  9798			"mov %c[r9](%0),  %%r9  \n\t"
  9799			"mov %c[r10](%0), %%r10 \n\t"
  9800			"mov %c[r11](%0), %%r11 \n\t"
  9801			"mov %c[r12](%0), %%r12 \n\t"
  9802			"mov %c[r13](%0), %%r13 \n\t"
  9803			"mov %c[r14](%0), %%r14 \n\t"
  9804			"mov %c[r15](%0), %%r15 \n\t"
  9805	#endif
  9806			"mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
  9807	
  9808			/* Enter guest mode */
  9809			"jne 1f \n\t"
  9810			__ex(ASM_VMX_VMLAUNCH) "\n\t"
  9811			"jmp 2f \n\t"
  9812			"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
  9813			"2: "
  9814			/* Save guest registers, load host registers, keep flags */
  9815			"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
  9816			"pop %0 \n\t"
  9817			"setbe %c[fail](%0)\n\t"
  9818			"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
  9819			"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
  9820			__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
  9821			"mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
  9822			"mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
  9823			"mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
  9824			"mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
  9825	#ifdef CONFIG_X86_64
  9826			"mov %%r8,  %c[r8](%0) \n\t"
  9827			"mov %%r9,  %c[r9](%0) \n\t"
  9828			"mov %%r10, %c[r10](%0) \n\t"
  9829			"mov %%r11, %c[r11](%0) \n\t"
  9830			"mov %%r12, %c[r12](%0) \n\t"
  9831			"mov %%r13, %c[r13](%0) \n\t"
  9832			"mov %%r14, %c[r14](%0) \n\t"
  9833			"mov %%r15, %c[r15](%0) \n\t"
  9834			"xor %%r8d,  %%r8d \n\t"
  9835			"xor %%r9d,  %%r9d \n\t"
  9836			"xor %%r10d, %%r10d \n\t"
  9837			"xor %%r11d, %%r11d \n\t"
  9838			"xor %%r12d, %%r12d \n\t"
  9839			"xor %%r13d, %%r13d \n\t"
  9840			"xor %%r14d, %%r14d \n\t"
  9841			"xor %%r15d, %%r15d \n\t"
  9842	#endif
  9843			"mov %%cr2, %%" _ASM_AX "   \n\t"
  9844			"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
  9845	
  9846			"xor %%eax, %%eax \n\t"
  9847			"xor %%ebx, %%ebx \n\t"
  9848			"xor %%esi, %%esi \n\t"
  9849			"xor %%edi, %%edi \n\t"
  9850			"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
  9851			".pushsection .rodata \n\t"
  9852			".global vmx_return \n\t"
  9853			"vmx_return: " _ASM_PTR " 2b \n\t"
  9854			".popsection"
  9855		      : : "c"(vmx), "d"((unsigned long)HOST_RSP), "S"(evmcs_rsp),
  9856			[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
  9857			[fail]"i"(offsetof(struct vcpu_vmx, fail)),
  9858			[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
  9859			[rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
  9860			[rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
  9861			[rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
  9862			[rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
  9863			[rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
  9864			[rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
  9865			[rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
  9866	#ifdef CONFIG_X86_64
  9867			[r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
  9868			[r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
  9869			[r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
  9870			[r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
  9871			[r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
  9872			[r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
  9873			[r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
  9874			[r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
  9875	#endif
  9876			[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
  9877			[wordsize]"i"(sizeof(ulong))
  9878		      : "cc", "memory"
  9879	#ifdef CONFIG_X86_64
  9880			, "rax", "rbx", "rdi"
  9881			, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
  9882	#else
  9883			, "eax", "ebx", "edi"
  9884	#endif
  9885		      );
  9886	
  9887		/*
  9888		 * We do not use IBRS in the kernel. If this vCPU has used the
  9889		 * SPEC_CTRL MSR it may have left it on; save the value and
  9890		 * turn it off. This is much more efficient than blindly adding
  9891		 * it to the atomic save/restore list. Especially as the former
  9892		 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
  9893		 *
  9894		 * For non-nested case:
  9895		 * If the L01 MSR bitmap does not intercept the MSR, then we need to
  9896		 * save it.
  9897		 *
  9898		 * For nested case:
  9899		 * If the L02 MSR bitmap does not intercept the MSR, then we need to
  9900		 * save it.
  9901		 */
  9902		if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
  9903			vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
  9904	
  9905		if (vmx->spec_ctrl)
  9906			native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
  9907	
  9908		vmx->guest_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
  9909		native_wrmsrl(MSR_TEST_CTL, vmx->host_split_lock_ctrl);
  9910	
  9911		/* Eliminate branch target predictions from guest mode */
  9912		vmexit_fill_RSB();
  9913	
  9914		/* All fields are clean at this point */
  9915		if (static_branch_unlikely(&enable_evmcs))
  9916			current_evmcs->hv_clean_fields |=
  9917				HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
  9918	
  9919		/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
  9920		if (vmx->host_debugctlmsr)
  9921			update_debugctlmsr(vmx->host_debugctlmsr);
  9922	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Liu, Jingqi May 22, 2018, 6:20 a.m. UTC | #4
On 5/21/2018 10:32 PM, Paolo Bonzini wrote:
> On 22/05/2018 04:51, Jingqi Liu wrote:
>> A new control bit(bit 29) in the TEST_CTL MSR will be introduced
>> to enable detection of split locks.
>>
>> When bit 29 of the TEST_CTL(33H) MSR is set, the processor
>> causes an #AC exception to be issued instead of suppressing LOCK on
>> bus(during split lock access). A previous control bit (bit 31)
>> in this MSR causes the processor to disable LOCK# assertion for
>> split locked accesses when set. When bits 29 and 31 are both set,
>> bit 29 takes precedence.
>>
>> The release document ref below link:
>> https://software.intel.com/sites/default/files/managed/c5/15/\
>> architecture-instruction-set-extensions-programming-reference.pdf
>> This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157
> Please follow the way spec_ctrl is implemented, so that there is no
> wrmsr/rdmsr unless the guest is using the feature (and also no wrmsr
> unless the guest and host settings don't match).
>
> How should the guest detect that the bits are available?  Is there a
> CPUID bit?
>
> Paolo
Thanks for your review.
The bit is in an MSR (33H); there isn't a corresponding CPUID bit.
This patch depends on https://lkml.org/lkml/2018/5/14/1157,
which can enable or disable this feature in the kernel.
Since the bit can be modified by either the guest or the host, we
need to rdmsr before vmentry and after vmexit, and wrmsr only when
the guest and host settings don't match.
Will improve this in the next version.
Thanks
Jingqi
>> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
>> ---
>>   arch/x86/kvm/vmx.c | 13 +++++++++++++
>>   1 file changed, 13 insertions(+)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 3f16965..07986e0 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -610,6 +610,8 @@ struct vcpu_vmx {
>>   
>>   	u64 		      arch_capabilities;
>>   	u64 		      spec_ctrl;
>> +	u64		      guest_split_lock_ctrl;
>> +	u64		      host_split_lock_ctrl;
>>   
>>   	u32 vm_entry_controls_shadow;
>>   	u32 vm_exit_controls_shadow;
>> @@ -6013,6 +6015,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>   	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
>>   	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
>>   
>> +	vmx->guest_split_lock_ctrl = 0;
>> +
>>   	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
>>   		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
>>   
>> @@ -6062,6 +6066,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>>   
>>   	vmx->rmode.vm86_active = 0;
>>   	vmx->spec_ctrl = 0;
>> +	vmx->guest_split_lock_ctrl = 0;
>>   
>>   	vcpu->arch.microcode_version = 0x100000000ULL;
>>   	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
>> @@ -9725,6 +9730,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>   	if (vmx->spec_ctrl)
>>   		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>>   
>> +	vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
>> +	native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
>> +
>>   	vmx->__launched = vmx->loaded_vmcs->launched;
>>   
>>   	evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
>> @@ -9874,6 +9882,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>   	if (vmx->spec_ctrl)
>>   		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>>   
>> +	vmx->guest_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
>> +	native_wrmsrl(MSR_TEST_CTL, vmx->host_split_lock_ctrl);
>> +
>>   	/* Eliminate branch target predictions from guest mode */
>>   	vmexit_fill_RSB();
>>   
>> @@ -10037,6 +10048,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>   	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
>>   	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
>>   	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
>> +	vmx_disable_intercept_for_msr(msr_bitmap, MSR_TEST_CTL, MSR_TYPE_RW);
>> +
>>   	vmx->msr_bitmap_mode = 0;
>>   
>>   	vmx->loaded_vmcs = &vmx->vmcs01;
>>
Liu, Jingqi May 22, 2018, 7:16 a.m. UTC | #5
On 5/21/2018 10:32 PM, Paolo Bonzini wrote:
> On 22/05/2018 04:51, Jingqi Liu wrote:
>> A new control bit(bit 29) in the TEST_CTL MSR will be introduced
>> to enable detection of split locks.
>>
>> When bit 29 of the TEST_CTL(33H) MSR is set, the processor
>> causes an #AC exception to be issued instead of suppressing LOCK on
>> bus(during split lock access). A previous control bit (bit 31)
>> in this MSR causes the processor to disable LOCK# assertion for
>> split locked accesses when set. When bits 29 and 31 are both set,
>> bit 29 takes precedence.
>>
>> The release document ref below link:
>> https://software.intel.com/sites/default/files/managed/c5/15/\
>> architecture-instruction-set-extensions-programming-reference.pdf
>> This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157
> Please follow the way spec_ctrl is implemented,
Hi Paolo,
This cannot follow the way spec_ctrl is implemented.
The guest would run with the TEST_CTL setting chosen by the host,
so if the bit is set on the host, an #AC exception would be raised
as soon as the guest starts.
Instead, the bit is disabled when the guest starts, and the guest
kernel can enable it for itself afterwards.
Thanks
Jingqi
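
As a sketch of that flow, the guest kernel could flip the bit for itself
once the MSR is passed through (macro names hypothetical; the MSR address
and bit come from the commit message):

	#define MSR_TEST_CTL			0x00000033
	#define TEST_CTL_SPLIT_LOCK_DETECT	(1ULL << 29)

	static void guest_enable_split_lock_detect(void)
	{
		u64 test_ctl;

		rdmsrl(MSR_TEST_CTL, test_ctl);
		test_ctl |= TEST_CTL_SPLIT_LOCK_DETECT;
		wrmsrl(MSR_TEST_CTL, test_ctl);
	}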
> so that there is no
> wrmsr/rdmsr unless the guest is using the feature (and also no wrmsr
> unless the guest and host settings don't match).
>
> How should the guest detect that the bits are available?  Is there a
> CPUID bit?
>
> Paolo
>
>> Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
>> ---
>>   arch/x86/kvm/vmx.c | 13 +++++++++++++
>>   1 file changed, 13 insertions(+)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 3f16965..07986e0 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -610,6 +610,8 @@ struct vcpu_vmx {
>>   
>>   	u64 		      arch_capabilities;
>>   	u64 		      spec_ctrl;
>> +	u64		      guest_split_lock_ctrl;
>> +	u64		      host_split_lock_ctrl;
>>   
>>   	u32 vm_entry_controls_shadow;
>>   	u32 vm_exit_controls_shadow;
>> @@ -6013,6 +6015,8 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>   	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
>>   	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
>>   
>> +	vmx->guest_split_lock_ctrl = 0;
>> +
>>   	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
>>   		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
>>   
>> @@ -6062,6 +6066,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>>   
>>   	vmx->rmode.vm86_active = 0;
>>   	vmx->spec_ctrl = 0;
>> +	vmx->guest_split_lock_ctrl = 0;
>>   
>>   	vcpu->arch.microcode_version = 0x100000000ULL;
>>   	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
>> @@ -9725,6 +9730,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>   	if (vmx->spec_ctrl)
>>   		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>>   
>> +	vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
>> +	native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
>> +
>>   	vmx->__launched = vmx->loaded_vmcs->launched;
>>   
>>   	evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
>> @@ -9874,6 +9882,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>>   	if (vmx->spec_ctrl)
>>   		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>>   
>> +	vmx->guest_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
>> +	native_wrmsrl(MSR_TEST_CTL, vmx->host_split_lock_ctrl);
>> +
>>   	/* Eliminate branch target predictions from guest mode */
>>   	vmexit_fill_RSB();
>>   
>> @@ -10037,6 +10048,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>>   	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
>>   	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
>>   	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
>> +	vmx_disable_intercept_for_msr(msr_bitmap, MSR_TEST_CTL, MSR_TYPE_RW);
>> +
>>   	vmx->msr_bitmap_mode = 0;
>>   
>>   	vmx->loaded_vmcs = &vmx->vmcs01;
>>
Paolo Bonzini May 22, 2018, 2:50 p.m. UTC | #6
On 22/05/2018 08:20, Liu, Jingqi wrote:
>>
>>
>> How should the guest detect that the bits are available?  Is there a
>> CPUID bit?
>>
>> Paolo
> Thanks for your review.
> The bit is in a MSR register(33H), and there isn't a  corresponding
> CPUID bit.
> This patch has a dependency on https://lkml.org/lkml/2018/5/14/1157,
> which could enable or disable this feature in kernel.
> The bit could be modified in guest or host, so need to rdmsr before
> vmentry and after vmexit.

Yes, but only do that after the first time the guest uses the MSR, or
perhaps we could use some trick to limit the cost of vmexits for guests
that write to the MSR very rarely.  Maybe even require userspace to do
an ioctl, for example KVM_ENABLE_CAP, in order to let the guest see the
0x33 MSR (in which case, the guest would pay the price on every
vmentry/vmexit).
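
A sketch of that opt-in from userspace's side (KVM_ENABLE_CAP is real, but
the capability name and number here are hypothetical):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	#define KVM_CAP_SPLIT_LOCK_DETECT 200	/* hypothetical capability */

	static int enable_split_lock_for_vm(int vm_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_SPLIT_LOCK_DETECT,
		};

		/* Only VMs that opt in pay the vmentry/vmexit MSR cost. */
		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}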

Another optimization possibility is to use a static key so that, if no
guest can see the 0x33 MSR, the cost is really zero.
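
A sketch of the static-key idea (key and helper names hypothetical):

	static DEFINE_STATIC_KEY_FALSE(switch_test_ctl);

	static void vmx_maybe_switch_test_ctl(struct vcpu_vmx *vmx)
	{
		/* Patched-out branch: zero cost while no guest sees MSR 0x33. */
		if (!static_branch_unlikely(&switch_test_ctl))
			return;

		vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
		if (vmx->guest_split_lock_ctrl != vmx->host_split_lock_ctrl)
			native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
	}

	/* Enabled once, e.g. when the first guest is allowed to see the MSR:
	 *	static_branch_enable(&switch_test_ctl);
	 */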

Note that this is not premature optimization.  vmexit time is really the
hottest path in KVM; even removing a local_irq_save/restore can provide
a measurable improvement there!  So you cannot add 200 clock cycles or
worse for an MSR that is essentially a debugging tool.

Paolo

> And wrmsr if the guest and host settings don't match.
> Will improve in next version.
Paolo Bonzini May 22, 2018, 2:50 p.m. UTC | #7
On 22/05/2018 09:16, Liu, Jingqi wrote:
> Hi Paolo, It could not follow the way spec_ctrl is implemented. The
> guest will use the setting in TEST_CTL MSR which is set by host. If
> the bit is set in the host,  an #AC exception will be caused when
> the guest starts. Actually, the bit is disabled when guest starts.
> And it can be enabled in guest kernel.

Check out the new code that was added for SSBD.  It now supports
different SPEC_CTRL settings in the guest and in the host.

Paolo
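
The shape of that SSBD-style handling, as a sketch (hypothetical helper,
loosely modeled on x86_virt_spec_ctrl(); not the actual SSBD code):

	/* Called with setguest=true before vmentry, false after vmexit. */
	static void x86_virt_test_ctl(u64 guest_test_ctl, u64 host_test_ctl,
				      bool setguest)
	{
		/* Guest and host each keep their own value; no wrmsr at all
		 * when the two happen to match. */
		if (guest_test_ctl == host_test_ctl)
			return;

		native_wrmsrl(MSR_TEST_CTL, setguest ? guest_test_ctl
						     : host_test_ctl);
	}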

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3f16965..07986e0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -610,6 +610,8 @@  struct vcpu_vmx {
 
 	u64 		      arch_capabilities;
 	u64 		      spec_ctrl;
+	u64		      guest_split_lock_ctrl;
+	u64		      host_split_lock_ctrl;
 
 	u32 vm_entry_controls_shadow;
 	u32 vm_exit_controls_shadow;
@@ -6013,6 +6015,8 @@  static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 	vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
 
+	vmx->guest_split_lock_ctrl = 0;
+
 	if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 
@@ -6062,6 +6066,7 @@  static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	vmx->rmode.vm86_active = 0;
 	vmx->spec_ctrl = 0;
+	vmx->guest_split_lock_ctrl = 0;
 
 	vcpu->arch.microcode_version = 0x100000000ULL;
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
@@ -9725,6 +9730,9 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vmx->spec_ctrl)
 		native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
 
+	vmx->host_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
+	native_wrmsrl(MSR_TEST_CTL, vmx->guest_split_lock_ctrl);
+
 	vmx->__launched = vmx->loaded_vmcs->launched;
 
 	evmcs_rsp = static_branch_unlikely(&enable_evmcs) ?
@@ -9874,6 +9882,9 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (vmx->spec_ctrl)
 		native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
 
+	vmx->guest_split_lock_ctrl = native_read_msr(MSR_TEST_CTL);
+	native_wrmsrl(MSR_TEST_CTL, vmx->host_split_lock_ctrl);
+
 	/* Eliminate branch target predictions from guest mode */
 	vmexit_fill_RSB();
 
@@ -10037,6 +10048,8 @@  static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+	vmx_disable_intercept_for_msr(msr_bitmap, MSR_TEST_CTL, MSR_TYPE_RW);
+
 	vmx->msr_bitmap_mode = 0;
 
 	vmx->loaded_vmcs = &vmx->vmcs01;