
[v6,12/20] kvm/vmx: Emulate MSR TEST_CTL

Message ID 1554326526-172295-13-git-send-email-fenghua.yu@intel.com (mailing list archive)
Series x86/split_lock: Enable split locked accesses detection

Commit Message

Fenghua Yu April 3, 2019, 9:21 p.m. UTC
From: Xiaoyao Li <xiaoyao.li@linux.intel.com>

A control bit (bit 29) in TEST_CTL MSR 0x33 will be introduced in
future x86 processors. When bit 29 is set, the processor causes an #AC
exception for split locked accesses at all CPLs.

Please check the latest Intel 64 and IA-32 Architectures Software
Developer's Manual for more detailed information on the MSR and
the split lock bit.

This patch emulates MSR TEST_CTL with vmx->msr_test_ctl and does the
following:
1. As MSR TEST_CTL of guest is emulated, enable the related bits
in CORE_CAPABILITY to corretly report this feature to guest.

2. Differentiate MSR TEST_CTL between host and guest.

Signed-off-by: Xiaoyao Li <xiaoyao.li@linux.intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 35 +++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.h |  1 +
 arch/x86/kvm/x86.c     | 17 ++++++++++++++++-
 3 files changed, 52 insertions(+), 1 deletion(-)

Comments

Sean Christopherson April 4, 2019, 2:44 p.m. UTC | #1
On Wed, Apr 03, 2019 at 02:21:58PM -0700, Fenghua Yu wrote:
> From: Xiaoyao Li <xiaoyao.li@linux.intel.com>
> 
> A control bit (bit 29) in TEST_CTL MSR 0x33 will be introduced in
> future x86 processors. When bit 29 is set, the processor causes an #AC
> exception for split locked accesses at all CPLs.
> 
> Please check the latest Intel 64 and IA-32 Architectures Software
> Developer's Manual for more detailed information on the MSR and
> the split lock bit.
> 
> This patch emulates MSR TEST_CTL with vmx->msr_test_ctl and does the
> following:
> 1. As MSR TEST_CTL of guest is emulated, enable the related bits
> in CORE_CAPABILITY to corretly report this feature to guest.

s/corretly/correctly

> 
> 2. Differentiate MSR TEST_CTL between host and guest.
> 
> Signed-off-by: Xiaoyao Li <xiaoyao.li@linux.intel.com>
> Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
> Acked-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 35 +++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/vmx/vmx.h |  1 +
>  arch/x86/kvm/x86.c     | 17 ++++++++++++++++-
>  3 files changed, 52 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index ab432a930ae8..309ccf593f0d 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1663,6 +1663,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	u32 index;
>  
>  	switch (msr_info->index) {
> +	case MSR_TEST_CTL:
> +		if (!msr_info->host_initiated &&
> +		    !(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
> +			return 1;
> +		msr_info->data = vmx->msr_test_ctl;
> +		break;
>  #ifdef CONFIG_X86_64
>  	case MSR_FS_BASE:
>  		msr_info->data = vmcs_readl(GUEST_FS_BASE);
> @@ -1797,6 +1803,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	u32 index;
>  
>  	switch (msr_index) {
> +	case MSR_TEST_CTL:
> +		if (!(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
> +			return 1;
> +
> +		if (data & ~TEST_CTL_ENABLE_SPLIT_LOCK_DETECT)
> +			return 1;
> +		vmx->msr_test_ctl = data;
> +		break;
>  	case MSR_EFER:
>  		ret = kvm_set_msr_common(vcpu, msr_info);
>  		break;
> @@ -4077,6 +4091,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
>  		++vmx->nmsrs;
>  	}
>  
> +	/* disable AC split lock by default */
> +	vmx->msr_test_ctl = 0;
> +
>  	vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
>  
>  	/* 22.2.1, 20.8.1 */
> @@ -4114,6 +4131,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>  
>  	vmx->rmode.vm86_active = 0;
>  	vmx->spec_ctrl = 0;
> +	vmx->msr_test_ctl = 0;
>  
>  	vcpu->arch.microcode_version = 0x100000000ULL;
>  	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
> @@ -6313,6 +6331,21 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
>  					msrs[i].host, false);
>  }
>  
> +static void atomic_switch_msr_test_ctl(struct vcpu_vmx *vmx)
> +{
> +	u64 host_msr_test_ctl;
> +
> +	/* if TEST_CTL MSR doesn't exist on the hardware, do nothing */
> +	if (rdmsrl_safe(MSR_TEST_CTL, &host_msr_test_ctl))
> +		return;

This adds a RDMSR on every VM-Enter, and a fault on CPUs that don't
support MSR_TEST_CTL.  Ideally the kernel would cache MSR_TEST_CTL and
expose a helper that returns a boolean to indicate the existence of the
MSRs along with the current value.  Racing with split_lock_detect_store()
is ok since this code runs with interrupts disabled, i.e. will block
split_lock_detect_store() until after VM-Exit.
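
A minimal sketch of such a helper, with hypothetical names (no such API
exists in the kernel at this point), assuming X86_FEATURE_SPLIT_LOCK_DETECT
implies the MSR is present:

	/* updated by the #AC handler and the sysfs knob, read at VM-Enter */
	static DEFINE_PER_CPU(u64, msr_test_ctl_cache);

	bool split_lock_get_test_ctl(u64 *val)
	{
		if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
			return false;

		*val = this_cpu_read(msr_test_ctl_cache);
		return true;
	}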

Paolo, can you weigh in with your thoughts?  I'm surprised you acked
this patch given your earlier comment:

https://patchwork.kernel.org/patch/10413779/#21892723

> +
> +	if (host_msr_test_ctl == vmx->msr_test_ctl)
> +		clear_atomic_switch_msr(vmx, MSR_TEST_CTL);
> +	else
> +		add_atomic_switch_msr(vmx, MSR_TEST_CTL, vmx->msr_test_ctl,
> +				      host_msr_test_ctl, false);
> +}
> +
>  static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
>  {
>  	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
> @@ -6419,6 +6452,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  
>  	atomic_switch_perf_msrs(vmx);
>  
> +	atomic_switch_msr_test_ctl(vmx);
> +
>  	vmx_update_hv_timer(vcpu);
>  
>  	/*
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index a1e00d0a2482..6091a8b9de74 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -190,6 +190,7 @@ struct vcpu_vmx {
>  	u64		      msr_guest_kernel_gs_base;
>  #endif
>  
> +	u64		      msr_test_ctl;
>  	u64		      spec_ctrl;
>  
>  	u32 vm_entry_controls_shadow;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4459115eb0ec..e93c2f620cdb 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1229,7 +1229,22 @@ EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
>  
>  u64 kvm_get_core_capability(void)
>  {
> -	return 0;
> +	u64 data;
> +
> +	rdmsrl_safe(MSR_IA32_CORE_CAPABILITY, &data);
> +
> +	/* mask non-virtualizable functions */
> +	data &= CORE_CAP_SPLIT_LOCK_DETECT;
> +
> +	/*
> +	 * There will be a list of FMS values that have split lock detection
> +	 * but lack the CORE CAPABILITY MSR. In this case, set
> +	 * CORE_CAP_SPLIT_LOCK_DETECT since we emulate MSR CORE_CAPABILITY.
> +	 */
> +	if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
> +		data |= CORE_CAP_SPLIT_LOCK_DETECT;
> +
> +	return data;
>  }
>  EXPORT_SYMBOL_GPL(kvm_get_core_capability);
>  
> -- 
> 2.19.1
>
Thomas Gleixner April 5, 2019, 12:30 p.m. UTC | #2
On Wed, 3 Apr 2019, Fenghua Yu wrote:
> @@ -1663,6 +1663,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	u32 index;
>  
>  	switch (msr_info->index) {
> +	case MSR_TEST_CTL:
> +		if (!msr_info->host_initiated &&
> +		    !(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))

Errm? If the MSR_TEST_CTL is exposed to the guest via CPUID then the
rdmsr() in the guest is not supposed to fail just because
CORE_CAP_SPLIT_LOCK_DETECT is not set.

vmx->msr_test_ctl holds the proper value, which is either 0 or
TEST_CTL_ENABLE_SPLIT_LOCK_DETECT until more bits are supported.

So the check needs to be guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY).

> +			return 1;
> +		msr_info->data = vmx->msr_test_ctl;
> +		break;
>  #ifdef CONFIG_X86_64
>  	case MSR_FS_BASE:
>  		msr_info->data = vmcs_readl(GUEST_FS_BASE);
> @@ -1797,6 +1803,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	u32 index;
>  
>  	switch (msr_index) {
> +	case MSR_TEST_CTL:
> +		if (!(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
> +			return 1;

Again, this wants to be a check for the availability of the MSR in the
guest cpuid and not to the CORE_CAP_SPLIT_LOCK_DETECT magic bit.

> +
> +		if (data & ~TEST_CTL_ENABLE_SPLIT_LOCK_DETECT)
> +			return 1;

and this one wants to be:

		if (data & vmx->msr_test_ctl_mask)

so additional bits can be enabled later on at exactly one point in the
code, i.e. during vm init.
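
For illustration only, the wrmsr path might then become something like the
sketch below, assuming vmx->msr_test_ctl_mask holds the bits the guest is
allowed to set (with the opposite polarity, a mask of reserved bits, the
check is "data & mask" as written above):

	case MSR_TEST_CTL:
		if (!vmx->msr_test_ctl_mask)
			return 1;	/* MSR not exposed to this guest */
		if (data & ~vmx->msr_test_ctl_mask)
			return 1;	/* attempt to set unsupported bits */
		vmx->msr_test_ctl = data;
		break;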

> +		vmx->msr_test_ctl = data;
> +		break;
>  
> +static void atomic_switch_msr_test_ctl(struct vcpu_vmx *vmx)
> +{
> +	u64 host_msr_test_ctl;
> +
> +	/* if TEST_CTL MSR doesn't exist on the hardware, do nothing */
> +	if (rdmsrl_safe(MSR_TEST_CTL, &host_msr_test_ctl))
> +		return;

Yikes. So on hosts which do not have MSR_TEST_CTL this takes a fault on
every VM enter. The host knows whether it has this MSR or not.

Even if it exists there is no point to do the rdmsr on every vmenter. The
value should be cached per cpu. It only changes when:

      1) #AC triggers in kernel space

      2) The sysfs knob is flipped

#1 It happens either _BEFORE_ or _AFTER_ atomic_switch_msr_test_ctl().  In
   both cases the cached value is correct in atomic_switch_msr_test_ctl().
   
   If it happens _AFTER_ atomic_switch_msr_test_ctl() then the VMEXIT will
   restore the enabled bit, but there is nothing you can do about that.

#2 CANNOT happen in the context of vcpu_run().

So this wants to be:

   host_msr_test_ctl = this_cpu_read(msr_test_ctl_cache);
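
and the whole function reduces to a sketch like this, with
split_lock_get_test_ctl() standing in for whatever cached-value helper the
kernel ends up exposing (hypothetical name):

	static void atomic_switch_msr_test_ctl(struct vcpu_vmx *vmx)
	{
		u64 host_msr_test_ctl;

		/* no MSR_TEST_CTL on the host, nothing to switch */
		if (!split_lock_get_test_ctl(&host_msr_test_ctl))
			return;

		if (host_msr_test_ctl == vmx->msr_test_ctl)
			clear_atomic_switch_msr(vmx, MSR_TEST_CTL);
		else
			add_atomic_switch_msr(vmx, MSR_TEST_CTL,
					      vmx->msr_test_ctl,
					      host_msr_test_ctl, false);
	}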

> +	if (host_msr_test_ctl == vmx->msr_test_ctl)
> +		clear_atomic_switch_msr(vmx, MSR_TEST_CTL);
> +	else
> +		add_atomic_switch_msr(vmx, MSR_TEST_CTL, vmx->msr_test_ctl,
> +				      host_msr_test_ctl, false);
> +}
> +

>  u64 kvm_get_core_capability(void)
>  {
> -	return 0;
> +	u64 data;
> +
> +	rdmsrl_safe(MSR_IA32_CORE_CAPABILITY, &data);

  	if (!boot_cpu_has(X86_FEATURE_CORE_CAPABILITY))
		return 0;

> +	/* mask non-virtualizable functions */
> +	data &= CORE_CAP_SPLIT_LOCK_DETECT;

Why?

> +	/*
> +	 * There will be a list of FMS values that have split lock detection
> +	 * but lack the CORE CAPABILITY MSR. In this case, set
> +	 * CORE_CAP_SPLIT_LOCK_DETECT since we emulate MSR CORE_CAPABILITY.

That's completely wrong. X86_FEATURE_SPLIT_LOCK_DETECT is set when the
capability is enumerated in CPUID or it's set via the FMS quirk.

So:
	data = 0;
> +	if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
> +		data |= CORE_CAP_SPLIT_LOCK_DETECT;
> +
> +	return data;

is absolutely sufficient.
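
Written out as a complete function, the suggested minimal form is:

	u64 kvm_get_core_capability(void)
	{
		u64 data = 0;

		if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
			data |= CORE_CAP_SPLIT_LOCK_DETECT;

		return data;
	}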

Thanks,

	tglx
Xiaoyao Li April 8, 2019, 8:54 a.m. UTC | #3
On Thu, 2019-04-04 at 07:44 -0700, Sean Christopherson wrote:
> On Wed, Apr 03, 2019 at 02:21:58PM -0700, Fenghua Yu wrote:
> > From: Xiaoyao Li <xiaoyao.li@linux.intel.com>
> > 
> > A control bit (bit 29) in TEST_CTL MSR 0x33 will be introduced in
> > future x86 processors. When bit 29 is set, the processor causes an #AC
> > exception for split locked accesses at all CPLs.
> > 
> > Please check the latest Intel 64 and IA-32 Architectures Software
> > Developer's Manual for more detailed information on the MSR and
> > the split lock bit.
> > 
> > This patch emulates MSR TEST_CTL with vmx->msr_test_ctl and does the
> > following:
> > 1. As MSR TEST_CTL of guest is emulated, enable the related bits
> > in CORE_CAPABILITY to corretly report this feature to guest.
> 
> s/corretly/correctly

will correct it. thanks.

> > 
> > 2. Differentiate MSR TEST_CTL between host and guest.
> > 
> > Signed-off-by: Xiaoyao Li <xiaoyao.li@linux.intel.com>
> > Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
> > Acked-by: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> >  arch/x86/kvm/vmx/vmx.c | 35 +++++++++++++++++++++++++++++++++++
> >  arch/x86/kvm/vmx/vmx.h |  1 +
> >  arch/x86/kvm/x86.c     | 17 ++++++++++++++++-
> >  3 files changed, 52 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index ab432a930ae8..309ccf593f0d 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -1663,6 +1663,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct
> > msr_data *msr_info)
> >  	u32 index;
> >  
> >  	switch (msr_info->index) {
> > +	case MSR_TEST_CTL:
> > +		if (!msr_info->host_initiated &&
> > +		    !(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
> > +			return 1;
> > +		msr_info->data = vmx->msr_test_ctl;
> > +		break;
> >  #ifdef CONFIG_X86_64
> >  	case MSR_FS_BASE:
> >  		msr_info->data = vmcs_readl(GUEST_FS_BASE);
> > @@ -1797,6 +1803,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct
> > msr_data *msr_info)
> >  	u32 index;
> >  
> >  	switch (msr_index) {
> > +	case MSR_TEST_CTL:
> > +		if (!(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
> > +			return 1;
> > +
> > +		if (data & ~TEST_CTL_ENABLE_SPLIT_LOCK_DETECT)
> > +			return 1;
> > +		vmx->msr_test_ctl = data;
> > +		break;
> >  	case MSR_EFER:
> >  		ret = kvm_set_msr_common(vcpu, msr_info);
> >  		break;
> > @@ -4077,6 +4091,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
> >  		++vmx->nmsrs;
> >  	}
> >  
> > +	/* disable AC split lock by default */
> > +	vmx->msr_test_ctl = 0;
> > +
> >  	vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
> >  
> >  	/* 22.2.1, 20.8.1 */
> > @@ -4114,6 +4131,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool
> > init_event)
> >  
> >  	vmx->rmode.vm86_active = 0;
> >  	vmx->spec_ctrl = 0;
> > +	vmx->msr_test_ctl = 0;
> >  
> >  	vcpu->arch.microcode_version = 0x100000000ULL;
> >  	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
> > @@ -6313,6 +6331,21 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx
> > *vmx)
> >  					msrs[i].host, false);
> >  }
> >  
> > +static void atomic_switch_msr_test_ctl(struct vcpu_vmx *vmx)
> > +{
> > +	u64 host_msr_test_ctl;
> > +
> > +	/* if TEST_CTL MSR doesn't exist on the hardware, do nothing */
> > +	if (rdmsrl_safe(MSR_TEST_CTL, &host_msr_test_ctl))
> > +		return;
> 
> This adds a RDMSR on every VM-Enter, and a fault on CPUs that don't
> support MSR_TEST_CTL.  Ideally the kernel would cache MSR_TEST_CTL and
> expose a helper that returns a boolean to indicate the existence of the
> MSRs along with the current value.  Racing with split_lock_detect_store()
> is ok since this code runs with interrupts disabled, i.e. will block
> split_lock_detect_store() until after VM-Exit.
> 
> Paolo, can you weigh in with your thoughts?  I'm surprised you acked
> this patch given your earlier comment:
> 
> https://patchwork.kernel.org/patch/10413779/#21892723

In the v4 patchset, atomic_switch_msr_test_ctl() checked
boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) first.

In the v5 patchset, considering that there is another bit (bit 31) in
MSR_TEST_CTL, !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) cannot guarantee
that there is no MSR_TEST_CTL in hardware, so I changed the check to
rdmsrl_safe() in v5. It's my fault that I didn't point this out in the v5
changelog.

Considering the overhead of a RDMSR on every VM-entry, I will follow Thomas's
comment and use a percpu variable, msr_test_ctl_cache, to cache the value of
the host's MSR_TEST_CTL.

> > +
> > +	if (host_msr_test_ctl == vmx->msr_test_ctl)
> > +		clear_atomic_switch_msr(vmx, MSR_TEST_CTL);
> > +	else
> > +		add_atomic_switch_msr(vmx, MSR_TEST_CTL, vmx->msr_test_ctl,
> > +				      host_msr_test_ctl, false);
> > +}
> > +
> >  static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
> >  {
> >  	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
> > @@ -6419,6 +6452,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
> >  
> >  	atomic_switch_perf_msrs(vmx);
> >  
> > +	atomic_switch_msr_test_ctl(vmx);
> > +
> >  	vmx_update_hv_timer(vcpu);
> >  
> >  	/*
> > diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> > index a1e00d0a2482..6091a8b9de74 100644
> > --- a/arch/x86/kvm/vmx/vmx.h
> > +++ b/arch/x86/kvm/vmx/vmx.h
> > @@ -190,6 +190,7 @@ struct vcpu_vmx {
> >  	u64		      msr_guest_kernel_gs_base;
> >  #endif
> >  
> > +	u64		      msr_test_ctl;
> >  	u64		      spec_ctrl;
> >  
> >  	u32 vm_entry_controls_shadow;
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 4459115eb0ec..e93c2f620cdb 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1229,7 +1229,22 @@ EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
> >  
> >  u64 kvm_get_core_capability(void)
> >  {
> > -	return 0;
> > +	u64 data;
> > +
> > +	rdmsrl_safe(MSR_IA32_CORE_CAPABILITY, &data);
> > +
> > +	/* mask non-virtualizable functions */
> > +	data &= CORE_CAP_SPLIT_LOCK_DETECT;
> > +
> > +	/*
> > +	 * There will be a list of FMS values that have split lock detection
> > +	 * but lack the CORE CAPABILITY MSR. In this case, set
> > +	 * CORE_CAP_SPLIT_LOCK_DETECT since we emulate MSR CORE_CAPABILITY.
> > +	 */
> > +	if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
> > +		data |= CORE_CAP_SPLIT_LOCK_DETECT;
> > +
> > +	return data;
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_get_core_capability);
> >  
> > -- 
> > 2.19.1
> >
Xiaoyao Li April 8, 2019, 9:54 a.m. UTC | #4
Hi, Thomas,

On Fri, 2019-04-05 at 14:30 +0200, Thomas Gleixner wrote:
> On Wed, 3 Apr 2019, Fenghua Yu wrote:
> > @@ -1663,6 +1663,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct
> > msr_data *msr_info)
> >  	u32 index;
> >  
> >  	switch (msr_info->index) {
> > +	case MSR_TEST_CTL:
> > +		if (!msr_info->host_initiated &&
> > +		    !(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
> 
> Errm? If the MSR_TEST_CTL is exposed to the guest via CPUID then the
> rdmsr() in the guest is not supposed to fail just because
> CORE_CAP_SPLIT_LOCK_DETECT is not set.

you're right.

> vmx->msr_test_ctl holds the proper value, which is either 0 or
> TEST_CTL_ENABLE_SPLIT_LOCK_DETECT until more bits are supported.
> 
> So the check needs to be guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY).

I don't think so. There is no dependency between
guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY) and MSR_TEST_CTL;
guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY) only indicates that the guest has
MSR CORE_CAPABILITY.

Because MSR_TEST_CTL is emulated with vmx->msr_test_ctl, I think it's ok to
always let userspace or the guest read MSR_TEST_CTL; it just returns the
emulated value. Like you said, "vmx->msr_test_ctl holds the proper value, which
is either 0 or TEST_CTL_ENABLE_SPLIT_LOCK_DETECT until more bits are supported."

> > +			return 1;
> > +		msr_info->data = vmx->msr_test_ctl;
> > +		break;
> >  #ifdef CONFIG_X86_64
> >  	case MSR_FS_BASE:
> >  		msr_info->data = vmcs_readl(GUEST_FS_BASE);
> > @@ -1797,6 +1803,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct
> > msr_data *msr_info)
> >  	u32 index;
> >  
> >  	switch (msr_index) {
> > +	case MSR_TEST_CTL:
> > +		if (!(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
> > +			return 1;
> 
> Again, this wants to be a check for the availability of the MSR in the
> guest cpuid and not to the CORE_CAP_SPLIT_LOCK_DETECT magic bit.
> 
> > +
> > +		if (data & ~TEST_CTL_ENABLE_SPLIT_LOCK_DETECT)
> > +			return 1;
> 
> and this one wants to be:
> 
> 		if (data & vmx->msr_test_ctl_mask)
> 
> so additional bits can be enabled later on at exactly one point in the
> code, i.e. during vm init.

Again, in the wrmsr handler, since MSR_TEST_CTL is emulated, the guest can be
considered to always have this MSR. The handler just needs to return 1 when
reserved bits are set.

Using vmx->msr_test_ctl_mask is a good idea. I will use it in the next version.

> > +		vmx->msr_test_ctl = data;
> > +		break;
> >  
> > +static void atomic_switch_msr_test_ctl(struct vcpu_vmx *vmx)
> > +{
> > +	u64 host_msr_test_ctl;
> > +
> > +	/* if TEST_CTL MSR doesn't exist on the hardware, do nothing */
> > +	if (rdmsrl_safe(MSR_TEST_CTL, &host_msr_test_ctl))
> > +		return;
> 
> Yikes. So on hosts which do not have MSR_TEST_CTL this takes a fault on
> every VM enter. The host knows whether it has this MSR or not.
> 
> Even if it exists there is no point to do the rdmsr on every vmenter. The
> value should be cached per cpu. It only changes when:
> 
>       1) #AC triggers in kernel space
> 
>       2) The sysfs knob is flipped
> 
> #1 It happens either _BEFORE_ or _AFTER_ atomic_switch_msr_test_ctl().  In
>    both cases the cached value is correct in atomic_switch_msr_test_ctl().
>    
>    If it happens _AFTER_ atomic_switch_msr_test_ctl() then the VMEXIT will
>    restore the enabled bit, but there is nothing you can do about that.
> 
> #2 CANNOT happen in the context of vcpu_run().
> 
> So this wants to be:
> 
>    host_msr_test_ctl = this_cpu_read(msr_test_ctl_cache);

You're right, and thanks for your advice.
I will take it in the next version.

> > +	if (host_msr_test_ctl == vmx->msr_test_ctl)
> > +		clear_atomic_switch_msr(vmx, MSR_TEST_CTL);
> > +	else
> > +		add_atomic_switch_msr(vmx, MSR_TEST_CTL, vmx->msr_test_ctl,
> > +				      host_msr_test_ctl, false);
> > +}
> > +
> >  u64 kvm_get_core_capability(void)
> >  {
> > -	return 0;
> > +	u64 data;
> > +
> > +	rdmsrl_safe(MSR_IA32_CORE_CAPABILITY, &data);
> 
>   	if (!boot_cpu_has(X86_FEATURE_CORE_CAPABILITY))
> 		return 0;
> 
> > +	/* mask non-virtualizable functions */
> > +	data &= CORE_CAP_SPLIT_LOCK_DETECT;
> 
> Why?
> 
> > +	/*
> > +	 * There will be a list of FMS values that have split lock detection
> > +	 * but lack the CORE CAPABILITY MSR. In this case, set
> > +	 * CORE_CAP_SPLIT_LOCK_DETECT since we emulate MSR CORE_CAPABILITY.
> 
> That's completely wrong. X86_FEATURE_SPLIT_LOCK_DETECT is set when the
> capability is enumerated in CPUID or it's set via the FMS quirk.
> 
> So:
> 	data = 0;
> > +	if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
> > +		data |= CORE_CAP_SPLIT_LOCK_DETECT;
> > +
> > +	return data;
> 
> is absolutely sufficient.

Right, X86_FEATURE_SPLIT_LOCK_DETECT is set in two situations:

	#1 the capability is enumerated by CORE_CAP_SPLIT_LOCK_DETECT (bit 5 of
MSR_IA32_CORE_CAPABILITY)

	#2 via the FMS list, for processors that have the split lock detection
feature but lack MSR_IA32_CORE_CAPABILITY.

These two paths work well for the host, whose FMS is real.
However, when it comes to virtualization, we can emulate an FMS for the guest
that differs from the host's.

Consider the following case:

The host CPU is ICELAKE_MOBILE, which doesn't have X86_FEATURE_CORE_CAPABILITY
but has X86_FEATURE_SPLIT_LOCK_DETECT.
If we run a guest with the Skylake CPU model, the feature
X86_FEATURE_SPLIT_LOCK_DETECT is hidden from the guest, since the guest's
MSR_IA32_CORE_CAPABILITY reports zero and the guest's FMS doesn't match the
list.

In this way, we fail to expose X86_FEATURE_SPLIT_LOCK_DETECT to the guest.

Since MSR_IA32_CORE_CAPABILITY is emulated in software for the guest, we can
enforce using #1 to report X86_FEATURE_SPLIT_LOCK_DETECT to the guest, so the
guest is freed from the limitation of the FMS list.
 

> Thanks,
> 
> 	tglx
Sean Christopherson April 8, 2019, 5:48 p.m. UTC | #5
On Mon, Apr 08, 2019 at 05:54:25PM +0800, Xiaoyao Li wrote:
> On Fri, 2019-04-05 at 14:30 +0200, Thomas Gleixner wrote:
> > On Wed, 3 Apr 2019, Fenghua Yu wrote:
> > > @@ -1663,6 +1663,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct
> > > msr_data *msr_info)
> > >  	u32 index;
> > >  
> > >  	switch (msr_info->index) {
> > > +	case MSR_TEST_CTL:
> > > +		if (!msr_info->host_initiated &&
> > > +		    !(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
> > 
> > Errm? If the MSR_TEST_CTL is exposed to the guest via CPUID then the
> > rdmsr() in the guest is not supposed to fail just because
> > CORE_CAP_SPLIT_LOCK_DETECT is not set.
> 
> you're right.
> 
> > vmx->msr_test_ctl holds the proper value, which is either 0 or
> > TEST_CTL_ENABLE_SPLIT_LOCK_DETECT until more bits are supported.
> > 
> > So the check needs to be guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY).
> 
> I don't think so. There is no dependency between
> guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY) and MSR_TEST_CTL;
> guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY) only indicates that the guest
> has MSR CORE_CAPABILITY.
> 
> Because MSR_TEST_CTL is emulated with vmx->msr_test_ctl, I think it's ok to
> always let userspace or the guest read MSR_TEST_CTL; it just returns the
> emulated value. Like you said, "vmx->msr_test_ctl holds the proper value,
> which is either 0 or TEST_CTL_ENABLE_SPLIT_LOCK_DETECT until more bits are
> supported."

Assuming the next version implements "vmx->msr_test_ctl_mask", KVM should
inject #GP if the guest attempts RDMSR(MSR_TEST_CTL) and the mask is zero.
It stands to reason that a kernel can only reasonably assume the MSR exists
if the (virtual) CPU supports at least one feature enabled via MSR_TEST_CTL.
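
Concretely, the rdmsr path could then look like this sketch, with
vmx->msr_test_ctl_mask as proposed above (returning 1 makes KVM inject #GP
for guest-initiated accesses):

	case MSR_TEST_CTL:
		if (!msr_info->host_initiated && !vmx->msr_test_ctl_mask)
			return 1;	/* no TEST_CTL features exposed: #GP */
		msr_info->data = vmx->msr_test_ctl;
		break;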
Xiaoyao Li April 10, 2019, 5:03 a.m. UTC | #6
On Mon, 2019-04-08 at 10:48 -0700, Sean Christopherson wrote:
> On Mon, Apr 08, 2019 at 05:54:25PM +0800, Xiaoyao Li wrote:
> > On Fri, 2019-04-05 at 14:30 +0200, Thomas Gleixner wrote:
> > > On Wed, 3 Apr 2019, Fenghua Yu wrote:
> > > > @@ -1663,6 +1663,12 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu,
> > > > struct
> > > > msr_data *msr_info)
> > > >  	u32 index;
> > > >  
> > > >  	switch (msr_info->index) {
> > > > +	case MSR_TEST_CTL:
> > > > +		if (!msr_info->host_initiated &&
> > > > +		    !(vcpu->arch.core_capability &
> > > > CORE_CAP_SPLIT_LOCK_DETECT))
> > > 
> > > Errm? If the MSR_TEST_CTL is exposed to the guest via CPUID then the
> > > rdmsr() in the guest is not supposed to fail just because
> > > CORE_CAP_SPLIT_LOCK_DETECT is not set.
> > 
> > you're right.
> > 
> > > vmx->msr_test_ctl holds the proper value, which is either 0 or
> > > TEST_CTL_ENABLE_SPLIT_LOCK_DETECT until more bits are supported.
> > > 
> > > So the check needs to be guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY).
> > 
> > I don't think so. There is no dependency between
> > guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY) and MSR_TEST_CTL;
> > guest_cpuid_has(X86_FEATURE_CORE_CAPABILITY) only indicates that the guest
> > has MSR CORE_CAPABILITY.
> > 
> > Because MSR_TEST_CTL is emulated with vmx->msr_test_ctl, I think it's ok
> > to always let userspace or the guest read MSR_TEST_CTL; it just returns
> > the emulated value. Like you said, "vmx->msr_test_ctl holds the proper
> > value, which is either 0 or TEST_CTL_ENABLE_SPLIT_LOCK_DETECT until more
> > bits are supported."
> 
> Assuming the next version implements "vmx->msr_test_ctl_mask", KVM should
> inject #GP if the guest attempts RDMSR(MSR_TEST_CTL) and the mask is zero.
> It stands to reason that a kernel can only reasonably assume the MSR exists
> if the (virtual) CPU supports at least one feature enabled via MSR_TEST_CTL.

It makes sense to me. Thanks for your reminder.
Will apply it in the next version.

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ab432a930ae8..309ccf593f0d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1663,6 +1663,12 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	u32 index;
 
 	switch (msr_info->index) {
+	case MSR_TEST_CTL:
+		if (!msr_info->host_initiated &&
+		    !(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
+			return 1;
+		msr_info->data = vmx->msr_test_ctl;
+		break;
 #ifdef CONFIG_X86_64
 	case MSR_FS_BASE:
 		msr_info->data = vmcs_readl(GUEST_FS_BASE);
@@ -1797,6 +1803,14 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	u32 index;
 
 	switch (msr_index) {
+	case MSR_TEST_CTL:
+		if (!(vcpu->arch.core_capability & CORE_CAP_SPLIT_LOCK_DETECT))
+			return 1;
+
+		if (data & ~TEST_CTL_ENABLE_SPLIT_LOCK_DETECT)
+			return 1;
+		vmx->msr_test_ctl = data;
+		break;
 	case MSR_EFER:
 		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
@@ -4077,6 +4091,9 @@  static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		++vmx->nmsrs;
 	}
 
+	/* disable AC split lock by default */
+	vmx->msr_test_ctl = 0;
+
 	vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
 
 	/* 22.2.1, 20.8.1 */
@@ -4114,6 +4131,7 @@  static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	vmx->rmode.vm86_active = 0;
 	vmx->spec_ctrl = 0;
+	vmx->msr_test_ctl = 0;
 
 	vcpu->arch.microcode_version = 0x100000000ULL;
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
@@ -6313,6 +6331,21 @@  static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 					msrs[i].host, false);
 }
 
+static void atomic_switch_msr_test_ctl(struct vcpu_vmx *vmx)
+{
+	u64 host_msr_test_ctl;
+
+	/* if TEST_CTL MSR doesn't exist on the hardware, do nothing */
+	if (rdmsrl_safe(MSR_TEST_CTL, &host_msr_test_ctl))
+		return;
+
+	if (host_msr_test_ctl == vmx->msr_test_ctl)
+		clear_atomic_switch_msr(vmx, MSR_TEST_CTL);
+	else
+		add_atomic_switch_msr(vmx, MSR_TEST_CTL, vmx->msr_test_ctl,
+				      host_msr_test_ctl, false);
+}
+
 static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
 {
 	vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
@@ -6419,6 +6452,8 @@  static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	atomic_switch_perf_msrs(vmx);
 
+	atomic_switch_msr_test_ctl(vmx);
+
 	vmx_update_hv_timer(vcpu);
 
 	/*
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a1e00d0a2482..6091a8b9de74 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -190,6 +190,7 @@  struct vcpu_vmx {
 	u64		      msr_guest_kernel_gs_base;
 #endif
 
+	u64		      msr_test_ctl;
 	u64		      spec_ctrl;
 
 	u32 vm_entry_controls_shadow;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4459115eb0ec..e93c2f620cdb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1229,7 +1229,22 @@  EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
 
 u64 kvm_get_core_capability(void)
 {
-	return 0;
+	u64 data;
+
+	rdmsrl_safe(MSR_IA32_CORE_CAPABILITY, &data);
+
+	/* mask non-virtualizable functions */
+	data &= CORE_CAP_SPLIT_LOCK_DETECT;
+
+	/*
+	 * There will be a list of FMS values that have split lock detection
+	 * but lack the CORE CAPABILITY MSR. In this case, set
+	 * CORE_CAP_SPLIT_LOCK_DETECT since we emulate MSR CORE_CAPABILITY.
+	 */
+	if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
+		data |= CORE_CAP_SPLIT_LOCK_DETECT;
+
+	return data;
 }
 EXPORT_SYMBOL_GPL(kvm_get_core_capability);