diff mbox series

[RFC,v3,09/10] KVM: VMX: Advertise MITI_CTRL_BHB_CLEAR_SEQ_S_SUPPORT

Message ID 20240410143446.797262-10-chao.gao@intel.com (mailing list archive)
State New, archived
Headers show
Series Virtualize Intel IA32_SPEC_CTRL | expand

Commit Message

Chao Gao April 10, 2024, 2:34 p.m. UTC
From: Zhang Chen <chen.zhang@intel.com>

Allow guest to report if the short BHB-clearing sequence is in use.

KVM will deploy BHI_DIS_S for the guest if the short BHB-clearing
sequence is in use and the processor doesn't enumerate BHI_NO.

Signed-off-by: Zhang Chen <chen.zhang@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

Comments

Sean Christopherson June 11, 2024, 1:34 a.m. UTC | #1
On Wed, Apr 10, 2024, Chao Gao wrote:
> From: Zhang Chen <chen.zhang@intel.com>
> 
> Allow guest to report if the short BHB-clearing sequence is in use.
> 
> KVM will deploy BHI_DIS_S for the guest if the short BHB-clearing
> sequence is in use and the processor doesn't enumerate BHI_NO.
> 
> Signed-off-by: Zhang Chen <chen.zhang@intel.com>
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 31 ++++++++++++++++++++++++++++---
>  1 file changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index cc260b14f8df..c5ceaebd954b 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1956,8 +1956,8 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
>  }
>  
>  #define VIRTUAL_ENUMERATION_VALID_BITS	VIRT_ENUM_MITIGATION_CTRL_SUPPORT
> -#define MITI_ENUM_VALID_BITS		0ULL
> -#define MITI_CTRL_VALID_BITS		0ULL
> +#define MITI_ENUM_VALID_BITS		MITI_ENUM_BHB_CLEAR_SEQ_S_SUPPORT
> +#define MITI_CTRL_VALID_BITS		MITI_CTRL_BHB_CLEAR_SEQ_S_USED
>  
>  static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
>  {
> @@ -2204,7 +2204,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  	struct vmx_uret_msr *msr;
>  	int ret = 0;
>  	u32 msr_index = msr_info->index;
> -	u64 data = msr_info->data;
> +	u64 data = msr_info->data, spec_ctrl_mask = 0;
>  	u32 index;
>  
>  	switch (msr_index) {
> @@ -2508,6 +2508,31 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		if (data & ~MITI_CTRL_VALID_BITS)
>  			return 1;
>  
> +		if (data & MITI_CTRL_BHB_CLEAR_SEQ_S_USED &&
> +		    kvm_cpu_cap_has(X86_FEATURE_BHI_CTRL) &&
> +		    !(host_arch_capabilities & ARCH_CAP_BHI_NO))
> +			spec_ctrl_mask |= SPEC_CTRL_BHI_DIS_S;
> +
> +		/*
> +		 * Intercept IA32_SPEC_CTRL to disallow guest from changing
> +		 * certain bits if "virtualize IA32_SPEC_CTRL" isn't supported
> +		 * e.g., in nested case.
> +		 */
> +		if (spec_ctrl_mask && !cpu_has_spec_ctrl_shadow())
> +			vmx_enable_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
> +
> +		/*
> +		 * KVM_CAP_FORCE_SPEC_CTRL takes precedence over
> +		 * MSR_VIRTUAL_MITIGATION_CTRL.
> +		 */
> +		spec_ctrl_mask &= ~vmx->vcpu.kvm->arch.force_spec_ctrl_mask;
> +
> +		vmx->force_spec_ctrl_mask = vmx->vcpu.kvm->arch.force_spec_ctrl_mask |
> +					    spec_ctrl_mask;
> +		vmx->force_spec_ctrl_value = vmx->vcpu.kvm->arch.force_spec_ctrl_value |
> +					    spec_ctrl_mask;
> +		vmx_set_spec_ctrl(&vmx->vcpu, vmx->spec_ctrl_shadow);
> +
>  		vmx->msr_virtual_mitigation_ctrl = data;
>  		break;

I continue find all of this unpalatable.  The guest tells KVM what software
mitigations the guest is using, and then KVM is supposed to translate that into
some hardware functionality?  And merge that with userspace's own overrides?

Blech.

With KVM_CAP_FORCE_SPEC_CTRL, I don't see any reason for KVM to support the
Intel-defined virtual MSRs.  If the userspace VMM wants to play nice with the
Intel-defined stuff, then userspace can advertise the MSRs and use an MSR filter
to intercept and "emulate" the MSRs.  They should be set-and-forget MSRs, so
there's no need for KVM to handle them for performance reasons.

That way KVM doesn't need to deal with the the virtual MSRs, userspace can make
an informed decision when deciding how to set KVM_CAP_FORCE_SPEC_CTRL, and as a
bonus, rollouts for new mitigation thingies should be faster as updating userspace
is typically easier than updating the kernel/KVM.
Chao Gao June 11, 2024, 10:48 a.m. UTC | #2
>> +		if (data & MITI_CTRL_BHB_CLEAR_SEQ_S_USED &&
>> +		    kvm_cpu_cap_has(X86_FEATURE_BHI_CTRL) &&
>> +		    !(host_arch_capabilities & ARCH_CAP_BHI_NO))
>> +			spec_ctrl_mask |= SPEC_CTRL_BHI_DIS_S;
>> +
>> +		/*
>> +		 * Intercept IA32_SPEC_CTRL to disallow guest from changing
>> +		 * certain bits if "virtualize IA32_SPEC_CTRL" isn't supported
>> +		 * e.g., in nested case.
>> +		 */
>> +		if (spec_ctrl_mask && !cpu_has_spec_ctrl_shadow())
>> +			vmx_enable_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
>> +
>> +		/*
>> +		 * KVM_CAP_FORCE_SPEC_CTRL takes precedence over
>> +		 * MSR_VIRTUAL_MITIGATION_CTRL.
>> +		 */
>> +		spec_ctrl_mask &= ~vmx->vcpu.kvm->arch.force_spec_ctrl_mask;
>> +
>> +		vmx->force_spec_ctrl_mask = vmx->vcpu.kvm->arch.force_spec_ctrl_mask |
>> +					    spec_ctrl_mask;
>> +		vmx->force_spec_ctrl_value = vmx->vcpu.kvm->arch.force_spec_ctrl_value |
>> +					    spec_ctrl_mask;
>> +		vmx_set_spec_ctrl(&vmx->vcpu, vmx->spec_ctrl_shadow);
>> +
>>  		vmx->msr_virtual_mitigation_ctrl = data;
>>  		break;
>
>I continue find all of this unpalatable.  The guest tells KVM what software
>mitigations the guest is using, and then KVM is supposed to translate that into
>some hardware functionality?  And merge that with userspace's own overrides?

Yes. It is ugly. I will drop all Intel-defined stuff from KVM. Actually, I
wanted to punt to userspace ...

>
>Blech.
>
>With KVM_CAP_FORCE_SPEC_CTRL, I don't see any reason for KVM to support the
>Intel-defined virtual MSRs.  If the userspace VMM wants to play nice with the
>Intel-defined stuff, then userspace can advertise the MSRs and use an MSR filter
>to intercept and "emulate" the MSRs.  They should be set-and-forget MSRs, so
>there's no need for KVM to handle them for performance reasons.

... I had this idea of implementing policy-related stuff in userspace, and I wrote
in the cover-letter:

	"""
	1. the KVM<->userspace ABI defined in patch 1

	I am wondering if we can allow the userspace to configure the mask
	and the shadow value during guest's lifetime and do it on a vCPU basis.
	this way, in conjunction with "virtual MSRs" or any other interfaces,
	the usespace can adjust hardware mitigations applied to the guest during
	guest's lifetime e.g., for the best performance.
	"""

As said, this requires some tweaks to KVM_CAP_FORCE_SPEC_CTRL, such as making
the mask and shadow values adjustable and applicable on a per-vCPU basis. The
tweaks are not necessarily for Intel-defined virtual MSRs; if there were other
preferable interfaces, they could also benefit from these changes.

Any objections to these tweaks to KVM_CAP_FORCE_SPEC_CTRL?

>
>That way KVM doesn't need to deal with the the virtual MSRs, userspace can make
>an informed decision when deciding how to set KVM_CAP_FORCE_SPEC_CTRL, and as a
>bonus, rollouts for new mitigation thingies should be faster as updating userspace
>is typically easier than updating the kernel/KVM.

Good point!
Sean Christopherson June 11, 2024, 1:34 p.m. UTC | #3
On Tue, Jun 11, 2024, Chao Gao wrote:
> >I continue find all of this unpalatable.  The guest tells KVM what software
> >mitigations the guest is using, and then KVM is supposed to translate that into
> >some hardware functionality?  And merge that with userspace's own overrides?
> 
> Yes. It is ugly. I will drop all Intel-defined stuff from KVM. Actually, I
> wanted to punt to userspace ...
> 
> >
> >Blech.
> >
> >With KVM_CAP_FORCE_SPEC_CTRL, I don't see any reason for KVM to support the
> >Intel-defined virtual MSRs.  If the userspace VMM wants to play nice with the
> >Intel-defined stuff, then userspace can advertise the MSRs and use an MSR filter
> >to intercept and "emulate" the MSRs.  They should be set-and-forget MSRs, so
> >there's no need for KVM to handle them for performance reasons.
> 
> ... I had this idea of implementing policy-related stuff in userspace, and I wrote
> in the cover-letter:
> 
> 	"""
> 	1. the KVM<->userspace ABI defined in patch 1
> 
> 	I am wondering if we can allow the userspace to configure the mask
> 	and the shadow value during guest's lifetime and do it on a vCPU basis.
> 	this way, in conjunction with "virtual MSRs" or any other interfaces,
> 	the usespace can adjust hardware mitigations applied to the guest during
> 	guest's lifetime e.g., for the best performance.
> 	"""

Gah, sorry, I speed read the cover letter and didn't take the time to process that.

> As said, this requires some tweaks to KVM_CAP_FORCE_SPEC_CTRL, such as making
> the mask and shadow values adjustable and applicable on a per-vCPU basis. The
> tweaks are not necessarily for Intel-defined virtual MSRs; if there were other
> preferable interfaces, they could also benefit from these changes.
> 
> Any objections to these tweaks to KVM_CAP_FORCE_SPEC_CTRL?

Why does KVM_CAP_FORCE_SPEC_CTRL need to be per-vCPU?  Won't the CPU bugs and
mitigations be system-wide / VM-wide?
Chao Gao June 11, 2024, 2:08 p.m. UTC | #4
On Tue, Jun 11, 2024 at 06:34:49AM -0700, Sean Christopherson wrote:
>On Tue, Jun 11, 2024, Chao Gao wrote:
>> >I continue find all of this unpalatable.  The guest tells KVM what software
>> >mitigations the guest is using, and then KVM is supposed to translate that into
>> >some hardware functionality?  And merge that with userspace's own overrides?
>> 
>> Yes. It is ugly. I will drop all Intel-defined stuff from KVM. Actually, I
>> wanted to punt to userspace ...
>> 
>> >
>> >Blech.
>> >
>> >With KVM_CAP_FORCE_SPEC_CTRL, I don't see any reason for KVM to support the
>> >Intel-defined virtual MSRs.  If the userspace VMM wants to play nice with the
>> >Intel-defined stuff, then userspace can advertise the MSRs and use an MSR filter
>> >to intercept and "emulate" the MSRs.  They should be set-and-forget MSRs, so
>> >there's no need for KVM to handle them for performance reasons.
>> 
>> ... I had this idea of implementing policy-related stuff in userspace, and I wrote
>> in the cover-letter:
>> 
>> 	"""
>> 	1. the KVM<->userspace ABI defined in patch 1
>> 
>> 	I am wondering if we can allow the userspace to configure the mask
>> 	and the shadow value during guest's lifetime and do it on a vCPU basis.
>> 	this way, in conjunction with "virtual MSRs" or any other interfaces,
>> 	the usespace can adjust hardware mitigations applied to the guest during
>> 	guest's lifetime e.g., for the best performance.
>> 	"""
>
>Gah, sorry, I speed read the cover letter and didn't take the time to process that.
>
>> As said, this requires some tweaks to KVM_CAP_FORCE_SPEC_CTRL, such as making
>> the mask and shadow values adjustable and applicable on a per-vCPU basis. The
>> tweaks are not necessarily for Intel-defined virtual MSRs; if there were other
>> preferable interfaces, they could also benefit from these changes.
>> 
>> Any objections to these tweaks to KVM_CAP_FORCE_SPEC_CTRL?
>
>Why does KVM_CAP_FORCE_SPEC_CTRL need to be per-vCPU?  Won't the CPU bugs and
>mitigations be system-wide / VM-wide?

Because spec_ctrl is per-vCPU and Intel-defined virtual MSRs are also per-vCPU.
i.e., a guest __can__ configure different values to virtual MSRs on different
vCPUs even though a sane guest won't do this. If KVM doesn't want to rule out
the possibility of supporting Intel-defined virtual MSRs in userspace or any
other per-vCPU interfaces, KVM_CAP_FORCE_SPEC_CTRL needs to be per-vCPU.

implementation-wise, being per-vCPU is simpler because, otherwise, once userspace
adjusts the hardware mitigations to enforce, KVM needs to kick all vCPUs. This
will add more complexity.

And IMO, requiring guests to deploy same mitigations on vCPUs is an unnecessary
limitation.
Sean Christopherson June 11, 2024, 4:32 p.m. UTC | #5
On Tue, Jun 11, 2024, Chao Gao wrote:
> On Tue, Jun 11, 2024 at 06:34:49AM -0700, Sean Christopherson wrote:
> >> As said, this requires some tweaks to KVM_CAP_FORCE_SPEC_CTRL, such as making
> >> the mask and shadow values adjustable and applicable on a per-vCPU basis. The
> >> tweaks are not necessarily for Intel-defined virtual MSRs; if there were other
> >> preferable interfaces, they could also benefit from these changes.
> >> 
> >> Any objections to these tweaks to KVM_CAP_FORCE_SPEC_CTRL?
> >
> >Why does KVM_CAP_FORCE_SPEC_CTRL need to be per-vCPU?  Won't the CPU bugs and
> >mitigations be system-wide / VM-wide?
> 
> Because spec_ctrl is per-vCPU and Intel-defined virtual MSRs are also per-vCPU.

I figured that was the answer, but part of me was hopeful :-)

> i.e., a guest __can__ configure different values to virtual MSRs on different
> vCPUs even though a sane guest won't do this. If KVM doesn't want to rule out
> the possibility of supporting Intel-defined virtual MSRs in userspace or any
> other per-vCPU interfaces, KVM_CAP_FORCE_SPEC_CTRL needs to be per-vCPU.
> 
> implementation-wise, being per-vCPU is simpler because, otherwise, once userspace
> adjusts the hardware mitigations to enforce, KVM needs to kick all vCPUs. This
> will add more complexity.

+1, I even typed up as much before reading this paragraph.

> And IMO, requiring guests to deploy same mitigations on vCPUs is an unnecessary
> limitation.

Yeah, I can see how it would make things weird for no good reason.
 
So yeah, if the only thing stopping us from letting userspace deal with the virtual
MSRs is converting to a vCPU-scoped ioctl(), then by all means, lets do that.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cc260b14f8df..c5ceaebd954b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1956,8 +1956,8 @@  static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
 }
 
 #define VIRTUAL_ENUMERATION_VALID_BITS	VIRT_ENUM_MITIGATION_CTRL_SUPPORT
-#define MITI_ENUM_VALID_BITS		0ULL
-#define MITI_CTRL_VALID_BITS		0ULL
+#define MITI_ENUM_VALID_BITS		MITI_ENUM_BHB_CLEAR_SEQ_S_SUPPORT
+#define MITI_CTRL_VALID_BITS		MITI_CTRL_BHB_CLEAR_SEQ_S_USED
 
 static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 {
@@ -2204,7 +2204,7 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	struct vmx_uret_msr *msr;
 	int ret = 0;
 	u32 msr_index = msr_info->index;
-	u64 data = msr_info->data;
+	u64 data = msr_info->data, spec_ctrl_mask = 0;
 	u32 index;
 
 	switch (msr_index) {
@@ -2508,6 +2508,31 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (data & ~MITI_CTRL_VALID_BITS)
 			return 1;
 
+		if (data & MITI_CTRL_BHB_CLEAR_SEQ_S_USED &&
+		    kvm_cpu_cap_has(X86_FEATURE_BHI_CTRL) &&
+		    !(host_arch_capabilities & ARCH_CAP_BHI_NO))
+			spec_ctrl_mask |= SPEC_CTRL_BHI_DIS_S;
+
+		/*
+		 * Intercept IA32_SPEC_CTRL to disallow guest from changing
+		 * certain bits if "virtualize IA32_SPEC_CTRL" isn't supported
+		 * e.g., in nested case.
+		 */
+		if (spec_ctrl_mask && !cpu_has_spec_ctrl_shadow())
+			vmx_enable_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
+
+		/*
+		 * KVM_CAP_FORCE_SPEC_CTRL takes precedence over
+		 * MSR_VIRTUAL_MITIGATION_CTRL.
+		 */
+		spec_ctrl_mask &= ~vmx->vcpu.kvm->arch.force_spec_ctrl_mask;
+
+		vmx->force_spec_ctrl_mask = vmx->vcpu.kvm->arch.force_spec_ctrl_mask |
+					    spec_ctrl_mask;
+		vmx->force_spec_ctrl_value = vmx->vcpu.kvm->arch.force_spec_ctrl_value |
+					    spec_ctrl_mask;
+		vmx_set_spec_ctrl(&vmx->vcpu, vmx->spec_ctrl_shadow);
+
 		vmx->msr_virtual_mitigation_ctrl = data;
 		break;
 	default: