
[v15,07/14] KVM: VMX: Emulate reads and writes to CET MSRs

Message ID 20210203113421.5759-8-weijiang.yang@intel.com (mailing list archive)
State New, archived
Series Introduce support for guest CET feature

Commit Message

Yang, Weijiang Feb. 3, 2021, 11:34 a.m. UTC
Add support for emulating read and write accesses to CET MSRs.  CET MSRs
are universally "special" as they are either context switched via
dedicated VMCS fields or via XSAVES, i.e. no additional in-memory
tracking is needed, but emulated reads/writes are more expensive.

MSRs that are switched through XSAVES are especially annoying due to the
possibility of the kernel's FPU being used in IRQ context.  Disable IRQs
and ensure the guest's FPU state is loaded when accessing such MSRs.

Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 105 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.h     |   5 ++
 2 files changed, 110 insertions(+)

Comments

Paolo Bonzini Feb. 3, 2021, 11:57 a.m. UTC | #1
On 03/02/21 12:34, Yang Weijiang wrote:
> MSRs that are switched through XSAVES are especially annoying due to 
> the possibility of the kernel's FPU being used in IRQ context. Disable 
> IRQs and ensure the guest's FPU state is loaded when accessing such MSRs.

Good catch!  This should be in x86.h and named kvm_get/set_xsave_msr 
because it's not VMX specific.  The commit message should also be there 
as a comment.
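A minimal sketch of what such generic helpers might look like in x86.h, using the kvm_get/set_xsave_msr naming suggested above and the commit message wording as the comment (illustrative only, not necessarily the exact code applied):

	/*
	 * MSRs that are context switched via XSAVES are annoying due to the
	 * possibility of the kernel's FPU being used in IRQ context.  Disable
	 * IRQs and ensure the guest's FPU state is loaded before accessing
	 * such an MSR on the host.
	 */
	static inline void kvm_get_xsave_msr(struct msr_data *msr_info)
	{
		local_irq_disable();
		if (test_thread_flag(TIF_NEED_FPU_LOAD))
			switch_fpu_return();
		rdmsrl(msr_info->index, msr_info->data);
		local_irq_enable();
	}

	static inline void kvm_set_xsave_msr(struct msr_data *msr_info)
	{
		local_irq_disable();
		if (test_thread_flag(TIF_NEED_FPU_LOAD))
			switch_fpu_return();
		wrmsrl(msr_info->index, msr_info->data);
		local_irq_enable();
	}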

In addition,

> +	case MSR_IA32_S_CET:
> +		if (!cet_is_control_msr_accessible(vcpu, msr_info))
> +			return 1;
> +		msr_info->data = vmcs_readl(GUEST_S_CET);
> +		break;
> +	case MSR_IA32_U_CET:
> +		if (!cet_is_control_msr_accessible(vcpu, msr_info))
> +			return 1;
> +		vmx_get_xsave_msr(msr_info);
> +		break;

these two might as well be the same "case" for symmetry with the 
handling of WRMSR.
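
E.g. the merged RDMSR case could look roughly like this (just a sketch mirroring the WRMSR side, dispatching on the MSR index):

	case MSR_IA32_S_CET:
	case MSR_IA32_U_CET:
		if (!cet_is_control_msr_accessible(vcpu, msr_info))
			return 1;
		if (msr_info->index == MSR_IA32_S_CET)
			msr_info->data = vmcs_readl(GUEST_S_CET);
		else
			vmx_get_xsave_msr(msr_info);
		break;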

I've fixed this up locally, since these patches will not be pushed to 
Linus until the corresponding bare metal support is there.

Paolo
Yang, Weijiang Feb. 3, 2021, 12:50 p.m. UTC | #2
On Wed, Feb 03, 2021 at 12:57:41PM +0100, Paolo Bonzini wrote:
> On 03/02/21 12:34, Yang Weijiang wrote:
> > MSRs that are switched through XSAVES are especially annoying due to the
> > possibility of the kernel's FPU being used in IRQ context. Disable IRQs
> > and ensure the guest's FPU state is loaded when accessing such MSRs.
> 
> Good catch!  This should be in x86.h and named kvm_get/set_xsave_msr because
> it's not VMX specific.  The commit message should also be there as a
> comment.
Thanks a lot for reviewing! These words came from Sean! :-D
>
> In addition,
> 
> > +	case MSR_IA32_S_CET:
> > +		if (!cet_is_control_msr_accessible(vcpu, msr_info))
> > +			return 1;
> > +		msr_info->data = vmcs_readl(GUEST_S_CET);
> > +		break;
> > +	case MSR_IA32_U_CET:
> > +		if (!cet_is_control_msr_accessible(vcpu, msr_info))
> > +			return 1;
> > +		vmx_get_xsave_msr(msr_info);
> > +		break;
> 
> these two might as well be the same "case" for symmetry with the handling of
> WRMSR.
> 
> I've fixed this up locally, since these patches will not be pushed to Linus
> until the corresponding bare metal support is there.
Got it!
> 
> Paolo
John Allen May 18, 2022, 3:55 p.m. UTC | #3
On Wed, Feb 03, 2021 at 07:34:14PM +0800, Yang Weijiang wrote:
> Add support for emulating read and write accesses to CET MSRs.  CET MSRs
> are universally "special" as they are either context switched via
> dedicated VMCS fields or via XSAVES, i.e. no additional in-memory
> tracking is needed, but emulated reads/writes are more expensive.
> 
> MSRs that are switched through XSAVES are especially annoying due to the
> possibility of the kernel's FPU being used in IRQ context.  Disable IRQs
> and ensure the guest's FPU state is loaded when accessing such MSRs.
> 
> Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 105 +++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.h     |   5 ++
>  2 files changed, 110 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index cc60b1fc3ee7..694879c2b0b7 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1787,6 +1787,66 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
>  	}
>  }
>  
> +static void vmx_get_xsave_msr(struct msr_data *msr_info)
> +{
> +	local_irq_disable();
> +	if (test_thread_flag(TIF_NEED_FPU_LOAD))
> +		switch_fpu_return();
> +	rdmsrl(msr_info->index, msr_info->data);
> +	local_irq_enable();
> +}
> +
> +static void  vmx_set_xsave_msr(struct msr_data *msr_info)
> +{
> +	local_irq_disable();
> +	if (test_thread_flag(TIF_NEED_FPU_LOAD))
> +		switch_fpu_return();
> +	wrmsrl(msr_info->index, msr_info->data);
> +	local_irq_enable();
> +}
> +
> +static bool cet_is_ssp_msr_accessible(struct kvm_vcpu *vcpu,
> +				      struct msr_data *msr)
> +{
> +	u64 mask;
> +
> +	if (!kvm_cet_supported())
> +		return false;
> +
> +	if (msr->host_initiated)
> +		return true;
> +
> +	if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
> +		return false;
> +
> +	if (msr->index == MSR_IA32_INT_SSP_TAB)
> +		return true;
> +
> +	mask = (msr->index == MSR_IA32_PL3_SSP) ? XFEATURE_MASK_CET_USER :
> +						  XFEATURE_MASK_CET_KERNEL;
> +	return !!(vcpu->arch.guest_supported_xss & mask);
> +}
> +
> +static bool cet_is_control_msr_accessible(struct kvm_vcpu *vcpu,
> +					  struct msr_data *msr)
> +{
> +	u64 mask;
> +
> +	if (!kvm_cet_supported())
> +		return false;
> +
> +	if (msr->host_initiated)
> +		return true;
> +
> +	if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) &&
> +	    !guest_cpuid_has(vcpu, X86_FEATURE_IBT))
> +		return false;
> +
> +	mask = (msr->index == MSR_IA32_U_CET) ? XFEATURE_MASK_CET_USER :
> +						XFEATURE_MASK_CET_KERNEL;
> +	return !!(vcpu->arch.guest_supported_xss & mask);
> +}
> +
>  /*
>   * Reads an msr value (of 'msr_index') into 'pdata'.
>   * Returns 0 on success, non-0 otherwise.
> @@ -1919,6 +1979,26 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
>  		break;
> +	case MSR_IA32_S_CET:
> +		if (!cet_is_control_msr_accessible(vcpu, msr_info))
> +			return 1;
> +		msr_info->data = vmcs_readl(GUEST_S_CET);
> +		break;
> +	case MSR_IA32_U_CET:
> +		if (!cet_is_control_msr_accessible(vcpu, msr_info))
> +			return 1;
> +		vmx_get_xsave_msr(msr_info);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!cet_is_ssp_msr_accessible(vcpu, msr_info))
> +			return 1;
> +		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		if (!cet_is_ssp_msr_accessible(vcpu, msr_info))
> +			return 1;
> +		vmx_get_xsave_msr(msr_info);
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> @@ -2188,6 +2268,31 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			vmx->pt_desc.guest.addr_a[index / 2] = data;
>  		break;
> +	case MSR_IA32_S_CET:
> +	case MSR_IA32_U_CET:
> +		if (!cet_is_control_msr_accessible(vcpu, msr_info))
> +			return 1;
> +		if (data & GENMASK(9, 6))
> +			return 1;
> +		if (msr_index == MSR_IA32_S_CET)
> +			vmcs_writel(GUEST_S_CET, data);
> +		else
> +			vmx_set_xsave_msr(msr_info);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!cet_is_control_msr_accessible(vcpu, msr_info))
> +			return 1;
> +		if (is_noncanonical_address(data, vcpu))
> +			return 1;
> +		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		if (!cet_is_ssp_msr_accessible(vcpu, msr_info))
> +			return 1;
> +		if ((data & GENMASK(2, 0)) || is_noncanonical_address(data, vcpu))

Sorry to revive this old thread. I'm working on the corresponding SVM
bits for shadow stack and I noticed the above check. Why isn't this
GENMASK(1, 0)? The *SSP MSRs should be a 4-byte aligned canonical
address meaning that just bits 1 and 0 should always be zero. I was
looking through the previous versions of the set and found that this
changed between versions 11 and 12, but I don't see any discussion
related to this on the list.

Thanks,
John

> +			return 1;
> +		vmx_set_xsave_msr(msr_info);
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index fd8c46da2030..16c661d94349 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -288,6 +288,11 @@ static inline bool kvm_mpx_supported(void)
>  		== (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
>  }
>  
> +static inline bool kvm_cet_supported(void)
> +{
> +	return supported_xss & XFEATURE_MASK_CET_USER;
> +}
> +
>  extern unsigned int min_timer_period_us;
>  
>  extern bool enable_vmware_backdoor;
> -- 
> 2.26.2
>
Sean Christopherson May 18, 2022, 4:16 p.m. UTC | #4
On Wed, May 18, 2022, John Allen wrote:
> On Wed, Feb 03, 2021 at 07:34:14PM +0800, Yang Weijiang wrote:
> > +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > +		if (!cet_is_ssp_msr_accessible(vcpu, msr_info))
> > +			return 1;
> > +		if ((data & GENMASK(2, 0)) || is_noncanonical_address(data, vcpu))
> 
> Sorry to revive this old thread. I'm working on the corresponding SVM
> bits for shadow stack and I noticed the above check. Why isn't this
> GENMASK(1, 0)? The *SSP MSRs should be a 4-byte aligned canonical
> address meaning that just bits 1 and 0 should always be zero. I was
> looking through the previous versions of the set and found that this
> changed between versions 11 and 12, but I don't see any discussion
> related to this on the list.

Huh.  I'm not entirely sure what to make of the SDM's wording:

  The linear address written must be aligned to 8 bytes and bits 2:0 must be 0
  (hardware requires bits 1:0 to be 0).

Looking at the rest of the CET stuff, I believe requiring 8-byte alignment is
correct, and that the "hardware requires" blurb is trying to call out that the
SSP stored in hardware will always be 4-byte aligned but not necessarily 8-byte
aligned in order to play nice with 32-bit/compatibility mode.  But "base" addresses
that come from software, e.g. via MSRs and whatnot, must always be 8-byte aligned.
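
With that reading, the WRMSR-side check as posted amounts to the following (a lightly annotated restatement of the snippet above, nothing more):

	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
		if (!cet_is_ssp_msr_accessible(vcpu, msr_info))
			return 1;
		/*
		 * Software-programmed SSP values must be canonical and 8-byte
		 * aligned (bits 2:0 clear); hardware itself only guarantees
		 * 4-byte alignment of the live SSP, to play nice with
		 * 32-bit/compatibility mode.
		 */
		if ((data & GENMASK(2, 0)) || is_noncanonical_address(data, vcpu))
			return 1;
		vmx_set_xsave_msr(msr_info);
		break;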
Yang, Weijiang May 19, 2022, 8:49 a.m. UTC | #5
On 5/19/2022 12:16 AM, Sean Christopherson wrote:
> On Wed, May 18, 2022, John Allen wrote:
>> On Wed, Feb 03, 2021 at 07:34:14PM +0800, Yang Weijiang wrote:
>>> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
>>> +		if (!cet_is_ssp_msr_accessible(vcpu, msr_info))
>>> +			return 1;
>>> +		if ((data & GENMASK(2, 0)) || is_noncanonical_address(data, vcpu))
>> Sorry to revive this old thread. I'm working on the corresponding SVM
>> bits for shadow stack and I noticed the above check. Why isn't this
>> GENMASK(1, 0)? The *SSP MSRs should be a 4-byte aligned canonical
>> address meaning that just bits 1 and 0 should always be zero. I was
>> looking through the previous versions of the set and found that this
>> changed between versions 11 and 12, but I don't see any discussion
>> related to this on the list.
> Huh.  I'm not entirely sure what to make of the SDM's wording:
>
>    The linear address written must be aligned to 8 bytes and bits 2:0 must be 0
>    (hardware requires bits 1:0 to be 0).
>
> Looking at the rest of the CET stuff, I believe requiring 8-byte alignment is
> correct, and that the "hardware requires" blurb is trying to call out that the
> SSP stored in hardware will always be 4-byte aligned but not necessarily 8-byte
> aligned in order to play nice with 32-bit/compatibility mode.  But "base" addresses
> that come from software, e.g. via MSRs and whatnot, must always be 8-byte aligned.
Thanks Sean, I cannot agree more ;-)

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cc60b1fc3ee7..694879c2b0b7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1787,6 +1787,66 @@  static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 	}
 }
 
+static void vmx_get_xsave_msr(struct msr_data *msr_info)
+{
+	local_irq_disable();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		switch_fpu_return();
+	rdmsrl(msr_info->index, msr_info->data);
+	local_irq_enable();
+}
+
+static void vmx_set_xsave_msr(struct msr_data *msr_info)
+{
+	local_irq_disable();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		switch_fpu_return();
+	wrmsrl(msr_info->index, msr_info->data);
+	local_irq_enable();
+}
+
+static bool cet_is_ssp_msr_accessible(struct kvm_vcpu *vcpu,
+				      struct msr_data *msr)
+{
+	u64 mask;
+
+	if (!kvm_cet_supported())
+		return false;
+
+	if (msr->host_initiated)
+		return true;
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
+		return false;
+
+	if (msr->index == MSR_IA32_INT_SSP_TAB)
+		return true;
+
+	mask = (msr->index == MSR_IA32_PL3_SSP) ? XFEATURE_MASK_CET_USER :
+						  XFEATURE_MASK_CET_KERNEL;
+	return !!(vcpu->arch.guest_supported_xss & mask);
+}
+
+static bool cet_is_control_msr_accessible(struct kvm_vcpu *vcpu,
+					  struct msr_data *msr)
+{
+	u64 mask;
+
+	if (!kvm_cet_supported())
+		return false;
+
+	if (msr->host_initiated)
+		return true;
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) &&
+	    !guest_cpuid_has(vcpu, X86_FEATURE_IBT))
+		return false;
+
+	mask = (msr->index == MSR_IA32_U_CET) ? XFEATURE_MASK_CET_USER :
+						XFEATURE_MASK_CET_KERNEL;
+	return !!(vcpu->arch.guest_supported_xss & mask);
+}
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -1919,6 +1979,26 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
 		break;
+	case MSR_IA32_S_CET:
+		if (!cet_is_control_msr_accessible(vcpu, msr_info))
+			return 1;
+		msr_info->data = vmcs_readl(GUEST_S_CET);
+		break;
+	case MSR_IA32_U_CET:
+		if (!cet_is_control_msr_accessible(vcpu, msr_info))
+			return 1;
+		vmx_get_xsave_msr(msr_info);
+		break;
+	case MSR_IA32_INT_SSP_TAB:
+		if (!cet_is_ssp_msr_accessible(vcpu, msr_info))
+			return 1;
+		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
+		break;
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		if (!cet_is_ssp_msr_accessible(vcpu, msr_info))
+			return 1;
+		vmx_get_xsave_msr(msr_info);
+		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
@@ -2188,6 +2268,31 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			vmx->pt_desc.guest.addr_a[index / 2] = data;
 		break;
+	case MSR_IA32_S_CET:
+	case MSR_IA32_U_CET:
+		if (!cet_is_control_msr_accessible(vcpu, msr_info))
+			return 1;
+		if (data & GENMASK(9, 6))
+			return 1;
+		if (msr_index == MSR_IA32_S_CET)
+			vmcs_writel(GUEST_S_CET, data);
+		else
+			vmx_set_xsave_msr(msr_info);
+		break;
+	case MSR_IA32_INT_SSP_TAB:
+		if (!cet_is_control_msr_accessible(vcpu, msr_info))
+			return 1;
+		if (is_noncanonical_address(data, vcpu))
+			return 1;
+		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
+		break;
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		if (!cet_is_ssp_msr_accessible(vcpu, msr_info))
+			return 1;
+		if ((data & GENMASK(2, 0)) || is_noncanonical_address(data, vcpu))
+			return 1;
+		vmx_set_xsave_msr(msr_info);
+		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index fd8c46da2030..16c661d94349 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -288,6 +288,11 @@  static inline bool kvm_mpx_supported(void)
 		== (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
 }
 
+static inline bool kvm_cet_supported(void)
+{
+	return supported_xss & XFEATURE_MASK_CET_USER;
+}
+
 extern unsigned int min_timer_period_us;
 
 extern bool enable_vmware_backdoor;