diff mbox series

[v6,19/25] KVM: VMX: Emulate read and write to CET MSRs

Message ID 20230914063325.85503-20-weijiang.yang@intel.com (mailing list archive)
State New, archived
Headers show
Series Enable CET Virtualization | expand

Commit Message

Yang, Weijiang Sept. 14, 2023, 6:33 a.m. UTC
Add emulation interface for CET MSR access. The emulation code is split
into common part and vendor specific part. The former does common check
for MSRs and reads/writes directly from/to XSAVE-managed MSRs via the
helpers while the latter accesses the MSRs linked to VMCS fields.

Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 18 +++++++++++
 arch/x86/kvm/x86.c     | 71 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

Comments

Maxim Levitsky Oct. 31, 2023, 5:55 p.m. UTC | #1
On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote:
> Add emulation interface for CET MSR access. The emulation code is split
> into common part and vendor specific part. The former does common check
> for MSRs and reads/writes directly from/to XSAVE-managed MSRs via the
> helpers while the latter accesses the MSRs linked to VMCS fields.
> 
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 18 +++++++++++
>  arch/x86/kvm/x86.c     | 71 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 89 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index fd5893b3a2c8..9f4b56337251 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2111,6 +2111,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
>  		break;
> +	case MSR_IA32_S_CET:
> +		msr_info->data = vmcs_readl(GUEST_S_CET);
> +		break;
> +	case MSR_KVM_SSP:
> +		msr_info->data = vmcs_readl(GUEST_SSP);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
> +		break;
>  	case MSR_IA32_DEBUGCTLMSR:
>  		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
>  		break;
> @@ -2420,6 +2429,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			vmx->pt_desc.guest.addr_a[index / 2] = data;
>  		break;
> +	case MSR_IA32_S_CET:
> +		vmcs_writel(GUEST_S_CET, data);
> +		break;
> +	case MSR_KVM_SSP:
> +		vmcs_writel(GUEST_SSP, data);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
> +		break;
>  	case MSR_IA32_PERF_CAPABILITIES:
>  		if (data && !vcpu_to_pmu(vcpu)->version)
>  			return 1;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 73b45351c0fc..c85ee42ab4f1 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1847,6 +1847,11 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
>  }
>  EXPORT_SYMBOL_GPL(kvm_msr_allowed);
>  
> +#define CET_US_RESERVED_BITS		GENMASK(9, 6)
> +#define CET_US_SHSTK_MASK_BITS		GENMASK(1, 0)
> +#define CET_US_IBT_MASK_BITS		(GENMASK_ULL(5, 2) | GENMASK_ULL(63, 10))
> +#define CET_US_LEGACY_BITMAP_BASE(data)	((data) >> 12)
> +
>  /*
>   * Write @data into the MSR specified by @index.  Select MSR specific fault
>   * checks are bypassed if @host_initiated is %true.
> @@ -1856,6 +1861,7 @@ EXPORT_SYMBOL_GPL(kvm_msr_allowed);
>  static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>  			 bool host_initiated)
>  {
> +	bool host_msr_reset = host_initiated && data == 0;

I really don't like this boolean. While 0 is usually the reset value, it doesn't have to
be (like SVM tsc ratio reset value is 1 for example).
Also its name is confusing.

I suggest to just open code this instead.

>  	struct msr_data msr;
>  
>  	switch (index) {
> @@ -1906,6 +1912,46 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>  
>  		data = (u32)data;
>  		break;
> +	case MSR_IA32_U_CET:
> +	case MSR_IA32_S_CET:
> +		if (host_msr_reset && (kvm_cpu_cap_has(X86_FEATURE_SHSTK) ||
> +				       kvm_cpu_cap_has(X86_FEATURE_IBT)))
> +			break;
> +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
> +		    !guest_can_use(vcpu, X86_FEATURE_IBT))
> +			return 1;
> +		if (data & CET_US_RESERVED_BITS)
> +			return 1;
> +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
> +		    (data & CET_US_SHSTK_MASK_BITS))
> +			return 1;
> +		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
> +		    (data & CET_US_IBT_MASK_BITS))
> +			return 1;
> +		if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4))
> +			return 1;
> +
> +		/* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. */
> +		if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR))
> +			return 1;
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
> +			return 1;
> +		fallthrough;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +	case MSR_KVM_SSP:
> +		if (host_msr_reset && kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> +			break;
> +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> +			return 1;
> +		if (index == MSR_KVM_SSP && !host_initiated)
> +			return 1;
> +		if (is_noncanonical_address(data, vcpu))
> +			return 1;
> +		if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
> +			return 1;
> +		break;
Once again I'll prefer to have an ioctl for setting/getting SSP, this will make the above
code simpler (e.g there will be no need to check that write comes from the host/etc).

>  	}
>  
>  	msr.data = data;
> @@ -1949,6 +1995,23 @@ static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
>  			return 1;
>  		break;
> +	case MSR_IA32_U_CET:
> +	case MSR_IA32_S_CET:
> +		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
> +		    !guest_can_use(vcpu, X86_FEATURE_SHSTK))
> +			return 1;
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
> +			return 1;
> +		fallthrough;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +	case MSR_KVM_SSP:
> +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> +			return 1;
> +		if (index == MSR_KVM_SSP && !host_initiated)
> +			return 1;
> +		break;
>  	}
>  
>  	msr.index = index;
> @@ -4009,6 +4072,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		vcpu->arch.guest_fpu.xfd_err = data;
>  		break;
>  #endif
> +	case MSR_IA32_U_CET:
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		kvm_set_xstate_msr(vcpu, msr_info);

Ah, so here these functions (kvm_set_xstate_msr/kvm_get_xstate_msr) are used. 
I think that this patch should introduce them.

Also I will appreciate a comment to kvm_set_xstate_msr/kvm_get_xstate_msr saying something like:

"This function updates a guest MSR which value is saved in the guest FPU state. 
Wrap the write with load/save of the guest FPU state to keep the state consistent with the new MSR value"

Or something similar, although I will not argue over this.

> +		break;
>  	default:
>  		if (kvm_pmu_is_valid_msr(vcpu, msr))
>  			return kvm_pmu_set_msr(vcpu, msr_info);
> @@ -4365,6 +4432,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		msr_info->data = vcpu->arch.guest_fpu.xfd_err;
>  		break;
>  #endif
> +	case MSR_IA32_U_CET:
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		kvm_get_xstate_msr(vcpu, msr_info);
> +		break;
>  	default:
>  		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
>  			return kvm_pmu_get_msr(vcpu, msr_info);


Best regards,
	Maxim Levitsky
Sean Christopherson Nov. 1, 2023, 4:31 p.m. UTC | #2
On Tue, Oct 31, 2023, Maxim Levitsky wrote:
> On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote:
> > Add emulation interface for CET MSR access. The emulation code is split
> > into common part and vendor specific part. The former does common check
> > for MSRs and reads/writes directly from/to XSAVE-managed MSRs via the
> > helpers while the latter accesses the MSRs linked to VMCS fields.
> > 
> > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > ---

...

> > +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > +	case MSR_KVM_SSP:
> > +		if (host_msr_reset && kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> > +			break;
> > +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> > +			return 1;
> > +		if (index == MSR_KVM_SSP && !host_initiated)
> > +			return 1;
> > +		if (is_noncanonical_address(data, vcpu))
> > +			return 1;
> > +		if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
> > +			return 1;
> > +		break;
> Once again I'll prefer to have an ioctl for setting/getting SSP, this will
> make the above code simpler (e.g there will be no need to check that write
> comes from the host/etc).

I don't think an ioctl() would be simpler overall, especially when factoring in
userspace.  With a synthetic MSR, we get the following quite cheaply:

 1. Enumerating support to userspace.
 2. Save/restore of the value, e.g. for live migration.
 3. Vendor hooks for propagating values to/from the VMCS/VMCB.

For an ioctl(), #1 would require a capability, #2 (and #1 to some extent) would
require new userspace flows, and #3 would require new kvm_x86_ops hooks.

The synthetic MSR adds a small amount of messiness, as does bundling 
MSR_IA32_INT_SSP_TAB with the other shadow stack MSRs.  The bulk of the mess comes
from the need to allow userspace to write '0' when KVM enumerated support to
userspace.

If we isolate MSR_IA32_INT_SSP_TAB, that'll help with the synthetic MSR and with
MSR_IA32_INT_SSP_TAB.  For the unfortunate "host reset" behavior, the best idea I
came up with is to add a helper.  It's still a bit ugly, but the ugliness is
contained in a helper and IMO makes it much easier to follow the case statements.

get:

	case MSR_IA32_INT_SSP_TAB:
		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) ||
		    !guest_cpuid_has(vcpu, X86_FEATURE_LM))
			return 1;
		break;
	case MSR_KVM_SSP:
		if (!host_initiated)
			return 1;
		fallthrough;
	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
			return 1;
		break;

static bool is_set_cet_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u64 data,
				   bool host_initiated)
{
	bool any_cet = index == MSR_IA32_S_CET || index == MSR_IA32_U_CET;

	if (guest_can_use(vcpu, X86_FEATURE_SHSTK))
		return true;

	if (any_cet && guest_can_use(vcpu, X86_FEATURE_IBT))
		return true;

	/* 
	 * If KVM supports the MSR, i.e. has enumerated the MSR existence to
	 * userspace, then userspace is allowed to write '0' irrespective of
	 * whether or not the MSR is exposed to the guest.
	 */
	if (!host_initiated || data)
		return false;

	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK))
		return true;

	return any_cet && kvm_cpu_cap_has(X86_FEATURE_IBT);
}

set:
	case MSR_IA32_U_CET:
	case MSR_IA32_S_CET:
		if (!is_set_cet_msr_allowed(vcpu, index, data, host_initiated))
			return 1;
		if (data & CET_US_RESERVED_BITS)
			return 1;
		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
		    (data & CET_US_SHSTK_MASK_BITS))
			return 1;
		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
		    (data & CET_US_IBT_MASK_BITS))
			return 1;
		if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4))
			return 1;

		/* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. */
		if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR))
			return 1;
		break;
	case MSR_IA32_INT_SSP_TAB:
		if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
			return 1;

		if (is_noncanonical_address(data, vcpu))
			return 1;
		break;
	case MSR_KVM_SSP:
		if (!host_initiated)
			return 1;
		fallthrough;
	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
		if (!is_set_cet_msr_allowed(vcpu, index, data, host_initiated))
			return 1;
		if (is_noncanonical_address(data, vcpu))
			return 1;
		if (!IS_ALIGNED(data, 4))
			return 1;
		break;
	}
Maxim Levitsky Nov. 2, 2023, 6:38 p.m. UTC | #3
On Wed, 2023-11-01 at 09:31 -0700, Sean Christopherson wrote:
> On Tue, Oct 31, 2023, Maxim Levitsky wrote:
> > On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote:
> > > Add emulation interface for CET MSR access. The emulation code is split
> > > into common part and vendor specific part. The former does common check
> > > for MSRs and reads/writes directly from/to XSAVE-managed MSRs via the
> > > helpers while the latter accesses the MSRs linked to VMCS fields.
> > > 
> > > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > > ---
> 
> ...
> 
> > > +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > > +	case MSR_KVM_SSP:
> > > +		if (host_msr_reset && kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> > > +			break;
> > > +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> > > +			return 1;
> > > +		if (index == MSR_KVM_SSP && !host_initiated)
> > > +			return 1;
> > > +		if (is_noncanonical_address(data, vcpu))
> > > +			return 1;
> > > +		if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
> > > +			return 1;
> > > +		break;
> > Once again I'll prefer to have an ioctl for setting/getting SSP, this will
> > make the above code simpler (e.g there will be no need to check that write
> > comes from the host/etc).
> 
> I don't think an ioctl() would be simpler overall, especially when factoring in
> userspace.  With a synthetic MSR, we get the following quite cheaply:
> 
>  1. Enumerating support to userspace.
>  2. Save/restore of the value, e.g. for live migration.
>  3. Vendor hooks for propagating values to/from the VMCS/VMCB.
> 
> For an ioctl(), 
> #1 would require a capability, #2 (and #1 to some extent) would
> require new userspace flows, and #3 would require new kvm_x86_ops hooks.
> 
> The synthetic MSR adds a small amount of messiness, as does bundling 
> MSR_IA32_INT_SSP_TAB with the other shadow stack MSRs.  The bulk of the mess comes
> from the need to allow userspace to write '0' when KVM enumerated support to
> userspace.


Let me put it this way - all hacks start like that, and in this case this is API/ABI hack
so we will have to live with it forever.

Once there is a precedent, trust me there will be 10s of new 'fake' msrs added, and the
interface will become one big mess.

As I suggested, if you don't want to add new capability/ioctl and vendor callback per new
x86 arch register, then let's implement KVM_GET_ONE_REG/KVM_SET_ONE_REG and then it will
be really easy to add new regs without confusing users, and without polluting msr
namespace with msrs that don't exist.


Best regards,
	Maxim Levitsky


> 
> If we isolate MSR_IA32_INT_SSP_TAB, that'll help with the synthetic MSR and with
> MSR_IA32_INT_SSP_TAB.  For the unfortunate "host reset" behavior, the best idea I
> came up with is to add a helper.  It's still a bit ugly, but the ugliness is
> contained in a helper and IMO makes it much easier to follow the case statements.
> 
> get:
> 
> 	case MSR_IA32_INT_SSP_TAB:
> 		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) ||
> 		    !guest_cpuid_has(vcpu, X86_FEATURE_LM))
> 			return 1;
> 		break;
> 	case MSR_KVM_SSP:
> 		if (!host_initiated)
> 			return 1;
> 		fallthrough;
> 	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> 		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> 			return 1;
> 		break;
> 
> static bool is_set_cet_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u64 data,
> 				   bool host_initiated)
> {
> 	bool any_cet = index == MSR_IA32_S_CET || index == MSR_IA32_U_CET;
> 
> 	if (guest_can_use(vcpu, X86_FEATURE_SHSTK))
> 		return true;
> 
> 	if (any_cet && guest_can_use(vcpu, X86_FEATURE_IBT))
> 		return true;
> 
> 	/* 
> 	 * If KVM supports the MSR, i.e. has enumerated the MSR existence to
> 	 * userspace, then userspace is allowed to write '0' irrespective of
> 	 * whether or not the MSR is exposed to the guest.
> 	 */
> 	if (!host_initiated || data)
> 		return false;
> 
> 	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> 		return true;
> 
> 	return any_cet && kvm_cpu_cap_has(X86_FEATURE_IBT);
> }
> 
> set:
> 	case MSR_IA32_U_CET:
> 	case MSR_IA32_S_CET:
> 		if (!is_set_cet_msr_allowed(vcpu, index, data, host_initiated))
> 			return 1;
> 		if (data & CET_US_RESERVED_BITS)
> 			return 1;
> 		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
> 		    (data & CET_US_SHSTK_MASK_BITS))
> 			return 1;
> 		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
> 		    (data & CET_US_IBT_MASK_BITS))
> 			return 1;
> 		if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4))
> 			return 1;
> 
> 		/* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. */
> 		if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR))
> 			return 1;
> 		break;
> 	case MSR_IA32_INT_SSP_TAB:
> 		if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
> 			return 1;
> 
> 		if (is_noncanonical_address(data, vcpu))
> 			return 1;
> 		break;
> 	case MSR_KVM_SSP:
> 		if (!host_initiated)
> 			return 1;
> 		fallthrough;
> 	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> 		if (!is_set_cet_msr_allowed(vcpu, index, data, host_initiated))
> 			return 1;
> 		if (is_noncanonical_address(data, vcpu))
> 			return 1;
> 		if (!IS_ALIGNED(data, 4))
> 			return 1;
> 		break;
> 	}
>
Sean Christopherson Nov. 2, 2023, 11:58 p.m. UTC | #4
On Thu, Nov 02, 2023, Maxim Levitsky wrote:
> On Wed, 2023-11-01 at 09:31 -0700, Sean Christopherson wrote:
> > On Tue, Oct 31, 2023, Maxim Levitsky wrote:
> > > On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote:
> > > > Add emulation interface for CET MSR access. The emulation code is split
> > > > into common part and vendor specific part. The former does common check
> > > > for MSRs and reads/writes directly from/to XSAVE-managed MSRs via the
> > > > helpers while the latter accesses the MSRs linked to VMCS fields.
> > > > 
> > > > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > > > ---
> > 
> > ...
> > 
> > > > +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > > > +	case MSR_KVM_SSP:
> > > > +		if (host_msr_reset && kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> > > > +			break;
> > > > +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> > > > +			return 1;
> > > > +		if (index == MSR_KVM_SSP && !host_initiated)
> > > > +			return 1;
> > > > +		if (is_noncanonical_address(data, vcpu))
> > > > +			return 1;
> > > > +		if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
> > > > +			return 1;
> > > > +		break;
> > > Once again I'll prefer to have an ioctl for setting/getting SSP, this will
> > > make the above code simpler (e.g there will be no need to check that write
> > > comes from the host/etc).
> > 
> > I don't think an ioctl() would be simpler overall, especially when factoring in
> > userspace.  With a synthetic MSR, we get the following quite cheaply:
> > 
> >  1. Enumerating support to userspace.
> >  2. Save/restore of the value, e.g. for live migration.
> >  3. Vendor hooks for propagating values to/from the VMCS/VMCB.
> > 
> > For an ioctl(), 
> > #1 would require a capability, #2 (and #1 to some extent) would
> > require new userspace flows, and #3 would require new kvm_x86_ops hooks.
> > 
> > The synthetic MSR adds a small amount of messiness, as does bundling 
> > MSR_IA32_INT_SSP_TAB with the other shadow stack MSRs.  The bulk of the mess comes
> > from the need to allow userspace to write '0' when KVM enumerated support to
> > userspace.
> 
> Let me put it this way - all hacks start like that, and in this case this is API/ABI hack
> so we will have to live with it forever.

Eh, I don't view it as a hack, at least the kind of hack that has a negative
connotation.  KVM effectively has ~240 MSR indices reserved for whatever KVM
wants.  The only weird thing about this one is that it's not accessible from the
guest.  Which I agree is quite weird, but from a code perspective I think it
works quite well.

> Once there is a precedent, trust me there will be 10s of new 'fake' msrs added, and the
> interface will become one big mess.

That suggests MSRs aren't already one big mess. :-)  I'm somewhat joking, but also
somewhat serious.  I really don't think that adding one oddball synthetic MSR is
going to meaningfully move the needle on the messiness of MSRs.

Hmm, there probably is a valid slippery slope argument though.  As you say, at
some point, enough state will get shoved into hardware that KVM will need an ever
growing number of synthetic MSRs to keep pace.

> As I suggested, if you don't want to add new capability/ioctl and vendor
> callback per new x86 arch register, then let's implement
> KVM_GET_ONE_REG/KVM_SET_ONE_REG and then it will be really easy to add new
> regs without confusing users, and without polluting msr namespace with msrs
> that don't exist.

I definitely don't hate the idea of KVM_{G,S}ET_ONE_REG, what I don't want is to
have an entirely separate path in KVM for handling the actual get/set.

What if we combine the approaches?  Add KVM_{G,S}ET_ONE_REG support so that the
uAPI can use completely arbitrary register indices without having to worry about
polluting the MSR space and making MSR_KVM_SSP ABI.

Ooh, if we're clever, I bet we can extend KVM_{G,S}ET_ONE_REG to also work with
existing MSRs, GPRs, and other stuff, i.e. not force userspace through the funky
KVM_SET_MSRS just to set one reg, and not force a RMW of all GPRs just to set
RIP or something.  E.g. use bits 39:32 of the id to encode the register class,
bits 31:0 to hold the index within a class, and reserve bits 63:40 for future
usage.

Then for KVM-defined registers, we can route them internally as needed, e.g. we
can still define MSR_KVM_SSP so that internal it's treated like an MSR, but its
index isn't ABI and so can be changed at will.  And future KVM-defined registers
wouldn't _need_ to be treated like MSRs, i.e. we could route registers through
the MSR APIs if and only if it makes sense to do so.

Side topic, why on earth is the data value of kvm_one_reg "addr"?
Yang, Weijiang Nov. 3, 2023, 8:18 a.m. UTC | #5
On 11/2/2023 12:31 AM, Sean Christopherson wrote:
> On Tue, Oct 31, 2023, Maxim Levitsky wrote:
>> On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote:
>>> Add emulation interface for CET MSR access. The emulation code is split
>>> into common part and vendor specific part. The former does common check
>>> for MSRs and reads/writes directly from/to XSAVE-managed MSRs via the
>>> helpers while the latter accesses the MSRs linked to VMCS fields.
>>>
>>> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
>>> ---
> ...
>
>>> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
>>> +	case MSR_KVM_SSP:
>>> +		if (host_msr_reset && kvm_cpu_cap_has(X86_FEATURE_SHSTK))
>>> +			break;
>>> +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
>>> +			return 1;
>>> +		if (index == MSR_KVM_SSP && !host_initiated)
>>> +			return 1;
>>> +		if (is_noncanonical_address(data, vcpu))
>>> +			return 1;
>>> +		if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
>>> +			return 1;
>>> +		break;
>> Once again I'll prefer to have an ioctl for setting/getting SSP, this will
>> make the above code simpler (e.g there will be no need to check that write
>> comes from the host/etc).
> I don't think an ioctl() would be simpler overall, especially when factoring in
> userspace.  With a synthetic MSR, we get the following quite cheaply:
>
>   1. Enumerating support to userspace.
>   2. Save/restore of the value, e.g. for live migration.
>   3. Vendor hooks for propagating values to/from the VMCS/VMCB.
>
> For an ioctl(), #1 would require a capability, #2 (and #1 to some extent) would
> require new userspace flows, and #3 would require new kvm_x86_ops hooks.
>
> The synthetic MSR adds a small amount of messiness, as does bundling
> MSR_IA32_INT_SSP_TAB with the other shadow stack MSRs.  The bulk of the mess comes
> from the need to allow userspace to write '0' when KVM enumerated supported to
> userspace.
>
> If we isolate MSR_IA32_INT_SSP_TAB, that'll help with the synthetic MSR and with
> MSR_IA32_INT_SSP_TAB.  For the unfortunate "host reset" behavior, the best idea I
> came up with is to add a helper.  It's still a bit ugly, but the ugliness is
> contained in a helper and IMO makes it much easier to follow the case statements.

Frankly speaking, existing code is not hard to understand to me :-), the handling for MSR_KVM_SSP
and MSR_IA32_INT_SSP_TAB is straightforward if audiences read the related spec.
But I'll take your advice and enclose below changes. Thanks!
> get:
>
> 	case MSR_IA32_INT_SSP_TAB:
> 		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) ||
> 		    !guest_cpuid_has(vcpu, X86_FEATURE_LM))
> 			return 1;
> 		break;
> 	case MSR_KVM_SSP:
> 		if (!host_initiated)
> 			return 1;
> 		fallthrough;
> 	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> 		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> 			return 1;
> 		break;
>
> static bool is_set_cet_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u64 data,
> 				   bool host_initiated)
> {
> 	bool any_cet = index == MSR_IA32_S_CET || index == MSR_IA32_U_CET;
>
> 	if (guest_can_use(vcpu, X86_FEATURE_SHSTK))
> 		return true;
>
> 	if (any_cet && guest_can_use(vcpu, X86_FEATURE_IBT))
> 		return true;
>
> 	/*
> 	 * If KVM supports the MSR, i.e. has enumerated the MSR existence to
> 	 * userspace, then userspace is allowed to write '0' irrespective of
> 	 * whether or not the MSR is exposed to the guest.
> 	 */
> 	if (!host_initiated || data)
> 		return false;
> 	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> 		return true;
>
> 	return any_cet && kvm_cpu_cap_has(X86_FEATURE_IBT);
> }
>
> set:
> 	case MSR_IA32_U_CET:
> 	case MSR_IA32_S_CET:
> 		if (!is_set_cet_msr_allowed(vcpu, index, data, host_initiated))
> 			return 1;
> 		if (data & CET_US_RESERVED_BITS)
> 			return 1;
> 		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
> 		    (data & CET_US_SHSTK_MASK_BITS))
> 			return 1;
> 		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
> 		    (data & CET_US_IBT_MASK_BITS))
> 			return 1;
> 		if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4))
> 			return 1;
>
> 		/* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. */
> 		if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR))
> 			return 1;
> 		break;
> 	case MSR_IA32_INT_SSP_TAB:
> 		if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
> 			return 1;
>
> 		if (is_noncanonical_address(data, vcpu))
> 			return 1;
> 		break;
> 	case MSR_KVM_SSP:
> 		if (!host_initiated)
> 			return 1;
> 		fallthrough;
> 	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> 		if (!is_set_cet_msr_allowed(vcpu, index, data, host_initiated))
> 			return 1;
> 		if (is_noncanonical_address(data, vcpu))
> 			return 1;
> 		if (!IS_ALIGNED(data, 4))
> 			return 1;
> 		break;
> 	}
Sean Christopherson Nov. 3, 2023, 10:26 p.m. UTC | #6
On Fri, Nov 03, 2023, Weijiang Yang wrote:
> On 11/2/2023 12:31 AM, Sean Christopherson wrote:
> > On Tue, Oct 31, 2023, Maxim Levitsky wrote:
> > > On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote:
> > > > Add emulation interface for CET MSR access. The emulation code is split
> > > > into common part and vendor specific part. The former does common check
> > > > for MSRs and reads/writes directly from/to XSAVE-managed MSRs via the
> > > > helpers while the latter accesses the MSRs linked to VMCS fields.
> > > > 
> > > > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > > > ---
> > ...
> > 
> > > > +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > > > +	case MSR_KVM_SSP:
> > > > +		if (host_msr_reset && kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> > > > +			break;
> > > > +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> > > > +			return 1;
> > > > +		if (index == MSR_KVM_SSP && !host_initiated)
> > > > +			return 1;
> > > > +		if (is_noncanonical_address(data, vcpu))
> > > > +			return 1;
> > > > +		if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
> > > > +			return 1;
> > > > +		break;
> > > Once again I'll prefer to have an ioctl for setting/getting SSP, this will
> > > make the above code simpler (e.g there will be no need to check that write
> > > comes from the host/etc).
> > I don't think an ioctl() would be simpler overall, especially when factoring in
> > userspace.  With a synthetic MSR, we get the following quite cheaply:
> > 
> >   1. Enumerating support to userspace.
> >   2. Save/restore of the value, e.g. for live migration.
> >   3. Vendor hooks for propagating values to/from the VMCS/VMCB.
> > 
> > For an ioctl(), #1 would require a capability, #2 (and #1 to some extent) would
> > require new userspace flows, and #3 would require new kvm_x86_ops hooks.
> > 
> > The synthetic MSR adds a small amount of messiness, as does bundling
> > MSR_IA32_INT_SSP_TAB with the other shadow stack MSRs.  The bulk of the mess comes
> > from the need to allow userspace to write '0' when KVM enumerated support to
> > userspace.
> > 
> > If we isolate MSR_IA32_INT_SSP_TAB, that'll help with the synthetic MSR and with
> > MSR_IA32_INT_SSP_TAB.  For the unfortunate "host reset" behavior, the best idea I
> > came up with is to add a helper.  It's still a bit ugly, but the ugliness is
> > contained in a helper and IMO makes it much easier to follow the case statements.
> 
> Frankly speaking, existing code is not hard to understand to me :-), the
> handling for MSR_KVM_SSP and MSR_IA32_INT_SSP_TAB is straightforward if
> audiences read the related spec.

I don't necessarily disagree, but I 100% agree with Maxim that host_msr_reset is
a confusing name.  As Maxim pointed out, '0' isn't necessarily the RESET value.
And host_msr_reset implies that userspace is emulating a RESET, which may not
actually be true, e.g. a naive userspace could be restoring '0' as part of live
migration.

> But I'll take your advice and enclose below changes. Thanks!

Definitely feel free to propose an alternative.  My goal with the suggested change
is to eliminate host_msr_reset without creating unwieldy case statements.
Isolating MSR_IA32_INT_SSP_TAB was (obviously) the best solution I came up with.

> > get:
> > 
> > 	case MSR_IA32_INT_SSP_TAB:
> > 		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) ||
> > 		    !guest_cpuid_has(vcpu, X86_FEATURE_LM))
> > 			return 1;
> > 		break;
> > 	case MSR_KVM_SSP:
> > 		if (!host_initiated)
> > 			return 1;
> > 		fallthrough;
> > 	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > 		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> > 			return 1;
> > 		break;
> > 
> > static bool is_set_cet_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u64 data,
> > 				   bool host_initiated)
> > {
> > 	bool any_cet = index == MSR_IA32_S_CET || index == MSR_IA32_U_CET;
> > 
> > 	if (guest_can_use(vcpu, X86_FEATURE_SHSTK))
> > 		return true;
> > 
> > 	if (any_cet && guest_can_use(vcpu, X86_FEATURE_IBT))
> > 		return true;
> > 
> > 	/*
> > 	 * If KVM supports the MSR, i.e. has enumerated the MSR existence to
> > 	 * userspace, then userspace is allowed to write '0' irrespective of
> > 	 * whether or not the MSR is exposed to the guest.
> > 	 */
> > 	if (!host_initiated || data)
> > 		return false;
> > 	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> > 		return true;
> > 
> > 	return any_cet && kvm_cpu_cap_has(X86_FEATURE_IBT);
> > }
> > 
> > set:
> > 	case MSR_IA32_U_CET:
> > 	case MSR_IA32_S_CET:
> > 		if (!is_set_cet_msr_allowed(vcpu, index, data, host_initiated))
> > 			return 1;
> > 		if (data & CET_US_RESERVED_BITS)
> > 			return 1;
> > 		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
> > 		    (data & CET_US_SHSTK_MASK_BITS))
> > 			return 1;
> > 		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
> > 		    (data & CET_US_IBT_MASK_BITS))
> > 			return 1;
> > 		if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4))
> > 			return 1;
> > 
> > 		/* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. */
> > 		if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR))
> > 			return 1;
> > 		break;
> > 	case MSR_IA32_INT_SSP_TAB:
> > 		if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
> > 			return 1;

Doh, I think this should be:

		if (!is_set_cet_msr_allowed(vcpu, index, data, host_initiated) ||
		    !guest_cpuid_has(vcpu, X86_FEATURE_LM))
			return 1;
> > 
> > 		if (is_noncanonical_address(data, vcpu))
> > 			return 1;
> > 		break;
> > 	case MSR_KVM_SSP:
> > 		if (!host_initiated)
> > 			return 1;
> > 		fallthrough;
> > 	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > 		if (!is_set_cet_msr_allowed(vcpu, index, data, host_initiated))
> > 			return 1;
> > 		if (is_noncanonical_address(data, vcpu))
> > 			return 1;
> > 		if (!IS_ALIGNED(data, 4))
> > 			return 1;
> > 		break;
> > 	}
>
Maxim Levitsky Nov. 7, 2023, 6:12 p.m. UTC | #7
On Thu, 2023-11-02 at 16:58 -0700, Sean Christopherson wrote:
> On Thu, Nov 02, 2023, Maxim Levitsky wrote:
> > On Wed, 2023-11-01 at 09:31 -0700, Sean Christopherson wrote:
> > > On Tue, Oct 31, 2023, Maxim Levitsky wrote:
> > > > On Thu, 2023-09-14 at 02:33 -0400, Yang Weijiang wrote:
> > > > > Add emulation interface for CET MSR access. The emulation code is split
> > > > > into common part and vendor specific part. The former does common check
> > > > > for MSRs and reads/writes directly from/to XSAVE-managed MSRs via the
> > > > > helpers while the latter accesses the MSRs linked to VMCS fields.
> > > > > 
> > > > > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > > > > ---
> > > 
> > > ...
> > > 
> > > > > +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > > > > +	case MSR_KVM_SSP:
> > > > > +		if (host_msr_reset && kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> > > > > +			break;
> > > > > +		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
> > > > > +			return 1;
> > > > > +		if (index == MSR_KVM_SSP && !host_initiated)
> > > > > +			return 1;
> > > > > +		if (is_noncanonical_address(data, vcpu))
> > > > > +			return 1;
> > > > > +		if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
> > > > > +			return 1;
> > > > > +		break;
> > > > Once again I'll prefer to have an ioctl for setting/getting SSP, this will
> > > > make the above code simpler (e.g there will be no need to check that write
> > > > comes from the host/etc).
> > > 
> > > I don't think an ioctl() would be simpler overall, especially when factoring in
> > > userspace.  With a synthetic MSR, we get the following quite cheaply:
> > > 
> > >  1. Enumerating support to userspace.
> > >  2. Save/restore of the value, e.g. for live migration.
> > >  3. Vendor hooks for propagating values to/from the VMCS/VMCB.
> > > 
> > > For an ioctl(), 
> > > #1 would require a capability, #2 (and #1 to some extent) would
> > > require new userspace flows, and #3 would require new kvm_x86_ops hooks.
> > > 
> > > The synthetic MSR adds a small amount of messiness, as does bundling 
> > > MSR_IA32_INT_SSP_TAB with the other shadow stack MSRs.  The bulk of the mess comes
> > > from the need to allow userspace to write '0' when KVM enumerated supported to
> > > userspace.
> > 
> > Let me put it this way - all hacks start like that, and in this case this is API/ABI hack
> > so we will have to live with it forever.
> 
> Eh, I don't view it as a hack, at least the kind of hack that has a negative
> connotation.  KVM effectively has ~240 MSR indices reserved for whatever KVM
> wants. 
This is exactly the problem. These indices are reserved for PV features, not
for fake msrs, and my fear is that once we mix it up, it will be a mess.

If that was not API/ABI, I wouldn't complain, but since this is API/ABI, I'm afraid
to make a mistake and then be sorry.


>  The only weird thing about this one is that it's not accessible from the
> guest.  Which I agree is quite weird, but from a code perspective I think it
> works quite well.
> 
> > Once there is a precedent, trust me there will be 10s of new 'fake' msrs added, and the
> > interface will become one big mess.
> 
> That suggests MSRs aren't already one big mess. :-)  I'm somewhat joking, but also
> somewhat serious.  I really don't think that adding one oddball synthetic MSR is
> going to meaningfully move the needle on the messiness of MSRs.
> 
> Hmm, there probably is a valid slippery slope argument though.  As you say, at
> some point, enough state will get shoved into hardware that KVM will need an ever
> growing number of synthetic MSRs to keep pace.

Yes, exactly what I mean - Honestly though I don't expect many new x86 registers/states
that are not msrs, but we don't know what x86 designers will do next,
and APIs are something that can't be fixed later.

> 
> > As I suggested, if you don't want to add new capability/ioctl and vendor
> > callback per new x86 arch register, then let's implement
> > KVM_GET_ONE_REG/KVM_SET_ONE_REG and then it will be really easy to add new
> > regs without confusing users, and without polluting msr namespace with msrs
> > that don't exist.
> 
> I definitely don't hate the idea of KVM_{G,S}ET_ONE_REG, what I don't want is to
> have an entirely separate path in KVM for handling the actual get/set.
> 
> What if we combine the approaches?  Add KVM_{G,S}ET_ONE_REG support so that the
> uAPI can use completely arbitrary register indices without having to worry about
> polluting the MSR space and making MSR_KVM_SSP ABI.
Sounds like a reasonable idea but might be overkill.

> 
> Ooh, if we're clever, I bet we can extend KVM_{G,S}ET_ONE_REG to also work with
> existing MSRs, GPRs, and other stuff,

Not sure if we want to make it work with MSRs. MSRs are a very well defined thing
on x86, and we already have an API to read/write them. Other registers maybe,
don't know.

>  i.e. not force userspace through the funky
> KVM_SET_MSRS just to set one reg, and not force a RMW of all GPRs just to set
> RIP or something.
Setting one GPR like RIP does sound like a valid use case of KVM_SET_ONE_REG.

>   E.g. use bits 39:32 of the id to encode the register class,
> bits 31:0 to hold the index within a class, and reserve bits 63:40 for future
> usage.
> 
> Then for KVM-defined registers, we can route them internally as needed, e.g. we
> can still define MSR_KVM_SSP so that internal it's treated like an MSR, but its
> index isn't ABI and so can be changed at will.  And future KVM-defined registers
> wouldn't _need_ to be treated like MSRs, i.e. we could route registers through
> the MSR APIs if and only if it makes sense to do so.

I am not sure that even internally I'll treat MSR_KVM_SSP as MSR. 
An MSR IMHO is a msr, a register is a register, mixing this up will
just add to the confusion.


Honestly if I were to add support for the SSP register, I'll just add a new
ioctl/capability and vendor callback. All of this code is just harmless
boilerplate code.
Even using KVM_GET_ONE_REG/KVM_SET_ONE_REG is probably overkill, although using
it for new registers is reasonable.

At the end I am not going to argue much about this - I just voiced my opinion that currently
MSR read/write interface is pure in the sense that it only works on either real msrs or
at least PV msrs that the guest can read/write. 

All other guest's state is set via separate ioctls/callbacks/etc, and thus it's more consistent
from API POV to add SSP here.


> 
> Side topic, why on earth is the data value of kvm_one_reg "addr"?

I don't know, probably something ARM related.

> 


Best regards,
	Maxim Levitsky
Sean Christopherson Nov. 7, 2023, 6:39 p.m. UTC | #8
On Tue, Nov 07, 2023, Maxim Levitsky wrote:
> On Thu, 2023-11-02 at 16:58 -0700, Sean Christopherson wrote:
> > Ooh, if we're clever, I bet we can extend KVM_{G,S}ET_ONE_REG to also work with
> > existing MSRs, GPRs, and other stuff,
> 
> Not sure if we want to make it work with MSRs. MSRs are a very well defined thing
> on x86, and we already have an API to read/write them.

Yeah, the API is weird though :-)

> Other registers maybe, don't know.
> 
> >  i.e. not force userspace through the funky
> > KVM_SET_MSRS just to set one reg, and not force a RMW of all GPRs just to set
> > RIP or something.
> Setting one GPR like RIP does sound like a valid use case of KVM_SET_ONE_REG.
> 
> >   E.g. use bits 39:32 of the id to encode the register class,
> > bits 31:0 to hold the index within a class, and reserve bits 63:40 for future
> > usage.
> > 
> > Then for KVM-defined registers, we can route them internally as needed, e.g. we
> > can still define MSR_KVM_SSP so that internal it's treated like an MSR, but its
> > index isn't ABI and so can be changed at will.  And future KVM-defined registers
> > wouldn't _need_ to be treated like MSRs, i.e. we could route registers through
> > the MSR APIs if and only if it makes sense to do so.
> 
> I am not sure that even internally I'll treat MSR_KVM_SSP as MSR. 
> An MSR IMHO is a msr, a register is a register, mixing this up will
> just add to the confusion.

I disagree, things like MSR_{FS,GS}_BASE already set the precedent that MSRs and
registers can be separate viewpoints to the same internal CPU state.  AIUI, these
days, whether a register is exposed via an MSR or dedicated ISA largely comes
down to CPL restrictions and performance.

> Honestly if I were to add support for the SSP register, I'll just add a new
> ioctl/capability and vendor callback. All of this code is just harmless
> boilerplate code.

We've had far too many bugs and confusion over error handling for things like
checking "is this value legal" to be considered harmless boilerplate code.

> Even using KVM_GET_ONE_REG/KVM_SET_ONE_REG is probably overkill, although using
> it for new registers is reasonable.

Maybe, but if we're going to bother adding new ioctls() for x86, I don't see any
benefit to reinventing a wheel that's only good for one thing.
diff mbox series

Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fd5893b3a2c8..9f4b56337251 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2111,6 +2111,15 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
 		break;
+	case MSR_IA32_S_CET:
+		msr_info->data = vmcs_readl(GUEST_S_CET);
+		break;
+	case MSR_KVM_SSP:
+		msr_info->data = vmcs_readl(GUEST_SSP);
+		break;
+	case MSR_IA32_INT_SSP_TAB:
+		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
+		break;
 	case MSR_IA32_DEBUGCTLMSR:
 		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
 		break;
@@ -2420,6 +2429,15 @@  static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			vmx->pt_desc.guest.addr_a[index / 2] = data;
 		break;
+	case MSR_IA32_S_CET:
+		vmcs_writel(GUEST_S_CET, data);
+		break;
+	case MSR_KVM_SSP:
+		vmcs_writel(GUEST_SSP, data);
+		break;
+	case MSR_IA32_INT_SSP_TAB:
+		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
+		break;
 	case MSR_IA32_PERF_CAPABILITIES:
 		if (data && !vcpu_to_pmu(vcpu)->version)
 			return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 73b45351c0fc..c85ee42ab4f1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1847,6 +1847,11 @@  bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
 }
 EXPORT_SYMBOL_GPL(kvm_msr_allowed);
 
+#define CET_US_RESERVED_BITS		GENMASK(9, 6)
+#define CET_US_SHSTK_MASK_BITS		GENMASK(1, 0)
+#define CET_US_IBT_MASK_BITS		(GENMASK_ULL(5, 2) | GENMASK_ULL(63, 10))
+#define CET_US_LEGACY_BITMAP_BASE(data)	((data) >> 12)
+
 /*
  * Write @data into the MSR specified by @index.  Select MSR specific fault
  * checks are bypassed if @host_initiated is %true.
@@ -1856,6 +1861,7 @@  EXPORT_SYMBOL_GPL(kvm_msr_allowed);
 static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 			 bool host_initiated)
 {
+	bool host_msr_reset = host_initiated && data == 0;
 	struct msr_data msr;
 
 	switch (index) {
@@ -1906,6 +1912,46 @@  static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 
 		data = (u32)data;
 		break;
+	case MSR_IA32_U_CET:
+	case MSR_IA32_S_CET:
+		if (host_msr_reset && (kvm_cpu_cap_has(X86_FEATURE_SHSTK) ||
+				       kvm_cpu_cap_has(X86_FEATURE_IBT)))
+			break;
+		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
+		    !guest_can_use(vcpu, X86_FEATURE_IBT))
+			return 1;
+		if (data & CET_US_RESERVED_BITS)
+			return 1;
+		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
+		    (data & CET_US_SHSTK_MASK_BITS))
+			return 1;
+		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
+		    (data & CET_US_IBT_MASK_BITS))
+			return 1;
+		if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4))
+			return 1;
+
+		/* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. */
+		if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR))
+			return 1;
+		break;
+	case MSR_IA32_INT_SSP_TAB:
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+			return 1;
+		fallthrough;
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+	case MSR_KVM_SSP:
+		if (host_msr_reset && kvm_cpu_cap_has(X86_FEATURE_SHSTK))
+			break;
+		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
+			return 1;
+		if (index == MSR_KVM_SSP && !host_initiated)
+			return 1;
+		if (is_noncanonical_address(data, vcpu))
+			return 1;
+		if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
+			return 1;
+		break;
 	}
 
 	msr.data = data;
@@ -1949,6 +1995,23 @@  static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		break;
+	case MSR_IA32_U_CET:
+	case MSR_IA32_S_CET:
+		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
+		    !guest_can_use(vcpu, X86_FEATURE_SHSTK))
+			return 1;
+		break;
+	case MSR_IA32_INT_SSP_TAB:
+		if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+			return 1;
+		fallthrough;
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+	case MSR_KVM_SSP:
+		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
+			return 1;
+		if (index == MSR_KVM_SSP && !host_initiated)
+			return 1;
+		break;
 	}
 
 	msr.index = index;
@@ -4009,6 +4072,10 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.guest_fpu.xfd_err = data;
 		break;
 #endif
+	case MSR_IA32_U_CET:
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		kvm_set_xstate_msr(vcpu, msr_info);
+		break;
 	default:
 		if (kvm_pmu_is_valid_msr(vcpu, msr))
 			return kvm_pmu_set_msr(vcpu, msr_info);
@@ -4365,6 +4432,10 @@  int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vcpu->arch.guest_fpu.xfd_err;
 		break;
 #endif
+	case MSR_IA32_U_CET:
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		kvm_get_xstate_msr(vcpu, msr_info);
+		break;
 	default:
 		if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
 			return kvm_pmu_get_msr(vcpu, msr_info);