[v8,7/7] KVM: X86: Add user-space access interface for CET MSRs

Message ID 20191101085222.27997-8-weijiang.yang@intel.com (mailing list archive)
State New, archived
Series Introduce support for guest CET feature

Commit Message

Yang, Weijiang Nov. 1, 2019, 8:52 a.m. UTC
There are two different places where guest CET states are stored. The states
managed with XSAVES/XRSTORS, as saved/restored in the previous patch, can be
read/written directly from/to the MSRs. Those stored in VMCS fields are
accessed via vmcs_read/vmcs_write.

Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/kvm/vmx/vmx.c | 140 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c     |   3 +
 2 files changed, 143 insertions(+)

Comments

Sean Christopherson Dec. 10, 2019, 9:58 p.m. UTC | #1
On Fri, Nov 01, 2019 at 04:52:22PM +0800, Yang Weijiang wrote:
> There're two different places storing Guest CET states, states
> managed with XSAVES/XRSTORS, as restored/saved
> in previous patch, can be read/write directly from/to the MSRs.
> For those stored in VMCS fields, they're access via vmcs_read/
> vmcs_write.
> 
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---
>  arch/x86/kvm/vmx/vmx.c | 140 +++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/x86.c     |   3 +
>  2 files changed, 143 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index bfb1b922a9ac..71aba264b5d2 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1671,6 +1671,98 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
>  	return 0;
>  }
>  
> +#define CET_MSR_RSVD_BITS_1    0x3
> +#define CET_MSR_RSVD_BITS_2   (0xF << 6)
> +
> +static bool cet_msr_write_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u32 index = msr->index;
> +	u64 data = msr->data;
> +	u32 high_word = data >> 32;
> +
> +	if ((index == MSR_IA32_U_CET || index == MSR_IA32_S_CET) &&
> +	    (data & CET_MSR_RSVD_BITS_2))
> +		return false;
> +
> +	if (is_64_bit_mode(vcpu)) {
> +		if (is_noncanonical_address(data & PAGE_MASK, vcpu))

I don't think this is correct.  MSRs that contain an address usually only
fault on a non-canonical value and do the non-canonical check regardless
of mode.  E.g. VM-Enter's consistency checks on SYSENTER_E{I,S}P only care
about a canonical address and are not dependent on mode, and SYSENTER
itself states that bits 63:32 are ignored in 32-bit mode.  I assume the
same is true here.

If that is indeed the case, what about adding these to the common canonical
check in __kvm_set_msr()?  That'd cut down on the boilerplate here and
might make it easier to audit KVM's canonical checks.
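
A rough sketch of what that could look like, piggybacking on the canonical
checks that __kvm_set_msr() already does for the syscall/FS/GS MSRs (the
exact set of existing cases and the placement of the CET MSRs here are
illustrative, not part of the posted patch; the MSR_IA32_PL*_SSP and
MSR_IA32_INT_SSP_TAB defines come from this series):

	/* In __kvm_set_msr(), alongside the existing cases: */
	switch (index) {
	case MSR_FS_BASE:
	case MSR_GS_BASE:
	case MSR_KERNEL_GS_BASE:
	case MSR_CSTAR:
	case MSR_LSTAR:
	/* CET linear addresses must be canonical regardless of guest mode. */
	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
	case MSR_IA32_INT_SSP_TAB:
		if (is_noncanonical_address(data, vcpu))
			return 1;
		break;
	/* SYSENTER_ESP/EIP and the other existing cases stay unchanged. */
	}

That would leave only the CET-specific reserved-bit checks in vmx_set_msr().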

> +			return false;
> +		else if ((index == MSR_IA32_PL0_SSP ||
> +			  index == MSR_IA32_PL1_SSP ||
> +			  index == MSR_IA32_PL2_SSP ||
> +			  index == MSR_IA32_PL3_SSP) &&
> +			  (data & CET_MSR_RSVD_BITS_1))
> +			return false;
> +	} else {
> +		if (msr->index == MSR_IA32_INT_SSP_TAB)
> +			return false;
> +		else if ((index == MSR_IA32_U_CET ||
> +			  index == MSR_IA32_S_CET ||
> +			  index == MSR_IA32_PL0_SSP ||
> +			  index == MSR_IA32_PL1_SSP ||
> +			  index == MSR_IA32_PL2_SSP ||
> +			  index == MSR_IA32_PL3_SSP) &&
> +			  (high_word & ~0ul))
> +			return false;
> +	}
> +
> +	return true;
> +}

This helper seems like overkill, e.g. it's filled with index-specific
checks, but is called from code that has already switched on the index.
Open coding the individual checks is likely more readable and would require
less code, especially if the canonical checks are cleaned up.
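
For example, with the canonical checks hoisted into the common code, the
remaining write checks could be open coded per case, roughly like this (a
sketch only; the cet_*_access_allowed() helper names are illustrative, not
from the posted patch):

	case MSR_IA32_U_CET:
		if (!cet_ctl_access_allowed(vcpu, msr_info))	/* illustrative helper */
			return 1;
		if (data & GENMASK(9, 6))	/* reserved bits 9:6 of {U,S}_CET */
			return 1;
		wrmsrl(MSR_IA32_U_CET, data);
		break;
	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
		if (!cet_ssp_access_allowed(vcpu, msr_info))	/* illustrative helper */
			return 1;
		if (data & GENMASK(1, 0))	/* SSP must be 4-byte aligned */
			return 1;
		wrmsrl(msr_info->index, data);
		break;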

> +
> +static bool cet_msr_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	u64 kvm_xss;
> +	u32 index = msr->index;
> +
> +	if (is_guest_mode(vcpu))
> +		return false;

I may have missed this in an earlier discussion, does CET not support
nesting?

> +
> +	kvm_xss = kvm_supported_xss();
> +
> +	switch (index) {
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		if (!boot_cpu_has(X86_FEATURE_SHSTK))
> +			return false;
> +		if (!msr->host_initiated) {
> +			if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
> +				return false;
> +		} else {

This looks wrong, WRMSR from the guest only checks CPUID, it doesn't check
kvm_xss.

> +			if (index == MSR_IA32_PL3_SSP) {
> +				if (!(kvm_xss & XFEATURE_MASK_CET_USER))
> +					return false;
> +			} else {
> +				if (!(kvm_xss & XFEATURE_MASK_CET_KERNEL))
> +					return false;
> +			}
> +		}
> +		break;
> +	case MSR_IA32_U_CET:
> +	case MSR_IA32_S_CET:

Rather than bundle everything in a single access_allowed() helper, it might
be easier to have separate helpers for each class of MSR.   Except for the
guest_mode() check, there's no overlap between the classes.
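
Something along these lines, with hypothetical helper names (the kvm_xss
handling is intentionally omitted here since that part still needs the fixes
discussed below):

	static bool cet_ctl_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
	{
		/* MSR_IA32_U_CET and MSR_IA32_S_CET */
		if (is_guest_mode(vcpu) ||
		    (!boot_cpu_has(X86_FEATURE_SHSTK) && !boot_cpu_has(X86_FEATURE_IBT)))
			return false;
		return msr->host_initiated ||
		       guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) ||
		       guest_cpuid_has(vcpu, X86_FEATURE_IBT);
	}

	static bool cet_ssp_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
	{
		/* MSR_IA32_PL{0..3}_SSP and MSR_IA32_INT_SSP_TAB */
		if (is_guest_mode(vcpu) || !boot_cpu_has(X86_FEATURE_SHSTK))
			return false;
		return msr->host_initiated ||
		       guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
	}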

> +		if (!boot_cpu_has(X86_FEATURE_SHSTK) &&
> +		    !boot_cpu_has(X86_FEATURE_IBT))
> +			return false;
> +
> +		if (!msr->host_initiated) {
> +			if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) &&
> +			    !guest_cpuid_has(vcpu, X86_FEATURE_IBT))
> +				return false;
> +		} else if (index == MSR_IA32_U_CET &&
> +			   !(kvm_xss & XFEATURE_MASK_CET_USER))

Same comment about guest not checking kvm_xss.

> +			return false;
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!boot_cpu_has(X86_FEATURE_SHSTK))
> +			return false;
> +
> +		if (!msr->host_initiated &&
> +		    !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
> +			return false;
> +		break;
> +	default:
> +		return false;
> +	}
> +	return true;
> +}
>  /*
>   * Reads an msr value (of 'msr_index') into 'pdata'.
>   * Returns 0 on success, non-0 otherwise.
> @@ -1788,6 +1880,26 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
>  		break;
> +	case MSR_IA32_S_CET:
> +		if (!cet_msr_access_allowed(vcpu, msr_info))
> +			return 1;
> +		msr_info->data = vmcs_readl(GUEST_S_CET);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!cet_msr_access_allowed(vcpu, msr_info))
> +			return 1;
> +		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
> +		break;
> +	case MSR_IA32_U_CET:
> +		if (!cet_msr_access_allowed(vcpu, msr_info))
> +			return 1;
> +		rdmsrl(MSR_IA32_U_CET, msr_info->data);
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		if (!cet_msr_access_allowed(vcpu, msr_info))
> +			return 1;
> +		rdmsrl(msr_info->index, msr_info->data);
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> @@ -2039,6 +2151,34 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  		else
>  			vmx->pt_desc.guest.addr_a[index / 2] = data;
>  		break;
> +	case MSR_IA32_S_CET:
> +		if (!cet_msr_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_msr_write_allowed(vcpu, msr_info))
> +			return 1;
> +		vmcs_writel(GUEST_S_CET, data);
> +		break;
> +	case MSR_IA32_INT_SSP_TAB:
> +		if (!cet_msr_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_msr_write_allowed(vcpu, msr_info))
> +			return 1;
> +		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
> +		break;
> +	case MSR_IA32_U_CET:
> +		if (!cet_msr_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_msr_write_allowed(vcpu, msr_info))
> +			return 1;
> +		wrmsrl(MSR_IA32_U_CET, data);
> +		break;
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		if (!cet_msr_access_allowed(vcpu, msr_info))
> +			return 1;
> +		if (!cet_msr_write_allowed(vcpu, msr_info))
> +			return 1;
> +		wrmsrl(msr_info->index, data);
> +		break;
>  	case MSR_TSC_AUX:
>  		if (!msr_info->host_initiated &&
>  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6275a75d5802..1bbe4550da90 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1143,6 +1143,9 @@ static u32 msrs_to_save[] = {
>  	MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
>  	MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
>  	MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
> +	MSR_IA32_XSS, MSR_IA32_U_CET, MSR_IA32_S_CET,
> +	MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
> +	MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
>  };
>  
>  static unsigned num_msrs_to_save;
> -- 
> 2.17.2
>
Yang, Weijiang Dec. 11, 2019, 2:19 a.m. UTC | #2
On Tue, Dec 10, 2019 at 01:58:59PM -0800, Sean Christopherson wrote:
> On Fri, Nov 01, 2019 at 04:52:22PM +0800, Yang Weijiang wrote:
> > There're two different places storing Guest CET states, states
> > managed with XSAVES/XRSTORS, as restored/saved
> > in previous patch, can be read/write directly from/to the MSRs.
> > For those stored in VMCS fields, they're access via vmcs_read/
> > vmcs_write.
> > 
> >  
> > +#define CET_MSR_RSVD_BITS_1    0x3
> > +#define CET_MSR_RSVD_BITS_2   (0xF << 6)
> > +
> > +static bool cet_msr_write_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> > +{
> > +	u32 index = msr->index;
> > +	u64 data = msr->data;
> > +	u32 high_word = data >> 32;
> > +
> > +	if ((index == MSR_IA32_U_CET || index == MSR_IA32_S_CET) &&
> > +	    (data & CET_MSR_RSVD_BITS_2))
> > +		return false;
> > +
> > +	if (is_64_bit_mode(vcpu)) {
> > +		if (is_noncanonical_address(data & PAGE_MASK, vcpu))
> 
> I don't think this is correct.  MSRs that contain an address usually only
> fault on a non-canonical value and do the non-canonical check regardless
> of mode.  E.g. VM-Enter's consistency checks on SYSENTER_E{I,S}P only care
> about a canonical address and are not dependent on mode, and SYSENTER
> itself states that bits 63:32 are ignored in 32-bit mode.  I assume the
> same is true here.
The spec reads like this: "Must be machine canonical when written on parts
that support 64 bit mode. On parts that do not support 64 bit mode, the bits
63:32 are reserved and must be 0."

> If that is indeed the case, what about adding these to the common canonical
> check in __kvm_set_msr()?  That'd cut down on the boilerplate here and
> might make it easier to audit KVM's canonical checks.
> 
> > +			return false;
> > +		else if ((index == MSR_IA32_PL0_SSP ||
> > +			  index == MSR_IA32_PL1_SSP ||
> > +			  index == MSR_IA32_PL2_SSP ||
> > +			  index == MSR_IA32_PL3_SSP) &&
> > +			  (data & CET_MSR_RSVD_BITS_1))
> > +			return false;
> > +	} else {
> > +		if (msr->index == MSR_IA32_INT_SSP_TAB)
> > +			return false;
> > +		else if ((index == MSR_IA32_U_CET ||
> > +			  index == MSR_IA32_S_CET ||
> > +			  index == MSR_IA32_PL0_SSP ||
> > +			  index == MSR_IA32_PL1_SSP ||
> > +			  index == MSR_IA32_PL2_SSP ||
> > +			  index == MSR_IA32_PL3_SSP) &&
> > +			  (high_word & ~0ul))
> > +			return false;
> > +	}
> > +
> > +	return true;
> > +}
> 
> This helper seems like overkill, e.g. it's filled with index-specific
> checks, but is called from code that has already switched on the index.
> Open coding the individual checks is likely more readable and would require
> less code, especially if the canonical checks are cleaned up.
>
I'm afraid that if the checks are not wrapped in a helper, there will be a
lot of repeated checking code; that's why I'm using a wrapper.

> > +
> > +static bool cet_msr_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> > +{
> > +	u64 kvm_xss;
> > +	u32 index = msr->index;
> > +
> > +	if (is_guest_mode(vcpu))
> > +		return false;
> 
> I may have missed this in an earlier discussion, does CET not support
> nesting?
>
I don't want to make CET available to nested guests for the time being; the
goal is to make it available to L1 guests first. So I need to avoid exposing
any CET CPUID/MSRs to a nested guest.

> > +
> > +	kvm_xss = kvm_supported_xss();
> > +
> > +	switch (index) {
> > +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > +		if (!boot_cpu_has(X86_FEATURE_SHSTK))
> > +			return false;
> > +		if (!msr->host_initiated) {
> > +			if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
> > +				return false;
> > +		} else {
> 
> This looks wrong, WRMSR from the guest only checks CPUID, it doesn't check
> kvm_xss.
> 
Oops, I need to add the check, thank you!

> > +			if (index == MSR_IA32_PL3_SSP) {
> > +				if (!(kvm_xss & XFEATURE_MASK_CET_USER))
> > +					return false;
> > +			} else {
> > +				if (!(kvm_xss & XFEATURE_MASK_CET_KERNEL))
> > +					return false;
> > +			}
> > +		}
> > +		break;
> > +	case MSR_IA32_U_CET:
> > +	case MSR_IA32_S_CET:
> 
> Rather than bundle everything in a single access_allowed() helper, it might
> be easier to have separate helpers for each class of MSR.   Except for the
> guest_mode() check, there's no overlap between the classes.
>
Sure, let me double check the code.

> > +		if (!boot_cpu_has(X86_FEATURE_SHSTK) &&
> > +		    !boot_cpu_has(X86_FEATURE_IBT))
> > +			return false;
> > +
> > +		if (!msr->host_initiated) {
> > +			if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) &&
> > +			    !guest_cpuid_has(vcpu, X86_FEATURE_IBT))
> > +				return false;
> > +		} else if (index == MSR_IA32_U_CET &&
> > +			   !(kvm_xss & XFEATURE_MASK_CET_USER))
> 
> Same comment about guest not checking kvm_xss.
>
OK.

> > +			return false;
> > +		break;
> > +	case MSR_IA32_INT_SSP_TAB:
> > +		if (!boot_cpu_has(X86_FEATURE_SHSTK))
> > +			return false;
> > +
> > +		if (!msr->host_initiated &&
> > +		    !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
> > +			return false;
> > +		break;
> > +	default:
> > +		return false;
> > +	}
> > +	return true;
> > +}
> >  /*
> >   * Reads an msr value (of 'msr_index') into 'pdata'.
> >   * Returns 0 on success, non-0 otherwise.
> > @@ -1788,6 +1880,26 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  		else
> >  			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
> >  		break;
> > +	case MSR_IA32_S_CET:
> > +		if (!cet_msr_access_allowed(vcpu, msr_info))
> > +			return 1;
> > +		msr_info->data = vmcs_readl(GUEST_S_CET);
> > +		break;
> > +	case MSR_IA32_INT_SSP_TAB:
> > +		if (!cet_msr_access_allowed(vcpu, msr_info))
> > +			return 1;
> > +		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
> > +		break;
> > +	case MSR_IA32_U_CET:
> > +		if (!cet_msr_access_allowed(vcpu, msr_info))
> > +			return 1;
> > +		rdmsrl(MSR_IA32_U_CET, msr_info->data);
> > +		break;
> > +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > +		if (!cet_msr_access_allowed(vcpu, msr_info))
> > +			return 1;
> > +		rdmsrl(msr_info->index, msr_info->data);
> > +		break;
> >  	case MSR_TSC_AUX:
> >  		if (!msr_info->host_initiated &&
> >  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> > @@ -2039,6 +2151,34 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >  		else
> >  			vmx->pt_desc.guest.addr_a[index / 2] = data;
> >  		break;
> > +	case MSR_IA32_S_CET:
> > +		if (!cet_msr_access_allowed(vcpu, msr_info))
> > +			return 1;
> > +		if (!cet_msr_write_allowed(vcpu, msr_info))
> > +			return 1;
> > +		vmcs_writel(GUEST_S_CET, data);
> > +		break;
> > +	case MSR_IA32_INT_SSP_TAB:
> > +		if (!cet_msr_access_allowed(vcpu, msr_info))
> > +			return 1;
> > +		if (!cet_msr_write_allowed(vcpu, msr_info))
> > +			return 1;
> > +		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
> > +		break;
> > +	case MSR_IA32_U_CET:
> > +		if (!cet_msr_access_allowed(vcpu, msr_info))
> > +			return 1;
> > +		if (!cet_msr_write_allowed(vcpu, msr_info))
> > +			return 1;
> > +		wrmsrl(MSR_IA32_U_CET, data);
> > +		break;
> > +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> > +		if (!cet_msr_access_allowed(vcpu, msr_info))
> > +			return 1;
> > +		if (!cet_msr_write_allowed(vcpu, msr_info))
> > +			return 1;
> > +		wrmsrl(msr_info->index, data);
> > +		break;
> >  	case MSR_TSC_AUX:
> >  		if (!msr_info->host_initiated &&
> >  		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 6275a75d5802..1bbe4550da90 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1143,6 +1143,9 @@ static u32 msrs_to_save[] = {
> >  	MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
> >  	MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
> >  	MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
> > +	MSR_IA32_XSS, MSR_IA32_U_CET, MSR_IA32_S_CET,
> > +	MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
> > +	MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
> >  };
> >  
> >  static unsigned num_msrs_to_save;
> > -- 
> > 2.17.2
> >
Sean Christopherson Dec. 11, 2019, 4:27 p.m. UTC | #3
On Wed, Dec 11, 2019 at 10:19:51AM +0800, Yang Weijiang wrote:
> On Tue, Dec 10, 2019 at 01:58:59PM -0800, Sean Christopherson wrote:
> > On Fri, Nov 01, 2019 at 04:52:22PM +0800, Yang Weijiang wrote:
> > > There're two different places storing Guest CET states, states
> > > managed with XSAVES/XRSTORS, as restored/saved
> > > in previous patch, can be read/write directly from/to the MSRs.
> > > For those stored in VMCS fields, they're access via vmcs_read/
> > > vmcs_write.
> > > 
> > >  
> > > +#define CET_MSR_RSVD_BITS_1    0x3
> > > +#define CET_MSR_RSVD_BITS_2   (0xF << 6)
> > > +
> > > +static bool cet_msr_write_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> > > +{
> > > +	u32 index = msr->index;
> > > +	u64 data = msr->data;
> > > +	u32 high_word = data >> 32;
> > > +
> > > +	if ((index == MSR_IA32_U_CET || index == MSR_IA32_S_CET) &&
> > > +	    (data & CET_MSR_RSVD_BITS_2))
> > > +		return false;
> > > +
> > > +	if (is_64_bit_mode(vcpu)) {
> > > +		if (is_noncanonical_address(data & PAGE_MASK, vcpu))
> > 
> > I don't think this is correct.  MSRs that contain an address usually only
> > fault on a non-canonical value and do the non-canonical check regardless
> > of mode.  E.g. VM-Enter's consistency checks on SYSENTER_E{I,S}P only care
> > about a canonical address and are not dependent on mode, and SYSENTER
> > itself states that bits 63:32 are ignored in 32-bit mode.  I assume the
> > same is true here.
> The spec. reads like this:  Must be machine canonical when written on parts
> that support 64 bit mode. On parts that do not support 64 bit mode, the bits
> 63:32 are reserved and must be 0.

Yes, that agrees with me.  The key word is "support", i.e. "on parts that
support 64 bit mode" means "on parts with CPUID.0x80000001.EDX.LM=1."

The reason the architecture works this way is that unless hardware clears
the MSRs on transition from 64->32, bits 63:32 need to be ignored on the
way out instead of being validated on the way in, e.g. software writes a
64-bit value to the MSR and then transitions to 32-bit mode.  Clearing the
MSRs would be painful, slow and error prone, so it's easier for hardware
to simply ignore bits 63:32 in 32-bit mode.

> > If that is indeed the case, what about adding these to the common canonical
> > check in __kvm_set_msr()?  That'd cut down on the boilerplate here and
> > might make it easier to audit KVM's canonical checks.
> > 
> > > +			return false;
> > > +		else if ((index == MSR_IA32_PL0_SSP ||
> > > +			  index == MSR_IA32_PL1_SSP ||
> > > +			  index == MSR_IA32_PL2_SSP ||
> > > +			  index == MSR_IA32_PL3_SSP) &&
> > > +			  (data & CET_MSR_RSVD_BITS_1))
> > > +			return false;
> > > +	} else {
> > > +		if (msr->index == MSR_IA32_INT_SSP_TAB)
> > > +			return false;
> > > +		else if ((index == MSR_IA32_U_CET ||
> > > +			  index == MSR_IA32_S_CET ||
> > > +			  index == MSR_IA32_PL0_SSP ||
> > > +			  index == MSR_IA32_PL1_SSP ||
> > > +			  index == MSR_IA32_PL2_SSP ||
> > > +			  index == MSR_IA32_PL3_SSP) &&
> > > +			  (high_word & ~0ul))
> > > +			return false;
> > > +	}
> > > +
> > > +	return true;
> > > +}
> > 
> > This helper seems like overkill, e.g. it's filled with index-specific
> > checks, but is called from code that has already switched on the index.
> > Open coding the individual checks is likely more readable and would require
> > less code, especially if the canonical checks are cleaned up.
> >
> I'm afraid if the checks are not wrapped in a helper, there're many
> repeat checking-code, that's why I'm using a wrapper.

But you're adding almost as much, if not more, code to re-split the checks
in this helper.

> > > +
> > > +static bool cet_msr_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> > > +{
> > > +	u64 kvm_xss;
> > > +	u32 index = msr->index;
> > > +
> > > +	if (is_guest_mode(vcpu))
> > > +		return false;
> > 
> > I may have missed this in an earlier discussion, does CET not support
> > nesting?
> >
> I don't want to make CET avaible to nested guest at time being, first to
> make it available to L1 guest first. So I need to avoid exposing any CET
> CPUID/MSRs to a nested guest.
Yang, Weijiang Dec. 12, 2019, 12:42 a.m. UTC | #4
On Wed, Dec 11, 2019 at 08:27:02AM -0800, Sean Christopherson wrote:
> On Wed, Dec 11, 2019 at 10:19:51AM +0800, Yang Weijiang wrote:
> > On Tue, Dec 10, 2019 at 01:58:59PM -0800, Sean Christopherson wrote:
> > > On Fri, Nov 01, 2019 at 04:52:22PM +0800, Yang Weijiang wrote:
> > > > There're two different places storing Guest CET states, states
> > > > managed with XSAVES/XRSTORS, as restored/saved
> > > > in previous patch, can be read/write directly from/to the MSRs.
> > > > For those stored in VMCS fields, they're access via vmcs_read/
> > > > vmcs_write.
> > > > 
> > > >  
> > > > +#define CET_MSR_RSVD_BITS_1    0x3
> > > > +#define CET_MSR_RSVD_BITS_2   (0xF << 6)
> > > > +
> > > > +static bool cet_msr_write_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> > > > +{
> > > > +	u32 index = msr->index;
> > > > +	u64 data = msr->data;
> > > > +	u32 high_word = data >> 32;
> > > > +
> > > > +	if ((index == MSR_IA32_U_CET || index == MSR_IA32_S_CET) &&
> > > > +	    (data & CET_MSR_RSVD_BITS_2))
> > > > +		return false;
> > > > +
> > > > +	if (is_64_bit_mode(vcpu)) {
> > > > +		if (is_noncanonical_address(data & PAGE_MASK, vcpu))
> > > 
> > > I don't think this is correct.  MSRs that contain an address usually only
> > > fault on a non-canonical value and do the non-canonical check regardless
> > > of mode.  E.g. VM-Enter's consistency checks on SYSENTER_E{I,S}P only care
> > > about a canonical address and are not dependent on mode, and SYSENTER
> > > itself states that bits 63:32 are ignored in 32-bit mode.  I assume the
> > > same is true here.
> > The spec. reads like this:  Must be machine canonical when written on parts
> > that support 64 bit mode. On parts that do not support 64 bit mode, the bits
> > 63:32 are reserved and must be 0.
> 
> Yes, that agrees with me.  The key word is "support", i.e. "on parts that
> support 64 bit mode" means "on parts with CPUID.0x80000001.EDX.LM=1."
> 
> The reason the architecture works this way is that unless hardware clears
> the MSRs on transition from 64->32, bits 63:32 need to be ignored on the
> way out instead of being validated on the way in, e.g. software writes a
> 64-bit value to the MSR and then transitions to 32-bit mode.  Clearing the
> MSRs would be painful, slow and error prone, so it's easier for hardware
> to simply ignore bits 63:32 in 32-bit mode.
>
Makes sense, I'll move the canonical check up to kvm_set_msr() like other
MSRs, thanks!

> > > If that is indeed the case, what about adding these to the common canonical
> > > check in __kvm_set_msr()?  That'd cut down on the boilerplate here and
> > > might make it easier to audit KVM's canonical checks.
> > > 
> > > > +			return false;
> > > > +		else if ((index == MSR_IA32_PL0_SSP ||
> > > > +			  index == MSR_IA32_PL1_SSP ||
> > > > +			  index == MSR_IA32_PL2_SSP ||
> > > > +			  index == MSR_IA32_PL3_SSP) &&
> > > > +			  (data & CET_MSR_RSVD_BITS_1))
> > > > +			return false;
> > > > +	} else {
> > > > +		if (msr->index == MSR_IA32_INT_SSP_TAB)
> > > > +			return false;
> > > > +		else if ((index == MSR_IA32_U_CET ||
> > > > +			  index == MSR_IA32_S_CET ||
> > > > +			  index == MSR_IA32_PL0_SSP ||
> > > > +			  index == MSR_IA32_PL1_SSP ||
> > > > +			  index == MSR_IA32_PL2_SSP ||
> > > > +			  index == MSR_IA32_PL3_SSP) &&
> > > > +			  (high_word & ~0ul))
> > > > +			return false;
> > > > +	}
> > > > +
> > > > +	return true;
> > > > +}
> > > 
> > > This helper seems like overkill, e.g. it's filled with index-specific
> > > checks, but is called from code that has already switched on the index.
> > > Open coding the individual checks is likely more readable and would require
> > > less code, especially if the canonical checks are cleaned up.
> > >
> > I'm afraid if the checks are not wrapped in a helper, there're many
> > repeat checking-code, that's why I'm using a wrapper.
> 
> But you're adding almost as much, if not more, code to re-split the checks
> in this helper.
>
Sure, thanks!

> > > > +
> > > > +static bool cet_msr_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
> > > > +{
> > > > +	u64 kvm_xss;
> > > > +	u32 index = msr->index;
> > > > +
> > > > +	if (is_guest_mode(vcpu))
> > > > +		return false;
> > > 
> > > I may have missed this in an earlier discussion, does CET not support
> > > nesting?
> > >
> > I don't want to make CET avaible to nested guest at time being, first to
> > make it available to L1 guest first. So I need to avoid exposing any CET
> > CPUID/MSRs to a nested guest.
Patch

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bfb1b922a9ac..71aba264b5d2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1671,6 +1671,98 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 	return 0;
 }
 
+#define CET_MSR_RSVD_BITS_1    0x3
+#define CET_MSR_RSVD_BITS_2   (0xF << 6)
+
+static bool cet_msr_write_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
+{
+	u32 index = msr->index;
+	u64 data = msr->data;
+	u32 high_word = data >> 32;
+
+	if ((index == MSR_IA32_U_CET || index == MSR_IA32_S_CET) &&
+	    (data & CET_MSR_RSVD_BITS_2))
+		return false;
+
+	if (is_64_bit_mode(vcpu)) {
+		if (is_noncanonical_address(data & PAGE_MASK, vcpu))
+			return false;
+		else if ((index == MSR_IA32_PL0_SSP ||
+			  index == MSR_IA32_PL1_SSP ||
+			  index == MSR_IA32_PL2_SSP ||
+			  index == MSR_IA32_PL3_SSP) &&
+			  (data & CET_MSR_RSVD_BITS_1))
+			return false;
+	} else {
+		if (msr->index == MSR_IA32_INT_SSP_TAB)
+			return false;
+		else if ((index == MSR_IA32_U_CET ||
+			  index == MSR_IA32_S_CET ||
+			  index == MSR_IA32_PL0_SSP ||
+			  index == MSR_IA32_PL1_SSP ||
+			  index == MSR_IA32_PL2_SSP ||
+			  index == MSR_IA32_PL3_SSP) &&
+			  (high_word & ~0ul))
+			return false;
+	}
+
+	return true;
+}
+
+static bool cet_msr_access_allowed(struct kvm_vcpu *vcpu, struct msr_data *msr)
+{
+	u64 kvm_xss;
+	u32 index = msr->index;
+
+	if (is_guest_mode(vcpu))
+		return false;
+
+	kvm_xss = kvm_supported_xss();
+
+	switch (index) {
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		if (!boot_cpu_has(X86_FEATURE_SHSTK))
+			return false;
+		if (!msr->host_initiated) {
+			if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
+				return false;
+		} else {
+			if (index == MSR_IA32_PL3_SSP) {
+				if (!(kvm_xss & XFEATURE_MASK_CET_USER))
+					return false;
+			} else {
+				if (!(kvm_xss & XFEATURE_MASK_CET_KERNEL))
+					return false;
+			}
+		}
+		break;
+	case MSR_IA32_U_CET:
+	case MSR_IA32_S_CET:
+		if (!boot_cpu_has(X86_FEATURE_SHSTK) &&
+		    !boot_cpu_has(X86_FEATURE_IBT))
+			return false;
+
+		if (!msr->host_initiated) {
+			if (!guest_cpuid_has(vcpu, X86_FEATURE_SHSTK) &&
+			    !guest_cpuid_has(vcpu, X86_FEATURE_IBT))
+				return false;
+		} else if (index == MSR_IA32_U_CET &&
+			   !(kvm_xss & XFEATURE_MASK_CET_USER))
+			return false;
+		break;
+	case MSR_IA32_INT_SSP_TAB:
+		if (!boot_cpu_has(X86_FEATURE_SHSTK))
+			return false;
+
+		if (!msr->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_SHSTK))
+			return false;
+		break;
+	default:
+		return false;
+	}
+	return true;
+}
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -1788,6 +1880,26 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
 		break;
+	case MSR_IA32_S_CET:
+		if (!cet_msr_access_allowed(vcpu, msr_info))
+			return 1;
+		msr_info->data = vmcs_readl(GUEST_S_CET);
+		break;
+	case MSR_IA32_INT_SSP_TAB:
+		if (!cet_msr_access_allowed(vcpu, msr_info))
+			return 1;
+		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
+		break;
+	case MSR_IA32_U_CET:
+		if (!cet_msr_access_allowed(vcpu, msr_info))
+			return 1;
+		rdmsrl(MSR_IA32_U_CET, msr_info->data);
+		break;
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		if (!cet_msr_access_allowed(vcpu, msr_info))
+			return 1;
+		rdmsrl(msr_info->index, msr_info->data);
+		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
@@ -2039,6 +2151,34 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			vmx->pt_desc.guest.addr_a[index / 2] = data;
 		break;
+	case MSR_IA32_S_CET:
+		if (!cet_msr_access_allowed(vcpu, msr_info))
+			return 1;
+		if (!cet_msr_write_allowed(vcpu, msr_info))
+			return 1;
+		vmcs_writel(GUEST_S_CET, data);
+		break;
+	case MSR_IA32_INT_SSP_TAB:
+		if (!cet_msr_access_allowed(vcpu, msr_info))
+			return 1;
+		if (!cet_msr_write_allowed(vcpu, msr_info))
+			return 1;
+		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
+		break;
+	case MSR_IA32_U_CET:
+		if (!cet_msr_access_allowed(vcpu, msr_info))
+			return 1;
+		if (!cet_msr_write_allowed(vcpu, msr_info))
+			return 1;
+		wrmsrl(MSR_IA32_U_CET, data);
+		break;
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		if (!cet_msr_access_allowed(vcpu, msr_info))
+			return 1;
+		if (!cet_msr_write_allowed(vcpu, msr_info))
+			return 1;
+		wrmsrl(msr_info->index, data);
+		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6275a75d5802..1bbe4550da90 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1143,6 +1143,9 @@ static u32 msrs_to_save[] = {
 	MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
 	MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
 	MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
+	MSR_IA32_XSS, MSR_IA32_U_CET, MSR_IA32_S_CET,
+	MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
+	MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
 };
 
 static unsigned num_msrs_to_save;