[RFC,1/2] KVM: CPUID: Enable supervisor XSAVE states in CPUID enumeration and XSS

Message ID 20200211065706.3462-1-weijiang.yang@intel.com (mailing list archive)
State New, archived
Series [RFC,1/2] KVM: CPUID: Enable supervisor XSAVE states in CPUID enumeration and XSS

Commit Message

Yang, Weijiang Feb. 11, 2020, 6:57 a.m. UTC
CPUID.(EAX=0DH, ECX=i), i >= 0, enumerates the XSAVE-related leaf and its
sub-leaves, but supervisor states are not taken into account. Meanwhile,
more and more new features, e.g., CET, PT, LBR, rely on supervisor states
to enhance performance, so updating the related KVM code becomes necessary.

With Aaron Lewis's <aaronlewis@google.com> patches in place, i.e.,
{c90992bfb080, 52297436199d, 864e2ab2b46d, 139a12cfe1a0, 9753d68865c5,
312a1c87798e, 78958563d802, c034f2aa8622, 7204160eb780}, this patch
enables support for supervisor XSAVE states in CPUID enumeration and
MSR IA32_XSS. KVM_SUPPORTED_XSS is a static mask for the KVM/guest
supervisor states, and guest_supported_xss is a dynamic mask which
consolidates the current host IA32_XSS and the QEMU configuration
together with the static mask.
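
Concretely, the two masks combine as follows (this just restates the
code in the patch; CPUID.0xD.1 is the guest's CPUID.(EAX=0DH, ECX=1)
entry):

	kvm_supported_xss()            = KVM_SUPPORTED_XSS & host_xss
	vcpu->arch.guest_supported_xss = (CPUID.0xD.1:ECX |
					  (u64)CPUID.0xD.1:EDX << 32) &
					 kvm_supported_xss()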

Right now, no supervisor states in IA32_XSS are used in upstream KVM
code, so this patch sets KVM_SUPPORTED_XSS to 0; new XSAVES-related
features can extend the macro to enable save/restore with the
XSAVES/XRSTORS instructions.
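
Purely as an illustration, a future feature series could extend the
macro like so (the XFEATURE_MASK_CET_* names are placeholders for
whatever supervisor-state bits such a series would define):

	#define KVM_SUPPORTED_XSS	(XFEATURE_MASK_CET_USER | \
					 XFEATURE_MASK_CET_KERNEL)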

To test the patch, I first set KVM_SUPPORTED_XSS to 0x3900 (bits 8, 11,
12 and 13) and injected that value into IA32_XSS as well; 0x3900
corresponds to the most recent candidate supervisor states on Intel
platforms. Tested on a TGL platform, with the results below:

cpuid.[d.0]: eax = 0x000002e7, ebx = 0x00000a88, ecx = 0x00000a88, edx = 0x00000000
cpuid.[d.1]: eax = 0x0000000f, ebx = 0x00000a38, ecx = 0x00003900, edx = 0x00000000
cpuid.[d.2]: eax = 0x00000100, ebx = 0x00000240, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.5]: eax = 0x00000040, ebx = 0x00000440, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.6]: eax = 0x00000200, ebx = 0x00000480, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.7]: eax = 0x00000400, ebx = 0x00000680, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.8]: eax = 0x00000080, ebx = 0x00000000, ecx = 0x00000001, edx = 0x00000000
cpuid.[d.9]: eax = 0x00000008, ebx = 0x00000a80, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.11]: eax = 0x00000010, ebx = 0x00000000, ecx = 0x00000001, edx = 0x00000000
cpuid.[d.12]: eax = 0x00000018, ebx = 0x00000000, ecx = 0x00000001, edx = 0x00000000
cpuid.[d.13]: eax = 0x00000008, ebx = 0x00000000, ecx = 0x00000001, edx = 0x00000000
bit[8] in MSR_IA32_XSS is supported
bit[11] in MSR_IA32_XSS is supported
bit[12] in MSR_IA32_XSS is supported
bit[13] in MSR_IA32_XSS is supported
Supported bit mask in MSR_IA32_XSS is : 0x3900

When IA32_XSS and KVM_SUPPORTED_XSS are 0, I got the output below:
cpuid.[d.0]: eax = 0x000002e7, ebx = 0x00000a88, ecx = 0x00000a88, edx = 0x00000000
cpuid.[d.1]: eax = 0x0000000f, ebx = 0x00000988, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.2]: eax = 0x00000100, ebx = 0x00000240, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.5]: eax = 0x00000040, ebx = 0x00000440, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.6]: eax = 0x00000200, ebx = 0x00000480, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.7]: eax = 0x00000400, ebx = 0x00000680, ecx = 0x00000000, edx = 0x00000000
cpuid.[d.9]: eax = 0x00000008, ebx = 0x00000a80, ecx = 0x00000000, edx = 0x00000000
Supported bit mask in MSR_IA32_XSS is : 0x0

Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/x86/include/asm/kvm_host.h |   1 +
 arch/x86/kvm/cpuid.c            | 111 ++++++++++++++++++++++----------
 arch/x86/kvm/x86.c              |   4 +-
 arch/x86/kvm/x86.h              |   8 +++
 4 files changed, 87 insertions(+), 37 deletions(-)

Comments

Xiaoyao Li Feb. 17, 2020, 4:26 a.m. UTC | #1
On 2/11/2020 2:57 PM, Yang Weijiang wrote:
> CPUID.(EAX=DH, ECX={i}H i>=0) enumerates XSAVE related leaves/sub-leaves,
> but supervisor states are not taken into account. Meanwhile,more and more
> new features, e.g., CET, PT, LBR, rely on supervisor states to enhance
> performance, so updating related KVM code becomes necessary.
> 
> With Aaron Lewis's <aaronlewis@google.com> patches in place, i.e.,
> {c90992bfb080, 52297436199d, 864e2ab2b46d, 139a12cfe1a0, 9753d68865c5,
> 312a1c87798e, 78958563d802, c034f2aa8622, 7204160eb780}, this patch
> is to enable suppervisor XSAVE states support in CPUID enumeration and
> MSR IA32_XSS. KVM_SUPPORTED_XSS is a static mask for KVM/Guest supervisor
> states and guest_supported_xss is a dynamic mask which consolidates
> current host IA32_XSS and QEMU configuration together with static mask.
> 
> Right now, supervisor states in IA32_XSS haven't been used in upstreamed
> KVM code, so set KVM_SUPPORTED_XSS to 0 in the patch, new XSAVES related
> features can expand the macro to enable save/restore with XSAVES/XSTORS
> instruction.
> 
> To test the patch, I first set the KVM_SUPPORTED_XSS to 0x3900 and inject
> value to IA32_XSS too, 0x3900 corresponds to the most recent possible
> candidate supervisor states on Intel platforms, tested on TGL platform as
> results below:
> 
> cpuid.[d.0]: eax = 0x000002e7, ebx = 0x00000a88, ecx = 0x00000a88, edx = 0x00000000
> cpuid.[d.1]: eax = 0x0000000f, ebx = 0x00000a38, ecx = 0x00003900, edx = 0x00000000
> cpuid.[d.2]: eax = 0x00000100, ebx = 0x00000240, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.5]: eax = 0x00000040, ebx = 0x00000440, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.6]: eax = 0x00000200, ebx = 0x00000480, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.7]: eax = 0x00000400, ebx = 0x00000680, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.8]: eax = 0x00000080, ebx = 0x00000000, ecx = 0x00000001, edx = 0x00000000
> cpuid.[d.9]: eax = 0x00000008, ebx = 0x00000a80, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.11]: eax = 0x00000010, ebx = 0x00000000, ecx = 0x00000001, edx = 0x00000000
> cpuid.[d.12]: eax = 0x00000018, ebx = 0x00000000, ecx = 0x00000001, edx = 0x00000000
> cpuid.[d.13]: eax = 0x00000008, ebx = 0x00000000, ecx = 0x00000001, edx = 0x00000000
> bit[8] in MSR_IA32_XSS is supported
> bit[11] in MSR_IA32_XSS is supported
> bit[12] in MSR_IA32_XSS is supported
> bit[13] in MSR_IA32_XSS is supported
> Supported bit mask in MSR_IA32_XSS is : 0x3900
> 
> When IA32_XSS and KVM_SUPPORTED_XSS are 0, got below output:
> cpuid.[d.0]: eax = 0x000002e7, ebx = 0x00000a88, ecx = 0x00000a88, edx = 0x00000000
> cpuid.[d.1]: eax = 0x0000000f, ebx = 0x00000988, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.2]: eax = 0x00000100, ebx = 0x00000240, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.5]: eax = 0x00000040, ebx = 0x00000440, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.6]: eax = 0x00000200, ebx = 0x00000480, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.7]: eax = 0x00000400, ebx = 0x00000680, ecx = 0x00000000, edx = 0x00000000
> cpuid.[d.9]: eax = 0x00000008, ebx = 0x00000a80, ecx = 0x00000000, edx = 0x00000000
> Supported bit mask in MSR_IA32_XSS is : 0x0
> 
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> ---
>   arch/x86/include/asm/kvm_host.h |   1 +
>   arch/x86/kvm/cpuid.c            | 111 ++++++++++++++++++++++----------
>   arch/x86/kvm/x86.c              |   4 +-
>   arch/x86/kvm/x86.h              |   8 +++
>   4 files changed, 87 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b79cd6aa4075..627284fa4369 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -638,6 +638,7 @@ struct kvm_vcpu_arch {
>   
>   	u64 xcr0;
>   	u64 guest_supported_xcr0;
> +	u64 guest_supported_xss;
>   	u32 guest_xstate_size;
>   
>   	struct kvm_pio_request pio;
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index cfafa320a8cf..9546271d4038 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -62,6 +62,12 @@ u64 kvm_supported_xcr0(void)
>   	return xcr0;
>   }
>   
> +extern int host_xss;
> +u64 kvm_supported_xss(void)
> +{
> +	return KVM_SUPPORTED_XSS & host_xss;
> +}
> +

How about using a global variable, supported_xss, instead of calculating
the mask on every call? Just like what Sean posted on
https://lore.kernel.org/kvm/20200201185218.24473-21-sean.j.christopherson@intel.com/
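
I.e., a rough sketch of that approach (exact naming and placement would
follow Sean's series; the mask is computed once during hardware setup,
after host_xss has been read):

	u64 __read_mostly supported_xss;

	/* in hardware setup, instead of masking on every call: */
	supported_xss = host_xss & KVM_SUPPORTED_XSS;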

>   #define F(x) bit(X86_FEATURE_##x)
>   
>   int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> @@ -112,10 +118,17 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>   		vcpu->arch.guest_xstate_size = best->ebx =
>   			xstate_required_size(vcpu->arch.xcr0, false);
>   	}
> -
>   	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> -	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
> -		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> +	if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
> +		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
> +
> +		best->ebx = xstate_required_size(xstate, true);
> +		vcpu->arch.guest_supported_xss =
> +			(best->ecx | ((u64)best->edx << 32)) &
> +			kvm_supported_xss();
> +	} else {
> +		vcpu->arch.guest_supported_xss = 0;
> +	}
>   
>   	/*
>   	 * The existing code assumes virtual address is 48-bit or 57-bit in the
> @@ -426,6 +439,56 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
>   	}
>   }
>   
> +static inline bool do_cpuid_0xd_mask(struct kvm_cpuid_entry2 *entry, int index)
> +{
> +	unsigned int f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
> +	/* cpuid 0xD.1.eax */
> +	const u32 kvm_cpuid_D_1_eax_x86_features =
> +		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> +	u64 u_supported = kvm_supported_xcr0();
> +	u64 s_supported = kvm_supported_xss();
> +	u64 supported;
> +
> +	switch (index) {
> +	case 0:
> +		if (!u_supported) {
> +			entry->eax = 0;
> +			entry->ebx = 0;
> +			entry->ecx = 0;
> +			entry->edx = 0;
> +			return false;
> +		}
> +		entry->eax &= u_supported;
> +		entry->ebx = xstate_required_size(u_supported, false);
> +		entry->ecx = entry->ebx;
> +		entry->edx &= u_supported >> 32;
> +		break;
> +	case 1:
> +		supported = u_supported | s_supported;
> +		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
> +		cpuid_mask(&entry->eax, CPUID_D_1_EAX);
> +		entry->ebx = 0;
> +		entry->edx &= s_supported >> 32;
> +		entry->ecx &= s_supported;

We'd better initialize the msr_ia32_xss bitmap (entry->ecx and entry->edx)
to zeros here.

> +		if (entry->eax & (F(XSAVES) | F(XSAVEC)))
> +			entry->ebx = xstate_required_size(supported, true);

And set up the msr_ia32_xss bitmap based on s_supported within this
condition, when F(XSAVES) is supported.

> +		break;
> +	default:
> +		supported = (entry->ecx & 0x1) ? s_supported : u_supported;
> +		if (!(supported & (BIT_ULL(index)))) {
> +			entry->eax = 0;
> +			entry->ebx = 0;
> +			entry->ecx = 0;
> +			entry->edx = 0;
> +			return false;
> +		}
> +		if (entry->ecx & 0x1)
> +			entry->ebx = 0;
> +		break;
> +	}
> +	return true;
> +}
> +
>   static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>   				  int *nent, int maxnent)
>   {
> @@ -440,7 +503,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>   	unsigned f_lm = 0;
>   #endif
>   	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> -	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
>   	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
>   
>   	/* cpuid 1.edx */
> @@ -495,10 +557,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>   		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
>   		F(PMM) | F(PMM_EN);
>   
> -	/* cpuid 0xD.1.eax */
> -	const u32 kvm_cpuid_D_1_eax_x86_features =
> -		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> -
>   	/* all calls to cpuid_count() should be made on the same cpu */
>   	get_cpu();
>   
> @@ -639,38 +697,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>   		break;
>   	}
>   	case 0xd: {
> -		int idx, i;
> -		u64 supported = kvm_supported_xcr0();
> +		int i, idx;
>   
> -		entry->eax &= supported;
> -		entry->ebx = xstate_required_size(supported, false);
> -		entry->ecx = entry->ebx;
> -		entry->edx &= supported >> 32;
> -		if (!supported)
> +		if (!do_cpuid_0xd_mask(&entry[0], 0))
>   			break;
> -
> -		for (idx = 1, i = 1; idx < 64; ++idx) {
> -			u64 mask = ((u64)1 << idx);
> +		for (i = 1, idx = 1; idx < 64; ++idx) {
>   			if (*nent >= maxnent)
>   				goto out;
> -
>   			do_host_cpuid(&entry[i], function, idx);
> -			if (idx == 1) {
> -				entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
> -				cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
> -				entry[i].ebx = 0;
> -				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
> -					entry[i].ebx =
> -						xstate_required_size(supported,
> -								     true);
> -			} else {
> -				if (entry[i].eax == 0 || !(supported & mask))
> -					continue;
> -				if (WARN_ON_ONCE(entry[i].ecx & 1))
> -					continue;
> -			}
> -			entry[i].ecx = 0;
> -			entry[i].edx = 0;
> +
> +			if (entry[i].eax == 0 && entry[i].ebx == 0 &&
> +			    entry[i].ecx == 0 && entry[i].edx == 0)
> +				continue;
> +
> +			if (!do_cpuid_0xd_mask(&entry[i], idx))
> +				continue;
>   			++*nent;
>   			++i;
>   		}
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index cf917139de6b..908a6cdb2151 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -177,7 +177,7 @@ struct kvm_shared_msrs {
>   static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
>   static struct kvm_shared_msrs __percpu *shared_msrs;
>   
> -static u64 __read_mostly host_xss;
> +u64 __read_mostly host_xss;
>   
>   struct kvm_stats_debugfs_item debugfs_entries[] = {
>   	{ "pf_fixed", VCPU_STAT(pf_fixed) },
> @@ -2732,7 +2732,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   		 * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT
>   		 * MSRs.
>   		 */
> -		if (data != 0)
> +		if (data & ~vcpu->arch.guest_supported_xss)
>   			return 1;
>   		vcpu->arch.ia32_xss = data;
>   		break;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 29391af8871d..9e7725f8bb46 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -296,6 +296,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
>   				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
>   				| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
>   				| XFEATURE_MASK_PKRU)
> +
> +/*
> + * In future, new XSS bits can be ORed here to make them available
> + * to KVM and guest, right now, it's 0, meaning no XSS bits are
> + * supported.
> + */
> +#define KVM_SUPPORTED_XSS 0
> +
>   extern u64 host_xcr0;
>   
>   extern u64 kvm_supported_xcr0(void);
>
Yang, Weijiang Feb. 17, 2020, 1:03 p.m. UTC | #2
On Mon, Feb 17, 2020 at 12:26:51PM +0800, Xiaoyao Li wrote:
> On 2/11/2020 2:57 PM, Yang Weijiang wrote:
> > CPUID.(EAX=DH, ECX={i}H i>=0) enumerates XSAVE related leaves/sub-leaves,
> > +extern int host_xss;
> > +u64 kvm_supported_xss(void)
> > +{
> > +	return KVM_SUPPORTED_XSS & host_xss;
> > +}
> > +
> 
> How about using a global variable, supported_xss, instead of calculating the
> mask on every call. Just like what Sean posted on
> https://lore.kernel.org/kvm/20200201185218.24473-21-sean.j.christopherson@intel.com/
>
Thanks Xiaoyao for the comments!
Good suggestion, I'll change it in next version.

> >   #define F(x) bit(X86_FEATURE_##x)
> >   int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > @@ -112,10 +118,17 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> >   		vcpu->arch.guest_xstate_size = best->ebx =
> >   			xstate_required_size(vcpu->arch.xcr0, false);
> >   	}
> > -
> >   	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> > -	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
> > -		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> > +	if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
> > +		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
> > +
> > +		best->ebx = xstate_required_size(xstate, true);
> > +		vcpu->arch.guest_supported_xss =
> > +			(best->ecx | ((u64)best->edx << 32)) &
> > +			kvm_supported_xss();
> > +	} else {
> > +		vcpu->arch.guest_supported_xss = 0;
> > +	}
> >   	/*
> >   	 * The existing code assumes virtual address is 48-bit or 57-bit in the
> > @@ -426,6 +439,56 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
> >   	}
> >   }
> > +static inline bool do_cpuid_0xd_mask(struct kvm_cpuid_entry2 *entry, int index)
> > +{
> > +	unsigned int f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
> > +	/* cpuid 0xD.1.eax */
> > +	const u32 kvm_cpuid_D_1_eax_x86_features =
> > +		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> > +	u64 u_supported = kvm_supported_xcr0();
> > +	u64 s_supported = kvm_supported_xss();
> > +	u64 supported;
> > +
> > +	switch (index) {
> > +	case 0:
> > +		if (!u_supported) {
> > +			entry->eax = 0;
> > +			entry->ebx = 0;
> > +			entry->ecx = 0;
> > +			entry->edx = 0;
> > +			return false;
> > +		}
> > +		entry->eax &= u_supported;
> > +		entry->ebx = xstate_required_size(u_supported, false);
> > +		entry->ecx = entry->ebx;
> > +		entry->edx &= u_supported >> 32;
> > +		break;
> > +	case 1:
> > +		supported = u_supported | s_supported;
> > +		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
> > +		cpuid_mask(&entry->eax, CPUID_D_1_EAX);
> > +		entry->ebx = 0;
> > +		entry->edx &= s_supported >> 32;
> > +		entry->ecx &= s_supported;
> 
> We'd better initialize msr_ia32_xss bitmap (entry->ecx & entry-edx) as zeros
> here.
Hmm, explicitly setting the MSR to 0 is good in this case, but there's an
implied flow that ensures the guest MSR_IA32_XSS will be 0 if entry->ecx and
entry->edx are 0s. In kvm_update_cpuid() above, vcpu->arch.guest_supported_xss
is set to 0 when they're 0s, which means the guest cannot set a non-zero
value in this MSR. And in kvm_vcpu_reset(), vcpu->arch.ia32_xss is
initialized to 0; in kvm_load_guest_xsave_state(), MSR_IA32_XSS is set to
ia32_xss, therefore the MSR is kept at 0.
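
In sketch form, the implied flow is:

	kvm_vcpu_reset():             vcpu->arch.ia32_xss = 0;
	kvm_update_cpuid():           vcpu->arch.guest_supported_xss = 0;
	                              /* when CPUID.0xD.1 ecx/edx are 0 */
	kvm_set_msr_common():         if (data & ~vcpu->arch.guest_supported_xss)
	                                      return 1; /* non-zero write rejected */
	kvm_load_guest_xsave_state(): MSR_IA32_XSS is written with ia32_xss, i.e. 0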
> 
> > +		if (entry->eax & (F(XSAVES) | F(XSAVEC)))
> > +			entry->ebx = xstate_required_size(supported, true);
> 
> And setup msr_ia32_xss bitmap based on the s_supported within this condition
> when F(XSAVES) is supported.

IIUC, both XSAVEC and XSAVES use the compacted format of the extended
region, so if XSAVEC is supported while XSAVES is not, the guest can still
get the correct size; that is why the existing code ORs the two bits.
> 
> > +		break;
> > +	default:
> > +		supported = (entry->ecx & 0x1) ? s_supported : u_supported;
> > +		if (!(supported & (BIT_ULL(index)))) {
> > +			entry->eax = 0;
> > +			entry->ebx = 0;
> > +			entry->ecx = 0;
> > +			entry->edx = 0;
> > +			return false;
> > +		}
> > +		if (entry->ecx & 0x1)
> > +			entry->ebx = 0;
> > +		break;
> > +	}
> > +	return true;
> > +}
> > +
> >   static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> >   				  int *nent, int maxnent)
> >   {
> > @@ -440,7 +503,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> >   	unsigned f_lm = 0;
> >   #endif
> >   	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> > -	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
> >   	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
> >   	/* cpuid 1.edx */
> > @@ -495,10 +557,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> >   		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
> >   		F(PMM) | F(PMM_EN);
> > -	/* cpuid 0xD.1.eax */
> > -	const u32 kvm_cpuid_D_1_eax_x86_features =
> > -		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> > -
> >   	/* all calls to cpuid_count() should be made on the same cpu */
> >   	get_cpu();
> > @@ -639,38 +697,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> >   		break;
> >   	}
> >   	case 0xd: {
> > -		int idx, i;
> > -		u64 supported = kvm_supported_xcr0();
> > +		int i, idx;
> > -		entry->eax &= supported;
> > -		entry->ebx = xstate_required_size(supported, false);
> > -		entry->ecx = entry->ebx;
> > -		entry->edx &= supported >> 32;
> > -		if (!supported)
> > +		if (!do_cpuid_0xd_mask(&entry[0], 0))
> >   			break;
> > -
> > -		for (idx = 1, i = 1; idx < 64; ++idx) {
> > -			u64 mask = ((u64)1 << idx);
> > +		for (i = 1, idx = 1; idx < 64; ++idx) {
> >   			if (*nent >= maxnent)
> >   				goto out;
> > -
> >   			do_host_cpuid(&entry[i], function, idx);
> > -			if (idx == 1) {
> > -				entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
> > -				cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
> > -				entry[i].ebx = 0;
> > -				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
> > -					entry[i].ebx =
> > -						xstate_required_size(supported,
> > -								     true);
> > -			} else {
> > -				if (entry[i].eax == 0 || !(supported & mask))
> > -					continue;
> > -				if (WARN_ON_ONCE(entry[i].ecx & 1))
> > -					continue;
> > -			}
> > -			entry[i].ecx = 0;
> > -			entry[i].edx = 0;
> > +
> > +			if (entry[i].eax == 0 && entry[i].ebx == 0 &&
> > +			    entry[i].ecx == 0 && entry[i].edx == 0)
> > +				continue;
> > +
> > +			if (!do_cpuid_0xd_mask(&entry[i], idx))
> > +				continue;
> >   			++*nent;
> >   			++i;
> >   		}
 >
Xiaoyao Li Feb. 17, 2020, 1:20 p.m. UTC | #3
On 2/17/2020 9:03 PM, Yang Weijiang wrote:
> On Mon, Feb 17, 2020 at 12:26:51PM +0800, Xiaoyao Li wrote:
>> On 2/11/2020 2:57 PM, Yang Weijiang wrote:
>>> CPUID.(EAX=DH, ECX={i}H i>=0) enumerates XSAVE related leaves/sub-leaves,
>>> +extern int host_xss;
>>> +u64 kvm_supported_xss(void)
>>> +{
>>> +	return KVM_SUPPORTED_XSS & host_xss;
>>> +}
>>> +
>>
>> How about using a global variable, supported_xss, instead of calculating the
>> mask on every call. Just like what Sean posted on
>> https://lore.kernel.org/kvm/20200201185218.24473-21-sean.j.christopherson@intel.com/
>>
> Thanks Xiaoyao for the comments!
> Good suggestion, I'll change it in next version.
> 
>>>    #define F(x) bit(X86_FEATURE_##x)
>>>    int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>>> @@ -112,10 +118,17 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>>>    		vcpu->arch.guest_xstate_size = best->ebx =
>>>    			xstate_required_size(vcpu->arch.xcr0, false);
>>>    	}
>>> -
>>>    	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
>>> -	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
>>> -		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
>>> +	if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
>>> +		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
>>> +
>>> +		best->ebx = xstate_required_size(xstate, true);
>>> +		vcpu->arch.guest_supported_xss =
>>> +			(best->ecx | ((u64)best->edx << 32)) &
>>> +			kvm_supported_xss();
>>> +	} else {
>>> +		vcpu->arch.guest_supported_xss = 0;
>>> +	}
>>>    	/*
>>>    	 * The existing code assumes virtual address is 48-bit or 57-bit in the
>>> @@ -426,6 +439,56 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
>>>    	}
>>>    }
>>> +static inline bool do_cpuid_0xd_mask(struct kvm_cpuid_entry2 *entry, int index)
>>> +{
>>> +	unsigned int f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
>>> +	/* cpuid 0xD.1.eax */
>>> +	const u32 kvm_cpuid_D_1_eax_x86_features =
>>> +		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
>>> +	u64 u_supported = kvm_supported_xcr0();
>>> +	u64 s_supported = kvm_supported_xss();
>>> +	u64 supported;
>>> +
>>> +	switch (index) {
>>> +	case 0:
>>> +		if (!u_supported) {
>>> +			entry->eax = 0;
>>> +			entry->ebx = 0;
>>> +			entry->ecx = 0;
>>> +			entry->edx = 0;
>>> +			return false;
>>> +		}
>>> +		entry->eax &= u_supported;
>>> +		entry->ebx = xstate_required_size(u_supported, false);
>>> +		entry->ecx = entry->ebx;
>>> +		entry->edx &= u_supported >> 32;
>>> +		break;
>>> +	case 1:
>>> +		supported = u_supported | s_supported;
>>> +		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
>>> +		cpuid_mask(&entry->eax, CPUID_D_1_EAX);
>>> +		entry->ebx = 0;
>>> +		entry->edx &= s_supported >> 32;
>>> +		entry->ecx &= s_supported;
>>
>> We'd better initialize msr_ia32_xss bitmap (entry->ecx & entry-edx) as zeros
>> here.
> Hmm, explicit setting the MSR to 0 is good in this case, but there's implied
> flow to ensure guest MSR_IA32_XSS will be 0 if entry->ecx and entry->edx are 0s.
> In above kvm_update_cpuid(), vcpu->arch.guest_supported_xss is set to 0
> when they're 0s. this masks guest cannot set non-zero value to this
> MSR. And in kvm_vcpu_reset(), vcpu->arch.ia32_xss is initialized to 0,
> in kvm_load_guest_xsave_state() MSR_IA32_XSS is set to ia32_xss,
> therefore the MSR is kept to 0.

Sorry, I think what I said, "msr_ia32_xss bitmap", misled you.

The "msr_ia32_xss bitmap" is not MSR_IA32_XSS,
but the (entry->ecx | (u64)entry->edx << 32) of cpuid.D_1.

I meant we'd better set entry->ecx and entry->edx to 0 here.

>>
>>> +		if (entry->eax & (F(XSAVES) | F(XSAVEC)))
>>> +			entry->ebx = xstate_required_size(supported, true);
>>
>> And setup msr_ia32_xss bitmap based on the s_supported within this condition
>> when F(XSAVES) is supported.
> 

And set entry->ecx & entry->edx only when F(XSAVES) is supported.

> IIUC, both XSAVEC and XSAVES use compacted format of the extended
> region, so if XSAVEC is supported while XSAVES is not, guest still can
> get correct size, so in existing code the two bits are ORed.

Yeah, but entry->ecx and entry->edx should be non-zero only when 
F(XSAVES) is set.
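
Putting both points together, case 1 of do_cpuid_0xd_mask() would then
look something like this rough, untested sketch:

	case 1:
		supported = u_supported | s_supported;
		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
		cpuid_mask(&entry->eax, CPUID_D_1_EAX);
		entry->ebx = 0;
		entry->ecx = 0;
		entry->edx = 0;
		if (entry->eax & (F(XSAVES) | F(XSAVEC)))
			entry->ebx = xstate_required_size(supported, true);
		if (entry->eax & F(XSAVES)) {
			entry->ecx = s_supported;
			entry->edx = s_supported >> 32;
		}
		break;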

>>
>>> +		break;
>>> +	default:
>>> +		supported = (entry->ecx & 0x1) ? s_supported : u_supported;
>>> +		if (!(supported & (BIT_ULL(index)))) {
>>> +			entry->eax = 0;
>>> +			entry->ebx = 0;
>>> +			entry->ecx = 0;
>>> +			entry->edx = 0;
>>> +			return false;
>>> +		}
>>> +		if (entry->ecx & 0x1)
>>> +			entry->ebx = 0;
>>> +		break;
>>> +	}
>>> +	return true;
>>> +}
>>> +
>>>    static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>>>    				  int *nent, int maxnent)
>>>    {
>>> @@ -440,7 +503,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>>>    	unsigned f_lm = 0;
>>>    #endif
>>>    	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
>>> -	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
>>>    	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
>>>    	/* cpuid 1.edx */
>>> @@ -495,10 +557,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>>>    		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
>>>    		F(PMM) | F(PMM_EN);
>>> -	/* cpuid 0xD.1.eax */
>>> -	const u32 kvm_cpuid_D_1_eax_x86_features =
>>> -		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
>>> -
>>>    	/* all calls to cpuid_count() should be made on the same cpu */
>>>    	get_cpu();
>>> @@ -639,38 +697,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
>>>    		break;
>>>    	}
>>>    	case 0xd: {
>>> -		int idx, i;
>>> -		u64 supported = kvm_supported_xcr0();
>>> +		int i, idx;
>>> -		entry->eax &= supported;
>>> -		entry->ebx = xstate_required_size(supported, false);
>>> -		entry->ecx = entry->ebx;
>>> -		entry->edx &= supported >> 32;
>>> -		if (!supported)
>>> +		if (!do_cpuid_0xd_mask(&entry[0], 0))
>>>    			break;
>>> -
>>> -		for (idx = 1, i = 1; idx < 64; ++idx) {
>>> -			u64 mask = ((u64)1 << idx);
>>> +		for (i = 1, idx = 1; idx < 64; ++idx) {
>>>    			if (*nent >= maxnent)
>>>    				goto out;
>>> -
>>>    			do_host_cpuid(&entry[i], function, idx);
>>> -			if (idx == 1) {
>>> -				entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
>>> -				cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
>>> -				entry[i].ebx = 0;
>>> -				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
>>> -					entry[i].ebx =
>>> -						xstate_required_size(supported,
>>> -								     true);
>>> -			} else {
>>> -				if (entry[i].eax == 0 || !(supported & mask))
>>> -					continue;
>>> -				if (WARN_ON_ONCE(entry[i].ecx & 1))
>>> -					continue;
>>> -			}
>>> -			entry[i].ecx = 0;
>>> -			entry[i].edx = 0;
>>> +
>>> +			if (entry[i].eax == 0 && entry[i].ebx == 0 &&
>>> +			    entry[i].ecx == 0 && entry[i].edx == 0)
>>> +				continue;
>>> +
>>> +			if (!do_cpuid_0xd_mask(&entry[i], idx))
>>> +				continue;
>>>    			++*nent;
>>>    			++i;
>>>    		}
>   >
>
Xiaoyao Li Feb. 18, 2020, 5:44 a.m. UTC | #4
On 2/17/2020 9:20 PM, Xiaoyao Li wrote:
> On 2/17/2020 9:03 PM, Yang Weijiang wrote:
>> On Mon, Feb 17, 2020 at 12:26:51PM +0800, Xiaoyao Li wrote:
>>> On 2/11/2020 2:57 PM, Yang Weijiang wrote:
>>>> CPUID.(EAX=DH, ECX={i}H i>=0) enumerates XSAVE related 
>>>> leaves/sub-leaves,
>>>> +extern int host_xss;
>>>> +u64 kvm_supported_xss(void)
>>>> +{
>>>> +    return KVM_SUPPORTED_XSS & host_xss;
>>>> +}
>>>> +
>>>
>>> How about using a global variable, supported_xss, instead of 
>>> calculating the
>>> mask on every call. Just like what Sean posted on
>>> https://lore.kernel.org/kvm/20200201185218.24473-21-sean.j.christopherson@intel.com/ 
>>>
>>>
>> Thanks Xiaoyao for the comments!
>> Good suggestion, I'll change it in next version.
>>
>>>>    #define F(x) bit(X86_FEATURE_##x)
>>>>    int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>>>> @@ -112,10 +118,17 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>>>>            vcpu->arch.guest_xstate_size = best->ebx =
>>>>                xstate_required_size(vcpu->arch.xcr0, false);
>>>>        }
>>>> -
>>>>        best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
>>>> -    if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
>>>> -        best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
>>>> +    if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
>>>> +        u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
>>>> +
>>>> +        best->ebx = xstate_required_size(xstate, true);
>>>> +        vcpu->arch.guest_supported_xss =
>>>> +            (best->ecx | ((u64)best->edx << 32)) &
>>>> +            kvm_supported_xss();
>>>> +    } else {
>>>> +        vcpu->arch.guest_supported_xss = 0;
>>>> +    }

Also, here it should be something like below:

if (best) {
	if (best->eax & (F(XSAVES) | F(XSAVEC))) {
		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
		best->ebx = xstate_required_size(xstate, true);
	}

	if (best->eax & F(XSAVES)) {
		vcpu->arch.guest_supported_xss =
			(best->ecx | ((u64)best->edx << 32)) &
			kvm_supported_xss();
	} else {
		best->ecx = 0;
		best->edx = 0;
		vcpu->arch.guest_supported_xss = 0;
	}
}

>>>>        /*
>>>>         * The existing code assumes virtual address is 48-bit or 
>>>> 57-bit in the
>>>> @@ -426,6 +439,56 @@ static inline void do_cpuid_7_mask(struct 
>>>> kvm_cpuid_entry2 *entry, int index)
>>>>        }
>>>>    }
>>>> +static inline bool do_cpuid_0xd_mask(struct kvm_cpuid_entry2 
>>>> *entry, int index)
>>>> +{
>>>> +    unsigned int f_xsaves = kvm_x86_ops->xsaves_supported() ? 
>>>> F(XSAVES) : 0;
>>>> +    /* cpuid 0xD.1.eax */
>>>> +    const u32 kvm_cpuid_D_1_eax_x86_features =
>>>> +        F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
>>>> +    u64 u_supported = kvm_supported_xcr0();
>>>> +    u64 s_supported = kvm_supported_xss();
>>>> +    u64 supported;
>>>> +
>>>> +    switch (index) {
>>>> +    case 0:
>>>> +        if (!u_supported) {
>>>> +            entry->eax = 0;
>>>> +            entry->ebx = 0;
>>>> +            entry->ecx = 0;
>>>> +            entry->edx = 0;
>>>> +            return false;
>>>> +        }
>>>> +        entry->eax &= u_supported;
>>>> +        entry->ebx = xstate_required_size(u_supported, false);
>>>> +        entry->ecx = entry->ebx;
>>>> +        entry->edx &= u_supported >> 32;
>>>> +        break;
>>>> +    case 1:
>>>> +        supported = u_supported | s_supported;
>>>> +        entry->eax &= kvm_cpuid_D_1_eax_x86_features;
>>>> +        cpuid_mask(&entry->eax, CPUID_D_1_EAX);
>>>> +        entry->ebx = 0;
>>>> +        entry->edx &= s_supported >> 32;
>>>> +        entry->ecx &= s_supported;
>>>
>>> We'd better initialize msr_ia32_xss bitmap (entry->ecx & entry-edx) 
>>> as zeros
>>> here.
>> Hmm, explicit setting the MSR to 0 is good in this case, but there's 
>> implied
>> flow to ensure guest MSR_IA32_XSS will be 0 if entry->ecx and 
>> entry->edx are 0s.
>> In above kvm_update_cpuid(), vcpu->arch.guest_supported_xss is set to 0
>> when they're 0s. this masks guest cannot set non-zero value to this
>> MSR. And in kvm_vcpu_reset(), vcpu->arch.ia32_xss is initialized to 0,
>> in kvm_load_guest_xsave_state() MSR_IA32_XSS is set to ia32_xss,
>> therefore the MSR is kept to 0.
> 
> Sorry, I think what I said "msr_ia32_xss bitmap" misled you.
> 
> "msr_ia32_xss bitmap" is not MSR_IA32_XSS,
> but the (entry->ecx | entry->edx >> 32) of cpuid.D_1
> 
> I meant we'd better set entry->ecx and entry->edx to 0 here.
> 
>>>
>>>> +        if (entry->eax & (F(XSAVES) | F(XSAVEC)))
>>>> +            entry->ebx = xstate_required_size(supported, true);
>>>
>>> And setup msr_ia32_xss bitmap based on the s_supported within this 
>>> condition
>>> when F(XSAVES) is supported.
>>
> 
> And set entry->ecx & entry->edx only when F(XSAVES) is supported.
> 
>> IIUC, both XSAVEC and XSAVES use compacted format of the extended
>> region, so if XSAVEC is supported while XSAVES is not, guest still can
>> get correct size, so in existing code the two bits are ORed.
> 
> Yeah, but entry->ecx and entry->edx should be non-zero only when 
> F(XSAVES) is set.
> 
>>>
>>>> +        break;
>>>> +    default:
>>>> +        supported = (entry->ecx & 0x1) ? s_supported : u_supported;
>>>> +        if (!(supported & (BIT_ULL(index)))) {
>>>> +            entry->eax = 0;
>>>> +            entry->ebx = 0;
>>>> +            entry->ecx = 0;
>>>> +            entry->edx = 0;
>>>> +            return false;
>>>> +        }
>>>> +        if (entry->ecx & 0x1)
>>>> +            entry->ebx = 0;
>>>> +        break;
>>>> +    }
>>>> +    return true;
>>>> +}
>>>> +
>>>>    static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, 
>>>> u32 function,
>>>>                      int *nent, int maxnent)
>>>>    {
>>>> @@ -440,7 +503,6 @@ static inline int __do_cpuid_func(struct 
>>>> kvm_cpuid_entry2 *entry, u32 function,
>>>>        unsigned f_lm = 0;
>>>>    #endif
>>>>        unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? 
>>>> F(RDTSCP) : 0;
>>>> -    unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) 
>>>> : 0;
>>>>        unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? 
>>>> F(INTEL_PT) : 0;
>>>>        /* cpuid 1.edx */
>>>> @@ -495,10 +557,6 @@ static inline int __do_cpuid_func(struct 
>>>> kvm_cpuid_entry2 *entry, u32 function,
>>>>            F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
>>>>            F(PMM) | F(PMM_EN);
>>>> -    /* cpuid 0xD.1.eax */
>>>> -    const u32 kvm_cpuid_D_1_eax_x86_features =
>>>> -        F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
>>>> -
>>>>        /* all calls to cpuid_count() should be made on the same cpu */
>>>>        get_cpu();
>>>> @@ -639,38 +697,21 @@ static inline int __do_cpuid_func(struct 
>>>> kvm_cpuid_entry2 *entry, u32 function,
>>>>            break;
>>>>        }
>>>>        case 0xd: {
>>>> -        int idx, i;
>>>> -        u64 supported = kvm_supported_xcr0();
>>>> +        int i, idx;
>>>> -        entry->eax &= supported;
>>>> -        entry->ebx = xstate_required_size(supported, false);
>>>> -        entry->ecx = entry->ebx;
>>>> -        entry->edx &= supported >> 32;
>>>> -        if (!supported)
>>>> +        if (!do_cpuid_0xd_mask(&entry[0], 0))
>>>>                break;
>>>> -
>>>> -        for (idx = 1, i = 1; idx < 64; ++idx) {
>>>> -            u64 mask = ((u64)1 << idx);
>>>> +        for (i = 1, idx = 1; idx < 64; ++idx) {
>>>>                if (*nent >= maxnent)
>>>>                    goto out;
>>>> -
>>>>                do_host_cpuid(&entry[i], function, idx);
>>>> -            if (idx == 1) {
>>>> -                entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
>>>> -                cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
>>>> -                entry[i].ebx = 0;
>>>> -                if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
>>>> -                    entry[i].ebx =
>>>> -                        xstate_required_size(supported,
>>>> -                                     true);
>>>> -            } else {
>>>> -                if (entry[i].eax == 0 || !(supported & mask))
>>>> -                    continue;
>>>> -                if (WARN_ON_ONCE(entry[i].ecx & 1))
>>>> -                    continue;
>>>> -            }
>>>> -            entry[i].ecx = 0;
>>>> -            entry[i].edx = 0;
>>>> +
>>>> +            if (entry[i].eax == 0 && entry[i].ebx == 0 &&
>>>> +                entry[i].ecx == 0 && entry[i].edx == 0)
>>>> +                continue;
>>>> +
>>>> +            if (!do_cpuid_0xd_mask(&entry[i], idx))
>>>> +                continue;
>>>>                ++*nent;
>>>>                ++i;
>>>>            }
>>   >
>>
>
Yang, Weijiang Feb. 18, 2020, 12:43 p.m. UTC | #5
On Mon, Feb 17, 2020 at 09:20:02PM +0800, Xiaoyao Li wrote:
> On 2/17/2020 9:03 PM, Yang Weijiang wrote:
> > On Mon, Feb 17, 2020 at 12:26:51PM +0800, Xiaoyao Li wrote:
> > > On 2/11/2020 2:57 PM, Yang Weijiang wrote:
> > > > CPUID.(EAX=DH, ECX={i}H i>=0) enumerates XSAVE related leaves/sub-leaves,
> > > > +extern int host_xss;
> > > > +u64 kvm_supported_xss(void)
> > > > +{
> > > > +	return KVM_SUPPORTED_XSS & host_xss;
> > > > +}
> > > > +
> > > 
> > > How about using a global variable, supported_xss, instead of calculating the
> > > mask on every call. Just like what Sean posted on
> > > https://lore.kernel.org/kvm/20200201185218.24473-21-sean.j.christopherson@intel.com/
> > > 
> > Thanks Xiaoyao for the comments!
> > Good suggestion, I'll change it in next version.
> > 
> > > >    #define F(x) bit(X86_FEATURE_##x)
> > > >    int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > > > @@ -112,10 +118,17 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > > >    		vcpu->arch.guest_xstate_size = best->ebx =
> > > >    			xstate_required_size(vcpu->arch.xcr0, false);
> > > >    	}
> > > > -
> > > >    	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> > > > -	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
> > > > -		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> > > > +	if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
> > > > +		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
> > > > +
> > > > +		best->ebx = xstate_required_size(xstate, true);
> > > > +		vcpu->arch.guest_supported_xss =
> > > > +			(best->ecx | ((u64)best->edx << 32)) &
> > > > +			kvm_supported_xss();
> > > > +	} else {
> > > > +		vcpu->arch.guest_supported_xss = 0;
> > > > +	}
> > > >    	/*
> > > >    	 * The existing code assumes virtual address is 48-bit or 57-bit in the
> > > > @@ -426,6 +439,56 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
> > > >    	}
> > > >    }
> > > > +static inline bool do_cpuid_0xd_mask(struct kvm_cpuid_entry2 *entry, int index)
> > > > +{
> > > > +	unsigned int f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
> > > > +	/* cpuid 0xD.1.eax */
> > > > +	const u32 kvm_cpuid_D_1_eax_x86_features =
> > > > +		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> > > > +	u64 u_supported = kvm_supported_xcr0();
> > > > +	u64 s_supported = kvm_supported_xss();
> > > > +	u64 supported;
> > > > +
> > > > +	switch (index) {
> > > > +	case 0:
> > > > +		if (!u_supported) {
> > > > +			entry->eax = 0;
> > > > +			entry->ebx = 0;
> > > > +			entry->ecx = 0;
> > > > +			entry->edx = 0;
> > > > +			return false;
> > > > +		}
> > > > +		entry->eax &= u_supported;
> > > > +		entry->ebx = xstate_required_size(u_supported, false);
> > > > +		entry->ecx = entry->ebx;
> > > > +		entry->edx &= u_supported >> 32;
> > > > +		break;
> > > > +	case 1:
> > > > +		supported = u_supported | s_supported;
> > > > +		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
> > > > +		cpuid_mask(&entry->eax, CPUID_D_1_EAX);
> > > > +		entry->ebx = 0;
> > > > +		entry->edx &= s_supported >> 32;
> > > > +		entry->ecx &= s_supported;
> > > 
> > > We'd better initialize msr_ia32_xss bitmap (entry->ecx & entry-edx) as zeros
> > > here.
> > Hmm, explicit setting the MSR to 0 is good in this case, but there's implied
> > flow to ensure guest MSR_IA32_XSS will be 0 if entry->ecx and entry->edx are 0s.
> > In above kvm_update_cpuid(), vcpu->arch.guest_supported_xss is set to 0
> > when they're 0s. this masks guest cannot set non-zero value to this
> > MSR. And in kvm_vcpu_reset(), vcpu->arch.ia32_xss is initialized to 0,
> > in kvm_load_guest_xsave_state() MSR_IA32_XSS is set to ia32_xss,
> > therefore the MSR is kept to 0.
> 
> Sorry, I think what I said "msr_ia32_xss bitmap" misled you.
> 
> "msr_ia32_xss bitmap" is not MSR_IA32_XSS,
> but the (entry->ecx | entry->edx >> 32) of cpuid.D_1
> 
> I meant we'd better set entry->ecx and entry->edx to 0 here.
>
> > > 
> > > > +		if (entry->eax & (F(XSAVES) | F(XSAVEC)))
> > > > +			entry->ebx = xstate_required_size(supported, true);
> > > 
> > > And setup msr_ia32_xss bitmap based on the s_supported within this condition
> > > when F(XSAVES) is supported.
> > 
> 
> And set entry->ecx & entry->edx only when F(XSAVES) is supported.
>
Yep, makes sense, will change it, thanks!

> > IIUC, both XSAVEC and XSAVES use compacted format of the extended
> > region, so if XSAVEC is supported while XSAVES is not, guest still can
> > get correct size, so in existing code the two bits are ORed.
> 
> Yeah, but entry->ecx and entry->edx should be non-zero only when F(XSAVES)
> is set.
>
Yes, when F(XSAVES) is not supported, we just initialize them to 0s.

> > > 
> > > > +		break;
> > > > +	default:
> > > > +		supported = (entry->ecx & 0x1) ? s_supported : u_supported;
> > > > +		if (!(supported & (BIT_ULL(index)))) {
> > > > +			entry->eax = 0;
> > > > +			entry->ebx = 0;
> > > > +			entry->ecx = 0;
> > > > +			entry->edx = 0;
> > > > +			return false;
> > > > +		}
> > > > +		if (entry->ecx & 0x1)
> > > > +			entry->ebx = 0;
> > > > +		break;
> > > > +	}
> > > > +	return true;
> > > > +}
> > > > +
> > > >    static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> > > >    				  int *nent, int maxnent)
> > > >    {
> > > > @@ -440,7 +503,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> > > >    	unsigned f_lm = 0;
> > > >    #endif
> > > >    	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
> > > > -	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
> > > >    	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
> > > >    	/* cpuid 1.edx */
> > > > @@ -495,10 +557,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> > > >    		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
> > > >    		F(PMM) | F(PMM_EN);
> > > > -	/* cpuid 0xD.1.eax */
> > > > -	const u32 kvm_cpuid_D_1_eax_x86_features =
> > > > -		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> > > > -
> > > >    	/* all calls to cpuid_count() should be made on the same cpu */
> > > >    	get_cpu();
> > > > @@ -639,38 +697,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
> > > >    		break;
> > > >    	}
> > > >    	case 0xd: {
> > > > -		int idx, i;
> > > > -		u64 supported = kvm_supported_xcr0();
> > > > +		int i, idx;
> > > > -		entry->eax &= supported;
> > > > -		entry->ebx = xstate_required_size(supported, false);
> > > > -		entry->ecx = entry->ebx;
> > > > -		entry->edx &= supported >> 32;
> > > > -		if (!supported)
> > > > +		if (!do_cpuid_0xd_mask(&entry[0], 0))
> > > >    			break;
> > > > -
> > > > -		for (idx = 1, i = 1; idx < 64; ++idx) {
> > > > -			u64 mask = ((u64)1 << idx);
> > > > +		for (i = 1, idx = 1; idx < 64; ++idx) {
> > > >    			if (*nent >= maxnent)
> > > >    				goto out;
> > > > -
> > > >    			do_host_cpuid(&entry[i], function, idx);
> > > > -			if (idx == 1) {
> > > > -				entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
> > > > -				cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
> > > > -				entry[i].ebx = 0;
> > > > -				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
> > > > -					entry[i].ebx =
> > > > -						xstate_required_size(supported,
> > > > -								     true);
> > > > -			} else {
> > > > -				if (entry[i].eax == 0 || !(supported & mask))
> > > > -					continue;
> > > > -				if (WARN_ON_ONCE(entry[i].ecx & 1))
> > > > -					continue;
> > > > -			}
> > > > -			entry[i].ecx = 0;
> > > > -			entry[i].edx = 0;
> > > > +
> > > > +			if (entry[i].eax == 0 && entry[i].ebx == 0 &&
> > > > +			    entry[i].ecx == 0 && entry[i].edx == 0)
> > > > +				continue;
> > > > +
> > > > +			if (!do_cpuid_0xd_mask(&entry[i], idx))
> > > > +				continue;
> > > >    			++*nent;
> > > >    			++i;
> > > >    		}
> >   >
> >
Yang, Weijiang Feb. 18, 2020, 12:46 p.m. UTC | #6
On Tue, Feb 18, 2020 at 01:44:17PM +0800, Xiaoyao Li wrote:
> On 2/17/2020 9:20 PM, Xiaoyao Li wrote:
> > On 2/17/2020 9:03 PM, Yang Weijiang wrote:
> > > On Mon, Feb 17, 2020 at 12:26:51PM +0800, Xiaoyao Li wrote:
> > > > On 2/11/2020 2:57 PM, Yang Weijiang wrote:
> > > > > CPUID.(EAX=DH, ECX={i}H i>=0) enumerates XSAVE related
> > > > > leaves/sub-leaves,
> > > > > +extern int host_xss;
> > > > > +u64 kvm_supported_xss(void)
> > > > > +{
> > > > > +    return KVM_SUPPORTED_XSS & host_xss;
> > > > > +}
> > > > > +
> > > > 
> > > > How about using a global variable, supported_xss, instead of
> > > > calculating the
> > > > mask on every call. Just like what Sean posted on
> > > > https://lore.kernel.org/kvm/20200201185218.24473-21-sean.j.christopherson@intel.com/
> > > > 
> > > > 
> > > Thanks Xiaoyao for the comments!
> > > Good suggestion, I'll change it in next version.
> > > 
> > > > >    #define F(x) bit(X86_FEATURE_##x)
> > > > >    int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > > > > @@ -112,10 +118,17 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> > > > >            vcpu->arch.guest_xstate_size = best->ebx =
> > > > >                xstate_required_size(vcpu->arch.xcr0, false);
> > > > >        }
> > > > > -
> > > > >        best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
> > > > > -    if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
> > > > > -        best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
> > > > > +    if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
> > > > > +        u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
> > > > > +
> > > > > +        best->ebx = xstate_required_size(xstate, true);
> > > > > +        vcpu->arch.guest_supported_xss =
> > > > > +            (best->ecx | ((u64)best->edx << 32)) &
> > > > > +            kvm_supported_xss();
> > > > > +    } else {
> > > > > +        vcpu->arch.guest_supported_xss = 0;
> > > > > +    }
> 
> also here should be something like below:
> 
> if (best) {
> 	if (best->eax & (F(XSAVES) | F(XSAVEC))) {
> 		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
> 		best->ebx = xstate_required_size(xstate, true);
> 	}
> 
> 	if (best->eax & F(XSAVES)) {
> 		vcpu->arch.guest_supported_xss =
> 			(best->ecx | ((u64)best->edx << 32)) &
> 			kvm_supported_xss();
> 	} else {
> 		best->ecx = 0;
> 		best->edx = 0;
> 		vcpu->arch.guest_supported_xss = 0;
> 	}
> }
Right, to keep it consistent with the CPUID enumeration, thanks for the review!

> 
> > > > >        /*
> > > > >         * The existing code assumes virtual address is
> > > > > 48-bit or 57-bit in the
> > > > > @@ -426,6 +439,56 @@ static inline void
> > > > > do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
> > > > >        }
> > > > >    }
> > > > > +static inline bool do_cpuid_0xd_mask(struct
> > > > > kvm_cpuid_entry2 *entry, int index)
> > > > > +{
> > > > > +    unsigned int f_xsaves = kvm_x86_ops->xsaves_supported()
> > > > > ? F(XSAVES) : 0;
> > > > > +    /* cpuid 0xD.1.eax */
> > > > > +    const u32 kvm_cpuid_D_1_eax_x86_features =
> > > > > +        F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> > > > > +    u64 u_supported = kvm_supported_xcr0();
> > > > > +    u64 s_supported = kvm_supported_xss();
> > > > > +    u64 supported;
> > > > > +
> > > > > +    switch (index) {
> > > > > +    case 0:
> > > > > +        if (!u_supported) {
> > > > > +            entry->eax = 0;
> > > > > +            entry->ebx = 0;
> > > > > +            entry->ecx = 0;
> > > > > +            entry->edx = 0;
> > > > > +            return false;
> > > > > +        }
> > > > > +        entry->eax &= u_supported;
> > > > > +        entry->ebx = xstate_required_size(u_supported, false);
> > > > > +        entry->ecx = entry->ebx;
> > > > > +        entry->edx &= u_supported >> 32;
> > > > > +        break;
> > > > > +    case 1:
> > > > > +        supported = u_supported | s_supported;
> > > > > +        entry->eax &= kvm_cpuid_D_1_eax_x86_features;
> > > > > +        cpuid_mask(&entry->eax, CPUID_D_1_EAX);
> > > > > +        entry->ebx = 0;
> > > > > +        entry->edx &= s_supported >> 32;
> > > > > +        entry->ecx &= s_supported;
> > > > 
> > > > We'd better initialize msr_ia32_xss bitmap (entry->ecx &
> > > > entry-edx) as zeros
> > > > here.
> > > Hmm, explicit setting the MSR to 0 is good in this case, but there's
> > > implied
> > > flow to ensure guest MSR_IA32_XSS will be 0 if entry->ecx and
> > > entry->edx are 0s.
> > > In above kvm_update_cpuid(), vcpu->arch.guest_supported_xss is set to 0
> > > when they're 0s. this masks guest cannot set non-zero value to this
> > > MSR. And in kvm_vcpu_reset(), vcpu->arch.ia32_xss is initialized to 0,
> > > in kvm_load_guest_xsave_state() MSR_IA32_XSS is set to ia32_xss,
> > > therefore the MSR is kept to 0.
> > 
> > Sorry, I think what I said "msr_ia32_xss bitmap" misled you.
> > 
> > "msr_ia32_xss bitmap" is not MSR_IA32_XSS,
> > but the (entry->ecx | entry->edx >> 32) of cpuid.D_1
> > 
> > I meant we'd better set entry->ecx and entry->edx to 0 here.
> > 
> > > > 
> > > > > +        if (entry->eax & (F(XSAVES) | F(XSAVEC)))
> > > > > +            entry->ebx = xstate_required_size(supported, true);
> > > > 
> > > > And setup msr_ia32_xss bitmap based on the s_supported within
> > > > this condition
> > > > when F(XSAVES) is supported.
> > > 
> > 
> > And set entry->ecx & entry->edx only when F(XSAVES) is supported.
> > 
> > > IIUC, both XSAVEC and XSAVES use compacted format of the extended
> > > region, so if XSAVEC is supported while XSAVES is not, guest still can
> > > get correct size, so in existing code the two bits are ORed.
> > 
> > Yeah, but entry->ecx and entry->edx should be non-zero only when
> > F(XSAVES) is set.
> > 
> > > > 
> > > > > +        break;
> > > > > +    default:
> > > > > +        supported = (entry->ecx & 0x1) ? s_supported : u_supported;
> > > > > +        if (!(supported & (BIT_ULL(index)))) {
> > > > > +            entry->eax = 0;
> > > > > +            entry->ebx = 0;
> > > > > +            entry->ecx = 0;
> > > > > +            entry->edx = 0;
> > > > > +            return false;
> > > > > +        }
> > > > > +        if (entry->ecx & 0x1)
> > > > > +            entry->ebx = 0;
> > > > > +        break;
> > > > > +    }
> > > > > +    return true;
> > > > > +}
> > > > > +
> > > > >    static inline int __do_cpuid_func(struct kvm_cpuid_entry2
> > > > > *entry, u32 function,
> > > > >                      int *nent, int maxnent)
> > > > >    {
> > > > > @@ -440,7 +503,6 @@ static inline int __do_cpuid_func(struct
> > > > > kvm_cpuid_entry2 *entry, u32 function,
> > > > >        unsigned f_lm = 0;
> > > > >    #endif
> > > > >        unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ?
> > > > > F(RDTSCP) : 0;
> > > > > -    unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ?
> > > > > F(XSAVES) : 0;
> > > > >        unsigned f_intel_pt = kvm_x86_ops->pt_supported() ?
> > > > > F(INTEL_PT) : 0;
> > > > >        /* cpuid 1.edx */
> > > > > @@ -495,10 +557,6 @@ static inline int
> > > > > __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32
> > > > > function,
> > > > >            F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
> > > > >            F(PMM) | F(PMM_EN);
> > > > > -    /* cpuid 0xD.1.eax */
> > > > > -    const u32 kvm_cpuid_D_1_eax_x86_features =
> > > > > -        F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
> > > > > -
> > > > >        /* all calls to cpuid_count() should be made on the same cpu */
> > > > >        get_cpu();
> > > > > @@ -639,38 +697,21 @@ static inline int
> > > > > __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32
> > > > > function,
> > > > >            break;
> > > > >        }
> > > > >        case 0xd: {
> > > > > -        int idx, i;
> > > > > -        u64 supported = kvm_supported_xcr0();
> > > > > +        int i, idx;
> > > > > -        entry->eax &= supported;
> > > > > -        entry->ebx = xstate_required_size(supported, false);
> > > > > -        entry->ecx = entry->ebx;
> > > > > -        entry->edx &= supported >> 32;
> > > > > -        if (!supported)
> > > > > +        if (!do_cpuid_0xd_mask(&entry[0], 0))
> > > > >                break;
> > > > > -
> > > > > -        for (idx = 1, i = 1; idx < 64; ++idx) {
> > > > > -            u64 mask = ((u64)1 << idx);
> > > > > +        for (i = 1, idx = 1; idx < 64; ++idx) {
> > > > >                if (*nent >= maxnent)
> > > > >                    goto out;
> > > > > -
> > > > >                do_host_cpuid(&entry[i], function, idx);
> > > > > -            if (idx == 1) {
> > > > > -                entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
> > > > > -                cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
> > > > > -                entry[i].ebx = 0;
> > > > > -                if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
> > > > > -                    entry[i].ebx =
> > > > > -                        xstate_required_size(supported,
> > > > > -                                     true);
> > > > > -            } else {
> > > > > -                if (entry[i].eax == 0 || !(supported & mask))
> > > > > -                    continue;
> > > > > -                if (WARN_ON_ONCE(entry[i].ecx & 1))
> > > > > -                    continue;
> > > > > -            }
> > > > > -            entry[i].ecx = 0;
> > > > > -            entry[i].edx = 0;
> > > > > +
> > > > > +            if (entry[i].eax == 0 && entry[i].ebx == 0 &&
> > > > > +                entry[i].ecx == 0 && entry[i].edx == 0)
> > > > > +                continue;
> > > > > +
> > > > > +            if (!do_cpuid_0xd_mask(&entry[i], idx))
> > > > > +                continue;
> > > > >                ++*nent;
> > > > >                ++i;
> > > > >            }
> > >   >
> > > 
> >

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b79cd6aa4075..627284fa4369 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -638,6 +638,7 @@  struct kvm_vcpu_arch {
 
 	u64 xcr0;
 	u64 guest_supported_xcr0;
+	u64 guest_supported_xss;
 	u32 guest_xstate_size;
 
 	struct kvm_pio_request pio;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index cfafa320a8cf..9546271d4038 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -62,6 +62,12 @@  u64 kvm_supported_xcr0(void)
 	return xcr0;
 }
 
+extern u64 host_xss;
+u64 kvm_supported_xss(void)
+{
+	return KVM_SUPPORTED_XSS & host_xss;
+}
+
 #define F(x) bit(X86_FEATURE_##x)
 
 int kvm_update_cpuid(struct kvm_vcpu *vcpu)
@@ -112,10 +118,17 @@  int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 		vcpu->arch.guest_xstate_size = best->ebx =
 			xstate_required_size(vcpu->arch.xcr0, false);
 	}
-
 	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
-	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
-		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+	if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) {
+		u64 xstate = vcpu->arch.xcr0 | vcpu->arch.ia32_xss;
+
+		best->ebx = xstate_required_size(xstate, true);
+		vcpu->arch.guest_supported_xss =
+			(best->ecx | ((u64)best->edx << 32)) &
+			kvm_supported_xss();
+	} else {
+		vcpu->arch.guest_supported_xss = 0;
+	}
 
 	/*
 	 * The existing code assumes virtual address is 48-bit or 57-bit in the
@@ -426,6 +439,56 @@  static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 	}
 }
 
+static inline bool do_cpuid_0xd_mask(struct kvm_cpuid_entry2 *entry, int index)
+{
+	unsigned int f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
+	/* cpuid 0xD.1.eax */
+	const u32 kvm_cpuid_D_1_eax_x86_features =
+		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
+	u64 u_supported = kvm_supported_xcr0();
+	u64 s_supported = kvm_supported_xss();
+	u64 supported;
+
+	switch (index) {
+	case 0:
+		if (!u_supported) {
+			entry->eax = 0;
+			entry->ebx = 0;
+			entry->ecx = 0;
+			entry->edx = 0;
+			return false;
+		}
+		entry->eax &= u_supported;
+		entry->ebx = xstate_required_size(u_supported, false);
+		entry->ecx = entry->ebx;
+		entry->edx &= u_supported >> 32;
+		break;
+	case 1:
+		supported = u_supported | s_supported;
+		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
+		cpuid_mask(&entry->eax, CPUID_D_1_EAX);
+		entry->ebx = 0;
+		entry->edx &= s_supported >> 32;
+		entry->ecx &= s_supported;
+		if (entry->eax & (F(XSAVES) | F(XSAVEC)))
+			entry->ebx = xstate_required_size(supported, true);
+		break;
+	default:
+		supported = (entry->ecx & 0x1) ? s_supported : u_supported;
+		if (!(supported & (BIT_ULL(index)))) {
+			entry->eax = 0;
+			entry->ebx = 0;
+			entry->ecx = 0;
+			entry->edx = 0;
+			return false;
+		}
+		if (entry->ecx & 0x1)
+			entry->ebx = 0;
+		break;
+	}
+	return true;
+}
+
 static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
 				  int *nent, int maxnent)
 {
@@ -440,7 +503,6 @@  static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
 	unsigned f_lm = 0;
 #endif
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
-	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
 	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
 
 	/* cpuid 1.edx */
@@ -495,10 +557,6 @@  static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
 		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
 		F(PMM) | F(PMM_EN);
 
-	/* cpuid 0xD.1.eax */
-	const u32 kvm_cpuid_D_1_eax_x86_features =
-		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
-
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
 
@@ -639,38 +697,21 @@  static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
 		break;
 	}
 	case 0xd: {
-		int idx, i;
-		u64 supported = kvm_supported_xcr0();
+		int i, idx;
 
-		entry->eax &= supported;
-		entry->ebx = xstate_required_size(supported, false);
-		entry->ecx = entry->ebx;
-		entry->edx &= supported >> 32;
-		if (!supported)
+		if (!do_cpuid_0xd_mask(&entry[0], 0))
 			break;
-
-		for (idx = 1, i = 1; idx < 64; ++idx) {
-			u64 mask = ((u64)1 << idx);
+		for (i = 1, idx = 1; idx < 64; ++idx) {
 			if (*nent >= maxnent)
 				goto out;
-
 			do_host_cpuid(&entry[i], function, idx);
-			if (idx == 1) {
-				entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
-				cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
-				entry[i].ebx = 0;
-				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
-					entry[i].ebx =
-						xstate_required_size(supported,
-								     true);
-			} else {
-				if (entry[i].eax == 0 || !(supported & mask))
-					continue;
-				if (WARN_ON_ONCE(entry[i].ecx & 1))
-					continue;
-			}
-			entry[i].ecx = 0;
-			entry[i].edx = 0;
+
+			if (entry[i].eax == 0 && entry[i].ebx == 0 &&
+			    entry[i].ecx == 0 && entry[i].edx == 0)
+				continue;
+
+			if (!do_cpuid_0xd_mask(&entry[i], idx))
+				continue;
 			++*nent;
 			++i;
 		}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf917139de6b..908a6cdb2151 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -177,7 +177,7 @@  struct kvm_shared_msrs {
 static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
 static struct kvm_shared_msrs __percpu *shared_msrs;
 
-static u64 __read_mostly host_xss;
+u64 __read_mostly host_xss;
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "pf_fixed", VCPU_STAT(pf_fixed) },
@@ -2732,7 +2732,7 @@  int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * RDMSR/WRMSR rather than XSAVES/XRSTORS to save/restore PT
 		 * MSRs.
 		 */
-		if (data != 0)
+		if (data & ~vcpu->arch.guest_supported_xss)
 			return 1;
 		vcpu->arch.ia32_xss = data;
 		break;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 29391af8871d..9e7725f8bb46 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -296,6 +296,14 @@  int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
 				| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
 				| XFEATURE_MASK_PKRU)
+
+/*
+ * In the future, new XSS bits can be ORed in here to make them
+ * available to KVM and the guest; right now it's 0, meaning no
+ * supervisor XSS bits are supported.
+ */
+#define KVM_SUPPORTED_XSS 0
+
 extern u64 host_xcr0;
 
 extern u64 kvm_supported_xcr0(void);