diff mbox series

[38/61] KVM: x86: Introduce kvm_cpu_caps to replace runtime CPUID masking

Message ID 20200201185218.24473-39-sean.j.christopherson@intel.com
State New, archived
Headers show
Series KVM: x86: Introduce KVM cpu caps | expand

Commit Message

Sean Christopherson Feb. 1, 2020, 6:51 p.m. UTC
Calculate the CPUID masks for KVM_GET_SUPPORTED_CPUID at load time using
what is effectively a KVM-adjusted copy of boot_cpu_data, or more
precisely, the x86_capability array in boot_cpu_data.

In terms of KVM support, the vast majority of CPUID feature bits are
constant, and *all* feature support is known at KVM load time.  Rather
than apply boot_cpu_data, which is effectively read-only after init,
at runtime, copy it into a KVM-specific array and use *that* to mask
CPUID registers.

In additional to consolidating the masking, kvm_cpu_caps can be adjusted
by SVM/VMX at load time and thus eliminate all feature bit manipulation
in ->set_supported_cpuid().

No functional change intended.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/cpuid.c | 229 +++++++++++++++++++++++--------------------
 arch/x86/kvm/cpuid.h |  19 ++++
 arch/x86/kvm/x86.c   |   2 +
 3 files changed, 142 insertions(+), 108 deletions(-)

Comments

Vitaly Kuznetsov Feb. 24, 2020, 4:32 p.m. UTC | #1
Sean Christopherson <sean.j.christopherson@intel.com> writes:

> Calculate the CPUID masks for KVM_GET_SUPPORTED_CPUID at load time using
> what is effectively a KVM-adjusted copy of boot_cpu_data, or more
> precisely, the x86_capability array in boot_cpu_data.
>
> In terms of KVM support, the vast majority of CPUID feature bits are
> constant, and *all* feature support is known at KVM load time.  Rather
> than apply boot_cpu_data, which is effectively read-only after init,
> at runtime, copy it into a KVM-specific array and use *that* to mask
> CPUID registers.
>
> In additional to consolidating the masking, kvm_cpu_caps can be adjusted
> by SVM/VMX at load time and thus eliminate all feature bit manipulation
> in ->set_supported_cpuid().
>
> No functional change intended.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> ---
>  arch/x86/kvm/cpuid.c | 229 +++++++++++++++++++++++--------------------
>  arch/x86/kvm/cpuid.h |  19 ++++
>  arch/x86/kvm/x86.c   |   2 +
>  3 files changed, 142 insertions(+), 108 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 20a7af320291..c2a4c9df49a9 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -24,6 +24,13 @@
>  #include "trace.h"
>  #include "pmu.h"
>  
> +/*
> + * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
> + * aligned to sizeof(unsigned long) because it's not accessed via bitops.
> + */
> +u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
> +EXPORT_SYMBOL_GPL(kvm_cpu_caps);
> +
>  static u32 xstate_required_size(u64 xstate_bv, bool compacted)
>  {
>  	int feature_bit = 0;
> @@ -259,7 +266,119 @@ static __always_inline void cpuid_entry_mask(struct kvm_cpuid_entry2 *entry,
>  {
>  	u32 *reg = cpuid_entry_get_reg(entry, leaf * 32);
>  
> -	*reg &= boot_cpu_data.x86_capability[leaf];
> +	BUILD_BUG_ON(leaf > ARRAY_SIZE(kvm_cpu_caps));

Should this be '>=' ?

> +	*reg &= kvm_cpu_caps[leaf];
> +}
> +
> +static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
> +{
> +	reverse_cpuid_check(leaf);
> +	kvm_cpu_caps[leaf] &= mask;
> +}
> +
> +void kvm_set_cpu_caps(void)
> +{
> +	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
> +#ifdef CONFIG_X86_64
> +	unsigned f_gbpages = F(GBPAGES);
> +	unsigned f_lm = F(LM);
> +#else
> +	unsigned f_gbpages = 0;
> +	unsigned f_lm = 0;
> +#endif

Three too many bare 'unsinged's :-)

> +
> +	BUILD_BUG_ON(sizeof(kvm_cpu_caps) >
> +		     sizeof(boot_cpu_data.x86_capability));
> +
> +	memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
> +	       sizeof(kvm_cpu_caps));
> +
> +	kvm_cpu_cap_mask(CPUID_1_EDX,
> +		F(FPU) | F(VME) | F(DE) | F(PSE) |
> +		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
> +		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
> +		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
> +		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
> +		0 /* Reserved, DS, ACPI */ | F(MMX) |
> +		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
> +		0 /* HTT, TM, Reserved, PBE */
> +	);
> +
> +	kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
> +		F(FPU) | F(VME) | F(DE) | F(PSE) |
> +		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
> +		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
> +		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
> +		F(PAT) | F(PSE36) | 0 /* Reserved */ |
> +		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> +		F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
> +		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
> +	);
> +
> +	kvm_cpu_cap_mask(CPUID_1_ECX,
> +		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
> +		 * but *not* advertised to guests via CPUID ! */
> +		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> +		0 /* DS-CPL, VMX, SMX, EST */ |
> +		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> +		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> +		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
> +		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> +		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
> +		F(F16C) | F(RDRAND)
> +	);

I would suggest we order things by CPUID_NUM here, i.e.

CPUID_1_ECX
CPUID_1_EDX
CPUID_7_1_EAX
CPUID_7_0_EBX
CPUID_7_ECX
CPUID_7_EDX
CPUID_D_1_EAX
...

> +
> +	kvm_cpu_cap_mask(CPUID_7_0_EBX,
> +		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
> +		F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
> +		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
> +		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
> +		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
> +	);
> +
> +	kvm_cpu_cap_mask(CPUID_7_ECX,
> +		F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
> +		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
> +		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
> +		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/
> +	);
> +	/* Set LA57 based on hardware capability. */
> +	if (cpuid_ecx(7) & F(LA57))
> +		kvm_cpu_cap_set(X86_FEATURE_LA57);
> +
> +	kvm_cpu_cap_mask(CPUID_7_EDX,
> +		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
> +		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
> +		F(MD_CLEAR)
> +	);
> +
> +	kvm_cpu_cap_mask(CPUID_7_1_EAX,
> +		F(AVX512_BF16)
> +	);
> +
> +	kvm_cpu_cap_mask(CPUID_D_1_EAX,
> +		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES)
> +	);
> +
> +	kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
> +		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
> +		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
> +		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
> +		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
> +		F(TOPOEXT) | F(PERFCTR_CORE)
> +	);
> +
> +	kvm_cpu_cap_mask(CPUID_8000_0008_EBX,
> +		F(CLZERO) | F(XSAVEERPTR) |
> +		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
> +		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON)
> +	);
> +
> +	kvm_cpu_cap_mask(CPUID_C000_0001_EDX,
> +		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
> +		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
> +		F(PMM) | F(PMM_EN)
> +	);
>  }
>  
>  struct kvm_cpuid_array {
> @@ -339,48 +458,13 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
>  
>  static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry)
>  {
> -	unsigned f_la57;
> -
> -	/* cpuid 7.0.ebx */
> -	const u32 kvm_cpuid_7_0_ebx_x86_features =
> -		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
> -		F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
> -		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
> -		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
> -		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/;
> -
> -	/* cpuid 7.0.ecx*/
> -	const u32 kvm_cpuid_7_0_ecx_x86_features =
> -		F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
> -		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
> -		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
> -		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
> -
> -	/* cpuid 7.0.edx*/
> -	const u32 kvm_cpuid_7_0_edx_x86_features =
> -		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
> -		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
> -		F(MD_CLEAR);
> -
> -	/* cpuid 7.1.eax */
> -	const u32 kvm_cpuid_7_1_eax_x86_features =
> -		F(AVX512_BF16);
> -
>  	switch (entry->index) {
>  	case 0:
>  		entry->eax = min(entry->eax, 1u);
> -		entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
>  		cpuid_entry_mask(entry, CPUID_7_0_EBX);
>  		/* TSC_ADJUST is emulated */
>  		cpuid_entry_set(entry, X86_FEATURE_TSC_ADJUST);
> -
> -		entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
> -		f_la57 = cpuid_entry_get(entry, X86_FEATURE_LA57);
>  		cpuid_entry_mask(entry, CPUID_7_ECX);
> -		/* Set LA57 based on hardware capability. */
> -		entry->ecx |= f_la57;
> -
> -		entry->edx &= kvm_cpuid_7_0_edx_x86_features;
>  		cpuid_entry_mask(entry, CPUID_7_EDX);
>  		if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
>  			cpuid_entry_set(entry, X86_FEATURE_SPEC_CTRL);
> @@ -395,7 +479,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry)
>  		cpuid_entry_set(entry, X86_FEATURE_ARCH_CAPABILITIES);
>  		break;
>  	case 1:
> -		entry->eax &= kvm_cpuid_7_1_eax_x86_features;
> +		cpuid_entry_mask(entry, CPUID_7_1_EAX);
>  		entry->ebx = 0;
>  		entry->ecx = 0;
>  		entry->edx = 0;
> @@ -414,72 +498,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>  {
>  	struct kvm_cpuid_entry2 *entry;
>  	int r, i, max_idx;
> -	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
> -#ifdef CONFIG_X86_64
> -	unsigned f_gbpages = F(GBPAGES);
> -	unsigned f_lm = F(LM);
> -#else
> -	unsigned f_gbpages = 0;
> -	unsigned f_lm = 0;
> -#endif
>  	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
>  
> -	/* cpuid 1.edx */
> -	const u32 kvm_cpuid_1_edx_x86_features =
> -		F(FPU) | F(VME) | F(DE) | F(PSE) |
> -		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
> -		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
> -		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
> -		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
> -		0 /* Reserved, DS, ACPI */ | F(MMX) |
> -		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
> -		0 /* HTT, TM, Reserved, PBE */;
> -	/* cpuid 0x80000001.edx */
> -	const u32 kvm_cpuid_8000_0001_edx_x86_features =
> -		F(FPU) | F(VME) | F(DE) | F(PSE) |
> -		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
> -		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
> -		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
> -		F(PAT) | F(PSE36) | 0 /* Reserved */ |
> -		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> -		F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
> -		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
> -	/* cpuid 1.ecx */
> -	const u32 kvm_cpuid_1_ecx_x86_features =
> -		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
> -		 * but *not* advertised to guests via CPUID ! */
> -		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> -		0 /* DS-CPL, VMX, SMX, EST */ |
> -		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> -		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> -		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
> -		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> -		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
> -		F(F16C) | F(RDRAND);
> -	/* cpuid 0x80000001.ecx */
> -	const u32 kvm_cpuid_8000_0001_ecx_x86_features =
> -		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
> -		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
> -		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
> -		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
> -		F(TOPOEXT) | F(PERFCTR_CORE);
> -
> -	/* cpuid 0x80000008.ebx */
> -	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
> -		F(CLZERO) | F(XSAVEERPTR) |
> -		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
> -		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
> -
> -	/* cpuid 0xC0000001.edx */
> -	const u32 kvm_cpuid_C000_0001_edx_x86_features =
> -		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
> -		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
> -		F(PMM) | F(PMM_EN);
> -
> -	/* cpuid 0xD.1.eax */
> -	const u32 kvm_cpuid_D_1_eax_x86_features =
> -		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES);
> -
>  	/* all calls to cpuid_count() should be made on the same cpu */
>  	get_cpu();
>  
> @@ -495,9 +515,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>  		entry->eax = min(entry->eax, 0x1fU);
>  		break;
>  	case 1:
> -		entry->edx &= kvm_cpuid_1_edx_x86_features;
>  		cpuid_entry_mask(entry, CPUID_1_EDX);
> -		entry->ecx &= kvm_cpuid_1_ecx_x86_features;
>  		cpuid_entry_mask(entry, CPUID_1_ECX);
>  		/* we support x2apic emulation even if host does not support
>  		 * it since we emulate x2apic in software */
> @@ -607,7 +625,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>  		if (!entry)
>  			goto out;
>  
> -		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
>  		cpuid_entry_mask(entry, CPUID_D_1_EAX);
>  
>  		if (!kvm_x86_ops->xsaves_supported())
> @@ -691,9 +708,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>  		entry->eax = min(entry->eax, 0x8000001f);
>  		break;
>  	case 0x80000001:
> -		entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
>  		cpuid_entry_mask(entry, CPUID_8000_0001_EDX);
> -		entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
>  		cpuid_entry_mask(entry, CPUID_8000_0001_ECX);
>  		break;
>  	case 0x80000007: /* Advanced power management */
> @@ -712,7 +727,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>  			g_phys_as = phys_as;
>  		entry->eax = g_phys_as | (virt_as << 8);
>  		entry->edx = 0;
> -		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
>  		cpuid_entry_mask(entry, CPUID_8000_0008_EBX);
>  		/*
>  		 * AMD has separate bits for each SPEC_CTRL bit.
> @@ -755,7 +769,6 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
>  		entry->eax = min(entry->eax, 0xC0000004);
>  		break;
>  	case 0xC0000001:
> -		entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
>  		cpuid_entry_mask(entry, CPUID_C000_0001_EDX);
>  		break;
>  	case 3: /* Processor serial number */
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index 41ff94a7d3e0..c64283582d96 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -6,6 +6,9 @@
>  #include <asm/cpu.h>
>  #include <asm/processor.h>
>  
> +extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
> +void kvm_set_cpu_caps(void);
> +
>  int kvm_update_cpuid(struct kvm_vcpu *vcpu);
>  struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
>  					      u32 function, u32 index);
> @@ -255,4 +258,20 @@ static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
>  		  MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
>  }
>  
> +static __always_inline void kvm_cpu_cap_clear(unsigned x86_feature)
> +{
> +	unsigned x86_leaf = x86_feature / 32;
> +
> +	reverse_cpuid_check(x86_leaf);
> +	kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
> +}
> +
> +static __always_inline void kvm_cpu_cap_set(unsigned x86_feature)
> +{
> +	unsigned x86_leaf = x86_feature / 32;
> +
> +	reverse_cpuid_check(x86_leaf);
> +	kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
> +}
> +
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index f90c56c0c64a..c5ed199d6cd9 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9591,6 +9591,8 @@ int kvm_arch_hardware_setup(void)
>  {
>  	int r;
>  
> +	kvm_set_cpu_caps();
> +
>  	r = kvm_x86_ops->hardware_setup();
>  	if (r != 0)
>  		return r;

Apart from the BUILD_BUG_ON() condition in cpuid_entry_mask()

Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Sean Christopherson Feb. 24, 2020, 10:57 p.m. UTC | #2
On Mon, Feb 24, 2020 at 05:32:54PM +0100, Vitaly Kuznetsov wrote:
> Sean Christopherson <sean.j.christopherson@intel.com> writes:
> 
> > Calculate the CPUID masks for KVM_GET_SUPPORTED_CPUID at load time using
> > what is effectively a KVM-adjusted copy of boot_cpu_data, or more
> > precisely, the x86_capability array in boot_cpu_data.
> >
> > In terms of KVM support, the vast majority of CPUID feature bits are
> > constant, and *all* feature support is known at KVM load time.  Rather
> > than apply boot_cpu_data, which is effectively read-only after init,
> > at runtime, copy it into a KVM-specific array and use *that* to mask
> > CPUID registers.
> >
> > In additional to consolidating the masking, kvm_cpu_caps can be adjusted
> > by SVM/VMX at load time and thus eliminate all feature bit manipulation
> > in ->set_supported_cpuid().
> >
> > No functional change intended.
> >
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > ---
> >  arch/x86/kvm/cpuid.c | 229 +++++++++++++++++++++++--------------------
> >  arch/x86/kvm/cpuid.h |  19 ++++
> >  arch/x86/kvm/x86.c   |   2 +
> >  3 files changed, 142 insertions(+), 108 deletions(-)
> >
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 20a7af320291..c2a4c9df49a9 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -24,6 +24,13 @@
> >  #include "trace.h"
> >  #include "pmu.h"
> >  
> > +/*
> > + * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
> > + * aligned to sizeof(unsigned long) because it's not accessed via bitops.
> > + */
> > +u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
> > +EXPORT_SYMBOL_GPL(kvm_cpu_caps);
> > +
> >  static u32 xstate_required_size(u64 xstate_bv, bool compacted)
> >  {
> >  	int feature_bit = 0;
> > @@ -259,7 +266,119 @@ static __always_inline void cpuid_entry_mask(struct kvm_cpuid_entry2 *entry,
> >  {
> >  	u32 *reg = cpuid_entry_get_reg(entry, leaf * 32);
> >  
> > -	*reg &= boot_cpu_data.x86_capability[leaf];
> > +	BUILD_BUG_ON(leaf > ARRAY_SIZE(kvm_cpu_caps));
> 
> Should this be '>=' ?

Yep, nice catch.

> > +	*reg &= kvm_cpu_caps[leaf];
> > +}
> > +
> > +static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
> > +{
> > +	reverse_cpuid_check(leaf);
> > +	kvm_cpu_caps[leaf] &= mask;
> > +}
> > +
> > +void kvm_set_cpu_caps(void)
> > +{
> > +	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
> > +#ifdef CONFIG_X86_64
> > +	unsigned f_gbpages = F(GBPAGES);
> > +	unsigned f_lm = F(LM);
> > +#else
> > +	unsigned f_gbpages = 0;
> > +	unsigned f_lm = 0;
> > +#endif
> 
> Three too many bare 'unsinged's :-)

Roger that, I'll fix this up.

> > +
> > +	BUILD_BUG_ON(sizeof(kvm_cpu_caps) >
> > +		     sizeof(boot_cpu_data.x86_capability));
> > +
> > +	memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
> > +	       sizeof(kvm_cpu_caps));
> > +
> > +	kvm_cpu_cap_mask(CPUID_1_EDX,
> > +		F(FPU) | F(VME) | F(DE) | F(PSE) |
> > +		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
> > +		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
> > +		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
> > +		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
> > +		0 /* Reserved, DS, ACPI */ | F(MMX) |
> > +		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
> > +		0 /* HTT, TM, Reserved, PBE */
> > +	);
> > +
> > +	kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
> > +		F(FPU) | F(VME) | F(DE) | F(PSE) |
> > +		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
> > +		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
> > +		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
> > +		F(PAT) | F(PSE36) | 0 /* Reserved */ |
> > +		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> > +		F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
> > +		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
> > +	);
> > +
> > +	kvm_cpu_cap_mask(CPUID_1_ECX,
> > +		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
> > +		 * but *not* advertised to guests via CPUID ! */
> > +		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> > +		0 /* DS-CPL, VMX, SMX, EST */ |
> > +		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> > +		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> > +		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
> > +		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> > +		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
> > +		F(F16C) | F(RDRAND)
> > +	);
> 
> I would suggest we order things by CPUID_NUM here, i.e.
> 
> CPUID_1_ECX
> CPUID_1_EDX
> CPUID_7_1_EAX
> CPUID_7_0_EBX
> CPUID_7_ECX
> CPUID_7_EDX
> CPUID_D_1_EAX
> ...

Hmm, generally speaking I agree, but I didn't want to change the ordering
in this patch when moving the code.  Throw a patch on top?  Leave as is?
Something else?
Vitaly Kuznetsov Feb. 24, 2020, 11:20 p.m. UTC | #3
Sean Christopherson <sean.j.christopherson@intel.com> writes:

> On Mon, Feb 24, 2020 at 05:32:54PM +0100, Vitaly Kuznetsov wrote:
>> Sean Christopherson <sean.j.christopherson@intel.com> writes:
>> 

...

>
>> > +
>> > +	BUILD_BUG_ON(sizeof(kvm_cpu_caps) >
>> > +		     sizeof(boot_cpu_data.x86_capability));
>> > +
>> > +	memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
>> > +	       sizeof(kvm_cpu_caps));
>> > +
>> > +	kvm_cpu_cap_mask(CPUID_1_EDX,
>> > +		F(FPU) | F(VME) | F(DE) | F(PSE) |
>> > +		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
>> > +		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
>> > +		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
>> > +		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
>> > +		0 /* Reserved, DS, ACPI */ | F(MMX) |
>> > +		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
>> > +		0 /* HTT, TM, Reserved, PBE */
>> > +	);
>> > +
>> > +	kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
>> > +		F(FPU) | F(VME) | F(DE) | F(PSE) |
>> > +		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
>> > +		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
>> > +		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
>> > +		F(PAT) | F(PSE36) | 0 /* Reserved */ |
>> > +		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
>> > +		F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
>> > +		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
>> > +	);
>> > +
>> > +	kvm_cpu_cap_mask(CPUID_1_ECX,
>> > +		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
>> > +		 * but *not* advertised to guests via CPUID ! */
>> > +		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
>> > +		0 /* DS-CPL, VMX, SMX, EST */ |
>> > +		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
>> > +		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
>> > +		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
>> > +		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
>> > +		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
>> > +		F(F16C) | F(RDRAND)
>> > +	);
>> 
>> I would suggest we order things by CPUID_NUM here, i.e.
>> 
>> CPUID_1_ECX
>> CPUID_1_EDX
>> CPUID_7_1_EAX
>> CPUID_7_0_EBX
>> CPUID_7_ECX
>> CPUID_7_EDX
>> CPUID_D_1_EAX
>> ...
>
> Hmm, generally speaking I agree, but I didn't want to change the ordering
> in this patch when moving the code.  Throw a patch on top?  Leave as is?
> Something else?

My line of thought was: it's not a mechanical "s,const u32
xxx_x86_features =,kvm_cpu_cap_mask...," change, things get moved from
do_cpuid_7_mask() and __do_cpuid_func() so we may as well re-order them,
reviewing-wise it's more or less the same. But honestly, this is very
minor, feel free to leave as-is.
Sean Christopherson Feb. 24, 2020, 11:25 p.m. UTC | #4
On Tue, Feb 25, 2020 at 12:20:03AM +0100, Vitaly Kuznetsov wrote:
> Sean Christopherson <sean.j.christopherson@intel.com> writes:
> 
> > On Mon, Feb 24, 2020 at 05:32:54PM +0100, Vitaly Kuznetsov wrote:
> >> Sean Christopherson <sean.j.christopherson@intel.com> writes:
> >> 
> 
> ...
> 
> >
> >> > +
> >> > +	BUILD_BUG_ON(sizeof(kvm_cpu_caps) >
> >> > +		     sizeof(boot_cpu_data.x86_capability));
> >> > +
> >> > +	memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
> >> > +	       sizeof(kvm_cpu_caps));
> >> > +
> >> > +	kvm_cpu_cap_mask(CPUID_1_EDX,
> >> > +		F(FPU) | F(VME) | F(DE) | F(PSE) |
> >> > +		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
> >> > +		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
> >> > +		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
> >> > +		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
> >> > +		0 /* Reserved, DS, ACPI */ | F(MMX) |
> >> > +		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
> >> > +		0 /* HTT, TM, Reserved, PBE */
> >> > +	);
> >> > +
> >> > +	kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
> >> > +		F(FPU) | F(VME) | F(DE) | F(PSE) |
> >> > +		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
> >> > +		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
> >> > +		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
> >> > +		F(PAT) | F(PSE36) | 0 /* Reserved */ |
> >> > +		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
> >> > +		F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
> >> > +		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
> >> > +	);
> >> > +
> >> > +	kvm_cpu_cap_mask(CPUID_1_ECX,
> >> > +		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
> >> > +		 * but *not* advertised to guests via CPUID ! */
> >> > +		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
> >> > +		0 /* DS-CPL, VMX, SMX, EST */ |
> >> > +		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
> >> > +		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
> >> > +		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
> >> > +		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
> >> > +		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
> >> > +		F(F16C) | F(RDRAND)
> >> > +	);
> >> 
> >> I would suggest we order things by CPUID_NUM here, i.e.
> >> 
> >> CPUID_1_ECX
> >> CPUID_1_EDX
> >> CPUID_7_1_EAX
> >> CPUID_7_0_EBX
> >> CPUID_7_ECX
> >> CPUID_7_EDX
> >> CPUID_D_1_EAX
> >> ...
> >
> > Hmm, generally speaking I agree, but I didn't want to change the ordering
> > in this patch when moving the code.  Throw a patch on top?  Leave as is?
> > Something else?
> 
> My line of thought was: it's not a mechanical "s,const u32
> xxx_x86_features =,kvm_cpu_cap_mask...," change, things get moved from
> do_cpuid_7_mask() and __do_cpuid_func() so we may as well re-order them,
> reviewing-wise it's more or less the same. But honestly, this is very
> minor, feel free to leave as-is.

Fair enough, I'll throw it into this patch.
diff mbox series

Patch

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 20a7af320291..c2a4c9df49a9 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -24,6 +24,13 @@ 
 #include "trace.h"
 #include "pmu.h"
 
+/*
+ * Unlike "struct cpuinfo_x86.x86_capability", kvm_cpu_caps doesn't need to be
+ * aligned to sizeof(unsigned long) because it's not accessed via bitops.
+ */
+u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
+EXPORT_SYMBOL_GPL(kvm_cpu_caps);
+
 static u32 xstate_required_size(u64 xstate_bv, bool compacted)
 {
 	int feature_bit = 0;
@@ -259,7 +266,119 @@  static __always_inline void cpuid_entry_mask(struct kvm_cpuid_entry2 *entry,
 {
 	u32 *reg = cpuid_entry_get_reg(entry, leaf * 32);
 
-	*reg &= boot_cpu_data.x86_capability[leaf];
+	BUILD_BUG_ON(leaf > ARRAY_SIZE(kvm_cpu_caps));
+	*reg &= kvm_cpu_caps[leaf];
+}
+
+static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
+{
+	reverse_cpuid_check(leaf);
+	kvm_cpu_caps[leaf] &= mask;
+}
+
+void kvm_set_cpu_caps(void)
+{
+	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
+#ifdef CONFIG_X86_64
+	unsigned f_gbpages = F(GBPAGES);
+	unsigned f_lm = F(LM);
+#else
+	unsigned f_gbpages = 0;
+	unsigned f_lm = 0;
+#endif
+
+	BUILD_BUG_ON(sizeof(kvm_cpu_caps) >
+		     sizeof(boot_cpu_data.x86_capability));
+
+	memcpy(&kvm_cpu_caps, &boot_cpu_data.x86_capability,
+	       sizeof(kvm_cpu_caps));
+
+	kvm_cpu_cap_mask(CPUID_1_EDX,
+		F(FPU) | F(VME) | F(DE) | F(PSE) |
+		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
+		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
+		0 /* Reserved, DS, ACPI */ | F(MMX) |
+		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
+		0 /* HTT, TM, Reserved, PBE */
+	);
+
+	kvm_cpu_cap_mask(CPUID_8000_0001_EDX,
+		F(FPU) | F(VME) | F(DE) | F(PSE) |
+		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
+		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
+		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
+		F(PAT) | F(PSE36) | 0 /* Reserved */ |
+		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
+		F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
+		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW)
+	);
+
+	kvm_cpu_cap_mask(CPUID_1_ECX,
+		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
+		 * but *not* advertised to guests via CPUID ! */
+		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
+		0 /* DS-CPL, VMX, SMX, EST */ |
+		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
+		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
+		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
+		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
+		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
+		F(F16C) | F(RDRAND)
+	);
+
+	kvm_cpu_cap_mask(CPUID_7_0_EBX,
+		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
+		F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
+		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
+		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
+		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
+	);
+
+	kvm_cpu_cap_mask(CPUID_7_ECX,
+		F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
+		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/
+	);
+	/* Set LA57 based on hardware capability. */
+	if (cpuid_ecx(7) & F(LA57))
+		kvm_cpu_cap_set(X86_FEATURE_LA57);
+
+	kvm_cpu_cap_mask(CPUID_7_EDX,
+		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
+		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
+		F(MD_CLEAR)
+	);
+
+	kvm_cpu_cap_mask(CPUID_7_1_EAX,
+		F(AVX512_BF16)
+	);
+
+	kvm_cpu_cap_mask(CPUID_D_1_EAX,
+		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES)
+	);
+
+	kvm_cpu_cap_mask(CPUID_8000_0001_ECX,
+		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
+		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
+		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
+		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
+		F(TOPOEXT) | F(PERFCTR_CORE)
+	);
+
+	kvm_cpu_cap_mask(CPUID_8000_0008_EBX,
+		F(CLZERO) | F(XSAVEERPTR) |
+		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
+		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON)
+	);
+
+	kvm_cpu_cap_mask(CPUID_C000_0001_EDX,
+		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
+		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
+		F(PMM) | F(PMM_EN)
+	);
 }
 
 struct kvm_cpuid_array {
@@ -339,48 +458,13 @@  static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
 
 static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry)
 {
-	unsigned f_la57;
-
-	/* cpuid 7.0.ebx */
-	const u32 kvm_cpuid_7_0_ebx_x86_features =
-		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-		F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
-		F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
-		F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
-		F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/;
-
-	/* cpuid 7.0.ecx*/
-	const u32 kvm_cpuid_7_0_ecx_x86_features =
-		F(AVX512VBMI) | F(LA57) | 0 /*PKU*/ | 0 /*OSPKE*/ | F(RDPID) |
-		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
-		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
-
-	/* cpuid 7.0.edx*/
-	const u32 kvm_cpuid_7_0_edx_x86_features =
-		F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
-		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
-		F(MD_CLEAR);
-
-	/* cpuid 7.1.eax */
-	const u32 kvm_cpuid_7_1_eax_x86_features =
-		F(AVX512_BF16);
-
 	switch (entry->index) {
 	case 0:
 		entry->eax = min(entry->eax, 1u);
-		entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
 		cpuid_entry_mask(entry, CPUID_7_0_EBX);
 		/* TSC_ADJUST is emulated */
 		cpuid_entry_set(entry, X86_FEATURE_TSC_ADJUST);
-
-		entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
-		f_la57 = cpuid_entry_get(entry, X86_FEATURE_LA57);
 		cpuid_entry_mask(entry, CPUID_7_ECX);
-		/* Set LA57 based on hardware capability. */
-		entry->ecx |= f_la57;
-
-		entry->edx &= kvm_cpuid_7_0_edx_x86_features;
 		cpuid_entry_mask(entry, CPUID_7_EDX);
 		if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
 			cpuid_entry_set(entry, X86_FEATURE_SPEC_CTRL);
@@ -395,7 +479,7 @@  static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry)
 		cpuid_entry_set(entry, X86_FEATURE_ARCH_CAPABILITIES);
 		break;
 	case 1:
-		entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+		cpuid_entry_mask(entry, CPUID_7_1_EAX);
 		entry->ebx = 0;
 		entry->ecx = 0;
 		entry->edx = 0;
@@ -414,72 +498,8 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 {
 	struct kvm_cpuid_entry2 *entry;
 	int r, i, max_idx;
-	unsigned f_nx = is_efer_nx() ? F(NX) : 0;
-#ifdef CONFIG_X86_64
-	unsigned f_gbpages = F(GBPAGES);
-	unsigned f_lm = F(LM);
-#else
-	unsigned f_gbpages = 0;
-	unsigned f_lm = 0;
-#endif
 	unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
 
-	/* cpuid 1.edx */
-	const u32 kvm_cpuid_1_edx_x86_features =
-		F(FPU) | F(VME) | F(DE) | F(PSE) |
-		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
-		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
-		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
-		F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
-		0 /* Reserved, DS, ACPI */ | F(MMX) |
-		F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
-		0 /* HTT, TM, Reserved, PBE */;
-	/* cpuid 0x80000001.edx */
-	const u32 kvm_cpuid_8000_0001_edx_x86_features =
-		F(FPU) | F(VME) | F(DE) | F(PSE) |
-		F(TSC) | F(MSR) | F(PAE) | F(MCE) |
-		F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
-		F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
-		F(PAT) | F(PSE36) | 0 /* Reserved */ |
-		f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
-		F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) |
-		0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
-	/* cpuid 1.ecx */
-	const u32 kvm_cpuid_1_ecx_x86_features =
-		/* NOTE: MONITOR (and MWAIT) are emulated as NOP,
-		 * but *not* advertised to guests via CPUID ! */
-		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
-		0 /* DS-CPL, VMX, SMX, EST */ |
-		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
-		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
-		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
-		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
-		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
-		F(F16C) | F(RDRAND);
-	/* cpuid 0x80000001.ecx */
-	const u32 kvm_cpuid_8000_0001_ecx_x86_features =
-		F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ |
-		F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
-		F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
-		0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM) |
-		F(TOPOEXT) | F(PERFCTR_CORE);
-
-	/* cpuid 0x80000008.ebx */
-	const u32 kvm_cpuid_8000_0008_ebx_x86_features =
-		F(CLZERO) | F(XSAVEERPTR) |
-		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
-		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
-
-	/* cpuid 0xC0000001.edx */
-	const u32 kvm_cpuid_C000_0001_edx_x86_features =
-		F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
-		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
-		F(PMM) | F(PMM_EN);
-
-	/* cpuid 0xD.1.eax */
-	const u32 kvm_cpuid_D_1_eax_x86_features =
-		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES);
-
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
 
@@ -495,9 +515,7 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		entry->eax = min(entry->eax, 0x1fU);
 		break;
 	case 1:
-		entry->edx &= kvm_cpuid_1_edx_x86_features;
 		cpuid_entry_mask(entry, CPUID_1_EDX);
-		entry->ecx &= kvm_cpuid_1_ecx_x86_features;
 		cpuid_entry_mask(entry, CPUID_1_ECX);
 		/* we support x2apic emulation even if host does not support
 		 * it since we emulate x2apic in software */
@@ -607,7 +625,6 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		if (!entry)
 			goto out;
 
-		entry->eax &= kvm_cpuid_D_1_eax_x86_features;
 		cpuid_entry_mask(entry, CPUID_D_1_EAX);
 
 		if (!kvm_x86_ops->xsaves_supported())
@@ -691,9 +708,7 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		entry->eax = min(entry->eax, 0x8000001f);
 		break;
 	case 0x80000001:
-		entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
 		cpuid_entry_mask(entry, CPUID_8000_0001_EDX);
-		entry->ecx &= kvm_cpuid_8000_0001_ecx_x86_features;
 		cpuid_entry_mask(entry, CPUID_8000_0001_ECX);
 		break;
 	case 0x80000007: /* Advanced power management */
@@ -712,7 +727,6 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 			g_phys_as = phys_as;
 		entry->eax = g_phys_as | (virt_as << 8);
 		entry->edx = 0;
-		entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
 		cpuid_entry_mask(entry, CPUID_8000_0008_EBX);
 		/*
 		 * AMD has separate bits for each SPEC_CTRL bit.
@@ -755,7 +769,6 @@  static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		entry->eax = min(entry->eax, 0xC0000004);
 		break;
 	case 0xC0000001:
-		entry->edx &= kvm_cpuid_C000_0001_edx_x86_features;
 		cpuid_entry_mask(entry, CPUID_C000_0001_EDX);
 		break;
 	case 3: /* Processor serial number */
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 41ff94a7d3e0..c64283582d96 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -6,6 +6,9 @@ 
 #include <asm/cpu.h>
 #include <asm/processor.h>
 
+extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
+void kvm_set_cpu_caps(void);
+
 int kvm_update_cpuid(struct kvm_vcpu *vcpu);
 struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 					      u32 function, u32 index);
@@ -255,4 +258,20 @@  static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
 		  MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
 }
 
+static __always_inline void kvm_cpu_cap_clear(unsigned x86_feature)
+{
+	unsigned x86_leaf = x86_feature / 32;
+
+	reverse_cpuid_check(x86_leaf);
+	kvm_cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
+}
+
+static __always_inline void kvm_cpu_cap_set(unsigned x86_feature)
+{
+	unsigned x86_leaf = x86_feature / 32;
+
+	reverse_cpuid_check(x86_leaf);
+	kvm_cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
+}
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f90c56c0c64a..c5ed199d6cd9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9591,6 +9591,8 @@  int kvm_arch_hardware_setup(void)
 {
 	int r;
 
+	kvm_set_cpu_caps();
+
 	r = kvm_x86_ops->hardware_setup();
 	if (r != 0)
 		return r;