diff mbox series

[v4,10/19] x86/cpu: Detect VMX features on Intel, Centaur and Zhaoxin CPUs

Message ID 20191128014016.4389-11-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show
Series x86/cpu: Clean up handling of VMX features | expand

Commit Message

Sean Christopherson Nov. 28, 2019, 1:40 a.m. UTC
Add an entry in struct cpuinfo_x86 to track VMX capabilities and fill
the capabilities during IA32_FEAT_CTL MSR initialization.

Make the VMX capabilities dependent on IA32_FEAT_CTL and
X86_FEATURE_NAMES so as to avoid unnecessary overhead on CPUs that can't
possibly support VMX, or when /proc/cpuinfo is not available.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/Kconfig.cpu               |  4 ++
 arch/x86/include/asm/processor.h   |  3 ++
 arch/x86/include/asm/vmxfeatures.h |  5 +++
 arch/x86/kernel/cpu/common.c       |  3 ++
 arch/x86/kernel/cpu/feat_ctl.c     | 70 ++++++++++++++++++++++++++++++
 5 files changed, 85 insertions(+)

Comments

Borislav Petkov Dec. 12, 2019, 11:38 a.m. UTC | #1
On Wed, Nov 27, 2019 at 05:40:07PM -0800, Sean Christopherson wrote:
> diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
> index a46c9e46f937..93268bde662a 100644
> --- a/arch/x86/kernel/cpu/feat_ctl.c
> +++ b/arch/x86/kernel/cpu/feat_ctl.c
> @@ -4,6 +4,72 @@
>  #include <asm/cpufeature.h>
>  #include <asm/msr-index.h>
>  #include <asm/processor.h>
> +#include <asm/vmx.h>
> +
> +#ifdef CONFIG_X86_VMX_FEATURE_NAMES
> +enum vmx_feature_leafs {
> +	MISC_FEATURES = 0,
> +	PRIMARY_PROC_CTLS,
> +	SECONDARY_PROC_CTLS,
> +	NR_VMX_FEATURE_WORDS,
> +};
> +
> +#define F(x) BIT(VMX_FEATURE_##x & 0x1f)

Eww, this F-thing has been always bugging me, especially if it means
something a little different each time:

arch/x86/crypto/blowfish-x86_64-asm_64.S:59:#define F() \
arch/x86/kernel/cpu/feat_ctl.c:17:#define F(x) BIT(VMX_FEATURE_##x & 0x1f)
arch/x86/kvm/cpuid.c:65:#define F(x) bit(X86_FEATURE_##x)
arch/x86/kvm/emulate.c:4393:#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
arch/x86/kvm/svm.c:5927:#define F(x) bit(X86_FEATURE_##x)

I guess you can call yours VMX_F() or so, just so that it's name is
something different.

> +static void init_vmx_capabilities(struct cpuinfo_x86 *c)
> +{
> +	u32 supported, funcs, ept, vpid, ign;
> +
> +	BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS);
> +
> +	/*
> +	 * The high bits contain the allowed-1 settings, i.e. features that can
> +	 * be turned on.  The low bits contain the allowed-0 settings, i.e.
> +	 * features that can be turned off.  Ignore the allowed-0 settings,
> +	 * if a feature can be turned on then it's supported.
> +	 */
> +	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ign, supported);
> +	c->vmx_capability[PRIMARY_PROC_CTLS] = supported;
> +
> +	rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported);
> +	c->vmx_capability[SECONDARY_PROC_CTLS] = supported;
> +
> +	rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported);
> +	rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs);
> +
> +	/*
> +	 * Except for EPT+VPID, which enumerates support for both in a single
> +	 * MSR, low for EPT, high for VPID.
> +	 */
> +	rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &ept, &vpid);

Right, so this is a garden variety of rdmsr() and rdmsr_safe() and
the safe variant's retval needs to be checked, strictly speaking. It
probably doesn't matter here since you'll get 0s if it fails, which
means feature not supported, so all good.

But I guess you can still use rdmsr_safe() everywhere just so it doesn't
cause head scratching in the future, when one looks at that code.

> +#endif /* CONFIG_X86_VMX_FEATURE_NAMES */
>  
>  #undef pr_fmt
>  #define pr_fmt(fmt)	"x86/cpu: " fmt
> @@ -50,5 +116,9 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
>  		pr_err_once("VMX (%s TXT) disabled by BIOS\n",
>  			    tboot ? "inside" : "outside");
>  		clear_cpu_cap(c, X86_FEATURE_VMX);
> +	} else {
> +#ifdef CONFIG_X86_VMX_FEATURE_NAMES
> +		init_vmx_capabilities(c);
> +#endif

Can't say that I'm happy about all that ifdeffery but I guess we need
to perpetuate this since X86_FEATURE_NAMES is there for embedded. In
practice, probably no one disables it...
Sean Christopherson Dec. 12, 2019, 5:55 p.m. UTC | #2
On Thu, Dec 12, 2019 at 12:38:38PM +0100, Borislav Petkov wrote:
> On Wed, Nov 27, 2019 at 05:40:07PM -0800, Sean Christopherson wrote:
> > +static void init_vmx_capabilities(struct cpuinfo_x86 *c)
> > +{
> > +	u32 supported, funcs, ept, vpid, ign;
> > +
> > +	BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS);
> > +
> > +	/*
> > +	 * The high bits contain the allowed-1 settings, i.e. features that can
> > +	 * be turned on.  The low bits contain the allowed-0 settings, i.e.
> > +	 * features that can be turned off.  Ignore the allowed-0 settings,
> > +	 * if a feature can be turned on then it's supported.
> > +	 */
> > +	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ign, supported);
> > +	c->vmx_capability[PRIMARY_PROC_CTLS] = supported;
> > +
> > +	rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported);
> > +	c->vmx_capability[SECONDARY_PROC_CTLS] = supported;
> > +
> > +	rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported);
> > +	rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs);
> > +
> > +	/*
> > +	 * Except for EPT+VPID, which enumerates support for both in a single
> > +	 * MSR, low for EPT, high for VPID.
> > +	 */
> > +	rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &ept, &vpid);
> 
> Right, so this is a garden variety of rdmsr() and rdmsr_safe() and
> the safe variant's retval needs to be checked, strictly speaking. It
> probably doesn't matter here since you'll get 0s if it fails, which
> means feature not supported, so all good.
> 
> But I guess you can still use rdmsr_safe() everywhere just so it doesn't
> cause head scratching in the future, when one looks at that code.

The reasoning behind using vanilla rdmsr() on PROC and PIN controls is that
those MSRs should exist on any CPU that supports VMX, i.e. we want the WARN.

The alternative would be to use rdmsr_safe() for everything and then
explicitly disable VMX if a fault on PROC or PIN occurs, but that circles
us back to the handling a fault on rdmsr(MSR_IA32_FEAT_CTL), i.e. is it
really worth gracefully handling a fault that should never occur?

> 
> > +#endif /* CONFIG_X86_VMX_FEATURE_NAMES */
> >  
> >  #undef pr_fmt
> >  #define pr_fmt(fmt)	"x86/cpu: " fmt
> > @@ -50,5 +116,9 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
> >  		pr_err_once("VMX (%s TXT) disabled by BIOS\n",
> >  			    tboot ? "inside" : "outside");
> >  		clear_cpu_cap(c, X86_FEATURE_VMX);
> > +	} else {
> > +#ifdef CONFIG_X86_VMX_FEATURE_NAMES
> > +		init_vmx_capabilities(c);
> > +#endif
> 
> Can't say that I'm happy about all that ifdeffery but I guess we need
> to perpetuate this since X86_FEATURE_NAMES is there for embedded. In
> practice, probably no one disables it...

Ya, systemd wasn't happy when I tried booting without X86_FEATURE_NAMES.
Borislav Petkov Dec. 12, 2019, 6:24 p.m. UTC | #3
On Thu, Dec 12, 2019 at 09:55:11AM -0800, Sean Christopherson wrote:
> The reasoning behind using vanilla rdmsr() on PROC and PIN controls is that
> those MSRs should exist on any CPU that supports VMX, i.e. we want the WARN.
> 
> The alternative would be to use rdmsr_safe() for everything and then
> explicitly disable VMX if a fault on PROC or PIN occurs, but that circles
> us back to the handling a fault on rdmsr(MSR_IA32_FEAT_CTL), i.e. is it
> really worth gracefully handling a fault that should never occur?

No but pls put a comment above it explaining why we're doing rdmsr()
only with those two MSRs.

Thx.
diff mbox series

Patch

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 526425fcaedc..bc3a497c029c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -391,6 +391,10 @@  config IA32_FEAT_CTL
 	def_bool y
 	depends on CPU_SUP_INTEL || CPU_SUP_CENTAUR || CPU_SUP_ZHAOXIN
 
+config X86_VMX_FEATURE_NAMES
+	def_bool y
+	depends on IA32_FEAT_CTL && X86_FEATURE_NAMES
+
 menuconfig PROCESSOR_SELECT
 	bool "Supported processor vendors" if EXPERT
 	---help---
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bab513404606..3324cf036dfb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -85,6 +85,9 @@  struct cpuinfo_x86 {
 #ifdef CONFIG_X86_64
 	/* Number of 4K pages in DTLB/ITLB combined(in pages): */
 	int			x86_tlbsize;
+#endif
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+	__u32			vmx_capability[NVMXINTS];
 #endif
 	__u8			x86_virt_bits;
 	__u8			x86_phys_bits;
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index 1b96b03c1147..ab42d94e2359 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -2,6 +2,11 @@ 
 #ifndef _ASM_X86_VMXFEATURES_H
 #define _ASM_X86_VMXFEATURES_H
 
+/*
+ * Defines VMX CPU feature bits
+ */
+#define NVMXINTS			3 /* N 32-bit words worth of info */
+
 /*
  * Note: If the comment begins with a quoted string, that string is used
  * in /proc/cpuinfo instead of the macro name.  If the string is "",
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9d6a35a4586e..df1eacd26443 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1449,6 +1449,9 @@  static void identify_cpu(struct cpuinfo_x86 *c)
 #endif
 	c->x86_cache_alignment = c->x86_clflush_size;
 	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+	memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
+#endif
 
 	generic_identify(c);
 
diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c
index a46c9e46f937..93268bde662a 100644
--- a/arch/x86/kernel/cpu/feat_ctl.c
+++ b/arch/x86/kernel/cpu/feat_ctl.c
@@ -4,6 +4,72 @@ 
 #include <asm/cpufeature.h>
 #include <asm/msr-index.h>
 #include <asm/processor.h>
+#include <asm/vmx.h>
+
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+enum vmx_feature_leafs {
+	MISC_FEATURES = 0,
+	PRIMARY_PROC_CTLS,
+	SECONDARY_PROC_CTLS,
+	NR_VMX_FEATURE_WORDS,
+};
+
+#define F(x) BIT(VMX_FEATURE_##x & 0x1f)
+
+static void init_vmx_capabilities(struct cpuinfo_x86 *c)
+{
+	u32 supported, funcs, ept, vpid, ign;
+
+	BUILD_BUG_ON(NVMXINTS != NR_VMX_FEATURE_WORDS);
+
+	/*
+	 * The high bits contain the allowed-1 settings, i.e. features that can
+	 * be turned on.  The low bits contain the allowed-0 settings, i.e.
+	 * features that can be turned off.  Ignore the allowed-0 settings,
+	 * if a feature can be turned on then it's supported.
+	 */
+	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, ign, supported);
+	c->vmx_capability[PRIMARY_PROC_CTLS] = supported;
+
+	rdmsr_safe(MSR_IA32_VMX_PROCBASED_CTLS2, &ign, &supported);
+	c->vmx_capability[SECONDARY_PROC_CTLS] = supported;
+
+	rdmsr(MSR_IA32_VMX_PINBASED_CTLS, ign, supported);
+	rdmsr_safe(MSR_IA32_VMX_VMFUNC, &ign, &funcs);
+
+	/*
+	 * Except for EPT+VPID, which enumerates support for both in a single
+	 * MSR, low for EPT, high for VPID.
+	 */
+	rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP, &ept, &vpid);
+
+	/* Pin, EPT, VPID and VM-Func are merged into a single word. */
+	WARN_ON_ONCE(supported >> 16);
+	WARN_ON_ONCE(funcs >> 4);
+	c->vmx_capability[MISC_FEATURES] = (supported & 0xffff) |
+					   ((vpid & 0x1) << 16) |
+					   ((funcs & 0xf) << 28);
+
+	/* EPT bits are full on scattered and must be manually handled. */
+	if (ept & VMX_EPT_EXECUTE_ONLY_BIT)
+		c->vmx_capability[MISC_FEATURES] |= F(EPT_EXECUTE_ONLY);
+	if (ept & VMX_EPT_AD_BIT)
+		c->vmx_capability[MISC_FEATURES] |= F(EPT_AD);
+	if (ept & VMX_EPT_1GB_PAGE_BIT)
+		c->vmx_capability[MISC_FEATURES] |= F(EPT_1GB);
+
+	/* Synthetic APIC features that are aggregates of multiple features. */
+	if ((c->vmx_capability[PRIMARY_PROC_CTLS] & F(VIRTUAL_TPR)) &&
+	    (c->vmx_capability[SECONDARY_PROC_CTLS] & F(VIRT_APIC_ACCESSES)))
+		c->vmx_capability[MISC_FEATURES] |= F(FLEXPRIORITY);
+
+	if ((c->vmx_capability[PRIMARY_PROC_CTLS] & F(VIRTUAL_TPR)) &&
+	    (c->vmx_capability[SECONDARY_PROC_CTLS] & F(APIC_REGISTER_VIRT)) &&
+	    (c->vmx_capability[SECONDARY_PROC_CTLS] & F(VIRT_INTR_DELIVERY)) &&
+	    (c->vmx_capability[MISC_FEATURES] & F(POSTED_INTR)))
+		c->vmx_capability[MISC_FEATURES] |= F(APICV);
+}
+#endif /* CONFIG_X86_VMX_FEATURE_NAMES */
 
 #undef pr_fmt
 #define pr_fmt(fmt)	"x86/cpu: " fmt
@@ -50,5 +116,9 @@  void init_ia32_feat_ctl(struct cpuinfo_x86 *c)
 		pr_err_once("VMX (%s TXT) disabled by BIOS\n",
 			    tboot ? "inside" : "outside");
 		clear_cpu_cap(c, X86_FEATURE_VMX);
+	} else {
+#ifdef CONFIG_X86_VMX_FEATURE_NAMES
+		init_vmx_capabilities(c);
+#endif
 	}
 }