[1/2] KVM: VMX: Sanitize VM-Entry/VM-Exit control pairs at kvm_intel load time

Message ID 20220525210447.2758436-2-seanjc@google.com (mailing list archive)
State New, archived
Series KVM: VMX: Sanitize VM-Entry/VM-Exit pairs during setup

Commit Message

Sean Christopherson May 25, 2022, 9:04 p.m. UTC
Sanitize the VM-Entry/VM-Exit control pairs (load+load or load+clear)
during setup instead of checking both controls in a pair at runtime.  If
only one control is supported, KVM will report the associated feature as
not available, but will leave the supported control bit set in the VMCS
config, which could lead to corruption of host state.  E.g. if only the
VM-Entry control is supported and the feature is not dynamically toggled,
KVM will set the control in all VMCSes and load zeros without restoring
host state.

Note, while this is technically a bug fix, practically speaking no sane
CPU or VMM would support only one control.  KVM's behavior of checking
both controls is mostly pedantry.

Cc: Chenyi Qiang <chenyi.qiang@intel.com>
Cc: Lei Wang <lei4.wang@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/capabilities.h | 13 ++++--------
 arch/x86/kvm/vmx/vmx.c          | 35 +++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 9 deletions(-)
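
For reference, the rule the patch enforces at setup is that each pair must be
all-or-nothing: either both controls are supported, or both are cleared.  A
minimal sketch of the predicate, mirroring the loop added in vmx.c below (the
helper name is made up for illustration, this is not the actual KVM code):

    /* A VM-Entry/VM-Exit pair is consistent iff both bits are set or both clear. */
    static bool entry_exit_pair_ok(u32 vmentry_ctrl, u32 entry_bit,
                                   u32 vmexit_ctrl, u32 exit_bit)
    {
        return !(vmentry_ctrl & entry_bit) == !(vmexit_ctrl & exit_bit);
    }

If a pair is inconsistent, the patch warns once and clears both bits so that
neither the lone control nor the associated feature is used.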

Comments

Yuan Yao May 25, 2022, 11:27 p.m. UTC | #1
On Wed, May 25, 2022 at 09:04:46PM +0000, Sean Christopherson wrote:
> Sanitize the VM-Entry/VM-Exit control pairs (load+load or load+clear)
> during setup instead of checking both controls in a pair at runtime.  If
> only one control is supported, KVM will report the associated feature as
> not available, but will leave the supported control bit set in the VMCS
> config, which could lead to corruption of host state.  E.g. if only the
> VM-Entry control is supported and the feature is not dynamically toggled,
> KVM will set the control in all VMCSes and load zeros without restoring
> host state.
>
> Note, while this is technically a bug fix, practically speaking no sane
> CPU or VMM would support only one control.  KVM's behavior of checking
> both controls is mostly pedantry.
>
> Cc: Chenyi Qiang <chenyi.qiang@intel.com>
> Cc: Lei Wang <lei4.wang@intel.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/vmx/capabilities.h | 13 ++++--------
>  arch/x86/kvm/vmx/vmx.c          | 35 +++++++++++++++++++++++++++++++++
>  2 files changed, 39 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
> index dc2cb8a16e76..464bf39e4835 100644
> --- a/arch/x86/kvm/vmx/capabilities.h
> +++ b/arch/x86/kvm/vmx/capabilities.h
> @@ -97,20 +97,17 @@ static inline bool cpu_has_vmx_posted_intr(void)
>
>  static inline bool cpu_has_load_ia32_efer(void)
>  {
> -	return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_EFER) &&
> -	       (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_EFER);
> +	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_EFER;
>  }
>
>  static inline bool cpu_has_load_perf_global_ctrl(void)
>  {
> -	return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
> -	       (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
> +	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
>  }
>
>  static inline bool cpu_has_vmx_mpx(void)
>  {
> -	return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
> -		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
> +	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS;
>  }
>
>  static inline bool cpu_has_vmx_tpr_shadow(void)
> @@ -377,7 +374,6 @@ static inline bool cpu_has_vmx_intel_pt(void)
>  	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
>  	return (vmx_msr & MSR_IA32_VMX_MISC_INTEL_PT) &&
>  		(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) &&
> -		(vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_IA32_RTIT_CTL) &&
>  		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL);
>  }
>
> @@ -406,8 +402,7 @@ static inline bool vmx_pebs_supported(void)
>
>  static inline bool cpu_has_vmx_arch_lbr(void)
>  {
> -	return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_IA32_LBR_CTL) &&
> -		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_LBR_CTL);
> +	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_LBR_CTL;
>  }
>
>  static inline u64 vmx_get_perf_capabilities(void)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 6927f6e8ec31..2ea256de9aba 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2462,6 +2462,9 @@ static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
>  	return  ctl_opt & allowed;
>  }
>
> +#define VMCS_ENTRY_EXIT_PAIR(name, entry_action, exit_action) \
> +	{ VM_ENTRY_##entry_action##_##name, VM_EXIT_##exit_action##_##name }
> +
>  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>  				    struct vmx_capability *vmx_cap)
>  {
> @@ -2473,6 +2476,24 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>  	u64 _cpu_based_3rd_exec_control = 0;
>  	u32 _vmexit_control = 0;
>  	u32 _vmentry_control = 0;
> +	int i;
> +
> +	/*
> +	 * LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
> +	 * SAVE_IA32_PAT and SAVE_IA32_EFER are absent because KVM always
> +	 * intercepts writes to PAT and EFER, i.e. never enables those controls.
> +	 */
> +	struct {
> +		u32 entry_control;
> +		u32 exit_control;
> +	} vmcs_entry_exit_pairs[] = {
> +		VMCS_ENTRY_EXIT_PAIR(IA32_PERF_GLOBAL_CTRL, LOAD, LOAD),
> +		VMCS_ENTRY_EXIT_PAIR(IA32_PAT, LOAD, LOAD),
> +		VMCS_ENTRY_EXIT_PAIR(IA32_EFER, LOAD, LOAD),
> +		VMCS_ENTRY_EXIT_PAIR(BNDCFGS, LOAD, CLEAR),
> +		VMCS_ENTRY_EXIT_PAIR(IA32_RTIT_CTL, LOAD, CLEAR),
> +		VMCS_ENTRY_EXIT_PAIR(IA32_LBR_CTL, LOAD, CLEAR),
> +	};
>
>  	memset(vmcs_conf, 0, sizeof(*vmcs_conf));
>  	min = CPU_BASED_HLT_EXITING |
> @@ -2614,6 +2635,20 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>  				&_vmentry_control) < 0)
>  		return -EIO;
>
> +	for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
> +		u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
> +		u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
> +
> +		if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
> +			continue;
> +
> +		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
> +			     _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);

How about printing "n_ctrl, x_ctrl" instead?  In the 0/1 or 1/0 case that
shows the actual control bits of the inconsistent pair instead of printing
0 for the unsupported side.
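
That is, something like this (a sketch of the suggestion, not a posted change):

    pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
                 n_ctrl, x_ctrl);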

> +
> +		_vmentry_control &= ~n_ctrl;
> +		_vmexit_control &= ~x_ctrl;
> +	}
> +
>  	/*
>  	 * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
>  	 * can't be used due to an errata where VM Exit may incorrectly clear
> --
> 2.36.1.124.g0e6072fb45-goog
>
Sean Christopherson May 26, 2022, 12:42 a.m. UTC | #2
On Thu, May 26, 2022, Yuan Yao wrote:
> On Wed, May 25, 2022 at 09:04:46PM +0000, Sean Christopherson wrote:
> > @@ -2614,6 +2635,20 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> >  				&_vmentry_control) < 0)
> >  		return -EIO;
> >
> > +	for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
> > +		u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
> > +		u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
> > +
> > +		if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
> > +			continue;
> > +
> > +		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
> > +			     _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
> 
> How about printing "n_ctrl, x_ctrl" instead?  In the 0/1 or 1/0 case that
> shows the actual control bits of the inconsistent pair instead of printing
> 0 for the unsupported side.

I thought about adding the stringified control name to the output (yay macros),
but opted for the simplest approach because this should be a very, very rare
event.  All the necessary info is there, it just takes a bit of leg work to get
from a single control bit to the related control name and finally to its pair.

I'm not totally against printing more info, but if we're going to bother doing so,
my vote is to print names instead of numbers.
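
A hypothetical sketch of the stringified-name variant mentioned above (the
extra "name" field and the message text are made up for illustration):

    #define VMCS_ENTRY_EXIT_PAIR(name, entry_action, exit_action) \
        { VM_ENTRY_##entry_action##_##name, VM_EXIT_##exit_action##_##name, #name }

    struct {
        u32 entry_control;
        u32 exit_control;
        const char *name;
    } vmcs_entry_exit_pairs[] = { ... };

    pr_warn_once("Inconsistent VM-Entry/VM-Exit pair for %s, ignoring both\n",
                 vmcs_entry_exit_pairs[i].name);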
Yuan Yao May 26, 2022, 1:04 a.m. UTC | #3
On Thu, May 26, 2022 at 12:42:31AM +0000, Sean Christopherson wrote:
> On Thu, May 26, 2022, Yuan Yao wrote:
> > On Wed, May 25, 2022 at 09:04:46PM +0000, Sean Christopherson wrote:
> > > @@ -2614,6 +2635,20 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> > >  				&_vmentry_control) < 0)
> > >  		return -EIO;
> > >
> > > +	for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
> > > +		u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
> > > +		u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
> > > +
> > > +		if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
> > > +			continue;
> > > +
> > > +		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
> > > +			     _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
> >
> > How about printing "n_ctrl, x_ctrl" instead?  In the 0/1 or 1/0 case that
> > shows the actual control bits of the inconsistent pair instead of printing
> > 0 for the unsupported side.
>
> I thought about adding the stringified control name to the output (yay macros),
> but opted for the simplest approach because this should be a very, very rare
> event.  All the necessary info is there, it just takes a bit of leg work to get
> from a single control bit to the related control name and finally to its pair.
>
> I'm not totally against printing more info, but if we're going to bother doing so,
> my vote is to print names instead of numbers.

Agreed, the simplest approach makes sense since this should be a rare event, thanks.
Paolo Bonzini May 26, 2022, 10:39 a.m. UTC | #4
On 5/25/22 23:04, Sean Christopherson wrote:
>   
> +#define VMCS_ENTRY_EXIT_PAIR(name, entry_action, exit_action) \
> +	{ VM_ENTRY_##entry_action##_##name, VM_EXIT_##exit_action##_##name }
> +
>   static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>   				    struct vmx_capability *vmx_cap)
>   {
> @@ -2473,6 +2476,24 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
>   	u64 _cpu_based_3rd_exec_control = 0;
>   	u32 _vmexit_control = 0;
>   	u32 _vmentry_control = 0;
> +	int i;
> +
> +	/*
> +	 * LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
> +	 * SAVE_IA32_PAT and SAVE_IA32_EFER are absent because KVM always
> +	 * intercepts writes to PAT and EFER, i.e. never enables those controls.
> +	 */
> +	struct {
> +		u32 entry_control;
> +		u32 exit_control;
> +	} vmcs_entry_exit_pairs[] = {
> +		VMCS_ENTRY_EXIT_PAIR(IA32_PERF_GLOBAL_CTRL, LOAD, LOAD),
> +		VMCS_ENTRY_EXIT_PAIR(IA32_PAT, LOAD, LOAD),
> +		VMCS_ENTRY_EXIT_PAIR(IA32_EFER, LOAD, LOAD),
> +		VMCS_ENTRY_EXIT_PAIR(BNDCFGS, LOAD, CLEAR),
> +		VMCS_ENTRY_EXIT_PAIR(IA32_RTIT_CTL, LOAD, CLEAR),
> +		VMCS_ENTRY_EXIT_PAIR(IA32_LBR_CTL, LOAD, CLEAR),

No macros please, it's just as clear to expand them especially since the 
#define is far from the struct definition.

Paolo
Sean Christopherson May 26, 2022, 9:35 p.m. UTC | #5
On Thu, May 26, 2022, Paolo Bonzini wrote:
> On 5/25/22 23:04, Sean Christopherson wrote:
> > +#define VMCS_ENTRY_EXIT_PAIR(name, entry_action, exit_action) \
> > +	{ VM_ENTRY_##entry_action##_##name, VM_EXIT_##exit_action##_##name }
> > +
> >   static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> >   				    struct vmx_capability *vmx_cap)
> >   {
> > @@ -2473,6 +2476,24 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
> >   	u64 _cpu_based_3rd_exec_control = 0;
> >   	u32 _vmexit_control = 0;
> >   	u32 _vmentry_control = 0;
> > +	int i;
> > +
> > +	/*
> > +	 * LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
> > +	 * SAVE_IA32_PAT and SAVE_IA32_EFER are absent because KVM always
> > +	 * intercepts writes to PAT and EFER, i.e. never enables those controls.
> > +	 */
> > +	struct {
> > +		u32 entry_control;
> > +		u32 exit_control;
> > +	} vmcs_entry_exit_pairs[] = {
> > +		VMCS_ENTRY_EXIT_PAIR(IA32_PERF_GLOBAL_CTRL, LOAD, LOAD),
> > +		VMCS_ENTRY_EXIT_PAIR(IA32_PAT, LOAD, LOAD),
> > +		VMCS_ENTRY_EXIT_PAIR(IA32_EFER, LOAD, LOAD),
> > +		VMCS_ENTRY_EXIT_PAIR(BNDCFGS, LOAD, CLEAR),
> > +		VMCS_ENTRY_EXIT_PAIR(IA32_RTIT_CTL, LOAD, CLEAR),
> > +		VMCS_ENTRY_EXIT_PAIR(IA32_LBR_CTL, LOAD, CLEAR),
> 
> No macros please, it's just as clear to expand them especially since the
> #define is far from the struct definition.

It's not for clarity, it's to prevent plopping an EXIT control into the ENTRY
slot and vice versa.  I have a hell of a time trying to visually differentiate
those, and a buggy pair isn't guaranteed to be detected at runtime, e.g. if both
are swapped, all bets are off, and if one is duplicated, the warning may or
may not show up, depending on whether hardware actually supports at least one
of the controls, if not both.

With this, swapping LOAD and LOAD is obviously a nop, and swapping LOAD and CLEAR
will generate a compiler error.
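
For example (illustrating the point, not part of the patch):

    /* OK: expands to { VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS } */
    VMCS_ENTRY_EXIT_PAIR(BNDCFGS, LOAD, CLEAR),

    /*
     * Swapped actions would reference VM_ENTRY_CLEAR_BNDCFGS and
     * VM_EXIT_LOAD_BNDCFGS, neither of which is defined, so the build fails.
     */
    VMCS_ENTRY_EXIT_PAIR(BNDCFGS, CLEAR, LOAD),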

FWIW, I did originally have the array declared as static __initdata immediately
after the #define.  I moved away from that because __initdata doesn't play nice
with const, but then of course I forgot to add back the "const".  /facepalm
Paolo Bonzini May 27, 2022, 9:44 a.m. UTC | #6
On 5/26/22 23:35, Sean Christopherson wrote:
> It's not for clarity, it's to prevent plopping an EXIT control into the ENTRY
> slot and vice versa.  I have a hell of a time trying to visually differentiate
> those, and a buggy pair isn't guaranteed to be detected at runtime, e.g. if both
> are swapped, all bets are off, and if one is duplicated, odds the warn may or may
> not show up unless hardware actually supports at least one of the controls, if not
> both.

Make the lines longer than 80 characters and align each element so that
you have a column of VM_ENTRY_ names and a column of VM_EXIT_ names.  (It
would not work if they were the same length, but they aren't.)
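
For illustration, the open-coded array described above might look something
like this (a sketch; the posted patch keeps the macro):

    struct {
        u32 entry_control;
        u32 exit_control;
    } vmcs_entry_exit_pairs[] = {
        { VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,  VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL },
        { VM_ENTRY_LOAD_IA32_PAT,               VM_EXIT_LOAD_IA32_PAT },
        { VM_ENTRY_LOAD_IA32_EFER,              VM_EXIT_LOAD_IA32_EFER },
        { VM_ENTRY_LOAD_BNDCFGS,                VM_EXIT_CLEAR_BNDCFGS },
        { VM_ENTRY_LOAD_IA32_RTIT_CTL,          VM_EXIT_CLEAR_IA32_RTIT_CTL },
        { VM_ENTRY_LOAD_IA32_LBR_CTL,           VM_EXIT_CLEAR_IA32_LBR_CTL },
    };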

Paolo

Patch

diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index dc2cb8a16e76..464bf39e4835 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -97,20 +97,17 @@  static inline bool cpu_has_vmx_posted_intr(void)
 
 static inline bool cpu_has_load_ia32_efer(void)
 {
-	return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_EFER) &&
-	       (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_EFER);
+	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_EFER;
 }
 
 static inline bool cpu_has_load_perf_global_ctrl(void)
 {
-	return (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
-	       (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
+	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
 }
 
 static inline bool cpu_has_vmx_mpx(void)
 {
-	return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) &&
-		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
+	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS;
 }
 
 static inline bool cpu_has_vmx_tpr_shadow(void)
@@ -377,7 +374,6 @@  static inline bool cpu_has_vmx_intel_pt(void)
 	rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
 	return (vmx_msr & MSR_IA32_VMX_MISC_INTEL_PT) &&
 		(vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PT_USE_GPA) &&
-		(vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_IA32_RTIT_CTL) &&
 		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_RTIT_CTL);
 }
 
@@ -406,8 +402,7 @@  static inline bool vmx_pebs_supported(void)
 
 static inline bool cpu_has_vmx_arch_lbr(void)
 {
-	return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_IA32_LBR_CTL) &&
-		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_LBR_CTL);
+	return vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_LBR_CTL;
 }
 
 static inline u64 vmx_get_perf_capabilities(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6927f6e8ec31..2ea256de9aba 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2462,6 +2462,9 @@  static __init u64 adjust_vmx_controls64(u64 ctl_opt, u32 msr)
 	return  ctl_opt & allowed;
 }
 
+#define VMCS_ENTRY_EXIT_PAIR(name, entry_action, exit_action) \
+	{ VM_ENTRY_##entry_action##_##name, VM_EXIT_##exit_action##_##name }
+
 static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 				    struct vmx_capability *vmx_cap)
 {
@@ -2473,6 +2476,24 @@  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	u64 _cpu_based_3rd_exec_control = 0;
 	u32 _vmexit_control = 0;
 	u32 _vmentry_control = 0;
+	int i;
+
+	/*
+	 * LOAD/SAVE_DEBUG_CONTROLS are absent because both are mandatory.
+	 * SAVE_IA32_PAT and SAVE_IA32_EFER are absent because KVM always
+	 * intercepts writes to PAT and EFER, i.e. never enables those controls.
+	 */
+	struct {
+		u32 entry_control;
+		u32 exit_control;
+	} vmcs_entry_exit_pairs[] = {
+		VMCS_ENTRY_EXIT_PAIR(IA32_PERF_GLOBAL_CTRL, LOAD, LOAD),
+		VMCS_ENTRY_EXIT_PAIR(IA32_PAT, LOAD, LOAD),
+		VMCS_ENTRY_EXIT_PAIR(IA32_EFER, LOAD, LOAD),
+		VMCS_ENTRY_EXIT_PAIR(BNDCFGS, LOAD, CLEAR),
+		VMCS_ENTRY_EXIT_PAIR(IA32_RTIT_CTL, LOAD, CLEAR),
+		VMCS_ENTRY_EXIT_PAIR(IA32_LBR_CTL, LOAD, CLEAR),
+	};
 
 	memset(vmcs_conf, 0, sizeof(*vmcs_conf));
 	min = CPU_BASED_HLT_EXITING |
@@ -2614,6 +2635,20 @@  static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 				&_vmentry_control) < 0)
 		return -EIO;
 
+	for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
+		u32 n_ctrl = vmcs_entry_exit_pairs[i].entry_control;
+		u32 x_ctrl = vmcs_entry_exit_pairs[i].exit_control;
+
+		if (!(_vmentry_control & n_ctrl) == !(_vmexit_control & x_ctrl))
+			continue;
+
+		pr_warn_once("Inconsistent VM-Entry/VM-Exit pair, entry = %x, exit = %x\n",
+			     _vmentry_control & n_ctrl, _vmexit_control & x_ctrl);
+
+		_vmentry_control &= ~n_ctrl;
+		_vmexit_control &= ~x_ctrl;
+	}
+
 	/*
 	 * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
 	 * can't be used due to an errata where VM Exit may incorrectly clear