diff mbox series

[v4,09/25] KVM: VMX: nVMX: Support TSC scaling and PERF_GLOBAL_CTRL with enlightened VMCS

Message ID 20220714091327.1085353-10-vkuznets@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: VMX: Support updated eVMCSv1 revision + use vmcs_config for L1 VMX MSRs | expand

Commit Message

Vitaly Kuznetsov July 14, 2022, 9:13 a.m. UTC
Enlightened VMCS v1 got updated and now includes the required fields
for TSC scaling and loading PERF_GLOBAL_CTRL upon VMENTER/VMEXIT
features. For Hyper-V on KVM enablement, KVM can just observe VMX control
MSRs and use the features (with or without eVMCS) when
possible. Hyper-V on KVM case is trickier because of live migration:
the new features require explicit enablement from VMM to not break
it. Luckily, the updated eVMCS revision comes with a feature bit in
CPUID.0x4000000A.EBX.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/hyperv.c     |  2 +-
 arch/x86/kvm/vmx/evmcs.c  | 88 +++++++++++++++++++++++++++++++--------
 arch/x86/kvm/vmx/evmcs.h  | 17 ++------
 arch/x86/kvm/vmx/nested.c |  2 +-
 arch/x86/kvm/vmx/vmx.c    |  2 +-
 5 files changed, 78 insertions(+), 33 deletions(-)

Comments

Sean Christopherson July 21, 2022, 9:58 p.m. UTC | #1
Nit of all nits, just "KVM: nVMX:" in the shortlog.

On Thu, Jul 14, 2022, Vitaly Kuznetsov wrote:
> Enlightened VMCS v1 got updated and now includes the required fields
> for TSC scaling and loading PERF_GLOBAL_CTRL upon VMENTER/VMEXIT
> features. For Hyper-V on KVM enablement, KVM can just observe VMX control
> MSRs and use the features (with or without eVMCS) when
> possible. Hyper-V on KVM case is trickier because of live migration:
> the new features require explicit enablement from VMM to not break
> it. Luckily, the updated eVMCS revision comes with a feature bit in
> CPUID.0x4000000A.EBX.

Mostly out of curiosity, what happens if the VMM parrots back the results of
kvm_get_hv_cpuid()?

> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/kvm/hyperv.c     |  2 +-
>  arch/x86/kvm/vmx/evmcs.c  | 88 +++++++++++++++++++++++++++++++--------
>  arch/x86/kvm/vmx/evmcs.h  | 17 ++------
>  arch/x86/kvm/vmx/nested.c |  2 +-
>  arch/x86/kvm/vmx/vmx.c    |  2 +-
>  5 files changed, 78 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index a8e4944ca110..995d3ab1443e 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -2552,7 +2552,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
>  		case HYPERV_CPUID_NESTED_FEATURES:
>  			ent->eax = evmcs_ver;
>  			ent->eax |= HV_X64_NESTED_MSR_BITMAP;
> -
> +			ent->ebx |= HV_X64_NESTED_EVMCS1_2022_UPDATE;
>  			break;
>  
>  		case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
> diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
> index 8bea5dea0341..52a53debd806 100644
> --- a/arch/x86/kvm/vmx/evmcs.c
> +++ b/arch/x86/kvm/vmx/evmcs.c
> @@ -368,7 +368,60 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> -void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
> +enum evmcs_v1_revision {
> +	EVMCSv1_2016,
> +	EVMCSv1_2022,
> +};

Why bother with the enums?  They don't make life any easier, e.g. if there's a
2023 update then it's easy to do:

		unsupported = <baseline>;
		if (!has_evmcs_2022_features)
			unsupported |= <2022 features>;
		if (!has_evmcs_2023_features)
			unsupported |= <2023 features>;
		return unsupported;

diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index b5cfbf7d487b..7b348a03e096 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -355,11 +355,6 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
        return 0;
 }

-enum evmcs_v1_revision {
-       EVMCSv1_2016,
-       EVMCSv1_2022,
-};
-
 enum evmcs_unsupported_ctrl_type {
        EVMCS_EXIT_CTLS,
        EVMCS_ENTRY_CTLS,
@@ -372,29 +367,29 @@ static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
                                      enum evmcs_unsupported_ctrl_type ctrl_type)
 {
        struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
-       enum evmcs_v1_revision evmcs_rev = EVMCSv1_2016;
+       bool has_evmcs_2022_features;

        if (!hv_vcpu)
                return 0;

-       if (hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
-               evmcs_rev = EVMCSv1_2022;
+       has_evmcs_2022_features = hv_vcpu->cpuid_cache.nested_ebx &
+                                 HV_X64_NESTED_EVMCS1_2022_UPDATE;

        switch (ctrl_type) {
        case EVMCS_EXIT_CTLS:
-               if (evmcs_rev == EVMCSv1_2016)
+               if (!has_evmcs_2022_features)
                        return EVMCS1_UNSUPPORTED_VMEXIT_CTRL |
                                VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
                else
                        return EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
        case EVMCS_ENTRY_CTLS:
-               if (evmcs_rev == EVMCSv1_2016)
+               if (!has_evmcs_2022_features)
                        return EVMCS1_UNSUPPORTED_VMENTRY_CTRL |
                                VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
                else
                        return EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
        case EVMCS_2NDEXEC:
-               if (evmcs_rev == EVMCSv1_2016)
+               if (!has_evmcs_2022_features)
                        return EVMCS1_UNSUPPORTED_2NDEXEC |
                                SECONDARY_EXEC_TSC_SCALING;
                else

> +
> +enum evmcs_unsupported_ctrl_type {
> +	EVMCS_EXIT_CTLS,
> +	EVMCS_ENTRY_CTLS,
> +	EVMCS_2NDEXEC,
> +	EVMCS_PINCTRL,
> +	EVMCS_VMFUNC,
> +};

Same question here, it just makes life more difficult.  I.e. do

  static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu, int msr_index)

and then

  void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
  {
	u32 ctl_low = (u32)*pdata;
	u32 ctl_high = (u32)(*pdata >> 32);

	/*
	 * Hyper-V 2016 and 2019 try using unsupported features when eVMCS is
	 * enabled but there are no corresponding fields.
	 */
	ctl_high &= ~evmcs_get_unsupported_ctls(vcpu, msr_index);

	*pdata = ctl_low | ((u64)ctl_high << 32);
  }


  int nested_evmcs_check_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
  {
	int ret = 0;
	u32 unsupp_ctl;

	unsupp_ctl = vmcs12->pin_based_vm_exec_control &
		evmcs_get_unsupported_ctls(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);

	<and so on and so forth>
  }
Paolo Bonzini July 25, 2022, 5:09 p.m. UTC | #2
On 7/21/22 23:58, Sean Christopherson wrote:
> 
> diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
> index b5cfbf7d487b..7b348a03e096 100644
> --- a/arch/x86/kvm/vmx/evmcs.c
> +++ b/arch/x86/kvm/vmx/evmcs.c
> @@ -355,11 +355,6 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
>          return 0;
>   }
> 
> -enum evmcs_v1_revision {
> -       EVMCSv1_2016,
> -       EVMCSv1_2022,
> -};
> -
>   enum evmcs_unsupported_ctrl_type {
>          EVMCS_EXIT_CTLS,
>          EVMCS_ENTRY_CTLS,
> @@ -372,29 +367,29 @@ static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
>                                        enum evmcs_unsupported_ctrl_type ctrl_type)
>   {
>          struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> -       enum evmcs_v1_revision evmcs_rev = EVMCSv1_2016;
> +       bool has_evmcs_2022_features;
> 
>          if (!hv_vcpu)
>                  return 0;
> 
> -       if (hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
> -               evmcs_rev = EVMCSv1_2022;
> +       has_evmcs_2022_features = hv_vcpu->cpuid_cache.nested_ebx &
> +                                 HV_X64_NESTED_EVMCS1_2022_UPDATE;
> 
>          switch (ctrl_type) {
>          case EVMCS_EXIT_CTLS:
> -               if (evmcs_rev == EVMCSv1_2016)
> +               if (!has_evmcs_2022_features)
>                          return EVMCS1_UNSUPPORTED_VMEXIT_CTRL |
>                                  VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
>                  else
>                          return EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
>          case EVMCS_ENTRY_CTLS:
> -               if (evmcs_rev == EVMCSv1_2016)
> +               if (!has_evmcs_2022_features)
>                          return EVMCS1_UNSUPPORTED_VMENTRY_CTRL |
>                                  VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
>                  else
>                          return EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
>          case EVMCS_2NDEXEC:
> -               if (evmcs_rev == EVMCSv1_2016)
> +               if (!has_evmcs_2022_features)
>                          return EVMCS1_UNSUPPORTED_2NDEXEC |
>                                  SECONDARY_EXEC_TSC_SCALING;
>                  else
> 

I kind of like the idea of having a two-dimensional array based on the 
enums instead of switch statements, so for now I'll keep Vitaly's enums.

Paolo
Sean Christopherson July 25, 2022, 6:18 p.m. UTC | #3
On Mon, Jul 25, 2022, Paolo Bonzini wrote:
> On 7/21/22 23:58, Sean Christopherson wrote:
> > 
> > diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
> > index b5cfbf7d487b..7b348a03e096 100644
> > --- a/arch/x86/kvm/vmx/evmcs.c
> > +++ b/arch/x86/kvm/vmx/evmcs.c
> > @@ -355,11 +355,6 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
> >          return 0;
> >   }
> > 
> > -enum evmcs_v1_revision {
> > -       EVMCSv1_2016,
> > -       EVMCSv1_2022,
> > -};
> > -
> >   enum evmcs_unsupported_ctrl_type {
> >          EVMCS_EXIT_CTLS,
> >          EVMCS_ENTRY_CTLS,
> > @@ -372,29 +367,29 @@ static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
> >                                        enum evmcs_unsupported_ctrl_type ctrl_type)
> >   {
> >          struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> > -       enum evmcs_v1_revision evmcs_rev = EVMCSv1_2016;
> > +       bool has_evmcs_2022_features;
> > 
> >          if (!hv_vcpu)
> >                  return 0;
> > 
> > -       if (hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
> > -               evmcs_rev = EVMCSv1_2022;
> > +       has_evmcs_2022_features = hv_vcpu->cpuid_cache.nested_ebx &
> > +                                 HV_X64_NESTED_EVMCS1_2022_UPDATE;
> > 
> >          switch (ctrl_type) {
> >          case EVMCS_EXIT_CTLS:
> > -               if (evmcs_rev == EVMCSv1_2016)
> > +               if (!has_evmcs_2022_features)
> >                          return EVMCS1_UNSUPPORTED_VMEXIT_CTRL |
> >                                  VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
> >                  else
> >                          return EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
> >          case EVMCS_ENTRY_CTLS:
> > -               if (evmcs_rev == EVMCSv1_2016)
> > +               if (!has_evmcs_2022_features)
> >                          return EVMCS1_UNSUPPORTED_VMENTRY_CTRL |
> >                                  VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
> >                  else
> >                          return EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
> >          case EVMCS_2NDEXEC:
> > -               if (evmcs_rev == EVMCSv1_2016)
> > +               if (!has_evmcs_2022_features)
> >                          return EVMCS1_UNSUPPORTED_2NDEXEC |
> >                                  SECONDARY_EXEC_TSC_SCALING;
> >                  else
> > 
> 
> I kind of like the idea of having a two-dimensional array based on the enums
> instead of switch statements, so for now I'll keep Vitaly's enums.

I don't have a strong opinion on using a 2d array, but unless I'm missing something,
that's nowhere to be found in this patch.  IMO, having the enums without them
providing any unique value is silly and obfuscates the code.
Paolo Bonzini July 28, 2022, 9:52 p.m. UTC | #4
On 7/25/22 20:18, Sean Christopherson wrote:
>> I kind of like the idea of having a two-dimensional array based on the enums
>> instead of switch statements, so for now I'll keep Vitaly's enums.
> I don't have a strong opinion on using a 2d array, but unless I'm missing something,
> that's nowhere to be found in this patch.  IMO, having the enums without them
> providing any unique value is silly and obfuscates the code.

Yeah, like this:

diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index d8da4026c93d..8055128d8638 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -342,9 +342,10 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
  	return 0;
  }
  
-enum evmcs_v1_revision {
+enum evmcs_revision {
  	EVMCSv1_2016,
  	EVMCSv1_2022,
+	EVMCS_REVISION_MAX,
  };
  
  enum evmcs_unsupported_ctrl_type {
@@ -353,13 +354,37 @@ enum evmcs_unsupported_ctrl_type {
  	EVMCS_2NDEXEC,
  	EVMCS_PINCTRL,
  	EVMCS_VMFUNC,
+	EVMCS_CTRL_MAX,
+};
+
+static u32 evmcs_unsupported_ctls[EVMCS_CTRL_MAX][EVMCS_REVISION_MAX] = {
+	[EVMCS_EXIT_CTLS] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
+		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL,
+	},
+	[EVMCS_ENTRY_CTLS] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMENTRY_CTRL | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
+		[EVMCSv1_2022] =  EVMCS1_UNSUPPORTED_VMENTRY_CTRL,
+	},
+	[EVMCS_2NDEXEC] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_2NDEXEC | SECONDARY_EXEC_TSC_SCALING,
+		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_2NDEXEC,
+	},
+	[EVMCS_PINCTRL] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_PINCTRL,
+		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_PINCTRL,
+	},
+	[EVMCS_VMFUNC] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMFUNC,
+		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMFUNC,
+	},
  };
  
  static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
  				      enum evmcs_unsupported_ctrl_type ctrl_type)
  {
  	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
-	enum evmcs_v1_revision evmcs_rev = EVMCSv1_2016;
+	enum evmcs_revision evmcs_rev = EVMCSv1_2016;
  
  	if (!hv_vcpu)
  		return 0;
@@ -367,32 +392,7 @@ static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
  	if (hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
  		evmcs_rev = EVMCSv1_2022;
  
-	switch (ctrl_type) {
-	case EVMCS_EXIT_CTLS:
-		if (evmcs_rev == EVMCSv1_2016)
-			return EVMCS1_UNSUPPORTED_VMEXIT_CTRL |
-				VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
-		else
-			return EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
-	case EVMCS_ENTRY_CTLS:
-		if (evmcs_rev == EVMCSv1_2016)
-			return EVMCS1_UNSUPPORTED_VMENTRY_CTRL |
-				VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
-		else
-			return EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
-	case EVMCS_2NDEXEC:
-		if (evmcs_rev == EVMCSv1_2016)
-			return EVMCS1_UNSUPPORTED_2NDEXEC |
-				SECONDARY_EXEC_TSC_SCALING;
-		else
-			return EVMCS1_UNSUPPORTED_2NDEXEC;
-	case EVMCS_PINCTRL:
-		return EVMCS1_UNSUPPORTED_PINCTRL;
-	case EVMCS_VMFUNC:
-		return EVMCS1_UNSUPPORTED_VMFUNC;
-	}
-
-	return 0;
+	return evmcs_unsupported_ctls[ctrl_type][evmcs_rev];
  }
  
  void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)

Paolo
Sean Christopherson July 28, 2022, 10:13 p.m. UTC | #5
On Thu, Jul 28, 2022, Paolo Bonzini wrote:
> On 7/25/22 20:18, Sean Christopherson wrote:
> > > I kind of like the idea of having a two-dimensional array based on the enums
> > > instead of switch statements, so for now I'll keep Vitaly's enums.
> > I don't have a strong opinion on using a 2d array, but unless I'm missing something,
> > that's nowhere to be found in this patch.  IMO, having the enums without them
> > providing any unique value is silly and obfuscates the code.
> 
> Yeah, like this:
> 
> diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
> index d8da4026c93d..8055128d8638 100644
> --- a/arch/x86/kvm/vmx/evmcs.c
> +++ b/arch/x86/kvm/vmx/evmcs.c
> @@ -342,9 +342,10 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
> -enum evmcs_v1_revision {
> +enum evmcs_revision {
>  	EVMCSv1_2016,
>  	EVMCSv1_2022,
> +	EVMCS_REVISION_MAX,
>  };
>  enum evmcs_unsupported_ctrl_type {
> @@ -353,13 +354,37 @@ enum evmcs_unsupported_ctrl_type {
>  	EVMCS_2NDEXEC,
>  	EVMCS_PINCTRL,
>  	EVMCS_VMFUNC,
> +	EVMCS_CTRL_MAX,
> +};
> +
> +static u32 evmcs_unsupported_ctls[EVMCS_CTRL_MAX][EVMCS_REVISION_MAX] = {

Can this be const?

> +	[EVMCS_EXIT_CTLS] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL,
> +	},
> +	[EVMCS_ENTRY_CTLS] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMENTRY_CTRL | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
> +		[EVMCSv1_2022] =  EVMCS1_UNSUPPORTED_VMENTRY_CTRL,
> +	},
> +	[EVMCS_2NDEXEC] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_2NDEXEC | SECONDARY_EXEC_TSC_SCALING,
> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_2NDEXEC,
> +	},
> +	[EVMCS_PINCTRL] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_PINCTRL,
> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_PINCTRL,
> +	},
> +	[EVMCS_VMFUNC] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMFUNC,
> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMFUNC,
> +	},
>  };

...

> +	return evmcs_unsupported_ctls[ctrl_type][evmcs_rev];
>  }

The only flaw in this is if KVM gets handed a CPUID model that enumerates support
for 2025 (or whenever the next update comes) but not 2022.  Hmm, though if Microsoft
defines each new "version" as a full superset, then even that theoretical bug goes
away.  I'm happy to be optimistic for once and give this a shot.  I definitely like
that it makes it easier to see the deltas between versions.
Paolo Bonzini July 28, 2022, 10:24 p.m. UTC | #6
On 7/29/22 00:13, Sean Christopherson wrote:
> The only flaw in this is if KVM gets handed a CPUID model that enumerates support
> for 2025 (or whenever the next update comes) but not 2022.  Hmm, though if Microsoft
> defines each new "version" as a full superset, then even that theoretical bug goes
> away.  I'm happy to be optimistic for once and give this a shot.  I definitely like
> that it makes it easier to see the deltas between versions.

Okay, I have queued the series but I still haven't gone through all the 
comments.  So this will _not_ be in the 5.21 pull request.

The first patch also needs a bit more thought to figure out the impact 
on userspace and whether we can consider syndbg niche enough to not care.

Paolo
Sean Christopherson July 28, 2022, 10:35 p.m. UTC | #7
On Fri, Jul 29, 2022, Paolo Bonzini wrote:
> On 7/29/22 00:13, Sean Christopherson wrote:
> > The only flaw in this is if KVM gets handed a CPUID model that enumerates support
> > for 2025 (or whenever the next update comes) but not 2022.  Hmm, though if Microsoft
> > defines each new "version" as a full superset, then even that theoretical bug goes
> > away.  I'm happy to be optimistic for once and give this a shot.  I definitely like
> > that it makes it easier to see the deltas between versions.
> 
> Okay, I have queued the series but I still haven't gone through all the
> comments.  So this will _not_ be in the 5.21 pull request.

I assume you meant 5.20?

> The first patch also needs a bit more thought to figure out the impact on
> userspace and whether we can consider syndbg niche enough to not care.
> 
> Paolo
>
Vitaly Kuznetsov Aug. 1, 2022, 8:54 a.m. UTC | #8
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 7/29/22 00:13, Sean Christopherson wrote:
>> The only flaw in this is if KVM gets handed a CPUID model that enumerates support
>> for 2025 (or whenever the next update comes) but not 2022.  Hmm, though if Microsoft
>> defines each new "version" as a full superset, then even that theoretical bug goes
>> away.  I'm happy to be optimistic for once and give this a shot.  I definitely like
>> that it makes it easier to see the deltas between versions.
>
> Okay, I have queued the series but I still haven't gone through all the 
> comments.  So this will _not_ be in the 5.21 pull request.
>
> The first patch also needs a bit more thought to figure out the impact 
> on userspace and whether we can consider syndbg niche enough to not care.

(Sorry for delayed replies here, I'm back from vacation now)

The first patch is not a requirement for the rest of the series, we can
discuss it separately. I, however, think that we can just keep checking
HV_FEATURE_DEBUG_MSRS_AVAILABLE in hv_check_msr_access() to be
compatible with existing QEMUs and make QEMU expose both
HV_FEATURE_DEBUG_MSRS_AVAILABLE and HV_ACCESS_DEBUG_MSRS unconditionally
when syndbg feature is enabled as we know that missing
HV_ACCESS_DEBUG_MSRS is just a bug. I don't think we actually need to
be so picky and support VMMs which want to set 'syndbg without access to
it' and 'access to syndbg without syndbg' use-cases. All-or-nothing is
likely good enough.
Vitaly Kuznetsov Aug. 2, 2022, 1:03 p.m. UTC | #9
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 7/29/22 00:13, Sean Christopherson wrote:
>> The only flaw in this is if KVM gets handed a CPUID model that enumerates support
>> for 2025 (or whenever the next update comes) but not 2022.  Hmm, though if Microsoft
>> defines each new "version" as a full superset, then even that theoretical bug goes
>> away.  I'm happy to be optimistic for once and give this a shot.  I definitely like
>> that it makes it easier to see the deltas between versions.
>
> Okay, I have queued the series but I still haven't gone through all the 
> comments.

The biggest problem with this version is the EFER.LMA problem on i386
discovered (and, thankfully, fixed in the suggested patch) by
Sean. To address this and all other comment I'm going to put together a
v5 on top of the current kvm/queue (as I don't yet see any of this stuff
there).

>  So this will _not_ be in the 5.21 pull request.

At first I thought you meant 5.20 but then I got the pun: 5.20 will
likely become 6.0 and so 5.21 pull request will just never happen :-)
diff mbox series

Patch

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a8e4944ca110..995d3ab1443e 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2552,7 +2552,7 @@  int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 		case HYPERV_CPUID_NESTED_FEATURES:
 			ent->eax = evmcs_ver;
 			ent->eax |= HV_X64_NESTED_MSR_BITMAP;
-
+			ent->ebx |= HV_X64_NESTED_EVMCS1_2022_UPDATE;
 			break;
 
 		case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 8bea5dea0341..52a53debd806 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -368,7 +368,60 @@  uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
+enum evmcs_v1_revision {
+	EVMCSv1_2016,
+	EVMCSv1_2022,
+};
+
+enum evmcs_unsupported_ctrl_type {
+	EVMCS_EXIT_CTLS,
+	EVMCS_ENTRY_CTLS,
+	EVMCS_2NDEXEC,
+	EVMCS_PINCTRL,
+	EVMCS_VMFUNC,
+};
+
+static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
+				      enum evmcs_unsupported_ctrl_type ctrl_type)
+{
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	enum evmcs_v1_revision evmcs_rev = EVMCSv1_2016;
+
+	if (!hv_vcpu)
+		return 0;
+
+	if (hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
+		evmcs_rev = EVMCSv1_2022;
+
+	switch (ctrl_type) {
+	case EVMCS_EXIT_CTLS:
+		if (evmcs_rev == EVMCSv1_2016)
+			return EVMCS1_UNSUPPORTED_VMEXIT_CTRL |
+				VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+		else
+			return EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+	case EVMCS_ENTRY_CTLS:
+		if (evmcs_rev == EVMCSv1_2016)
+			return EVMCS1_UNSUPPORTED_VMENTRY_CTRL |
+				VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+		else
+			return EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+	case EVMCS_2NDEXEC:
+		if (evmcs_rev == EVMCSv1_2016)
+			return EVMCS1_UNSUPPORTED_2NDEXEC |
+				SECONDARY_EXEC_TSC_SCALING;
+		else
+			return EVMCS1_UNSUPPORTED_2NDEXEC;
+	case EVMCS_PINCTRL:
+		return EVMCS1_UNSUPPORTED_PINCTRL;
+	case EVMCS_VMFUNC:
+		return EVMCS1_UNSUPPORTED_VMFUNC;
+	}
+
+	return 0;
+}
+
+void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 {
 	u32 ctl_low = (u32)*pdata;
 	u32 ctl_high = (u32)(*pdata >> 32);
@@ -380,72 +433,73 @@  void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
 	switch (msr_index) {
 	case MSR_IA32_VMX_EXIT_CTLS:
 	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
-		ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+		ctl_high &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_EXIT_CTLS);
 		break;
 	case MSR_IA32_VMX_ENTRY_CTLS:
 	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
-		ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+		ctl_high &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_ENTRY_CTLS);
 		break;
 	case MSR_IA32_VMX_PROCBASED_CTLS2:
-		ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+		ctl_high &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_2NDEXEC);
 		break;
 	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
 	case MSR_IA32_VMX_PINBASED_CTLS:
-		ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+		ctl_high &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_PINCTRL);
 		break;
 	case MSR_IA32_VMX_VMFUNC:
-		ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC;
+		ctl_low &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_VMFUNC);
 		break;
 	}
 
 	*pdata = ctl_low | ((u64)ctl_high << 32);
 }
 
-int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
+int nested_evmcs_check_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	int ret = 0;
 	u32 unsupp_ctl;
 
 	unsupp_ctl = vmcs12->pin_based_vm_exec_control &
-		EVMCS1_UNSUPPORTED_PINCTRL;
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_PINCTRL);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported pin-based VM-execution controls",
+			"eVMCS: unsupported pin-based VM-execution controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
 
 	unsupp_ctl = vmcs12->secondary_vm_exec_control &
-		EVMCS1_UNSUPPORTED_2NDEXEC;
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_2NDEXEC);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported secondary VM-execution controls",
+			"eVMCS: unsupported secondary VM-execution controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
 
 	unsupp_ctl = vmcs12->vm_exit_controls &
-		EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_EXIT_CTLS);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported VM-exit controls",
+			"eVMCS: unsupported VM-exit controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
 
 	unsupp_ctl = vmcs12->vm_entry_controls &
-		EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_ENTRY_CTLS);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported VM-entry controls",
+			"eVMCS: unsupported VM-entry controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
 
-	unsupp_ctl = vmcs12->vm_function_control & EVMCS1_UNSUPPORTED_VMFUNC;
+	unsupp_ctl = vmcs12->vm_function_control &
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_VMFUNC);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported VM-function controls",
+			"eVMCS: unsupported VM-function controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index f886a8ff0342..4b809c79ae63 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -37,16 +37,9 @@  DECLARE_STATIC_KEY_FALSE(enable_evmcs);
  *	EPTP_LIST_ADDRESS               = 0x00002024,
  *	VMREAD_BITMAP                   = 0x00002026,
  *	VMWRITE_BITMAP                  = 0x00002028,
- *
- *	TSC_MULTIPLIER                  = 0x00002032,
  *	PLE_GAP                         = 0x00004020,
  *	PLE_WINDOW                      = 0x00004022,
  *	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
- *      GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
- *      HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
- *
- * Currently unsupported in KVM:
- *	GUEST_IA32_RTIT_CTL		= 0x00002814,
  */
 #define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
 				    PIN_BASED_VMX_PREEMPTION_TIMER)
@@ -58,12 +51,10 @@  DECLARE_STATIC_KEY_FALSE(enable_evmcs);
 	 SECONDARY_EXEC_ENABLE_PML |					\
 	 SECONDARY_EXEC_ENABLE_VMFUNC |					\
 	 SECONDARY_EXEC_SHADOW_VMCS |					\
-	 SECONDARY_EXEC_TSC_SCALING |					\
 	 SECONDARY_EXEC_PAUSE_LOOP_EXITING)
 #define EVMCS1_UNSUPPORTED_VMEXIT_CTRL					\
-	(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |				\
-	 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
-#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+	(VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (0)
 #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
 
 struct evmcs_field {
@@ -243,7 +234,7 @@  bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
 uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
 int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 			uint16_t *vmcs_version);
-void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata);
-int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
+void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+int nested_evmcs_check_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12);
 
 #endif /* __KVM_X86_VMX_EVMCS_H */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 4fc84f0f5875..dcf3ee645212 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2891,7 +2891,7 @@  static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
-		return nested_evmcs_check_controls(vmcs12);
+		return nested_evmcs_check_controls(vcpu, vmcs12);
 
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c30115b9cb33..b4915d841357 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1858,7 +1858,7 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 */
 		if (!msr_info->host_initiated &&
 		    vmx->nested.enlightened_vmcs_enabled)
-			nested_evmcs_filter_control_msr(msr_info->index,
+			nested_evmcs_filter_control_msr(vcpu, msr_info->index,
 							&msr_info->data);
 		break;
 	case MSR_IA32_RTIT_CTL: