diff mbox series

[v5,09/26] KVM: VMX: nVMX: Support TSC scaling and PERF_GLOBAL_CTRL with enlightened VMCS

Message ID 20220802160756.339464-10-vkuznets@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: VMX: Support updated eVMCSv1 revision + use vmcs_config for L1 VMX MSRs | expand

Commit Message

Vitaly Kuznetsov Aug. 2, 2022, 4:07 p.m. UTC
Enlightened VMCS v1 got updated and now includes the required fields
for TSC scaling and loading PERF_GLOBAL_CTRL upon VMENTER/VMEXIT
features. For Hyper-V on KVM enablement, KVM can just observe VMX control
MSRs and use the features (with or without eVMCS) when
possible. Hyper-V on KVM case is trickier because of live migration:
the new features require explicit enablement from VMM to not break
it. Luckily, the updated eVMCS revision comes with a feature bit in
CPUID.0x4000000A.EBX.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/hyperv.c     |  2 +-
 arch/x86/kvm/vmx/evmcs.c  | 88 +++++++++++++++++++++++++++++++--------
 arch/x86/kvm/vmx/evmcs.h  | 17 ++------
 arch/x86/kvm/vmx/nested.c |  2 +-
 arch/x86/kvm/vmx/vmx.c    |  2 +-
 5 files changed, 78 insertions(+), 33 deletions(-)

Comments

Sean Christopherson Aug. 18, 2022, 5:15 p.m. UTC | #1
On Tue, Aug 02, 2022, Vitaly Kuznetsov wrote:
> Enlightened VMCS v1 got updated and now includes the required fields
> for TSC scaling and loading PERF_GLOBAL_CTRL upon VMENTER/VMEXIT
> features. For Hyper-V on KVM enablement, KVM can just observe VMX control
> MSRs and use the features (with or without eVMCS) when
> possible. Hyper-V on KVM case is trickier because of live migration:
> the new features require explicit enablement from VMM to not break
> it. Luckily, the updated eVMCS revision comes with a feature bit in
> CPUID.0x4000000A.EBX.

The refactor to implement the 2-d array should go in a prep patch.  Unless you
(or anyone else) objects to the feedback below, I'll do the split myself, post v6,
and queue this up (without patch 1, and assuming there're no major issues in the
back half of the series).

> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/kvm/hyperv.c     |  2 +-
>  arch/x86/kvm/vmx/evmcs.c  | 88 +++++++++++++++++++++++++++++++--------
>  arch/x86/kvm/vmx/evmcs.h  | 17 ++------
>  arch/x86/kvm/vmx/nested.c |  2 +-
>  arch/x86/kvm/vmx/vmx.c    |  2 +-
>  5 files changed, 78 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 1098915360ae..8a2b24f9bbf6 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -2552,7 +2552,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
>  		case HYPERV_CPUID_NESTED_FEATURES:
>  			ent->eax = evmcs_ver;
>  			ent->eax |= HV_X64_NESTED_MSR_BITMAP;
> -
> +			ent->ebx |= HV_X64_NESTED_EVMCS1_2022_UPDATE;
>  			break;
>  
>  		case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
> diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
> index 8bea5dea0341..e8497f9854a1 100644
> --- a/arch/x86/kvm/vmx/evmcs.c
> +++ b/arch/x86/kvm/vmx/evmcs.c
> @@ -368,7 +368,60 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> -void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
> +enum evmcs_revision {
> +	EVMCSv1_2016,
> +	EVMCSv1_2022,
> +	EVMCS_REVISION_MAX,

"MAX" is technically wrong, the "max" entry is usually the last valid entry.  This
should be NR_EVMCS_REVISIONS, and then NR_EVMCS_CTRLS below.

> +};
> +
> +enum evmcs_unsupported_ctrl_type {

I think drop the "unsupported" here, because the naming gets weird when trying to
use it for something other than indexing evmcs_unsupported_ctls (see bottom).

> +	EVMCS_EXIT_CTLS,
> +	EVMCS_ENTRY_CTLS,

s/CTLS/CTRLS for consistency.

> +	EVMCS_2NDEXEC,
> +	EVMCS_PINCTRL,
> +	EVMCS_VMFUNC,
> +	EVMCS_CTRL_MAX,
> +};
> +
> +static u32 evmcs_unsupported_ctls[EVMCS_CTRL_MAX][EVMCS_REVISION_MAX] = {

This can be "const".  And s/ctls/ctrls for consistency.

> +	[EVMCS_EXIT_CTLS] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL,
> +	},
> +	[EVMCS_ENTRY_CTLS] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMENTRY_CTRL | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
> +		[EVMCSv1_2022] =  EVMCS1_UNSUPPORTED_VMENTRY_CTRL,
> +	},
> +	[EVMCS_2NDEXEC] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_2NDEXEC | SECONDARY_EXEC_TSC_SCALING,
> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_2NDEXEC,
> +	},
> +	[EVMCS_PINCTRL] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_PINCTRL,
> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_PINCTRL,
> +	},
> +	[EVMCS_VMFUNC] = {
> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMFUNC,
> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMFUNC,
> +	},
> +};
> +
> +static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
> +				      enum evmcs_unsupported_ctrl_type ctrl_type)
> +{
> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +	enum evmcs_revision evmcs_rev = EVMCSv1_2016;
> +
> +	if (!hv_vcpu)

This is a functiontal change, and I don't think it's correct either.  Previously,
KVM would apply the EVMCSv1_2016 filter irrespective of whether or not
vcpu->arch.hyperv is non-NULL.  nested_enable_evmcs() doesn't require a Hyper-V
vCPU, and AFAICT nothing requires a Hyper-V vCPU to use eVMCS.

So I believe this should be:

	if (hv_vcpu &&
	    hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
		evmcs_rev = EVMCSv1_2022;

> +		return 0;
> +
> +	if (hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
> +		evmcs_rev = EVMCSv1_2022;
> +
> +	return evmcs_unsupported_ctls[ctrl_type][evmcs_rev];
> +}
> +

...

> -int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
> +int nested_evmcs_check_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  {
>  	int ret = 0;
>  	u32 unsupp_ctl;
>  
>  	unsupp_ctl = vmcs12->pin_based_vm_exec_control &
> -		EVMCS1_UNSUPPORTED_PINCTRL;
> +		evmcs_get_unsupported_ctls(vcpu, EVMCS_PINCTRL);
>  	if (unsupp_ctl) {
>  		trace_kvm_nested_vmenter_failed(
> -			"eVMCS: unsupported pin-based VM-execution controls",
> +			"eVMCS: unsupported pin-based VM-execution controls: ",
>  			unsupp_ctl);

Egad!  This is all broken, at least in theory.  The second param to
trace_kvm_nested_vmenter_failed() is the error code, not the bad value.  It mostly
works because __print_symbolic() falls back to printing the hex value on error,
but that relies on there being no collisions/matches between unsupp_ctl and the
set of VMX_VMENTER_INSTRUCTION_ERRORS.  E.g. if there's a collision then the
tracepoint will pretty print a string and cause confusion.

And checking every control even after one fails seems unnecessary subtle.  It
provides marginal benefit while increasing the probability that we screw up and
squash "ret" to zero.  Yeah, it might save some onion peeling, but if that happens
during production and now during initial development of a feature, then someone
has much bigger problems to worry about.

Unless there are objections, I'll fold in a patch to yield:

#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK

static bool nested_evmcs_is_valid_controls(enum evmcs_ctrl_type type, u32 val)
{
	return val & evmcs_get_unsupported_ctls(type);
}

int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
{
	if (CC(!nested_evmcs_is_valid_controls(EVMCS_PINCTRL,
					       vmcs12->pin_based_vm_exec_control)))
		return -EINVAL;

	if (CC(!nested_evmcs_is_valid_controls(EVMCS_2NDEXEC,
					       vmcs12->secondary_vm_exec_control)))
		return -EINVAL;

	if (CC(!nested_evmcs_is_valid_controls(EVMCS_EXIT_CTRLS,
					       vmcs12->vm_exit_controls)))
		return -EINVAL;

	if (CC(!nested_evmcs_is_valid_controls(EVMCS_ENTRY_CTRLS,
					       vmcs12->vm_entry_controls)))
		return -EINVAL;

	if (CC(!nested_evmcs_is_valid_controls(EVMCS_VMFUNC,
					       vmcs12->vm_function_control)))
		return -EINVAL;

	return 0;
}
Sean Christopherson Aug. 18, 2022, 5:19 p.m. UTC | #2
On Tue, Aug 02, 2022, Vitaly Kuznetsov wrote:
> diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
> index f886a8ff0342..4b809c79ae63 100644
> --- a/arch/x86/kvm/vmx/evmcs.h
> +++ b/arch/x86/kvm/vmx/evmcs.h
> @@ -37,16 +37,9 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
>   *	EPTP_LIST_ADDRESS               = 0x00002024,
>   *	VMREAD_BITMAP                   = 0x00002026,
>   *	VMWRITE_BITMAP                  = 0x00002028,
> - *
> - *	TSC_MULTIPLIER                  = 0x00002032,
>   *	PLE_GAP                         = 0x00004020,
>   *	PLE_WINDOW                      = 0x00004022,
>   *	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
> - *      GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
> - *      HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
> - *
> - * Currently unsupported in KVM:
> - *	GUEST_IA32_RTIT_CTL		= 0x00002814,

Almost forgot: is deleting this chunk of the comment intentional?

>   */
>  #define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
>  				    PIN_BASED_VMX_PREEMPTION_TIMER)
Vitaly Kuznetsov Aug. 19, 2022, 7:42 a.m. UTC | #3
Sean Christopherson <seanjc@google.com> writes:

> On Tue, Aug 02, 2022, Vitaly Kuznetsov wrote:
>> diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
>> index f886a8ff0342..4b809c79ae63 100644
>> --- a/arch/x86/kvm/vmx/evmcs.h
>> +++ b/arch/x86/kvm/vmx/evmcs.h
>> @@ -37,16 +37,9 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
>>   *	EPTP_LIST_ADDRESS               = 0x00002024,
>>   *	VMREAD_BITMAP                   = 0x00002026,
>>   *	VMWRITE_BITMAP                  = 0x00002028,
>> - *
>> - *	TSC_MULTIPLIER                  = 0x00002032,
>>   *	PLE_GAP                         = 0x00004020,
>>   *	PLE_WINDOW                      = 0x00004022,
>>   *	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
>> - *      GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
>> - *      HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
>> - *
>> - * Currently unsupported in KVM:
>> - *	GUEST_IA32_RTIT_CTL		= 0x00002814,
>
> Almost forgot: is deleting this chunk of the comment intentional?
>

Intentional or not (I forgot :-), GUEST_IA32_RTIT_CTL is supported/used
by KVM since

commit f99e3daf94ff35dd4a878d32ff66e1fd35223ad6
Author: Chao Peng <chao.p.peng@linux.intel.com>
Date:   Wed Oct 24 16:05:10 2018 +0800

    KVM: x86: Add Intel PT virtualization work mode

...
 
commit bf8c55d8dc094c85a3f98cd302a4dddb720dd63f
Author: Chao Peng <chao.p.peng@linux.intel.com>
Date:   Wed Oct 24 16:05:14 2018 +0800

    KVM: x86: Implement Intel PT MSRs read/write emulation

but there's no corresponding field in eVMCS. It would probably be better
to remove "Currently unsupported in KVM:" line leaving

"GUEST_IA32_RTIT_CTL             = 0x00002814" 

in place.
Vitaly Kuznetsov Aug. 19, 2022, 8:06 a.m. UTC | #4
Sean Christopherson <seanjc@google.com> writes:

> On Tue, Aug 02, 2022, Vitaly Kuznetsov wrote:
>> Enlightened VMCS v1 got updated and now includes the required fields
>> for TSC scaling and loading PERF_GLOBAL_CTRL upon VMENTER/VMEXIT
>> features. For Hyper-V on KVM enablement, KVM can just observe VMX control
>> MSRs and use the features (with or without eVMCS) when
>> possible. Hyper-V on KVM case is trickier because of live migration:
>> the new features require explicit enablement from VMM to not break
>> it. Luckily, the updated eVMCS revision comes with a feature bit in
>> CPUID.0x4000000A.EBX.
>
> The refactor to implement the 2-d array should go in a prep patch.  Unless you
> (or anyone else) objects to the feedback below, I'll do the split myself, post v6,
> and queue this up (without patch 1, and assuming there're no major issues in the
> back half of the series).

No objections from me, thanks!

>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>> ---
>>  arch/x86/kvm/hyperv.c     |  2 +-
>>  arch/x86/kvm/vmx/evmcs.c  | 88 +++++++++++++++++++++++++++++++--------
>>  arch/x86/kvm/vmx/evmcs.h  | 17 ++------
>>  arch/x86/kvm/vmx/nested.c |  2 +-
>>  arch/x86/kvm/vmx/vmx.c    |  2 +-
>>  5 files changed, 78 insertions(+), 33 deletions(-)
>> 
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index 1098915360ae..8a2b24f9bbf6 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -2552,7 +2552,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
>>  		case HYPERV_CPUID_NESTED_FEATURES:
>>  			ent->eax = evmcs_ver;
>>  			ent->eax |= HV_X64_NESTED_MSR_BITMAP;
>> -
>> +			ent->ebx |= HV_X64_NESTED_EVMCS1_2022_UPDATE;
>>  			break;
>>  
>>  		case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
>> diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
>> index 8bea5dea0341..e8497f9854a1 100644
>> --- a/arch/x86/kvm/vmx/evmcs.c
>> +++ b/arch/x86/kvm/vmx/evmcs.c
>> @@ -368,7 +368,60 @@ uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
>>  	return 0;
>>  }
>>  
>> -void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
>> +enum evmcs_revision {
>> +	EVMCSv1_2016,
>> +	EVMCSv1_2022,
>> +	EVMCS_REVISION_MAX,
>
> "MAX" is technically wrong, the "max" entry is usually the last valid entry.  This
> should be NR_EVMCS_REVISIONS, and then NR_EVMCS_CTRLS below.
>
>> +};
>> +
>> +enum evmcs_unsupported_ctrl_type {
>
> I think drop the "unsupported" here, because the naming gets weird when trying to
> use it for something other than indexing evmcs_unsupported_ctls (see bottom).
>
>> +	EVMCS_EXIT_CTLS,
>> +	EVMCS_ENTRY_CTLS,
>
> s/CTLS/CTRLS for consistency.
>
>> +	EVMCS_2NDEXEC,
>> +	EVMCS_PINCTRL,
>> +	EVMCS_VMFUNC,
>> +	EVMCS_CTRL_MAX,
>> +};
>> +
>> +static u32 evmcs_unsupported_ctls[EVMCS_CTRL_MAX][EVMCS_REVISION_MAX] = {
>
> This can be "const".  And s/ctls/ctrls for consistency.
>
>> +	[EVMCS_EXIT_CTLS] = {
>> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
>> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL,
>> +	},
>> +	[EVMCS_ENTRY_CTLS] = {
>> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMENTRY_CTRL | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
>> +		[EVMCSv1_2022] =  EVMCS1_UNSUPPORTED_VMENTRY_CTRL,
>> +	},
>> +	[EVMCS_2NDEXEC] = {
>> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_2NDEXEC | SECONDARY_EXEC_TSC_SCALING,
>> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_2NDEXEC,
>> +	},
>> +	[EVMCS_PINCTRL] = {
>> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_PINCTRL,
>> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_PINCTRL,
>> +	},
>> +	[EVMCS_VMFUNC] = {
>> +		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMFUNC,
>> +		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMFUNC,
>> +	},
>> +};
>> +
>> +static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
>> +				      enum evmcs_unsupported_ctrl_type ctrl_type)
>> +{
>> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
>> +	enum evmcs_revision evmcs_rev = EVMCSv1_2016;
>> +
>> +	if (!hv_vcpu)
>
> This is a functiontal change, and I don't think it's correct either.  Previously,
> KVM would apply the EVMCSv1_2016 filter irrespective of whether or not
> vcpu->arch.hyperv is non-NULL.  nested_enable_evmcs() doesn't require a Hyper-V
> vCPU, and AFAICT nothing requires a Hyper-V vCPU to use eVMCS.

Indeed, this *is* correct after PATCH11 when we get rid of VMX feature
MSR filtering for KVM-on-Hyper-V as the remaining use for
evmcs_get_unsupported_ctls() is Hyper-V on KVM and hv_vcpu is not NULL
there. As of this patch, this is incorrect as we're breaking e.g. Linux
guests on KVM on Hyper-V.

>
> So I believe this should be:
>
> 	if (hv_vcpu &&
> 	    hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
> 		evmcs_rev = EVMCSv1_2022;

This looks good to me, thanks!

>
>> +		return 0;
>> +
>> +	if (hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
>> +		evmcs_rev = EVMCSv1_2022;
>> +
>> +	return evmcs_unsupported_ctls[ctrl_type][evmcs_rev];
>> +}
>> +
>
> ...
>
>> -int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
>> +int nested_evmcs_check_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>>  {
>>  	int ret = 0;
>>  	u32 unsupp_ctl;
>>  
>>  	unsupp_ctl = vmcs12->pin_based_vm_exec_control &
>> -		EVMCS1_UNSUPPORTED_PINCTRL;
>> +		evmcs_get_unsupported_ctls(vcpu, EVMCS_PINCTRL);
>>  	if (unsupp_ctl) {
>>  		trace_kvm_nested_vmenter_failed(
>> -			"eVMCS: unsupported pin-based VM-execution controls",
>> +			"eVMCS: unsupported pin-based VM-execution controls: ",
>>  			unsupp_ctl);
>
> Egad!  This is all broken, at least in theory.  The second param to
> trace_kvm_nested_vmenter_failed() is the error code, not the bad value.  It mostly
> works because __print_symbolic() falls back to printing the hex value on error,
> but that relies on there being no collisions/matches between unsupp_ctl and the
> set of VMX_VMENTER_INSTRUCTION_ERRORS.  E.g. if there's a collision then the
> tracepoint will pretty print a string and cause confusion.
>
> And checking every control even after one fails seems unnecessary subtle.  It
> provides marginal benefit while increasing the probability that we screw up and
> squash "ret" to zero.  Yeah, it might save some onion peeling, but if that happens
> during production and now during initial development of a feature, then someone
> has much bigger problems to worry about.
>
> Unless there are objections, I'll fold in a patch to yield:
>
> #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
>
> static bool nested_evmcs_is_valid_controls(enum evmcs_ctrl_type type, u32 val)
> {
> 	return val & evmcs_get_unsupported_ctls(type);
> }
>
> int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
> {
> 	if (CC(!nested_evmcs_is_valid_controls(EVMCS_PINCTRL,
> 					       vmcs12->pin_based_vm_exec_control)))
> 		return -EINVAL;
>
> 	if (CC(!nested_evmcs_is_valid_controls(EVMCS_2NDEXEC,
> 					       vmcs12->secondary_vm_exec_control)))
> 		return -EINVAL;
>
> 	if (CC(!nested_evmcs_is_valid_controls(EVMCS_EXIT_CTRLS,
> 					       vmcs12->vm_exit_controls)))
> 		return -EINVAL;
>
> 	if (CC(!nested_evmcs_is_valid_controls(EVMCS_ENTRY_CTRLS,
> 					       vmcs12->vm_entry_controls)))
> 		return -EINVAL;
>
> 	if (CC(!nested_evmcs_is_valid_controls(EVMCS_VMFUNC,
> 					       vmcs12->vm_function_control)))
> 		return -EINVAL;
>
> 	return 0;
> }


Well, it's actually nice to see all the controls where KVM screwed up
without the need to instrument kernel, this way when someone comes with
an issue you can immediately see what's wrong in the trace
log. Honestly, I don't remember these firing outside of my development
environment, your patch to make things correct looks good to me.
Sean Christopherson Aug. 19, 2022, 2:49 p.m. UTC | #5
On Fri, Aug 19, 2022, Vitaly Kuznetsov wrote:
> Sean Christopherson <seanjc@google.com> writes:
> 
> > On Tue, Aug 02, 2022, Vitaly Kuznetsov wrote:
> >> diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
> >> index f886a8ff0342..4b809c79ae63 100644
> >> --- a/arch/x86/kvm/vmx/evmcs.h
> >> +++ b/arch/x86/kvm/vmx/evmcs.h
> >> @@ -37,16 +37,9 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
> >>   *	EPTP_LIST_ADDRESS               = 0x00002024,
> >>   *	VMREAD_BITMAP                   = 0x00002026,
> >>   *	VMWRITE_BITMAP                  = 0x00002028,
> >> - *
> >> - *	TSC_MULTIPLIER                  = 0x00002032,
> >>   *	PLE_GAP                         = 0x00004020,
> >>   *	PLE_WINDOW                      = 0x00004022,
> >>   *	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
> >> - *      GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
> >> - *      HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
> >> - *
> >> - * Currently unsupported in KVM:
> >> - *	GUEST_IA32_RTIT_CTL		= 0x00002814,
> >
> > Almost forgot: is deleting this chunk of the comment intentional?
> >
> 
> Intentional or not (I forgot :-), GUEST_IA32_RTIT_CTL is supported/used
> by KVM since
> 
> commit f99e3daf94ff35dd4a878d32ff66e1fd35223ad6
> Author: Chao Peng <chao.p.peng@linux.intel.com>
> Date:   Wed Oct 24 16:05:10 2018 +0800
> 
>     KVM: x86: Add Intel PT virtualization work mode
> 
> ...
>  
> commit bf8c55d8dc094c85a3f98cd302a4dddb720dd63f
> Author: Chao Peng <chao.p.peng@linux.intel.com>
> Date:   Wed Oct 24 16:05:14 2018 +0800
> 
>     KVM: x86: Implement Intel PT MSRs read/write emulation
> 
> but there's no corresponding field in eVMCS. It would probably be better
> to remove "Currently unsupported in KVM:" line leaving
> 
> "GUEST_IA32_RTIT_CTL             = 0x00002814" 
> 
> in place. 

GUEST_IA32_RTIT_CTL isn't supported for nested VMX though, which is how I
interpreted the "Currently unsupported in KVM:".  Would it be accurate to extend
that part of the comment to "Currently unsupported in KVM for nested VMX:"?
Vitaly Kuznetsov Aug. 19, 2022, 3:07 p.m. UTC | #6
Sean Christopherson <seanjc@google.com> writes:

> On Fri, Aug 19, 2022, Vitaly Kuznetsov wrote:
>> Sean Christopherson <seanjc@google.com> writes:
>> 
>> > On Tue, Aug 02, 2022, Vitaly Kuznetsov wrote:
>> >> diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
>> >> index f886a8ff0342..4b809c79ae63 100644
>> >> --- a/arch/x86/kvm/vmx/evmcs.h
>> >> +++ b/arch/x86/kvm/vmx/evmcs.h
>> >> @@ -37,16 +37,9 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs);
>> >>   *	EPTP_LIST_ADDRESS               = 0x00002024,
>> >>   *	VMREAD_BITMAP                   = 0x00002026,
>> >>   *	VMWRITE_BITMAP                  = 0x00002028,
>> >> - *
>> >> - *	TSC_MULTIPLIER                  = 0x00002032,
>> >>   *	PLE_GAP                         = 0x00004020,
>> >>   *	PLE_WINDOW                      = 0x00004022,
>> >>   *	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
>> >> - *      GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
>> >> - *      HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
>> >> - *
>> >> - * Currently unsupported in KVM:
>> >> - *	GUEST_IA32_RTIT_CTL		= 0x00002814,
>> >
>> > Almost forgot: is deleting this chunk of the comment intentional?
>> >
>> 
>> Intentional or not (I forgot :-), GUEST_IA32_RTIT_CTL is supported/used
>> by KVM since
>> 
>> commit f99e3daf94ff35dd4a878d32ff66e1fd35223ad6
>> Author: Chao Peng <chao.p.peng@linux.intel.com>
>> Date:   Wed Oct 24 16:05:10 2018 +0800
>> 
>>     KVM: x86: Add Intel PT virtualization work mode
>> 
>> ...
>>  
>> commit bf8c55d8dc094c85a3f98cd302a4dddb720dd63f
>> Author: Chao Peng <chao.p.peng@linux.intel.com>
>> Date:   Wed Oct 24 16:05:14 2018 +0800
>> 
>>     KVM: x86: Implement Intel PT MSRs read/write emulation
>> 
>> but there's no corresponding field in eVMCS. It would probably be better
>> to remove "Currently unsupported in KVM:" line leaving
>> 
>> "GUEST_IA32_RTIT_CTL             = 0x00002814" 
>> 
>> in place. 
>
> GUEST_IA32_RTIT_CTL isn't supported for nested VMX though, which is how I
> interpreted the "Currently unsupported in KVM:".  Would it be accurate to extend
> that part of the comment to "Currently unsupported in KVM for nested VMX:"?

Yea, sounds good to me. 

FWIW, there are other controls which are currently missing in KVM,
e.g. 'guest_ia32_lbr_ctl' (VMX field 0x2816) and we have no 'macro
shenanigans' to catch the moment when support for these gets introduced
in KVM. When this happens, we need to extend VMCS-to-eVMCS mapping
(vmcs_field_to_evmcs_1) to support KVM-on-Hyper-V case and, when the
corresponding field gets added to 'struct vmcs12',
copy_enlightened_to_vmcs12()/copy_vmcs12_to_enlightened(). The whole
process is manual and thus error prone...
Sean Christopherson Aug. 19, 2022, 5:02 p.m. UTC | #7
On Fri, Aug 19, 2022, Vitaly Kuznetsov wrote:
> Sean Christopherson <seanjc@google.com> writes:
> 
> > On Tue, Aug 02, 2022, Vitaly Kuznetsov wrote:
> >> +static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
> >> +				      enum evmcs_unsupported_ctrl_type ctrl_type)
> >> +{
> >> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> >> +	enum evmcs_revision evmcs_rev = EVMCSv1_2016;
> >> +
> >> +	if (!hv_vcpu)
> >
> > This is a functiontal change, and I don't think it's correct either.  Previously,
> > KVM would apply the EVMCSv1_2016 filter irrespective of whether or not
> > vcpu->arch.hyperv is non-NULL.  nested_enable_evmcs() doesn't require a Hyper-V
> > vCPU, and AFAICT nothing requires a Hyper-V vCPU to use eVMCS.
> 
> Indeed, this *is* correct after PATCH11 when we get rid of VMX feature
> MSR filtering for KVM-on-Hyper-V as the remaining use for
> evmcs_get_unsupported_ctls() is Hyper-V on KVM and hv_vcpu is not NULL
> there.

Hmm, nested_vmx_handle_enlightened_vmptrld() will fail without a Hyper-V vCPU, so
filtering eVMCS control iff there's a Hyper-V vCPU makes sense.  But that's a guest
visible change and should be a separate patch.

But that also raises the question of whether or not KVM should honor hyperv_enabled
when filtering MSRs.  Same question for nested VM-Enter.  nested_enlightened_vmentry()
will "fail" without an assist page, and the guest can't set the assist page without
hyperv_enabled==true, but nothing prevents the host from stuffing the assist page.

And on a very related topic, the handling of kvm_hv_vcpu_init() in kvm_hv_set_cpuid()
is buggy.  KVM will not report an error to userspace for KVM_SET_CPUID2 if allocation
fails.  If a later operation successfully create a Hyper-V vCPU, KVM will chug along
with Hyper-V enabled but without having cached the relevant Hyper-V CPUID info.

Less important is that kvm_hv_set_cpuid() should also zero out the CPUID cache if
Hyper-V is disabled.  I'm pretty sure sure all paths check hyperv_enabled before
consuming cpuid_cache, but it's unnecessarily risky.

If we fix the kvm_hv_set_cpuid() allocation failure, then we can also guarantee
that vcpu->arch.hyperv is non-NULL if vcpu->arch.hyperv_enabled==true.  And then
we can add gate guest eVMCS flow on hyperv_enabled, and evmcs_get_unsupported_ctls()
can then WARN if hv_vcpu is NULL.

Assuming I'm not overlooking something, I'll fold in yet more patches.
Vitaly Kuznetsov Aug. 22, 2022, 8:47 a.m. UTC | #8
Sean Christopherson <seanjc@google.com> writes:

> On Fri, Aug 19, 2022, Vitaly Kuznetsov wrote:
>> Sean Christopherson <seanjc@google.com> writes:
>> 
>> > On Tue, Aug 02, 2022, Vitaly Kuznetsov wrote:
>> >> +static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
>> >> +				      enum evmcs_unsupported_ctrl_type ctrl_type)
>> >> +{
>> >> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
>> >> +	enum evmcs_revision evmcs_rev = EVMCSv1_2016;
>> >> +
>> >> +	if (!hv_vcpu)
>> >
>> > This is a functiontal change, and I don't think it's correct either.  Previously,
>> > KVM would apply the EVMCSv1_2016 filter irrespective of whether or not
>> > vcpu->arch.hyperv is non-NULL.  nested_enable_evmcs() doesn't require a Hyper-V
>> > vCPU, and AFAICT nothing requires a Hyper-V vCPU to use eVMCS.
>> 
>> Indeed, this *is* correct after PATCH11 when we get rid of VMX feature
>> MSR filtering for KVM-on-Hyper-V as the remaining use for
>> evmcs_get_unsupported_ctls() is Hyper-V on KVM and hv_vcpu is not NULL
>> there.
>
> Hmm, nested_vmx_handle_enlightened_vmptrld() will fail without a Hyper-V vCPU, so
> filtering eVMCS control iff there's a Hyper-V vCPU makes sense.  But that's a guest
> visible change and should be a separate patch.
>

Yes, the change you suggested:

          if (hv_vcpu &&
              hv_vcpu->cpuid_cache.nested_eb & HV_X64_NESTED_EVMCS1_2022_UPDATE) 
			evmcs_rev = EVMCSv1_2022;

seems to keep the status quo so we can discuss dropping filtering when
!hv_vcpu separately.

> But that also raises the question of whether or not KVM should honor hyperv_enabled
> when filtering MSRs.  Same question for nested VM-Enter.  nested_enlightened_vmentry()
> will "fail" without an assist page, and the guest can't set the assist page without
> hyperv_enabled==true, but nothing prevents the host from stuffing the assist page.

The case sounds more like a misbehaving VMM to me. It would probably be
better to fail nested_enlightened_vmentry() immediately on !hyperv_enabled.

>
> And on a very related topic, the handling of kvm_hv_vcpu_init() in kvm_hv_set_cpuid()
> is buggy.  KVM will not report an error to userspace for KVM_SET_CPUID2 if allocation
> fails.  If a later operation successfully create a Hyper-V vCPU, KVM will chug along
> with Hyper-V enabled but without having cached the relevant Hyper-V
> CPUID info.

Indeed, that's probably because kvm_vcpu_after_set_cpuid() itself is
never supposed to fail and thus returns 'void'. I'm not up-to-date on
the discussion whether small allocations can actually fail (and whether
2832 bytes for 'struct kvm_vcpu_hv' is 'small') but propagating -ENOMEM
all the way up to VMM is likely the right way to go.

>
> Less important is that kvm_hv_set_cpuid() should also zero out the CPUID cache if
> Hyper-V is disabled.  I'm pretty sure sure all paths check hyperv_enabled before
> consuming cpuid_cache, but it's unnecessarily risky.

+1

>
> If we fix the kvm_hv_set_cpuid() allocation failure, then we can also guarantee
> that vcpu->arch.hyperv is non-NULL if vcpu->arch.hyperv_enabled==true.  And then
> we can add gate guest eVMCS flow on hyperv_enabled, and evmcs_get_unsupported_ctls()
> can then WARN if hv_vcpu is NULL.
>

Alternatively, we can just KVM_BUG_ON() in kvm_hv_set_cpuid() when
allocation fails, at least for the time being as the VM is likely
useless anyway.

> Assuming I'm not overlooking something, I'll fold in yet more patches.
>

Thanks for the thorough review here and don't hesitate to speak up when
you think it's too much of a change to do upon queueing)
Sean Christopherson Aug. 22, 2022, 4:50 p.m. UTC | #9
On Mon, Aug 22, 2022, Vitaly Kuznetsov wrote:
> Sean Christopherson <seanjc@google.com> writes:
> > But that also raises the question of whether or not KVM should honor hyperv_enabled
> > when filtering MSRs.  Same question for nested VM-Enter.  nested_enlightened_vmentry()
> > will "fail" without an assist page, and the guest can't set the assist page without
> > hyperv_enabled==true, but nothing prevents the host from stuffing the assist page.
> 
> The case sounds more like a misbehaving VMM to me. It would probably be
> better to fail nested_enlightened_vmentry() immediately on !hyperv_enabled.

Hmm, sort of.  If KVM fails explicitly fails nested VM-Enter, then allowing the
guest to read the VMX MSRs with the same buggy setup is odd, e.g. nested VMX is
effectively unsupported at that point since there is nothing the guest can do to
make nested VM-Enter succeed.  Extending the "fail VM-Enter" behavior would be to
inject #GP on RDMSR, and at that point KVM is well into "made up architecture"
behavior.

All in all, I don't think it's worth forcing the issue, even though I do agree that
the VMM is being weird if it's enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS but not
advertising Hyper-V.

> > If we fix the kvm_hv_set_cpuid() allocation failure, then we can also guarantee
> > that vcpu->arch.hyperv is non-NULL if vcpu->arch.hyperv_enabled==true.  And then
> > we can add gate guest eVMCS flow on hyperv_enabled, and evmcs_get_unsupported_ctls()
> > can then WARN if hv_vcpu is NULL.
> >
> 
> Alternatively, we can just KVM_BUG_ON() in kvm_hv_set_cpuid() when
> allocation fails, at least for the time being as the VM is likely
> useless anyway.

I'd prefer not to use KVM_BUG_ON() in this case.  A proper fix isn't that much
more code, and this isn't a KVM bug unless we conciously make it one :-)

> > Assuming I'm not overlooking something, I'll fold in yet more patches.
> >
> 
> Thanks for the thorough review here and don't hesitate to speak up when
> you think it's too much of a change to do upon queueing)

Heh, this definitely snowballed beyond "fixup on queue".  Let's sort out how to
address the filtering issue and then decide how to handle v6.
Vitaly Kuznetsov Aug. 22, 2022, 5:49 p.m. UTC | #10
Sean Christopherson <seanjc@google.com> writes:

> On Mon, Aug 22, 2022, Vitaly Kuznetsov wrote:
>> Sean Christopherson <seanjc@google.com> writes:
>> > But that also raises the question of whether or not KVM should honor hyperv_enabled
>> > when filtering MSRs.  Same question for nested VM-Enter.  nested_enlightened_vmentry()
>> > will "fail" without an assist page, and the guest can't set the assist page without
>> > hyperv_enabled==true, but nothing prevents the host from stuffing the assist page.
>> 
>> The case sounds more like a misbehaving VMM to me. It would probably be
>> better to fail nested_enlightened_vmentry() immediately on !hyperv_enabled.
>
> Hmm, sort of.  If KVM fails explicitly fails nested VM-Enter, then allowing the
> guest to read the VMX MSRs with the same buggy setup is odd, e.g. nested VMX is
> effectively unsupported at that point since there is nothing the guest can do to
> make nested VM-Enter succeed.  Extending the "fail VM-Enter" behavior would be to
> inject #GP on RDMSR, and at that point KVM is well into "made up architecture"
> behavior.
>
> All in all, I don't think it's worth forcing the issue, even though I do agree that
> the VMM is being weird if it's enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS but not
> advertising Hyper-V.

I keep thinking about KVM-on-KVM using Hyper-V features like eVMCS, eMSR
bitmap, 'l2' tlb flush,... when I can't sleep at night sometimes :-)

...

>> 
>> Thanks for the thorough review here and don't hesitate to speak up when
>> you think it's too much of a change to do upon queueing)
>
> Heh, this definitely snowballed beyond "fixup on queue".  Let's sort out how to
> address the filtering issue and then decide how to handle v6.
>

Yep, let's keep the snowball rolling! :-)
diff mbox series

Patch

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 1098915360ae..8a2b24f9bbf6 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2552,7 +2552,7 @@  int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 		case HYPERV_CPUID_NESTED_FEATURES:
 			ent->eax = evmcs_ver;
 			ent->eax |= HV_X64_NESTED_MSR_BITMAP;
-
+			ent->ebx |= HV_X64_NESTED_EVMCS1_2022_UPDATE;
 			break;
 
 		case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 8bea5dea0341..e8497f9854a1 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -368,7 +368,60 @@  uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
+enum evmcs_revision {
+	EVMCSv1_2016,
+	EVMCSv1_2022,
+	EVMCS_REVISION_MAX,
+};
+
+enum evmcs_unsupported_ctrl_type {
+	EVMCS_EXIT_CTLS,
+	EVMCS_ENTRY_CTLS,
+	EVMCS_2NDEXEC,
+	EVMCS_PINCTRL,
+	EVMCS_VMFUNC,
+	EVMCS_CTRL_MAX,
+};
+
+static u32 evmcs_unsupported_ctls[EVMCS_CTRL_MAX][EVMCS_REVISION_MAX] = {
+	[EVMCS_EXIT_CTLS] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
+		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMEXIT_CTRL,
+	},
+	[EVMCS_ENTRY_CTLS] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMENTRY_CTRL | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
+		[EVMCSv1_2022] =  EVMCS1_UNSUPPORTED_VMENTRY_CTRL,
+	},
+	[EVMCS_2NDEXEC] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_2NDEXEC | SECONDARY_EXEC_TSC_SCALING,
+		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_2NDEXEC,
+	},
+	[EVMCS_PINCTRL] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_PINCTRL,
+		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_PINCTRL,
+	},
+	[EVMCS_VMFUNC] = {
+		[EVMCSv1_2016] = EVMCS1_UNSUPPORTED_VMFUNC,
+		[EVMCSv1_2022] = EVMCS1_UNSUPPORTED_VMFUNC,
+	},
+};
+
+static u32 evmcs_get_unsupported_ctls(struct kvm_vcpu *vcpu,
+				      enum evmcs_unsupported_ctrl_type ctrl_type)
+{
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	enum evmcs_revision evmcs_rev = EVMCSv1_2016;
+
+	if (!hv_vcpu)
+		return 0;
+
+	if (hv_vcpu->cpuid_cache.nested_ebx & HV_X64_NESTED_EVMCS1_2022_UPDATE)
+		evmcs_rev = EVMCSv1_2022;
+
+	return evmcs_unsupported_ctls[ctrl_type][evmcs_rev];
+}
+
+void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 {
 	u32 ctl_low = (u32)*pdata;
 	u32 ctl_high = (u32)(*pdata >> 32);
@@ -380,72 +433,73 @@  void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata)
 	switch (msr_index) {
 	case MSR_IA32_VMX_EXIT_CTLS:
 	case MSR_IA32_VMX_TRUE_EXIT_CTLS:
-		ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+		ctl_high &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_EXIT_CTLS);
 		break;
 	case MSR_IA32_VMX_ENTRY_CTLS:
 	case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
-		ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+		ctl_high &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_ENTRY_CTLS);
 		break;
 	case MSR_IA32_VMX_PROCBASED_CTLS2:
-		ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+		ctl_high &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_2NDEXEC);
 		break;
 	case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
 	case MSR_IA32_VMX_PINBASED_CTLS:
-		ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+		ctl_high &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_PINCTRL);
 		break;
 	case MSR_IA32_VMX_VMFUNC:
-		ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC;
+		ctl_low &= ~evmcs_get_unsupported_ctls(vcpu, EVMCS_VMFUNC);
 		break;
 	}
 
 	*pdata = ctl_low | ((u64)ctl_high << 32);
 }
 
-int nested_evmcs_check_controls(struct vmcs12 *vmcs12)
+int nested_evmcs_check_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	int ret = 0;
 	u32 unsupp_ctl;
 
 	unsupp_ctl = vmcs12->pin_based_vm_exec_control &
-		EVMCS1_UNSUPPORTED_PINCTRL;
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_PINCTRL);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported pin-based VM-execution controls",
+			"eVMCS: unsupported pin-based VM-execution controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
 
 	unsupp_ctl = vmcs12->secondary_vm_exec_control &
-		EVMCS1_UNSUPPORTED_2NDEXEC;
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_2NDEXEC);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported secondary VM-execution controls",
+			"eVMCS: unsupported secondary VM-execution controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
 
 	unsupp_ctl = vmcs12->vm_exit_controls &
-		EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_EXIT_CTLS);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported VM-exit controls",
+			"eVMCS: unsupported VM-exit controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
 
 	unsupp_ctl = vmcs12->vm_entry_controls &
-		EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_ENTRY_CTLS);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported VM-entry controls",
+			"eVMCS: unsupported VM-entry controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
 
-	unsupp_ctl = vmcs12->vm_function_control & EVMCS1_UNSUPPORTED_VMFUNC;
+	unsupp_ctl = vmcs12->vm_function_control &
+		evmcs_get_unsupported_ctls(vcpu, EVMCS_VMFUNC);
 	if (unsupp_ctl) {
 		trace_kvm_nested_vmenter_failed(
-			"eVMCS: unsupported VM-function controls",
+			"eVMCS: unsupported VM-function controls: ",
 			unsupp_ctl);
 		ret = -EINVAL;
 	}
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index f886a8ff0342..4b809c79ae63 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -37,16 +37,9 @@  DECLARE_STATIC_KEY_FALSE(enable_evmcs);
  *	EPTP_LIST_ADDRESS               = 0x00002024,
  *	VMREAD_BITMAP                   = 0x00002026,
  *	VMWRITE_BITMAP                  = 0x00002028,
- *
- *	TSC_MULTIPLIER                  = 0x00002032,
  *	PLE_GAP                         = 0x00004020,
  *	PLE_WINDOW                      = 0x00004022,
  *	VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
- *      GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
- *      HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
- *
- * Currently unsupported in KVM:
- *	GUEST_IA32_RTIT_CTL		= 0x00002814,
  */
 #define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
 				    PIN_BASED_VMX_PREEMPTION_TIMER)
@@ -58,12 +51,10 @@  DECLARE_STATIC_KEY_FALSE(enable_evmcs);
 	 SECONDARY_EXEC_ENABLE_PML |					\
 	 SECONDARY_EXEC_ENABLE_VMFUNC |					\
 	 SECONDARY_EXEC_SHADOW_VMCS |					\
-	 SECONDARY_EXEC_TSC_SCALING |					\
 	 SECONDARY_EXEC_PAUSE_LOOP_EXITING)
 #define EVMCS1_UNSUPPORTED_VMEXIT_CTRL					\
-	(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |				\
-	 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
-#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+	(VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)
+#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (0)
 #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
 
 struct evmcs_field {
@@ -243,7 +234,7 @@  bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
 uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
 int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 			uint16_t *vmcs_version);
-void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata);
-int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
+void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+int nested_evmcs_check_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12);
 
 #endif /* __KVM_X86_VMX_EVMCS_H */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 270a1d8e4a6e..edb2f9c74d71 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2895,7 +2895,7 @@  static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
 		return -EINVAL;
 
 	if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
-		return nested_evmcs_check_controls(vmcs12);
+		return nested_evmcs_check_controls(vcpu, vmcs12);
 
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d7f8331d6f7e..bd6f8552102a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1933,7 +1933,7 @@  static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 */
 		if (!msr_info->host_initiated &&
 		    vmx->nested.enlightened_vmcs_enabled)
-			nested_evmcs_filter_control_msr(msr_info->index,
+			nested_evmcs_filter_control_msr(vcpu, msr_info->index,
 							&msr_info->data);
 		break;
 	case MSR_IA32_RTIT_CTL: