diff mbox

[3/4] KVM: nVMX: accurate emulation of MSR_IA32_CR{0,4}_FIXED1

Message ID 1479863680-117511-4-git-send-email-dmatlack@google.com (mailing list archive)
State New, archived
Headers show

Commit Message

David Matlack Nov. 23, 2016, 1:14 a.m. UTC
Set MSR_IA32_CR{0,4}_FIXED1 to match the CPU's MSRs.

In addition, MSR_IA32_CR4_FIXED1 should reflect the available CR4 bits
according to CPUID. Whenever guest CPUID is updated by userspace,
regenerate MSR_IA32_CR4_FIXED1 to match it.

Signed-off-by: David Matlack <dmatlack@google.com>
---
Note: "x86/cpufeature: Add User-Mode Instruction Prevention definitions" has
not hit kvm/master yet so the macros for X86_CR4_UMIP and X86_FEATURE_UMIP
are not available.

 arch/x86/kvm/vmx.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

Comments

Paolo Bonzini Nov. 23, 2016, 9:06 a.m. UTC | #1
> Set MSR_IA32_CR{0,4}_FIXED1 to match the CPU's MSRs.
> 
> In addition, MSR_IA32_CR4_FIXED1 should reflect the available CR4 bits
> according to CPUID. Whenever guest CPUID is updated by userspace,
> regenerate MSR_IA32_CR4_FIXED1 to match it.
> 
> Signed-off-by: David Matlack <dmatlack@google.com>

Oh, I thought userspace would do that!  Doing it in KVM is fine as well,
but then do we need to give userspace access to CR{0,4}_FIXED{0,1} at all?

Paolo

> ---
> Note: "x86/cpufeature: Add User-Mode Instruction Prevention definitions" has
> not hit kvm/master yet so the macros for X86_CR4_UMIP and X86_FEATURE_UMIP
> are not available.
> 
>  arch/x86/kvm/vmx.c | 54
>  +++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 51 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a2a5ad8..ac5d9c0 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2852,16 +2852,18 @@ static void nested_vmx_setup_ctls_msrs(struct
> vcpu_vmx *vmx)
>  		vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
>  
>  	/*
> -	 * These MSRs specify bits which the guest must keep fixed (on or off)
> +	 * These MSRs specify bits which the guest must keep fixed on
>  	 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
>  	 * We picked the standard core2 setting.
>  	 */
>  #define VMXON_CR0_ALWAYSON     (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
>  #define VMXON_CR4_ALWAYSON     X86_CR4_VMXE
>  	vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
> -	vmx->nested.nested_vmx_cr0_fixed1 = -1ULL;
>  	vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
> -	vmx->nested.nested_vmx_cr4_fixed1 = -1ULL;
> +
> +	/* These MSRs specify bits which the guest must keep fixed off. */
> +	rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
> +	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
>  
>  	/* highest index: VMX_PREEMPTION_TIMER_VALUE */
>  	vmx->nested.nested_vmx_vmcs_enum = 0x2e;
> @@ -9580,6 +9582,49 @@ static void vmcs_set_secondary_exec_control(u32
> new_ctl)
>  		     (new_ctl & ~mask) | (cur_ctl & mask));
>  }
>  
> +/*
> + * Generate MSR_IA32_VMX_CR4_FIXED1 according to CPUID. Only set bits
> + * (indicating "allowed-1") if they are supported in the guest's CPUID.
> + */
> +static void nested_vmx_cr4_fixed1_update(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	struct kvm_cpuid_entry2 *entry;
> +
> +#define update(_cr4_mask, _reg, _cpuid_mask) do {			\
> +	if (entry && (entry->_reg & (_cpuid_mask)))			\
> +		vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask);	\
> +} while (0)
> +
> +	vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
> +
> +	entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
> +	update(X86_CR4_VME,        edx, bit(X86_FEATURE_VME));
> +	update(X86_CR4_PVI,        edx, bit(X86_FEATURE_VME));
> +	update(X86_CR4_TSD,        edx, bit(X86_FEATURE_TSC));
> +	update(X86_CR4_DE,         edx, bit(X86_FEATURE_DE));
> +	update(X86_CR4_PSE,        edx, bit(X86_FEATURE_PSE));
> +	update(X86_CR4_PAE,        edx, bit(X86_FEATURE_PAE));
> +	update(X86_CR4_MCE,        edx, bit(X86_FEATURE_MCE));
> +	update(X86_CR4_PGE,        edx, bit(X86_FEATURE_PGE));
> +	update(X86_CR4_OSFXSR,     edx, bit(X86_FEATURE_FXSR));
> +	update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
> +	update(X86_CR4_VMXE,       ecx, bit(X86_FEATURE_VMX));
> +	update(X86_CR4_SMXE,       ecx, bit(X86_FEATURE_SMX));
> +	update(X86_CR4_PCIDE,      ecx, bit(X86_FEATURE_PCID));
> +	update(X86_CR4_OSXSAVE,    ecx, bit(X86_FEATURE_XSAVE));
> +
> +	entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
> +	update(X86_CR4_FSGSBASE,   ebx, bit(X86_FEATURE_FSGSBASE));
> +	update(X86_CR4_SMEP,       ebx, bit(X86_FEATURE_SMEP));
> +	update(X86_CR4_SMAP,       ebx, bit(X86_FEATURE_SMAP));
> +	update(X86_CR4_PKE,        ecx, bit(X86_FEATURE_PKU));
> +	/* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
> +	update(bit(11),            ecx, bit(2));
> +
> +#undef update
> +}
> +
>  static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_cpuid_entry2 *best;
> @@ -9621,6 +9666,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>  	else
>  		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
>  			~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
> +
> +	if (nested_vmx_allowed(vcpu))
> +		nested_vmx_cr4_fixed1_update(vcpu);
>  }
>  
>  static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2
>  *entry)
> --
> 2.8.0.rc3.226.g39d4020
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Matlack Nov. 23, 2016, 7:16 p.m. UTC | #2
On Wed, Nov 23, 2016 at 1:06 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>> Set MSR_IA32_CR{0,4}_FIXED1 to match the CPU's MSRs.
>>
>> In addition, MSR_IA32_CR4_FIXED1 should reflect the available CR4 bits
>> according to CPUID. Whenever guest CPUID is updated by userspace,
>> regenerate MSR_IA32_CR4_FIXED1 to match it.
>>
>> Signed-off-by: David Matlack <dmatlack@google.com>
>
> Oh, I thought userspace would do that!  Doing it in KVM is fine as well,
> but then do we need to give userspace access to CR{0,4}_FIXED{0,1} at all?

I think it should be safe for userspace to skip restoring CR4_FIXED1,
since it is 100% generated based on CPUID. But I'd prefer to keep it
accessible from userspace, for consistency with the other VMX MSRs and
for flexibility. The auditing should ensure userspace doesn't restore
a CR4_FIXED1 that is inconsistent with CPUID.

Userspace should restore CR0_FIXED1 in case future CPUs change which
bits of CR0 are valid in VMX operation. Userspace should also restore
CR{0,4}_FIXED0 so we have the flexibility to change the defaults in
KVM. Both of these situations seem unlikely but we might as well play
it safe, the cost is small.

>
> Paolo
>
>> ---
>> Note: "x86/cpufeature: Add User-Mode Instruction Prevention definitions" has
>> not hit kvm/master yet so the macros for X86_CR4_UMIP and X86_FEATURE_UMIP
>> are not available.
>>
>>  arch/x86/kvm/vmx.c | 54
>>  +++++++++++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 51 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index a2a5ad8..ac5d9c0 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -2852,16 +2852,18 @@ static void nested_vmx_setup_ctls_msrs(struct
>> vcpu_vmx *vmx)
>>               vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
>>
>>       /*
>> -      * These MSRs specify bits which the guest must keep fixed (on or off)
>> +      * These MSRs specify bits which the guest must keep fixed on
>>        * while L1 is in VMXON mode (in L1's root mode, or running an L2).
>>        * We picked the standard core2 setting.
>>        */
>>  #define VMXON_CR0_ALWAYSON     (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
>>  #define VMXON_CR4_ALWAYSON     X86_CR4_VMXE
>>       vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
>> -     vmx->nested.nested_vmx_cr0_fixed1 = -1ULL;
>>       vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
>> -     vmx->nested.nested_vmx_cr4_fixed1 = -1ULL;
>> +
>> +     /* These MSRs specify bits which the guest must keep fixed off. */
>> +     rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
>> +     rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
>>
>>       /* highest index: VMX_PREEMPTION_TIMER_VALUE */
>>       vmx->nested.nested_vmx_vmcs_enum = 0x2e;
>> @@ -9580,6 +9582,49 @@ static void vmcs_set_secondary_exec_control(u32
>> new_ctl)
>>                    (new_ctl & ~mask) | (cur_ctl & mask));
>>  }
>>
>> +/*
>> + * Generate MSR_IA32_VMX_CR4_FIXED1 according to CPUID. Only set bits
>> + * (indicating "allowed-1") if they are supported in the guest's CPUID.
>> + */
>> +static void nested_vmx_cr4_fixed1_update(struct kvm_vcpu *vcpu)
>> +{
>> +     struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +     struct kvm_cpuid_entry2 *entry;
>> +
>> +#define update(_cr4_mask, _reg, _cpuid_mask) do {                    \
>> +     if (entry && (entry->_reg & (_cpuid_mask)))                     \
>> +             vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask);       \
>> +} while (0)
>> +
>> +     vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
>> +
>> +     entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
>> +     update(X86_CR4_VME,        edx, bit(X86_FEATURE_VME));
>> +     update(X86_CR4_PVI,        edx, bit(X86_FEATURE_VME));
>> +     update(X86_CR4_TSD,        edx, bit(X86_FEATURE_TSC));
>> +     update(X86_CR4_DE,         edx, bit(X86_FEATURE_DE));
>> +     update(X86_CR4_PSE,        edx, bit(X86_FEATURE_PSE));
>> +     update(X86_CR4_PAE,        edx, bit(X86_FEATURE_PAE));
>> +     update(X86_CR4_MCE,        edx, bit(X86_FEATURE_MCE));
>> +     update(X86_CR4_PGE,        edx, bit(X86_FEATURE_PGE));
>> +     update(X86_CR4_OSFXSR,     edx, bit(X86_FEATURE_FXSR));
>> +     update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
>> +     update(X86_CR4_VMXE,       ecx, bit(X86_FEATURE_VMX));
>> +     update(X86_CR4_SMXE,       ecx, bit(X86_FEATURE_SMX));
>> +     update(X86_CR4_PCIDE,      ecx, bit(X86_FEATURE_PCID));
>> +     update(X86_CR4_OSXSAVE,    ecx, bit(X86_FEATURE_XSAVE));
>> +
>> +     entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
>> +     update(X86_CR4_FSGSBASE,   ebx, bit(X86_FEATURE_FSGSBASE));
>> +     update(X86_CR4_SMEP,       ebx, bit(X86_FEATURE_SMEP));
>> +     update(X86_CR4_SMAP,       ebx, bit(X86_FEATURE_SMAP));
>> +     update(X86_CR4_PKE,        ecx, bit(X86_FEATURE_PKU));
>> +     /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
>> +     update(bit(11),            ecx, bit(2));
>> +
>> +#undef update
>> +}
>> +
>>  static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>>  {
>>       struct kvm_cpuid_entry2 *best;
>> @@ -9621,6 +9666,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>>       else
>>               to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
>>                       ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
>> +
>> +     if (nested_vmx_allowed(vcpu))
>> +             nested_vmx_cr4_fixed1_update(vcpu);
>>  }
>>
>>  static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2
>>  *entry)
>> --
>> 2.8.0.rc3.226.g39d4020
>>
>>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Nov. 23, 2016, 7:24 p.m. UTC | #3
On 23/11/2016 20:16, David Matlack wrote:
> > Oh, I thought userspace would do that!  Doing it in KVM is fine as well,
> > but then do we need to give userspace access to CR{0,4}_FIXED{0,1} at all?
>
> I think it should be safe for userspace to skip restoring CR4_FIXED1,
> since it is 100% generated based on CPUID. But I'd prefer to keep it
> accessible from userspace, for consistency with the other VMX MSRs and
> for flexibility. The auditing should ensure userspace doesn't restore
> a CR4_FIXED1 that is inconsistent with CPUID.

Or would it just allow userspace to put anything into it, even if it's
inconsistent with CPUID, as long as it's consistent with the host?

> Userspace should restore CR0_FIXED1 in case future CPUs change which
> bits of CR0 are valid in VMX operation. Userspace should also restore
> CR{0,4}_FIXED0 so we have the flexibility to change the defaults in
> KVM. Both of these situations seem unlikely but we might as well play
> it safe, the cost is small.

I disagree, there is always a cost.  Besides the fact that it's
unlikely that there'll be any future CR0 bits at all, any changes would
most likely be keyed by a new CPUID bit (the same as CR4) or execution
control (the same as unrestricted guest).

In the end, since we assume that userspace (any) has no idea of what to
do with it, I see no good reason to make the MSRs available.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Matlack Nov. 23, 2016, 10:07 p.m. UTC | #4
On Wed, Nov 23, 2016 at 11:24 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 23/11/2016 20:16, David Matlack wrote:
>> > Oh, I thought userspace would do that!  Doing it in KVM is fine as well,
>> > but then do we need to give userspace access to CR{0,4}_FIXED{0,1} at all?
>>
>> I think it should be safe for userspace to skip restoring CR4_FIXED1,
>> since it is 100% generated based on CPUID. But I'd prefer to keep it
>> accessible from userspace, for consistency with the other VMX MSRs and
>> for flexibility. The auditing should ensure userspace doesn't restore
>> a CR4_FIXED1 that is inconsistent with CPUID.
>
> Or would it just allow userspace to put anything into it, even if it's
> inconsistent with CPUID, as long as it's consistent with the host?

It would not allow anything inconsistent with guest CPUID. The
auditing on restore of CR4_FIXED1 compares the new value with
vmx->nested.nested_vmx_cr4_fixed1, which is updated as part of setting
the guest's CPUID.

>
>> Userspace should restore CR0_FIXED1 in case future CPUs change which
>> bits of CR0 are valid in VMX operation. Userspace should also restore
>> CR{0,4}_FIXED0 so we have the flexibility to change the defaults in
>> KVM. Both of these situations seem unlikely but we might as well play
>> it safe, the cost is small.
>
> I disagree, there is always a cost.  Besides the fact that it's
> unlikely that there'll be any future CR0 bits at all, any changes would
> most likely be keyed by a new CPUID bit (the same as CR4) or execution
> control (the same as unrestricted guest).

That's true. So CR0_FIXED1 would not need to be accessible from
userspace either. This patch would need to be a little different then:
vmx_cpuid_update should also update vmx->nested.nested_vmx_cr0_fixed1
to 0xffffffff.

A downside of this scheme is we'd have to remember to update
nested_vmx_cr4_fixed1_update() before giving VMs new CPUID bits. If we
forget, a VM could end up with different values for CR{0,4}_FIXED0 for
the same CPUID depending on which version of KVM you're running on.

Hm, now I'm thinking you were right in the beginning. Userspace should
generate CR{0,4}_FIXED1, not the kernel. And KVM should allow
userspace to save/restore them.

>
> In the end, since we assume that userspace (any) has no idea of what to
> do with it, I see no good reason to make the MSRs available.
>
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Nov. 23, 2016, 10:11 p.m. UTC | #5
On 23/11/2016 23:07, David Matlack wrote:
> A downside of this scheme is we'd have to remember to update
> nested_vmx_cr4_fixed1_update() before giving VMs new CPUID bits. If we
> forget, a VM could end up with different values for CR{0,4}_FIXED0 for
> the same CPUID depending on which version of KVM you're running on.

If userspace doesn't obey KVM_GET_SUPPORTED_CPUID, all bets are off
anyway, so I don't think it's a big deal.  However, if you want to make
it generated by userspace, that would be fine as well!  That would
simply entail removing this patch, wouldn't it?

Paolo

> Hm, now I'm thinking you were right in the beginning. Userspace should
> generate CR{0,4}_FIXED1, not the kernel. And KVM should allow
> userspace to save/restore them.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Matlack Nov. 23, 2016, 11:28 p.m. UTC | #6
On Wed, Nov 23, 2016 at 2:11 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 23/11/2016 23:07, David Matlack wrote:
>> A downside of this scheme is we'd have to remember to update
>> nested_vmx_cr4_fixed1_update() before giving VMs new CPUID bits. If we
>> forget, a VM could end up with different values for CR{0,4}_FIXED0 for
>> the same CPUID depending on which version of KVM you're running on.
>
> If userspace doesn't obey KVM_GET_SUPPORTED_CPUID, all bets are off
> anyway, so I don't think it's a big deal.  However, if you want to make
> it generated by userspace, that would be fine as well!

Ok let's generate them in userspace.

> That would simply entail removing this patch, wouldn't it?

Mostly. The first half of the patch (initialize from host MSRs) should stay.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Matlack Nov. 28, 2016, 9:51 p.m. UTC | #7
On Wed, Nov 23, 2016 at 3:28 PM, David Matlack <dmatlack@google.com> wrote:
> On Wed, Nov 23, 2016 at 2:11 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>> On 23/11/2016 23:07, David Matlack wrote:
>>> A downside of this scheme is we'd have to remember to update
>>> nested_vmx_cr4_fixed1_update() before giving VMs new CPUID bits. If we
>>> forget, a VM could end up with different values for CR{0,4}_FIXED0 for
>>> the same CPUID depending on which version of KVM you're running on.

I've realized my concern here doesn't make sense. Such a VM would
likely fail to enter VMX operation, or #GP (unexpectedly) at some
point later. Linux, for example, does not appear to consult
MSR_IA32_VMX_CR4_FIXED1 when determining which bits of CR4 it can use
(regardless of whether it is in VMX operation or not).

>>
>> If userspace doesn't obey KVM_GET_SUPPORTED_CPUID, all bets are off
>> anyway, so I don't think it's a big deal.  However, if you want to make
>> it generated by userspace, that would be fine as well!
>
> Ok let's generate them in userspace.

I'm more inclined to generate them in the kernel, given the above.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a2a5ad8..ac5d9c0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2852,16 +2852,18 @@  static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
 
 	/*
-	 * These MSRs specify bits which the guest must keep fixed (on or off)
+	 * These MSRs specify bits which the guest must keep fixed on
 	 * while L1 is in VMXON mode (in L1's root mode, or running an L2).
 	 * We picked the standard core2 setting.
 	 */
 #define VMXON_CR0_ALWAYSON     (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
 #define VMXON_CR4_ALWAYSON     X86_CR4_VMXE
 	vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
-	vmx->nested.nested_vmx_cr0_fixed1 = -1ULL;
 	vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
-	vmx->nested.nested_vmx_cr4_fixed1 = -1ULL;
+
+	/* These MSRs specify bits which the guest must keep fixed off. */
+	rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
+	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
 
 	/* highest index: VMX_PREEMPTION_TIMER_VALUE */
 	vmx->nested.nested_vmx_vmcs_enum = 0x2e;
@@ -9580,6 +9582,49 @@  static void vmcs_set_secondary_exec_control(u32 new_ctl)
 		     (new_ctl & ~mask) | (cur_ctl & mask));
 }
 
+/*
+ * Generate MSR_IA32_VMX_CR4_FIXED1 according to CPUID. Only set bits
+ * (indicating "allowed-1") if they are supported in the guest's CPUID.
+ */
+static void nested_vmx_cr4_fixed1_update(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct kvm_cpuid_entry2 *entry;
+
+#define update(_cr4_mask, _reg, _cpuid_mask) do {			\
+	if (entry && (entry->_reg & (_cpuid_mask)))			\
+		vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask);	\
+} while (0)
+
+	vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
+
+	entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+	update(X86_CR4_VME,        edx, bit(X86_FEATURE_VME));
+	update(X86_CR4_PVI,        edx, bit(X86_FEATURE_VME));
+	update(X86_CR4_TSD,        edx, bit(X86_FEATURE_TSC));
+	update(X86_CR4_DE,         edx, bit(X86_FEATURE_DE));
+	update(X86_CR4_PSE,        edx, bit(X86_FEATURE_PSE));
+	update(X86_CR4_PAE,        edx, bit(X86_FEATURE_PAE));
+	update(X86_CR4_MCE,        edx, bit(X86_FEATURE_MCE));
+	update(X86_CR4_PGE,        edx, bit(X86_FEATURE_PGE));
+	update(X86_CR4_OSFXSR,     edx, bit(X86_FEATURE_FXSR));
+	update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
+	update(X86_CR4_VMXE,       ecx, bit(X86_FEATURE_VMX));
+	update(X86_CR4_SMXE,       ecx, bit(X86_FEATURE_SMX));
+	update(X86_CR4_PCIDE,      ecx, bit(X86_FEATURE_PCID));
+	update(X86_CR4_OSXSAVE,    ecx, bit(X86_FEATURE_XSAVE));
+
+	entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
+	update(X86_CR4_FSGSBASE,   ebx, bit(X86_FEATURE_FSGSBASE));
+	update(X86_CR4_SMEP,       ebx, bit(X86_FEATURE_SMEP));
+	update(X86_CR4_SMAP,       ebx, bit(X86_FEATURE_SMAP));
+	update(X86_CR4_PKE,        ecx, bit(X86_FEATURE_PKU));
+	/* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
+	update(bit(11),            ecx, bit(2));
+
+#undef update
+}
+
 static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -9621,6 +9666,9 @@  static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 	else
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
 			~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+
+	if (nested_vmx_allowed(vcpu))
+		nested_vmx_cr4_fixed1_update(vcpu);
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)