diff mbox

[v3,01/13] nEPT: Support LOAD_IA32_EFER entry/exit controls for L1

Message ID 1368939152-11406-1-git-send-email-jun.nakajima@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nakajima, Jun May 19, 2013, 4:52 a.m. UTC
From: Nadav Har'El <nyh@il.ibm.com>

Recent KVM, since http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
switch the EFER MSR when EPT is used and the host and guest have different
NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
and want to be able to run recent KVM as L1, we need to allow L1 to use this
EFER switching feature.

To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if available,
and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
support for the former (the latter is still unsupported).

Nested entry and exit emulation (prepare_vmcs_02 and load_vmcs12_host_state,
respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So all
that's left to do in this patch is to properly advertise this feature to L1.

Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by using
vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
support this feature, regardless of whether the host supports it.

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
---
 arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

Comments

Paolo Bonzini May 20, 2013, 12:33 p.m. UTC | #1
Il 19/05/2013 06:52, Jun Nakajima ha scritto:
> From: Nadav Har'El <nyh@il.ibm.com>
> 
> Recent KVM, since http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
> switch the EFER MSR when EPT is used and the host and guest have different
> NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
> and want to be able to run recent KVM as L1, we need to allow L1 to use this
> EFER switching feature.
> 
> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if available,
> and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
> support for the former (the latter is still unsupported).
> 
> Nested entry and exit emulation (prepare_vmcs_02 and load_vmcs12_host_state,
> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So all
> that's left to do in this patch is to properly advertise this feature to L1.
> 
> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by using
> vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
> support this feature, regardless of whether the host supports it.
> 
> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> ---
>  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
>  1 file changed, 16 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 260a919..fb9cae5 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2192,7 +2192,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>  #else
>  	nested_vmx_exit_ctls_high = 0;
>  #endif
> -	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
> +	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> +				      VM_EXIT_LOAD_IA32_EFER);
>  
>  	/* entry controls */
>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
> @@ -2201,8 +2202,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>  	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>  	nested_vmx_entry_ctls_high &=
>  		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
> -	nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> -
> +	nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
> +				       VM_ENTRY_LOAD_IA32_EFER);
>  	/* cpu-based controls */
>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
>  		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
> @@ -7492,10 +7493,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
>  	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
>  
> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */
> -	vmcs_write32(VM_EXIT_CONTROLS,
> -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
> -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
> +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
> +	 * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
> +	 * bits are further modified by vmx_set_efer() below.
> +	 */
> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> +
> +	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
> +	 * emulated by vmx_set_efer(), below.

VM_ENTRY_LOAD_IA32_EFER is not emulated by vmx_set_efer, so:

    /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
     * are emulated below.  VM_ENTRY_IA32E_MODE is handled in
     * vmx_set_efer().  */

Paolo

> +	 */
> +	vmcs_write32(VM_ENTRY_CONTROLS,
> +		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
> +			~VM_ENTRY_IA32E_MODE) |
>  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
>  
>  	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z July 2, 2013, 3:01 a.m. UTC | #2
Since this series is pending in mail list for long time. And it's really a big feature for Nested. Also, I doubt the original authors(Jun and Nahav)should not have enough time to continue it. So I will pick it up. :)

See comments below:

Paolo Bonzini wrote on 2013-05-20:
> Il 19/05/2013 06:52, Jun Nakajima ha scritto:
> > From: Nadav Har'El <nyh@il.ibm.com>
> >
> > Recent KVM, since
> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
> > switch the EFER MSR when EPT is used and the host and guest have different
> > NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
> > and want to be able to run recent KVM as L1, we need to allow L1 to use this
> > EFER switching feature.
> >
> > To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if
> available,
> > and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
> > support for the former (the latter is still unsupported).
> >
> > Nested entry and exit emulation (prepare_vmcs_02 and
> load_vmcs12_host_state,
> > respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So
> all
> > that's left to do in this patch is to properly advertise this feature to L1.
> >
> > Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by
> using
> > vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
> > support this feature, regardless of whether the host supports it.
> >
> > Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> > Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> > Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> > ---
> >  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
> >  1 file changed, 16 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 260a919..fb9cae5 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -2192,7 +2192,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> >  #else
> >  	nested_vmx_exit_ctls_high = 0;
> >  #endif
> > -	nested_vmx_exit_ctls_high |=
> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
> > +	nested_vmx_exit_ctls_high |=
> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> > +				      VM_EXIT_LOAD_IA32_EFER);
> >
> >  	/* entry controls */
> >  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
> > @@ -2201,8 +2202,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> >  	nested_vmx_entry_ctls_low =
> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> >  	nested_vmx_entry_ctls_high &=
> >  		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
> > -	nested_vmx_entry_ctls_high |=
> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> > -
> > +	nested_vmx_entry_ctls_high |=
> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
> > +				       VM_ENTRY_LOAD_IA32_EFER);
> >  	/* cpu-based controls */
> >  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
> >  		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
> > @@ -7492,10 +7493,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
> struct vmcs12 *vmcs12)
> >  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
> >  	vmcs_writel(CR0_GUEST_HOST_MASK,
> ~vcpu->arch.cr0_guest_owned_bits);
> >
> > -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer
> below */
> > -	vmcs_write32(VM_EXIT_CONTROLS,
> > -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
> > -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
> > +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
> > +	 * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
> > +	 * bits are further modified by vmx_set_efer() below.
> > +	 */
> > +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
This is wrong. We cannot use L0 exit control directly.
LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT, ACK_INTR_ON_EXIT should use host's exit control. But others, still need use (vmcs12|host).

> > +
> > +	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
> are
> > +	 * emulated by vmx_set_efer(), below.
> 
> VM_ENTRY_LOAD_IA32_EFER is not emulated by vmx_set_efer, so:
VM_ENTRY_LOAD_IA32_EFER is hanlded in setup_msrs(), and vmx_set_efer already call it.

> 
>     /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
>      * are emulated below.  VM_ENTRY_IA32E_MODE is handled in
>      * vmx_set_efer().  */
> 
> Paolo
> 
> > +	 */
> > +	vmcs_write32(VM_ENTRY_CONTROLS,
> > +		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
> > +			~VM_ENTRY_IA32E_MODE) |
> >  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
> >
> >  	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)
> >
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 2, 2013, 1:59 p.m. UTC | #3
On Tue, Jul 02, 2013 at 03:01:24AM +0000, Zhang, Yang Z wrote:
> Since this series is pending in mail list for long time. And it's really a big feature for Nested. Also, I doubt the original authors(Jun and Nahav)should not have enough time to continue it. So I will pick it up. :)
> 
> See comments below:
> 
> Paolo Bonzini wrote on 2013-05-20:
> > Il 19/05/2013 06:52, Jun Nakajima ha scritto:
> > > From: Nadav Har'El <nyh@il.ibm.com>
> > >
> > > Recent KVM, since
> > http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
> > > switch the EFER MSR when EPT is used and the host and guest have different
> > > NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
> > > and want to be able to run recent KVM as L1, we need to allow L1 to use this
> > > EFER switching feature.
> > >
> > > To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if
> > available,
> > > and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
> > > support for the former (the latter is still unsupported).
> > >
> > > Nested entry and exit emulation (prepare_vmcs_02 and
> > load_vmcs12_host_state,
> > > respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So
> > all
> > > that's left to do in this patch is to properly advertise this feature to L1.
> > >
> > > Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by
> > using
> > > vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
> > > support this feature, regardless of whether the host supports it.
> > >
> > > Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> > > Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> > > Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> > > ---
> > >  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
> > >  1 file changed, 16 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > > index 260a919..fb9cae5 100644
> > > --- a/arch/x86/kvm/vmx.c
> > > +++ b/arch/x86/kvm/vmx.c
> > > @@ -2192,7 +2192,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> > >  #else
> > >  	nested_vmx_exit_ctls_high = 0;
> > >  #endif
> > > -	nested_vmx_exit_ctls_high |=
> > VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
> > > +	nested_vmx_exit_ctls_high |=
> > (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> > > +				      VM_EXIT_LOAD_IA32_EFER);
> > >
> > >  	/* entry controls */
> > >  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
> > > @@ -2201,8 +2202,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> > >  	nested_vmx_entry_ctls_low =
> > VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> > >  	nested_vmx_entry_ctls_high &=
> > >  		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
> > > -	nested_vmx_entry_ctls_high |=
> > VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> > > -
> > > +	nested_vmx_entry_ctls_high |=
> > (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
> > > +				       VM_ENTRY_LOAD_IA32_EFER);
> > >  	/* cpu-based controls */
> > >  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
> > >  		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
> > > @@ -7492,10 +7493,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
> > struct vmcs12 *vmcs12)
> > >  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
> > >  	vmcs_writel(CR0_GUEST_HOST_MASK,
> > ~vcpu->arch.cr0_guest_owned_bits);
> > >
> > > -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer
> > below */
> > > -	vmcs_write32(VM_EXIT_CONTROLS,
> > > -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
> > > -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
> > > +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
> > > +	 * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
> > > +	 * bits are further modified by vmx_set_efer() below.
> > > +	 */
> > > +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> This is wrong. We cannot use L0 exit control directly.
> LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT, ACK_INTR_ON_EXIT should use host's exit control. But others, still need use (vmcs12|host).
> 
I do not see why. We always intercept DR7/PAT/EFER, so save is emulated
too. Host address space size always come from L0 and preemption timer is
not supported for nested IIRC and when it will be host will have to save
it on exit anyway for correct emulation.

> > > +
> > > +	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
> > are
> > > +	 * emulated by vmx_set_efer(), below.
> > 
> > VM_ENTRY_LOAD_IA32_EFER is not emulated by vmx_set_efer, so:
> VM_ENTRY_LOAD_IA32_EFER is hanlded in setup_msrs(), and vmx_set_efer already call it.
> 
> > 
> >     /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
> >      * are emulated below.  VM_ENTRY_IA32E_MODE is handled in
> >      * vmx_set_efer().  */
> > 
> > Paolo
> > 
> > > +	 */
> > > +	vmcs_write32(VM_ENTRY_CONTROLS,
> > > +		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
> > > +			~VM_ENTRY_IA32E_MODE) |
> > >  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
> > >
> > >  	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)
> > >
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> Best regards,
> Yang
> 

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka July 2, 2013, 2:28 p.m. UTC | #4
On 2013-07-02 15:59, Gleb Natapov wrote:
> On Tue, Jul 02, 2013 at 03:01:24AM +0000, Zhang, Yang Z wrote:
>> Since this series is pending in mail list for long time. And it's really a big feature for Nested. Also, I doubt the original authors(Jun and Nahav)should not have enough time to continue it. So I will pick it up. :)
>>
>> See comments below:
>>
>> Paolo Bonzini wrote on 2013-05-20:
>>> Il 19/05/2013 06:52, Jun Nakajima ha scritto:
>>>> From: Nadav Har'El <nyh@il.ibm.com>
>>>>
>>>> Recent KVM, since
>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
>>>> switch the EFER MSR when EPT is used and the host and guest have different
>>>> NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
>>>> and want to be able to run recent KVM as L1, we need to allow L1 to use this
>>>> EFER switching feature.
>>>>
>>>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if
>>> available,
>>>> and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
>>>> support for the former (the latter is still unsupported).
>>>>
>>>> Nested entry and exit emulation (prepare_vmcs_02 and
>>> load_vmcs12_host_state,
>>>> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So
>>> all
>>>> that's left to do in this patch is to properly advertise this feature to L1.
>>>>
>>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by
>>> using
>>>> vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
>>>> support this feature, regardless of whether the host supports it.
>>>>
>>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
>>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
>>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
>>>> ---
>>>>  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
>>>>  1 file changed, 16 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>> index 260a919..fb9cae5 100644
>>>> --- a/arch/x86/kvm/vmx.c
>>>> +++ b/arch/x86/kvm/vmx.c
>>>> @@ -2192,7 +2192,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>>>>  #else
>>>>  	nested_vmx_exit_ctls_high = 0;
>>>>  #endif
>>>> -	nested_vmx_exit_ctls_high |=
>>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
>>>> +	nested_vmx_exit_ctls_high |=
>>> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>>>> +				      VM_EXIT_LOAD_IA32_EFER);
>>>>
>>>>  	/* entry controls */
>>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
>>>> @@ -2201,8 +2202,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>>>>  	nested_vmx_entry_ctls_low =
>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>>>>  	nested_vmx_entry_ctls_high &=
>>>>  		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
>>>> -	nested_vmx_entry_ctls_high |=
>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>>>> -
>>>> +	nested_vmx_entry_ctls_high |=
>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
>>>> +				       VM_ENTRY_LOAD_IA32_EFER);
>>>>  	/* cpu-based controls */
>>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
>>>>  		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
>>>> @@ -7492,10 +7493,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
>>> struct vmcs12 *vmcs12)
>>>>  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
>>>>  	vmcs_writel(CR0_GUEST_HOST_MASK,
>>> ~vcpu->arch.cr0_guest_owned_bits);
>>>>
>>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer
>>> below */
>>>> -	vmcs_write32(VM_EXIT_CONTROLS,
>>>> -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
>>>> -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
>>>> +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
>>>> +	 * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
>>>> +	 * bits are further modified by vmx_set_efer() below.
>>>> +	 */
>>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
>> This is wrong. We cannot use L0 exit control directly.
>> LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT, ACK_INTR_ON_EXIT should use host's exit control. But others, still need use (vmcs12|host).
>>
> I do not see why. We always intercept DR7/PAT/EFER, so save is emulated
> too. Host address space size always come from L0 and preemption timer is
> not supported for nested IIRC and when it will be host will have to save
> it on exit anyway for correct emulation.

Preemption timer is already supported and works fine as far as I tested.
KVM doesn't use it for L1, so we do not need to save/restore it - IIRC.

Jan
Gleb Natapov July 2, 2013, 3:15 p.m. UTC | #5
On Tue, Jul 02, 2013 at 04:28:56PM +0200, Jan Kiszka wrote:
> On 2013-07-02 15:59, Gleb Natapov wrote:
> > On Tue, Jul 02, 2013 at 03:01:24AM +0000, Zhang, Yang Z wrote:
> >> Since this series is pending in mail list for long time. And it's really a big feature for Nested. Also, I doubt the original authors(Jun and Nahav)should not have enough time to continue it. So I will pick it up. :)
> >>
> >> See comments below:
> >>
> >> Paolo Bonzini wrote on 2013-05-20:
> >>> Il 19/05/2013 06:52, Jun Nakajima ha scritto:
> >>>> From: Nadav Har'El <nyh@il.ibm.com>
> >>>>
> >>>> Recent KVM, since
> >>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
> >>>> switch the EFER MSR when EPT is used and the host and guest have different
> >>>> NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
> >>>> and want to be able to run recent KVM as L1, we need to allow L1 to use this
> >>>> EFER switching feature.
> >>>>
> >>>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if
> >>> available,
> >>>> and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
> >>>> support for the former (the latter is still unsupported).
> >>>>
> >>>> Nested entry and exit emulation (prepare_vmcs_02 and
> >>> load_vmcs12_host_state,
> >>>> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So
> >>> all
> >>>> that's left to do in this patch is to properly advertise this feature to L1.
> >>>>
> >>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by
> >>> using
> >>>> vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
> >>>> support this feature, regardless of whether the host supports it.
> >>>>
> >>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> >>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> >>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> >>>> ---
> >>>>  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
> >>>>  1 file changed, 16 insertions(+), 7 deletions(-)
> >>>>
> >>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >>>> index 260a919..fb9cae5 100644
> >>>> --- a/arch/x86/kvm/vmx.c
> >>>> +++ b/arch/x86/kvm/vmx.c
> >>>> @@ -2192,7 +2192,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> >>>>  #else
> >>>>  	nested_vmx_exit_ctls_high = 0;
> >>>>  #endif
> >>>> -	nested_vmx_exit_ctls_high |=
> >>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
> >>>> +	nested_vmx_exit_ctls_high |=
> >>> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> >>>> +				      VM_EXIT_LOAD_IA32_EFER);
> >>>>
> >>>>  	/* entry controls */
> >>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
> >>>> @@ -2201,8 +2202,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> >>>>  	nested_vmx_entry_ctls_low =
> >>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> >>>>  	nested_vmx_entry_ctls_high &=
> >>>>  		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
> >>>> -	nested_vmx_entry_ctls_high |=
> >>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> >>>> -
> >>>> +	nested_vmx_entry_ctls_high |=
> >>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
> >>>> +				       VM_ENTRY_LOAD_IA32_EFER);
> >>>>  	/* cpu-based controls */
> >>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
> >>>>  		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
> >>>> @@ -7492,10 +7493,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
> >>> struct vmcs12 *vmcs12)
> >>>>  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
> >>>>  	vmcs_writel(CR0_GUEST_HOST_MASK,
> >>> ~vcpu->arch.cr0_guest_owned_bits);
> >>>>
> >>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer
> >>> below */
> >>>> -	vmcs_write32(VM_EXIT_CONTROLS,
> >>>> -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
> >>>> -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
> >>>> +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
> >>>> +	 * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
> >>>> +	 * bits are further modified by vmx_set_efer() below.
> >>>> +	 */
> >>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> >> This is wrong. We cannot use L0 exit control directly.
> >> LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT, ACK_INTR_ON_EXIT should use host's exit control. But others, still need use (vmcs12|host).
> >>
> > I do not see why. We always intercept DR7/PAT/EFER, so save is emulated
> > too. Host address space size always come from L0 and preemption timer is
> > not supported for nested IIRC and when it will be host will have to save
> > it on exit anyway for correct emulation.
> 
> Preemption timer is already supported and works fine as far as I tested.
> KVM doesn't use it for L1, so we do not need to save/restore it - IIRC.
> 
So what happens if L1 configures it to value X after X/2 ticks L0 exit
happen and L0 gets back to L2 directly. The counter will be X again
instead of X/2.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka July 2, 2013, 3:34 p.m. UTC | #6
On 2013-07-02 17:15, Gleb Natapov wrote:
> On Tue, Jul 02, 2013 at 04:28:56PM +0200, Jan Kiszka wrote:
>> On 2013-07-02 15:59, Gleb Natapov wrote:
>>> On Tue, Jul 02, 2013 at 03:01:24AM +0000, Zhang, Yang Z wrote:
>>>> Since this series is pending in mail list for long time. And it's really a big feature for Nested. Also, I doubt the original authors(Jun and Nahav)should not have enough time to continue it. So I will pick it up. :)
>>>>
>>>> See comments below:
>>>>
>>>> Paolo Bonzini wrote on 2013-05-20:
>>>>> Il 19/05/2013 06:52, Jun Nakajima ha scritto:
>>>>>> From: Nadav Har'El <nyh@il.ibm.com>
>>>>>>
>>>>>> Recent KVM, since
>>>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
>>>>>> switch the EFER MSR when EPT is used and the host and guest have different
>>>>>> NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
>>>>>> and want to be able to run recent KVM as L1, we need to allow L1 to use this
>>>>>> EFER switching feature.
>>>>>>
>>>>>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if
>>>>> available,
>>>>>> and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
>>>>>> support for the former (the latter is still unsupported).
>>>>>>
>>>>>> Nested entry and exit emulation (prepare_vmcs_02 and
>>>>> load_vmcs12_host_state,
>>>>>> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So
>>>>> all
>>>>>> that's left to do in this patch is to properly advertise this feature to L1.
>>>>>>
>>>>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by
>>>>> using
>>>>>> vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
>>>>>> support this feature, regardless of whether the host supports it.
>>>>>>
>>>>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
>>>>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
>>>>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
>>>>>> ---
>>>>>>  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
>>>>>>  1 file changed, 16 insertions(+), 7 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>>>> index 260a919..fb9cae5 100644
>>>>>> --- a/arch/x86/kvm/vmx.c
>>>>>> +++ b/arch/x86/kvm/vmx.c
>>>>>> @@ -2192,7 +2192,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>>>>>>  #else
>>>>>>  	nested_vmx_exit_ctls_high = 0;
>>>>>>  #endif
>>>>>> -	nested_vmx_exit_ctls_high |=
>>>>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
>>>>>> +	nested_vmx_exit_ctls_high |=
>>>>> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>>>>>> +				      VM_EXIT_LOAD_IA32_EFER);
>>>>>>
>>>>>>  	/* entry controls */
>>>>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
>>>>>> @@ -2201,8 +2202,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
>>>>>>  	nested_vmx_entry_ctls_low =
>>>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>>>>>>  	nested_vmx_entry_ctls_high &=
>>>>>>  		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
>>>>>> -	nested_vmx_entry_ctls_high |=
>>>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>>>>>> -
>>>>>> +	nested_vmx_entry_ctls_high |=
>>>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
>>>>>> +				       VM_ENTRY_LOAD_IA32_EFER);
>>>>>>  	/* cpu-based controls */
>>>>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
>>>>>>  		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
>>>>>> @@ -7492,10 +7493,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
>>>>> struct vmcs12 *vmcs12)
>>>>>>  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
>>>>>>  	vmcs_writel(CR0_GUEST_HOST_MASK,
>>>>> ~vcpu->arch.cr0_guest_owned_bits);
>>>>>>
>>>>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer
>>>>> below */
>>>>>> -	vmcs_write32(VM_EXIT_CONTROLS,
>>>>>> -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
>>>>>> -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
>>>>>> +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
>>>>>> +	 * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
>>>>>> +	 * bits are further modified by vmx_set_efer() below.
>>>>>> +	 */
>>>>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
>>>> This is wrong. We cannot use L0 exit control directly.
>>>> LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT, ACK_INTR_ON_EXIT should use host's exit control. But others, still need use (vmcs12|host).
>>>>
>>> I do not see why. We always intercept DR7/PAT/EFER, so save is emulated
>>> too. Host address space size always come from L0 and preemption timer is
>>> not supported for nested IIRC and when it will be host will have to save
>>> it on exit anyway for correct emulation.
>>
>> Preemption timer is already supported and works fine as far as I tested.
>> KVM doesn't use it for L1, so we do not need to save/restore it - IIRC.
>>
> So what happens if L1 configures it to value X after X/2 ticks L0 exit
> happen and L0 gets back to L2 directly. The counter will be X again
> instead of X/2.

Likely. Yes, we need to improve our emulation by setting "Save
VMX-preemption timer value" or emulate this in software if the hardware
lacks support for it (was this flag introduced after the preemption
timer itself?).

Jan
Gleb Natapov July 2, 2013, 3:43 p.m. UTC | #7
On Tue, Jul 02, 2013 at 05:34:56PM +0200, Jan Kiszka wrote:
> On 2013-07-02 17:15, Gleb Natapov wrote:
> > On Tue, Jul 02, 2013 at 04:28:56PM +0200, Jan Kiszka wrote:
> >> On 2013-07-02 15:59, Gleb Natapov wrote:
> >>> On Tue, Jul 02, 2013 at 03:01:24AM +0000, Zhang, Yang Z wrote:
> >>>> Since this series is pending in mail list for long time. And it's really a big feature for Nested. Also, I doubt the original authors(Jun and Nahav)should not have enough time to continue it. So I will pick it up. :)
> >>>>
> >>>> See comments below:
> >>>>
> >>>> Paolo Bonzini wrote on 2013-05-20:
> >>>>> Il 19/05/2013 06:52, Jun Nakajima ha scritto:
> >>>>>> From: Nadav Har'El <nyh@il.ibm.com>
> >>>>>>
> >>>>>> Recent KVM, since
> >>>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
> >>>>>> switch the EFER MSR when EPT is used and the host and guest have different
> >>>>>> NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
> >>>>>> and want to be able to run recent KVM as L1, we need to allow L1 to use this
> >>>>>> EFER switching feature.
> >>>>>>
> >>>>>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if
> >>>>> available,
> >>>>>> and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
> >>>>>> support for the former (the latter is still unsupported).
> >>>>>>
> >>>>>> Nested entry and exit emulation (prepare_vmcs_02 and
> >>>>> load_vmcs12_host_state,
> >>>>>> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So
> >>>>> all
> >>>>>> that's left to do in this patch is to properly advertise this feature to L1.
> >>>>>>
> >>>>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by
> >>>>> using
> >>>>>> vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
> >>>>>> support this feature, regardless of whether the host supports it.
> >>>>>>
> >>>>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> >>>>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> >>>>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> >>>>>> ---
> >>>>>>  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
> >>>>>>  1 file changed, 16 insertions(+), 7 deletions(-)
> >>>>>>
> >>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >>>>>> index 260a919..fb9cae5 100644
> >>>>>> --- a/arch/x86/kvm/vmx.c
> >>>>>> +++ b/arch/x86/kvm/vmx.c
> >>>>>> @@ -2192,7 +2192,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> >>>>>>  #else
> >>>>>>  	nested_vmx_exit_ctls_high = 0;
> >>>>>>  #endif
> >>>>>> -	nested_vmx_exit_ctls_high |=
> >>>>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
> >>>>>> +	nested_vmx_exit_ctls_high |=
> >>>>> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> >>>>>> +				      VM_EXIT_LOAD_IA32_EFER);
> >>>>>>
> >>>>>>  	/* entry controls */
> >>>>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
> >>>>>> @@ -2201,8 +2202,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
> >>>>>>  	nested_vmx_entry_ctls_low =
> >>>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> >>>>>>  	nested_vmx_entry_ctls_high &=
> >>>>>>  		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
> >>>>>> -	nested_vmx_entry_ctls_high |=
> >>>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> >>>>>> -
> >>>>>> +	nested_vmx_entry_ctls_high |=
> >>>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
> >>>>>> +				       VM_ENTRY_LOAD_IA32_EFER);
> >>>>>>  	/* cpu-based controls */
> >>>>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
> >>>>>>  		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
> >>>>>> @@ -7492,10 +7493,18 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu,
> >>>>> struct vmcs12 *vmcs12)
> >>>>>>  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
> >>>>>>  	vmcs_writel(CR0_GUEST_HOST_MASK,
> >>>>> ~vcpu->arch.cr0_guest_owned_bits);
> >>>>>>
> >>>>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer
> >>>>> below */
> >>>>>> -	vmcs_write32(VM_EXIT_CONTROLS,
> >>>>>> -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
> >>>>>> -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
> >>>>>> +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
> >>>>>> +	 * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
> >>>>>> +	 * bits are further modified by vmx_set_efer() below.
> >>>>>> +	 */
> >>>>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> >>>> This is wrong. We cannot use L0 exit control directly.
> >>>> LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT, ACK_INTR_ON_EXIT should use host's exit control. But others, still need use (vmcs12|host).
> >>>>
> >>> I do not see why. We always intercept DR7/PAT/EFER, so save is emulated
> >>> too. Host address space size always come from L0 and preemption timer is
> >>> not supported for nested IIRC and when it will be host will have to save
> >>> it on exit anyway for correct emulation.
> >>
> >> Preemption timer is already supported and works fine as far as I tested.
> >> KVM doesn't use it for L1, so we do not need to save/restore it - IIRC.
> >>
> > So what happens if L1 configures it to value X after X/2 ticks L0 exit
> > happen and L0 gets back to L2 directly. The counter will be X again
> > instead of X/2.
> 
> Likely. Yes, we need to improve our emulation by setting "Save
> VMX-preemption timer value" or emulate this in software if the hardware
> lacks support for it (was this flag introduced after the preemption
> timer itself?).
> 
Not sure, but my point was that for correct emulation host needs to set
 "save preempt timer on vmexit" anyway so all VM_EXIT_CONTROLS are
indeed emulated as far as I see.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z July 4, 2013, 8:42 a.m. UTC | #8
Gleb Natapov wrote on 2013-07-02:
> On Tue, Jul 02, 2013 at 05:34:56PM +0200, Jan Kiszka wrote:
>> On 2013-07-02 17:15, Gleb Natapov wrote:
>>> On Tue, Jul 02, 2013 at 04:28:56PM +0200, Jan Kiszka wrote:
>>>> On 2013-07-02 15:59, Gleb Natapov wrote:
>>>>> On Tue, Jul 02, 2013 at 03:01:24AM +0000, Zhang, Yang Z wrote:
>>>>>> Since this series is pending in mail list for long time. And
>>>>>> it's really a big feature for Nested. Also, I doubt the
>>>>>> original authors(Jun and Nahav)should not have enough time to continue it.
>>>>>> So I will pick it up. :)
>>>>>> 
>>>>>> See comments below:
>>>>>> 
>>>>>> Paolo Bonzini wrote on 2013-05-20:
>>>>>>> Il 19/05/2013 06:52, Jun Nakajima ha scritto:
>>>>>>>> From: Nadav Har'El <nyh@il.ibm.com>
>>>>>>>> 
>>>>>>>> Recent KVM, since
>>>>>>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
>>>>>>>> switch the EFER MSR when EPT is used and the host and guest have
>>>>>>>> different NX bits. So if we add support for nested EPT (L1 guest
>>>>>>>> using EPT to run L2) and want to be able to run recent KVM as L1,
>>>>>>>> we need to allow L1 to use this EFER switching feature.
>>>>>>>> 
>>>>>>>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER
>>>>>>>> if available, and if it isn't, it uses the generic
>>>>>>>> VM_ENTRY/EXIT_MSR_LOAD. This patch adds support for the former
>>>>>>>> (the latter is still unsupported).
>>>>>>>> 
>>>>>>>> Nested entry and exit emulation (prepare_vmcs_02 and
>>>>>>>> load_vmcs12_host_state, respectively) already handled
>>>>>>>> VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So all that's left to do
>>>>>>>> in this patch is to properly advertise this feature to L1.
>>>>>>>> 
>>>>>>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by
>>>>>>>> L0, by using vmx_set_efer (which itself sets one of several
>>>>>>>> vmcs02 fields), so we always support this feature, regardless of
>>>>>>>> whether the host supports it.
>>>>>>>> 
>>>>>>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
>>>>>>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
>>>>>>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
>>>>>>>> ---
>>>>>>>>  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
>>>>>>>>  1 file changed, 16 insertions(+), 7 deletions(-)
>>>>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
>>>>>>>> 260a919..fb9cae5 100644
>>>>>>>> --- a/arch/x86/kvm/vmx.c
>>>>>>>> +++ b/arch/x86/kvm/vmx.c
>>>>>>>> @@ -2192,7 +2192,8 @@ static __init void
>>>>>>>> nested_vmx_setup_ctls_msrs(void)  #else
>>>>>>>>  	nested_vmx_exit_ctls_high = 0;  #endif
>>>>>>>> -	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
>>>>>>>> +	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR
>>>>>>>> | +				      VM_EXIT_LOAD_IA32_EFER);
>>>>>>>> 
>>>>>>>>  	/* entry controls */
>>>>>>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS, @@ -2201,8 +2202,8
> @@ static
>>>>>>>> __init void nested_vmx_setup_ctls_msrs(void)
>>>>>>>>  	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>>>>>>>>  	nested_vmx_entry_ctls_high &= 		VM_ENTRY_LOAD_IA32_PAT |
>>>>>>>>  VM_ENTRY_IA32E_MODE;
>>>>>>>> -	nested_vmx_entry_ctls_high |=
>>>>>>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; -
>>>>>>>> +	nested_vmx_entry_ctls_high |=
>>>>>>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | +				      
>>>>>>>> VM_ENTRY_LOAD_IA32_EFER);
>>>>>>>>  	/* cpu-based controls */
>>>>>>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
>>>>>>>>  		nested_vmx_procbased_ctls_low,
>>>>>>>> nested_vmx_procbased_ctls_high); @@ -7492,10 +7493,18 @@ static
>>>>>>>> void prepare_vmcs02(struct kvm_vcpu *vcpu,
>>>>>>> struct vmcs12 *vmcs12)
>>>>>>>>  	vcpu->arch.cr0_guest_owned_bits &=
>>>>>>>>  ~vmcs12->cr0_guest_host_mask; 	vmcs_writel(CR0_GUEST_HOST_MASK,
>>>>>>> ~vcpu->arch.cr0_guest_owned_bits);
>>>>>>>> 
>>>>>>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by
> vmx_set_efer
>>>>>>> below */
>>>>>>>> -	vmcs_write32(VM_EXIT_CONTROLS, -		vmcs12->vm_exit_controls |
>>>>>>>> vmcs_config.vmexit_ctrl); -	vmcs_write32(VM_ENTRY_CONTROLS,
>>>>>>>> vmcs12->vm_entry_controls | +	/* L2->L1 exit controls are
>>>>>>>> emulated - the hardware exit is +to L0 so +	 * we should use its
>>>>>>>> exit controls. Note that IA32_MODE, LOAD_IA32_EFER +	 * bits are
>>>>>>>> further modified by vmx_set_efer() below. +	 */
>>>>>>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
>>>>>> This is wrong. We cannot use L0 exit control directly.
>>>>>> LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT,
> ACK_INTR_ON_EXIT should use host's exit control. But others, still
> need use (vmcs12|host).
>>>>>> 
>>>>> I do not see why. We always intercept DR7/PAT/EFER, so save is
>>>>> emulated too. Host address space size always come from L0 and
>>>>> preemption timer is not supported for nested IIRC and when it
>>>>> will be host will have to save it on exit anyway for correct emulation.
>>>> 
>>>> Preemption timer is already supported and works fine as far as I tested.
>>>> KVM doesn't use it for L1, so we do not need to save/restore it - IIRC.
>>>> 
>>> So what happens if L1 configures it to value X after X/2 ticks L0
>>> exit happen and L0 gets back to L2 directly. The counter will be X
>>> again instead of X/2.
>> 
>> Likely. Yes, we need to improve our emulation by setting "Save
>> VMX-preemption timer value" or emulate this in software if the
>> hardware lacks support for it (was this flag introduced after the
>> preemption timer itself?).
>> 
> Not sure, but my point was that for correct emulation host needs to
> set "save preempt timer on vmexit" anyway so all VM_EXIT_CONTROLS are
> indeed emulated as far as I see.
>
Ok, here is my summary, please correct me if I am wrong:
bit 2: Save debug controls, the first processor only support 1-setting on it, so just use host's setting is enough
bit 9: Host address space size, it indicate the host's state, so must use host's setting.
bit 12: Load IA32_PERF_GLOBAL_CTRL: same as above.
bit 15 : Acknowledge interrupt on exit: same as above.
bit 19: Load IA32_PAT: same as above.
bit 20: Load IA32_EFER: same as above.
bit 18: Save IA32_PAT, Didn't expose it to L1, so use host' setting is ok.
bit 19: Save IA32_EFER, same as above.
bit 22: Save VMXpreemption timer value, I don't see KVM expose it to L1, but Jan said it's working. Strange! And according gleb's suggestion, it better to always set it.

So, currently, only use host' exit_control for L2 is enough.

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 8, 2013, 12:37 p.m. UTC | #9
On Thu, Jul 04, 2013 at 08:42:53AM +0000, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-07-02:
> > On Tue, Jul 02, 2013 at 05:34:56PM +0200, Jan Kiszka wrote:
> >> On 2013-07-02 17:15, Gleb Natapov wrote:
> >>> On Tue, Jul 02, 2013 at 04:28:56PM +0200, Jan Kiszka wrote:
> >>>> On 2013-07-02 15:59, Gleb Natapov wrote:
> >>>>> On Tue, Jul 02, 2013 at 03:01:24AM +0000, Zhang, Yang Z wrote:
> >>>>>> Since this series is pending in mail list for long time. And
> >>>>>> it's really a big feature for Nested. Also, I doubt the
> >>>>>> original authors(Jun and Nahav)should not have enough time to continue it.
> >>>>>> So I will pick it up. :)
> >>>>>> 
> >>>>>> See comments below:
> >>>>>> 
> >>>>>> Paolo Bonzini wrote on 2013-05-20:
> >>>>>>> Il 19/05/2013 06:52, Jun Nakajima ha scritto:
> >>>>>>>> From: Nadav Har'El <nyh@il.ibm.com>
> >>>>>>>> 
> >>>>>>>> Recent KVM, since
> >>>>>>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
> >>>>>>>> switch the EFER MSR when EPT is used and the host and guest have
> >>>>>>>> different NX bits. So if we add support for nested EPT (L1 guest
> >>>>>>>> using EPT to run L2) and want to be able to run recent KVM as L1,
> >>>>>>>> we need to allow L1 to use this EFER switching feature.
> >>>>>>>> 
> >>>>>>>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER
> >>>>>>>> if available, and if it isn't, it uses the generic
> >>>>>>>> VM_ENTRY/EXIT_MSR_LOAD. This patch adds support for the former
> >>>>>>>> (the latter is still unsupported).
> >>>>>>>> 
> >>>>>>>> Nested entry and exit emulation (prepare_vmcs_02 and
> >>>>>>>> load_vmcs12_host_state, respectively) already handled
> >>>>>>>> VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So all that's left to do
> >>>>>>>> in this patch is to properly advertise this feature to L1.
> >>>>>>>> 
> >>>>>>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by
> >>>>>>>> L0, by using vmx_set_efer (which itself sets one of several
> >>>>>>>> vmcs02 fields), so we always support this feature, regardless of
> >>>>>>>> whether the host supports it.
> >>>>>>>> 
> >>>>>>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> >>>>>>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> >>>>>>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> >>>>>>>> ---
> >>>>>>>>  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
> >>>>>>>>  1 file changed, 16 insertions(+), 7 deletions(-)
> >>>>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> >>>>>>>> 260a919..fb9cae5 100644
> >>>>>>>> --- a/arch/x86/kvm/vmx.c
> >>>>>>>> +++ b/arch/x86/kvm/vmx.c
> >>>>>>>> @@ -2192,7 +2192,8 @@ static __init void
> >>>>>>>> nested_vmx_setup_ctls_msrs(void)  #else
> >>>>>>>>  	nested_vmx_exit_ctls_high = 0;  #endif
> >>>>>>>> -	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
> >>>>>>>> +	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR
> >>>>>>>> | +				      VM_EXIT_LOAD_IA32_EFER);
> >>>>>>>> 
> >>>>>>>>  	/* entry controls */
> >>>>>>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS, @@ -2201,8 +2202,8
> > @@ static
> >>>>>>>> __init void nested_vmx_setup_ctls_msrs(void)
> >>>>>>>>  	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> >>>>>>>>  	nested_vmx_entry_ctls_high &= 		VM_ENTRY_LOAD_IA32_PAT |
> >>>>>>>>  VM_ENTRY_IA32E_MODE;
> >>>>>>>> -	nested_vmx_entry_ctls_high |=
> >>>>>>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; -
> >>>>>>>> +	nested_vmx_entry_ctls_high |=
> >>>>>>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | +				      
> >>>>>>>> VM_ENTRY_LOAD_IA32_EFER);
> >>>>>>>>  	/* cpu-based controls */
> >>>>>>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
> >>>>>>>>  		nested_vmx_procbased_ctls_low,
> >>>>>>>> nested_vmx_procbased_ctls_high); @@ -7492,10 +7493,18 @@ static
> >>>>>>>> void prepare_vmcs02(struct kvm_vcpu *vcpu,
> >>>>>>> struct vmcs12 *vmcs12)
> >>>>>>>>  	vcpu->arch.cr0_guest_owned_bits &=
> >>>>>>>>  ~vmcs12->cr0_guest_host_mask; 	vmcs_writel(CR0_GUEST_HOST_MASK,
> >>>>>>> ~vcpu->arch.cr0_guest_owned_bits);
> >>>>>>>> 
> >>>>>>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by
> > vmx_set_efer
> >>>>>>> below */
> >>>>>>>> -	vmcs_write32(VM_EXIT_CONTROLS, -		vmcs12->vm_exit_controls |
> >>>>>>>> vmcs_config.vmexit_ctrl); -	vmcs_write32(VM_ENTRY_CONTROLS,
> >>>>>>>> vmcs12->vm_entry_controls | +	/* L2->L1 exit controls are
> >>>>>>>> emulated - the hardware exit is +to L0 so +	 * we should use its
> >>>>>>>> exit controls. Note that IA32_MODE, LOAD_IA32_EFER +	 * bits are
> >>>>>>>> further modified by vmx_set_efer() below. +	 */
> >>>>>>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> >>>>>> This is wrong. We cannot use L0 exit control directly.
> >>>>>> LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT,
> > ACK_INTR_ON_EXIT should use host's exit control. But others, still
> > need use (vmcs12|host).
> >>>>>> 
> >>>>> I do not see why. We always intercept DR7/PAT/EFER, so save is
> >>>>> emulated too. Host address space size always come from L0 and
> >>>>> preemption timer is not supported for nested IIRC and when it
> >>>>> will be host will have to save it on exit anyway for correct emulation.
> >>>> 
> >>>> Preemption timer is already supported and works fine as far as I tested.
> >>>> KVM doesn't use it for L1, so we do not need to save/restore it - IIRC.
> >>>> 
> >>> So what happens if L1 configures it to value X after X/2 ticks L0
> >>> exit happen and L0 gets back to L2 directly. The counter will be X
> >>> again instead of X/2.
> >> 
> >> Likely. Yes, we need to improve our emulation by setting "Save
> >> VMX-preemption timer value" or emulate this in software if the
> >> hardware lacks support for it (was this flag introduced after the
> >> preemption timer itself?).
> >> 
> > Not sure, but my point was that for correct emulation host needs to
> > set "save preempt timer on vmexit" anyway so all VM_EXIT_CONTROLS are
> > indeed emulated as far as I see.
> >
> Ok, here is my summary, please correct me if I am wrong:
> bit 2: Save debug controls, the first processor only support 1-setting on it, so just use host's setting is enough
Not because first processor only supported 1-setting, but because L0
intercepts all changes to DR7 and DEBUGCTL MSR, so L2 cannot change them
behind L0 back. If L1 asks to save them in vmcs12 L0 can do it during
vmexit emulation.

> bit 9: Host address space size, it indicate the host's state, so must use host's setting.
> bit 12: Load IA32_PERF_GLOBAL_CTRL: same as above.
Not sure what "above" do you mean. It is fully emulated during vmexit
emulation. We do not want PERF_GLOBAL_CTRL to be loaded during L2->L0
vmexit, we want it to be loaded in L1 after L0 emulates L2->L1 vmexit.
But I think there is a bug in current emulation. The code looks like
this:

        if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
                vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
                        vmcs12->host_ia32_perf_global_ctrl);

But GUEST_IA32_PERF_GLOBAL_CTRL will not be loaded during L1 entry unless
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL is set. Why do we assume it is set
here?

> bit 15 : Acknowledge interrupt on exit: same as above.
Same as bit 9.

> bit 19: Load IA32_PAT: same as above.
> bit 20: Load IA32_EFER: same as above.
Those two are same as bit 12.

> bit 18: Save IA32_PAT, Didn't expose it to L1, so use host' setting is ok.
> bit 19: Save IA32_EFER, same as above.
Those two are the same a bit 2.

> bit 22: Save VMXpreemption timer value, I don't see KVM expose it to L1, but Jan said it's working. Strange! And according gleb's suggestion, it better to always set it.
>
It exposed it in nested_vmx_setup_ctls_msrs:
 
        nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
                PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
                PIN_BASED_VMX_PREEMPTION_TIMER;

According to Gleb's suggestion current emulation is broken and to fix it
the bit will have to be set on each L2 entry if L1 is using preemption timer.

> So, currently, only use host' exit_control for L2 is enough.
> 
> Best regards,
> Yang
> 

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zhang, Yang Z July 8, 2013, 2:28 p.m. UTC | #10
> -----Original Message-----
> From: Gleb Natapov [mailto:gleb@redhat.com]
> Sent: Monday, July 08, 2013 8:38 PM
> To: Zhang, Yang Z
> Cc: Jan Kiszka; Paolo Bonzini; Nakajima, Jun; kvm@vger.kernel.org
> Subject: Re: [PATCH v3 01/13] nEPT: Support LOAD_IA32_EFER entry/exit
> controls for L1
> 
> On Thu, Jul 04, 2013 at 08:42:53AM +0000, Zhang, Yang Z wrote:
> > Gleb Natapov wrote on 2013-07-02:
> > > On Tue, Jul 02, 2013 at 05:34:56PM +0200, Jan Kiszka wrote:
> > >> On 2013-07-02 17:15, Gleb Natapov wrote:
> > >>> On Tue, Jul 02, 2013 at 04:28:56PM +0200, Jan Kiszka wrote:
> > >>>> On 2013-07-02 15:59, Gleb Natapov wrote:
> > >>>>> On Tue, Jul 02, 2013 at 03:01:24AM +0000, Zhang, Yang Z wrote:
> > >>>>>> Since this series is pending in mail list for long time. And
> > >>>>>> it's really a big feature for Nested. Also, I doubt the
> > >>>>>> original authors(Jun and Nahav)should not have enough time to
> continue it.
> > >>>>>> So I will pick it up. :)
> > >>>>>>
> > >>>>>> See comments below:
> > >>>>>>
> > >>>>>> Paolo Bonzini wrote on 2013-05-20:
> > >>>>>>> Il 19/05/2013 06:52, Jun Nakajima ha scritto:
> > >>>>>>>> From: Nadav Har'El <nyh@il.ibm.com>
> > >>>>>>>>
> > >>>>>>>> Recent KVM, since
> > >>>>>>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
> > >>>>>>>> switch the EFER MSR when EPT is used and the host and guest
> have
> > >>>>>>>> different NX bits. So if we add support for nested EPT (L1 guest
> > >>>>>>>> using EPT to run L2) and want to be able to run recent KVM as L1,
> > >>>>>>>> we need to allow L1 to use this EFER switching feature.
> > >>>>>>>>
> > >>>>>>>> To do this EFER switching, KVM uses
> VM_ENTRY/EXIT_LOAD_IA32_EFER
> > >>>>>>>> if available, and if it isn't, it uses the generic
> > >>>>>>>> VM_ENTRY/EXIT_MSR_LOAD. This patch adds support for the
> former
> > >>>>>>>> (the latter is still unsupported).
> > >>>>>>>>
> > >>>>>>>> Nested entry and exit emulation (prepare_vmcs_02 and
> > >>>>>>>> load_vmcs12_host_state, respectively) already handled
> > >>>>>>>> VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So all that's left to do
> > >>>>>>>> in this patch is to properly advertise this feature to L1.
> > >>>>>>>>
> > >>>>>>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are
> emulated by
> > >>>>>>>> L0, by using vmx_set_efer (which itself sets one of several
> > >>>>>>>> vmcs02 fields), so we always support this feature, regardless of
> > >>>>>>>> whether the host supports it.
> > >>>>>>>>
> > >>>>>>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> > >>>>>>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> > >>>>>>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> > >>>>>>>> ---
> > >>>>>>>>  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
> > >>>>>>>>  1 file changed, 16 insertions(+), 7 deletions(-)
> > >>>>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> > >>>>>>>> 260a919..fb9cae5 100644
> > >>>>>>>> --- a/arch/x86/kvm/vmx.c
> > >>>>>>>> +++ b/arch/x86/kvm/vmx.c
> > >>>>>>>> @@ -2192,7 +2192,8 @@ static __init void
> > >>>>>>>> nested_vmx_setup_ctls_msrs(void)  #else
> > >>>>>>>>  	nested_vmx_exit_ctls_high = 0;  #endif
> > >>>>>>>> -	nested_vmx_exit_ctls_high |=
> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
> > >>>>>>>> +	nested_vmx_exit_ctls_high |=
> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR
> > >>>>>>>> | +				      VM_EXIT_LOAD_IA32_EFER);
> > >>>>>>>>
> > >>>>>>>>  	/* entry controls */
> > >>>>>>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS, @@ -2201,8 +2202,8
> > > @@ static
> > >>>>>>>> __init void nested_vmx_setup_ctls_msrs(void)
> > >>>>>>>>  	nested_vmx_entry_ctls_low =
> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> > >>>>>>>>  	nested_vmx_entry_ctls_high &=
> 	VM_ENTRY_LOAD_IA32_PAT |
> > >>>>>>>>  VM_ENTRY_IA32E_MODE;
> > >>>>>>>> -	nested_vmx_entry_ctls_high |=
> > >>>>>>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; -
> > >>>>>>>> +	nested_vmx_entry_ctls_high |=
> > >>>>>>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | +
> > >>>>>>>> VM_ENTRY_LOAD_IA32_EFER);
> > >>>>>>>>  	/* cpu-based controls */
> > >>>>>>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
> > >>>>>>>>  		nested_vmx_procbased_ctls_low,
> > >>>>>>>> nested_vmx_procbased_ctls_high); @@ -7492,10 +7493,18 @@
> static
> > >>>>>>>> void prepare_vmcs02(struct kvm_vcpu *vcpu,
> > >>>>>>> struct vmcs12 *vmcs12)
> > >>>>>>>>  	vcpu->arch.cr0_guest_owned_bits &=
> > >>>>>>>>  ~vmcs12->cr0_guest_host_mask;
> 	vmcs_writel(CR0_GUEST_HOST_MASK,
> > >>>>>>> ~vcpu->arch.cr0_guest_owned_bits);
> > >>>>>>>>
> > >>>>>>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by
> > > vmx_set_efer
> > >>>>>>> below */
> > >>>>>>>> -	vmcs_write32(VM_EXIT_CONTROLS, -
> 	vmcs12->vm_exit_controls |
> > >>>>>>>> vmcs_config.vmexit_ctrl); -	vmcs_write32(VM_ENTRY_CONTROLS,
> > >>>>>>>> vmcs12->vm_entry_controls | +	/* L2->L1 exit controls are
> > >>>>>>>> emulated - the hardware exit is +to L0 so +	 * we should use
> its
> > >>>>>>>> exit controls. Note that IA32_MODE, LOAD_IA32_EFER +	 *
> bits are
> > >>>>>>>> further modified by vmx_set_efer() below. +	 */
> > >>>>>>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> > >>>>>> This is wrong. We cannot use L0 exit control directly.
> > >>>>>> LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT,
> > > ACK_INTR_ON_EXIT should use host's exit control. But others, still
> > > need use (vmcs12|host).
> > >>>>>>
> > >>>>> I do not see why. We always intercept DR7/PAT/EFER, so save is
> > >>>>> emulated too. Host address space size always come from L0 and
> > >>>>> preemption timer is not supported for nested IIRC and when it
> > >>>>> will be host will have to save it on exit anyway for correct emulation.
> > >>>>
> > >>>> Preemption timer is already supported and works fine as far as I tested.
> > >>>> KVM doesn't use it for L1, so we do not need to save/restore it - IIRC.
> > >>>>
> > >>> So what happens if L1 configures it to value X after X/2 ticks L0
> > >>> exit happen and L0 gets back to L2 directly. The counter will be X
> > >>> again instead of X/2.
> > >>
> > >> Likely. Yes, we need to improve our emulation by setting "Save
> > >> VMX-preemption timer value" or emulate this in software if the
> > >> hardware lacks support for it (was this flag introduced after the
> > >> preemption timer itself?).
> > >>
> > > Not sure, but my point was that for correct emulation host needs to
> > > set "save preempt timer on vmexit" anyway so all VM_EXIT_CONTROLS are
> > > indeed emulated as far as I see.
> > >
> > Ok, here is my summary, please correct me if I am wrong:
> > bit 2: Save debug controls, the first processor only support 1-setting on it, so
> just use host's setting is enough
> Not because first processor only supported 1-setting, but because L0
> intercepts all changes to DR7 and DEBUGCTL MSR, so L2 cannot change them
> behind L0 back. If L1 asks to save them in vmcs12 L0 can do it during
> vmexit emulation.
> 
> > bit 9: Host address space size, it indicate the host's state, so must use host's
> setting.
> > bit 12: Load IA32_PERF_GLOBAL_CTRL: same as above.
> Not sure what "above" do you mean. It is fully emulated during vmexit
> emulation. We do not want PERF_GLOBAL_CTRL to be loaded during L2->L0
Not understand why PERF_GLOBAL_CTRL/ Load IA32_PAT/ Load IA32_EFER shouldn't be loaded during L2->L0?

> vmexit, we want it to be loaded in L1 after L0 emulates L2->L1 vmexit.
> But I think there is a bug in current emulation. The code looks like
> this:
> 
>         if (vmcs12->vm_exit_controls &
> VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
>                 vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
>                         vmcs12->host_ia32_perf_global_ctrl);
> 
> But GUEST_IA32_PERF_GLOBAL_CTRL will not be loaded during L1 entry unless
> VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL is set. Why do we assume it is
> set
> here?
You are right. It seems a bug in current emulation.

> 
> > bit 15 : Acknowledge interrupt on exit: same as above.
> Same as bit 9.
> 
> > bit 19: Load IA32_PAT: same as above.
> > bit 20: Load IA32_EFER: same as above.
> Those two are same as bit 12.
> 
> > bit 18: Save IA32_PAT, Didn't expose it to L1, so use host' setting is ok.
> > bit 19: Save IA32_EFER, same as above.
> Those two are the same a bit 2.
> 
> > bit 22: Save VMXpreemption timer value, I don't see KVM expose it to L1, but
> Jan said it's working. Strange! And according gleb's suggestion, it better to
> always set it.
> >
> It exposed it in nested_vmx_setup_ctls_msrs:
> 
>         nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
>                 PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
>                 PIN_BASED_VMX_PREEMPTION_TIMER;
> According to Gleb's suggestion current emulation is broken and to fix it
> the bit will have to be set on each L2 entry if L1 is using preemption timer.
> 
> > So, currently, only use host' exit_control for L2 is enough.
> >
> > Best regards,
> > Yang
> >

Best regards,
Yang
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov July 8, 2013, 4:08 p.m. UTC | #11
On Mon, Jul 08, 2013 at 02:28:15PM +0000, Zhang, Yang Z wrote:
> > -----Original Message-----
> > From: Gleb Natapov [mailto:gleb@redhat.com]
> > Sent: Monday, July 08, 2013 8:38 PM
> > To: Zhang, Yang Z
> > Cc: Jan Kiszka; Paolo Bonzini; Nakajima, Jun; kvm@vger.kernel.org
> > Subject: Re: [PATCH v3 01/13] nEPT: Support LOAD_IA32_EFER entry/exit
> > controls for L1
> > 
> > On Thu, Jul 04, 2013 at 08:42:53AM +0000, Zhang, Yang Z wrote:
> > > Gleb Natapov wrote on 2013-07-02:
> > > > On Tue, Jul 02, 2013 at 05:34:56PM +0200, Jan Kiszka wrote:
> > > >> On 2013-07-02 17:15, Gleb Natapov wrote:
> > > >>> On Tue, Jul 02, 2013 at 04:28:56PM +0200, Jan Kiszka wrote:
> > > >>>> On 2013-07-02 15:59, Gleb Natapov wrote:
> > > >>>>> On Tue, Jul 02, 2013 at 03:01:24AM +0000, Zhang, Yang Z wrote:
> > > >>>>>> Since this series is pending in mail list for long time. And
> > > >>>>>> it's really a big feature for Nested. Also, I doubt the
> > > >>>>>> original authors(Jun and Nahav)should not have enough time to
> > continue it.
> > > >>>>>> So I will pick it up. :)
> > > >>>>>>
> > > >>>>>> See comments below:
> > > >>>>>>
> > > >>>>>> Paolo Bonzini wrote on 2013-05-20:
> > > >>>>>>> Il 19/05/2013 06:52, Jun Nakajima ha scritto:
> > > >>>>>>>> From: Nadav Har'El <nyh@il.ibm.com>
> > > >>>>>>>>
> > > >>>>>>>> Recent KVM, since
> > > >>>>>>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
> > > >>>>>>>> switch the EFER MSR when EPT is used and the host and guest
> > have
> > > >>>>>>>> different NX bits. So if we add support for nested EPT (L1 guest
> > > >>>>>>>> using EPT to run L2) and want to be able to run recent KVM as L1,
> > > >>>>>>>> we need to allow L1 to use this EFER switching feature.
> > > >>>>>>>>
> > > >>>>>>>> To do this EFER switching, KVM uses
> > VM_ENTRY/EXIT_LOAD_IA32_EFER
> > > >>>>>>>> if available, and if it isn't, it uses the generic
> > > >>>>>>>> VM_ENTRY/EXIT_MSR_LOAD. This patch adds support for the
> > former
> > > >>>>>>>> (the latter is still unsupported).
> > > >>>>>>>>
> > > >>>>>>>> Nested entry and exit emulation (prepare_vmcs_02 and
> > > >>>>>>>> load_vmcs12_host_state, respectively) already handled
> > > >>>>>>>> VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So all that's left to do
> > > >>>>>>>> in this patch is to properly advertise this feature to L1.
> > > >>>>>>>>
> > > >>>>>>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are
> > emulated by
> > > >>>>>>>> L0, by using vmx_set_efer (which itself sets one of several
> > > >>>>>>>> vmcs02 fields), so we always support this feature, regardless of
> > > >>>>>>>> whether the host supports it.
> > > >>>>>>>>
> > > >>>>>>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> > > >>>>>>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> > > >>>>>>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> > > >>>>>>>> ---
> > > >>>>>>>>  arch/x86/kvm/vmx.c | 23 ++++++++++++++++-------
> > > >>>>>>>>  1 file changed, 16 insertions(+), 7 deletions(-)
> > > >>>>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
> > > >>>>>>>> 260a919..fb9cae5 100644
> > > >>>>>>>> --- a/arch/x86/kvm/vmx.c
> > > >>>>>>>> +++ b/arch/x86/kvm/vmx.c
> > > >>>>>>>> @@ -2192,7 +2192,8 @@ static __init void
> > > >>>>>>>> nested_vmx_setup_ctls_msrs(void)  #else
> > > >>>>>>>>  	nested_vmx_exit_ctls_high = 0;  #endif
> > > >>>>>>>> -	nested_vmx_exit_ctls_high |=
> > VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
> > > >>>>>>>> +	nested_vmx_exit_ctls_high |=
> > (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR
> > > >>>>>>>> | +				      VM_EXIT_LOAD_IA32_EFER);
> > > >>>>>>>>
> > > >>>>>>>>  	/* entry controls */
> > > >>>>>>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS, @@ -2201,8 +2202,8
> > > > @@ static
> > > >>>>>>>> __init void nested_vmx_setup_ctls_msrs(void)
> > > >>>>>>>>  	nested_vmx_entry_ctls_low =
> > VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> > > >>>>>>>>  	nested_vmx_entry_ctls_high &=
> > 	VM_ENTRY_LOAD_IA32_PAT |
> > > >>>>>>>>  VM_ENTRY_IA32E_MODE;
> > > >>>>>>>> -	nested_vmx_entry_ctls_high |=
> > > >>>>>>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; -
> > > >>>>>>>> +	nested_vmx_entry_ctls_high |=
> > > >>>>>>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | +
> > > >>>>>>>> VM_ENTRY_LOAD_IA32_EFER);
> > > >>>>>>>>  	/* cpu-based controls */
> > > >>>>>>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
> > > >>>>>>>>  		nested_vmx_procbased_ctls_low,
> > > >>>>>>>> nested_vmx_procbased_ctls_high); @@ -7492,10 +7493,18 @@
> > static
> > > >>>>>>>> void prepare_vmcs02(struct kvm_vcpu *vcpu,
> > > >>>>>>> struct vmcs12 *vmcs12)
> > > >>>>>>>>  	vcpu->arch.cr0_guest_owned_bits &=
> > > >>>>>>>>  ~vmcs12->cr0_guest_host_mask;
> > 	vmcs_writel(CR0_GUEST_HOST_MASK,
> > > >>>>>>> ~vcpu->arch.cr0_guest_owned_bits);
> > > >>>>>>>>
> > > >>>>>>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by
> > > > vmx_set_efer
> > > >>>>>>> below */
> > > >>>>>>>> -	vmcs_write32(VM_EXIT_CONTROLS, -
> > 	vmcs12->vm_exit_controls |
> > > >>>>>>>> vmcs_config.vmexit_ctrl); -	vmcs_write32(VM_ENTRY_CONTROLS,
> > > >>>>>>>> vmcs12->vm_entry_controls | +	/* L2->L1 exit controls are
> > > >>>>>>>> emulated - the hardware exit is +to L0 so +	 * we should use
> > its
> > > >>>>>>>> exit controls. Note that IA32_MODE, LOAD_IA32_EFER +	 *
> > bits are
> > > >>>>>>>> further modified by vmx_set_efer() below. +	 */
> > > >>>>>>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> > > >>>>>> This is wrong. We cannot use L0 exit control directly.
> > > >>>>>> LOAD_PERF_GLOBAL_CTRL, LOAD_HOST_EFE, LOAD_HOST_PAT,
> > > > ACK_INTR_ON_EXIT should use host's exit control. But others, still
> > > > need use (vmcs12|host).
> > > >>>>>>
> > > >>>>> I do not see why. We always intercept DR7/PAT/EFER, so save is
> > > >>>>> emulated too. Host address space size always come from L0 and
> > > >>>>> preemption timer is not supported for nested IIRC and when it
> > > >>>>> will be host will have to save it on exit anyway for correct emulation.
> > > >>>>
> > > >>>> Preemption timer is already supported and works fine as far as I tested.
> > > >>>> KVM doesn't use it for L1, so we do not need to save/restore it - IIRC.
> > > >>>>
> > > >>> So what happens if L1 configures it to value X after X/2 ticks L0
> > > >>> exit happen and L0 gets back to L2 directly. The counter will be X
> > > >>> again instead of X/2.
> > > >>
> > > >> Likely. Yes, we need to improve our emulation by setting "Save
> > > >> VMX-preemption timer value" or emulate this in software if the
> > > >> hardware lacks support for it (was this flag introduced after the
> > > >> preemption timer itself?).
> > > >>
> > > > Not sure, but my point was that for correct emulation host needs to
> > > > set "save preempt timer on vmexit" anyway so all VM_EXIT_CONTROLS are
> > > > indeed emulated as far as I see.
> > > >
> > > Ok, here is my summary, please correct me if I am wrong:
> > > bit 2: Save debug controls, the first processor only support 1-setting on it, so
> > just use host's setting is enough
> > Not because first processor only supported 1-setting, but because L0
> > intercepts all changes to DR7 and DEBUGCTL MSR, so L2 cannot change them
> > behind L0 back. If L1 asks to save them in vmcs12 L0 can do it during
> > vmexit emulation.
> > 
> > > bit 9: Host address space size, it indicate the host's state, so must use host's
> > setting.
> > > bit 12: Load IA32_PERF_GLOBAL_CTRL: same as above.
> > Not sure what "above" do you mean. It is fully emulated during vmexit
> > emulation. We do not want PERF_GLOBAL_CTRL to be loaded during L2->L0
> Not understand why PERF_GLOBAL_CTRL/ Load IA32_PAT/ Load IA32_EFER shouldn't be loaded during L2->L0?
> 
Because L0 didn't ask them to be loaded and values that would be loaded
are L1 values, not L0's. If L0 requests loading then they should be
loaded, but we should be careful to put L0's values in vmcs02.

> > vmexit, we want it to be loaded in L1 after L0 emulates L2->L1 vmexit.
> > But I think there is a bug in current emulation. The code looks like
> > this:
> > 
> >         if (vmcs12->vm_exit_controls &
> > VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
> >                 vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
> >                         vmcs12->host_ia32_perf_global_ctrl);
> > 
> > But GUEST_IA32_PERF_GLOBAL_CTRL will not be loaded during L1 entry unless
> > VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL is set. Why do we assume it is
> > set
> > here?
> You are right. It seems a bug in current emulation.
> 
> > 
> > > bit 15 : Acknowledge interrupt on exit: same as above.
> > Same as bit 9.
> > 
> > > bit 19: Load IA32_PAT: same as above.
> > > bit 20: Load IA32_EFER: same as above.
> > Those two are same as bit 12.
> > 
> > > bit 18: Save IA32_PAT, Didn't expose it to L1, so use host' setting is ok.
> > > bit 19: Save IA32_EFER, same as above.
> > Those two are the same a bit 2.
> > 
> > > bit 22: Save VMXpreemption timer value, I don't see KVM expose it to L1, but
> > Jan said it's working. Strange! And according gleb's suggestion, it better to
> > always set it.
> > >
> > It exposed it in nested_vmx_setup_ctls_msrs:
> > 
> >         nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK |
> >                 PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS |
> >                 PIN_BASED_VMX_PREEMPTION_TIMER;
> > According to Gleb's suggestion current emulation is broken and to fix it
> > the bit will have to be set on each L2 entry if L1 is using preemption timer.
> > 
> > > So, currently, only use host' exit_control for L2 is enough.
> > >
> > > Best regards,
> > > Yang
> > >
> 
> Best regards,
> Yang

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 260a919..fb9cae5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2192,7 +2192,8 @@  static __init void nested_vmx_setup_ctls_msrs(void)
 #else
 	nested_vmx_exit_ctls_high = 0;
 #endif
-	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
+	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+				      VM_EXIT_LOAD_IA32_EFER);
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2201,8 +2202,8 @@  static __init void nested_vmx_setup_ctls_msrs(void)
 	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 	nested_vmx_entry_ctls_high &=
 		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
-	nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
-
+	nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
+				       VM_ENTRY_LOAD_IA32_EFER);
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
 		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
@@ -7492,10 +7493,18 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 
-	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */
-	vmcs_write32(VM_EXIT_CONTROLS,
-		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
-	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
+	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
+	 * we should use its exit controls. Note that IA32_MODE, LOAD_IA32_EFER
+	 * bits are further modified by vmx_set_efer() below.
+	 */
+	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+
+	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
+	 * emulated by vmx_set_efer(), below.
+	 */
+	vmcs_write32(VM_ENTRY_CONTROLS,
+		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
+			~VM_ENTRY_IA32E_MODE) |
 		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
 
 	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)