diff mbox

[v6,01/15] nEPT: Support LOAD_IA32_EFER entry/exit controls for L1

Message ID 1375366117-9014-2-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov Aug. 1, 2013, 2:08 p.m. UTC
From: Nadav Har'El <nyh@il.ibm.com>

Recent KVM, since http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
switch the EFER MSR when EPT is used and the host and guest have different
NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
and want to be able to run recent KVM as L1, we need to allow L1 to use this
EFER switching feature.

To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if available,
and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
support for the former (the latter is still unsupported).

Nested entry and exit emulation (prepare_vmcs_02 and load_vmcs12_host_state,
respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So all
that's left to do in this patch is to properly advertise this feature to L1.

Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by using
vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
support this feature, regardless of whether the host supports it.

Reviewed-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/kvm/vmx.c |   23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

Comments

Zhang, Yang Z Aug. 2, 2013, 3:04 a.m. UTC | #1
Gleb Natapov wrote on 2013-08-01:
> From: Nadav Har'El <nyh@il.ibm.com>
> 
> Recent KVM, since
> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
> switch the EFER MSR when EPT is used and the host and guest have different
> NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
> and want to be able to run recent KVM as L1, we need to allow L1 to use this
> EFER switching feature.
> 
> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if
> available,
> and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
> support for the former (the latter is still unsupported).
> 
> Nested entry and exit emulation (prepare_vmcs_02 and
> load_vmcs12_host_state,
> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So
> all
> that's left to do in this patch is to properly advertise this feature to L1.
> 
> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by
> using
> vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
> support this feature, regardless of whether the host supports it.
> 
> Reviewed-by: Orit Wasserman <owasserm@redhat.com>
> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/kvm/vmx.c |   23 ++++++++++++++++-------
>  1 file changed, 16 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index e999dc7..27efa6a 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2198,7 +2198,8 @@ static __init void
> nested_vmx_setup_ctls_msrs(void)
>  #else
>  	nested_vmx_exit_ctls_high = 0;
>  #endif
> -	nested_vmx_exit_ctls_high |=
> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
> +	nested_vmx_exit_ctls_high |=
> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
> +				      VM_EXIT_LOAD_IA32_EFER);
> 
>  	/* entry controls */
>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
> @@ -2207,8 +2208,8 @@ static __init void
> nested_vmx_setup_ctls_msrs(void)
>  	nested_vmx_entry_ctls_low =
> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>  	nested_vmx_entry_ctls_high &=
>  		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
> -	nested_vmx_entry_ctls_high |=
> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
> -
> +	nested_vmx_entry_ctls_high |=
> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
> +				       VM_ENTRY_LOAD_IA32_EFER);
Just saw it, we didn't expose bit22 save VMX-preemption timer in vm-exit control but we already allowed guest to set active VMX-preemption timer in pin based vm-execution conrols. This is wrong.

>  	/* cpu-based controls */
>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
>  		nested_vmx_procbased_ctls_low,
> nested_vmx_procbased_ctls_high);
> @@ -7529,10 +7530,18 @@ static void prepare_vmcs02(struct kvm_vcpu
> *vcpu, struct vmcs12 *vmcs12)
>  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
>  	vmcs_writel(CR0_GUEST_HOST_MASK,
> ~vcpu->arch.cr0_guest_owned_bits);
> 
> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer
> below */
> -	vmcs_write32(VM_EXIT_CONTROLS,
> -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
> -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
> +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
> +	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
> +	 * bits are further modified by vmx_set_efer() below.
> +	 */
> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
Should we mentioned that save vmx preemption bit must use host|guest, not just host?

> +
> +	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
> are
> +	 * emulated by vmx_set_efer(), below.
> +	 */
> +	vmcs_write32(VM_ENTRY_CONTROLS,
> +		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
> +			~VM_ENTRY_IA32E_MODE) |
>  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
> 
>  	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)
> --
> 1.7.10.4

Best regards,
Yang


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka Aug. 2, 2013, 6:35 a.m. UTC | #2
On 2013-08-02 05:04, Zhang, Yang Z wrote:
> Gleb Natapov wrote on 2013-08-01:
>> From: Nadav Har'El <nyh@il.ibm.com>
>>
>> Recent KVM, since
>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
>> switch the EFER MSR when EPT is used and the host and guest have different
>> NX bits. So if we add support for nested EPT (L1 guest using EPT to run L2)
>> and want to be able to run recent KVM as L1, we need to allow L1 to use this
>> EFER switching feature.
>>
>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if
>> available,
>> and if it isn't, it uses the generic VM_ENTRY/EXIT_MSR_LOAD. This patch adds
>> support for the former (the latter is still unsupported).
>>
>> Nested entry and exit emulation (prepare_vmcs_02 and
>> load_vmcs12_host_state,
>> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly. So
>> all
>> that's left to do in this patch is to properly advertise this feature to L1.
>>
>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, by
>> using
>> vmx_set_efer (which itself sets one of several vmcs02 fields), so we always
>> support this feature, regardless of whether the host supports it.
>>
>> Reviewed-by: Orit Wasserman <owasserm@redhat.com>
>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>> ---
>>  arch/x86/kvm/vmx.c |   23 ++++++++++++++++-------
>>  1 file changed, 16 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index e999dc7..27efa6a 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -2198,7 +2198,8 @@ static __init void
>> nested_vmx_setup_ctls_msrs(void)
>>  #else
>>  	nested_vmx_exit_ctls_high = 0;
>>  #endif
>> -	nested_vmx_exit_ctls_high |=
>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
>> +	nested_vmx_exit_ctls_high |=
>> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>> +				      VM_EXIT_LOAD_IA32_EFER);
>>
>>  	/* entry controls */
>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
>> @@ -2207,8 +2208,8 @@ static __init void
>> nested_vmx_setup_ctls_msrs(void)
>>  	nested_vmx_entry_ctls_low =
>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>>  	nested_vmx_entry_ctls_high &=
>>  		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
>> -	nested_vmx_entry_ctls_high |=
>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>> -
>> +	nested_vmx_entry_ctls_high |=
>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
>> +				       VM_ENTRY_LOAD_IA32_EFER);
> Just saw it, we didn't expose bit22 save VMX-preemption timer in vm-exit control but we already allowed guest to set active VMX-preemption timer in pin based vm-execution conrols. This is wrong.

Does the presence of preemption timer support imply that saving its
value is also supported? Then we could demand this combination (ie. do
not expose preemption timer support at all to L1 if value saving is
missing) and build our preemption timer emulation on top.

There is more broken /wrt VMX preemption timer, patches are welcome.
Arthur will also try to develop test cases for it. But that topic is
unrelated to this series.

Jan

> 
>>  	/* cpu-based controls */
>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
>>  		nested_vmx_procbased_ctls_low,
>> nested_vmx_procbased_ctls_high);
>> @@ -7529,10 +7530,18 @@ static void prepare_vmcs02(struct kvm_vcpu
>> *vcpu, struct vmcs12 *vmcs12)
>>  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
>>  	vmcs_writel(CR0_GUEST_HOST_MASK,
>> ~vcpu->arch.cr0_guest_owned_bits);
>>
>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer
>> below */
>> -	vmcs_write32(VM_EXIT_CONTROLS,
>> -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
>> -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
>> +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
>> +	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
>> +	 * bits are further modified by vmx_set_efer() below.
>> +	 */
>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
> Should we mentioned that save vmx preemption bit must use host|guest, not just host?
> 
>> +
>> +	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
>> are
>> +	 * emulated by vmx_set_efer(), below.
>> +	 */
>> +	vmcs_write32(VM_ENTRY_CONTROLS,
>> +		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
>> +			~VM_ENTRY_IA32E_MODE) |
>>  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
>>
>>  	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)
>> --
>> 1.7.10.4
> 
> Best regards,
> Yang
Zhang, Yang Z Aug. 2, 2013, 7:27 a.m. UTC | #3
Jan Kiszka wrote on 2013-08-02:
> On 2013-08-02 05:04, Zhang, Yang Z wrote:

>> Gleb Natapov wrote on 2013-08-01:

>>> From: Nadav Har'El <nyh@il.ibm.com>

>>> 

>>> Recent KVM, since

>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577

>>> switch the EFER MSR when EPT is used and the host and guest have 

>>> different NX bits. So if we add support for nested EPT (L1 guest 

>>> using EPT to run L2) and want to be able to run recent KVM as L1, we 

>>> need to allow L1 to use this EFER switching feature.

>>> 

>>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if 

>>> available, and if it isn't, it uses the generic 

>>> VM_ENTRY/EXIT_MSR_LOAD. This patch adds support for the former (the 

>>> latter is still unsupported).

>>> 

>>> Nested entry and exit emulation (prepare_vmcs_02 and 

>>> load_vmcs12_host_state,

>>> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly.

>>> So all that's left to do in this patch is to properly advertise this 

>>> feature to L1.

>>> 

>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, 

>>> by using vmx_set_efer (which itself sets one of several vmcs02 

>>> fields), so we always support this feature, regardless of whether 

>>> the host supports it.

>>> 

>>> Reviewed-by: Orit Wasserman <owasserm@redhat.com>

>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>

>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>

>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>

>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>

>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>

>>> ---

>>>  arch/x86/kvm/vmx.c |   23 ++++++++++++++++-------

>>>  1 file changed, 16 insertions(+), 7 deletions(-) diff --git 

>>> a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e999dc7..27efa6a 

>>> 100644

>>> --- a/arch/x86/kvm/vmx.c

>>> +++ b/arch/x86/kvm/vmx.c

>>> @@ -2198,7 +2198,8 @@ static __init void

>>> nested_vmx_setup_ctls_msrs(void)

>>>  #else

>>>  	nested_vmx_exit_ctls_high = 0;

>>>  #endif

>>> -	nested_vmx_exit_ctls_high |=

>>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;

>>> +	nested_vmx_exit_ctls_high |=

>>> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |

>>> +				      VM_EXIT_LOAD_IA32_EFER);

>>> 

>>>  	/* entry controls */

>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,

>>> @@ -2207,8 +2208,8 @@ static __init void

>>> nested_vmx_setup_ctls_msrs(void)

>>>  	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;

>>>  	nested_vmx_entry_ctls_high &= 		VM_ENTRY_LOAD_IA32_PAT |

>>>  VM_ENTRY_IA32E_MODE;

>>> -	nested_vmx_entry_ctls_high |=

>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;

>>> -

>>> +	nested_vmx_entry_ctls_high |=

>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |

>>> +				       VM_ENTRY_LOAD_IA32_EFER);

>> Just saw it, we didn't expose bit22 save VMX-preemption timer in 

>> vm-exit

> control but we already allowed guest to set active VMX-preemption 

> timer in pin based vm-execution conrols. This is wrong.

> 

> Does the presence of preemption timer support imply that saving its 

> value is also supported? Then we could demand this combination (ie. do 

> not expose preemption timer support at all to L1 if value saving is

> missing) and build our preemption timer emulation on top.

> 

I don't see we saved the preemption timer value to vmcs12 in prepare_vmcs12(). Will it be saved automatically?

> There is more broken /wrt VMX preemption timer, patches are welcome.

> Arthur will also try to develop test cases for it. But that topic is 

> unrelated to this series.

> 

> Jan

> 

>> 

>>>  	/* cpu-based controls */

>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,

>>>  		nested_vmx_procbased_ctls_low,

>>> nested_vmx_procbased_ctls_high);

>>> @@ -7529,10 +7530,18 @@ static void prepare_vmcs02(struct kvm_vcpu 

>>> *vcpu, struct vmcs12 *vmcs12)

>>>  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;

>>>  	vmcs_writel(CR0_GUEST_HOST_MASK,

>>> ~vcpu->arch.cr0_guest_owned_bits);

>>> 

>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer

>>> below */

>>> -	vmcs_write32(VM_EXIT_CONTROLS,

>>> -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);

>>> -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |

>>> +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so

>>> +	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER

>>> +	 * bits are further modified by vmx_set_efer() below.

>>> +	 */

>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);

>> Should we mentioned that save vmx preemption bit must use host|guest, 

>> not just host?

>> 

>>> +

>>> +	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE

>>> are

>>> +	 * emulated by vmx_set_efer(), below.

>>> +	 */

>>> +	vmcs_write32(VM_ENTRY_CONTROLS,

>>> +		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &

>>> +			~VM_ENTRY_IA32E_MODE) |

>>>  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));

>>>  

>>>  	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)

>>> --

>>> 1.7.10.4

>> 

>> Best regards,

>> Yang

>



Best regards,
Yang
Jan Kiszka Aug. 2, 2013, 7:33 a.m. UTC | #4
On 2013-08-02 09:27, Zhang, Yang Z wrote:
> Jan Kiszka wrote on 2013-08-02:
>> On 2013-08-02 05:04, Zhang, Yang Z wrote:
>>> Gleb Natapov wrote on 2013-08-01:
>>>> From: Nadav Har'El <nyh@il.ibm.com>
>>>>
>>>> Recent KVM, since
>>>> http://kerneltrap.org/mailarchive/linux-kvm/2010/5/2/6261577
>>>> switch the EFER MSR when EPT is used and the host and guest have 
>>>> different NX bits. So if we add support for nested EPT (L1 guest 
>>>> using EPT to run L2) and want to be able to run recent KVM as L1, we 
>>>> need to allow L1 to use this EFER switching feature.
>>>>
>>>> To do this EFER switching, KVM uses VM_ENTRY/EXIT_LOAD_IA32_EFER if 
>>>> available, and if it isn't, it uses the generic 
>>>> VM_ENTRY/EXIT_MSR_LOAD. This patch adds support for the former (the 
>>>> latter is still unsupported).
>>>>
>>>> Nested entry and exit emulation (prepare_vmcs_02 and 
>>>> load_vmcs12_host_state,
>>>> respectively) already handled VM_ENTRY/EXIT_LOAD_IA32_EFER correctly.
>>>> So all that's left to do in this patch is to properly advertise this 
>>>> feature to L1.
>>>>
>>>> Note that vmcs12's VM_ENTRY/EXIT_LOAD_IA32_EFER are emulated by L0, 
>>>> by using vmx_set_efer (which itself sets one of several vmcs02 
>>>> fields), so we always support this feature, regardless of whether 
>>>> the host supports it.
>>>>
>>>> Reviewed-by: Orit Wasserman <owasserm@redhat.com>
>>>> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
>>>> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
>>>> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
>>>> Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
>>>> Signed-off-by: Gleb Natapov <gleb@redhat.com>
>>>> ---
>>>>  arch/x86/kvm/vmx.c |   23 ++++++++++++++++-------
>>>>  1 file changed, 16 insertions(+), 7 deletions(-) diff --git 
>>>> a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e999dc7..27efa6a 
>>>> 100644
>>>> --- a/arch/x86/kvm/vmx.c
>>>> +++ b/arch/x86/kvm/vmx.c
>>>> @@ -2198,7 +2198,8 @@ static __init void
>>>> nested_vmx_setup_ctls_msrs(void)
>>>>  #else
>>>>  	nested_vmx_exit_ctls_high = 0;
>>>>  #endif
>>>> -	nested_vmx_exit_ctls_high |=
>>>> VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
>>>> +	nested_vmx_exit_ctls_high |=
>>>> (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
>>>> +				      VM_EXIT_LOAD_IA32_EFER);
>>>>
>>>>  	/* entry controls */
>>>>  	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
>>>> @@ -2207,8 +2208,8 @@ static __init void
>>>> nested_vmx_setup_ctls_msrs(void)
>>>>  	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>>>>  	nested_vmx_entry_ctls_high &= 		VM_ENTRY_LOAD_IA32_PAT |
>>>>  VM_ENTRY_IA32E_MODE;
>>>> -	nested_vmx_entry_ctls_high |=
>>>> VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
>>>> -
>>>> +	nested_vmx_entry_ctls_high |=
>>>> (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
>>>> +				       VM_ENTRY_LOAD_IA32_EFER);
>>> Just saw it, we didn't expose bit22 save VMX-preemption timer in 
>>> vm-exit
>> control but we already allowed guest to set active VMX-preemption 
>> timer in pin based vm-execution conrols. This is wrong.
>>
>> Does the presence of preemption timer support imply that saving its 
>> value is also supported? Then we could demand this combination (ie. do 
>> not expose preemption timer support at all to L1 if value saving is
>> missing) and build our preemption timer emulation on top.
>>
> I don't see we saved the preemption timer value to vmcs12 in prepare_vmcs12(). Will it be saved automatically?

No. As I said, there is more broken with our preemption timer emulation.

Jan

> 
>> There is more broken /wrt VMX preemption timer, patches are welcome.
>> Arthur will also try to develop test cases for it. But that topic is 
>> unrelated to this series.
>>
>> Jan
>>
>>>
>>>>  	/* cpu-based controls */
>>>>  	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
>>>>  		nested_vmx_procbased_ctls_low,
>>>> nested_vmx_procbased_ctls_high);
>>>> @@ -7529,10 +7530,18 @@ static void prepare_vmcs02(struct kvm_vcpu 
>>>> *vcpu, struct vmcs12 *vmcs12)
>>>>  	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
>>>>  	vmcs_writel(CR0_GUEST_HOST_MASK,
>>>> ~vcpu->arch.cr0_guest_owned_bits);
>>>>
>>>> -	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer
>>>> below */
>>>> -	vmcs_write32(VM_EXIT_CONTROLS,
>>>> -		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
>>>> -	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
>>>> +	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
>>>> +	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
>>>> +	 * bits are further modified by vmx_set_efer() below.
>>>> +	 */
>>>> +	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
>>> Should we mentioned that save vmx preemption bit must use host|guest, 
>>> not just host?
>>>
>>>> +
>>>> +	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
>>>> are
>>>> +	 * emulated by vmx_set_efer(), below.
>>>> +	 */
>>>> +	vmcs_write32(VM_ENTRY_CONTROLS,
>>>> +		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
>>>> +			~VM_ENTRY_IA32E_MODE) |
>>>>  		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
>>>>  
>>>>  	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)
>>>> --
>>>> 1.7.10.4
>>>
>>> Best regards,
>>> Yang
>>
> 
> 
> Best regards,
> Yang
> 
>
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e999dc7..27efa6a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2198,7 +2198,8 @@  static __init void nested_vmx_setup_ctls_msrs(void)
 #else
 	nested_vmx_exit_ctls_high = 0;
 #endif
-	nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
+	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
+				      VM_EXIT_LOAD_IA32_EFER);
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2207,8 +2208,8 @@  static __init void nested_vmx_setup_ctls_msrs(void)
 	nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
 	nested_vmx_entry_ctls_high &=
 		VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE;
-	nested_vmx_entry_ctls_high |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
-
+	nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
+				       VM_ENTRY_LOAD_IA32_EFER);
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
 		nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high);
@@ -7529,10 +7530,18 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 
-	/* Note: IA32_MODE, LOAD_IA32_EFER are modified by vmx_set_efer below */
-	vmcs_write32(VM_EXIT_CONTROLS,
-		vmcs12->vm_exit_controls | vmcs_config.vmexit_ctrl);
-	vmcs_write32(VM_ENTRY_CONTROLS, vmcs12->vm_entry_controls |
+	/* L2->L1 exit controls are emulated - the hardware exit is to L0 so
+	 * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
+	 * bits are further modified by vmx_set_efer() below.
+	 */
+	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+
+	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
+	 * emulated by vmx_set_efer(), below.
+	 */
+	vmcs_write32(VM_ENTRY_CONTROLS,
+		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
+			~VM_ENTRY_IA32E_MODE) |
 		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
 
 	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)