
[15/15] KVM: nVMX: Copy PDPTRs to/from vmcs12 only when necessary

Message ID 20190507160640.4812-16-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Series KVM: nVMX: Optimize nested VM-Entry

Commit Message

Sean Christopherson May 7, 2019, 4:06 p.m. UTC
Per Intel's SDM:

  ... the logical processor uses PAE paging if CR0.PG=1, CR4.PAE=1 and
  IA32_EFER.LME=0.  A VM entry to a guest that uses PAE paging loads the
  PDPTEs into internal, non-architectural registers based on the setting
  of the "enable EPT" VM-execution control.

and:

  [GUEST_PDPTR] values are saved into the four PDPTE fields as follows:

    - If the "enable EPT" VM-execution control is 0 or the logical
      processor was not using PAE paging at the time of the VM exit,
      the values saved are undefined.

In other words, if EPT is disabled or the guest isn't using PAE paging,
then the PDPTRs aren't consumed by hardware on VM-Entry and are loaded
with junk on VM-Exit.  From a nesting perspective, all of the above hold
true, i.e. KVM can effectively ignore the VMCS PDPTRs.  E.g. KVM already
loads the PDPTRs from memory when nested EPT is disabled (see
nested_vmx_load_cr3()).
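
In code, this boils down to a single predicate: vmcs12's GUEST_PDPTR
fields are meaningful only if L1 runs L2 with EPT enabled and L2 is
using PAE paging.  A sketch of the combined check (the helper name
nested_vmx_pdptrs_valid() is illustrative; nested_cpu_has_ept(),
is_long_mode(), is_pae() and is_paging() are KVM's existing accessors):

	/*
	 * vmcs12's GUEST_PDPTRn fields matter only if L1 enabled EPT for
	 * L2 and L2 uses PAE paging, i.e. CR0.PG=1, CR4.PAE=1 and
	 * IA32_EFER.LME=0.
	 */
	static bool nested_vmx_pdptrs_valid(struct kvm_vcpu *vcpu,
					    struct vmcs12 *vmcs12)
	{
		return nested_cpu_has_ept(vmcs12) && !is_long_mode(vcpu) &&
		       is_pae(vcpu) && is_paging(vcpu);
	}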

Because KVM intercepts setting CR4.PAE, there is no danger of consuming
a stale value or clobbering L1's VMWRITEs regardless of whether L1
intercepts CR4.PAE. The vmcs12's values are unchanged up until the
VM-Exit where L2 sets CR4.PAE, i.e. L0 will see the new PAE state on the
subsequent VM-Entry and propagate the PDPTRs from vmcs12 to vmcs02.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/kvm/vmx/nested.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

Comments

Paolo Bonzini June 6, 2019, 4:53 p.m. UTC | #1
On 07/05/19 18:06, Sean Christopherson wrote:
> [...]
>
> @@ -2366,6 +2360,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>  		return -EINVAL;
>  	}
>  
> +	/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
> +	if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
> +	    !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
> +		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
> +		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
> +		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
> +		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
> +	}

This probably should be merged into nested_vmx_load_cr3, but something
for later.  I've just sent a patch to create a new is_pae_paging
function that can be used here.

Paolo
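
For reference, the is_pae_paging() helper Paolo mentions would look
something like the sketch below (is_long_mode(), is_pae() and
is_paging() are KVM's existing mode accessors):

	/* True iff the vCPU uses PAE paging: EFER.LME=0, CR4.PAE=1, CR0.PG=1. */
	static inline bool is_pae_paging(struct kvm_vcpu *vcpu)
	{
		return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu);
	}

With such a helper, both of the patch's new conditions reduce to
"nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)".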


Patch

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index cfdc04fde8eb..b8bd446b2c8b 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2184,17 +2184,6 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 		vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
 		vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
 
-		/*
-		 * L1 may access the L2's PDPTR, so save them to construct
-		 * vmcs12
-		 */
-		if (enable_ept) {
-			vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
-			vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
-			vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
-			vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
-		}
-
 		if (vmx->nested.nested_run_pending &&
 		    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
 			vmcs_write64(GUEST_IA32_DEBUGCTL,
@@ -2256,10 +2245,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+	bool load_guest_pdptrs_vmcs12 = false;
 
 	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
 		prepare_vmcs02_full(vmx, vmcs12);
 		vmx->nested.dirty_vmcs12 = false;
+
+		load_guest_pdptrs_vmcs12 = !vmx->nested.hv_evmcs ||
+			!(vmx->nested.hv_evmcs->hv_clean_fields &
+			  HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
 	}
 
 	/*
@@ -2366,6 +2360,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		return -EINVAL;
 	}
 
+	/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
+	if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
+	    !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
+		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+	}
+
 	/* Shadow page tables on either EPT or shadow page tables. */
 	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
 				entry_failure_code))
@@ -3467,10 +3470,13 @@ static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	 */
 	if (enable_ept) {
 		vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
-		vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
-		vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
-		vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
-		vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+		if (nested_cpu_has_ept(vmcs12) && !is_long_mode(vcpu) &&
+		    is_pae(vcpu) && is_paging(vcpu)) {
+			vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
+			vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
+			vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
+			vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+		}
 	}
 
 	vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);