diff mbox

[v3,06/13] nEPT: Fix cr3 handling in nested exit and entry

Message ID 1368939152-11406-6-git-send-email-jun.nakajima@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nakajima, Jun May 19, 2013, 4:52 a.m. UTC
From: Nadav Har'El <nyh@il.ibm.com>

The existing code for handling cr3 and related VMCS fields during nested
exit and entry wasn't correct in all cases:

If L2 is allowed to control cr3 (and this is indeed the case in nested EPT),
during nested exit we must copy the modified cr3 from vmcs02 to vmcs12, and
we forgot to do so. This patch adds this copy.

If L0 isn't controlling cr3 when running L2 (i.e., L0 is using EPT), and
whoever does control cr3 (L1 or L2) is using PAE, the processor might have
saved PDPTEs and we should also save them in vmcs12 (and restore later).

Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
---
 arch/x86/kvm/vmx.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Paolo Bonzini May 20, 2013, 1:19 p.m. UTC | #1
Il 19/05/2013 06:52, Jun Nakajima ha scritto:
> From: Nadav Har'El <nyh@il.ibm.com>
> 
> The existing code for handling cr3 and related VMCS fields during nested
> exit and entry wasn't correct in all cases:
> 
> If L2 is allowed to control cr3 (and this is indeed the case in nested EPT),
> during nested exit we must copy the modified cr3 from vmcs02 to vmcs12, and
> we forgot to do so. This patch adds this copy.
> 
> If L0 isn't controlling cr3 when running L2 (i.e., L0 is using EPT), and
> whoever does control cr3 (L1 or L2) is using PAE, the processor might have
> saved PDPTEs and we should also save them in vmcs12 (and restore later).
> 
> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> ---
>  arch/x86/kvm/vmx.c | 30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a88432f..b79efd4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -7608,6 +7608,17 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	kvm_set_cr3(vcpu, vmcs12->guest_cr3);
>  	kvm_mmu_reset_context(vcpu);
>  
> +	/*
> +	 * Additionally, except when L0 is using shadow page tables, L1 or
> +	 * L2 control guest_cr3 for L2, so they may also have saved PDPTEs
> +	 */
> +	if (enable_ept) {
> +		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
> +		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
> +		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
> +		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
> +	}
> +
>  	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
>  	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
>  }
> @@ -7930,6 +7941,25 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vmcs12->guest_pending_dbg_exceptions =
>  		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>  
> +	/*
> +	 * In some cases (usually, nested EPT), L2 is allowed to change its
> +	 * own CR3 without exiting. If it has changed it, we must keep it.
> +	 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
> +	 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
> +	 */
> +	if (enable_ept)
> +		vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3);
> +	/*
> +	 * Additionally, except when L0 is using shadow page tables, L1 or
> +	 * L2 control guest_cr3 for L2, so save their PDPTEs
> +	 */
> +	if (enable_ept) {
> +		vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
> +		vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
> +		vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
> +		vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
> +	}
> +
>  	vmcs12->vm_entry_controls =
>  		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
>  		(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
> 

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov June 12, 2013, 12:42 p.m. UTC | #2
On Sat, May 18, 2013 at 09:52:25PM -0700, Jun Nakajima wrote:
> From: Nadav Har'El <nyh@il.ibm.com>
> 
> The existing code for handling cr3 and related VMCS fields during nested
> exit and entry wasn't correct in all cases:
> 
> If L2 is allowed to control cr3 (and this is indeed the case in nested EPT),
> during nested exit we must copy the modified cr3 from vmcs02 to vmcs12, and
> we forgot to do so. This patch adds this copy.
> 
> If L0 isn't controlling cr3 when running L2 (i.e., L0 is using EPT), and
> whoever does control cr3 (L1 or L2) is using PAE, the processor might have
> saved PDPTEs and we should also save them in vmcs12 (and restore later).
> 
> Signed-off-by: Nadav Har'El <nyh@il.ibm.com>
> Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
> Signed-off-by: Xinhao Xu <xinhao.xu@intel.com>
> ---
>  arch/x86/kvm/vmx.c | 30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a88432f..b79efd4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -7608,6 +7608,17 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	kvm_set_cr3(vcpu, vmcs12->guest_cr3);
>  	kvm_mmu_reset_context(vcpu);
>  
> +	/*
> +	 * Additionally, except when L0 is using shadow page tables, L1 or
What this "Additionally" corresponds to?

> +	 * L2 control guest_cr3 for L2, so they may also have saved PDPTEs
> +	 */
> +	if (enable_ept) {
> +		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
> +		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
> +		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
> +		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
> +	}
> +
>  	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
>  	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
>  }
> @@ -7930,6 +7941,25 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vmcs12->guest_pending_dbg_exceptions =
>  		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
>  
> +	/*
> +	 * In some cases (usually, nested EPT), L2 is allowed to change its
> +	 * own CR3 without exiting. If it has changed it, we must keep it.
> +	 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
> +	 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
> +	 */
> +	if (enable_ept)
Non need separate if for guest_cr3. Put it under if() below.

> +		vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3);
> +	/*
> +	 * Additionally, except when L0 is using shadow page tables, L1 or
> +	 * L2 control guest_cr3 for L2, so save their PDPTEs
> +	 */
> +	if (enable_ept) {
> +		vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
> +		vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
> +		vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
> +		vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
> +	}
> +
>  	vmcs12->vm_entry_controls =
>  		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
>  		(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
> -- 
> 1.8.1.2

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a88432f..b79efd4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7608,6 +7608,17 @@  static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	kvm_set_cr3(vcpu, vmcs12->guest_cr3);
 	kvm_mmu_reset_context(vcpu);
 
+	/*
+	 * Additionally, except when L0 is using shadow page tables, L1 or
+	 * L2 control guest_cr3 for L2, so they may also have saved PDPTEs
+	 */
+	if (enable_ept) {
+		vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+		vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+		vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+		vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+	}
+
 	kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
 	kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
 }
@@ -7930,6 +7941,25 @@  static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs12->guest_pending_dbg_exceptions =
 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
 
+	/*
+	 * In some cases (usually, nested EPT), L2 is allowed to change its
+	 * own CR3 without exiting. If it has changed it, we must keep it.
+	 * Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
+	 * by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
+	 */
+	if (enable_ept)
+		vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3);
+	/*
+	 * Additionally, except when L0 is using shadow page tables, L1 or
+	 * L2 control guest_cr3 for L2, so save their PDPTEs
+	 */
+	if (enable_ept) {
+		vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
+		vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
+		vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
+		vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
+	}
+
 	vmcs12->vm_entry_controls =
 		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
 		(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);