
[v3,3/4] KVM: nVMX: introduce nested_vmx_load_cr3 and call it on vmentry

Message ID 1480518191-4572-4-git-send-email-lprosek@redhat.com (mailing list archive)

Commit Message

Ladi Prosek Nov. 30, 2016, 3:03 p.m. UTC
Loading CR3 as part of emulating vmentry is different from regular CR3 loads,
as implemented in kvm_set_cr3, in several ways.

* different rules are followed to check CR3, and the caller needs to be able
to distinguish between the possible failures
* PDPTRs are not loaded if PAE paging and nested EPT are both enabled
* many MMU operations are not necessary

This patch introduces nested_vmx_load_cr3, suitable for CR3 loads as part of
nested vmentry and vmexit, and makes use of it on the nested vmentry path.
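
As a quick illustration of the first bullet: the helper rejects a CR3 value
with any bit set above the guest's reported physical address width, rather
than going through kvm_set_cr3. A minimal userspace sketch of just that range
check (not kernel code; the 36-bit MAXPHYADDR is only an assumed example
value) could look like:

#include <stdio.h>
#include <stdint.h>

/* Reject any CR3 with bits set above the guest's physical address width. */
static int cr3_in_range(uint64_t cr3, unsigned int maxphyaddr)
{
	uint64_t invalid_mask = ~0ULL << maxphyaddr;

	return (cr3 & invalid_mask) == 0;
}

int main(void)
{
	/* Assume a guest that reports a 36-bit MAXPHYADDR. */
	printf("%d\n", cr3_in_range(0x000000000dead000ULL, 36));	/* 1: valid */
	printf("%d\n", cr3_in_range(0x0000100000000000ULL, 36));	/* 0: bit 44 set */
	return 0;
}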

Signed-off-by: Ladi Prosek <lprosek@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx.c              | 59 +++++++++++++++++++++++++++++------------
 arch/x86/kvm/x86.c              |  3 ++-
 3 files changed, 45 insertions(+), 18 deletions(-)

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bdde807..9ec06a3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1062,6 +1062,7 @@  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
+bool pdptrs_changed(struct kvm_vcpu *vcpu);
 
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  const void *val, int bytes);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 70598a1..58dc7f1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9796,6 +9796,44 @@  static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 }
 
 /*
+ * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are
+ * emulating VM entry into a guest with EPT enabled.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
+ */
+static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
+			       unsigned long *entry_failure_code)
+{
+	unsigned long invalid_mask;
+
+	if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
+		invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
+		if (cr3 & invalid_mask) {
+			*entry_failure_code = ENTRY_FAIL_DEFAULT;
+			return 1;
+		}
+
+		/*
+		 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
+		 * must not be dereferenced.
+		 */
+		if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
+		    !nested_ept) {
+			if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
+				*entry_failure_code = ENTRY_FAIL_PDPTE;
+				return 1;
+			}
+		}
+
+		vcpu->arch.cr3 = cr3;
+		__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+	}
+
+	kvm_mmu_reset_context(vcpu);
+	return 0;
+}
+
+/*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
  * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
@@ -10118,23 +10156,10 @@  static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmx_set_cr4(vcpu, vmcs12->guest_cr4);
 	vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
 
-	/*
-	 * Shadow page tables on either EPT or shadow page tables.
-	 * If PAE and EPT are both on, CR3 is not used by the CPU and must not
-	 * be dereferenced.
-	 */
-	if (is_pae(vcpu) && is_paging(vcpu) && !is_long_mode(vcpu) &&
-	    nested_ept_enabled) {
-		vcpu->arch.cr3 = vmcs12->guest_cr3;
-		__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-	} else {
-		if (kvm_set_cr3(vcpu, vmcs12->guest_cr3)) {
-			*entry_failure_code = ENTRY_FAIL_DEFAULT;
-			return 1;
-		}
-	}
-
-	kvm_mmu_reset_context(vcpu);
+	/* Shadow page tables on either EPT or shadow page tables. */
+	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled,
+				entry_failure_code))
+		return 1;
 
 	if (!enable_ept)
 		vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 78f6061..e4e06f9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -573,7 +573,7 @@  int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 }
 EXPORT_SYMBOL_GPL(load_pdptrs);
 
-static bool pdptrs_changed(struct kvm_vcpu *vcpu)
+bool pdptrs_changed(struct kvm_vcpu *vcpu)
 {
 	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
 	bool changed = true;
@@ -599,6 +599,7 @@  static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 
 	return changed;
 }
+EXPORT_SYMBOL_GPL(pdptrs_changed);
 
 int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {