
[3/3,v2] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor

Message ID 20170706230323.29952-4-bsd@redhat.com (mailing list archive)
State: New, archived

Commit Message

Bandan Das July 6, 2017, 11:03 p.m. UTC
When L2 uses vmfunc, L0 utilizes the associated vmexit to
emulate switching of the EPT pointer by reloading the
guest MMU.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
 arch/x86/include/asm/vmx.h |  6 +++++
 arch/x86/kvm/vmx.c         | 55 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 58 insertions(+), 3 deletions(-)
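
For context on the guest side: VMFUNC takes the function number in EAX
(0 selects EPTP switching) and the EPTP-list index in ECX. A minimal,
hypothetical L2 helper (the helper name is made up; the opcode bytes and
register convention follow the SDM):

	/*
	 * Ask the CPU (here, L0's emulation) to switch to the EPTP at
	 * 'index' in the 512-entry list that L1 programmed into
	 * EPTP_LIST_ADDRESS.
	 */
	static inline void l2_eptp_switch(unsigned int index)
	{
		asm volatile(".byte 0x0f, 0x01, 0xd4"	/* VMFUNC */
			     : : "a" (0), "c" (index)
			     : "memory");
	}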

Comments

Paolo Bonzini July 7, 2017, 8:30 a.m. UTC | #1
On 07/07/2017 01:03, Bandan Das wrote:
> When L2 uses vmfunc, L0 utilizes the associated vmexit to
> emulate switching of the EPT pointer by reloading the
> guest MMU.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Bandan Das <bsd@redhat.com>
> ---
>  arch/x86/include/asm/vmx.h |  6 +++++
>  arch/x86/kvm/vmx.c         | 55 +++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 58 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index da5375e..5f63a2e 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -115,6 +115,10 @@
>  #define VMX_MISC_SAVE_EFER_LMA			0x00000020
>  #define VMX_MISC_ACTIVITY_HLT			0x00000040
>  
> +/* VMFUNC functions */
> +#define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
> +#define VMFUNC_EPTP_ENTRIES  512
> +
>  static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
>  {
>  	return vmx_basic & GENMASK_ULL(30, 0);
> @@ -200,6 +204,8 @@ enum vmcs_field {
>  	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
>  	EOI_EXIT_BITMAP3                = 0x00002022,
>  	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
> +	EPTP_LIST_ADDRESS               = 0x00002024,
> +	EPTP_LIST_ADDRESS_HIGH          = 0x00002025,
>  	VMREAD_BITMAP                   = 0x00002026,
>  	VMWRITE_BITMAP                  = 0x00002028,
>  	XSS_EXIT_BITMAP                 = 0x0000202C,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 7364678..3a4aa68 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -246,6 +246,7 @@ struct __packed vmcs12 {
>  	u64 eoi_exit_bitmap1;
>  	u64 eoi_exit_bitmap2;
>  	u64 eoi_exit_bitmap3;
> +	u64 eptp_list_address;
>  	u64 xss_exit_bitmap;
>  	u64 guest_physical_address;
>  	u64 vmcs_link_pointer;
> @@ -771,6 +772,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
>  	FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
>  	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
>  	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
> +	FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
>  	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
>  	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
>  	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
> @@ -1402,6 +1404,13 @@ static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
>  	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
>  }
>  
> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
> +{
> +	return nested_cpu_has_vmfunc(vmcs12) &&
> +		(vmcs12->vm_function_control &
> +		 VMX_VMFUNC_EPTP_SWITCHING);
> +}
> +
>  static inline bool is_nmi(u32 intr_info)
>  {
>  	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>  	if (cpu_has_vmx_vmfunc()) {
>  		vmx->nested.nested_vmx_secondary_ctls_high |=
>  			SECONDARY_EXEC_ENABLE_VMFUNC;
> -		vmx->nested.nested_vmx_vmfunc_controls = 0;
> +		/*
> +		 * Advertise EPTP switching unconditionally
> +		 * since we emulate it
> +		 */
> +		vmx->nested.nested_vmx_vmfunc_controls =
> +			VMX_VMFUNC_EPTP_SWITCHING;
>  	}
>  
>  	/*
> @@ -7772,6 +7786,9 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
>  	struct vmcs12 *vmcs12;
>  	u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
> +	u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
> +	struct page *page = NULL;
> +	u64 *l1_eptp_list;
>  
>  	/*
>  	 * VMFUNC is only supported for nested guests, but we always enable the
> @@ -7784,11 +7801,43 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
>  	}
>  
>  	vmcs12 = get_vmcs12(vcpu);
> -	if ((vmcs12->vm_function_control & (1 << function)) == 0)
> +	if (((vmcs12->vm_function_control & (1 << function)) == 0) ||
> +	    WARN_ON_ONCE(function))
> +		goto fail;
> +
> +	if (!nested_cpu_has_ept(vmcs12) ||
> +	    !nested_cpu_has_eptp_switching(vmcs12))
> +		goto fail;
> +
> +	if (!vmcs12->eptp_list_address || index >= VMFUNC_EPTP_ENTRIES)
> +		goto fail;
> +
> +	page = nested_get_page(vcpu, vmcs12->eptp_list_address);
> +	if (!page)
> +		goto fail;
> +
> +	l1_eptp_list = kmap(page);
> +	if (!l1_eptp_list[index])
>  		goto fail;
> -	WARN(1, "VMCS12 VM function control should have been zero");
> +
> +	/*
> +	 * If the (L2) guest does a vmfunc to the currently
> +	 * active ept pointer, we don't have to do anything else
> +	 */
> +	if (vmcs12->ept_pointer != l1_eptp_list[index]) {
> +		kvm_mmu_unload(vcpu);
> +		/*
> +		 * TODO: Verify that guest ept satisfies vmentry prereqs
> +		 */
> +		vmcs12->ept_pointer = l1_eptp_list[index];
> +		kvm_mmu_reload(vcpu);
> +		kunmap(page);
> +	}

Missing nested_release_page_clean, here and at the "fail" label.
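
Something along these lines on both exits, i.e. dropping the page
reference in addition to unmapping it (untested sketch; note it also
unmaps when the pointer is unchanged, which the patch currently skips):

	if (vmcs12->ept_pointer != l1_eptp_list[index]) {
		kvm_mmu_unload(vcpu);
		vmcs12->ept_pointer = l1_eptp_list[index];
		kvm_mmu_reload(vcpu);
	}
	kunmap(page);
	nested_release_page_clean(page);
	return kvm_skip_emulated_instruction(vcpu);

fail:
	if (page) {
		kunmap(page);
		nested_release_page_clean(page);
	}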

The TODO is a symptom of a bigger problem, so I guess it's okay for now.
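
For reference, the kind of check that TODO asks for, going by the EPTP
format in the SDM (rough sketch, untested; the function name is made up):

	static bool nested_eptp_valid(struct kvm_vcpu *vcpu, u64 eptp)
	{
		/* Bits 2:0: EPT memory type, must be 0 (UC) or 6 (WB) */
		u64 memtype = eptp & 0x7;
		/* Bits 5:3: EPT page-walk length minus 1, must be 3 */
		u64 walk = (eptp >> 3) & 0x7;

		if (memtype != 0 && memtype != 6)
			return false;
		if (walk != 3)
			return false;
		/* No address bits beyond the guest's MAXPHYADDR */
		if (eptp >> cpuid_maxphyaddr(vcpu))
			return false;
		return true;
	}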

Paolo

> +	return kvm_skip_emulated_instruction(vcpu);
>  
>  fail:
> +	if (page)
> +		kunmap(page);
>  	nested_vmx_vmexit(vcpu, vmx->exit_reason,
>  			  vmcs_read32(VM_EXIT_INTR_INFO),
>  			  vmcs_readl(EXIT_QUALIFICATION));
>

Patch

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da5375e..5f63a2e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -115,6 +115,10 @@
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
 #define VMX_MISC_ACTIVITY_HLT			0x00000040
 
+/* VMFUNC functions */
+#define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
+#define VMFUNC_EPTP_ENTRIES  512
+
 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
 {
 	return vmx_basic & GENMASK_ULL(30, 0);
@@ -200,6 +204,8 @@ enum vmcs_field {
 	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
 	EOI_EXIT_BITMAP3                = 0x00002022,
 	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
+	EPTP_LIST_ADDRESS               = 0x00002024,
+	EPTP_LIST_ADDRESS_HIGH          = 0x00002025,
 	VMREAD_BITMAP                   = 0x00002026,
 	VMWRITE_BITMAP                  = 0x00002028,
 	XSS_EXIT_BITMAP                 = 0x0000202C,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7364678..3a4aa68 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -246,6 +246,7 @@ struct __packed vmcs12 {
 	u64 eoi_exit_bitmap1;
 	u64 eoi_exit_bitmap2;
 	u64 eoi_exit_bitmap3;
+	u64 eptp_list_address;
 	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
@@ -771,6 +772,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
 	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
 	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
+	FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
 	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
@@ -1402,6 +1404,13 @@ static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
 }
 
+static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has_vmfunc(vmcs12) &&
+		(vmcs12->vm_function_control &
+		 VMX_VMFUNC_EPTP_SWITCHING);
+}
+
 static inline bool is_nmi(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	if (cpu_has_vmx_vmfunc()) {
 		vmx->nested.nested_vmx_secondary_ctls_high |=
 			SECONDARY_EXEC_ENABLE_VMFUNC;
-		vmx->nested.nested_vmx_vmfunc_controls = 0;
+		/*
+		 * Advertise EPTP switching unconditionally
+		 * since we emulate it
+		 */
+		vmx->nested.nested_vmx_vmfunc_controls =
+			VMX_VMFUNC_EPTP_SWITCHING;
 	}
 
 	/*
@@ -7772,6 +7786,9 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct vmcs12 *vmcs12;
 	u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
+	u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+	struct page *page = NULL;
+	u64 *l1_eptp_list;
 
 	/*
 	 * VMFUNC is only supported for nested guests, but we always enable the
@@ -7784,11 +7801,43 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
 	}
 
 	vmcs12 = get_vmcs12(vcpu);
-	if ((vmcs12->vm_function_control & (1 << function)) == 0)
+	if (((vmcs12->vm_function_control & (1 << function)) == 0) ||
+	    WARN_ON_ONCE(function))
+		goto fail;
+
+	if (!nested_cpu_has_ept(vmcs12) ||
+	    !nested_cpu_has_eptp_switching(vmcs12))
+		goto fail;
+
+	if (!vmcs12->eptp_list_address || index >= VMFUNC_EPTP_ENTRIES)
+		goto fail;
+
+	page = nested_get_page(vcpu, vmcs12->eptp_list_address);
+	if (!page)
+		goto fail;
+
+	l1_eptp_list = kmap(page);
+	if (!l1_eptp_list[index])
 		goto fail;
-	WARN(1, "VMCS12 VM function control should have been zero");
+
+	/*
+	 * If the (L2) guest does a vmfunc to the currently
+	 * active ept pointer, we don't have to do anything else
+	 */
+	if (vmcs12->ept_pointer != l1_eptp_list[index]) {
+		kvm_mmu_unload(vcpu);
+		/*
+		 * TODO: Verify that guest ept satisfies vmentry prereqs
+		 */
+		vmcs12->ept_pointer = l1_eptp_list[index];
+		kvm_mmu_reload(vcpu);
+		kunmap(page);
+	}
+	return kvm_skip_emulated_instruction(vcpu);
 
 fail:
+	if (page)
+		kunmap(page);
 	nested_vmx_vmexit(vcpu, vmx->exit_reason,
 			  vmcs_read32(VM_EXIT_INTR_INFO),
 			  vmcs_readl(EXIT_QUALIFICATION));