[3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor

Message ID 20170801232433.31749-4-bsd@redhat.com (mailing list archive)
State New, archived

Commit Message

Bandan Das Aug. 1, 2017, 11:24 p.m. UTC
When L2 uses vmfunc, L0 utilizes the associated vmexit to
emulate switching of the EPT pointer by reloading the
guest MMU.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Bandan Das <bsd@redhat.com>
---
 arch/x86/include/asm/vmx.h |   6 +++
 arch/x86/kvm/vmx.c         | 130 ++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 130 insertions(+), 6 deletions(-)
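
For reference, the L2-side trigger for this path is the VMFUNC instruction
itself. A minimal guest-side sketch (illustrative only, not part of the
patch; the helper name is made up) of invoking VM function 0, EPTP
switching, with the list index in ECX:

	/* VMFUNC has no mnemonic in older assemblers, so encode it
	 * directly (0f 01 d4). EAX = 0 selects EPTP switching and
	 * ECX holds the index into the L1-provided EPTP list. */
	static inline void l2_vmfunc_eptp_switch(u32 index)
	{
		asm volatile(".byte 0x0f, 0x01, 0xd4"
			     : : "a" (0), "c" (index) : "memory");
	}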

Comments

David Hildenbrand Aug. 3, 2017, 11:39 a.m. UTC | #1
>  
> +static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	u64 mask = address & 0x7;
> +	int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> +	/* Check for memory type validity */
> +	switch (mask) {
> +	case 0:
> +		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
> +			return false;
> +		break;
> +	case 6:
> +		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
> +			return false;
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	/* Bits 5:3 must be 3 */
> +	if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW)
> +		return false;
> +
> +	/* Reserved bits should not be set */
> +	if (address >> maxphyaddr || ((address >> 7) & 0x1f))
> +		return false;
> +
> +	/* AD, if set, should be supported */
> +	if ((address & VMX_EPT_AD_ENABLE_BIT)) {
> +		if (!enable_ept_ad_bits)
> +			return false;

In theory (I guess) we would have to check here if
(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)

But I am no expert on this.

> +	}
> +
> +	return true;
> +}
> +
> +static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
> +				     struct vmcs12 *vmcs12)
> +{
> +	u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
> +	u64 *l1_eptp_list, address;
> +	struct page *page;
> +	bool accessed_dirty;
> +	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
> +
> +	if (!nested_cpu_has_eptp_switching(vmcs12) ||
> +	    !nested_cpu_has_ept(vmcs12))
> +		return 1;
> +
> +	if (index >= VMFUNC_EPTP_ENTRIES)
> +		return 1;
> +
> +	page = nested_get_page(vcpu, vmcs12->eptp_list_address);
> +	if (!page)
> +		return 1;
> +
> +	l1_eptp_list = kmap(page);
> +	address = l1_eptp_list[index];
> +	accessed_dirty = !!(address & VMX_EPT_AD_ENABLE_BIT);

Minor nit: Can't you directly do

kunmap(page);
nested_release_page_clean(page);

at this point?

We can fix this up later.

We could even factor this out later into something like "nested_vmx_read_guest".
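
Concretely, the nit above amounts to dropping the mapping as soon as the
entry has been read (a sketch):

	l1_eptp_list = kmap(page);
	address = l1_eptp_list[index];
	accessed_dirty = !!(address & VMX_EPT_AD_ENABLE_BIT);
	kunmap(page);
	nested_release_page_clean(page);
	/* address is a local copy; no mapping is held across the
	 * validity checks or the MMU reload that follow. */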
Paolo Bonzini Aug. 3, 2017, 12:41 p.m. UTC | #2
On 03/08/2017 13:39, David Hildenbrand wrote:
>> +	/* AD, if set, should be supported */
>> +	if ((address & VMX_EPT_AD_ENABLE_BIT)) {
>> +		if (!enable_ept_ad_bits)
>> +			return false;
> In theory (I guess) we would have to check here if
> (vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)

Yes, that's a more correct check than enable_ept_ad_bits.
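
That check would look along these lines (a sketch of the fix agreed above;
VMX_EPT_AD_BIT is the EPT-capability bit that advertises accessed/dirty
support):

	/* AD, if set, should be supported by the exposed EPT caps */
	if (address & VMX_EPT_AD_ENABLE_BIT) {
		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT))
			return false;
	}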

>>
>> +	page = nested_get_page(vcpu, vmcs12->eptp_list_address);
>> +	if (!page)
>> +		return 1;
>> +
>> +	l1_eptp_list = kmap(page);
>> +	address = l1_eptp_list[index];
>> +	accessed_dirty = !!(address & VMX_EPT_AD_ENABLE_BIT);
> 
> Minor nit: Can't you directly do
> 
> kunmap(page);
> nested_release_page_clean(page);
> 
> at this point?
> 
> We can fix this up later.

You can actually simply do kvm_vcpu_read_guest_page(vcpu,
vmcs12->eptp_list_address >> PAGE_SHIFT, &address, index * 8, 8).

Paolo
David Hildenbrand Aug. 3, 2017, 12:47 p.m. UTC | #3
>> Minor nit: Can't you directly do
>>
>> kunmap(page);
>> nested_release_page_clean(page);
>>
>> at this point?
>>
>> We can fix this up later.
> 
> You can actually simply do kvm_vcpu_read_guest_page(vcpu,
> vmcs12->eptp_list_address >> PAGE_SHIFT, &address, index * 8, 8).
> 

Fascinating how nested is able to confuse me every time :). Sure, this
is just a G1 address, not a G2 address ... (maybe we should rename
nested_get_page to guest_get_page ...)

> Paolo
>
Bandan Das Aug. 3, 2017, 7:55 p.m. UTC | #4
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 03/08/2017 13:39, David Hildenbrand wrote:
>>> +	/* AD, if set, should be supported */
>>> +	if ((address & VMX_EPT_AD_ENABLE_BIT)) {
>>> +		if (!enable_ept_ad_bits)
>>> +			return false;
>> In theory (I guess) we would have to check here if
>> (vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT)
>
> Yes, that's a more correct check than enable_ept_ad_bits.
>
>>>
>>> +	page = nested_get_page(vcpu, vmcs12->eptp_list_address);
>>> +	if (!page)
>>> +		return 1;
>>> +
>>> +	l1_eptp_list = kmap(page);
>>> +	address = l1_eptp_list[index];
>>> +	accessed_dirty = !!(address & VMX_EPT_AD_ENABLE_BIT);
>> 
>> Minor nit: Can't you directly do
>> 
>> kunmap(page);
>> nested_release_page_clean(page);
>> 
>> at this point?
>> 
>> We can fix this up later.
>
> You can actually simply do kvm_vcpu_read_guest_page(vcpu,
> vmcs12->eptp_list_address >> PAGE_SHIFT, &address, index * 8, 8).

Thanks for the interesting tip, Paolo. David, I sent a new version with the
correct AD check, using this instead of kmap(page).

> Paolo
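
The follow-up version reads the EPTP entry directly from L1 memory along
these lines (a sketch based on Paolo's suggestion above; the error handling
shown is illustrative):

	u64 address;

	if (kvm_vcpu_read_guest_page(vcpu,
				     vmcs12->eptp_list_address >> PAGE_SHIFT,
				     &address, index * 8, 8))
		return 1;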

Patch

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da5375e..5f63a2e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -115,6 +115,10 @@ 
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
 #define VMX_MISC_ACTIVITY_HLT			0x00000040
 
+/* VMFUNC functions */
+#define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
+#define VMFUNC_EPTP_ENTRIES  512
+
 static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
 {
 	return vmx_basic & GENMASK_ULL(30, 0);
@@ -200,6 +204,8 @@  enum vmcs_field {
 	EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
 	EOI_EXIT_BITMAP3                = 0x00002022,
 	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
+	EPTP_LIST_ADDRESS               = 0x00002024,
+	EPTP_LIST_ADDRESS_HIGH          = 0x00002025,
 	VMREAD_BITMAP                   = 0x00002026,
 	VMWRITE_BITMAP                  = 0x00002028,
 	XSS_EXIT_BITMAP                 = 0x0000202C,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 042ea88..7235e9a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -249,6 +249,7 @@  struct __packed vmcs12 {
 	u64 eoi_exit_bitmap1;
 	u64 eoi_exit_bitmap2;
 	u64 eoi_exit_bitmap3;
+	u64 eptp_list_address;
 	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
@@ -774,6 +775,7 @@  static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
 	FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
 	FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
+	FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
 	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
@@ -1406,6 +1408,13 @@  static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
 }
 
+static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has_vmfunc(vmcs12) &&
+		(vmcs12->vm_function_control &
+		 VMX_VMFUNC_EPTP_SWITCHING);
+}
+
 static inline bool is_nmi(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2818,7 +2827,12 @@  static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 	if (cpu_has_vmx_vmfunc()) {
 		vmx->nested.nested_vmx_secondary_ctls_high |=
 			SECONDARY_EXEC_ENABLE_VMFUNC;
-		vmx->nested.nested_vmx_vmfunc_controls = 0;
+		/*
+		 * Advertise EPTP switching unconditionally
+		 * since we emulate it
+		 */
+		vmx->nested.nested_vmx_vmfunc_controls =
+			VMX_VMFUNC_EPTP_SWITCHING;
 	}
 
 	/*
@@ -7820,6 +7834,94 @@  static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u64 mask = address & 0x7;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	/* Check for memory type validity */
+	switch (mask) {
+	case 0:
+		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
+			return false;
+		break;
+	case 6:
+		if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	/* Bits 5:3 must be 3 */
+	if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW)
+		return false;
+
+	/* Reserved bits should not be set */
+	if (address >> maxphyaddr || ((address >> 7) & 0x1f))
+		return false;
+
+	/* AD, if set, should be supported */
+	if ((address & VMX_EPT_AD_ENABLE_BIT)) {
+		if (!enable_ept_ad_bits)
+			return false;
+	}
+
+	return true;
+}
+
+static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
+				     struct vmcs12 *vmcs12)
+{
+	u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+	u64 *l1_eptp_list, address;
+	struct page *page;
+	bool accessed_dirty;
+	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
+	if (!nested_cpu_has_eptp_switching(vmcs12) ||
+	    !nested_cpu_has_ept(vmcs12))
+		return 1;
+
+	if (index >= VMFUNC_EPTP_ENTRIES)
+		return 1;
+
+	page = nested_get_page(vcpu, vmcs12->eptp_list_address);
+	if (!page)
+		return 1;
+
+	l1_eptp_list = kmap(page);
+	address = l1_eptp_list[index];
+	accessed_dirty = !!(address & VMX_EPT_AD_ENABLE_BIT);
+
+	/*
+	 * If the (L2) guest does a vmfunc to the currently
+	 * active ept pointer, we don't have to do anything else
+	 */
+	if (vmcs12->ept_pointer != address) {
+		if (!valid_ept_address(vcpu, address)) {
+			kunmap(page);
+			nested_release_page_clean(page);
+			return 1;
+		}
+		kvm_mmu_unload(vcpu);
+		mmu->ept_ad = accessed_dirty;
+		mmu->base_role.ad_disabled = !accessed_dirty;
+		vmcs12->ept_pointer = address;
+		/*
+		 * TODO: Check what's the correct approach in case
+		 * mmu reload fails. Currently, we just let the next
+		 * reload potentially fail
+		 */
+		kvm_mmu_reload(vcpu);
+	}
+
+	kunmap(page);
+	nested_release_page_clean(page);
+	return 0;
+}
+
 static int handle_vmfunc(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7839,7 +7941,16 @@  static int handle_vmfunc(struct kvm_vcpu *vcpu)
 	vmcs12 = get_vmcs12(vcpu);
 	if ((vmcs12->vm_function_control & (1 << function)) == 0)
 		goto fail;
-	WARN_ONCE(1, "VMCS12 VM function control should have been zero");
+
+	switch (function) {
+	case 0:
+		if (nested_vmx_eptp_switching(vcpu, vmcs12))
+			goto fail;
+		break;
+	default:
+		goto fail;
+	}
+	return kvm_skip_emulated_instruction(vcpu);
 
 fail:
 	nested_vmx_vmexit(vcpu, vmx->exit_reason,
@@ -10437,10 +10548,17 @@  static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 				vmx->nested.nested_vmx_entry_ctls_high))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
-	if (nested_cpu_has_vmfunc(vmcs12) &&
-	    (vmcs12->vm_function_control &
-	     ~vmx->nested.nested_vmx_vmfunc_controls))
-		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+	if (nested_cpu_has_vmfunc(vmcs12)) {
+		if (vmcs12->vm_function_control &
+		    ~vmx->nested.nested_vmx_vmfunc_controls)
+			return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+		if (nested_cpu_has_eptp_switching(vmcs12)) {
+			if (!nested_cpu_has_ept(vmcs12) ||
+			    !page_address_valid(vcpu, vmcs12->eptp_list_address))
+				return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+		}
+	}
 
 	if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
 		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;