
[v2,2/5] mmu: don't set the present bit unconditionally

Message ID 1468361932-16580-3-git-send-email-bsd@redhat.com (mailing list archive)
State New, archived

Commit Message

Bandan Das July 12, 2016, 10:18 p.m. UTC
To support execute-only mappings on behalf of L1
hypervisors, teach set_spte() to honor L1's valid XWR
bits. This is done only if the host supports EPT
execute-only mappings. Reuse ACC_USER_MASK to signify
whether the L1 hypervisor has the R bit set. Add a new
variable "shadow_present_mask" that is set for the non-EPT
cases and preserves the existing behavior there.

Signed-off-by: Bandan Das <bsd@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu.c              | 15 ++++++++++++---
 arch/x86/kvm/paging_tmpl.h      |  8 +++++++-
 arch/x86/kvm/vmx.c              |  7 +++++--
 arch/x86/kvm/x86.c              |  4 ++--
 5 files changed, 27 insertions(+), 9 deletions(-)
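
For illustration only, here is a small standalone sketch (not part of the patch;
the VMX_EPT_* and ACC_* values are mirrored locally and the kernel helpers are
reduced to their relevant lines) of how the repurposed ACC_USER_MASK and the new
present mask are meant to interact when the hardware supports execute-only EPT:

	#include <stdio.h>
	#include <stdint.h>
	#include <stdbool.h>

	/* Local mirrors of the EPT permission bits and KVM's ACC_* masks. */
	#define EPT_READABLE    0x1ull   /* same value as PT_PRESENT_MASK */
	#define EPT_WRITABLE    0x2ull
	#define EPT_EXECUTABLE  0x4ull

	#define ACC_EXEC_MASK   1u
	#define ACC_WRITE_MASK  2u
	#define ACC_USER_MASK   4u

	/* Mirrors FNAME(gpte_access) for EPT: ACC_USER_MASK now tracks R. */
	static unsigned ept_gpte_access(uint64_t gpte)
	{
		return ((gpte & EPT_WRITABLE) ? ACC_WRITE_MASK : 0) |
		       ((gpte & EPT_EXECUTABLE) ? ACC_EXEC_MASK : 0) |
		       ((gpte & EPT_READABLE) ? ACC_USER_MASK : 0);
	}

	int main(void)
	{
		/* stand-in for cpu_has_vmx_ept_execute_only() */
		bool exec_only = true;

		/* what hardware_setup() passes as the new p_mask argument */
		uint64_t shadow_present_mask = exec_only ? 0 : EPT_READABLE;

		uint64_t gpte = EPT_EXECUTABLE;     /* L1 maps the page X-only */
		unsigned access = ept_gpte_access(gpte);

		/* mirrors the relevant lines of set_spte() */
		uint64_t spte = shadow_present_mask; /* no unconditional bit 0 */
		if (access & ACC_USER_MASK)
			spte |= EPT_READABLE;        /* shadow_user_mask */
		if (access & ACC_EXEC_MASK)
			spte |= EPT_EXECUTABLE;      /* shadow_x_mask */

		/* prints 0x4: an execute-only shadow PTE, bit 0 clear */
		printf("spte = %#llx\n", (unsigned long long)spte);
		return 0;
	}

Without execute-only support, shadow_present_mask stays PT_PRESENT_MASK and
bit 0 is still set unconditionally, preserving the old behavior.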

Comments

Paolo Bonzini July 13, 2016, 8:10 a.m. UTC | #1
On 13/07/2016 00:18, Bandan Das wrote:
> +	/*
> +	 * In the non-EPT case, execonly is not valid and so
> +	 * the following line is equivalent to spte |= PT_PRESENT_MASK.

I think the comment need not mention non-EPT.

> +	 * For the EPT case, shadow_present_mask is 0 if hardware
> +	 * supports it and we honor whatever way the guest set it.

if hardware supports exec-only page table entries.

> +	 * See: FNAME(gpte_access) in paging_tmpl.h
> +	 */
> +	spte |= shadow_present_mask;
>  	if (!speculative)
>  		spte |= shadow_accessed_mask;
>  
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index bc019f7..f2741db 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -181,13 +181,19 @@ no_present:
>  	return true;
>  }
>  
> +/*
> + * For PTTYPE_EPT, a page table can be executable but not readable
> + * on supported processors. Therefore, set_spte does not automatically
> + * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
> + * to signify readability since it isn't used in the EPT case
> + */
>  static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
>  {
>  	unsigned access;
>  #if PTTYPE == PTTYPE_EPT
>  	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
>  		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
> -		ACC_USER_MASK;
> +		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
>  #else
>  	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
>  	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 64a79f2..f73b5dc 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6366,10 +6366,13 @@ static __init int hardware_setup(void)
>  	vmx_disable_intercept_msr_write_x2apic(0x83f);
>  
>  	if (enable_ept) {
> -		kvm_mmu_set_mask_ptes(0ull,
> +		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
>  			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
>  			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
> -			0ull, VMX_EPT_EXECUTABLE_MASK);
> +			0ull, VMX_EPT_EXECUTABLE_MASK,
> +			cpu_has_vmx_ept_execute_only() ?
> +				      0ull : PT_PRESENT_MASK);
> +		BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK);

Better to use VMX_EPT_READABLE_MASK here, which removes the need for the
BUILD_BUG_ON.

I can do these changes myself.

Paolo
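
For reference, a rough, untested sketch of what the hardware_setup() call
would look like with that suggestion applied (VMX_EPT_READABLE_MASK used for
the non-exec-only present mask, BUILD_BUG_ON dropped):

	if (enable_ept) {
		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
			0ull, VMX_EPT_EXECUTABLE_MASK,
			cpu_has_vmx_ept_execute_only() ?
				0ull : VMX_EPT_READABLE_MASK);
		/* present mask is expressed directly in EPT terms, so no
		 * BUILD_BUG_ON comparing it against PT_PRESENT_MASK */
		ept_set_mmio_spte_mask();
		kvm_enable_tdp();
	}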

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 69e62862..c0acc66 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1026,7 +1026,7 @@  void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask);
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask);
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 87b62dc..ae80aa4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -175,6 +175,7 @@  static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mmio_mask;
+static u64 __read_mostly shadow_present_mask;
 
 static void mmu_spte_set(u64 *sptep, u64 spte);
 static void mmu_free_roots(struct kvm_vcpu *vcpu);
@@ -282,13 +283,14 @@  static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 }
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-		u64 dirty_mask, u64 nx_mask, u64 x_mask)
+		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask)
 {
 	shadow_user_mask = user_mask;
 	shadow_accessed_mask = accessed_mask;
 	shadow_dirty_mask = dirty_mask;
 	shadow_nx_mask = nx_mask;
 	shadow_x_mask = x_mask;
+	shadow_present_mask = p_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -2515,13 +2517,20 @@  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
 		    bool can_unsync, bool host_writable)
 {
-	u64 spte;
+	u64 spte = 0;
 	int ret = 0;
 
 	if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
 		return 0;
 
-	spte = PT_PRESENT_MASK;
+	/*
+	 * In the non-EPT case, execonly is not valid and so
+	 * the following line is equivalent to spte |= PT_PRESENT_MASK.
+	 * For the EPT case, shadow_present_mask is 0 if hardware
+	 * supports it and we honor whatever way the guest set it.
+	 * See: FNAME(gpte_access) in paging_tmpl.h
+	 */
+	spte |= shadow_present_mask;
 	if (!speculative)
 		spte |= shadow_accessed_mask;
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bc019f7..f2741db 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -181,13 +181,19 @@  no_present:
 	return true;
 }
 
+/*
+ * For PTTYPE_EPT, a page table can be executable but not readable
+ * on supported processors. Therefore, set_spte does not automatically
+ * set bit 0 if execute only is supported. Here, we repurpose ACC_USER_MASK
+ * to signify readability since it isn't used in the EPT case
+ */
 static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
 {
 	unsigned access;
 #if PTTYPE == PTTYPE_EPT
 	access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
 		((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
-		ACC_USER_MASK;
+		((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
 #else
 	BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
 	BUILD_BUG_ON(ACC_EXEC_MASK != 1);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 64a79f2..f73b5dc 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6366,10 +6366,13 @@  static __init int hardware_setup(void)
 	vmx_disable_intercept_msr_write_x2apic(0x83f);
 
 	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
+		kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
 			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
 			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
+			0ull, VMX_EPT_EXECUTABLE_MASK,
+			cpu_has_vmx_ept_execute_only() ?
+				      0ull : PT_PRESENT_MASK);
+		BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK);
 		ept_set_mmio_spte_mask();
 		kvm_enable_tdp();
 	} else
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7da5dd2..b15f214 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5867,8 +5867,8 @@  int kvm_arch_init(void *opaque)
 	kvm_x86_ops = ops;
 
 	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-			PT_DIRTY_MASK, PT64_NX_MASK, 0);
-
+			PT_DIRTY_MASK, PT64_NX_MASK, 0,
+			PT_PRESENT_MASK);
 	kvm_timer_init();
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);