diff mbox

[RFC,10/19] kvm: vmx: Hook in kvmi_page_fault()

Message ID 20170616134348.17725-11-alazar@bitdefender.com (mailing list archive)
State New, archived
Headers show

Commit Message

Adalbert Lazăr June 16, 2017, 1:43 p.m. UTC
From: Mihai Dontu <mdontu@bitdefender.com>

Notify the guest introspection tool when a #PF occurs due to a failed
permission check in the shadow page tables.

This call and the code involved in managing the shadow page tables
permissions are the essence of a security solution using guest
introspection facilities.

The shadow page tables are used to guarantee the purpose of code areas
inside the guest (code, rodata, stack, heap, etc.). Each attempt at an
operation unfitting for a certain memory range (e.g. execute code in
heap) triggers a #PF and gives the introspection tool the chance to
audit the code attempting the operation. The possible responses can be:

 * allow it
 * allow it via emulation
 * allow it via emulation and with custom input (see the 'Change
 emulation context' patch)
 * deny it by skipping the instruction

The #PF event is generated only for pages for which the guest
introspection tool has shown interest (i.e. has previously touched it by
adjusting the permissions).

Page size is essential for performance (the smaller the better); that's
why huge pages should be split. At the time of writing this patch, they
are disabled with CONFIG_TRANSPARENT_HUGEPAGE=n.

Signed-off-by: Mihai Dontu <mdontu@bitdefender.com>
---
 arch/x86/include/asm/kvm_host.h |  4 ++--
 arch/x86/kvm/mmu.c              | 51 +++++++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/vmx.c              | 24 ++++++++++++++++---
 3 files changed, 72 insertions(+), 7 deletions(-)
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 40d1ee68474a..8d1d80bd2230 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1238,8 +1238,8 @@  void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
-		       void *insn, int insn_len);
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
+		       void *insn, int insn_len, unsigned long gva, bool pf);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 12e4c33ff879..3d2527626694 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -40,6 +40,8 @@ 
 #include <linux/uaccess.h>
 #include <linux/hash.h>
 #include <linux/kern_levels.h>
+#include <linux/kvmi.h>
+#include "../../../../virt/kvm/kvmi.h"
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -4723,11 +4725,46 @@  static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
 	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 }
 
+static enum emulation_result __kvm_mmu_page_fault(struct kvm_vcpu *vcpu,
+						  gpa_t gpa, unsigned long gva,
+						  bool *again)
+{
+	unsigned int opts = 0;
+	unsigned long eq = vcpu->arch.exit_qualification;
+	u64 spte = kvm_mmu_get_spte(vcpu->kvm, vcpu, gpa);
+	enum emulation_result er = EMULATE_FAIL;
+
+	if (spte == -ENOENT) {
+		/* The SPTE is not present */
+		*again = true;
+		return EMULATE_FAIL;
+	}
+
+	if (!kvmi_page_fault(vcpu, gpa, gva, eq, &opts))
+		return EMULATE_FAIL;
+
+	if (opts & KVMI_EVENT_NOEMU)
+		er = EMULATE_DONE;
+	else {
+		er = x86_emulate_instruction(vcpu, gpa, 0, NULL, 0);
+
+		vcpu->ctx_size = 0;
+		vcpu->ctx_pos = 0;
+
+		if (er != EMULATE_DONE)
+			kvm_err("%s: emulate failed (err: %d, gpa: %llX)\n",
+			     __func__, er, gpa);
+	}
+
+	return er;
+}
+
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
-		       void *insn, int insn_len)
+		       void *insn, int insn_len, unsigned long gva, bool pf)
 {
 	int r, emulation_type = EMULTYPE_RETRY;
 	enum emulation_result er;
+	bool again = false;
 	bool direct = vcpu->arch.mmu.direct_map || mmu_is_nested(vcpu);
 
 	if (unlikely(error_code & PFERR_RSVD_MASK)) {
@@ -4742,12 +4779,21 @@  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 			return r;
 	}
 
+	if (pf) {
+		er = __kvm_mmu_page_fault(vcpu, cr2, gva, &again);
+		if (er != EMULATE_FAIL)
+			goto check_er;
+	}
+
 	r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
 				      false);
 	if (r < 0)
 		return r;
-	if (!r)
+	if (!r) {
+		if (again)
+			__kvm_mmu_page_fault(vcpu, cr2, gva, &again);
 		return 1;
+	}
 
 	/*
 	 * Before emulating the instruction, check if the error code
@@ -4769,6 +4815,7 @@  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 emulate:
 	er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
 
+check_er:
 	switch (er) {
 	case EMULATE_DONE:
 		return 1;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7a594cfcb2ea..f99fcc86f141 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5653,7 +5653,8 @@  static int handle_exception(struct kvm_vcpu *vcpu)
 
 		if (kvm_event_needs_reinjection(vcpu))
 			kvm_mmu_unprotect_page_virt(vcpu, cr2);
-		return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
+		return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0, 0,
+					  false);
 	}
 
 	ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@@ -6204,6 +6205,8 @@  static int handle_task_switch(struct kvm_vcpu *vcpu)
 
 static int handle_ept_violation(struct kvm_vcpu *vcpu)
 {
+	bool pf = false;
+	unsigned long gla = 0;
 	unsigned long exit_qualification;
 	gpa_t gpa;
 	u32 error_code;
@@ -6234,6 +6237,21 @@  static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
 	trace_kvm_page_fault(gpa, exit_qualification);
 
+	if ((exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
+		pf  = true;
+		gla = vmcs_readl(GUEST_LINEAR_ADDRESS);
+
+		/*
+		 * It can happen that kvm_read_cr3() returns 0 even though
+		 * the page fault took place as a result of a guest page table
+		 * translation.
+		 *
+		 * TODO: Fix kvm_read_cr3(). The problem is in is_paging()
+		 */
+		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
+		__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+	}
+
 	/* Is it a read fault? */
 	error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
 		     ? PFERR_USER_MASK : 0;
@@ -6252,7 +6270,7 @@  static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	vcpu->arch.gpa_available = true;
 	vcpu->arch.exit_qualification = exit_qualification;
 
-	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
+	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0, gla, pf);
 }
 
 static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
@@ -6273,7 +6291,7 @@  static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 					      EMULATE_DONE;
 
 	if (unlikely(ret == RET_MMIO_PF_INVALID))
-		return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0);
+		return kvm_mmu_page_fault(vcpu, gpa, 0, NULL, 0, 0, false);
 
 	if (unlikely(ret == RET_MMIO_PF_RETRY))
 		return 1;