[RFC PATCH v5 12/20] kvm: x86: hook in the VM introspection subsystem

Message ID: 20181220182850.4579-13-alazar@bitdefender.com
State: New, archived
Series: VM introspection

Commit Message

Adalbert Lazăr Dec. 20, 2018, 6:28 p.m. UTC
From: Mihai DONTU <mdontu@bitdefender.com>

Notify KVMI of vCPU create/destroy and VM destroy events, wire up the
new ioctls (KVM_INTROSPECTION_HOOK, KVM_INTROSPECTION_UNHOOK) and
handle pending introspection commands when KVM_REQ_INTROSPECTION is
raised.
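
For context, a rough sketch of how userspace would drive the two
ioctls on a VM fd. struct kvm_introspection itself is introduced
earlier in this series; the fd/uuid fields below are assumptions about
its layout, not the authoritative UAPI:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <linux/kvmi.h>

int hook_introspection(int vm_fd, int sock_fd, const unsigned char uuid[16])
{
	struct kvm_introspection intro;

	memset(&intro, 0, sizeof(intro));
	intro.fd = sock_fd;	/* socket connected to the introspection tool */
	memcpy(intro.uuid, uuid, sizeof(intro.uuid));	/* assumed field */

	return ioctl(vm_fd, KVM_INTROSPECTION_HOOK, &intro);
}

int unhook_introspection(int vm_fd)
{
	/* KVM_INTROSPECTION_UNHOOK takes no argument (see kvm_vm_ioctl() below) */
	return ioctl(vm_fd, KVM_INTROSPECTION_UNHOOK, 0);
}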

Change the behavior of the x86 emulator and MMU when KVMI is present
(TODO: only when it is active) or when dealing with a page of interest
to the introspection tool.
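
The "page of interest" test used throughout the diff below is
kvmi_tracked_gfn(), which another patch in this series provides. A
minimal sketch of its assumed semantics (not the real implementation),
in terms of the page-tracking modes visible in the mmu.c hunk:

/*
 * Sketch only: assumes kvmi_tracked_gfn() reduces to "is any KVMI
 * page-tracking mode active on this gfn"; the real helper is added
 * elsewhere in this series.
 */
static bool kvmi_tracked_gfn_sketch(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	return kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREREAD) ||
	       kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE);
}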

This patch also disables the EPT A/D bits feature whenever KVMI is
present.
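
With the KVM_CAP_INTROSPECTION case added to
kvm_vm_ioctl_check_extension() below, userspace can probe for the
subsystem (and learn the KVMI version) before trying to hook a VM; a
minimal check:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns KVMI_VERSION (> 0) when introspection is built in, 0 otherwise. */
int kvmi_version(int kvm_fd)
{
	return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_INTROSPECTION);
}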

Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
Signed-off-by: Mihai Donțu <mdontu@bitdefender.com>
Signed-off-by: Nicușor Cîțu <ncitu@bitdefender.com>
Signed-off-by: Mircea Cîrjaliu <mcirjaliu@bitdefender.com>
---
 arch/x86/kvm/mmu.c  | 13 +++++++++++--
 arch/x86/kvm/svm.c  |  2 ++
 arch/x86/kvm/vmx.c  |  4 +++-
 arch/x86/kvm/x86.c  | 33 ++++++++++++++++++++++++++++-----
 virt/kvm/kvm_main.c | 34 +++++++++++++++++++++++++++++++++-
 5 files changed, 77 insertions(+), 9 deletions(-)

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ed4b3c593bf5..85a980e5cc27 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -40,6 +40,7 @@ 
 #include <linux/uaccess.h>
 #include <linux/hash.h>
 #include <linux/kern_levels.h>
+#include <linux/kvmi.h>
 
 #include <asm/page.h>
 #include <asm/pat.h>
@@ -2458,6 +2459,9 @@  static void clear_sp_write_flooding_count(u64 *spte)
 static unsigned int kvm_mmu_page_track_acc(struct kvm_vcpu *vcpu, gfn_t gfn,
 					   unsigned int acc)
 {
+	if (!kvmi_tracked_gfn(vcpu, gfn))
+		return acc;
+
 	if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREREAD))
 		acc &= ~ACC_USER_MASK;
 	if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) ||
@@ -5433,8 +5437,13 @@  int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	 */
 	if (vcpu->arch.mmu->direct_map &&
 	    (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
-		kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
-		return 1;
+		if (kvmi_tracked_gfn(vcpu, gpa_to_gfn(cr2))) {
+			if (kvmi_update_ad_flags(vcpu))
+				return 1;
+		} else {
+			kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
+			return 1;
+		}
 	}
 
 	/*
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f355860d845c..766578401d1c 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -18,6 +18,7 @@ 
 #define pr_fmt(fmt) "SVM: " fmt
 
 #include <linux/kvm_host.h>
+#include <linux/kvmi.h>
 
 #include "irq.h"
 #include "mmu.h"
@@ -50,6 +51,7 @@ 
 #include <asm/kvm_para.h>
 #include <asm/irq_remapping.h>
 #include <asm/spec-ctrl.h>
+#include <asm/kvmi.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fc0219336edb..e9de89bd7f1c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -36,6 +36,7 @@ 
 #include <linux/hrtimer.h>
 #include <linux/frame.h>
 #include <linux/nospec.h>
+#include <linux/kvmi.h>
 #include "kvm_cache_regs.h"
 #include "x86.h"
 
@@ -55,6 +56,7 @@ 
 #include <asm/mmu_context.h>
 #include <asm/spec-ctrl.h>
 #include <asm/mshyperv.h>
+#include <asm/kvmi.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -7939,7 +7941,7 @@  static __init int hardware_setup(void)
 	    !cpu_has_vmx_invept_global())
 		enable_ept = 0;
 
-	if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
+	if (!cpu_has_vmx_ept_ad_bits() || !enable_ept || kvmi_is_present())
 		enable_ept_ad_bits = 0;
 
 	if (!cpu_has_vmx_unrestricted_guest() || !enable_ept)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e413f81d6c46..f073fe596244 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -20,6 +20,8 @@ 
  */
 
 #include <linux/kvm_host.h>
+#include <linux/kvmi.h>
+#include <uapi/linux/kvmi.h>
 #include "irq.h"
 #include "mmu.h"
 #include "i8254.h"
@@ -3076,6 +3078,11 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = kvm_x86_ops->get_nested_state ?
 			kvm_x86_ops->get_nested_state(NULL, 0, 0) : 0;
 		break;
+#ifdef CONFIG_KVM_INTROSPECTION
+	case KVM_CAP_INTROSPECTION:
+		r = KVMI_VERSION;
+		break;
+#endif
 	default:
 		break;
 	}
@@ -5314,7 +5321,7 @@  static int emulator_read_write_onepage(unsigned long addr, void *val,
 	 * operation using rep will only have the initial GPA from the NPF
 	 * occurred.
 	 */
-	if (vcpu->arch.gpa_available &&
+	if (vcpu->arch.gpa_available && !kvmi_is_present() &&
 	    emulator_can_use_gpa(ctxt) &&
 	    (addr & ~PAGE_MASK) == (vcpu->arch.gpa_val & ~PAGE_MASK)) {
 		gpa = vcpu->arch.gpa_val;
@@ -6096,7 +6103,8 @@  static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 		indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 
-		if (indirect_shadow_pages)
+		if (indirect_shadow_pages &&
+		    !kvmi_tracked_gfn(vcpu, gpa_to_gfn(gpa)))
 			kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
 
 		return true;
@@ -6107,7 +6115,8 @@  static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 	 * and it failed try to unshadow page and re-enter the
 	 * guest to let CPU execute the instruction.
 	 */
-	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
+	if (!kvmi_tracked_gfn(vcpu, gpa_to_gfn(gpa)))
+		kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
 
 	/*
 	 * If the access faults on its page table, it can not
@@ -6159,6 +6168,9 @@  static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	if (!vcpu->arch.mmu->direct_map)
 		gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL);
 
+	if (kvmi_tracked_gfn(vcpu, gpa_to_gfn(gpa)))
+		return false;
+
 	kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
 
 	return true;
@@ -6324,6 +6336,8 @@  int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 
 	vcpu->arch.l1tf_flush_l1d = true;
 
+	kvmi_init_emulate(vcpu);
+
 	/*
 	 * Clear write_fault_to_shadow_pgtable here to ensure it is
 	 * never reused.
@@ -6360,6 +6374,8 @@  int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		if (r != EMULATION_OK) {
 			if (emulation_type & EMULTYPE_TRAP_UD)
 				return EMULATE_FAIL;
+			if (!kvmi_track_emul_unimplemented(vcpu, cr2))
+				return EMULATE_DONE;
 			if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
 						emulation_type))
 				return EMULATE_DONE;
@@ -6429,9 +6445,10 @@  int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			writeback = false;
 		r = EMULATE_USER_EXIT;
 		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
-	} else if (r == EMULATION_RESTART)
+	} else if (r == EMULATION_RESTART) {
+		kvmi_activate_rep_complete(vcpu);
 		goto restart;
-	else
+	} else
 		r = EMULATE_DONE;
 
 	if (writeback) {
@@ -7677,6 +7694,9 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		 */
 		if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
 			kvm_hv_process_stimers(vcpu);
+
+		if (kvm_check_request(KVM_REQ_INTROSPECTION, vcpu))
+			kvmi_handle_requests(vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -9492,6 +9512,9 @@  static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 	    (vcpu->arch.smi_pending && !is_smm(vcpu)))
 		return true;
 
+	if (kvm_test_request(KVM_REQ_INTROSPECTION, vcpu))
+		return true;
+
 	if (kvm_arch_interrupt_allowed(vcpu) &&
 	    (kvm_cpu_has_interrupt(vcpu) ||
 	    kvm_guest_apic_has_interrupt(vcpu)))
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2679e476b6c3..8db2bf9ca0c2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,7 @@ 
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/bsearch.h>
+#include <linux/kvmi.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -315,6 +316,11 @@  int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
 		goto fail_free_run;
+
+	r = kvmi_vcpu_init(vcpu);
+	if (r < 0)
+		goto fail_free_run;
+
 	return 0;
 
 fail_free_run:
@@ -332,6 +338,7 @@  void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 	 * descriptors are already gone.
 	 */
 	put_pid(rcu_dereference_protected(vcpu->pid, 1));
+	kvmi_vcpu_uninit(vcpu);
 	kvm_arch_vcpu_uninit(vcpu);
 	free_page((unsigned long)vcpu->run);
 }
@@ -681,6 +688,8 @@  static struct kvm *kvm_create_vm(unsigned long type)
 	if (r)
 		goto out_err;
 
+	kvmi_create_vm(kvm);
+
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
@@ -726,6 +735,7 @@  static void kvm_destroy_vm(struct kvm *kvm)
 	int i;
 	struct mm_struct *mm = kvm->mm;
 
+	kvmi_destroy_vm(kvm);
 	kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
 	kvm_destroy_vm_debugfs(kvm);
 	kvm_arch_sync_events(kvm);
@@ -1476,7 +1486,7 @@  static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 	 * Whoever called remap_pfn_range is also going to call e.g.
 	 * unmap_mapping_range before the underlying pages are freed,
 	 * causing a call to our MMU notifier.
-	 */ 
+	 */
 	kvm_get_pfn(pfn);
 
 	*p_pfn = pfn;
@@ -3133,6 +3143,24 @@  static long kvm_vm_ioctl(struct file *filp,
 	case KVM_CHECK_EXTENSION:
 		r = kvm_vm_ioctl_check_extension_generic(kvm, arg);
 		break;
+#ifdef CONFIG_KVM_INTROSPECTION
+	case KVM_INTROSPECTION_HOOK: {
+		struct kvm_introspection i;
+
+		r = -EFAULT;
+		if (copy_from_user(&i, argp, sizeof(i)))
+			goto out;
+
+		r = kvmi_hook(kvm, &i);
+		break;
+	}
+	case KVM_INTROSPECTION_UNHOOK:
+		r = -EFAULT;
+		if (kvmi_notify_unhook(kvm))
+			goto out;
+		r = 0;
+		break;
+#endif /* CONFIG_KVM_INTROSPECTION */
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -4075,6 +4103,9 @@  int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	r = kvm_vfio_ops_init();
 	WARN_ON(r);
 
+	r = kvmi_init();
+	WARN_ON(r);
+
 	return 0;
 
 out_unreg:
@@ -4100,6 +4131,7 @@  EXPORT_SYMBOL_GPL(kvm_init);
 
 void kvm_exit(void)
 {
+	kvmi_uninit();
 	debugfs_remove_recursive(kvm_debugfs_dir);
 	misc_deregister(&kvm_dev);
 	kmem_cache_destroy(kvm_vcpu_cache);
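
A closing note on KVM_REQ_INTROSPECTION: the consumer side is visible
above (vcpu_enter_guest() dispatches to kvmi_handle_requests(), and
the kvm_vcpu_has_events() change keeps a halted vCPU from sleeping
through a pending request), while the producer lives in the KVMI core
added by other patches in this series. A sketch of the producer, using
the stock request primitives; the helper name is hypothetical:

#include <linux/kvm_host.h>

/* Sketch only: kvmi_request_vcpu() is an illustrative name, not part of this patch. */
static void kvmi_request_vcpu(struct kvm_vcpu *vcpu)
{
	/* Latch the request for vcpu_enter_guest() ... */
	kvm_make_request(KVM_REQ_INTROSPECTION, vcpu);
	/* ... and kick the vCPU out of guest mode so it is handled promptly. */
	kvm_vcpu_kick(vcpu);
}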