new file mode 100644
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_KVMI_HOST_H
+#define _ASM_X86_KVMI_HOST_H
+
+#include <asm/kvm_host.h>
+#include <asm/kvm_page_track.h>
+
+struct kvmi_arch_mem_access {
+ unsigned long active[KVM_PAGE_TRACK_MAX][BITS_TO_LONGS(KVM_MEM_SLOTS_NUM)];
+};
+
+#endif /* _ASM_X86_KVMI_HOST_H */
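For orientation, a minimal userspace model of the active[][] bitmap declared above: one bit per memslot id, one row per tracking mode. The mode and slot counts below are illustrative stand-ins, not the kernel's KVM_PAGE_TRACK_MAX and KVM_MEM_SLOTS_NUM.

/* Illustrative model only; sizes are placeholders, not kernel constants. */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define TRACK_MODES 3	/* stand-in for KVM_PAGE_TRACK_MAX */
#define SLOTS 512	/* stand-in for KVM_MEM_SLOTS_NUM */
#define LONG_BITS (CHAR_BIT * sizeof(long))
#define TO_LONGS(n) (((n) + LONG_BITS - 1) / LONG_BITS)

static unsigned long active[TRACK_MODES][TO_LONGS(SLOTS)];

static bool slot_tracked(int mode, int slot)
{
	return (active[mode][slot / LONG_BITS] >> (slot % LONG_BITS)) & 1;
}

static void track_slot(int mode, int slot)
{
	active[mode][slot / LONG_BITS] |= 1UL << (slot % LONG_BITS);
}

int main(void)
{
	track_slot(2, 42);	/* e.g. preexec tracking on slot 42 */
	printf("%d %d\n", slot_tracked(2, 42), slot_tracked(0, 42)); /* 1 0 */
	return 0;
}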
@@ -91,6 +91,12 @@ void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev)
kvmi_get_msrs(vcpu, event);
}

+bool kvmi_arch_pf_event(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ u8 access)
+{
+ return true; /* TODO: send the event; true lets the access continue */
+}
+
int kvmi_arch_cmd_get_vcpu_info(struct kvm_vcpu *vcpu,
struct kvmi_get_vcpu_info_reply *rpl)
{
@@ -102,3 +108,42 @@ int kvmi_arch_cmd_get_vcpu_info(struct kvm_vcpu *vcpu,
return 0;
}
+
+static const struct {
+ unsigned int allow_bit;
+ enum kvm_page_track_mode track_mode;
+} track_modes[] = {
+ { KVMI_PAGE_ACCESS_R, KVM_PAGE_TRACK_PREREAD },
+ { KVMI_PAGE_ACCESS_W, KVM_PAGE_TRACK_PREWRITE },
+ { KVMI_PAGE_ACCESS_X, KVM_PAGE_TRACK_PREEXEC },
+};
+
+void kvmi_arch_update_page_tracking(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ struct kvmi_mem_access *m)
+{
+ struct kvmi_arch_mem_access *arch = &m->arch;
+ int i;
+
+ if (!slot) {
+ slot = gfn_to_memslot(kvm, m->gfn);
+ if (!slot)
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(track_modes); i++) {
+ unsigned int allow_bit = track_modes[i].allow_bit;
+ enum kvm_page_track_mode mode = track_modes[i].track_mode;
+ bool slot_tracked = test_bit(slot->id, arch->active[mode]);
+
+ if (m->access & allow_bit) {
+ if (slot_tracked) {
+ kvm_slot_page_track_remove_page(kvm, slot,
+ m->gfn, mode);
+ clear_bit(slot->id, arch->active[mode]);
+ }
+ } else if (!slot_tracked) {
+ kvm_slot_page_track_add_page(kvm, slot, m->gfn, mode);
+ set_bit(slot->id, arch->active[mode]);
+ }
+ }
+}
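The loop above inverts the allow mask into tracking state: a set KVMI_PAGE_ACCESS_* bit means the access is permitted, so any existing tracking for that mode is removed; a cleared bit arms the corresponding KVM_PAGE_TRACK_PRE* mode so the notifiers fire. A compilable sketch of that mapping, with illustrative values:

/* Standalone model of the allow-bit -> tracked-state decision above. */
#include <stdbool.h>
#include <stdio.h>

#define ACCESS_R (1 << 0)
#define ACCESS_W (1 << 1)
#define ACCESS_X (1 << 2)

static bool want_tracked(unsigned int access, unsigned int allow_bit)
{
	return !(access & allow_bit);	/* track only what is restricted */
}

int main(void)
{
	unsigned int access = ACCESS_R | ACCESS_X;	/* write access revoked */

	printf("R tracked: %d\n", want_tracked(access, ACCESS_R));	/* 0 */
	printf("W tracked: %d\n", want_tracked(access, ACCESS_W));	/* 1 */
	printf("X tracked: %d\n", want_tracked(access, ACCESS_X));	/* 0 */
	return 0;
}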
@@ -70,6 +70,10 @@ enum {
#define KVMI_EVENT_ACTION_RETRY 1
#define KVMI_EVENT_ACTION_CRASH 2
+#define KVMI_PAGE_ACCESS_R (1 << 0)
+#define KVMI_PAGE_ACCESS_W (1 << 1)
+#define KVMI_PAGE_ACCESS_X (1 << 2)
+
#define KVMI_MSG_SIZE (4096 - sizeof(struct kvmi_msg_hdr))

struct kvmi_msg_hdr {
@@ -11,10 +11,27 @@
#include <linux/bitmap.h>

static struct kmem_cache *msg_cache;
+static struct kmem_cache *radix_cache;
static struct kmem_cache *job_cache;

static bool kvmi_create_vcpu_event(struct kvm_vcpu *vcpu);
static void kvmi_abort_events(struct kvm *kvm);
+
+static bool kvmi_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ u8 *new, int bytes, struct kvm_page_track_notifier_node *node,
+ bool *data_ready);
+static bool kvmi_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ const u8 *new, int bytes, struct kvm_page_track_notifier_node *node);
+static bool kvmi_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ struct kvm_page_track_notifier_node *node);
+static void kvmi_track_create_slot(struct kvm *kvm,
+ struct kvm_memory_slot *slot, unsigned long npages,
+ struct kvm_page_track_notifier_node *node);
+static void kvmi_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct kvm_page_track_notifier_node *node);
+
+static const u8 full_access = KVMI_PAGE_ACCESS_R |
+ KVMI_PAGE_ACCESS_W |
+ KVMI_PAGE_ACCESS_X;

void *kvmi_msg_alloc(void)
{
@@ -34,23 +51,96 @@ void kvmi_msg_free(void *addr)
kmem_cache_free(msg_cache, addr);
}
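
+/* The caller must hold ikvm->access_tree_lock. */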
+static struct kvmi_mem_access *__kvmi_get_gfn_access(struct kvmi *ikvm,
+ const gfn_t gfn)
+{
+ return radix_tree_lookup(&ikvm->access_tree, gfn);
+}
+
+static int kvmi_get_gfn_access(struct kvmi *ikvm, const gfn_t gfn,
+ u8 *access)
+{
+ struct kvmi_mem_access *m;
+
+ *access = full_access;
+
+ read_lock(&ikvm->access_tree_lock);
+ m = __kvmi_get_gfn_access(ikvm, gfn);
+ if (m)
+ *access = m->access;
+ read_unlock(&ikvm->access_tree_lock);
+
+ return m ? 0 : -1;
+}
+
+static bool kvmi_restricted_access(struct kvmi *ikvm, gpa_t gpa, u8 access)
+{
+ u8 allowed_access;
+ int err;
+
+ err = kvmi_get_gfn_access(ikvm, gpa_to_gfn(gpa), &allowed_access);
+
+ if (err)
+ return false;
+
+ /*
+ * We want to be notified only for violations involving access
+ * bits that we've specifically cleared.
+ */
+ if ((~allowed_access) & access)
+ return true;
+
+ return false;
+}
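kvmi_restricted_access() thus reports only faults that touch bits the introspection tool cleared: when no node exists the lookup fails and the fault is ignored, and when one exists, (~allowed_access) & access is nonzero exactly when the attempted access uses a revoked bit. A compilable walk-through with illustrative values:

/* Worked example of the (~allowed) & access test above. */
#include <stdbool.h>
#include <stdio.h>

#define ACCESS_R (1 << 0)
#define ACCESS_W (1 << 1)
#define ACCESS_X (1 << 2)

static bool restricted(unsigned char allowed, unsigned char access)
{
	return (~allowed) & access;	/* access uses a revoked bit */
}

int main(void)
{
	unsigned char allowed = ACCESS_R | ACCESS_X;	/* W revoked */

	printf("%d\n", restricted(allowed, ACCESS_W));			/* 1: notify */
	printf("%d\n", restricted(allowed, ACCESS_R));			/* 0: ignore */
	printf("%d\n", restricted(allowed, ACCESS_R | ACCESS_W));	/* 1: notify */
	return 0;
}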
+
+static void kvmi_clear_mem_access(struct kvm *kvm)
+{
+ void **slot;
+ struct radix_tree_iter iter;
+ struct kvmi *ikvm = IKVM(kvm);
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+ write_lock(&ikvm->access_tree_lock);
+
+ radix_tree_for_each_slot(slot, &ikvm->access_tree, &iter, 0) {
+ struct kvmi_mem_access *m = *slot;
+
+ m->access = full_access;
+ kvmi_arch_update_page_tracking(kvm, NULL, m);
+
+ radix_tree_iter_delete(&ikvm->access_tree, &iter, slot);
+ kmem_cache_free(radix_cache, m);
+ }
+
+ write_unlock(&ikvm->access_tree_lock);
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
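Note the lock nesting used here, which recurs in the slot notifiers further down; a summary, as a comment, for anyone adding another walker:

/*
 * Lock nesting observed by every access_tree walker in this patch:
 *
 *   srcu_read_lock(&kvm->srcu)                    keeps memslots alive
 *     spin_lock(&kvm->mmu_lock)                   page-tracking updates
 *       read/write_lock(&ikvm->access_tree_lock)  tree lookup/update
 */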
+
static void kvmi_cache_destroy(void)
{
kmem_cache_destroy(msg_cache);
msg_cache = NULL;
+ kmem_cache_destroy(radix_cache);
+ radix_cache = NULL;
kmem_cache_destroy(job_cache);
job_cache = NULL;
}

static int kvmi_cache_create(void)
{
+ radix_cache = kmem_cache_create("kvmi_radix_tree",
+ sizeof(struct kvmi_mem_access),
+ 0, SLAB_ACCOUNT, NULL);
job_cache = kmem_cache_create("kvmi_job",
sizeof(struct kvmi_job),
0, SLAB_ACCOUNT, NULL);
msg_cache = kmem_cache_create("kvmi_msg", KVMI_MSG_SIZE_ALLOC,
4096, SLAB_ACCOUNT, NULL);
- if (!msg_cache || !job_cache) {
+ if (!msg_cache || !radix_cache || !job_cache) {
kvmi_cache_destroy();
return -1;
@@ -77,6 +167,10 @@ static bool alloc_kvmi(struct kvm *kvm, const struct kvm_introspection *qemu)
if (!ikvm)
return false;
+ /* see comments of radix_tree_preload() - no direct reclaim */
+ INIT_RADIX_TREE(&ikvm->access_tree, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM);
+ rwlock_init(&ikvm->access_tree_lock);
+
atomic_set(&ikvm->ev_seq, 0);
set_bit(KVMI_GET_VERSION, ikvm->cmd_allow_mask);
@@ -85,6 +179,12 @@ static bool alloc_kvmi(struct kvm *kvm, const struct kvm_introspection *qemu)
memcpy(&ikvm->uuid, &qemu->uuid, sizeof(ikvm->uuid));
+ ikvm->kptn_node.track_preread = kvmi_track_preread;
+ ikvm->kptn_node.track_prewrite = kvmi_track_prewrite;
+ ikvm->kptn_node.track_preexec = kvmi_track_preexec;
+ ikvm->kptn_node.track_create_slot = kvmi_track_create_slot;
+ ikvm->kptn_node.track_flush_slot = kvmi_track_flush_slot;
+
ikvm->kvm = kvm;
kvm->kvmi = ikvm;
@@ -276,6 +376,179 @@ void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu)
vcpu->kvmi = NULL;
}

+static bool is_pf_of_interest(struct kvm_vcpu *vcpu, gpa_t gpa, u8 access)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ if (kvm_mmu_nested_pagefault(vcpu))
+ return false;
+
+ /* Have we shown interest in this page? */
+ return kvmi_restricted_access(IKVM(kvm), gpa, access);
+}
+
+static bool __kvmi_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ u8 *new, int bytes, struct kvm_page_track_notifier_node *node,
+ bool *data_ready)
+{
+ if (!is_pf_of_interest(vcpu, gpa, KVMI_PAGE_ACCESS_R))
+ return true;
+
+ return kvmi_arch_pf_event(vcpu, gpa, gva, KVMI_PAGE_ACCESS_R);
+}
+
+static bool kvmi_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ u8 *new, int bytes, struct kvm_page_track_notifier_node *node,
+ bool *data_ready)
+{
+ struct kvmi *ikvm;
+ bool ret = true;
+
+ ikvm = kvmi_get(vcpu->kvm);
+ if (!ikvm)
+ return true;
+
+ if (is_event_enabled(vcpu, KVMI_EVENT_PF))
+ ret = __kvmi_track_preread(vcpu, gpa, gva, new, bytes, node,
+ data_ready);
+
+ kvmi_put(vcpu->kvm);
+
+ return ret;
+}
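All three track_pre* wrappers share this guard: take a reference so the introspection context cannot vanish mid-callback (kvmi_get() fails once unhooking has begun) and default to true, i.e. allow the access, whenever introspection or the PF event is off. A compilable model of the default-allow shape (names are illustrative):

/* Userspace model of the notifier guard pattern above. */
#include <stdbool.h>
#include <stdio.h>

static bool hooked;		/* stands in for kvmi_get() succeeding */
static bool pf_event_on;	/* stands in for is_event_enabled() */

static bool consult_tool(void)
{
	return false;		/* pretend the tool blocked the access */
}

static bool on_preread(void)
{
	bool ret = true;	/* default: allow the access */

	if (!hooked)
		return true;
	if (pf_event_on)
		ret = consult_tool();
	/* kvmi_put() would drop the reference here */
	return ret;
}

int main(void)
{
	printf("%d\n", on_preread());	/* 1: not hooked, allow */
	hooked = pf_event_on = true;
	printf("%d\n", on_preread());	/* 0: tool blocked it */
	return 0;
}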
+
+static bool __kvmi_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ const u8 *new, int bytes,
+ struct kvm_page_track_notifier_node *node)
+{
+ if (!is_pf_of_interest(vcpu, gpa, KVMI_PAGE_ACCESS_W))
+ return true;
+
+ return kvmi_arch_pf_event(vcpu, gpa, gva, KVMI_PAGE_ACCESS_W);
+}
+
+static bool kvmi_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ const u8 *new, int bytes,
+ struct kvm_page_track_notifier_node *node)
+{
+ struct kvmi *ikvm;
+ bool ret = true;
+
+ ikvm = kvmi_get(vcpu->kvm);
+ if (!ikvm)
+ return true;
+
+ if (is_event_enabled(vcpu, KVMI_EVENT_PF))
+ ret = __kvmi_track_prewrite(vcpu, gpa, gva, new, bytes, node);
+
+ kvmi_put(vcpu->kvm);
+
+ return ret;
+}
+
+static bool __kvmi_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ struct kvm_page_track_notifier_node *node)
+{
+ if (!is_pf_of_interest(vcpu, gpa, KVMI_PAGE_ACCESS_X))
+ return true;
+
+ return kvmi_arch_pf_event(vcpu, gpa, gva, KVMI_PAGE_ACCESS_X);
+}
+
+static bool kvmi_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ struct kvm_page_track_notifier_node *node)
+{
+ struct kvmi *ikvm;
+ bool ret = true;
+
+ ikvm = kvmi_get(vcpu->kvm);
+ if (!ikvm)
+ return true;
+
+ if (is_event_enabled(vcpu, KVMI_EVENT_PF))
+ ret = __kvmi_track_preexec(vcpu, gpa, gva, node);
+
+ kvmi_put(vcpu->kvm);
+
+ return ret;
+}
+
+static void kvmi_track_create_slot(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ unsigned long npages,
+ struct kvm_page_track_notifier_node *node)
+{
+ struct kvmi *ikvm;
+ gfn_t start = slot->base_gfn;
+ const gfn_t end = start + npages;
+ int idx;
+
+ ikvm = kvmi_get(kvm);
+ if (!ikvm)
+ return;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+ read_lock(&ikvm->access_tree_lock);
+
+ while (start < end) {
+ struct kvmi_mem_access *m;
+
+ m = __kvmi_get_gfn_access(ikvm, start);
+ if (m)
+ kvmi_arch_update_page_tracking(kvm, slot, m);
+ start++;
+ }
+
+ read_unlock(&ikvm->access_tree_lock);
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ kvmi_put(kvm);
+}
+
+static void kvmi_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct kvm_page_track_notifier_node *node)
+{
+ struct kvmi *ikvm;
+ gfn_t start = slot->base_gfn;
+ const gfn_t end = start + slot->npages;
+ int idx;
+
+ ikvm = kvmi_get(kvm);
+ if (!ikvm)
+ return;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+ write_lock(&ikvm->access_tree_lock);
+
+ while (start < end) {
+ struct kvmi_mem_access *m;
+
+ m = __kvmi_get_gfn_access(ikvm, start);
+ if (m) {
+ u8 prev_access = m->access;
+
+ m->access = full_access;
+ kvmi_arch_update_page_tracking(kvm, slot, m);
+ m->access = prev_access;
+ }
+
+ start++;
+ }
+
+ write_unlock(&ikvm->access_tree_lock);
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ kvmi_put(kvm);
+}
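The temporary m->access = full_access makes kvmi_arch_update_page_tracking() see every bit as allowed, so it merely untracks the gfn in the dying slot; restoring prev_access keeps the restriction in the tree, and kvmi_track_create_slot() above re-arms it should a slot covering the gfn reappear. A compilable model of the save/restore step, with illustrative values:

/* Model of the save/restore used by kvmi_track_flush_slot(). */
#include <stdio.h>

#define FULL_ACCESS 0x7	/* R|W|X, mirrors full_access above */

struct mem_access { unsigned char access; };

/* Stand-in for kvmi_arch_update_page_tracking(): shows desired state. */
static void update(const struct mem_access *m)
{
	printf("tracked bits: %#x\n", (unsigned int)(~m->access & FULL_ACCESS));
}

int main(void)
{
	struct mem_access m = { .access = 0x5 };	/* R|X: writes tracked */
	unsigned char prev = m.access;

	update(&m);			/* tracked bits: 0x2 */
	m.access = FULL_ACCESS;		/* pretend all is allowed ... */
	update(&m);			/* ... slot fully untracked: 0 */
	m.access = prev;		/* restriction survives in the tree */
	update(&m);			/* tracked bits: 0x2 again */
	return 0;
}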
+
static void kvmi_end_introspection(struct kvmi *ikvm)
{
struct kvm *kvm = ikvm->kvm;
@@ -290,6 +563,22 @@ static void kvmi_end_introspection(struct kvmi *ikvm)
*/
kvmi_abort_events(kvm);
+ /*
+ * This may sleep on synchronize_srcu() so it's not allowed to be
+ * called under kvmi_put().
+ * Also synchronize_srcu() may deadlock on (page tracking) read-side
+ * regions that are waiting for reply to events, so must be called
+ * after kvmi_abort_events().
+ */
+ kvm_page_track_unregister_notifier(kvm, &ikvm->kptn_node);
+
+ /*
+ * This function uses kvm->mmu_lock so it's not allowed to be
+ * called under kvmi_put(). It can reach a deadlock if called
+ * from kvm_mmu_load -> kvmi_tracked_gfn -> kvmi_put.
+ */
+ kvmi_clear_mem_access(kvm);
+
/*
* At this moment the socket is shut down, no more commands will come
* from the introspector, and the only way into the introspection is
@@ -351,6 +640,8 @@ int kvmi_hook(struct kvm *kvm, const struct kvm_introspection *qemu)
goto err_alloc;
}
+ kvm_page_track_register_notifier(kvm, &ikvm->kptn_node);
+
/*
* Make sure all the KVM/KVMI structures are linked and no pointer
* is read as NULL after the reference count has been set.
@@ -6,6 +6,7 @@
#include <linux/kvm_host.h>
#include <uapi/linux/kvmi.h>
+#include <asm/kvmi_host.h>
#define kvmi_debug(ikvm, fmt, ...) \
kvm_debug("%pU " fmt, &ikvm->uuid, ## __VA_ARGS__)
@@ -104,6 +105,10 @@ struct kvmi_vcpu {
struct kvmi {
struct kvm *kvm;
+ struct kvm_page_track_notifier_node kptn_node;
+
+ struct radix_tree_root access_tree;
+ rwlock_t access_tree_lock;

struct socket *sock;
struct task_struct *recv;
@@ -118,6 +123,17 @@ struct kvmi {
bool cmd_reply_disabled;
};

+struct kvmi_mem_access {
+ gfn_t gfn;
+ u8 access;
+ struct kvmi_arch_mem_access arch;
+};
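This diff never shows how nodes enter the tree; the command that records a restriction is outside this section. Purely as a hedged sketch, using only the caches and locks introduced above, an insertion in virt/kvm/kvmi.c could look like the following; the function name and error handling are illustrative, and updating an existing node plus re-arming page tracking are intentionally omitted:

/* Hypothetical sketch only; not part of this patch. */
static int kvmi_set_gfn_access_sketch(struct kvmi *ikvm, gfn_t gfn, u8 access)
{
	struct kvmi_mem_access *m;
	int err = 0;

	m = kmem_cache_zalloc(radix_cache, GFP_KERNEL);
	if (!m)
		return -ENOMEM;

	m->gfn = gfn;
	m->access = access;

	write_lock(&ikvm->access_tree_lock);
	if (__kvmi_get_gfn_access(ikvm, gfn))
		err = -EEXIST;		/* the update path is not shown */
	else
		/* no direct reclaim: see the INIT_RADIX_TREE() gfp mask */
		err = radix_tree_insert(&ikvm->access_tree, gfn, m);
	write_unlock(&ikvm->access_tree_lock);

	if (err)
		kmem_cache_free(radix_cache, m);

	return err;
}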
+
+static inline bool is_event_enabled(struct kvm_vcpu *vcpu, int event)
+{
+ return false; /* TODO */
+}
+
/* kvmi_msg.c */
bool kvmi_sock_get(struct kvmi *ikvm, int fd);
void kvmi_sock_shutdown(struct kvmi *ikvm);
@@ -138,7 +154,12 @@ int kvmi_add_job(struct kvm_vcpu *vcpu,
void *ctx, void (*free_fct)(void *ctx));
/* arch */
+void kvmi_arch_update_page_tracking(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ struct kvmi_mem_access *m);
void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev);
+bool kvmi_arch_pf_event(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ u8 access);
int kvmi_arch_cmd_get_vcpu_info(struct kvm_vcpu *vcpu,
struct kvmi_get_vcpu_info_reply *rpl);
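Taken together, the flow this patch sets up is, in summary:

/*
 * 1. a kvmi_mem_access node with some KVMI_PAGE_ACCESS_* bits cleared
 *    is placed in ikvm->access_tree (the command doing so is not part
 *    of this diff);
 * 2. kvmi_arch_update_page_tracking() arms KVM_PAGE_TRACK_PRE{READ,
 *    WRITE,EXEC} for that gfn in each memslot containing it;
 * 3. a guest access fires kvmi_track_pre{read,write,exec}(), filtered
 *    through is_pf_of_interest() and kvmi_restricted_access();
 * 4. kvmi_arch_pf_event(), still a stub here, will forward the event
 *    to the introspection tool and act on its reply.
 */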