@@ -563,6 +563,25 @@ On x86 the structure looks like this::
It contains information about the vCPU state at the time of the event.
+An event reply begins with two common structures::
+
+ struct kvmi_vcpu_hdr;
+ struct kvmi_event_reply {
+ __u8 action;
+ __u8 event;
+ __u16 padding1;
+ __u32 padding2;
+ };
+
+All events accept the KVMI_EVENT_ACTION_CRASH action, which stops the
+guest ungracefully, but as soon as possible.
+
+Most of the events accept the KVMI_EVENT_ACTION_CONTINUE action, which
+lets the instruction that caused the event continue.
+
+Some of the events accept the KVMI_EVENT_ACTION_RETRY action, to continue
+by re-entering the guest.
+
Specific event data can follow these common structures.
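+
+For example, a reply choosing the KVMI_EVENT_ACTION_CONTINUE action could
+be assembled as below (a sketch, mirroring the selftest's reply_to_event();
+ev_hdr and ev stand for the header and payload of the event just received,
+and fd for the introspection socket)::
+
+        struct {
+                struct kvmi_msg_hdr hdr;
+                struct kvmi_vcpu_hdr vcpu_hdr;
+                struct kvmi_event_reply reply;
+        } rpl = {};
+
+        rpl.hdr.id = ev_hdr->id;        /* KVMI_EVENT */
+        rpl.hdr.seq = ev_hdr->seq;      /* same sequence number as the event */
+        rpl.hdr.size = sizeof(rpl) - sizeof(rpl.hdr);
+        rpl.vcpu_hdr.vcpu = ev->vcpu;
+        rpl.reply.action = KVMI_EVENT_ACTION_CONTINUE;
+        rpl.reply.event = ev->event;
+
+        send(fd, &rpl, sizeof(rpl), 0);
+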
1. KVMI_EVENT_UNHOOK
@@ -604,7 +623,8 @@ operation can proceed).
struct kvmi_vcpu_hdr;
struct kvmi_event_reply;
-This event is sent in response to a *KVMI_VCPU_PAUSE* command.
+This event is sent in response to a *KVMI_VCPU_PAUSE* command and
+cannot be controlled with *KVMI_VCPU_CONTROL_EVENTS*.
Because it has a low priority, it will be sent after any other vCPU
introspection event and when no other vCPU introspection command is
queued.
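+
+Since one KVMI_EVENT_PAUSE_VCPU event is sent for every successful
+*KVMI_VCPU_PAUSE* command, an introspection tool can drain the pending
+events with a loop (a sketch, reusing the receive_event()/reply_to_event()
+helpers from the selftest in this patch; pause_requests stands for the
+number of successful pause commands)::
+
+        struct kvmi_msg_hdr hdr;
+        struct kvmi_event ev;
+        struct vcpu_reply rpl = {};
+        int i;
+
+        for (i = 0; i < pause_requests; i++) {
+                receive_event(&hdr, &ev, sizeof(ev), KVMI_EVENT_PAUSE_VCPU);
+                reply_to_event(&hdr, &ev, KVMI_EVENT_ACTION_CONTINUE,
+                               &rpl, sizeof(rpl));
+        }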
@@ -5,8 +5,89 @@
* Copyright (C) 2019-2020 Bitdefender S.R.L.
*/
+#include "linux/kvm_host.h"
+#include "x86.h"
#include "../../../virt/kvm/introspection/kvmi_int.h"
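+
+/*
+ * Deduce the vCPU's current CS operand size in bytes (2, 4 or 8) from
+ * CS.L, CS.D/B and CR0.PE, so the introspection tool knows how to
+ * interpret the state sent with each event.
+ */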
+static unsigned int kvmi_vcpu_mode(const struct kvm_vcpu *vcpu,
+ const struct kvm_sregs *sregs)
+{
+ unsigned int mode = 0;
+
+ if (is_long_mode((struct kvm_vcpu *) vcpu)) {
+ if (sregs->cs.l)
+ mode = 8;
+ else if (!sregs->cs.db)
+ mode = 2;
+ else
+ mode = 4;
+ } else if (sregs->cr0 & X86_CR0_PE) {
+ if (!sregs->cs.db)
+ mode = 2;
+ else
+ mode = 4;
+ } else if (!sregs->cs.db) {
+ mode = 2;
+ } else {
+ mode = 4;
+ }
+
+ return mode;
+}
+
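+/* Gather the guest MSRs of interest into the event's arch-specific data. */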
+static void kvmi_get_msrs(struct kvm_vcpu *vcpu, struct kvmi_event_arch *event)
+{
+ struct msr_data msr;
+
+ msr.host_initiated = true;
+
+ msr.index = MSR_IA32_SYSENTER_CS;
+ kvm_x86_ops.get_msr(vcpu, &msr);
+ event->msrs.sysenter_cs = msr.data;
+
+ msr.index = MSR_IA32_SYSENTER_ESP;
+ kvm_x86_ops.get_msr(vcpu, &msr);
+ event->msrs.sysenter_esp = msr.data;
+
+ msr.index = MSR_IA32_SYSENTER_EIP;
+ kvm_x86_ops.get_msr(vcpu, &msr);
+ event->msrs.sysenter_eip = msr.data;
+
+ msr.index = MSR_EFER;
+ kvm_x86_ops.get_msr(vcpu, &msr);
+ event->msrs.efer = msr.data;
+
+ msr.index = MSR_STAR;
+ kvm_x86_ops.get_msr(vcpu, &msr);
+ event->msrs.star = msr.data;
+
+ msr.index = MSR_LSTAR;
+ kvm_x86_ops.get_msr(vcpu, &msr);
+ event->msrs.lstar = msr.data;
+
+ msr.index = MSR_CSTAR;
+ kvm_x86_ops.get_msr(vcpu, &msr);
+ event->msrs.cstar = msr.data;
+
+ msr.index = MSR_IA32_CR_PAT;
+ kvm_x86_ops.get_msr(vcpu, &msr);
+ event->msrs.pat = msr.data;
+
+ msr.index = MSR_KERNEL_GS_BASE;
+ kvm_x86_ops.get_msr(vcpu, &msr);
+ event->msrs.shadow_gs = msr.data;
+}
+
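+/* Fill the architecture-specific part (registers, mode, MSRs) of an event. */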
+void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev)
+{
+ struct kvmi_event_arch *event = &ev->arch;
+
+ kvm_arch_vcpu_get_regs(vcpu, &event->regs);
+ kvm_arch_vcpu_get_sregs(vcpu, &event->sregs);
+ ev->arch.mode = kvmi_vcpu_mode(vcpu, &event->sregs);
+ kvmi_get_msrs(vcpu, event);
+}
+
int kvmi_arch_cmd_vcpu_get_info(struct kvm_vcpu *vcpu,
struct kvmi_vcpu_get_info_reply *rpl)
{
@@ -6,6 +6,14 @@
#include <asm/kvmi_host.h>
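+
+/*
+ * Describes the reply expected from the introspection tool for the event
+ * the vCPU is currently waiting on.
+ */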
+struct kvmi_vcpu_reply {
+ int error;
+ int action;
+ u32 seq;
+ void *data;
+ size_t size;
+};
+
struct kvmi_job {
struct list_head link;
void *ctx;
@@ -20,6 +28,9 @@ struct kvm_vcpu_introspection {
spinlock_t job_lock;
atomic_t pause_requests;
+
+ struct kvmi_vcpu_reply reply;
+ bool waiting_for_reply;
};
struct kvm_introspection {
@@ -38,6 +38,12 @@ enum {
KVMI_NUM_EVENTS
};
+enum {
+ KVMI_EVENT_ACTION_CONTINUE = 0,
+ KVMI_EVENT_ACTION_RETRY = 1,
+ KVMI_EVENT_ACTION_CRASH = 2,
+};
+
struct kvmi_msg_hdr {
__u16 id;
__u16 size;
@@ -124,4 +130,11 @@ struct kvmi_event {
struct kvmi_event_arch arch;
};
+struct kvmi_event_reply {
+ __u8 action;
+ __u8 event;
+ __u16 padding1;
+ __u32 padding2;
+};
+
#endif /* _UAPI__LINUX_KVMI_H */
@@ -34,6 +34,12 @@ static vm_paddr_t test_gpa;
static uint8_t test_write_pattern;
static int page_size;
+struct vcpu_reply {
+ struct kvmi_msg_hdr hdr;
+ struct kvmi_vcpu_hdr vcpu_hdr;
+ struct kvmi_event_reply reply;
+};
+
struct vcpu_worker_data {
struct kvm_vm *vm;
int vcpu_id;
@@ -772,14 +778,54 @@ static void pause_vcpu(void)
cmd_vcpu_pause(1, 0, 0);
}
+static void reply_to_event(struct kvmi_msg_hdr *ev_hdr, struct kvmi_event *ev,
+ __u8 action, struct vcpu_reply *rpl, size_t rpl_size)
+{
+ ssize_t r;
+
+ rpl->hdr.id = ev_hdr->id;
+ rpl->hdr.seq = ev_hdr->seq;
+ rpl->hdr.size = rpl_size - sizeof(rpl->hdr);
+
+ rpl->vcpu_hdr.vcpu = ev->vcpu;
+
+ rpl->reply.action = action;
+ rpl->reply.event = ev->event;
+
+ r = send(Userspace_socket, rpl, rpl_size, 0);
+ TEST_ASSERT(r == rpl_size,
+ "send() failed, sending %zd, result %zd, errno %d (%s)\n",
+ rpl_size, r, errno, strerror(errno));
+}
+
+static void discard_pause_event(struct kvm_vm *vm)
+{
+ struct vcpu_worker_data data = {.vm = vm, .vcpu_id = VCPU_ID};
+ struct vcpu_reply rpl = {};
+ struct kvmi_msg_hdr hdr;
+ pthread_t vcpu_thread;
+ struct kvmi_event ev;
+
+ vcpu_thread = start_vcpu_worker(&data);
+
+ receive_event(&hdr, &ev, sizeof(ev), KVMI_EVENT_PAUSE_VCPU);
+
+ reply_to_event(&hdr, &ev, KVMI_EVENT_ACTION_CONTINUE,
+ &rpl, sizeof(rpl));
+
+ stop_vcpu_worker(vcpu_thread, &data);
+}
+
static void test_pause(struct kvm_vm *vm)
{
__u8 no_wait = 0, wait = 1, wait_inval = 2;
__u8 padding = 1, no_padding = 0;
pause_vcpu();
+ discard_pause_event(vm);
cmd_vcpu_pause(wait, no_padding, 0);
+ discard_pause_event(vm);
cmd_vcpu_pause(wait_inval, no_padding, -KVM_EINVAL);
cmd_vcpu_pause(no_wait, padding, -KVM_EINVAL);
@@ -358,6 +358,7 @@ static void kvmi_job_release_vcpu(struct kvm_vcpu *vcpu, void *ctx)
struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
atomic_set(&vcpui->pause_requests, 0);
+ vcpui->waiting_for_reply = false;
}
static void kvmi_release_vcpus(struct kvm *kvm)
@@ -743,12 +744,33 @@ void kvmi_run_jobs(struct kvm_vcpu *vcpu)
}
}
+static void kvmi_handle_unsupported_event_action(struct kvm *kvm)
+{
+ kvmi_sock_shutdown(KVMI(kvm));
+}
+
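+/*
+ * Handle event actions that are not handled by the event-specific code.
+ * An unsupported action shuts down the introspection socket.
+ */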
+void kvmi_handle_common_event_actions(struct kvm *kvm, u32 action)
+{
+ switch (action) {
+ default:
+ kvmi_handle_unsupported_event_action(kvm);
+ }
+}
+
static void kvmi_vcpu_pause_event(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+ u32 action;
atomic_dec(&vcpui->pause_requests);
- /* to be implemented */
+
+ action = kvmi_msg_send_vcpu_pause(vcpu);
+ switch (action) {
+ case KVMI_EVENT_ACTION_CONTINUE:
+ break;
+ default:
+ kvmi_handle_common_event_actions(vcpu->kvm, action);
+ }
}
void kvmi_handle_requests(struct kvm_vcpu *vcpu)
@@ -27,6 +27,7 @@ void kvmi_sock_shutdown(struct kvm_introspection *kvmi);
void kvmi_sock_put(struct kvm_introspection *kvmi);
bool kvmi_msg_process(struct kvm_introspection *kvmi);
int kvmi_msg_send_unhook(struct kvm_introspection *kvmi);
+u32 kvmi_msg_send_vcpu_pause(struct kvm_vcpu *vcpu);
/* kvmi.c */
void *kvmi_msg_alloc(void);
@@ -37,6 +38,7 @@ bool kvmi_is_known_vm_event(u8 id);
int kvmi_add_job(struct kvm_vcpu *vcpu,
void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
void *ctx, void (*free_fct)(void *ctx));
+void kvmi_run_jobs(struct kvm_vcpu *vcpu);
int kvmi_cmd_vm_control_events(struct kvm_introspection *kvmi,
unsigned int event_id, bool enable);
int kvmi_cmd_read_physical(struct kvm *kvm, u64 gpa, size_t size,
@@ -51,5 +53,6 @@ int kvmi_cmd_vcpu_pause(struct kvm_vcpu *vcpu, bool wait);
/* arch */
int kvmi_arch_cmd_vcpu_get_info(struct kvm_vcpu *vcpu,
struct kvmi_vcpu_get_info_reply *rpl);
+void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev);
#endif
@@ -337,6 +337,66 @@ static int handle_vcpu_get_info(const struct kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, 0, &rpl, sizeof(rpl));
}
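+
+/*
+ * Validate the tool's reply against the expected one: the sequence number
+ * must match, the message must be large enough for the common headers,
+ * the event-specific data must fit the expected buffer and the padding
+ * fields must be zero.
+ */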
+static int check_event_reply(const struct kvmi_msg_hdr *msg,
+ const struct kvmi_event_reply *reply,
+ const struct kvmi_vcpu_reply *expected,
+ u8 *action, size_t *received)
+{
+ size_t msg_size, common, event_size;
+ int err = -EINVAL;
+
+ if (unlikely(msg->seq != expected->seq))
+ return err;
+
+ msg_size = msg->size;
+ common = sizeof(struct kvmi_vcpu_hdr) + sizeof(*reply);
+
+ if (check_sub_overflow(msg_size, common, &event_size))
+ return err;
+
+ if (unlikely(event_size > expected->size))
+ return err;
+
+ if (unlikely(reply->padding1 || reply->padding2))
+ return err;
+
+ *received = event_size;
+ *action = reply->action;
+ return 0;
+}
+
+static int handle_vcpu_event_reply(const struct kvmi_vcpu_msg_job *job,
+ const struct kvmi_msg_hdr *msg,
+ const void *rpl)
+{
+ struct kvm_vcpu_introspection *vcpui = VCPUI(job->vcpu);
+ struct kvmi_vcpu_reply *expected = &vcpui->reply;
+ const struct kvmi_event_reply *reply = rpl;
+ const void *reply_data = reply + 1;
+ size_t useful, received;
+ u8 action;
+
+ expected->error = check_event_reply(msg, reply, expected, &action,
+ &received);
+ if (unlikely(expected->error))
+ goto out;
+
+ useful = min(received, expected->size);
+ if (useful)
+ memcpy(expected->data, reply_data, useful);
+
+ if (expected->size > useful)
+ memset((char *)expected->data + useful, 0,
+ expected->size - useful);
+
+ expected->action = action;
+ expected->error = 0;
+
+out:
+ vcpui->waiting_for_reply = false;
+ return expected->error;
+}
+
/*
* These functions are executed from the vCPU thread. The receiving thread
* passes the messages using a newly allocated 'struct kvmi_vcpu_msg_job'
@@ -345,6 +405,7 @@ static int handle_vcpu_get_info(const struct kvmi_vcpu_msg_job *job,
*/
static int(*const msg_vcpu[])(const struct kvmi_vcpu_msg_job *,
const struct kvmi_msg_hdr *, const void *) = {
+ [KVMI_EVENT] = handle_vcpu_event_reply,
[KVMI_VCPU_GET_INFO] = handle_vcpu_get_info,
};
@@ -430,7 +491,7 @@ static int kvmi_msg_do_vm_cmd(struct kvm_introspection *kvmi,
static bool is_message_allowed(struct kvm_introspection *kvmi, u16 id)
{
- return kvmi_is_command_allowed(kvmi, id);
+ return id == KVMI_EVENT || kvmi_is_command_allowed(kvmi, id);
}
static int kvmi_msg_vm_reply_ec(struct kvm_introspection *kvmi,
@@ -450,7 +511,8 @@ static int kvmi_msg_handle_vm_cmd(struct kvm_introspection *kvmi,
static bool vcpu_can_handle_messages(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.mp_state != KVM_MP_STATE_UNINITIALIZED;
+ return VCPUI(vcpu)->waiting_for_reply
+ || vcpu->arch.mp_state != KVM_MP_STATE_UNINITIALIZED;
}
static int kvmi_get_vcpu_if_ready(struct kvm_introspection *kvmi,
@@ -554,6 +616,13 @@ static void kvmi_setup_event_common(struct kvmi_event *ev, u32 ev_id,
ev->size = sizeof(*ev);
}
+static void kvmi_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev,
+ u32 ev_id)
+{
+ kvmi_setup_event_common(ev, ev_id, kvm_vcpu_get_idx(vcpu));
+ kvmi_arch_setup_event(vcpu, ev);
+}
+
int kvmi_msg_send_unhook(struct kvm_introspection *kvmi)
{
struct kvmi_msg_hdr hdr;
@@ -570,3 +639,85 @@ int kvmi_msg_send_unhook(struct kvm_introspection *kvmi)
return kvmi_sock_write(kvmi, vec, n, msg_size);
}
+
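+/*
+ * Run the queued vCPU jobs (the handling of the tool's reply arrives as
+ * one of them) while waiting; TASK_KILLABLE lets a fatal signal interrupt
+ * the wait.
+ */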
+static int kvmi_wait_for_reply(struct kvm_vcpu *vcpu)
+{
+ struct rcuwait *waitp = kvm_arch_vcpu_get_wait(vcpu);
+ struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+ int err = 0;
+
+ while (vcpui->waiting_for_reply && !err) {
+ kvmi_run_jobs(vcpu);
+
+ err = rcuwait_wait_event(waitp,
+ !vcpui->waiting_for_reply ||
+ !list_empty(&vcpui->job_list),
+ TASK_KILLABLE);
+ }
+
+ return err;
+}
+
+static void kvmi_setup_vcpu_reply(struct kvm_vcpu_introspection *vcpui,
+ u32 event_seq, void *rpl, size_t rpl_size)
+{
+ memset(&vcpui->reply, 0, sizeof(vcpui->reply));
+
+ vcpui->reply.seq = event_seq;
+ vcpui->reply.data = rpl;
+ vcpui->reply.size = rpl_size;
+ vcpui->reply.error = -EINTR;
+ vcpui->waiting_for_reply = true;
+}
+
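+/*
+ * Send an event message and wait for the introspection tool's reply.
+ * On any error the introspection socket is shut down.
+ */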
+static int kvmi_send_event(struct kvm_vcpu *vcpu, u32 ev_id,
+ void *ev, size_t ev_size,
+ void *rpl, size_t rpl_size, int *action)
+{
+ struct kvmi_msg_hdr hdr;
+ struct kvmi_event common;
+ struct kvec vec[] = {
+ {.iov_base = &hdr, .iov_len = sizeof(hdr) },
+ {.iov_base = &common, .iov_len = sizeof(common)},
+ {.iov_base = ev, .iov_len = ev_size },
+ };
+ size_t msg_size = sizeof(hdr) + sizeof(common) + ev_size;
+ size_t n = ARRAY_SIZE(vec) - (ev_size == 0 ? 1 : 0);
+ struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+ struct kvm_introspection *kvmi = KVMI(vcpu->kvm);
+ int err;
+
+ kvmi_setup_event_msg_hdr(kvmi, &hdr, msg_size);
+ kvmi_setup_event(vcpu, &common, ev_id);
+ kvmi_setup_vcpu_reply(vcpui, hdr.seq, rpl, rpl_size);
+
+ err = kvmi_sock_write(kvmi, vec, n, msg_size);
+ if (err)
+ goto out;
+
+ err = kvmi_wait_for_reply(vcpu);
+ if (err)
+ goto out;
+
+ err = vcpui->reply.error;
+
+ if (!err)
+ *action = vcpui->reply.action;
+
+out:
+ if (err)
+ kvmi_sock_shutdown(kvmi);
+ return err;
+}
+
+u32 kvmi_msg_send_vcpu_pause(struct kvm_vcpu *vcpu)
+{
+ int err, action;
+
+ err = kvmi_send_event(vcpu, KVMI_EVENT_PAUSE_VCPU, NULL, 0,
+ NULL, 0, &action);
+ if (err)
+ return KVMI_EVENT_ACTION_CONTINUE;
+
+ return action;
+}
This event is sent by the vCPU thread as a response to the KVMI_VCPU_PAUSE
command, but it has a lower priority, being sent after any other
introspection event and when no other introspection command is queued.
The number of KVMI_EVENT_PAUSE_VCPU events will match the number of
successful KVMI_VCPU_PAUSE commands.

Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
---
 Documentation/virt/kvm/kvmi.rst               |  22 ++-
 arch/x86/kvm/kvmi.c                           |  81 +++++++++
 include/linux/kvmi_host.h                     |  11 ++
 include/uapi/linux/kvmi.h                     |  13 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  46 ++++++
 virt/kvm/introspection/kvmi.c                 |  24 ++-
 virt/kvm/introspection/kvmi_int.h             |   3 +
 virt/kvm/introspection/kvmi_msg.c             | 155 +++++++++++++++++-
 8 files changed, 351 insertions(+), 4 deletions(-)