@@ -820,6 +820,48 @@ one page (offset + size <= PAGE_SIZE).
* -KVM_EINVAL - the specified gpa is invalid
+16. KVMI_PAUSE_VCPU
+-------------------
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+ struct kvmi_vcpu_hdr;
+ struct kvmi_pause_vcpu {
+ __u8 wait;
+ __u8 padding1;
+ __u16 padding2;
+ __u32 padding3;
+ };
+
+:Returns:
+
+::
+
+ struct kvmi_error_code;
+
+Kicks the vCPU out of the guest.
+
+If `wait` is 1, the command will wait for the vCPU to acknowledge the IPI.
+
+The vCPU will handle the pending commands/events and send the
+*KVMI_EVENT_PAUSE_VCPU* event (one for every successful *KVMI_PAUSE_VCPU*
+command) before returning to guest.
+
+Please note that new vCPUs might be created at any time.
+The introspection tool should use *KVMI_CONTROL_VM_EVENTS* to enable the
+*KVMI_EVENT_CREATE_VCPU* event in order to stop these new vCPUs as well
+(by delaying the event reply).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY - the selected vCPU has too many queued *KVMI_EVENT_PAUSE_VCPU* events
+* -KVM_EPERM - the *KVMI_EVENT_PAUSE_VCPU* event is disallowed (see *KVMI_CONTROL_EVENTS*)
+ and the introspection tool expects a reply.
Events
======
@@ -992,3 +1034,29 @@ The *RETRY* action is used by the introspector to retry the execution of
the current instruction. Either using single-step (if ``singlestep`` is
not zero) or return to guest (if the introspector changed the instruction
pointer or the page restrictions).
+
+4. KVMI_EVENT_PAUSE_VCPU
+------------------------
+
+:Architectures: all
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+ struct kvmi_event;
+
+:Returns:
+
+::
+
+ struct kvmi_vcpu_hdr;
+ struct kvmi_event_reply;
+
+This event is sent in response to a *KVMI_PAUSE_VCPU* command and
+cannot be disabled via *KVMI_CONTROL_EVENTS*.
+
+This event has a low priority. It will be sent after any other vCPU
+introspection event and when no vCPU introspection command is queued.
+
@@ -18,6 +18,7 @@
#define KVM_EPERM EPERM
#define KVM_EOPNOTSUPP 95
#define KVM_EAGAIN 11
+#define KVM_EBUSY EBUSY
#define KVM_ENOMEM ENOMEM
#define KVM_HC_VAPIC_POLL_IRQ 1
@@ -177,6 +177,13 @@ struct kvmi_get_vcpu_info_reply {
__u64 tsc_speed;
};
+struct kvmi_pause_vcpu {
+ __u8 wait; /* 1: wait for the vCPU to acknowledge the kick; otherwise kick without waiting */
+ __u8 padding1; /* must be zero, else the command fails with -KVM_EINVAL */
+ __u16 padding2; /* must be zero, else the command fails with -KVM_EINVAL */
+ __u32 padding3; /* must be zero, else the command fails with -KVM_EINVAL */
+};
+
struct kvmi_control_events {
__u16 event_id;
__u8 enable;
@@ -11,6 +11,8 @@
#include <linux/kthread.h>
#include <linux/bitmap.h>
+#define MAX_PAUSE_REQUESTS 1001 /* kvmi_cmd_pause_vcpu() returns -KVM_EBUSY once a vCPU's pending count exceeds this */
+
static struct kmem_cache *msg_cache;
static struct kmem_cache *radix_cache;
static struct kmem_cache *job_cache;
@@ -1090,6 +1092,39 @@ static bool kvmi_create_vcpu_event(struct kvm_vcpu *vcpu)
return ret;
}
+/*
+ * Send the KVMI_EVENT_PAUSE_VCPU event for @vcpu and act on the reply.
+ *
+ * Returns true when the reply action is KVMI_EVENT_ACTION_CONTINUE.
+ * Any other action is handed to kvmi_handle_common_event_actions()
+ * and false is returned.
+ */
+static bool __kvmi_pause_vcpu_event(struct kvm_vcpu *vcpu)
+{
+	u32 action = kvmi_msg_send_pause_vcpu(vcpu);
+
+	if (action == KVMI_EVENT_ACTION_CONTINUE)
+		return true;
+
+	kvmi_handle_common_event_actions(vcpu, action, "PAUSE");
+
+	return false;
+}
+
+/*
+ * Reference-holding wrapper around __kvmi_pause_vcpu_event().
+ *
+ * Returns true without sending anything when kvmi_get() yields NULL;
+ * otherwise returns the result of __kvmi_pause_vcpu_event() after
+ * releasing the reference with kvmi_put().
+ */
+static bool kvmi_pause_vcpu_event(struct kvm_vcpu *vcpu)
+{
+	bool handled;
+
+	if (!kvmi_get(vcpu->kvm))
+		return true;
+
+	handled = __kvmi_pause_vcpu_event(vcpu);
+	kvmi_put(vcpu->kvm);
+
+	return handled;
+}
+
void kvmi_run_jobs(struct kvm_vcpu *vcpu)
{
struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
@@ -1154,6 +1189,7 @@ int kvmi_run_jobs_and_wait(struct kvm_vcpu *vcpu)
void kvmi_handle_requests(struct kvm_vcpu *vcpu)
{
+ struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
struct kvmi *ikvm;
ikvm = kvmi_get(vcpu->kvm);
@@ -1165,6 +1201,12 @@ void kvmi_handle_requests(struct kvm_vcpu *vcpu)
if (err)
break;
+
+ if (!atomic_read(&ivcpu->pause_requests))
+ break;
+
+ atomic_dec(&ivcpu->pause_requests);
+ kvmi_pause_vcpu_event(vcpu);
}
kvmi_put(vcpu->kvm);
@@ -1351,10 +1393,33 @@ int kvmi_cmd_control_vm_events(struct kvmi *ikvm, unsigned int event_id,
return 0;
}
+/*
+ * Queue one pause request for @vcpu and kick it.
+ *
+ * The request is rejected with -KVM_EBUSY only when the per-vCPU counter
+ * is already strictly greater than MAX_PAUSE_REQUESTS. Otherwise the
+ * counter is incremented, KVM_REQ_INTROSPECTION is raised and the vCPU
+ * is kicked; when @wait is true the kick uses kvm_vcpu_kick_and_wait().
+ *
+ * Returns 0 on success, -KVM_EBUSY when too many requests are pending.
+ */
+int kvmi_cmd_pause_vcpu(struct kvm_vcpu *vcpu, bool wait)
+{
+	struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
+
+	if (atomic_read(&ivcpu->pause_requests) > MAX_PAUSE_REQUESTS)
+		return -KVM_EBUSY;
+
+	atomic_inc(&ivcpu->pause_requests);
+	kvm_make_request(KVM_REQ_INTROSPECTION, vcpu);
+
+	if (!wait) {
+		kvm_vcpu_kick(vcpu);
+		return 0;
+	}
+
+	kvm_vcpu_kick_and_wait(vcpu);
+
+	return 0;
+}
+
static void kvmi_job_abort(struct kvm_vcpu *vcpu, void *ctx)
{
struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
+ /*
+ * Drop every pause request still pending for this vCPU
+ * (the counter is incremented by kvmi_cmd_pause_vcpu()).
+ *
+ * The thread that might increment this atomic is stopped
+ * and this thread is the only one that could decrement it.
+ */
+ atomic_set(&ivcpu->pause_requests, 0);
ivcpu->reply_waiting = false;
}
@@ -100,6 +100,8 @@ struct kvmi_vcpu {
bool rep_complete;
bool effective_rep_complete;
+ atomic_t pause_requests;
+
bool reply_waiting;
struct kvmi_vcpu_reply reply;
@@ -164,6 +166,7 @@ u32 kvmi_msg_send_pf(struct kvm_vcpu *vcpu, u64 gpa, u64 gva, u8 access,
bool *singlestep, bool *rep_complete,
u64 *ctx_addr, u8 *ctx, u32 *ctx_size);
u32 kvmi_msg_send_create_vcpu(struct kvm_vcpu *vcpu);
+u32 kvmi_msg_send_pause_vcpu(struct kvm_vcpu *vcpu);
int kvmi_msg_send_unhook(struct kvmi *ikvm);
/* kvmi.c */
@@ -185,6 +188,7 @@ int kvmi_cmd_control_events(struct kvm_vcpu *vcpu, unsigned int event_id,
bool enable);
int kvmi_cmd_control_vm_events(struct kvmi *ikvm, unsigned int event_id,
bool enable);
+int kvmi_cmd_pause_vcpu(struct kvm_vcpu *vcpu, bool wait);
int kvmi_run_jobs_and_wait(struct kvm_vcpu *vcpu);
int kvmi_add_job(struct kvm_vcpu *vcpu,
void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
@@ -34,6 +34,7 @@ static const char *const msg_IDs[] = {
[KVMI_GET_PAGE_WRITE_BITMAP] = "KVMI_GET_PAGE_WRITE_BITMAP",
[KVMI_GET_VCPU_INFO] = "KVMI_GET_VCPU_INFO",
[KVMI_GET_VERSION] = "KVMI_GET_VERSION",
+ [KVMI_PAUSE_VCPU] = "KVMI_PAUSE_VCPU",
[KVMI_READ_PHYSICAL] = "KVMI_READ_PHYSICAL",
[KVMI_SET_PAGE_ACCESS] = "KVMI_SET_PAGE_ACCESS",
[KVMI_SET_PAGE_WRITE_BITMAP] = "KVMI_SET_PAGE_WRITE_BITMAP",
@@ -457,6 +458,53 @@ static bool invalid_vcpu_hdr(const struct kvmi_vcpu_hdr *hdr)
return hdr->padding1 || hdr->padding2;
}
+/*
+ * KVMI_PAUSE_VCPU is a vCPU command, yet it is handled on the receiving
+ * thread. This makes a 'pause VM' operation easy to build in userspace,
+ * which usually sends one 'pause vCPU' command per vCPU. Because the
+ * commands are handled here, userspace can:
+ *   - skip requesting a reply for the first N-1 vCPUs
+ *   - treat the VM as stopped once the reply to the last
+ *     'pause vCPU' command arrives
+ *
+ * Validation order: command padding, vCPU header, event permission,
+ * vCPU lookup. When the pause event is not allowed, -KVM_EPERM is sent
+ * through kvmi_msg_vm_reply() if ikvm->cmd_reply_disabled is set; every
+ * other outcome goes through kvmi_msg_vm_maybe_reply().
+ */
+static int handle_pause_vcpu(struct kvmi *ikvm,
+			     const struct kvmi_msg_hdr *msg,
+			     const void *_req)
+{
+	const struct kvmi_vcpu_hdr *cmd =
+		(const struct kvmi_vcpu_hdr *) (msg + 1);
+	const struct kvmi_pause_vcpu *req = _req;
+	struct kvm_vcpu *vcpu = NULL;
+	int err = -KVM_EINVAL;
+
+	if (req->padding1 || req->padding2 || req->padding3)
+		goto reply;
+
+	if (invalid_vcpu_hdr(cmd))
+		goto reply;
+
+	if (!is_event_allowed(ikvm, KVMI_EVENT_PAUSE_VCPU)) {
+		err = -KVM_EPERM;
+
+		if (!ikvm->cmd_reply_disabled)
+			goto reply;
+
+		return kvmi_msg_vm_reply(ikvm, msg, err, NULL, 0);
+	}
+
+	err = kvmi_get_vcpu(ikvm, cmd->vcpu, &vcpu);
+	if (err)
+		goto reply;
+
+	err = kvmi_cmd_pause_vcpu(vcpu, req->wait == 1);
+
+reply:
+	return kvmi_msg_vm_maybe_reply(ikvm, msg, err, NULL, 0);
+}
+
/*
* These commands are executed on the receiving thread/worker.
*/
@@ -471,6 +519,7 @@ static int(*const msg_vm[])(struct kvmi *, const struct kvmi_msg_hdr *,
[KVMI_GET_PAGE_ACCESS] = handle_get_page_access,
[KVMI_GET_PAGE_WRITE_BITMAP] = handle_get_page_write_bitmap,
[KVMI_GET_VERSION] = handle_get_version,
+ [KVMI_PAUSE_VCPU] = handle_pause_vcpu,
[KVMI_READ_PHYSICAL] = handle_read_physical,
[KVMI_SET_PAGE_ACCESS] = handle_set_page_access,
[KVMI_SET_PAGE_WRITE_BITMAP] = handle_set_page_write_bitmap,
@@ -966,3 +1015,15 @@ u32 kvmi_msg_send_create_vcpu(struct kvm_vcpu *vcpu)
return action;
}
+
+/*
+ * Send KVMI_EVENT_PAUSE_VCPU (no event payload, no reply payload) and
+ * return the action from the reply. If kvmi_send_event() fails, fall
+ * back to KVMI_EVENT_ACTION_CONTINUE.
+ */
+u32 kvmi_msg_send_pause_vcpu(struct kvm_vcpu *vcpu)
+{
+	int action;
+
+	if (kvmi_send_event(vcpu, KVMI_EVENT_PAUSE_VCPU, NULL, 0,
+			    NULL, 0, &action))
+		return KVMI_EVENT_ACTION_CONTINUE;
+
+	return action;
+}
This is the only vCPU command handled by the receiving worker. It increments a pause request counter and kicks the vCPU. This event is sent by the vCPU thread, but has a low priority. It will be sent after any other vCPU introspection event and when no vCPU introspection command is queued. Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com> --- Documentation/virtual/kvm/kvmi.rst | 68 ++++++++++++++++++++++++++++++ include/uapi/linux/kvm_para.h | 1 + include/uapi/linux/kvmi.h | 7 +++ virt/kvm/kvmi.c | 65 ++++++++++++++++++++++++++++ virt/kvm/kvmi_int.h | 4 ++ virt/kvm/kvmi_msg.c | 61 +++++++++++++++++++++++++++ 6 files changed, 206 insertions(+)