diff mbox series

[RFC,v6,49/92] kvm: introspection: add KVMI_PAUSE_VCPU and KVMI_EVENT_PAUSE_VCPU

Message ID 20190809160047.8319-50-alazar@bitdefender.com (mailing list archive)
State New, archived
Headers show
Series VM introspection | expand

Commit Message

Adalbert Lazăr Aug. 9, 2019, 4 p.m. UTC
This is the only vCPU command handled by the receiving worker.
It increments a pause request counter and kicks the vCPU.

This event is send by the vCPU thread, but has a low priority. It
will be sent after any other vCPU introspection event and when no vCPU
introspection command is queued.

Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
---
 Documentation/virtual/kvm/kvmi.rst | 68 ++++++++++++++++++++++++++++++
 include/uapi/linux/kvm_para.h      |  1 +
 include/uapi/linux/kvmi.h          |  7 +++
 virt/kvm/kvmi.c                    | 65 ++++++++++++++++++++++++++++
 virt/kvm/kvmi_int.h                |  4 ++
 virt/kvm/kvmi_msg.c                | 61 +++++++++++++++++++++++++++
 6 files changed, 206 insertions(+)
diff mbox series

Patch

diff --git a/Documentation/virtual/kvm/kvmi.rst b/Documentation/virtual/kvm/kvmi.rst
index eef32107837a..558d3eb6007f 100644
--- a/Documentation/virtual/kvm/kvmi.rst
+++ b/Documentation/virtual/kvm/kvmi.rst
@@ -820,6 +820,48 @@  one page (offset + size <= PAGE_SIZE).
 
 * -KVM_EINVAL - the specified gpa is invalid
 
+16. KVMI_PAUSE_VCPU
+-------------------
+
+:Architecture: all
+:Versions: >= 1
+:Parameters:
+
+	struct kvmi_vcpu_hdr;
+	struct kvmi_pause_vcpu {
+		__u8 wait;
+		__u8 padding1;
+		__u16 padding2;
+		__u32 padding3;
+	};
+
+:Returns:
+
+::
+
+	struct kvmi_error_code;
+
+Kicks the vCPU from guest.
+
+If `wait` is 1, the command will wait for vCPU to acknowledge the IPI.
+
+The vCPU will handle the pending commands/events and send the
+*KVMI_EVENT_PAUSE_VCPU* event (one for every successful *KVMI_PAUSE_VCPU*
+command) before returning to guest.
+
+Please note that new vCPUs might by created at any time.
+The introspection tool should use *KVMI_CONTROL_VM_EVENTS* to enable the
+*KVMI_EVENT_CREATE_VCPU* event in order to stop these new vCPUs as well
+(by delaying the event reply).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY  - the selected vCPU has too many queued *KVMI_EVENT_PAUSE_VCPU* events
+* -KVM_EPERM  - the *KVMI_EVENT_PAUSE_VCPU* event is disallowed (see *KVMI_CONTROL_EVENTS*)
+		and the introspection tool expects a reply.
 Events
 ======
 
@@ -992,3 +1034,29 @@  The *RETRY* action is used by the introspector to retry the execution of
 the current instruction. Either using single-step (if ``singlestep`` is
 not zero) or return to guest (if the introspector changed the instruction
 pointer or the page restrictions).
+
+4. KVMI_EVENT_PAUSE_VCPU
+------------------------
+
+:Architectures: all
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+	struct kvmi_event;
+
+:Returns:
+
+::
+
+	struct kvmi_vcpu_hdr;
+	struct kvmi_event_reply;
+
+This event is sent in response to a *KVMI_PAUSE_VCPU* command and
+cannot be disabled via *KVMI_CONTROL_EVENTS*.
+
+This event has a low priority. It will be sent after any other vCPU
+introspection event and when no vCPU introspection command is queued.
+
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 54c0e20f5b64..07e3f2662b36 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -18,6 +18,7 @@ 
 #define KVM_EPERM		EPERM
 #define KVM_EOPNOTSUPP		95
 #define KVM_EAGAIN		11
+#define KVM_EBUSY		EBUSY
 #define KVM_ENOMEM		ENOMEM
 
 #define KVM_HC_VAPIC_POLL_IRQ		1
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index be3f066f314e..ca9c6b6aeed5 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -177,6 +177,13 @@  struct kvmi_get_vcpu_info_reply {
 	__u64 tsc_speed;
 };
 
+struct kvmi_pause_vcpu {
+	__u8 wait;
+	__u8 padding1;
+	__u16 padding2;
+	__u32 padding3;
+};
+
 struct kvmi_control_events {
 	__u16 event_id;
 	__u8 enable;
diff --git a/virt/kvm/kvmi.c b/virt/kvm/kvmi.c
index a84eb150e116..85de2da3eb7b 100644
--- a/virt/kvm/kvmi.c
+++ b/virt/kvm/kvmi.c
@@ -11,6 +11,8 @@ 
 #include <linux/kthread.h>
 #include <linux/bitmap.h>
 
+#define MAX_PAUSE_REQUESTS 1001
+
 static struct kmem_cache *msg_cache;
 static struct kmem_cache *radix_cache;
 static struct kmem_cache *job_cache;
@@ -1090,6 +1092,39 @@  static bool kvmi_create_vcpu_event(struct kvm_vcpu *vcpu)
 	return ret;
 }
 
+static bool __kvmi_pause_vcpu_event(struct kvm_vcpu *vcpu)
+{
+	u32 action;
+	bool ret = false;
+
+	action = kvmi_msg_send_pause_vcpu(vcpu);
+	switch (action) {
+	case KVMI_EVENT_ACTION_CONTINUE:
+		ret = true;
+		break;
+	default:
+		kvmi_handle_common_event_actions(vcpu, action, "PAUSE");
+	}
+
+	return ret;
+}
+
+static bool kvmi_pause_vcpu_event(struct kvm_vcpu *vcpu)
+{
+	struct kvmi *ikvm;
+	bool ret = true;
+
+	ikvm = kvmi_get(vcpu->kvm);
+	if (!ikvm)
+		return true;
+
+	ret = __kvmi_pause_vcpu_event(vcpu);
+
+	kvmi_put(vcpu->kvm);
+
+	return ret;
+}
+
 void kvmi_run_jobs(struct kvm_vcpu *vcpu)
 {
 	struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
@@ -1154,6 +1189,7 @@  int kvmi_run_jobs_and_wait(struct kvm_vcpu *vcpu)
 
 void kvmi_handle_requests(struct kvm_vcpu *vcpu)
 {
+	struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
 	struct kvmi *ikvm;
 
 	ikvm = kvmi_get(vcpu->kvm);
@@ -1165,6 +1201,12 @@  void kvmi_handle_requests(struct kvm_vcpu *vcpu)
 
 		if (err)
 			break;
+
+		if (!atomic_read(&ivcpu->pause_requests))
+			break;
+
+		atomic_dec(&ivcpu->pause_requests);
+		kvmi_pause_vcpu_event(vcpu);
 	}
 
 	kvmi_put(vcpu->kvm);
@@ -1351,10 +1393,33 @@  int kvmi_cmd_control_vm_events(struct kvmi *ikvm, unsigned int event_id,
 	return 0;
 }
 
+int kvmi_cmd_pause_vcpu(struct kvm_vcpu *vcpu, bool wait)
+{
+	struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
+	unsigned int req = KVM_REQ_INTROSPECTION;
+
+	if (atomic_read(&ivcpu->pause_requests) > MAX_PAUSE_REQUESTS)
+		return -KVM_EBUSY;
+
+	atomic_inc(&ivcpu->pause_requests);
+	kvm_make_request(req, vcpu);
+	if (wait)
+		kvm_vcpu_kick_and_wait(vcpu);
+	else
+		kvm_vcpu_kick(vcpu);
+
+	return 0;
+}
+
 static void kvmi_job_abort(struct kvm_vcpu *vcpu, void *ctx)
 {
 	struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
 
+	/*
+	 * The thread that might increment this atomic is stopped
+	 * and this thread is the only one that could decrement it.
+	 */
+	atomic_set(&ivcpu->pause_requests, 0);
 	ivcpu->reply_waiting = false;
 }
 
diff --git a/virt/kvm/kvmi_int.h b/virt/kvm/kvmi_int.h
index 7bdff70d4309..cb3b0ce87bc1 100644
--- a/virt/kvm/kvmi_int.h
+++ b/virt/kvm/kvmi_int.h
@@ -100,6 +100,8 @@  struct kvmi_vcpu {
 	bool rep_complete;
 	bool effective_rep_complete;
 
+	atomic_t pause_requests;
+
 	bool reply_waiting;
 	struct kvmi_vcpu_reply reply;
 
@@ -164,6 +166,7 @@  u32 kvmi_msg_send_pf(struct kvm_vcpu *vcpu, u64 gpa, u64 gva, u8 access,
 		     bool *singlestep, bool *rep_complete,
 		     u64 *ctx_addr, u8 *ctx, u32 *ctx_size);
 u32 kvmi_msg_send_create_vcpu(struct kvm_vcpu *vcpu);
+u32 kvmi_msg_send_pause_vcpu(struct kvm_vcpu *vcpu);
 int kvmi_msg_send_unhook(struct kvmi *ikvm);
 
 /* kvmi.c */
@@ -185,6 +188,7 @@  int kvmi_cmd_control_events(struct kvm_vcpu *vcpu, unsigned int event_id,
 			    bool enable);
 int kvmi_cmd_control_vm_events(struct kvmi *ikvm, unsigned int event_id,
 			       bool enable);
+int kvmi_cmd_pause_vcpu(struct kvm_vcpu *vcpu, bool wait);
 int kvmi_run_jobs_and_wait(struct kvm_vcpu *vcpu);
 int kvmi_add_job(struct kvm_vcpu *vcpu,
 		 void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
diff --git a/virt/kvm/kvmi_msg.c b/virt/kvm/kvmi_msg.c
index 9c20a9cfda42..a4446eed354d 100644
--- a/virt/kvm/kvmi_msg.c
+++ b/virt/kvm/kvmi_msg.c
@@ -34,6 +34,7 @@  static const char *const msg_IDs[] = {
 	[KVMI_GET_PAGE_WRITE_BITMAP] = "KVMI_GET_PAGE_WRITE_BITMAP",
 	[KVMI_GET_VCPU_INFO]         = "KVMI_GET_VCPU_INFO",
 	[KVMI_GET_VERSION]           = "KVMI_GET_VERSION",
+	[KVMI_PAUSE_VCPU]            = "KVMI_PAUSE_VCPU",
 	[KVMI_READ_PHYSICAL]         = "KVMI_READ_PHYSICAL",
 	[KVMI_SET_PAGE_ACCESS]       = "KVMI_SET_PAGE_ACCESS",
 	[KVMI_SET_PAGE_WRITE_BITMAP] = "KVMI_SET_PAGE_WRITE_BITMAP",
@@ -457,6 +458,53 @@  static bool invalid_vcpu_hdr(const struct kvmi_vcpu_hdr *hdr)
 	return hdr->padding1 || hdr->padding2;
 }
 
+/*
+ * We handle this vCPU command on the receiving thread to make it easier
+ * for userspace to implement a 'pause VM' command. Usually, this is done
+ * by sending one 'pause vCPU' command for every vCPU. By handling the
+ * command here, the userspace can:
+ *    - optimize, by not requesting a reply for the first N-1 vCPU's
+ *    - consider the VM stopped once it receives the reply
+ *      for the last 'pause vCPU' command
+ */
+static int handle_pause_vcpu(struct kvmi *ikvm,
+			     const struct kvmi_msg_hdr *msg,
+			     const void *_req)
+{
+	const struct kvmi_pause_vcpu *req = _req;
+	const struct kvmi_vcpu_hdr *cmd;
+	struct kvm_vcpu *vcpu = NULL;
+	int err;
+
+	if (req->padding1 || req->padding2 || req->padding3) {
+		err = -KVM_EINVAL;
+		goto reply;
+	}
+
+	cmd = (const struct kvmi_vcpu_hdr *) (msg + 1);
+
+	if (invalid_vcpu_hdr(cmd)) {
+		err = -KVM_EINVAL;
+		goto reply;
+	}
+
+	if (!is_event_allowed(ikvm, KVMI_EVENT_PAUSE_VCPU)) {
+		err = -KVM_EPERM;
+
+		if (ikvm->cmd_reply_disabled)
+			return kvmi_msg_vm_reply(ikvm, msg, err, NULL, 0);
+
+		goto reply;
+	}
+
+	err = kvmi_get_vcpu(ikvm, cmd->vcpu, &vcpu);
+	if (!err)
+		err = kvmi_cmd_pause_vcpu(vcpu, req->wait == 1);
+
+reply:
+	return kvmi_msg_vm_maybe_reply(ikvm, msg, err, NULL, 0);
+}
+
 /*
  * These commands are executed on the receiving thread/worker.
  */
@@ -471,6 +519,7 @@  static int(*const msg_vm[])(struct kvmi *, const struct kvmi_msg_hdr *,
 	[KVMI_GET_PAGE_ACCESS]       = handle_get_page_access,
 	[KVMI_GET_PAGE_WRITE_BITMAP] = handle_get_page_write_bitmap,
 	[KVMI_GET_VERSION]           = handle_get_version,
+	[KVMI_PAUSE_VCPU]            = handle_pause_vcpu,
 	[KVMI_READ_PHYSICAL]         = handle_read_physical,
 	[KVMI_SET_PAGE_ACCESS]       = handle_set_page_access,
 	[KVMI_SET_PAGE_WRITE_BITMAP] = handle_set_page_write_bitmap,
@@ -966,3 +1015,15 @@  u32 kvmi_msg_send_create_vcpu(struct kvm_vcpu *vcpu)
 
 	return action;
 }
+
+u32 kvmi_msg_send_pause_vcpu(struct kvm_vcpu *vcpu)
+{
+	int err, action;
+
+	err = kvmi_send_event(vcpu, KVMI_EVENT_PAUSE_VCPU, NULL, 0,
+			      NULL, 0, &action);
+	if (err)
+		return KVMI_EVENT_ACTION_CONTINUE;
+
+	return action;
+}