[RFC,14/15] workpending: Provide infrastructure for work before entering a guest

Message ID 20190919150809.860645841@linutronix.de
State New, archived
Series entry: Provide generic implementation for host and guest entry/exit work

Commit Message

Thomas Gleixner Sept. 19, 2019, 3:03 p.m. UTC
Entering a guest is similar to exiting to user space. Pending work like
handling signals, rescheduling, task work etc. needs to be handled before
that.

Provide generic infrastructure to avoid duplication of the same handling code
all over the place.

Update ARM64 struct kvm_vcpu_stat with a signal_exits member so the generic
code compiles.
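
A rough sketch of the intended call site (illustrative only; enter_guest()
stands in for the arch specific guest entry and is not part of this patch):

	r = exit_to_guestmode(kvm, vcpu);
	if (r)
		return r;	/* e.g. -EINTR when a signal is pending */
	enter_guest(vcpu);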

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/arm64/include/asm/kvm_host.h |    1 
 include/linux/entry-common.h      |   66 ++++++++++++++++++++++++++++++++++++++
 kernel/entry/common.c             |   44 +++++++++++++++++++++++++
 3 files changed, 111 insertions(+)

Comments

Paolo Bonzini Sept. 19, 2019, 3:40 p.m. UTC | #1
Quick API review before I dive into the implementation.

On 19/09/19 17:03, Thomas Gleixner wrote:
> +	/*
> +	 * Before returning to guest mode handle all pending work
> +	 */
> +	if (ti_work & _TIF_SIGPENDING) {
> +		vcpu->run->exit_reason = KVM_EXIT_INTR;
> +		vcpu->stat.signal_exits++;
> +		return -EINTR;
> +	}
> +
> +	if (ti_work & _TIF_NEED_RESCHED) {
> +		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> +		schedule();
> +		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> +	}
> +
> +	if (ti_work & _TIF_PATCH_PENDING) {
> +		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> +		klp_update_patch_state(current);
> +		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> +	}
> +
> +	if (ti_work & _TIF_NOTIFY_RESUME) {
> +		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> +		clear_thread_flag(TIF_NOTIFY_RESUME);
> +		tracehook_notify_resume(NULL);
> +		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> +	}
> +
> +	/* Any extra architecture specific work */
> +	return arch_exit_to_guestmode_work(kvm, vcpu, ti_work);
> +}

Perhaps, in virt/kvm/kvm_main.c:

int kvm_exit_to_guestmode_work(struct kvm *kvm, struct kvm_vcpu *vcpu,
				unsigned long ti_work)
{
	int r;

	/*
	 * Before returning to guest mode handle all pending work
	 */
	if (ti_work & _TIF_SIGPENDING) {
		vcpu->run->exit_reason = KVM_EXIT_INTR;
		vcpu->stat.signal_exits++;
		return -EINTR;
	}

	srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
	r = core_exit_to_guestmode_work(ti_work);
	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);

	return r;
}

and in kernel/entry/common.c:

int core_exit_to_guestmode_work(unsigned long ti_work)
{
	/*
	 * Before returning to guest mode handle all pending work
	 */
	if (ti_work & _TIF_NEED_RESCHED)
		schedule();

	if (ti_work & _TIF_PATCH_PENDING)
		klp_update_patch_state(current);

	if (ti_work & _TIF_NOTIFY_RESUME) {
		clear_thread_flag(TIF_NOTIFY_RESUME);
		tracehook_notify_resume(NULL);
	}
	return arch_exit_to_guestmode_work(ti_work);
}

so that kernel/entry/ is not polluted with KVM structs and APIs.

Perhaps even extract the body of core_exit_to_usermode_work's while loop
to a separate function, and call it as

	core_exit_to_usermode_work_once(NULL,
				ti_work & EXIT_TO_GUESTMODE_WORK);

from core_exit_to_guestmode_work.
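
i.e. roughly (sketch only; the exact signature depends on how the usermode
work loop earlier in the series is structured):

static void core_exit_to_usermode_work_once(struct pt_regs *regs,
					    unsigned long ti_work)
{
	if (ti_work & _TIF_NEED_RESCHED)
		schedule();

	if (ti_work & _TIF_PATCH_PENDING)
		klp_update_patch_state(current);

	if (ti_work & _TIF_NOTIFY_RESUME) {
		clear_thread_flag(TIF_NOTIFY_RESUME);
		tracehook_notify_resume(regs);
	}

	/*
	 * Signal handling needs regs; the guestmode caller bails out
	 * on _TIF_SIGPENDING before getting here.
	 */
	...
}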

In general I don't mind having these exit_to_guestmode functions in
kvm_host.h, and only having entry-common.h export EXIT_TO_GUESTMODE_WORK
and ARCH_EXIT_TO_GUESTMODE_WORK.  Unless you had good reasons to do the
opposite...

Paolo
Thomas Gleixner Sept. 20, 2019, 11:48 a.m. UTC | #2
On Thu, 19 Sep 2019, Paolo Bonzini wrote:
> > +	/* Any extra architecture specific work */
> > +	return arch_exit_to_guestmode_work(kvm, vcpu, ti_work);
> > +}
> 
> Perhaps, in virt/kvm/kvm_main.c:
> 
> int kvm_exit_to_guestmode_work(struct kvm *kvm, struct kvm_vcpu *vcpu,
> 				unsigned long ti_work)
> {
...
> }
> 
> and in kernel/entry/common.c:
> 
> int core_exit_to_guestmode_work(unsigned long ti_work)
> {
...
> }

Makes sense.
 
> so that kernel/entry/ is not polluted with KVM structs and APIs.
> 
> Perhaps even extract the body of core_exit_to_usermode_work's while loop
> to a separate function, and call it as
> 
> 	core_exit_to_usermode_work_once(NULL,
> 				ti_work & EXIT_TO_GUESTMODE_WORK);

Doh, it's too obvious now that you mention it :)

> from core_exit_to_guestmode_work.
> 
> In general I don't mind having these exit_to_guestmode functions in
> kvm_host.h, and only having entry-common.h export EXIT_TO_GUESTMODE_WORK
> and ARCH_EXIT_TO_GUESTMODE_WORK.  Unless you had good reasons to do the
> opposite...

That was an arbitrary choice and it does not matter much where it lives.

Thanks,

	tglx
Andy Lutomirski Sept. 23, 2019, 6:17 p.m. UTC | #3
On Thu, Sep 19, 2019 at 8:09 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> Entering a guest is similar to exiting to user space. Pending work like
> handling signals, rescheduling, task work etc. needs to be handled before
> that.
>
> Provide generic infrastructure to avoid duplication of the same handling code
> all over the place.
>
> Update ARM64 struct kvm_vcpu_stat with a signal_exits member so the generic
> code compiles.
>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>  arch/arm64/include/asm/kvm_host.h |    1
>  include/linux/entry-common.h      |   66 ++++++++++++++++++++++++++++++++++++++
>  kernel/entry/common.c             |   44 +++++++++++++++++++++++++
>  3 files changed, 111 insertions(+)
>
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -409,6 +409,7 @@ struct kvm_vcpu_stat {
>         u64 wfi_exit_stat;
>         u64 mmio_exit_user;
>         u64 mmio_exit_kernel;
> +       u64 signal_exits;
>         u64 exits;
>  };
>
> --- a/include/linux/entry-common.h
> +++ b/include/linux/entry-common.h
> @@ -255,4 +255,70 @@ static inline void arch_syscall_exit_tra
>  /* Common syscall exit function */
>  void syscall_exit_to_usermode(struct pt_regs *regs, long syscall, long retval);
>
> +#if IS_ENABLED(CONFIG_KVM)
> +
> +#include <linux/kvm_host.h>
> +
> +#ifndef ARCH_EXIT_TO_GUESTMODE_WORK
> +# define ARCH_EXIT_TO_GUESTMODE_WORK   (0)
> +#endif
> +
> +#define EXIT_TO_GUESTMODE_WORK                                         \
> +       (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_RESUME |     \
> +        ARCH_EXIT_TO_GUESTMODE_WORK)
> +
> +int core_exit_to_guestmode_work(struct kvm *kvm, struct kvm_vcpu *vcpu,
> +                               unsigned long ti_work);
> +
> +/**
> + * arch_exit_to_guestmode - Architecture specific exit to guest mode function
> + * @kvm:       Pointer to the guest instance
> + * @vcpu:      Pointer to current's VCPU data
> + * @ti_work:   Cached TIF flags gathered in exit_to_guestmode()
> + *
> + * Invoked from core_exit_to_guestmode_work(). Can be replaced by
> + * architecture specific code.
> + */
> +static inline int arch_exit_to_guestmode(struct kvm *kvm, struct kvm_vcpu *vcpu,
> +                                        unsigned long ti_work);

Can you add a comment about whether IRQs are supposed to be off (I
assume they are) and perhaps a lockdep assertion to verify it?
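
Something along these lines at the top of the work handling, whichever
invariant actually holds (sketch):

	int core_exit_to_guestmode_work(...)
	{
		lockdep_assert_irqs_disabled();
		/* or lockdep_assert_irqs_enabled() if they are meant to be on */
		...
	}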
Miroslav Benes Sept. 26, 2019, 11:35 a.m. UTC | #4
> --- a/include/linux/entry-common.h
> +++ b/include/linux/entry-common.h

[...]

> +#define EXIT_TO_GUESTMODE_WORK						\
> +	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_RESUME |	\
> +	 ARCH_EXIT_TO_GUESTMODE_WORK)

[...]

> --- a/kernel/entry/common.c
> +++ b/kernel/entry/common.c
>
> +int core_exit_to_guestmode_work(struct kvm *kvm, struct kvm_vcpu *vcpu,
> +				unsigned long ti_work)
> +{
> +	/*
> +	 * Before returning to guest mode handle all pending work
> +	 */
> +	if (ti_work & _TIF_SIGPENDING) {
> +		vcpu->run->exit_reason = KVM_EXIT_INTR;
> +		vcpu->stat.signal_exits++;
> +		return -EINTR;
> +	}
> +
> +	if (ti_work & _TIF_NEED_RESCHED) {
> +		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> +		schedule();
> +		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> +	}
> +
> +	if (ti_work & _TIF_PATCH_PENDING) {
> +		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> +		klp_update_patch_state(current);
> +		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> +	}

If I am reading the code correctly, _TIF_PATCH_PENDING is not a part of 
EXIT_TO_GUESTMODE_WORK, so the handling code here would not be called on 
any arch as of now.

I also think that _TIF_PATCH_PENDING must not be handled here generally. 
It could break consistency guarantees when live patching KVM (and we do 
that from time to time).

Adding live-patching ML to CC.

Miroslav

> +	if (ti_work & _TIF_NOTIFY_RESUME) {
> +		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
> +		clear_thread_flag(TIF_NOTIFY_RESUME);
> +		tracehook_notify_resume(NULL);
> +		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
> +	}
> +
> +	/* Any extra architecture specific work */
> +	return arch_exit_to_guestmode_work(kvm, vcpu, ti_work);
> +}

Patch

--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -409,6 +409,7 @@ struct kvm_vcpu_stat {
 	u64 wfi_exit_stat;
 	u64 mmio_exit_user;
 	u64 mmio_exit_kernel;
+	u64 signal_exits;
 	u64 exits;
 };
 
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -255,4 +255,70 @@ static inline void arch_syscall_exit_tra
 /* Common syscall exit function */
 void syscall_exit_to_usermode(struct pt_regs *regs, long syscall, long retval);
 
+#if IS_ENABLED(CONFIG_KVM)
+
+#include <linux/kvm_host.h>
+
+#ifndef ARCH_EXIT_TO_GUESTMODE_WORK
+# define ARCH_EXIT_TO_GUESTMODE_WORK	(0)
+#endif
+
+#define EXIT_TO_GUESTMODE_WORK						\
+	(_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_RESUME |	\
+	 ARCH_EXIT_TO_GUESTMODE_WORK)
+
+int core_exit_to_guestmode_work(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				unsigned long ti_work);
+
+/**
+ * arch_exit_to_guestmode - Architecture specific exit to guest mode function
+ * @kvm:	Pointer to the guest instance
+ * @vcpu:	Pointer to current's VCPU data
+ * @ti_work:	Cached TIF flags gathered in exit_to_guestmode()
+ *
+ * Invoked from core_exit_to_guestmode_work(). Can be replaced by
+ * architecture specific code.
+ */
+static inline int arch_exit_to_guestmode(struct kvm *kvm, struct kvm_vcpu *vcpu,
+					 unsigned long ti_work);
+
+#ifndef arch_exit_to_guestmode
+static inline int arch_exit_to_guestmode(struct kvm *kvm, struct kvm_vcpu *vcpu,
+					 unsigned long ti_work)
+{
+	return 0;
+}
+#endif
+
+/**
+ * exit_to_guestmode - Check and handle pending work which needs to be
+ *		       handled before returning to guest mode
+ * @kvm:	Pointer to the guest instance
+ * @vcpu:	Pointer to current's VCPU data
+ *
+ * Returns: 0 or an error code
+ */
+static inline int exit_to_guestmode(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
+
+	if (unlikely(ti_work & EXIT_TO_GUESTMODE_WORK))
+		return core_exit_to_guestmode_work(kvm, vcpu, ti_work);
+	return 0;
+}
+
+
+/**
+ * exit_to_guestmode_work_pending - Check if work is pending which needs to be
+ *				     handled before returning to guest mode
+ */
+static inline bool exit_to_guestmode_work_pending(void)
+{
+	unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
+
+	return !!(ti_work & EXIT_TO_GUESTMODE_WORK);
+
+}
+#endif /* CONFIG_KVM */
+
 #endif
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -174,3 +174,47 @@ void syscall_exit_to_usermode(struct pt_
 	do_exit_to_usermode(regs);
 #endif
 }
+
+#if IS_ENABLED(CONFIG_KVM)
+int __weak arch_exit_to_guestmode_work(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				       unsigned long ti_work)
+{
+	return 0;
+}
+
+int core_exit_to_guestmode_work(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				unsigned long ti_work)
+{
+	/*
+	 * Before returning to guest mode handle all pending work
+	 */
+	if (ti_work & _TIF_SIGPENDING) {
+		vcpu->run->exit_reason = KVM_EXIT_INTR;
+		vcpu->stat.signal_exits++;
+		return -EINTR;
+	}
+
+	if (ti_work & _TIF_NEED_RESCHED) {
+		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+		schedule();
+		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+	}
+
+	if (ti_work & _TIF_PATCH_PENDING) {
+		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+		klp_update_patch_state(current);
+		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+	}
+
+	if (ti_work & _TIF_NOTIFY_RESUME) {
+		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(NULL);
+		vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+	}
+
+	/* Any extra architecture specific work */
+	return arch_exit_to_guestmode_work(kvm, vcpu, ti_work);
+}
+EXPORT_SYMBOL_GPL(core_exit_to_guestmode_work);
+#endif
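
For reference, exit_to_guestmode() and exit_to_guestmode_work_pending() are
meant to pair up in a vcpu run loop roughly as follows. This is a sketch
only; enter_guest() and the loop around it are assumed for illustration and
not part of this patch:

	for (;;) {
		/* Handle pending work with interrupts enabled; may sleep */
		r = exit_to_guestmode(kvm, vcpu);
		if (r)
			return r;	/* e.g. -EINTR when a signal is pending */

		local_irq_disable();
		/* Recheck with interrupts disabled to close the race window */
		if (exit_to_guestmode_work_pending()) {
			local_irq_enable();
			continue;
		}
		enter_guest(vcpu);	/* arch specific guest entry */
		local_irq_enable();
	}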