diff mbox series

[2/7] KVM: x86: Implement Hyper-V's vCPU suspended state

Message ID 20241004140810.34231-3-nikwip@amazon.de (mailing list archive)
State New
Headers show
Series KVM: x86: Introduce new ioctl KVM_HYPERV_SET_TLB_FLUSH_INHIBIT | expand

Commit Message

Nikolas Wipper Oct. 4, 2024, 2:08 p.m. UTC
Introduce a suspension state for Hyper-V enlightened vCPUs. Microsoft's
"Hypervisor Top Level Functional Specification" (TLFS) introduces this
state as a "vCPU that is stopped on an instruction guest boundary, either
explicitly or implicitly due to an intercept". The only documented
explicit suspension is caused in response to a TLB Flush hypercall, which
will use the state switching API in subsequent patches.

Each Hyper-V vCPU now has a 'suspended' boolean field, which is checked
before entering the guest. When set, it forces the vCPU to block. Once in
that state, the vCPU ignores any events. The vCPU is unsuspended by
clearing 'suspended' and issuing a request to force the vCPU out of sleep.

Suspensions are issued as a mechanism to halt execution until state change
is observed on a remote vCPU. Hyper-V vCPUs track this with 'waiting_on',
which holds the 'vcpu_id' of the remote vCPU that forced the vCPU to enter
the suspended state. It's the remote vCPU's responsibility to wake up the
suspended vCPUs when ready. 'waiting_on' ensures the remote vCPU can
selectively unsuspend vCPUs that blocked on its behalf while leaving other
suspended vCPUs undisturbed. One vCPU can only be suspended due to a
single remote vCPU, but different vCPUs can be suspended on behalf of
different or the same remote vCPU(s). The guest is responsible for
avoiding circular dependencies between suspended vCPUs.

Callers of the suspend API are responsible for ensuring that suspend and
unsuspend aren't called in parallel while targeting the same pair of
vCPUs. Otherwise kvm_hv_vcpu_{un}suspend_tlb_flush() ensure 'waiting_on'
and 'suspended' are updated and accessed in the correct order. This, for
example, avoids races where the unsuspended vCPU re-suspends before
kvm_hv_vcpu_unsuspend_tlb_flush() is done updating 'waiting_on'.

Signed-off-by: Nikolas Wipper <nikwip@amazon.de>
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/hyperv.c           | 30 ++++++++++++++++++++++++++++++
 arch/x86/kvm/hyperv.h           | 17 +++++++++++++++++
 arch/x86/kvm/x86.c              |  4 +++-
 4 files changed, 53 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 46e0a466d7fb..7571ac578884 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -695,6 +695,11 @@  struct kvm_vcpu_hv {
 		u64 vm_id;
 		u32 vp_id;
 	} nested;
+
+	/* Hyper-V TLFS suspended state: stopped at an instruction boundary. */
+	bool suspended;
+	/* vcpu_id of the vCPU this one suspended on behalf of; -1 if none. */
+	int waiting_on;
 };
 
 struct kvm_hypervisor_cpuid {
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4f0a94346d00..6e7941ed25ae 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -971,6 +971,7 @@  int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.hyperv = hv_vcpu;
 	hv_vcpu->vcpu = vcpu;
+	hv_vcpu->waiting_on = -1; /* not suspended on behalf of any vCPU */
 
 	synic_init(&hv_vcpu->synic);
 
@@ -2915,3 +2916,42 @@  int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 
 	return 0;
 }
+
+void kvm_hv_vcpu_suspend_tlb_flush(struct kvm_vcpu *vcpu, int vcpu_id)
+{
+	WRITE_ONCE(vcpu->arch.hyperv->waiting_on, vcpu_id);
+	/*
+	 * Make the 'waiting_on' store visible before the 'suspended' store;
+	 * pairs with the smp_rmb() in kvm_hv_vcpu_unsuspend_tlb_flush().
+	 */
+	smp_wmb();
+	WRITE_ONCE(vcpu->arch.hyperv->suspended, true);
+}
+
+void kvm_hv_vcpu_unsuspend_tlb_flush(struct kvm_vcpu *vcpu)
+{
+	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
+	struct kvm_vcpu_hv *vcpu_hv;
+	struct kvm_vcpu *v;
+	unsigned long i;
+
+	/* The on-stack bitmap holds garbage until explicitly cleared. */
+	bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
+
+	kvm_for_each_vcpu(i, v, vcpu->kvm) {
+		if (!kvm_hv_vcpu_suspended(v))
+			continue;
+
+		/* Pairs with the smp_wmb() in kvm_hv_vcpu_suspend_tlb_flush(). */
+		smp_rmb();
+
+		vcpu_hv = to_hv_vcpu(v);
+		if (READ_ONCE(vcpu_hv->waiting_on) == vcpu->vcpu_id) {
+			/* Reset 'waiting_on' before clearing 'suspended'. */
+			WRITE_ONCE(v->arch.hyperv->waiting_on, -1);
+			WRITE_ONCE(v->arch.hyperv->suspended, false);
+			__set_bit(i, vcpu_mask);
+		}
+	}
+
+	kvm_make_vcpus_request_mask(vcpu->kvm, KVM_REQ_EVENT, vcpu_mask);
+}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 913bfc96959c..a55832cea221 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -265,6 +265,19 @@  static inline void kvm_hv_nested_transtion_tlb_flush(struct kvm_vcpu *vcpu,
 }
 
 int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
+
+/*
+ * Returns true if the vCPU is in the Hyper-V TLFS suspended state, i.e.
+ * stopped at an instruction boundary on behalf of another vCPU.
+ */
+static inline bool kvm_hv_vcpu_suspended(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hyperv_enabled &&
+	       READ_ONCE(vcpu->arch.hyperv->suspended);
+}
+
+void kvm_hv_vcpu_suspend_tlb_flush(struct kvm_vcpu *vcpu, int vcpu_id);
+void kvm_hv_vcpu_unsuspend_tlb_flush(struct kvm_vcpu *vcpu);
 #else /* CONFIG_KVM_HYPERV */
 static inline void kvm_hv_setup_tsc_page(struct kvm *kvm,
 					 struct pvclock_vcpu_time_info *hv_clock) {}
@@ -321,6 +330,14 @@  static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu)
 	return vcpu->vcpu_idx;
 }
 static inline void kvm_hv_nested_transtion_tlb_flush(struct kvm_vcpu *vcpu, bool tdp_enabled) {}
+
+static inline bool kvm_hv_vcpu_suspended(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
+static inline void kvm_hv_vcpu_suspend_tlb_flush(struct kvm_vcpu *vcpu, int vcpu_id) {}
+static inline void kvm_hv_vcpu_unsuspend_tlb_flush(struct kvm_vcpu *vcpu) {}
 #endif /* CONFIG_KVM_HYPERV */
 
 #endif /* __ARCH_X86_KVM_HYPERV_H__ */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 15080385b8fe..18d0a300e79a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11107,7 +11107,7 @@  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 static bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
 {
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
-		!vcpu->arch.apf.halted);
+		!vcpu->arch.apf.halted && !kvm_hv_vcpu_suspended(vcpu));
 }
 
 static bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)