[RFC,2/2] Boost vCPUs based on IPI-sender and receiver information

Message ID 20210421150831.60133-3-kentaishiguro@sslab.ics.keio.ac.jp (mailing list archive)
State New, archived
Series Mitigating Excessive Pause-Loop Exiting in VM-Agnostic KVM

Commit Message

Kenta Ishiguro April 21, 2021, 3:08 p.m. UTC
This commit monitors IPI communication between vCPUs and leverages the
relationship between vCPUs to select boost candidates.

Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Kenta Ishiguro <kentaishiguro@sslab.ics.keio.ac.jp>
---
 arch/x86/kvm/lapic.c     | 14 ++++++++++++++
 arch/x86/kvm/vmx/vmx.c   |  2 ++
 include/linux/kvm_host.h |  5 +++++
 virt/kvm/kvm_main.c      | 26 ++++++++++++++++++++++++--
 4 files changed, 45 insertions(+), 2 deletions(-)

Comments

Sean Christopherson April 21, 2021, 3:43 p.m. UTC | #1
On Thu, Apr 22, 2021, Kenta Ishiguro wrote:
> This commit monitors IPI communication between vCPUs and leverages the
> relationship between vCPUs to select boost candidates.
> 
> Cc: David Hildenbrand <david@redhat.com>
> Signed-off-by: Kenta Ishiguro <kentaishiguro@sslab.ics.keio.ac.jp>
> ---
>  arch/x86/kvm/lapic.c     | 14 ++++++++++++++
>  arch/x86/kvm/vmx/vmx.c   |  2 ++
>  include/linux/kvm_host.h |  5 +++++
>  virt/kvm/kvm_main.c      | 26 ++++++++++++++++++++++++--
>  4 files changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index cc369b9ad8f1..c8d967ddecf9 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1269,6 +1269,18 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
>  }
>  EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
>  
> +static void mark_ipi_receiver(struct kvm_lapic *apic, struct kvm_lapic_irq *irq)
> +{
> +	struct kvm_vcpu *dest_vcpu;
> +	u64 prev_ipi_received;
> +
> +	dest_vcpu = kvm_get_vcpu_by_id(apic->vcpu->kvm, irq->dest_id);
> +	if (READ_ONCE(dest_vcpu->sched_outed)) {
> +		prev_ipi_received = READ_ONCE(dest_vcpu->ipi_received);
> +		WRITE_ONCE(dest_vcpu->ipi_received, prev_ipi_received | (1 << apic->vcpu->vcpu_id));

The lack of "ull" on '1' actually means this will probably go sideways at 32 or
more vCPUs: the shift is done as a signed 32-bit int, so vcpu_id 31 sign-extends
into the upper half of the u64, and larger vcpu_ids are undefined behavior.
Regardless, there needs to be an actual fallback path when the number of vCPUs
exceeds what the IPI-boost can support.  That said, it should be quite easy to
allocate a bitmap to support an arbitrary number of vCPUs.
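
For illustration, a minimal sketch of that bitmap fallback using the kernel's
bitmap helpers (bitmap_zalloc(), set_bit(), test_bit(), bitmap_zero()); the
field layout, sizing, and the try_to_boost name are assumptions for the sketch,
not part of the patch:

	/* In struct kvm_vcpu, instead of a fixed-width u64: */
	unsigned long *ipi_received;	/* one bit per sender vcpu_id */

	/* At vCPU creation (error handling elided): */
	vcpu->ipi_received = bitmap_zalloc(KVM_MAX_VCPUS, GFP_KERNEL_ACCOUNT);

	/* Sender side: set_bit() is atomic, no READ_ONCE/WRITE_ONCE pair: */
	set_bit(apic->vcpu->vcpu_id, dest_vcpu->ipi_received);

	/* Consumer side, e.g. in kvm_vcpu_on_spin(): */
	if (test_bit(me->vcpu_id, vcpu->ipi_received))
		try_to_boost = true;

	/*
	 * Reset on sched-in/vcpu_run; note bitmap_zero() is not atomic
	 * vs. a concurrent set_bit(), so a stale bit could survive:
	 */
	bitmap_zero(vcpu->ipi_received, KVM_MAX_VCPUS);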

> +	}
> +}
> +
>  void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
>  {
>  	struct kvm_lapic_irq irq;
> @@ -1287,6 +1299,8 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
>  
>  	trace_kvm_apic_ipi(icr_low, irq.dest_id);
>  
> +	mark_ipi_receiver(apic, &irq);
> +
>  	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
>  }
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 29b40e092d13..ced50935a38b 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6718,6 +6718,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  		vmcs_write32(PLE_WINDOW, vmx->ple_window);
>  	}
>  
> +	WRITE_ONCE(vcpu->ipi_received, 0);
> +
>  	/*
>  	 * We did this in prepare_switch_to_guest, because it needs to
>  	 * be within srcu_read_lock.
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 1b65e7204344..6726aeec03e7 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -320,6 +320,11 @@ struct kvm_vcpu {
>  #endif
>  	bool preempted;
>  	bool ready;
> +	bool sched_outed;
> +	/*
> +	 * The current implementation of strict boost supports up to 64 vCPUs
> +	 */
> +	u64 ipi_received;

...

>  	struct kvm_vcpu_arch arch;
>  	struct kvm_dirty_ring dirty_ring;
>  };
Sean Christopherson April 21, 2021, 4:16 p.m. UTC | #2
On Thu, Apr 22, 2021, Kenta Ishiguro wrote:
> This commit monitors IPI communication between vCPUs and leverages the
> relationship between vCPUs to select boost candidates.
> 
> Cc: David Hildenbrand <david@redhat.com>
> Signed-off-by: Kenta Ishiguro <kentaishiguro@sslab.ics.keio.ac.jp>
> ---
>  arch/x86/kvm/lapic.c     | 14 ++++++++++++++
>  arch/x86/kvm/vmx/vmx.c   |  2 ++
>  include/linux/kvm_host.h |  5 +++++
>  virt/kvm/kvm_main.c      | 26 ++++++++++++++++++++++++--
>  4 files changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index cc369b9ad8f1..c8d967ddecf9 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1269,6 +1269,18 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
>  }
>  EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
>  
> +static void mark_ipi_receiver(struct kvm_lapic *apic, struct kvm_lapic_irq *irq)
> +{
> +	struct kvm_vcpu *dest_vcpu;
> +	u64 prev_ipi_received;
> +
> +	dest_vcpu = kvm_get_vcpu_by_id(apic->vcpu->kvm, irq->dest_id);
> +	if (READ_ONCE(dest_vcpu->sched_outed)) {

dest_vcpu needs to be checked for NULL.
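
kvm_get_vcpu_by_id() returns NULL when no vCPU matches irq->dest_id (and the
guest controls the ICR destination), so something along these lines is needed:

	dest_vcpu = kvm_get_vcpu_by_id(apic->vcpu->kvm, irq->dest_id);
	if (!dest_vcpu)
		return;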

> +		prev_ipi_received = READ_ONCE(dest_vcpu->ipi_received);
> +		WRITE_ONCE(dest_vcpu->ipi_received, prev_ipi_received | (1 << apic->vcpu->vcpu_id));
> +	}
> +}
> +
>  void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
>  {
>  	struct kvm_lapic_irq irq;
> @@ -1287,6 +1299,8 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
>  
>  	trace_kvm_apic_ipi(icr_low, irq.dest_id);
>  
> +	mark_ipi_receiver(apic, &irq);
> +
>  	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
>  }
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 29b40e092d13..ced50935a38b 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6718,6 +6718,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  		vmcs_write32(PLE_WINDOW, vmx->ple_window);
>  	}
>  
> +	WRITE_ONCE(vcpu->ipi_received, 0);

Given that ipi_received is cleared when the vCPU is run, is there actually an
observable advantage to tracking which vCPU sent the IPI?  I.e. how do the
numbers look if ipi_received is a simple bool, and kvm_vcpu_on_spin() yields to
any vCPU that has an IPI pending?
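
A sketch of that simpler variant, with the per-sender u64 reduced to a single
flag (field names here are illustrative, not from the patch):

	/* struct kvm_vcpu: replaces the u64 ipi_received */
	bool ipi_pending;

	/* sender side, in mark_ipi_receiver(): */
	if (READ_ONCE(dest_vcpu->scheduled_out))
		WRITE_ONCE(dest_vcpu->ipi_pending, true);

	/*
	 * kvm_vcpu_on_spin() candidate loop: a pending IPI overrides the
	 * usual eligibility filtering.
	 */
	if (!READ_ONCE(vcpu->ipi_pending) &&
	    !kvm_vcpu_eligible_for_directed_yield(vcpu))
		continue;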

>  	/*
>  	 * We did this in prepare_switch_to_guest, because it needs to
>  	 * be within srcu_read_lock.

...

> @@ -4873,6 +4894,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
>  		WRITE_ONCE(vcpu->preempted, true);
>  		WRITE_ONCE(vcpu->ready, true);
>  	}
> +	WRITE_ONCE(vcpu->sched_outed, true);

s/sched_outed/scheduled_out to be more grammatically correct.

It might also make sense to introduce the flag in a separate patch.  Or even
better, can the existing "preempted" and "ready" be massaged so that we don't
have three flags that are tracking the same basic thing, with slightly different
semantics?
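
For reference, the overlap: "preempted" and "ready" are set only on involuntary
preemption (current->state == TASK_RUNNING), while the new flag is set on every
sched-out, voluntary or not.  A consolidation might collapse them into a single
state field, e.g. (purely illustrative; the existing readers of "preempted" and
"ready" would need auditing):

	enum kvm_vcpu_sched_state {
		KVM_VCPU_RUNNING,
		KVM_VCPU_SCHEDULED_OUT,	/* blocked voluntarily */
		KVM_VCPU_PREEMPTED,	/* descheduled while runnable */
	};

	static void kvm_sched_out(struct preempt_notifier *pn,
				  struct task_struct *next)
	{
		struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);

		WRITE_ONCE(vcpu->sched_state,
			   current->state == TASK_RUNNING ?
				KVM_VCPU_PREEMPTED : KVM_VCPU_SCHEDULED_OUT);
		kvm_arch_vcpu_put(vcpu);
		__this_cpu_write(kvm_running_vcpu, NULL);
	}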

>  	kvm_arch_vcpu_put(vcpu);
>  	__this_cpu_write(kvm_running_vcpu, NULL);
>  }
> -- 
> 2.30.2
>

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cc369b9ad8f1..c8d967ddecf9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1269,6 +1269,18 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
 }
 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
 
+static void mark_ipi_receiver(struct kvm_lapic *apic, struct kvm_lapic_irq *irq)
+{
+	struct kvm_vcpu *dest_vcpu;
+	u64 prev_ipi_received;
+
+	dest_vcpu = kvm_get_vcpu_by_id(apic->vcpu->kvm, irq->dest_id);
+	if (READ_ONCE(dest_vcpu->sched_outed)) {
+		prev_ipi_received = READ_ONCE(dest_vcpu->ipi_received);
+		WRITE_ONCE(dest_vcpu->ipi_received, prev_ipi_received | (1 << apic->vcpu->vcpu_id));
+	}
+}
+
 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
 {
 	struct kvm_lapic_irq irq;
@@ -1287,6 +1299,8 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
 
 	trace_kvm_apic_ipi(icr_low, irq.dest_id);
 
+	mark_ipi_receiver(apic, &irq);
+
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 29b40e092d13..ced50935a38b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6718,6 +6718,8 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmcs_write32(PLE_WINDOW, vmx->ple_window);
 	}
 
+	WRITE_ONCE(vcpu->ipi_received, 0);
+
 	/*
 	 * We did this in prepare_switch_to_guest, because it needs to
 	 * be within srcu_read_lock.
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1b65e7204344..6726aeec03e7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -320,6 +320,11 @@ struct kvm_vcpu {
 #endif
 	bool preempted;
 	bool ready;
+	bool sched_outed;
+	/*
+	 * The current implementation of strict boost supports up to 64 vCPUs
+	 */
+	u64 ipi_received;
 	struct kvm_vcpu_arch arch;
 	struct kvm_dirty_ring dirty_ring;
 };
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 383df23514b9..08e629957e7e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -413,6 +413,10 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	kvm_vcpu_set_dy_eligible(vcpu, false);
 	vcpu->preempted = false;
 	vcpu->ready = false;
+
+	vcpu->sched_outed = false;
+	vcpu->ipi_received = 0;
+
 	preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
 }
 
@@ -3011,6 +3015,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 	int try = 3;
 	int pass;
 	int i;
+	u64 prev_ipi_received;
 
 	kvm_vcpu_set_in_spin_loop(me, true);
 	/*
@@ -3031,12 +3036,25 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 				continue;
 			if (vcpu == me)
 				continue;
+			prev_ipi_received = READ_ONCE(vcpu->ipi_received);
+			if (!READ_ONCE(vcpu->preempted) &&
+			    !(prev_ipi_received & (1 << me->vcpu_id))) {
+				WRITE_ONCE(vcpu->ipi_received,
+					   prev_ipi_received | (1 << me->vcpu_id));
+				continue;
+			}
 			if (rcuwait_active(&vcpu->wait) &&
 			    !vcpu_dy_runnable(vcpu))
 				continue;
 			if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
-				!kvm_arch_vcpu_in_kernel(vcpu))
-				continue;
+				!kvm_arch_vcpu_in_kernel(vcpu)) {
+				prev_ipi_received = READ_ONCE(vcpu->ipi_received);
+				if (!(prev_ipi_received & (1 << me->vcpu_id))) {
+					WRITE_ONCE(vcpu->ipi_received,
+						   prev_ipi_received | (1 << me->vcpu_id));
+					continue;
+				}
+			}
 			if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 				continue;
 
@@ -4859,6 +4877,9 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 	WRITE_ONCE(vcpu->preempted, false);
 	WRITE_ONCE(vcpu->ready, false);
 
+	WRITE_ONCE(vcpu->sched_outed, false);
+	WRITE_ONCE(vcpu->ipi_received, 0);
+
 	__this_cpu_write(kvm_running_vcpu, vcpu);
 	kvm_arch_sched_in(vcpu, cpu);
 	kvm_arch_vcpu_load(vcpu, cpu);
@@ -4873,6 +4894,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 		WRITE_ONCE(vcpu->preempted, true);
 		WRITE_ONCE(vcpu->ready, true);
 	}
+	WRITE_ONCE(vcpu->sched_outed, true);
 	kvm_arch_vcpu_put(vcpu);
 	__this_cpu_write(kvm_running_vcpu, NULL);
 }