diff mbox series

[1/2] KVM: Boost vCPUs that are delivering interrupts

Message ID 1563457031-21189-2-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show
Series Boost vCPUs that are ready to deliver interrupts | expand

Commit Message

Paolo Bonzini July 18, 2019, 1:37 p.m. UTC
From: Wanpeng Li <wanpengli@tencent.com>

Inspired by commit 9cac38dd5d (KVM/s390: Set preempted flag during
vcpu wakeup and interrupt delivery), we want to also boost not just
lock holders but also vCPUs that are delivering interrupts. Most
smp_call_function_many calls are synchronous, so the IPI target vCPUs
are also good yield candidates.  This patch introduces vcpu->ready to
boost vCPUs during wakeup and interrupt delivery time; unlike s390 we do
not reuse vcpu->preempted so that voluntarily preempted vCPUs are taken
into account by kvm_vcpu_on_spin, but vmx_vcpu_pi_put is not affected
(VT-d PI handles voluntary preemption separately, in pi_pre_block).

Testing on 80 HT 2 socket Xeon Skylake server, with 80 vCPUs VM 80GB RAM:
ebizzy -M

            vanilla     boosting    improved
1VM          21443       23520         9%
2VM           2800        8000       180%
3VM           1800        3100        72%

Testing on my Haswell desktop 8 HT, with 8 vCPUs VM 8GB RAM, two VMs,
one running ebizzy -M, the other running 'stress --cpu 2':

w/ boosting + w/o pv sched yield(vanilla)

            vanilla     boosting   improved
              1570         4000      155%

w/ boosting + w/ pv sched yield(vanilla)

            vanilla     boosting   improved
              1844         5157      179%

w/o boosting, perf top in VM:

 72.33%  [kernel]       [k] smp_call_function_many
  4.22%  [kernel]       [k] call_function_i
  3.71%  [kernel]       [k] async_page_fault

w/ boosting, perf top in VM:

 38.43%  [kernel]       [k] smp_call_function_many
  6.31%  [kernel]       [k] async_page_fault
  6.13%  libc-2.23.so   [.] __memcpy_avx_unaligned
  4.88%  [kernel]       [k] call_function_interrupt

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Marc Zyngier <maz@kernel.org>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
	v2->v3: put it in kvm_vcpu_wake_up, use WRITE_ONCE

 arch/s390/kvm/interrupt.c | 2 +-
 include/linux/kvm_host.h  | 1 +
 virt/kvm/kvm_main.c       | 9 +++++++--
 3 files changed, 9 insertions(+), 3 deletions(-)

Comments

Christian Borntraeger July 18, 2019, 1:45 p.m. UTC | #1
On 18.07.19 15:37, Paolo Bonzini wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> Inspired by commit 9cac38dd5d (KVM/s390: Set preempted flag during
> vcpu wakeup and interrupt delivery), we want to also boost not just
> lock holders but also vCPUs that are delivering interrupts. Most
> smp_call_function_many calls are synchronous, so the IPI target vCPUs
> are also good yield candidates.  This patch introduces vcpu->ready to
> boost vCPUs during wakeup and interrupt delivery time; unlike s390 we do
> not reuse vcpu->preempted so that voluntarily preempted vCPUs are taken
> into account by kvm_vcpu_on_spin, but vmx_vcpu_pi_put is not affected
> (VT-d PI handles voluntary preemption separately, in pi_pre_block).
> 
> Testing on 80 HT 2 socket Xeon Skylake server, with 80 vCPUs VM 80GB RAM:
> ebizzy -M
> 
>             vanilla     boosting    improved
> 1VM          21443       23520         9%
> 2VM           2800        8000       180%
> 3VM           1800        3100        72%
> 
> Testing on my Haswell desktop 8 HT, with 8 vCPUs VM 8GB RAM, two VMs,
> one running ebizzy -M, the other running 'stress --cpu 2':
> 
> w/ boosting + w/o pv sched yield(vanilla)
> 
>             vanilla     boosting   improved
>               1570         4000      155%
> 
> w/ boosting + w/ pv sched yield(vanilla)
> 
>             vanilla     boosting   improved
>               1844         5157      179%
> 
> w/o boosting, perf top in VM:
> 
>  72.33%  [kernel]       [k] smp_call_function_many
>   4.22%  [kernel]       [k] call_function_i
>   3.71%  [kernel]       [k] async_page_fault
> 
> w/ boosting, perf top in VM:
> 
>  38.43%  [kernel]       [k] smp_call_function_many
>   6.31%  [kernel]       [k] async_page_fault
>   6.13%  libc-2.23.so   [.] __memcpy_avx_unaligned
>   4.88%  [kernel]       [k] call_function_interrupt
> 
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Christian Borntraeger <borntraeger@de.ibm.com>
> Cc: Paul Mackerras <paulus@ozlabs.org>
> Cc: Marc Zyngier <maz@kernel.org>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> 	v2->v3: put it in kvm_vcpu_wake_up, use WRITE_ONCE


Looks good. Some more comments

> 
>  arch/s390/kvm/interrupt.c | 2 +-
>  include/linux/kvm_host.h  | 1 +
>  virt/kvm/kvm_main.c       | 9 +++++++--
[...]

> @@ -4205,6 +4206,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
>  
>  	if (vcpu->preempted)
>  		vcpu->preempted = false;
> +	if (vcpu->ready)
> +		WRITE_ONCE(vcpu->ready, false);

What is the rationale for checking before writing? Avoiding writable cache line ping-pong?


>  
>  	kvm_arch_sched_in(vcpu, cpu);
>  
> @@ -4216,8 +4219,10 @@ static void kvm_sched_out(struct preempt_notifier *pn,
>  {
>  	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
>  
> -	if (current->state == TASK_RUNNING)
> +	if (current->state == TASK_RUNNING) {
>  		vcpu->preempted = true;

Would it make sense to also use WRITE_ONCE for vcpu->preempted?

> +		WRITE_ONCE(vcpu->ready, true);
> +	}
>  	kvm_arch_vcpu_put(vcpu);
>  }
>  
>
Paolo Bonzini July 18, 2019, 1:50 p.m. UTC | #2
On 18/07/19 15:45, Christian Borntraeger wrote:
> 
> 
> On 18.07.19 15:37, Paolo Bonzini wrote:
>> From: Wanpeng Li <wanpengli@tencent.com>
>>
>> Inspired by commit 9cac38dd5d (KVM/s390: Set preempted flag during
>> vcpu wakeup and interrupt delivery), we want to also boost not just
>> lock holders but also vCPUs that are delivering interrupts. Most
>> smp_call_function_many calls are synchronous, so the IPI target vCPUs
>> are also good yield candidates.  This patch introduces vcpu->ready to
>> boost vCPUs during wakeup and interrupt delivery time; unlike s390 we do
>> not reuse vcpu->preempted so that voluntarily preempted vCPUs are taken
>> into account by kvm_vcpu_on_spin, but vmx_vcpu_pi_put is not affected
>> (VT-d PI handles voluntary preemption separately, in pi_pre_block).
>>
>> Testing on 80 HT 2 socket Xeon Skylake server, with 80 vCPUs VM 80GB RAM:
>> ebizzy -M
>>
>>             vanilla     boosting    improved
>> 1VM          21443       23520         9%
>> 2VM           2800        8000       180%
>> 3VM           1800        3100        72%
>>
>> Testing on my Haswell desktop 8 HT, with 8 vCPUs VM 8GB RAM, two VMs,
>> one running ebizzy -M, the other running 'stress --cpu 2':
>>
>> w/ boosting + w/o pv sched yield(vanilla)
>>
>>             vanilla     boosting   improved
>>               1570         4000      155%
>>
>> w/ boosting + w/ pv sched yield(vanilla)
>>
>>             vanilla     boosting   improved
>>               1844         5157      179%
>>
>> w/o boosting, perf top in VM:
>>
>>  72.33%  [kernel]       [k] smp_call_function_many
>>   4.22%  [kernel]       [k] call_function_i
>>   3.71%  [kernel]       [k] async_page_fault
>>
>> w/ boosting, perf top in VM:
>>
>>  38.43%  [kernel]       [k] smp_call_function_many
>>   6.31%  [kernel]       [k] async_page_fault
>>   6.13%  libc-2.23.so   [.] __memcpy_avx_unaligned
>>   4.88%  [kernel]       [k] call_function_interrupt
>>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: Radim Krčmář <rkrcmar@redhat.com>
>> Cc: Christian Borntraeger <borntraeger@de.ibm.com>
>> Cc: Paul Mackerras <paulus@ozlabs.org>
>> Cc: Marc Zyngier <maz@kernel.org>
>> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>> 	v2->v3: put it in kvm_vcpu_wake_up, use WRITE_ONCE
> 
> 
> Looks good. Some more comments
> 
>>
>>  arch/s390/kvm/interrupt.c | 2 +-
>>  include/linux/kvm_host.h  | 1 +
>>  virt/kvm/kvm_main.c       | 9 +++++++--
> [...]
> 
>> @@ -4205,6 +4206,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
>>  
>>  	if (vcpu->preempted)
>>  		vcpu->preempted = false;
>> +	if (vcpu->ready)
>> +		WRITE_ONCE(vcpu->ready, false);
> 
> What is the rationale for checking before writing? Avoiding writable cache line ping-pong?

I think it can be removed.  The only case where you'd have ping pong is
when vcpu->ready is true due to kvm_vcpu_wake_up, so it's not saving
anything.

>>  	kvm_arch_sched_in(vcpu, cpu);
>>  
>> @@ -4216,8 +4219,10 @@ static void kvm_sched_out(struct preempt_notifier *pn,
>>  {
>>  	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
>>  
>> -	if (current->state == TASK_RUNNING)
>> +	if (current->state == TASK_RUNNING) {
>>  		vcpu->preempted = true;
> 
> Would it make sense to also use WRITE_ONCE for vcpu->preempted?

vcpu->preempted is not read/written anymore by other threads after this
patch.
> 
>> +		WRITE_ONCE(vcpu->ready, true);
>> +	}
>>  	kvm_arch_vcpu_put(vcpu);
>>  }
>>  
>>
>
diff mbox series

Patch

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9dde4d7d8704..26f8bf4a22a7 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1240,7 +1240,7 @@  void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 		 * The vcpu gave up the cpu voluntarily, mark it as a good
 		 * yield-candidate.
 		 */
-		vcpu->preempted = true;
+		WRITE_ONCE(vcpu->ready, true);
 		swake_up_one(&vcpu->wq);
 		vcpu->stat.halt_wakeup++;
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c5da875f19e3..5c5b5867024c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -318,6 +318,7 @@  struct kvm_vcpu {
 	} spin_loop;
 #endif
 	bool preempted;
+	bool ready;
 	struct kvm_vcpu_arch arch;
 	struct dentry *debugfs_dentry;
 };
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b4ab59dd6846..65665e13ab9a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2387,6 +2387,7 @@  bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 	wqp = kvm_arch_vcpu_wq(vcpu);
 	if (swq_has_sleeper(wqp)) {
 		swake_up_one(wqp);
+		WRITE_ONCE(vcpu->ready, true);
 		++vcpu->stat.halt_wakeup;
 		return true;
 	}
@@ -2500,7 +2501,7 @@  void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 				continue;
 			} else if (pass && i > last_boosted_vcpu)
 				break;
-			if (!READ_ONCE(vcpu->preempted))
+			if (!READ_ONCE(vcpu->ready))
 				continue;
 			if (vcpu == me)
 				continue;
@@ -4205,6 +4206,8 @@  static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 
 	if (vcpu->preempted)
 		vcpu->preempted = false;
+	if (vcpu->ready)
+		WRITE_ONCE(vcpu->ready, false);
 
 	kvm_arch_sched_in(vcpu, cpu);
 
@@ -4216,8 +4219,10 @@  static void kvm_sched_out(struct preempt_notifier *pn,
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-	if (current->state == TASK_RUNNING)
+	if (current->state == TASK_RUNNING) {
 		vcpu->preempted = true;
+		WRITE_ONCE(vcpu->ready, true);
+	}
 	kvm_arch_vcpu_put(vcpu);
 }