diff mbox series

KVM: X86: Count success and invalid yields

Message ID 1617697935-4158-1-git-send-email-wanpengli@tencent.com (mailing list archive)
State New, archived
Headers show
Series KVM: X86: Count success and invalid yields | expand

Commit Message

Wanpeng Li April 6, 2021, 8:32 a.m. UTC
From: Wanpeng Li <wanpengli@tencent.com>

To analyze some performance issues with lock contention and scheduling,
it is nice to know when directed yield are successful or failing.

Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 26 ++++++++++++++++++++------
 2 files changed, 22 insertions(+), 6 deletions(-)

Comments

Sean Christopherson April 8, 2021, 5:08 p.m. UTC | #1
On Tue, Apr 06, 2021, Wanpeng Li wrote:
> From: Wanpeng Li <wanpengli@tencent.com>
> 
> To analyze some performance issues with lock contention and scheduling,
> it is nice to know when directed yield are successful or failing.
> 
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/x86.c              | 26 ++++++++++++++++++++------
>  2 files changed, 22 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 44f8930..157bcaa 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1126,6 +1126,8 @@ struct kvm_vcpu_stat {
>  	u64 halt_poll_success_ns;
>  	u64 halt_poll_fail_ns;
>  	u64 nested_run;
> +	u64 yield_directed;
> +	u64 yield_directed_ignore;
>  };
>  
>  struct x86_instruction_info;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 16fb395..3b475cd 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -246,6 +246,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
>  	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
>  	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
>  	VCPU_STAT("nested_run", nested_run),
> +	VCPU_STAT("yield_directed", yield_directed),

This is ambiguous, it's not clear without looking at the code if it's counting
attempts or actual yields.

> +	VCPU_STAT("yield_directed_ignore", yield_directed_ignore),

"ignored" also feels a bit misleading, as that implies KVM deliberately ignored
a valid request, whereas many of the failure paths are due to invalid requests
or errors of some kind.

What about mirroring the halt poll stats, i.e. track "attempted" and "successful",
as opposed to "attempted" and "ignored/failed".    And maybe switched directed
and yield?  I.e. directed_yield_attempted and directed_yield_successful.

Alternatively, would it make sense to do s/directed/pv, or is that not worth the
potential risk of being wrong if a non-paravirt use case comes along?

	pv_yield_attempted
	pv_yield_successful

>  	VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
>  	VM_STAT("mmu_pte_write", mmu_pte_write),
>  	VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
> @@ -8211,21 +8213,33 @@ void kvm_apicv_init(struct kvm *kvm, bool enable)
>  }
>  EXPORT_SYMBOL_GPL(kvm_apicv_init);
>  
> -static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
> +static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
>  {
>  	struct kvm_vcpu *target = NULL;
>  	struct kvm_apic_map *map;
>  
> +	vcpu->stat.yield_directed++;
> +
>  	rcu_read_lock();
> -	map = rcu_dereference(kvm->arch.apic_map);
> +	map = rcu_dereference(vcpu->kvm->arch.apic_map);
>  
>  	if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
>  		target = map->phys_map[dest_id]->vcpu;
>  
>  	rcu_read_unlock();
> +	if (!target)
> +		goto no_yield;
> +
> +	if (!READ_ONCE(target->ready))

I vote to keep these checks together.  That'll also make the addition of the
"don't yield to self" check match the order of ready vs. self in kvm_vcpu_on_spin().

	if (!target || !READ_ONCE(target->ready))

> +		goto no_yield;
>  
> -	if (target && READ_ONCE(target->ready))
> -		kvm_vcpu_yield_to(target);
> +	if (kvm_vcpu_yield_to(target) <= 0)
> +		goto no_yield;
> +	return;
> +
> +no_yield:
> +	vcpu->stat.yield_directed_ignore++;
> +	return;
>  }
>  
>  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
> @@ -8272,7 +8286,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>  			break;
>  
>  		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
> -		kvm_sched_yield(vcpu->kvm, a1);
> +		kvm_sched_yield(vcpu, a1);
>  		ret = 0;
>  		break;
>  #ifdef CONFIG_X86_64
> @@ -8290,7 +8304,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
>  		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
>  			break;
>  
> -		kvm_sched_yield(vcpu->kvm, a0);
> +		kvm_sched_yield(vcpu, a0);
>  		ret = 0;
>  		break;
>  	default:
> -- 
> 2.7.4
>
Wanpeng Li April 9, 2021, 3:05 a.m. UTC | #2
On Fri, 9 Apr 2021 at 01:08, Sean Christopherson <seanjc@google.com> wrote:
>
> On Tue, Apr 06, 2021, Wanpeng Li wrote:
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > To analyze some performance issues with lock contention and scheduling,
> > it is nice to know when directed yield are successful or failing.
> >
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  2 ++
> >  arch/x86/kvm/x86.c              | 26 ++++++++++++++++++++------
> >  2 files changed, 22 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 44f8930..157bcaa 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1126,6 +1126,8 @@ struct kvm_vcpu_stat {
> >       u64 halt_poll_success_ns;
> >       u64 halt_poll_fail_ns;
> >       u64 nested_run;
> > +     u64 yield_directed;
> > +     u64 yield_directed_ignore;
> >  };
> >
> >  struct x86_instruction_info;
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 16fb395..3b475cd 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -246,6 +246,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
> >       VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
> >       VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
> >       VCPU_STAT("nested_run", nested_run),
> > +     VCPU_STAT("yield_directed", yield_directed),
>
> This is ambiguous, it's not clear without looking at the code if it's counting
> attempts or actual yields.
>
> > +     VCPU_STAT("yield_directed_ignore", yield_directed_ignore),
>
> "ignored" also feels a bit misleading, as that implies KVM deliberately ignored
> a valid request, whereas many of the failure paths are due to invalid requests
> or errors of some kind.
>
> What about mirroring the halt poll stats, i.e. track "attempted" and "successful",
> as opposed to "attempted" and "ignored/failed".    And maybe switched directed
> and yield?  I.e. directed_yield_attempted and directed_yield_successful.

Good suggestion.

>
> Alternatively, would it make sense to do s/directed/pv, or is that not worth the
> potential risk of being wrong if a non-paravirt use case comes along?
>
>         pv_yield_attempted
>         pv_yield_successful
>
> >       VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
> >       VM_STAT("mmu_pte_write", mmu_pte_write),
> >       VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
> > @@ -8211,21 +8213,33 @@ void kvm_apicv_init(struct kvm *kvm, bool enable)
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_apicv_init);
> >
> > -static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
> > +static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
> >  {
> >       struct kvm_vcpu *target = NULL;
> >       struct kvm_apic_map *map;
> >
> > +     vcpu->stat.yield_directed++;
> > +
> >       rcu_read_lock();
> > -     map = rcu_dereference(kvm->arch.apic_map);
> > +     map = rcu_dereference(vcpu->kvm->arch.apic_map);
> >
> >       if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
> >               target = map->phys_map[dest_id]->vcpu;
> >
> >       rcu_read_unlock();
> > +     if (!target)
> > +             goto no_yield;
> > +
> > +     if (!READ_ONCE(target->ready))
>
> I vote to keep these checks together.  That'll also make the addition of the
> "don't yield to self" check match the order of ready vs. self in kvm_vcpu_on_spin().

Do it in v2.

    Wanpeng
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 44f8930..157bcaa 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1126,6 +1126,8 @@  struct kvm_vcpu_stat {
 	u64 halt_poll_success_ns;
 	u64 halt_poll_fail_ns;
 	u64 nested_run;
+	u64 yield_directed;
+	u64 yield_directed_ignore;
 };
 
 struct x86_instruction_info;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 16fb395..3b475cd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,6 +246,8 @@  struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
 	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
 	VCPU_STAT("nested_run", nested_run),
+	VCPU_STAT("yield_directed", yield_directed),
+	VCPU_STAT("yield_directed_ignore", yield_directed_ignore),
 	VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
 	VM_STAT("mmu_pte_write", mmu_pte_write),
 	VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
@@ -8211,21 +8213,33 @@  void kvm_apicv_init(struct kvm *kvm, bool enable)
 }
 EXPORT_SYMBOL_GPL(kvm_apicv_init);
 
-static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
+static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
 {
 	struct kvm_vcpu *target = NULL;
 	struct kvm_apic_map *map;
 
+	vcpu->stat.yield_directed++;
+
 	rcu_read_lock();
-	map = rcu_dereference(kvm->arch.apic_map);
+	map = rcu_dereference(vcpu->kvm->arch.apic_map);
 
 	if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
 		target = map->phys_map[dest_id]->vcpu;
 
 	rcu_read_unlock();
+	if (!target)
+		goto no_yield;
+
+	if (!READ_ONCE(target->ready))
+		goto no_yield;
 
-	if (target && READ_ONCE(target->ready))
-		kvm_vcpu_yield_to(target);
+	if (kvm_vcpu_yield_to(target) <= 0)
+		goto no_yield;
+	return;
+
+no_yield:
+	vcpu->stat.yield_directed_ignore++;
+	return;
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -8272,7 +8286,7 @@  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 			break;
 
 		kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
-		kvm_sched_yield(vcpu->kvm, a1);
+		kvm_sched_yield(vcpu, a1);
 		ret = 0;
 		break;
 #ifdef CONFIG_X86_64
@@ -8290,7 +8304,7 @@  int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
 			break;
 
-		kvm_sched_yield(vcpu->kvm, a0);
+		kvm_sched_yield(vcpu, a0);
 		ret = 0;
 		break;
 	default: