diff mbox series

[v6] arch/powerpc/kvm: Add support for reading VPA counters for pseries guests

Message ID 20240506145605.73794-1-gautam@linux.ibm.com (mailing list archive)
State New
Headers show
Series [v6] arch/powerpc/kvm: Add support for reading VPA counters for pseries guests | expand

Commit Message

Gautam Menghani May 6, 2024, 2:56 p.m. UTC
PAPR hypervisor has introduced three new counters in the VPA area of
LPAR CPUs for KVM L2 guest (see [1] for terminology) observability - 2
for context switches from host to guest and vice versa, and 1 counter
for getting the total time spent inside the KVM guest. Add a tracepoint
that enables reading the counters for use by ftrace/perf. Note that this
tracepoint is only available for nestedv2 API (i.e, KVM on PowerVM).

[1] Terminology:
a. L1 refers to the VM (LPAR) booted on top of PAPR hypervisor
b. L2 refers to the KVM guest booted on top of L1.

Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
v5 -> v6:
1. Use TRACE_EVENT_FN to enable/disable counters only once.
2. Remove the agg. counters from vcpu->arch.
3. Use PACA to maintain old counter values instead of zeroing on every
entry.
4. Simplify variable names

v4 -> v5:
1. Define helper functions for getting/setting the accumulation counter
in L2's VPA

v3 -> v4:
1. After vcpu_run, check the VPA flag instead of checking for tracepoint
being enabled for disabling the cs time accumulation.

v2 -> v3:
1. Move the counter disabling and zeroing code to a different function.
2. Move the get_lppaca() inside the tracepoint_enabled() branch.
3. Add the aggregation logic to maintain total context switch time.

v1 -> v2:
1. Fix the build error due to invalid struct member reference.

 arch/powerpc/include/asm/lppaca.h | 11 +++++--
 arch/powerpc/include/asm/paca.h   |  5 +++
 arch/powerpc/kvm/book3s_hv.c      | 52 +++++++++++++++++++++++++++++++
 arch/powerpc/kvm/trace_hv.h       | 27 ++++++++++++++++
 4 files changed, 92 insertions(+), 3 deletions(-)

Comments

Naveen N Rao May 7, 2024, 6:35 a.m. UTC | #1
On Mon, May 06, 2024 at 08:26:03PM GMT, Gautam Menghani wrote:
> PAPR hypervisor has introduced three new counters in the VPA area of
> LPAR CPUs for KVM L2 guest (see [1] for terminology) observability - 2
> for context switches from host to guest and vice versa, and 1 counter
> for getting the total time spent inside the KVM guest. Add a tracepoint
> that enables reading the counters for use by ftrace/perf. Note that this
> tracepoint is only available for nestedv2 API (i.e, KVM on PowerVM).
> 
> [1] Terminology:
> a. L1 refers to the VM (LPAR) booted on top of PAPR hypervisor
> b. L2 refers to the KVM guest booted on top of L1.
> 
> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> ---
> v5 -> v6:
> 1. Use TRACE_EVENT_FN to enable/disable counters only once.
> 2. Remove the agg. counters from vcpu->arch.
> 3. Use PACA to maintain old counter values instead of zeroing on every
> entry.
> 4. Simplify variable names
> 
> v4 -> v5:
> 1. Define helper functions for getting/setting the accumulation counter
> in L2's VPA
> 
> v3 -> v4:
> 1. After vcpu_run, check the VPA flag instead of checking for tracepoint
> being enabled for disabling the cs time accumulation.
> 
> v2 -> v3:
> 1. Move the counter disabling and zeroing code to a different function.
> 2. Move the get_lppaca() inside the tracepoint_enabled() branch.
> 3. Add the aggregation logic to maintain total context switch time.
> 
> v1 -> v2:
> 1. Fix the build error due to invalid struct member reference.
> 
>  arch/powerpc/include/asm/lppaca.h | 11 +++++--
>  arch/powerpc/include/asm/paca.h   |  5 +++
>  arch/powerpc/kvm/book3s_hv.c      | 52 +++++++++++++++++++++++++++++++
>  arch/powerpc/kvm/trace_hv.h       | 27 ++++++++++++++++
>  4 files changed, 92 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
> index 61ec2447dabf..f40a646bee3c 100644
> --- a/arch/powerpc/include/asm/lppaca.h
> +++ b/arch/powerpc/include/asm/lppaca.h
> @@ -62,7 +62,8 @@ struct lppaca {
>  	u8	donate_dedicated_cpu;	/* Donate dedicated CPU cycles */
>  	u8	fpregs_in_use;
>  	u8	pmcregs_in_use;
> -	u8	reserved8[28];
> +	u8	l2_counters_enable;  /* Enable usage of counters for KVM guest */
> +	u8	reserved8[27];
>  	__be64	wait_state_cycles;	/* Wait cycles for this proc */
>  	u8	reserved9[28];
>  	__be16	slb_count;		/* # of SLBs to maintain */
> @@ -92,9 +93,13 @@ struct lppaca {
>  	/* cacheline 4-5 */
>  
>  	__be32	page_ins;		/* CMO Hint - # page ins by OS */
> -	u8	reserved12[148];
> +	u8	reserved12[28];
> +	volatile __be64 l1_to_l2_cs_tb;
> +	volatile __be64 l2_to_l1_cs_tb;
> +	volatile __be64 l2_runtime_tb;
> +	u8 reserved13[96];
>  	volatile __be64 dtl_idx;	/* Dispatch Trace Log head index */
> -	u8	reserved13[96];
> +	u8	reserved14[96];
>  } ____cacheline_aligned;
>  
>  #define lppaca_of(cpu)	(*paca_ptrs[cpu]->lppaca_ptr)
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 1d58da946739..f20ac7a6efa4 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -278,6 +278,11 @@ struct paca_struct {
>  	struct mce_info *mce_info;
>  	u8 mce_pending_irq_work;
>  #endif /* CONFIG_PPC_BOOK3S_64 */
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +	u64 l1_to_l2_cs;
> +	u64 l2_to_l1_cs;
> +	u64 l2_runtime_agg;
> +#endif
>  } ____cacheline_aligned;
>  
>  extern void copy_mm_to_paca(struct mm_struct *mm);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 8e86eb577eb8..ed69ad58bd02 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4108,6 +4108,54 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
>  	}
>  }
>  
> +static inline int kvmhv_get_l2_counters_status(void)
> +{
> +	return get_lppaca()->l2_counters_enable;
> +}
> +
> +static inline void kvmhv_set_l2_counters_status(int cpu, bool status)
> +{
> +	if (status)
> +		lppaca_of(cpu).l2_counters_enable = 1;
> +	else
> +		lppaca_of(cpu).l2_counters_enable = 0;
> +}
> +
> +int kmvhv_counters_tracepoint_regfunc(void)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu) {
> +		kvmhv_set_l2_counters_status(cpu, true);
> +	}
> +	return 0;
> +}
> +
> +void kmvhv_counters_tracepoint_unregfunc(void)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu) {
> +		kvmhv_set_l2_counters_status(cpu, false);
> +	}
> +}
> +
> +static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
> +{
> +	struct lppaca *lp = get_lppaca();
> +	u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns;
> +
> +	l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb));
> +	l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb));
> +	l2_runtime_ns = tb_to_ns(be64_to_cpu(lp->l2_runtime_tb));
> +	trace_kvmppc_vcpu_stats(vcpu, l1_to_l2_ns - local_paca->l1_to_l2_cs,
> +					l2_to_l1_ns - local_paca->l2_to_l1_cs,
> +					l2_runtime_ns - local_paca->l2_runtime_agg);

Depending on how the hypervisor works, if the vcpu was in l2 when the 
tracepoint is enabled, the counters may not be updated on exit and we 
may emit a trace with all values zero. If that is possible, it might be 
a good idea to only emit the trace if any of the counters are non-zero.

Otherwise, this looks good to me.
Acked-by: Naveen N Rao <naveen@kernel.org>


- Naveen

> +	local_paca->l1_to_l2_cs = l1_to_l2_ns;
> +	local_paca->l2_to_l1_cs = l2_to_l1_ns;
> +	local_paca->l2_runtime_agg = l2_runtime_ns;
> +}
> +
>  static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
>  				     unsigned long lpcr, u64 *tb)
>  {
> @@ -4156,6 +4204,10 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
>  
>  	timer_rearm_host_dec(*tb);
>  
> +	/* Record context switch and guest_run_time data */
> +	if (kvmhv_get_l2_counters_status())
> +		do_trace_nested_cs_time(vcpu);
> +
>  	return trap;
>  }
>  
> diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
> index 8d57c8428531..dc118ab88f23 100644
> --- a/arch/powerpc/kvm/trace_hv.h
> +++ b/arch/powerpc/kvm/trace_hv.h
> @@ -238,6 +238,9 @@
>  	{H_MULTI_THREADS_ACTIVE,	"H_MULTI_THREADS_ACTIVE"}, \
>  	{H_OUTSTANDING_COP_OPS,		"H_OUTSTANDING_COP_OPS"}
>  
> +int kmvhv_counters_tracepoint_regfunc(void);
> +void kmvhv_counters_tracepoint_unregfunc(void);
> +
>  TRACE_EVENT(kvm_guest_enter,
>  	TP_PROTO(struct kvm_vcpu *vcpu),
>  	TP_ARGS(vcpu),
> @@ -512,6 +515,30 @@ TRACE_EVENT(kvmppc_run_vcpu_exit,
>  			__entry->vcpu_id, __entry->exit, __entry->ret)
>  );
>  
> +TRACE_EVENT_FN(kvmppc_vcpu_stats,
> +	TP_PROTO(struct kvm_vcpu *vcpu, u64 l1_to_l2_cs, u64 l2_to_l1_cs, u64 l2_runtime),
> +
> +	TP_ARGS(vcpu, l1_to_l2_cs, l2_to_l1_cs, l2_runtime),
> +
> +	TP_STRUCT__entry(
> +		__field(int,		vcpu_id)
> +		__field(u64,		l1_to_l2_cs)
> +		__field(u64,		l2_to_l1_cs)
> +		__field(u64,		l2_runtime)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->vcpu_id  = vcpu->vcpu_id;
> +		__entry->l1_to_l2_cs = l1_to_l2_cs;
> +		__entry->l2_to_l1_cs = l2_to_l1_cs;
> +		__entry->l2_runtime = l2_runtime;
> +	),
> +
> +	TP_printk("VCPU %d: l1_to_l2_cs_time=%llu ns l2_to_l1_cs_time=%llu ns l2_runtime=%llu ns",
> +		__entry->vcpu_id,  __entry->l1_to_l2_cs,
> +		__entry->l2_to_l1_cs, __entry->l2_runtime),
> +	kmvhv_counters_tracepoint_regfunc, kmvhv_counters_tracepoint_unregfunc
> +);
>  #endif /* _TRACE_KVM_HV_H */
>  
>  /* This part must be outside protection */
> -- 
> 2.44.0
>
Nicholas Piggin May 8, 2024, 12:21 p.m. UTC | #2
On Tue May 7, 2024 at 12:56 AM AEST, Gautam Menghani wrote:
> PAPR hypervisor has introduced three new counters in the VPA area of
> LPAR CPUs for KVM L2 guest (see [1] for terminology) observability - 2
> for context switches from host to guest and vice versa, and 1 counter
> for getting the total time spent inside the KVM guest. Add a tracepoint
> that enables reading the counters for use by ftrace/perf. Note that this
> tracepoint is only available for nestedv2 API (i.e, KVM on PowerVM).
>
> [1] Terminology:
> a. L1 refers to the VM (LPAR) booted on top of PAPR hypervisor
> b. L2 refers to the KVM guest booted on top of L1.
>
> Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
> Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> ---
> v5 -> v6:
> 1. Use TRACE_EVENT_FN to enable/disable counters only once.
> 2. Remove the agg. counters from vcpu->arch.
> 3. Use PACA to maintain old counter values instead of zeroing on every
> entry.
> 4. Simplify variable names
>
> v4 -> v5:
> 1. Define helper functions for getting/setting the accumulation counter
> in L2's VPA
>
> v3 -> v4:
> 1. After vcpu_run, check the VPA flag instead of checking for tracepoint
> being enabled for disabling the cs time accumulation.
>
> v2 -> v3:
> 1. Move the counter disabling and zeroing code to a different function.
> 2. Move the get_lppaca() inside the tracepoint_enabled() branch.
> 3. Add the aggregation logic to maintain total context switch time.
>
> v1 -> v2:
> 1. Fix the build error due to invalid struct member reference.
>
>  arch/powerpc/include/asm/lppaca.h | 11 +++++--
>  arch/powerpc/include/asm/paca.h   |  5 +++
>  arch/powerpc/kvm/book3s_hv.c      | 52 +++++++++++++++++++++++++++++++
>  arch/powerpc/kvm/trace_hv.h       | 27 ++++++++++++++++
>  4 files changed, 92 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
> index 61ec2447dabf..f40a646bee3c 100644
> --- a/arch/powerpc/include/asm/lppaca.h
> +++ b/arch/powerpc/include/asm/lppaca.h
> @@ -62,7 +62,8 @@ struct lppaca {
>  	u8	donate_dedicated_cpu;	/* Donate dedicated CPU cycles */
>  	u8	fpregs_in_use;
>  	u8	pmcregs_in_use;
> -	u8	reserved8[28];
> +	u8	l2_counters_enable;  /* Enable usage of counters for KVM guest */
> +	u8	reserved8[27];
>  	__be64	wait_state_cycles;	/* Wait cycles for this proc */
>  	u8	reserved9[28];
>  	__be16	slb_count;		/* # of SLBs to maintain */
> @@ -92,9 +93,13 @@ struct lppaca {
>  	/* cacheline 4-5 */
>  
>  	__be32	page_ins;		/* CMO Hint - # page ins by OS */
> -	u8	reserved12[148];
> +	u8	reserved12[28];
> +	volatile __be64 l1_to_l2_cs_tb;
> +	volatile __be64 l2_to_l1_cs_tb;
> +	volatile __be64 l2_runtime_tb;

I wonder if we shouldn't be moving over to use READ_ONCE for these
instead of volatile.

Probably doesn't really matter here. Maybe general audit of volatiles
in arch/powerpc could be something to put in linuxppc issues.

> +	u8 reserved13[96];
>  	volatile __be64 dtl_idx;	/* Dispatch Trace Log head index */
> -	u8	reserved13[96];
> +	u8	reserved14[96];
>  } ____cacheline_aligned;
>  
>  #define lppaca_of(cpu)	(*paca_ptrs[cpu]->lppaca_ptr)
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index 1d58da946739..f20ac7a6efa4 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -278,6 +278,11 @@ struct paca_struct {
>  	struct mce_info *mce_info;
>  	u8 mce_pending_irq_work;
>  #endif /* CONFIG_PPC_BOOK3S_64 */
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +	u64 l1_to_l2_cs;
> +	u64 l2_to_l1_cs;
> +	u64 l2_runtime_agg;
> +#endif
>  } ____cacheline_aligned;

I don't think these really need to be in the paca.

paca is for per-cpu stuff that is accessed in real mode, early interrupt
entry, etc.

>  
>  extern void copy_mm_to_paca(struct mm_struct *mm);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 8e86eb577eb8..ed69ad58bd02 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4108,6 +4108,54 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
>  	}
>  }
>  
> +static inline int kvmhv_get_l2_counters_status(void)
> +{
> +	return get_lppaca()->l2_counters_enable;
> +}
> +
> +static inline void kvmhv_set_l2_counters_status(int cpu, bool status)
> +{
> +	if (status)
> +		lppaca_of(cpu).l2_counters_enable = 1;
> +	else
> +		lppaca_of(cpu).l2_counters_enable = 0;
> +}
> +
> +int kmvhv_counters_tracepoint_regfunc(void)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu) {
> +		kvmhv_set_l2_counters_status(cpu, true);
> +	}
> +	return 0;
> +}
> +
> +void kmvhv_counters_tracepoint_unregfunc(void)
> +{
> +	int cpu;
> +
> +	for_each_possible_cpu(cpu) {
> +		kvmhv_set_l2_counters_status(cpu, false);
> +	}
> +}
> +
> +static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
> +{
> +	struct lppaca *lp = get_lppaca();
> +	u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns;
> +
> +	l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb));
> +	l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb));
> +	l2_runtime_ns = tb_to_ns(be64_to_cpu(lp->l2_runtime_tb));
> +	trace_kvmppc_vcpu_stats(vcpu, l1_to_l2_ns - local_paca->l1_to_l2_cs,
> +					l2_to_l1_ns - local_paca->l2_to_l1_cs,
> +					l2_runtime_ns - local_paca->l2_runtime_agg);
> +	local_paca->l1_to_l2_cs = l1_to_l2_ns;
> +	local_paca->l2_to_l1_cs = l2_to_l1_ns;
> +	local_paca->l2_runtime_agg = l2_runtime_ns;
> +}

So you're just using the per-cpu values to cache the last value
read so next time you can take a delta.

Could you zero the counters before entry? Or just use local
variables to read the before values.

If you want to keep the same scheme, I think per-cpu variables
should work.

Otherwise,

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

> +
>  static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
>  				     unsigned long lpcr, u64 *tb)
>  {
> @@ -4156,6 +4204,10 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
>  
>  	timer_rearm_host_dec(*tb);
>  
> +	/* Record context switch and guest_run_time data */
> +	if (kvmhv_get_l2_counters_status())
> +		do_trace_nested_cs_time(vcpu);
> +
>  	return trap;
>  }
>  
> diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
> index 8d57c8428531..dc118ab88f23 100644
> --- a/arch/powerpc/kvm/trace_hv.h
> +++ b/arch/powerpc/kvm/trace_hv.h
> @@ -238,6 +238,9 @@
>  	{H_MULTI_THREADS_ACTIVE,	"H_MULTI_THREADS_ACTIVE"}, \
>  	{H_OUTSTANDING_COP_OPS,		"H_OUTSTANDING_COP_OPS"}
>  
> +int kmvhv_counters_tracepoint_regfunc(void);
> +void kmvhv_counters_tracepoint_unregfunc(void);
> +
>  TRACE_EVENT(kvm_guest_enter,
>  	TP_PROTO(struct kvm_vcpu *vcpu),
>  	TP_ARGS(vcpu),
> @@ -512,6 +515,30 @@ TRACE_EVENT(kvmppc_run_vcpu_exit,
>  			__entry->vcpu_id, __entry->exit, __entry->ret)
>  );
>  
> +TRACE_EVENT_FN(kvmppc_vcpu_stats,
> +	TP_PROTO(struct kvm_vcpu *vcpu, u64 l1_to_l2_cs, u64 l2_to_l1_cs, u64 l2_runtime),
> +
> +	TP_ARGS(vcpu, l1_to_l2_cs, l2_to_l1_cs, l2_runtime),
> +
> +	TP_STRUCT__entry(
> +		__field(int,		vcpu_id)
> +		__field(u64,		l1_to_l2_cs)
> +		__field(u64,		l2_to_l1_cs)
> +		__field(u64,		l2_runtime)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->vcpu_id  = vcpu->vcpu_id;
> +		__entry->l1_to_l2_cs = l1_to_l2_cs;
> +		__entry->l2_to_l1_cs = l2_to_l1_cs;
> +		__entry->l2_runtime = l2_runtime;
> +	),
> +
> +	TP_printk("VCPU %d: l1_to_l2_cs_time=%llu ns l2_to_l1_cs_time=%llu ns l2_runtime=%llu ns",
> +		__entry->vcpu_id,  __entry->l1_to_l2_cs,
> +		__entry->l2_to_l1_cs, __entry->l2_runtime),
> +	kmvhv_counters_tracepoint_regfunc, kmvhv_counters_tracepoint_unregfunc
> +);
>  #endif /* _TRACE_KVM_HV_H */
>  
>  /* This part must be outside protection */
Michael Ellerman May 8, 2024, 12:36 p.m. UTC | #3
Gautam Menghani <gautam@linux.ibm.com> writes:
> PAPR hypervisor has introduced three new counters in the VPA area of
> LPAR CPUs for KVM L2 guest (see [1] for terminology) observability - 2
> for context switches from host to guest and vice versa, and 1 counter
> for getting the total time spent inside the KVM guest. Add a tracepoint
> that enables reading the counters for use by ftrace/perf. Note that this
> tracepoint is only available for nestedv2 API (i.e, KVM on PowerVM).
...
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 8e86eb577eb8..ed69ad58bd02 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -4108,6 +4108,54 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
>  	}
>  }
>  
> +static inline int kvmhv_get_l2_counters_status(void)
> +{
> +	return get_lppaca()->l2_counters_enable;
> +}

This is breaking the powernv build:

$ make powernv_defconfig ; make -s -j (nproc)
make[1]: Entering directory '/home/michael/linux/.build'
  GEN     Makefile
#
# configuration written to .config
#
make[1]: Leaving directory '/home/michael/linux/.build'
../arch/powerpc/kvm/book3s_hv.c: In function ‘kvmhv_get_l2_counters_status’:
../arch/powerpc/kvm/book3s_hv.c:4113:16: error: implicit declaration of function ‘get_lppaca’; did you mean ‘get_paca’? [-Werror=implicit-function-declaration]
 4113 |         return get_lppaca()->l2_counters_enable;
      |                ^~~~~~~~~~
      |                get_paca
../arch/powerpc/kvm/book3s_hv.c:4113:28: error: invalid type argument of ‘->’ (have ‘int’)
 4113 |         return get_lppaca()->l2_counters_enable;
      |                            ^~
In file included from ../arch/powerpc/include/asm/paravirt.h:9,
                 from ../arch/powerpc/include/asm/qspinlock.h:7,
                 from ../arch/powerpc/include/asm/spinlock.h:7,
                 from ../include/linux/spinlock.h:95,
                 from ../include/linux/sched.h:2138,
                 from ../include/linux/hardirq.h:9,
                 from ../include/linux/kvm_host.h:7,
                 from ../arch/powerpc/kvm/book3s_hv.c:18:
../arch/powerpc/kvm/book3s_hv.c: In function ‘kvmhv_set_l2_counters_status’:
../arch/powerpc/include/asm/lppaca.h:105:41: error: ‘struct paca_struct’ has no member named ‘lppaca_ptr’
  105 | #define lppaca_of(cpu)  (*paca_ptrs[cpu]->lppaca_ptr)
      |                                         ^~
../arch/powerpc/kvm/book3s_hv.c:4119:17: note: in expansion of macro ‘lppaca_of’
 4119 |                 lppaca_of(cpu).l2_counters_enable = 1;
      |                 ^~~~~~~~~
../arch/powerpc/include/asm/lppaca.h:105:41: error: ‘struct paca_struct’ has no member named ‘lppaca_ptr’
  105 | #define lppaca_of(cpu)  (*paca_ptrs[cpu]->lppaca_ptr)
      |                                         ^~
../arch/powerpc/kvm/book3s_hv.c:4121:17: note: in expansion of macro ‘lppaca_of’
 4121 |                 lppaca_of(cpu).l2_counters_enable = 0;
      |                 ^~~~~~~~~
../arch/powerpc/kvm/book3s_hv.c: In function ‘do_trace_nested_cs_time’:
../arch/powerpc/kvm/book3s_hv.c:4145:29: error: initialization of ‘struct lppaca *’ from ‘int’ makes pointer from integer without a cast [-Werror=int-conversion]
 4145 |         struct lppaca *lp = get_lppaca();
      |                             ^~~~~~~~~~
../arch/powerpc/kvm/book3s_hv.c: In function ‘kvmhv_get_l2_counters_status’:
../arch/powerpc/kvm/book3s_hv.c:4114:1: error: control reaches end of non-void function [-Werror=return-type]
 4114 | }
      | ^
cc1: all warnings being treated as errors
make[5]: *** [../scripts/Makefile.build:244: arch/powerpc/kvm/book3s_hv.o] Error 1
make[5]: *** Waiting for unfinished jobs....
make[4]: *** [../scripts/Makefile.build:485: arch/powerpc/kvm] Error 2
make[4]: *** Waiting for unfinished jobs....
make[3]: *** [../scripts/Makefile.build:485: arch/powerpc] Error 2
make[3]: *** Waiting for unfinished jobs....
make[2]: *** [/home/michael/linux/Makefile:1919: .] Error 2
make[1]: *** [/home/michael/linux/Makefile:240: __sub-make] Error 2
make: *** [Makefile:240: __sub-make] Error 2


cheers
Nicholas Piggin May 9, 2024, 5:42 a.m. UTC | #4
On Wed May 8, 2024 at 10:36 PM AEST, Michael Ellerman wrote:
> Gautam Menghani <gautam@linux.ibm.com> writes:
> > PAPR hypervisor has introduced three new counters in the VPA area of
> > LPAR CPUs for KVM L2 guest (see [1] for terminology) observability - 2
> > for context switches from host to guest and vice versa, and 1 counter
> > for getting the total time spent inside the KVM guest. Add a tracepoint
> > that enables reading the counters for use by ftrace/perf. Note that this
> > tracepoint is only available for nestedv2 API (i.e, KVM on PowerVM).
> ...
> > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> > index 8e86eb577eb8..ed69ad58bd02 100644
> > --- a/arch/powerpc/kvm/book3s_hv.c
> > +++ b/arch/powerpc/kvm/book3s_hv.c
> > @@ -4108,6 +4108,54 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
> >  	}
> >  }
> >  
> > +static inline int kvmhv_get_l2_counters_status(void)
> > +{
> > +	return get_lppaca()->l2_counters_enable;
> > +}
>
> This is breaking the powernv build:

[...]

All the nested KVM code should really go under CONFIG_PSERIES.
Possibly even moved out to its own file.

For now maybe you could just ifdef these few functions and
replace with noop variants for !PSERIES.

Thanks,
Nick
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index 61ec2447dabf..f40a646bee3c 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -62,7 +62,8 @@  struct lppaca {
 	u8	donate_dedicated_cpu;	/* Donate dedicated CPU cycles */
 	u8	fpregs_in_use;
 	u8	pmcregs_in_use;
-	u8	reserved8[28];
+	u8	l2_counters_enable;  /* Enable usage of counters for KVM guest */
+	u8	reserved8[27];
 	__be64	wait_state_cycles;	/* Wait cycles for this proc */
 	u8	reserved9[28];
 	__be16	slb_count;		/* # of SLBs to maintain */
@@ -92,9 +93,13 @@  struct lppaca {
 	/* cacheline 4-5 */
 
 	__be32	page_ins;		/* CMO Hint - # page ins by OS */
-	u8	reserved12[148];
+	u8	reserved12[28];
+	volatile __be64 l1_to_l2_cs_tb;
+	volatile __be64 l2_to_l1_cs_tb;
+	volatile __be64 l2_runtime_tb;
+	u8 reserved13[96];
 	volatile __be64 dtl_idx;	/* Dispatch Trace Log head index */
-	u8	reserved13[96];
+	u8	reserved14[96];
 } ____cacheline_aligned;
 
 #define lppaca_of(cpu)	(*paca_ptrs[cpu]->lppaca_ptr)
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1d58da946739..f20ac7a6efa4 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -278,6 +278,11 @@  struct paca_struct {
 	struct mce_info *mce_info;
 	u8 mce_pending_irq_work;
 #endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	u64 l1_to_l2_cs;
+	u64 l2_to_l1_cs;
+	u64 l2_runtime_agg;
+#endif
 } ____cacheline_aligned;
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8e86eb577eb8..ed69ad58bd02 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4108,6 +4108,54 @@  static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
 	}
 }
 
+static inline int kvmhv_get_l2_counters_status(void)
+{
+	return get_lppaca()->l2_counters_enable;
+}
+
+static inline void kvmhv_set_l2_counters_status(int cpu, bool status)
+{
+	if (status)
+		lppaca_of(cpu).l2_counters_enable = 1;
+	else
+		lppaca_of(cpu).l2_counters_enable = 0;
+}
+
+int kmvhv_counters_tracepoint_regfunc(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		kvmhv_set_l2_counters_status(cpu, true);
+	}
+	return 0;
+}
+
+void kmvhv_counters_tracepoint_unregfunc(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		kvmhv_set_l2_counters_status(cpu, false);
+	}
+}
+
+static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
+{
+	struct lppaca *lp = get_lppaca();
+	u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns;
+
+	l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb));
+	l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb));
+	l2_runtime_ns = tb_to_ns(be64_to_cpu(lp->l2_runtime_tb));
+	trace_kvmppc_vcpu_stats(vcpu, l1_to_l2_ns - local_paca->l1_to_l2_cs,
+					l2_to_l1_ns - local_paca->l2_to_l1_cs,
+					l2_runtime_ns - local_paca->l2_runtime_agg);
+	local_paca->l1_to_l2_cs = l1_to_l2_ns;
+	local_paca->l2_to_l1_cs = l2_to_l1_ns;
+	local_paca->l2_runtime_agg = l2_runtime_ns;
+}
+
 static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
 				     unsigned long lpcr, u64 *tb)
 {
@@ -4156,6 +4204,10 @@  static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	timer_rearm_host_dec(*tb);
 
+	/* Record context switch and guest_run_time data */
+	if (kvmhv_get_l2_counters_status())
+		do_trace_nested_cs_time(vcpu);
+
 	return trap;
 }
 
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 8d57c8428531..dc118ab88f23 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -238,6 +238,9 @@ 
 	{H_MULTI_THREADS_ACTIVE,	"H_MULTI_THREADS_ACTIVE"}, \
 	{H_OUTSTANDING_COP_OPS,		"H_OUTSTANDING_COP_OPS"}
 
+int kmvhv_counters_tracepoint_regfunc(void);
+void kmvhv_counters_tracepoint_unregfunc(void);
+
 TRACE_EVENT(kvm_guest_enter,
 	TP_PROTO(struct kvm_vcpu *vcpu),
 	TP_ARGS(vcpu),
@@ -512,6 +515,30 @@  TRACE_EVENT(kvmppc_run_vcpu_exit,
 			__entry->vcpu_id, __entry->exit, __entry->ret)
 );
 
+TRACE_EVENT_FN(kvmppc_vcpu_stats,
+	TP_PROTO(struct kvm_vcpu *vcpu, u64 l1_to_l2_cs, u64 l2_to_l1_cs, u64 l2_runtime),
+
+	TP_ARGS(vcpu, l1_to_l2_cs, l2_to_l1_cs, l2_runtime),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(u64,		l1_to_l2_cs)
+		__field(u64,		l2_to_l1_cs)
+		__field(u64,		l2_runtime)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->l1_to_l2_cs = l1_to_l2_cs;
+		__entry->l2_to_l1_cs = l2_to_l1_cs;
+		__entry->l2_runtime = l2_runtime;
+	),
+
+	TP_printk("VCPU %d: l1_to_l2_cs_time=%llu ns l2_to_l1_cs_time=%llu ns l2_runtime=%llu ns",
+		__entry->vcpu_id,  __entry->l1_to_l2_cs,
+		__entry->l2_to_l1_cs, __entry->l2_runtime),
+	kmvhv_counters_tracepoint_regfunc, kmvhv_counters_tracepoint_unregfunc
+);
 #endif /* _TRACE_KVM_HV_H */
 
 /* This part must be outside protection */