diff mbox

[v2] intel_pstate: Add trace point to report internal state.

Message ID 1389897145-3479-1-git-send-email-dirk.j.brandewie@intel.com (mailing list archive)
State Accepted, archived
Headers show

Commit Message

dirk.brandewie@gmail.com Jan. 16, 2014, 6:32 p.m. UTC
From: Dirk Brandewie <dirk.j.brandewie@intel.com>

Add perf trace event "power:pstate_sample" to report driver state to
aid in diagnosing issues reported against intel_pstate.

Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 24 +++++++++++++++++++
 include/trace/events/power.h   | 53 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

Comments

Rafael J. Wysocki Jan. 17, 2014, 1:07 a.m. UTC | #1
On Thursday, January 16, 2014 10:32:25 AM dirk.brandewie@gmail.com wrote:
> From: Dirk Brandewie <dirk.j.brandewie@intel.com>
> 
> Add perf trace event "power:pstate_sample" to report driver state to
> aid in diagnosing issues reported against intel_pstate.
> 
> Signed-off-by: Dirk Brandewie <dirk.j.brandewie@intel.com>

Queued up for 3.14, thanks!

> ---
>  drivers/cpufreq/intel_pstate.c | 24 +++++++++++++++++++
>  include/trace/events/power.h   | 53 ++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 77 insertions(+)
> 
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index fe91dad..7e257b2 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -51,6 +51,8 @@ static inline int32_t div_fp(int32_t x, int32_t y)
>  	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
>  }
>  
> +static u64 energy_divisor;
> +
>  struct sample {
>  	int32_t core_pct_busy;
>  	u64 aperf;
> @@ -559,6 +561,7 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
>  
>  	rdmsrl(MSR_IA32_APERF, aperf);
>  	rdmsrl(MSR_IA32_MPERF, mperf);
> +
>  	cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
>  	cpu->samples[cpu->sample_ptr].aperf = aperf;
>  	cpu->samples[cpu->sample_ptr].mperf = mperf;
> @@ -603,6 +606,7 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
>  	ctl = pid_calc(pid, busy_scaled);
>  
>  	steps = abs(ctl);
> +
>  	if (ctl < 0)
>  		intel_pstate_pstate_increase(cpu, steps);
>  	else
> @@ -612,9 +616,24 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
>  static void intel_pstate_timer_func(unsigned long __data)
>  {
>  	struct cpudata *cpu = (struct cpudata *) __data;
> +	struct sample *sample;
> +	u64 energy;
>  
>  	intel_pstate_sample(cpu);
> +
> +	sample = &cpu->samples[cpu->sample_ptr];
> +	rdmsrl(MSR_PKG_ENERGY_STATUS, energy);
> +
>  	intel_pstate_adjust_busy_pstate(cpu);
> +
> +	trace_pstate_sample(fp_toint(sample->core_pct_busy),
> +			fp_toint(intel_pstate_get_scaled_busy(cpu)),
> +			cpu->pstate.current_pstate,
> +			sample->mperf,
> +			sample->aperf,
> +			div64_u64(energy, energy_divisor),
> +			sample->freq);
> +
>  	intel_pstate_set_sample_time(cpu);
>  }
>  
> @@ -894,6 +913,7 @@ static int __init intel_pstate_init(void)
>  	int cpu, rc = 0;
>  	const struct x86_cpu_id *id;
>  	struct cpu_defaults *cpu_info;
> +	u64 units;
>  
>  	if (no_load)
>  		return -ENODEV;
> @@ -927,8 +947,12 @@ static int __init intel_pstate_init(void)
>  	if (rc)
>  		goto out;
>  
> +	rdmsrl(MSR_RAPL_POWER_UNIT, units);
> +	energy_divisor = 1 << ((units >> 8) & 0x1f); /* bits{12:8} */
> +
>  	intel_pstate_debug_expose_params();
>  	intel_pstate_sysfs_expose_params();
> +
>  	return rc;
>  out:
>  	get_online_cpus();
> diff --git a/include/trace/events/power.h b/include/trace/events/power.h
> index cda100d..9e9475c 100644
> --- a/include/trace/events/power.h
> +++ b/include/trace/events/power.h
> @@ -35,6 +35,59 @@ DEFINE_EVENT(cpu, cpu_idle,
>  	TP_ARGS(state, cpu_id)
>  );
>  
> +TRACE_EVENT(pstate_sample,
> +
> +	TP_PROTO(u32 core_busy,
> +		u32 scaled_busy,
> +		u32 state,
> +		u64 mperf,
> +		u64 aperf,
> +		u32 energy,
> +		u32 freq
> +		),
> +
> +	TP_ARGS(core_busy,
> +		scaled_busy,
> +		state,
> +		mperf,
> +		aperf,
> +		energy,
> +		freq
> +		),
> +
> +	TP_STRUCT__entry(
> +		__field(u32, core_busy)
> +		__field(u32, scaled_busy)
> +		__field(u32, state)
> +		__field(u64, mperf)
> +		__field(u64, aperf)
> +		__field(u32, energy)
> +		__field(u32, freq)
> +
> +	),
> +
> +	TP_fast_assign(
> +		__entry->core_busy = core_busy;
> +		__entry->scaled_busy = scaled_busy;
> +		__entry->state = state;
> +		__entry->mperf = mperf;
> +		__entry->aperf = aperf;
> +		__entry->energy = energy;
> +		__entry->freq = freq;
> +		),
> +
> +	TP_printk("core_busy=%lu scaled=%lu state=%lu mperf=%llu aperf=%llu energy=%lu freq=%lu ",
> +		(unsigned long)__entry->core_busy,
> +		(unsigned long)__entry->scaled_busy,
> +		(unsigned long)__entry->state,
> +		(unsigned long long)__entry->mperf,
> +		(unsigned long long)__entry->aperf,
> +		(unsigned long)__entry->energy,
> +		(unsigned long)__entry->freq
> +		)
> +
> +);
> +
>  /* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
>  #ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING
>  #define _PWR_EVENT_AVOID_DOUBLE_DEFINING
>
diff mbox

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index fe91dad..7e257b2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -51,6 +51,8 @@  static inline int32_t div_fp(int32_t x, int32_t y)
 	return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
 }
 
+static u64 energy_divisor;
+
 struct sample {
 	int32_t core_pct_busy;
 	u64 aperf;
@@ -559,6 +561,7 @@  static inline void intel_pstate_sample(struct cpudata *cpu)
 
 	rdmsrl(MSR_IA32_APERF, aperf);
 	rdmsrl(MSR_IA32_MPERF, mperf);
+
 	cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT;
 	cpu->samples[cpu->sample_ptr].aperf = aperf;
 	cpu->samples[cpu->sample_ptr].mperf = mperf;
@@ -603,6 +606,7 @@  static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 	ctl = pid_calc(pid, busy_scaled);
 
 	steps = abs(ctl);
+
 	if (ctl < 0)
 		intel_pstate_pstate_increase(cpu, steps);
 	else
@@ -612,9 +616,24 @@  static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 static void intel_pstate_timer_func(unsigned long __data)
 {
 	struct cpudata *cpu = (struct cpudata *) __data;
+	struct sample *sample; /* points at the cpu->samples[] slot reported by the tracepoint */
+	u64 energy; /* receives the value read from MSR_PKG_ENERGY_STATUS below */
 
 	intel_pstate_sample(cpu);
+
+	sample = &cpu->samples[cpu->sample_ptr]; /* slot selected after intel_pstate_sample() advanced sample_ptr */
+	rdmsrl(MSR_PKG_ENERGY_STATUS, energy); /* NOTE(review): unchecked MSR read; presumably faults where this MSR is absent (e.g. some VMs) - confirm */
+
 	intel_pstate_adjust_busy_pstate(cpu);
+
+	trace_pstate_sample(fp_toint(sample->core_pct_busy), /* emitted after the adjust call above and before the sample time is set */
+			fp_toint(intel_pstate_get_scaled_busy(cpu)),
+			cpu->pstate.current_pstate, /* presumably the post-adjustment P-state - confirm */
+			sample->mperf,
+			sample->aperf,
+			div64_u64(energy, energy_divisor), /* divisor is set in intel_pstate_init(); the tracepoint takes this argument as u32 */
+			sample->freq);
+
 	intel_pstate_set_sample_time(cpu);
 }
 
@@ -894,6 +913,7 @@  static int __init intel_pstate_init(void)
 	int cpu, rc = 0;
 	const struct x86_cpu_id *id;
 	struct cpu_defaults *cpu_info;
+	u64 units;
 
 	if (no_load)
 		return -ENODEV;
@@ -927,8 +947,12 @@  static int __init intel_pstate_init(void)
 	if (rc)
 		goto out;
 
+	rdmsrl(MSR_RAPL_POWER_UNIT, units);
+	energy_divisor = 1 << ((units >> 8) & 0x1f); /* bits{12:8} */
+
 	intel_pstate_debug_expose_params();
 	intel_pstate_sysfs_expose_params();
+
 	return rc;
 out:
 	get_online_cpus();
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index cda100d..9e9475c 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -35,6 +35,59 @@  DEFINE_EVENT(cpu, cpu_idle,
 	TP_ARGS(state, cpu_id)
 );
 
+TRACE_EVENT(pstate_sample, /* intel_pstate driver-state event; called from intel_pstate_timer_func() */
+
+	TP_PROTO(u32 core_busy, /* caller passes fp_toint(sample->core_pct_busy) */
+		u32 scaled_busy, /* caller passes fp_toint(intel_pstate_get_scaled_busy(cpu)) */
+		u32 state, /* caller passes cpu->pstate.current_pstate */
+		u64 mperf, /* caller passes sample->mperf */
+		u64 aperf, /* caller passes sample->aperf */
+		u32 energy, /* caller passes a u64 quotient (energy MSR value / energy_divisor), narrowed here */
+		u32 freq /* caller passes sample->freq */
+		),
+
+	TP_ARGS(core_busy, /* same names and order as TP_PROTO */
+		scaled_busy,
+		state,
+		mperf,
+		aperf,
+		energy,
+		freq
+		),
+
+	TP_STRUCT__entry( /* one record field per argument, same types as TP_PROTO */
+		__field(u32, core_busy)
+		__field(u32, scaled_busy)
+		__field(u32, state)
+		__field(u64, mperf)
+		__field(u64, aperf)
+		__field(u32, energy)
+		__field(u32, freq)
+
+	),
+
+	TP_fast_assign( /* straight copies; no conversion is done here */
+		__entry->core_busy = core_busy;
+		__entry->scaled_busy = scaled_busy;
+		__entry->state = state;
+		__entry->mperf = mperf;
+		__entry->aperf = aperf;
+		__entry->energy = energy;
+		__entry->freq = freq;
+		),
+
+	TP_printk("core_busy=%lu scaled=%lu state=%lu mperf=%llu aperf=%llu energy=%lu freq=%lu ", /* format string ends with a trailing space */
+		(unsigned long)__entry->core_busy, /* u32 fields are cast to unsigned long to match %lu */
+		(unsigned long)__entry->scaled_busy,
+		(unsigned long)__entry->state,
+		(unsigned long long)__entry->mperf, /* u64 fields are cast to unsigned long long to match %llu */
+		(unsigned long long)__entry->aperf,
+		(unsigned long)__entry->energy,
+		(unsigned long)__entry->freq
+		)
+
+);
+
 /* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
 #ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING
 #define _PWR_EVENT_AVOID_DOUBLE_DEFINING