diff mbox series

cpuidle: Add cpu_idle_miss trace event

Message ID Yt/AxPFi88neW7W5@e126311.manchester.arm.com (mailing list archive)
State Mainlined, archived
Headers show
Series cpuidle: Add cpu_idle_miss trace event | expand

Commit Message

Kajetan Puchalski July 26, 2022, 10:24 a.m. UTC
Add a trace event for cpuidle to track missed (too deep or too shallow)
wakeups.

After each wakeup, CPUIdle already computes whether the entered state was
optimal, above or below the desired one and updates the relevant
counters. This patch makes it possible to trace those events in addition
to just reading the counters.

The patterns of types and percentages of misses across different
workloads appear to be very consistent. This makes the trace event very
useful for comparing the relative correctness of different CPUIdle
governors for different types of workloads, or for finding the
optimal governor for a given device.

Signed-off-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
---
 drivers/cpuidle/cpuidle.c    |  6 +++++-
 include/trace/events/power.h | 22 ++++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)


base-commit: e0dccc3b76fb35bb257b4118367a883073d7390e

Comments

Steven Rostedt July 26, 2022, 2:08 p.m. UTC | #1
On Tue, 26 Jul 2022 11:24:04 +0100
Kajetan Puchalski <kajetan.puchalski@arm.com> wrote:

> --- a/include/trace/events/power.h
> +++ b/include/trace/events/power.h
> @@ -40,6 +40,28 @@ DEFINE_EVENT(cpu, cpu_idle,
>  	TP_ARGS(state, cpu_id)
>  );
>  
> +TRACE_EVENT(cpu_idle_miss,
> +
> +	TP_PROTO(unsigned int cpu_id, unsigned int state, bool below),
> +
> +	TP_ARGS(cpu_id, state, below),
> +
> +	TP_STRUCT__entry(
> +		__field(u32,		cpu_id)
> +		__field(u32,		state)
> +		__field(bool,		below)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->cpu_id = cpu_id;
> +		__entry->state = state;
> +		__entry->below = below;
> +	),
> +
> +	TP_printk("cpu_id=%lu state=%lu type=%s", (unsigned long)__entry->cpu_id,
> +		(unsigned long)__entry->state, (__entry->below)?"below":"above")
> +);
> +
>  TRACE_EVENT(powernv_throttle,
>  
>   	TP_PROTO(int chip_id, const char *reason, int pmax),

For the tracing POV,

Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>

-- Steve
Kajetan Puchalski July 26, 2022, 3:59 p.m. UTC | #2
On Tue, Jul 26, 2022 at 10:08:16AM -0400, Steven Rostedt wrote:
> On Tue, 26 Jul 2022 11:24:04 +0100
> Kajetan Puchalski <kajetan.puchalski@arm.com> wrote:
 
> For the tracing POV,
> 
> Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>

Thanks a lot for taking a look!

> -- Steve
Rafael J. Wysocki July 29, 2022, 2:57 p.m. UTC | #3
On Tue, Jul 26, 2022 at 12:24 PM Kajetan Puchalski
<kajetan.puchalski@arm.com> wrote:
>
> Add a trace event for cpuidle to track missed (too deep or too shallow)
> wakeups.
>
> After each wakeup, CPUIdle already computes whether the entered state was
> optimal, above or below the desired one and updates the relevant
> counters. This patch makes it possible to trace those events in addition
> to just reading the counters.
>
> The patterns of types and percentages of misses across different
> workloads appear to be very consistent. This makes the trace event very
> useful for comparing the relative correctness of different CPUIdle
> governors for different types of workloads, or for finding the
> optimal governor for a given device.
>
> Signed-off-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
> ---
>  drivers/cpuidle/cpuidle.c    |  6 +++++-
>  include/trace/events/power.h | 22 ++++++++++++++++++++++
>  2 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index ef2ea1b12cd8..bf57cab32456 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -8,6 +8,7 @@
>   * This code is licenced under the GPL.
>   */
>
> +#include "linux/percpu-defs.h"
>  #include <linux/clockchips.h>
>  #include <linux/kernel.h>
>  #include <linux/mutex.h>
> @@ -278,6 +279,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
>
>                                 /* Shallower states are enabled, so update. */
>                                 dev->states_usage[entered_state].above++;
> +                               trace_cpu_idle_miss(dev->cpu, entered_state, false);
>                                 break;
>                         }
>                 } else if (diff > delay) {
> @@ -289,8 +291,10 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
>                                  * Update if a deeper state would have been a
>                                  * better match for the observed idle duration.
>                                  */
> -                               if (diff - delay >= drv->states[i].target_residency_ns)
> +                               if (diff - delay >= drv->states[i].target_residency_ns) {
>                                         dev->states_usage[entered_state].below++;
> +                                       trace_cpu_idle_miss(dev->cpu, entered_state, true);
> +                               }
>
>                                 break;
>                         }
> diff --git a/include/trace/events/power.h b/include/trace/events/power.h
> index af5018aa9517..6539f23a5653 100644
> --- a/include/trace/events/power.h
> +++ b/include/trace/events/power.h
> @@ -40,6 +40,28 @@ DEFINE_EVENT(cpu, cpu_idle,
>         TP_ARGS(state, cpu_id)
>  );
>
> +TRACE_EVENT(cpu_idle_miss,
> +
> +       TP_PROTO(unsigned int cpu_id, unsigned int state, bool below),
> +
> +       TP_ARGS(cpu_id, state, below),
> +
> +       TP_STRUCT__entry(
> +               __field(u32,            cpu_id)
> +               __field(u32,            state)
> +               __field(bool,           below)
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->cpu_id = cpu_id;
> +               __entry->state = state;
> +               __entry->below = below;
> +       ),
> +
> +       TP_printk("cpu_id=%lu state=%lu type=%s", (unsigned long)__entry->cpu_id,
> +               (unsigned long)__entry->state, (__entry->below)?"below":"above")
> +);
> +
>  TRACE_EVENT(powernv_throttle,
>
>         TP_PROTO(int chip_id, const char *reason, int pmax),
>
> base-commit: e0dccc3b76fb35bb257b4118367a883073d7390e
> --

Applied as 5.20 material with the R-by tag from Steve, thanks!
diff mbox series

Patch

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index ef2ea1b12cd8..bf57cab32456 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -8,6 +8,7 @@ 
  * This code is licenced under the GPL.
  */
 
+#include "linux/percpu-defs.h"
 #include <linux/clockchips.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
@@ -278,6 +279,7 @@  int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 
 				/* Shallower states are enabled, so update. */
 				dev->states_usage[entered_state].above++;
+				trace_cpu_idle_miss(dev->cpu, entered_state, false);
 				break;
 			}
 		} else if (diff > delay) {
@@ -289,8 +291,10 @@  int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 				 * Update if a deeper state would have been a
 				 * better match for the observed idle duration.
 				 */
-				if (diff - delay >= drv->states[i].target_residency_ns)
+				if (diff - delay >= drv->states[i].target_residency_ns) {
 					dev->states_usage[entered_state].below++;
+					trace_cpu_idle_miss(dev->cpu, entered_state, true);
+				}
 
 				break;
 			}
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index af5018aa9517..6539f23a5653 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -40,6 +40,28 @@  DEFINE_EVENT(cpu, cpu_idle,
 	TP_ARGS(state, cpu_id)
 );
 
+TRACE_EVENT(cpu_idle_miss,
+
+	TP_PROTO(unsigned int cpu_id, unsigned int state, bool below),
+
+	TP_ARGS(cpu_id, state, below),
+
+	TP_STRUCT__entry(
+		__field(u32,		cpu_id)
+		__field(u32,		state)
+		__field(bool,		below)
+	),
+
+	TP_fast_assign(
+		__entry->cpu_id = cpu_id;
+		__entry->state = state;
+		__entry->below = below;
+	),
+
+	TP_printk("cpu_id=%lu state=%lu type=%s", (unsigned long)__entry->cpu_id,
+		(unsigned long)__entry->state, (__entry->below)?"below":"above")
+);
+
 TRACE_EVENT(powernv_throttle,
 
  	TP_PROTO(int chip_id, const char *reason, int pmax),