
drivers: perf: arm: implement CPU_PM notifier

Message ID 1456251759-24768-1-git-send-email-lorenzo.pieralisi@arm.com (mailing list archive)
State New, archived

Commit Message

Lorenzo Pieralisi Feb. 23, 2016, 6:22 p.m. UTC
When a CPU is suspended (either through suspend-to-RAM or CPUidle),
the contents of its PMU registers can be lost, which means that counter
register values that were initialized before power-down entry have to be
reprogrammed on power-up to make sure the counter set-up is preserved
(i.e. on power-up the registers take their Cold or Warm reset values,
which can be architecturally UNKNOWN).

To guarantee seamless profiling across a core power-down, this patch
adds a CPU PM notifier to ARM PMUs that, upon CPU PM entry to/exit from
low-power states, saves/restores the PMU register set-up (by using the
ARM perf API), so that the power-down/up cycle does not affect perf
behaviour (apart from an unavoidable black-out period between the
power-down/up CPU PM notifications).

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Ashwin Chaugule <ashwin.chaugule@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Kevin Hilman <khilman@baylibre.com>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
---
 drivers/perf/arm_pmu.c       | 95 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/perf/arm_pmu.h |  1 +
 2 files changed, 96 insertions(+)
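
For context (not part of the patch): the CPU PM notifications that this
notifier reacts to are raised by platform cpuidle/suspend code through
the cpu_pm_enter()/cpu_pm_exit() API around entry to states that may
lose per-CPU hardware state. A minimal, hypothetical sketch of such a
caller (do_firmware_suspend() is a made-up placeholder for the actual
firmware/PSCI call):

#include <linux/cpu_pm.h>

static int my_platform_enter_deep_idle(unsigned long index)
{
	int ret;

	/* Sends CPU_PM_ENTER; the PMU notifier saves counter state here */
	ret = cpu_pm_enter();
	if (ret)
		return ret;	/* a notifier failed; CPU_PM_ENTER_FAILED was sent */

	/* Hypothetical call that may power the core down and back up */
	ret = do_firmware_suspend(index);

	/* Sends CPU_PM_EXIT; the PMU notifier resets and restores state here */
	cpu_pm_exit();

	return ret;
}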

Comments

Mathieu Poirier Feb. 24, 2016, 4:20 p.m. UTC | #1
On 23 February 2016 at 11:22, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
> When a CPU is suspended (either through suspend-to-RAM or CPUidle),
> its PMU registers content can be lost, which means that counters
> registers values that were initialized on power down entry have to be
> reprogrammed on power-up to make sure the counters set-up is preserved
> (ie on power-up registers take the reset values on Cold or Warm reset,
> which can be architecturally UNKNOWN).
>
> To guarantee seamless profiling conditions across a core power down
> this patch adds a CPU PM notifier to ARM pmus, that upon CPU PM
> entry/exit from low-power states saves/restores the pmu registers
> set-up (by using the ARM perf API), so that the power-down/up cycle does
> not affect the perf behaviour (apart from a black-out period between
> power-up/down CPU PM notifications that is unavoidable).
>
> Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Cc: Ashwin Chaugule <ashwin.chaugule@linaro.org>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Kevin Hilman <khilman@baylibre.com>
> Cc: Sudeep Holla <sudeep.holla@arm.com>
> Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
> Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> ---
>  drivers/perf/arm_pmu.c       | 95 ++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/perf/arm_pmu.h |  1 +
>  2 files changed, 96 insertions(+)
>
> diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
> index 166637f..12317a9 100644
> --- a/drivers/perf/arm_pmu.c
> +++ b/drivers/perf/arm_pmu.c
> @@ -13,6 +13,7 @@
>
>  #include <linux/bitmap.h>
>  #include <linux/cpumask.h>
> +#include <linux/cpu_pm.h>
>  #include <linux/export.h>
>  #include <linux/kernel.h>
>  #include <linux/of_device.h>
> @@ -710,6 +711,93 @@ static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
>         return NOTIFY_OK;
>  }
>
> +#ifdef CONFIG_CPU_PM
> +static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
> +{
> +       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
> +       struct perf_event *event;
> +       int idx;
> +
> +       for (idx = 0; idx < armpmu->num_events; idx++) {
> +               /*
> +                * If the counter is not used skip it, there is no
> +                * need of stopping/restarting it.
> +                */
> +               if (!test_bit(idx, hw_events->used_mask))
> +                       continue;
> +
> +               event = hw_events->events[idx];
> +
> +               switch (cmd) {
> +               case CPU_PM_ENTER:
> +                       /*
> +                        * Stop and update the counter
> +                        */
> +                       armpmu_stop(event, PERF_EF_UPDATE);
> +                       break;
> +               case CPU_PM_EXIT:
> +               case CPU_PM_ENTER_FAILED:
> +                        /* Restore and enable the counter */
> +                       armpmu_start(event, PERF_EF_RELOAD);
> +                       break;
> +               default:
> +                       break;
> +               }
> +       }
> +}
> +
> +static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
> +                            void *v)
> +{
> +       struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
> +       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
> +       int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
> +
> +       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
> +               return NOTIFY_DONE;
> +
> +       /*
> +        * Always reset the PMU registers on power-up even if
> +        * there are no events running.
> +        */
> +       if (cmd == CPU_PM_EXIT && armpmu->reset)
> +               armpmu->reset(armpmu);

I think this patch does the right thing, but I can't quite follow the
reasoning for the above reset.  Wouldn't it be better to do the reset
as part of the CPU_PM_EXIT case below?  At this point nothing tells us
the CPU won't go back down before the event is enabled, wasting the
cycles needed to reset the PMU.

Thanks,
Mathieu

> +
> +       if (!enabled)
> +               return NOTIFY_OK;
> +
> +       switch (cmd) {
> +       case CPU_PM_ENTER:
> +               armpmu->stop(armpmu);
> +               cpu_pm_pmu_setup(armpmu, cmd);
> +               break;
> +       case CPU_PM_EXIT:
> +               cpu_pm_pmu_setup(armpmu, cmd);
> +       case CPU_PM_ENTER_FAILED:
> +               armpmu->start(armpmu);
> +               break;
> +       default:
> +               return NOTIFY_DONE;
> +       }
> +
> +       return NOTIFY_OK;
> +}
> +
> +static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
> +{
> +       cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify;
> +       return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb);
> +}
> +
> +static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
> +{
> +       cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
> +}
> +#else
> +static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
> +static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
> +#endif
> +
>  static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
>  {
>         int err;
> @@ -725,6 +813,10 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
>         if (err)
>                 goto out_hw_events;
>
> +       err = cpu_pm_pmu_register(cpu_pmu);
> +       if (err)
> +               goto out_unregister;
> +
>         for_each_possible_cpu(cpu) {
>                 struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
>                 raw_spin_lock_init(&events->pmu_lock);
> @@ -746,6 +838,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
>
>         return 0;
>
> +out_unregister:
> +       unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
>  out_hw_events:
>         free_percpu(cpu_hw_events);
>         return err;
> @@ -753,6 +847,7 @@ out_hw_events:
>
>  static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
>  {
> +       cpu_pm_pmu_unregister(cpu_pmu);
>         unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
>         free_percpu(cpu_pmu->hw_events);
>  }
> diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
> index 83b5e34..9ffa316 100644
> --- a/include/linux/perf/arm_pmu.h
> +++ b/include/linux/perf/arm_pmu.h
> @@ -107,6 +107,7 @@ struct arm_pmu {
>         struct platform_device  *plat_device;
>         struct pmu_hw_events    __percpu *hw_events;
>         struct notifier_block   hotplug_nb;
> +       struct notifier_block   cpu_pm_nb;
>  };
>
>  #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
> --
> 2.5.1
>
Lorenzo Pieralisi Feb. 24, 2016, 5:35 p.m. UTC | #2
On Wed, Feb 24, 2016 at 09:20:22AM -0700, Mathieu Poirier wrote:
> On 23 February 2016 at 11:22, Lorenzo Pieralisi

[...]

> > +static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
> > +                            void *v)
> > +{
> > +       struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
> > +       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
> > +       int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
> > +
> > +       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
> > +               return NOTIFY_DONE;
> > +
> > +       /*
> > +        * Always reset the PMU registers on power-up even if
> > +        * there are no events running.
> > +        */
> > +       if (cmd == CPU_PM_EXIT && armpmu->reset)
> > +               armpmu->reset(armpmu);
> 
> I think this patch does the right thing but I can't get the above
> reset.  Wouldn't it be better to do the reset as part of the
> CPU_PM_EXIT case below?  At this point nothing tells us the CPU won't
> go back down before the event is enabled, wasting the cycle needed to
> reset the PMU.

The logic goes: if the cpu is woken up with no events enabled and we
do not reset it (mind, ->reset here sets the PMU register values to a
sane default, since some of them are architecturally UNKNOWN out of
reset; it does NOT reset the PMU itself), and we subsequently install
an event on it, we do have a problem. That's why, whenever a core gets
out of low-power, we have to reset its PMU.

Does it make sense?

Thanks,
Lorenzo

> 
> Thanks,
> Mathieu
> 
> > +
> > +       if (!enabled)
> > +               return NOTIFY_OK;
> > +
> > +       switch (cmd) {
> > +       case CPU_PM_ENTER:
> > +               armpmu->stop(armpmu);
> > +               cpu_pm_pmu_setup(armpmu, cmd);
> > +               break;
> > +       case CPU_PM_EXIT:
> > +               cpu_pm_pmu_setup(armpmu, cmd);
> > +       case CPU_PM_ENTER_FAILED:
> > +               armpmu->start(armpmu);
> > +               break;
> > +       default:
> > +               return NOTIFY_DONE;
> > +       }
> > +
> > +       return NOTIFY_OK;
> > +}
> > +
> > +static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
> > +{
> > +       cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify;
> > +       return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb);
> > +}
> > +
> > +static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
> > +{
> > +       cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
> > +}
> > +#else
> > +static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
> > +static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
> > +#endif
> > +
> >  static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
> >  {
> >         int err;
> > @@ -725,6 +813,10 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
> >         if (err)
> >                 goto out_hw_events;
> >
> > +       err = cpu_pm_pmu_register(cpu_pmu);
> > +       if (err)
> > +               goto out_unregister;
> > +
> >         for_each_possible_cpu(cpu) {
> >                 struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
> >                 raw_spin_lock_init(&events->pmu_lock);
> > @@ -746,6 +838,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
> >
> >         return 0;
> >
> > +out_unregister:
> > +       unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
> >  out_hw_events:
> >         free_percpu(cpu_hw_events);
> >         return err;
> > @@ -753,6 +847,7 @@ out_hw_events:
> >
> >  static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
> >  {
> > +       cpu_pm_pmu_unregister(cpu_pmu);
> >         unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
> >         free_percpu(cpu_pmu->hw_events);
> >  }
> > diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
> > index 83b5e34..9ffa316 100644
> > --- a/include/linux/perf/arm_pmu.h
> > +++ b/include/linux/perf/arm_pmu.h
> > @@ -107,6 +107,7 @@ struct arm_pmu {
> >         struct platform_device  *plat_device;
> >         struct pmu_hw_events    __percpu *hw_events;
> >         struct notifier_block   hotplug_nb;
> > +       struct notifier_block   cpu_pm_nb;
> >  };
> >
> >  #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
> > --
> > 2.5.1
> >
>
Ashwin Chaugule Feb. 24, 2016, 7:53 p.m. UTC | #3
Hi Lorenzo,

On 24 February 2016 at 12:35, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
> On Wed, Feb 24, 2016 at 09:20:22AM -0700, Mathieu Poirier wrote:
>> On 23 February 2016 at 11:22, Lorenzo Pieralisi
>
> [...]
>
>> > +static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
>> > +                            void *v)
>> > +{
>> > +       struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
>> > +       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
>> > +       int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
>> > +
>> > +       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
>> > +               return NOTIFY_DONE;
>> > +
>> > +       /*
>> > +        * Always reset the PMU registers on power-up even if
>> > +        * there are no events running.
>> > +        */
>> > +       if (cmd == CPU_PM_EXIT && armpmu->reset)
>> > +               armpmu->reset(armpmu);
>>
>> I think this patch does the right thing but I can't get the above
>> reset.  Wouldn't it be better to do the reset as part of the
>> CPU_PM_EXIT case below?  At this point nothing tells us the CPU won't
>> go back down before the event is enabled, wasting the cycle needed to
>> reset the PMU.
>
> The logic goes, if the cpu is woken up and it has no events enabled,
> if we do not reset it (mind, ->reset here sets the PMU register values
> to a sane default, some of them are architecturally UNKNOWN on reset, it
> does NOT reset the PMU) _and_ we subsequently install an event on it we
> do have a problem, that's why whenever a core gets out of low-power we
> have to reset its pmu.

Don't we blow out the previous value in the counter when installing an
event? Or has that changed lately? IIRC, there was some initial value
we'd program into the counter to avoid missing the overflow event.
(sorry, it's been too long) ;)

Cheers,
Ashwin.
Lorenzo Pieralisi Feb. 24, 2016, 10:31 p.m. UTC | #4
On Wed, Feb 24, 2016 at 02:53:02PM -0500, Ashwin Chaugule wrote:
> Hi Lorenzo,
> 
> On 24 February 2016 at 12:35, Lorenzo Pieralisi
> <lorenzo.pieralisi@arm.com> wrote:
> > On Wed, Feb 24, 2016 at 09:20:22AM -0700, Mathieu Poirier wrote:
> >> On 23 February 2016 at 11:22, Lorenzo Pieralisi
> >
> > [...]
> >
> >> > +static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
> >> > +                            void *v)
> >> > +{
> >> > +       struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
> >> > +       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
> >> > +       int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
> >> > +
> >> > +       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
> >> > +               return NOTIFY_DONE;
> >> > +
> >> > +       /*
> >> > +        * Always reset the PMU registers on power-up even if
> >> > +        * there are no events running.
> >> > +        */
> >> > +       if (cmd == CPU_PM_EXIT && armpmu->reset)
> >> > +               armpmu->reset(armpmu);
> >>
> >> I think this patch does the right thing but I can't get the above
> >> reset.  Wouldn't it be better to do the reset as part of the
> >> CPU_PM_EXIT case below?  At this point nothing tells us the CPU won't
> >> go back down before the event is enabled, wasting the cycle needed to
> >> reset the PMU.
> >
> > The logic goes, if the cpu is woken up and it has no events enabled,
> > if we do not reset it (mind, ->reset here sets the PMU register values
> > to a sane default, some of them are architecturally UNKNOWN on reset, it
> > does NOT reset the PMU) _and_ we subsequently install an event on it we
> > do have a problem, that's why whenever a core gets out of low-power we
> > have to reset its pmu.
> 
> Dont we blow out the previous value in the counter when installing an
> event? Or has that changed lately? IIRC, there was some initial value
> we'd program into the counter to avoid missing the overflow event.
> (sorry its been too long) ;)

If you mean there is no need to reset the value since we are
overwriting it anyway, you should look at ->reset from the point of
view of the PMU unit, not just of a single event. If you look at the
->reset method for e.g. v8, you will see that it operates on all
counters and on the PMU unit as a whole; that's the only sane way of
setting up the PMU unit before enabling single counters (some
registers are UNKNOWN at reset).

To make my reply to Mathieu clearer, the ->reset method contains
operations (e.g. writing the PMCR counter reset bits) that do carry
out counter resets; what I wanted to say is that the ->reset method
does not by itself drive the PMU HW reset signal, which is what the
power controller does when it resets the CPU on power-up.

The PMU ->reset method must be called on a cpu on power-up, to make
sure the PMU HW is set up to sane default values and is ready to be
used (i.e. to have counters installed); for instance, on v8 all
counters must be disabled (IRQs included) and reset, which is what
armv8pmu_reset() does.

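Roughly, the v8 ->reset callback does something along these lines (a
paraphrased sketch, not the exact armv8pmu_reset() source; helper and
macro names are simplified from memory):

static void armv8pmu_reset(void *info)
{
	struct arm_pmu *cpu_pmu = info;
	u32 idx, nb_cnt = cpu_pmu->num_events;

	/* Counter and interrupt-enable state is UNKNOWN out of reset */
	for (idx = ARMV8_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) {
		armv8pmu_disable_counter(idx);
		armv8pmu_disable_intens(idx);
	}

	/* Reset all event counters and the cycle counter (PMCR.P, PMCR.C) */
	armv8pmu_pmcr_write(ARMV8_PMCR_P | ARMV8_PMCR_C);
}
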
I hope this makes things clearer.

Thanks,
Lorenzo
Kevin Hilman Feb. 25, 2016, 1:10 a.m. UTC | #5
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> writes:

> When a CPU is suspended (either through suspend-to-RAM or CPUidle),
> its PMU registers content can be lost, which means that counters
> registers values that were initialized on power down entry have to be
> reprogrammed on power-up to make sure the counters set-up is preserved

This assumes that the PMUs are always sharing a power rail with a
specific CPU, not the cluster.  Is that a reliable assumption?

Kevin
Lorenzo Pieralisi Feb. 25, 2016, 9:44 a.m. UTC | #6
On Wed, Feb 24, 2016 at 05:10:20PM -0800, Kevin Hilman wrote:
> Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> writes:
> 
> > When a CPU is suspended (either through suspend-to-RAM or CPUidle),
> > its PMU registers content can be lost, which means that counters
> > registers values that were initialized on power down entry have to be
> > reprogrammed on power-up to make sure the counters set-up is preserved
> 
> This assumes that the PMUs are always sharing a power rail with a
> specific CPU, not the cluster.  Is that a reliable assumption?

As reliable as assuming the GIC HW state needs save/restore on
idle-state entry: if we have no power-domain information linked to the
CPU PM notifiers, saving/restoring everything every time an idle state
deeper than WFI is entered is the best we can do.

As far as this patch is concerned, if the PMU is on a different power
domain than the CPU, I agree that stopping/resetting/restoring the
PMU registers is a waste of cycles (I will also test it by triggering
the notifiers on WFI, to make sure the reset is not disruptive on a
cpu that is not powered off), but I see no alternative other than
disabling idle to carry out profiling sanely (or implementing CPU PM
notifiers with runtime PM).

Thanks,
Lorenzo
Mathieu Poirier Feb. 25, 2016, 4:32 p.m. UTC | #7
On 25 February 2016 at 02:44, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
> On Wed, Feb 24, 2016 at 05:10:20PM -0800, Kevin Hilman wrote:
>> Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> writes:
>>
>> > When a CPU is suspended (either through suspend-to-RAM or CPUidle),
>> > its PMU registers content can be lost, which means that counters
>> > registers values that were initialized on power down entry have to be
>> > reprogrammed on power-up to make sure the counters set-up is preserved
>>
>> This assumes that the PMUs are always sharing a power rail with a
>> specific CPU, not the cluster.  Is that a reliable assumption?
>
> As reliable as assuming the GIC HW state needs save/restore on idle-state
> entry, if we have no power domains information linked to CPU PM
> notifiers saving/restoring everything every time an idle state deeper
> than wfi is entered is the best we can do.
>
> As far as this patch is concerned, if the PMU is on a different power
> domain than the CPU, I agree that stopping/resetting/restoring the
> PMU registers is a waste of cycles (I will test it also by triggering
> the notifiers on wfi, to make sure the reset is not disruptive on
> a cpu that is not powered off), I see no alternative other than disabling
> idle to carry out profiling sanely (or implement CPU PM notifiers with
> runtime PM).

I totally understand - I'm faced with the same problem on Coresight.
To me the ultimate solution is still to integrate CPU power domain
management with runtime PM (like we talked about many times before).
I know Lina is working on something that will get us close to that but
it's not finished yet.

>
> Thanks,
> Lorenzo
Mathieu Poirier Feb. 25, 2016, 4:37 p.m. UTC | #8
On 24 February 2016 at 10:35, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
> On Wed, Feb 24, 2016 at 09:20:22AM -0700, Mathieu Poirier wrote:
>> On 23 February 2016 at 11:22, Lorenzo Pieralisi
>
> [...]
>
>> > +static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
>> > +                            void *v)
>> > +{
>> > +       struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
>> > +       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
>> > +       int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
>> > +
>> > +       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
>> > +               return NOTIFY_DONE;
>> > +
>> > +       /*
>> > +        * Always reset the PMU registers on power-up even if
>> > +        * there are no events running.
>> > +        */
>> > +       if (cmd == CPU_PM_EXIT && armpmu->reset)
>> > +               armpmu->reset(armpmu);
>>
>> I think this patch does the right thing but I can't get the above
>> reset.  Wouldn't it be better to do the reset as part of the
>> CPU_PM_EXIT case below?  At this point nothing tells us the CPU won't
>> go back down before the event is enabled, wasting the cycle needed to
>> reset the PMU.
>
> The logic goes, if the cpu is woken up and it has no events enabled,
> if we do not reset it (mind, ->reset here sets the PMU register values
> to a sane default, some of them are architecturally UNKNOWN on reset, it
> does NOT reset the PMU) _and_ we subsequently install an event on it we
> do have a problem, that's why whenever a core gets out of low-power we
> have to reset its pmu.
>
> Does it make sense ?

Ok, I understand the context and your solution will work.

Isn't there a flag somewhere in the PMU registers that indicates the
state has been lost due to a loss of power?  If so, I think the best
solution is to probe that flag when an event gets installed.  If the
unit has gone through a power-down, the reset can be done before
installing the event.

Mathieu

>
> Thanks,
> Lorenzo
>
>>
>> Thanks,
>> Mathieu
>>
>> > +
>> > +       if (!enabled)
>> > +               return NOTIFY_OK;
>> > +
>> > +       switch (cmd) {
>> > +       case CPU_PM_ENTER:
>> > +               armpmu->stop(armpmu);
>> > +               cpu_pm_pmu_setup(armpmu, cmd);
>> > +               break;
>> > +       case CPU_PM_EXIT:
>> > +               cpu_pm_pmu_setup(armpmu, cmd);
>> > +       case CPU_PM_ENTER_FAILED:
>> > +               armpmu->start(armpmu);
>> > +               break;
>> > +       default:
>> > +               return NOTIFY_DONE;
>> > +       }
>> > +
>> > +       return NOTIFY_OK;
>> > +}
>> > +
>> > +static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
>> > +{
>> > +       cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify;
>> > +       return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb);
>> > +}
>> > +
>> > +static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
>> > +{
>> > +       cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
>> > +}
>> > +#else
>> > +static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
>> > +static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
>> > +#endif
>> > +
>> >  static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
>> >  {
>> >         int err;
>> > @@ -725,6 +813,10 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
>> >         if (err)
>> >                 goto out_hw_events;
>> >
>> > +       err = cpu_pm_pmu_register(cpu_pmu);
>> > +       if (err)
>> > +               goto out_unregister;
>> > +
>> >         for_each_possible_cpu(cpu) {
>> >                 struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
>> >                 raw_spin_lock_init(&events->pmu_lock);
>> > @@ -746,6 +838,8 @@ static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
>> >
>> >         return 0;
>> >
>> > +out_unregister:
>> > +       unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
>> >  out_hw_events:
>> >         free_percpu(cpu_hw_events);
>> >         return err;
>> > @@ -753,6 +847,7 @@ out_hw_events:
>> >
>> >  static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
>> >  {
>> > +       cpu_pm_pmu_unregister(cpu_pmu);
>> >         unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
>> >         free_percpu(cpu_pmu->hw_events);
>> >  }
>> > diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
>> > index 83b5e34..9ffa316 100644
>> > --- a/include/linux/perf/arm_pmu.h
>> > +++ b/include/linux/perf/arm_pmu.h
>> > @@ -107,6 +107,7 @@ struct arm_pmu {
>> >         struct platform_device  *plat_device;
>> >         struct pmu_hw_events    __percpu *hw_events;
>> >         struct notifier_block   hotplug_nb;
>> > +       struct notifier_block   cpu_pm_nb;
>> >  };
>> >
>> >  #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
>> > --
>> > 2.5.1
>> >
>>
Will Deacon Feb. 25, 2016, 4:43 p.m. UTC | #9
On Thu, Feb 25, 2016 at 09:32:17AM -0700, Mathieu Poirier wrote:
> On 25 February 2016 at 02:44, Lorenzo Pieralisi
> <lorenzo.pieralisi@arm.com> wrote:
> > On Wed, Feb 24, 2016 at 05:10:20PM -0800, Kevin Hilman wrote:
> >> Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> writes:
> >>
> >> > When a CPU is suspended (either through suspend-to-RAM or CPUidle),
> >> > its PMU registers content can be lost, which means that counters
> >> > registers values that were initialized on power down entry have to be
> >> > reprogrammed on power-up to make sure the counters set-up is preserved
> >>
> >> This assumes that the PMUs are always sharing a power rail with a
> >> specific CPU, not the cluster.  Is that a reliable assumption?
> >
> > As reliable as assuming the GIC HW state needs save/restore on idle-state
> > entry, if we have no power domains information linked to CPU PM
> > notifiers saving/restoring everything every time an idle state deeper
> > than wfi is entered is the best we can do.
> >
> > As far as this patch is concerned, if the PMU is on a different power
> > domain than the CPU, I agree that stopping/resetting/restoring the
> > PMU registers is a waste of cycles (I will test it also by triggering
> > the notifiers on wfi, to make sure the reset is not disruptive on
> > a cpu that is not powered off), I see no alternative other than disabling
> > idle to carry out profiling sanely (or implement CPU PM notifiers with
> > runtime PM).
> 
> I totally understand - I'm faced with the same problem on Coresight.
> To me the ultimate solution is still to integrate CPU power domain
> management with runtime PM (like we talked about many times before).
> I know Lina is working on something that will get us close to that but
> it's not finished yet.

I've been waiting for that code for *years* now...

Ultimately, Juno-r2 loses its PMU state over idle if you run mainline
on it and, worse still, you end up getting junk numbers back to userspace,
so it's not even obvious what happened.

I'm not aware of any flags that indicate loss of state.

Will
Lorenzo Pieralisi Feb. 25, 2016, 6:46 p.m. UTC | #10
On Thu, Feb 25, 2016 at 04:43:54PM +0000, Will Deacon wrote:
> On Thu, Feb 25, 2016 at 09:32:17AM -0700, Mathieu Poirier wrote:
> > On 25 February 2016 at 02:44, Lorenzo Pieralisi
> > <lorenzo.pieralisi@arm.com> wrote:
> > > On Wed, Feb 24, 2016 at 05:10:20PM -0800, Kevin Hilman wrote:
> > >> Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> writes:
> > >>
> > >> > When a CPU is suspended (either through suspend-to-RAM or CPUidle),
> > >> > its PMU registers content can be lost, which means that counters
> > >> > registers values that were initialized on power down entry have to be
> > >> > reprogrammed on power-up to make sure the counters set-up is preserved
> > >>
> > >> This assumes that the PMUs are always sharing a power rail with a
> > >> specific CPU, not the cluster.  Is that a reliable assumption?
> > >
> > > As reliable as assuming the GIC HW state needs save/restore on idle-state
> > > entry, if we have no power domains information linked to CPU PM
> > > notifiers saving/restoring everything every time an idle state deeper
> > > than wfi is entered is the best we can do.
> > >
> > > As far as this patch is concerned, if the PMU is on a different power
> > > domain than the CPU, I agree that stopping/resetting/restoring the
> > > PMU registers is a waste of cycles (I will test it also by triggering
> > > the notifiers on wfi, to make sure the reset is not disruptive on
> > > a cpu that is not powered off), I see no alternative other than disabling
> > > idle to carry out profiling sanely (or implement CPU PM notifiers with
> > > runtime PM).
> > 
> > I totally understand - I'm faced with the same problem on Coresight.
> > To me the ultimate solution is still to integrate CPU power domain
> > management with runtime PM (like we talked about many times before).
> > I know Lina is working on something that will get us close to that but
> > it's not finished yet.
> 
> I've been waiting for that code for *years* now...
> 
> Ultimately, Juno-r2 loses its PMU state over idle if you run mainline
> on it and, worse still, you end up getting junk numbers back to userspace,
> so it's not even obvious what happened.

I hacked the idle driver to trigger the notifiers on WFI (so the PMU
state is kept intact on idle exit, to mimic idle states that do not
destroy PMU context) and still get reasonable perf stats. This needs
more testing, but it means that save/restore still works on idle
states that do not destroy PMU context (ok, we add a profiling
black-out period between CPU PM entry and exit, that can't be
avoided).

> I'm not aware of any flags that indicate loss of state.

The kvm notifier had to add a check (in hyp_init_cpu_pm_notifier()),
since restoring the KVM hooks on a cpu that was not actually reset
triggers kernel errors. The point is, we would need a hook (this is
the generic arm perf core, it has to work on all arm PMUs) that allows
us to detect whether a reset happened; the question is whether we
should bother (and whether it is doable). Given the test above, I am
happy to implement it.

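For reference, the KVM check I am referring to looks more or less like
this (from memory, so the details may not match the actual source
exactly):

static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
				    unsigned long cmd, void *v)
{
	/*
	 * Re-initialise hyp mode only if the cpu really went through a
	 * reset, i.e. it is back on the default (stub) vectors.
	 */
	if (cmd == CPU_PM_EXIT &&
	    __hyp_get_vectors() == hyp_default_vectors) {
		cpu_init_hyp_mode(NULL);
		return NOTIFY_OK;
	}

	return NOTIFY_DONE;
}
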
I definitely agree it is time to fix this; it has been hanging in
the balance for too long.

Lorenzo
Kevin Hilman Feb. 25, 2016, 9:55 p.m. UTC | #11
Will Deacon <will.deacon@arm.com> writes:

> On Thu, Feb 25, 2016 at 09:32:17AM -0700, Mathieu Poirier wrote:
>> On 25 February 2016 at 02:44, Lorenzo Pieralisi
>> <lorenzo.pieralisi@arm.com> wrote:
>> > On Wed, Feb 24, 2016 at 05:10:20PM -0800, Kevin Hilman wrote:
>> >> Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> writes:
>> >>
>> >> > When a CPU is suspended (either through suspend-to-RAM or CPUidle),
>> >> > its PMU registers content can be lost, which means that counters
>> >> > registers values that were initialized on power down entry have to be
>> >> > reprogrammed on power-up to make sure the counters set-up is preserved
>> >>
>> >> This assumes that the PMUs are always sharing a power rail with a
>> >> specific CPU, not the cluster.  Is that a reliable assumption?
>> >
>> > As reliable as assuming the GIC HW state needs save/restore on idle-state
>> > entry, if we have no power domains information linked to CPU PM
>> > notifiers saving/restoring everything every time an idle state deeper
>> > than wfi is entered is the best we can do.
>> >
>> > As far as this patch is concerned, if the PMU is on a different power
>> > domain than the CPU, I agree that stopping/resetting/restoring the
>> > PMU registers is a waste of cycles (I will test it also by triggering
>> > the notifiers on wfi, to make sure the reset is not disruptive on
>> > a cpu that is not powered off), I see no alternative other than disabling
>> > idle to carry out profiling sanely (or implement CPU PM notifiers with
>> > runtime PM).
>> 
>> I totally understand - I'm faced with the same problem on Coresight.
>> To me the ultimate solution is still to integrate CPU power domain
>> management with runtime PM (like we talked about many times before).
>> I know Lina is working on something that will get us close to that but
>> it's not finished yet.
>
> I've been waiting for that code for *years* now...

It's still coming... ;)

But I didn't mean to suggest this patch should wait for a PM-domain
based solution; I just wanted to be clear about the assumptions.  I
have no objections to this patch, as it fixes a long-standing issue.

Acked-by: Kevin Hilman <khilman@baylibre.com>

Kevin
Ashwin Chaugule Feb. 26, 2016, 12:22 a.m. UTC | #12
On 24 February 2016 at 17:31, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
> On Wed, Feb 24, 2016 at 02:53:02PM -0500, Ashwin Chaugule wrote:
>> Hi Lorenzo,
>>
>> On 24 February 2016 at 12:35, Lorenzo Pieralisi
>> <lorenzo.pieralisi@arm.com> wrote:
>> > On Wed, Feb 24, 2016 at 09:20:22AM -0700, Mathieu Poirier wrote:
>> >> On 23 February 2016 at 11:22, Lorenzo Pieralisi
>> >
>> > [...]
>> >
>> >> > +static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
>> >> > +                            void *v)
>> >> > +{
>> >> > +       struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
>> >> > +       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
>> >> > +       int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
>> >> > +
>> >> > +       if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
>> >> > +               return NOTIFY_DONE;
>> >> > +
>> >> > +       /*
>> >> > +        * Always reset the PMU registers on power-up even if
>> >> > +        * there are no events running.
>> >> > +        */
>> >> > +       if (cmd == CPU_PM_EXIT && armpmu->reset)
>> >> > +               armpmu->reset(armpmu);
>> >>
>> >> I think this patch does the right thing but I can't get the above
>> >> reset.  Wouldn't it be better to do the reset as part of the
>> >> CPU_PM_EXIT case below?  At this point nothing tells us the CPU won't
>> >> go back down before the event is enabled, wasting the cycle needed to
>> >> reset the PMU.
>> >
>> > The logic goes, if the cpu is woken up and it has no events enabled,
>> > if we do not reset it (mind, ->reset here sets the PMU register values
>> > to a sane default, some of them are architecturally UNKNOWN on reset, it
>> > does NOT reset the PMU) _and_ we subsequently install an event on it we
>> > do have a problem, that's why whenever a core gets out of low-power we
>> > have to reset its pmu.
>>
>> Dont we blow out the previous value in the counter when installing an
>> event? Or has that changed lately? IIRC, there was some initial value
>> we'd program into the counter to avoid missing the overflow event.
>> (sorry its been too long) ;)
>
> If you mean there is no need of resetting the value since we are
> overwriting it anyway you should see ->reset from a PMU unit POW
> not just an event. If you look at the ->reset method for eg v8, you
> will see that the reset method operates on all counters and the PMU
> unit as a whole, that's the only sane way of setting up the PMU unit
> before enabling single counters (some registers are UNKNOWN at reset).
>
> To make my reply to Mathieu clearer, the ->reset method contains
> operations (eg writing PMCR counters reset bits) that do carry out
> counters reset, what I wanted to say is that the ->reset method
> does not by itself drive the PMU HW reset signal, that's what the
> power controller does when it resets the CPU on power up.
>
> The PMU ->reset method must be called on a cpu on power-up, to make
> sure PMU HW is set-up to sane default values and ready to be used (ie
> install counters), for instance on v8 all counters must be disabled
> (irq inclusive) and reset, that's what armv8pmu_reset() does.
>
> I hope this makes things clearer.
>

Indeed. Thanks for the explanation. The patch looks good to me.

Acked-by: Ashwin Chaugule <ashwin.chaugule@linaro.org>


> Thanks,
> Lorenzo

Patch

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 166637f..12317a9 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -13,6 +13,7 @@ 
 
 #include <linux/bitmap.h>
 #include <linux/cpumask.h>
+#include <linux/cpu_pm.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/of_device.h>
@@ -710,6 +711,93 @@  static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
 	return NOTIFY_OK;
 }
 
+#ifdef CONFIG_CPU_PM
+static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
+{
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	struct perf_event *event;
+	int idx;
+
+	for (idx = 0; idx < armpmu->num_events; idx++) {
+		/*
+		 * If the counter is not used skip it, there is no
+		 * need of stopping/restarting it.
+		 */
+		if (!test_bit(idx, hw_events->used_mask))
+			continue;
+
+		event = hw_events->events[idx];
+
+		switch (cmd) {
+		case CPU_PM_ENTER:
+			/*
+			 * Stop and update the counter
+			 */
+			armpmu_stop(event, PERF_EF_UPDATE);
+			break;
+		case CPU_PM_EXIT:
+		case CPU_PM_ENTER_FAILED:
+			 /* Restore and enable the counter */
+			armpmu_start(event, PERF_EF_RELOAD);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+static int cpu_pm_pmu_notify(struct notifier_block *b, unsigned long cmd,
+			     void *v)
+{
+	struct arm_pmu *armpmu = container_of(b, struct arm_pmu, cpu_pm_nb);
+	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
+	int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
+
+	if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus))
+		return NOTIFY_DONE;
+
+	/*
+	 * Always reset the PMU registers on power-up even if
+	 * there are no events running.
+	 */
+	if (cmd == CPU_PM_EXIT && armpmu->reset)
+		armpmu->reset(armpmu);
+
+	if (!enabled)
+		return NOTIFY_OK;
+
+	switch (cmd) {
+	case CPU_PM_ENTER:
+		armpmu->stop(armpmu);
+		cpu_pm_pmu_setup(armpmu, cmd);
+		break;
+	case CPU_PM_EXIT:
+		cpu_pm_pmu_setup(armpmu, cmd);
+	case CPU_PM_ENTER_FAILED:
+		armpmu->start(armpmu);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu)
+{
+	cpu_pmu->cpu_pm_nb.notifier_call = cpu_pm_pmu_notify;
+	return cpu_pm_register_notifier(&cpu_pmu->cpu_pm_nb);
+}
+
+static void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu)
+{
+	cpu_pm_unregister_notifier(&cpu_pmu->cpu_pm_nb);
+}
+#else
+static inline int cpu_pm_pmu_register(struct arm_pmu *cpu_pmu) { return 0; }
+static inline void cpu_pm_pmu_unregister(struct arm_pmu *cpu_pmu) { }
+#endif
+
 static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 {
 	int err;
@@ -725,6 +813,10 @@  static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 	if (err)
 		goto out_hw_events;
 
+	err = cpu_pm_pmu_register(cpu_pmu);
+	if (err)
+		goto out_unregister;
+
 	for_each_possible_cpu(cpu) {
 		struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
 		raw_spin_lock_init(&events->pmu_lock);
@@ -746,6 +838,8 @@  static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 
 	return 0;
 
+out_unregister:
+	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
 out_hw_events:
 	free_percpu(cpu_hw_events);
 	return err;
@@ -753,6 +847,7 @@  out_hw_events:
 
 static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
 {
+	cpu_pm_pmu_unregister(cpu_pmu);
 	unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
 	free_percpu(cpu_pmu->hw_events);
 }
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 83b5e34..9ffa316 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -107,6 +107,7 @@  struct arm_pmu {
 	struct platform_device	*plat_device;
 	struct pmu_hw_events	__percpu *hw_events;
 	struct notifier_block	hotplug_nb;
+	struct notifier_block	cpu_pm_nb;
 };
 
 #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))