diff mbox series

cpufreq: Record stats when fast switching is enabled

Message ID 20190131015139.126890-1-mka@chromium.org (mailing list archive)
State Changes Requested, archived
Headers show
Series cpufreq: Record stats when fast switching is enabled | expand

Commit Message

Matthias Kaehlcke Jan. 31, 2019, 1:51 a.m. UTC
When fast switching is enabled currently no cpufreq stats are
recorded and the corresponding sysfs attributes appear empty (see
also commit 1aefc75b2449 ("cpufreq: stats: Make the stats code
non-modular")).

Record the stats after a successful fast switch and re-enable access
through sysfs when fast switching is enabled. Since
cpufreq_stats_update() can now be called in interrupt context (during
a fast switch) disable local IRQs while holding the stats spinlock.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
---
The change is so simple that I wonder if I'm missing some important
reason why the stats can't/shouldn't be updated during/after a fast
switch ...

I would expect that holding the stats spinlock briefly in
cpufreq_stats_update() shouldn't be a problem. In theory it would
also be an option to have a per stats lock, though it seems overkill
from my (possibly ignorant) point of view.
---
 drivers/cpufreq/cpufreq.c       |  8 +++++++-
 drivers/cpufreq/cpufreq_stats.c | 11 +++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

Comments

Viresh Kumar Jan. 31, 2019, 8:30 a.m. UTC | #1
On 30-01-19, 17:51, Matthias Kaehlcke wrote:
> When fast switching is enabled currently no cpufreq stats are
> recorded and the corresponding sysfs attributes appear empty (see
> also commit 1aefc75b2449 ("cpufreq: stats: Make the stats code
> non-modular")).
> 
> Record the stats after a successful fast switch and re-enable access
> through sysfs when fast switching is enabled. Since
> cpufreq_stats_update() can now be called in interrupt context (during
> a fast switch) disable local IRQs while holding the stats spinlock.
> 
> Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> ---
> The change is so simple that I wonder if I'm missing some important
> reason why the stats can't/shouldn't be updated during/after a fast
> switch ...
> 
> I would expect that holding the stats spinlock briefly in
> cpufreq_stats_update() shouldn't be a problem. In theory it would
> also be an option to have a per stats lock, though it seems overkill
> from my (possibly ignorant) point of view.
> ---
>  drivers/cpufreq/cpufreq.c       |  8 +++++++-
>  drivers/cpufreq/cpufreq_stats.c | 11 +++--------
>  2 files changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index e35a886e00bcf..63aadb0bbddfe 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1857,9 +1857,15 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
>  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
>  					unsigned int target_freq)
>  {
> +	unsigned int freq;
> +
>  	target_freq = clamp_val(target_freq, policy->min, policy->max);
>  
> -	return cpufreq_driver->fast_switch(policy, target_freq);
> +	freq = cpufreq_driver->fast_switch(policy, target_freq);
> +	if (freq)
> +		cpufreq_stats_record_transition(policy, freq);
> +
> +	return freq;
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
>  
> diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> index 1572129844a5b..21b919bfaeccf 100644
> --- a/drivers/cpufreq/cpufreq_stats.c
> +++ b/drivers/cpufreq/cpufreq_stats.c
> @@ -30,11 +30,12 @@ struct cpufreq_stats {
>  static void cpufreq_stats_update(struct cpufreq_stats *stats)
>  {
>  	unsigned long long cur_time = get_jiffies_64();
> +	unsigned long flags;
>  
> -	spin_lock(&cpufreq_stats_lock);
> +	spin_lock_irqsave(&cpufreq_stats_lock, flags);
>  	stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
>  	stats->last_time = cur_time;
> -	spin_unlock(&cpufreq_stats_lock);
> +	spin_unlock_irqrestore(&cpufreq_stats_lock, flags);
>  }

The only problem that I can think of (or recall) is that this routine
also gets called when time_in_state sysfs file is read and that can
end up taking lock which the scheduler's hotpath will wait for.
Rafael J. Wysocki Jan. 31, 2019, 10:02 a.m. UTC | #2
On Thu, Jan 31, 2019 at 2:51 AM Matthias Kaehlcke <mka@chromium.org> wrote:
>
> When fast switching is enabled currently no cpufreq stats are
> recorded and the corresponding sysfs attributes appear empty (see
> also commit 1aefc75b2449 ("cpufreq: stats: Make the stats code
> non-modular")).
>
> Record the stats after a successful fast switch and re-enable access
> through sysfs when fast switching is enabled. Since
> cpufreq_stats_update() can now be called in interrupt context (during
> a fast switch) disable local IRQs while holding the stats spinlock.
>
> Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> ---
> The change is so simple that I wonder if I'm missing some important
> reason why the stats can't/shouldn't be updated during/after a fast
> switch ...
>
> I would expect that holding the stats spinlock briefly in
> cpufreq_stats_update() shouldn't be a problem. In theory it would
> also be an option to have a per stats lock, though it seems overkill
> from my (possibly ignorant) point of view.
> ---
>  drivers/cpufreq/cpufreq.c       |  8 +++++++-
>  drivers/cpufreq/cpufreq_stats.c | 11 +++--------
>  2 files changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index e35a886e00bcf..63aadb0bbddfe 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1857,9 +1857,15 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
>  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
>                                         unsigned int target_freq)
>  {
> +       unsigned int freq;
> +
>         target_freq = clamp_val(target_freq, policy->min, policy->max);
>
> -       return cpufreq_driver->fast_switch(policy, target_freq);
> +       freq = cpufreq_driver->fast_switch(policy, target_freq);
> +       if (freq)
> +               cpufreq_stats_record_transition(policy, freq);

No extra locking in the fast switch path, pretty please.

This runs in the scheduler context and it adds enough overhead in there already.

> +
> +       return freq;
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
Rafael J. Wysocki Jan. 31, 2019, 10:03 a.m. UTC | #3
On Thu, Jan 31, 2019 at 9:30 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 30-01-19, 17:51, Matthias Kaehlcke wrote:
> > When fast switching is enabled currently no cpufreq stats are
> > recorded and the corresponding sysfs attributes appear empty (see
> > also commit 1aefc75b2449 ("cpufreq: stats: Make the stats code
> > non-modular")).
> >
> > Record the stats after a successful fast switch and re-enable access
> > through sysfs when fast switching is enabled. Since
> > cpufreq_stats_update() can now be called in interrupt context (during
> > a fast switch) disable local IRQs while holding the stats spinlock.
> >
> > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > ---
> > The change is so simple that I wonder if I'm missing some important
> > reason why the stats can't/shouldn't be updated during/after a fast
> > switch ...
> >
> > I would expect that holding the stats spinlock briefly in
> > cpufreq_stats_update() shouldn't be a problem. In theory it would
> > also be an option to have a per stats lock, though it seems overkill
> > from my (possibly ignorant) point of view.
> > ---
> >  drivers/cpufreq/cpufreq.c       |  8 +++++++-
> >  drivers/cpufreq/cpufreq_stats.c | 11 +++--------
> >  2 files changed, 10 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > index e35a886e00bcf..63aadb0bbddfe 100644
> > --- a/drivers/cpufreq/cpufreq.c
> > +++ b/drivers/cpufreq/cpufreq.c
> > @@ -1857,9 +1857,15 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
> >  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> >                                       unsigned int target_freq)
> >  {
> > +     unsigned int freq;
> > +
> >       target_freq = clamp_val(target_freq, policy->min, policy->max);
> >
> > -     return cpufreq_driver->fast_switch(policy, target_freq);
> > +     freq = cpufreq_driver->fast_switch(policy, target_freq);
> > +     if (freq)
> > +             cpufreq_stats_record_transition(policy, freq);
> > +
> > +     return freq;
> >  }
> >  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
> >
> > diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> > index 1572129844a5b..21b919bfaeccf 100644
> > --- a/drivers/cpufreq/cpufreq_stats.c
> > +++ b/drivers/cpufreq/cpufreq_stats.c
> > @@ -30,11 +30,12 @@ struct cpufreq_stats {
> >  static void cpufreq_stats_update(struct cpufreq_stats *stats)
> >  {
> >       unsigned long long cur_time = get_jiffies_64();
> > +     unsigned long flags;
> >
> > -     spin_lock(&cpufreq_stats_lock);
> > +     spin_lock_irqsave(&cpufreq_stats_lock, flags);
> >       stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
> >       stats->last_time = cur_time;
> > -     spin_unlock(&cpufreq_stats_lock);
> > +     spin_unlock_irqrestore(&cpufreq_stats_lock, flags);
> >  }
>
> The only problem that I can think of (or recall) is that this routine
> also gets called when time_in_state sysfs file is read and that can
> end up taking lock which the scheduler's hotpath will wait for.

What about the extra locking overhead in the scheduler context?
Viresh Kumar Jan. 31, 2019, 10:07 a.m. UTC | #4
On 31-01-19, 11:03, Rafael J. Wysocki wrote:
> On Thu, Jan 31, 2019 at 9:30 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >
> > On 30-01-19, 17:51, Matthias Kaehlcke wrote:
> > > When fast switching is enabled currently no cpufreq stats are
> > > recorded and the corresponding sysfs attributes appear empty (see
> > > also commit 1aefc75b2449 ("cpufreq: stats: Make the stats code
> > > non-modular")).
> > >
> > > Record the stats after a successful fast switch and re-enable access
> > > through sysfs when fast switching is enabled. Since
> > > cpufreq_stats_update() can now be called in interrupt context (during
> > > a fast switch) disable local IRQs while holding the stats spinlock.
> > >
> > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > > ---
> > > The change is so simple that I wonder if I'm missing some important
> > > reason why the stats can't/shouldn't be updated during/after a fast
> > > switch ...
> > >
> > > I would expect that holding the stats spinlock briefly in
> > > cpufreq_stats_update() shouldn't be a problem. In theory it would
> > > also be an option to have a per stats lock, though it seems overkill
> > > from my (possibly ignorant) point of view.
> > > ---
> > >  drivers/cpufreq/cpufreq.c       |  8 +++++++-
> > >  drivers/cpufreq/cpufreq_stats.c | 11 +++--------
> > >  2 files changed, 10 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > > index e35a886e00bcf..63aadb0bbddfe 100644
> > > --- a/drivers/cpufreq/cpufreq.c
> > > +++ b/drivers/cpufreq/cpufreq.c
> > > @@ -1857,9 +1857,15 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
> > >  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> > >                                       unsigned int target_freq)
> > >  {
> > > +     unsigned int freq;
> > > +
> > >       target_freq = clamp_val(target_freq, policy->min, policy->max);
> > >
> > > -     return cpufreq_driver->fast_switch(policy, target_freq);
> > > +     freq = cpufreq_driver->fast_switch(policy, target_freq);
> > > +     if (freq)
> > > +             cpufreq_stats_record_transition(policy, freq);
> > > +
> > > +     return freq;
> > >  }
> > >  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
> > >
> > > diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> > > index 1572129844a5b..21b919bfaeccf 100644
> > > --- a/drivers/cpufreq/cpufreq_stats.c
> > > +++ b/drivers/cpufreq/cpufreq_stats.c
> > > @@ -30,11 +30,12 @@ struct cpufreq_stats {
> > >  static void cpufreq_stats_update(struct cpufreq_stats *stats)
> > >  {
> > >       unsigned long long cur_time = get_jiffies_64();
> > > +     unsigned long flags;
> > >
> > > -     spin_lock(&cpufreq_stats_lock);
> > > +     spin_lock_irqsave(&cpufreq_stats_lock, flags);
> > >       stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
> > >       stats->last_time = cur_time;
> > > -     spin_unlock(&cpufreq_stats_lock);
> > > +     spin_unlock_irqrestore(&cpufreq_stats_lock, flags);
> > >  }
> >
> > The only problem that I can think of (or recall) is that this routine
> > also gets called when time_in_state sysfs file is read and that can
> > end up taking lock which the scheduler's hotpath will wait for.
> 
> What about the extra locking overhead in the scheduler context?

What about using READ_ONCE/WRITE_ONCE here ? Not sure if we really
need locking in this particular case.
Rafael J. Wysocki Jan. 31, 2019, 10:14 a.m. UTC | #5
On Thu, Jan 31, 2019 at 11:07 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 31-01-19, 11:03, Rafael J. Wysocki wrote:
> > On Thu, Jan 31, 2019 at 9:30 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > >
> > > On 30-01-19, 17:51, Matthias Kaehlcke wrote:
> > > > When fast switching is enabled currently no cpufreq stats are
> > > > recorded and the corresponding sysfs attributes appear empty (see
> > > > also commit 1aefc75b2449 ("cpufreq: stats: Make the stats code
> > > > non-modular")).
> > > >
> > > > Record the stats after a successful fast switch and re-enable access
> > > > through sysfs when fast switching is enabled. Since
> > > > cpufreq_stats_update() can now be called in interrupt context (during
> > > > a fast switch) disable local IRQs while holding the stats spinlock.
> > > >
> > > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > > > ---
> > > > The change is so simple that I wonder if I'm missing some important
> > > > reason why the stats can't/shouldn't be updated during/after a fast
> > > > switch ...
> > > >
> > > > I would expect that holding the stats spinlock briefly in
> > > > cpufreq_stats_update() shouldn't be a problem. In theory it would
> > > > also be an option to have a per stats lock, though it seems overkill
> > > > from my (possibly ignorant) point of view.
> > > > ---
> > > >  drivers/cpufreq/cpufreq.c       |  8 +++++++-
> > > >  drivers/cpufreq/cpufreq_stats.c | 11 +++--------
> > > >  2 files changed, 10 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > > > index e35a886e00bcf..63aadb0bbddfe 100644
> > > > --- a/drivers/cpufreq/cpufreq.c
> > > > +++ b/drivers/cpufreq/cpufreq.c
> > > > @@ -1857,9 +1857,15 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
> > > >  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> > > >                                       unsigned int target_freq)
> > > >  {
> > > > +     unsigned int freq;
> > > > +
> > > >       target_freq = clamp_val(target_freq, policy->min, policy->max);
> > > >
> > > > -     return cpufreq_driver->fast_switch(policy, target_freq);
> > > > +     freq = cpufreq_driver->fast_switch(policy, target_freq);
> > > > +     if (freq)
> > > > +             cpufreq_stats_record_transition(policy, freq);
> > > > +
> > > > +     return freq;
> > > >  }
> > > >  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
> > > >
> > > > diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> > > > index 1572129844a5b..21b919bfaeccf 100644
> > > > --- a/drivers/cpufreq/cpufreq_stats.c
> > > > +++ b/drivers/cpufreq/cpufreq_stats.c
> > > > @@ -30,11 +30,12 @@ struct cpufreq_stats {
> > > >  static void cpufreq_stats_update(struct cpufreq_stats *stats)
> > > >  {
> > > >       unsigned long long cur_time = get_jiffies_64();
> > > > +     unsigned long flags;
> > > >
> > > > -     spin_lock(&cpufreq_stats_lock);
> > > > +     spin_lock_irqsave(&cpufreq_stats_lock, flags);
> > > >       stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
> > > >       stats->last_time = cur_time;
> > > > -     spin_unlock(&cpufreq_stats_lock);
> > > > +     spin_unlock_irqrestore(&cpufreq_stats_lock, flags);
> > > >  }
> > >
> > > The only problem that I can think of (or recall) is that this routine
> > > also gets called when time_in_state sysfs file is read and that can
> > > end up taking lock which the scheduler's hotpath will wait for.
> >
> > What about the extra locking overhead in the scheduler context?
>
> What about using READ_ONCE/WRITE_ONCE here ? Not sure if we really
> need locking in this particular case.

If that works, then fine, but ISTR some synchronization issues related to that.
Matthias Kaehlcke Jan. 31, 2019, 6:37 p.m. UTC | #6
On Thu, Jan 31, 2019 at 11:14:03AM +0100, Rafael J. Wysocki wrote:
> On Thu, Jan 31, 2019 at 11:07 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >
> > On 31-01-19, 11:03, Rafael J. Wysocki wrote:
> > > On Thu, Jan 31, 2019 at 9:30 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > >
> > > > On 30-01-19, 17:51, Matthias Kaehlcke wrote:
> > > > > When fast switching is enabled currently no cpufreq stats are
> > > > > recorded and the corresponding sysfs attributes appear empty (see
> > > > > also commit 1aefc75b2449 ("cpufreq: stats: Make the stats code
> > > > > non-modular")).
> > > > >
> > > > > Record the stats after a successful fast switch and re-enable access
> > > > > through sysfs when fast switching is enabled. Since
> > > > > cpufreq_stats_update() can now be called in interrupt context (during
> > > > > a fast switch) disable local IRQs while holding the stats spinlock.
> > > > >
> > > > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > > > > ---
> > > > > The change is so simple that I wonder if I'm missing some important
> > > > > reason why the stats can't/shouldn't be updated during/after a fast
> > > > > switch ...
> > > > >
> > > > > I would expect that holding the stats spinlock briefly in
> > > > > cpufreq_stats_update() shouldn't be a problem. In theory it would
> > > > > also be an option to have a per stats lock, though it seems overkill
> > > > > from my (possibly ignorant) point of view.
> > > > > ---
> > > > >  drivers/cpufreq/cpufreq.c       |  8 +++++++-
> > > > >  drivers/cpufreq/cpufreq_stats.c | 11 +++--------
> > > > >  2 files changed, 10 insertions(+), 9 deletions(-)
> > > > >
> > > > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > > > > index e35a886e00bcf..63aadb0bbddfe 100644
> > > > > --- a/drivers/cpufreq/cpufreq.c
> > > > > +++ b/drivers/cpufreq/cpufreq.c
> > > > > @@ -1857,9 +1857,15 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
> > > > >  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> > > > >                                       unsigned int target_freq)
> > > > >  {
> > > > > +     unsigned int freq;
> > > > > +
> > > > >       target_freq = clamp_val(target_freq, policy->min, policy->max);
> > > > >
> > > > > -     return cpufreq_driver->fast_switch(policy, target_freq);
> > > > > +     freq = cpufreq_driver->fast_switch(policy, target_freq);
> > > > > +     if (freq)
> > > > > +             cpufreq_stats_record_transition(policy, freq);
> > > > > +
> > > > > +     return freq;
> > > > >  }
> > > > >  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
> > > > >
> > > > > diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> > > > > index 1572129844a5b..21b919bfaeccf 100644
> > > > > --- a/drivers/cpufreq/cpufreq_stats.c
> > > > > +++ b/drivers/cpufreq/cpufreq_stats.c
> > > > > @@ -30,11 +30,12 @@ struct cpufreq_stats {
> > > > >  static void cpufreq_stats_update(struct cpufreq_stats *stats)
> > > > >  {
> > > > >       unsigned long long cur_time = get_jiffies_64();
> > > > > +     unsigned long flags;
> > > > >
> > > > > -     spin_lock(&cpufreq_stats_lock);
> > > > > +     spin_lock_irqsave(&cpufreq_stats_lock, flags);
> > > > >       stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
> > > > >       stats->last_time = cur_time;
> > > > > -     spin_unlock(&cpufreq_stats_lock);
> > > > > +     spin_unlock_irqrestore(&cpufreq_stats_lock, flags);
> > > > >  }
> > > >
> > > > The only problem that I can think of (or recall) is that this routine
> > > > also gets called when time_in_state sysfs file is read and that can
> > > > end up taking lock which the scheduler's hotpath will wait for.
> > >
> > > What about the extra locking overhead in the scheduler context?
> >
> > What about using READ_ONCE/WRITE_ONCE here ? Not sure if we really
> > need locking in this particular case.
> 
> If that works, then fine, but ISTR some synchronization issues related to that.

I also think there would be synchronization issues :(

Is your main concern with the spin lock the contention case or the
general overhead of locking?

It would be really nice to have cpufreq stats with schedutil. We
initially considered a sysfs attribute to temporarily disable
fast switching, but on closer inspection this seems messy (it would
require quite some rework in cpufreq_schedutil.c), besides not
recording the actual behavior.

If another (rarely and only briefly held) lock in scheduler context is
a no-go, deferred recording could be an option, if that can be
implemented without locks in scheduler context.
Rafael J. Wysocki Jan. 31, 2019, 11:34 p.m. UTC | #7
On Thursday, January 31, 2019 7:37:30 PM CET Matthias Kaehlcke wrote:
> On Thu, Jan 31, 2019 at 11:14:03AM +0100, Rafael J. Wysocki wrote:
> > On Thu, Jan 31, 2019 at 11:07 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > >
> > > On 31-01-19, 11:03, Rafael J. Wysocki wrote:
> > > > On Thu, Jan 31, 2019 at 9:30 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > > >
> > > > > On 30-01-19, 17:51, Matthias Kaehlcke wrote:
> > > > > > When fast switching is enabled currently no cpufreq stats are
> > > > > > recorded and the corresponding sysfs attributes appear empty (see
> > > > > > also commit 1aefc75b2449 ("cpufreq: stats: Make the stats code
> > > > > > non-modular")).
> > > > > >
> > > > > > Record the stats after a successful fast switch and re-enable access
> > > > > > through sysfs when fast switching is enabled. Since
> > > > > > cpufreq_stats_update() can now be called in interrupt context (during
> > > > > > a fast switch) disable local IRQs while holding the stats spinlock.
> > > > > >
> > > > > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > > > > > ---
> > > > > > The change is so simple that I wonder if I'm missing some important
> > > > > > reason why the stats can't/shouldn't be updated during/after a fast
> > > > > > switch ...
> > > > > >
> > > > > > I would expect that holding the stats spinlock briefly in
> > > > > > cpufreq_stats_update() shouldn't be a problem. In theory it would
> > > > > > also be an option to have a per stats lock, though it seems overkill
> > > > > > from my (possibly ignorant) point of view.
> > > > > > ---
> > > > > >  drivers/cpufreq/cpufreq.c       |  8 +++++++-
> > > > > >  drivers/cpufreq/cpufreq_stats.c | 11 +++--------
> > > > > >  2 files changed, 10 insertions(+), 9 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > > > > > index e35a886e00bcf..63aadb0bbddfe 100644
> > > > > > --- a/drivers/cpufreq/cpufreq.c
> > > > > > +++ b/drivers/cpufreq/cpufreq.c
> > > > > > @@ -1857,9 +1857,15 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
> > > > > >  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> > > > > >                                       unsigned int target_freq)
> > > > > >  {
> > > > > > +     unsigned int freq;
> > > > > > +
> > > > > >       target_freq = clamp_val(target_freq, policy->min, policy->max);
> > > > > >
> > > > > > -     return cpufreq_driver->fast_switch(policy, target_freq);
> > > > > > +     freq = cpufreq_driver->fast_switch(policy, target_freq);
> > > > > > +     if (freq)
> > > > > > +             cpufreq_stats_record_transition(policy, freq);
> > > > > > +
> > > > > > +     return freq;
> > > > > >  }
> > > > > >  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
> > > > > >
> > > > > > diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> > > > > > index 1572129844a5b..21b919bfaeccf 100644
> > > > > > --- a/drivers/cpufreq/cpufreq_stats.c
> > > > > > +++ b/drivers/cpufreq/cpufreq_stats.c
> > > > > > @@ -30,11 +30,12 @@ struct cpufreq_stats {
> > > > > >  static void cpufreq_stats_update(struct cpufreq_stats *stats)
> > > > > >  {
> > > > > >       unsigned long long cur_time = get_jiffies_64();
> > > > > > +     unsigned long flags;
> > > > > >
> > > > > > -     spin_lock(&cpufreq_stats_lock);
> > > > > > +     spin_lock_irqsave(&cpufreq_stats_lock, flags);
> > > > > >       stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
> > > > > >       stats->last_time = cur_time;
> > > > > > -     spin_unlock(&cpufreq_stats_lock);
> > > > > > +     spin_unlock_irqrestore(&cpufreq_stats_lock, flags);
> > > > > >  }
> > > > >
> > > > > The only problem that I can think of (or recall) is that this routine
> > > > > also gets called when time_in_state sysfs file is read and that can
> > > > > end up taking lock which the scheduler's hotpath will wait for.
> > > >
> > > > What about the extra locking overhead in the scheduler context?
> > >
> > > What about using READ_ONCE/WRITE_ONCE here ? Not sure if we really
> > > need locking in this particular case.
> > 
> > If that works, then fine, but ISTR some synchronization issues related to that.
> 
> I also think there would be synchronization issues :(
> 
> Is your main concern with the spin lock the contention case or the
> general overhead of locking?

The general overhead is bad enough.  The contention case would be a
disaster.

> It would be really nice to have cpufreq stats with schedutil. We
> initially considered a sysfs attribute to allow to temporarily disable
> fast switching, but at closer sight this seems messy (would require
> quite some rework in cpufreq_schedutil.c), besides not recording the
> actual behavior.
> 
> If another (rarely and only shortly held) lock in scheduler context

This is a global spinlock and you'd like to take it on every frequency
change for each policy.  On x86, as a rule, there is a policy per logical
CPU and systems with hundreds of these are not uncommon.  Come on.

> is a no-go deferred recording could be an option, if that can be
> implemented without locks in scheduler context.

Why do you need the stats at all in the fast switch case?

There is the cpu_frequency tracepoint that can be used to collect
all data that you need.  Why can't that be used?
Matthias Kaehlcke Feb. 1, 2019, 12:06 a.m. UTC | #8
On Fri, Feb 01, 2019 at 12:34:32AM +0100, Rafael J. Wysocki wrote:
> On Thursday, January 31, 2019 7:37:30 PM CET Matthias Kaehlcke wrote:
> > On Thu, Jan 31, 2019 at 11:14:03AM +0100, Rafael J. Wysocki wrote:
> > > On Thu, Jan 31, 2019 at 11:07 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > >
> > > > On 31-01-19, 11:03, Rafael J. Wysocki wrote:
> > > > > On Thu, Jan 31, 2019 at 9:30 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > > > >
> > > > > > On 30-01-19, 17:51, Matthias Kaehlcke wrote:
> > > > > > > When fast switching is enabled currently no cpufreq stats are
> > > > > > > recorded and the corresponding sysfs attributes appear empty (see
> > > > > > > also commit 1aefc75b2449 ("cpufreq: stats: Make the stats code
> > > > > > > non-modular")).
> > > > > > >
> > > > > > > Record the stats after a successful fast switch and re-enable access
> > > > > > > through sysfs when fast switching is enabled. Since
> > > > > > > cpufreq_stats_update() can now be called in interrupt context (during
> > > > > > > a fast switch) disable local IRQs while holding the stats spinlock.
> > > > > > >
> > > > > > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > > > > > > ---
> > > > > > > The change is so simple that I wonder if I'm missing some important
> > > > > > > reason why the stats can't/shouldn't be updated during/after a fast
> > > > > > > switch ...
> > > > > > >
> > > > > > > I would expect that holding the stats spinlock briefly in
> > > > > > > cpufreq_stats_update() shouldn't be a problem. In theory it would
> > > > > > > also be an option to have a per stats lock, though it seems overkill
> > > > > > > from my (possibly ignorant) point of view.
> > > > > > > ---
> > > > > > >  drivers/cpufreq/cpufreq.c       |  8 +++++++-
> > > > > > >  drivers/cpufreq/cpufreq_stats.c | 11 +++--------
> > > > > > >  2 files changed, 10 insertions(+), 9 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > > > > > > index e35a886e00bcf..63aadb0bbddfe 100644
> > > > > > > --- a/drivers/cpufreq/cpufreq.c
> > > > > > > +++ b/drivers/cpufreq/cpufreq.c
> > > > > > > @@ -1857,9 +1857,15 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
> > > > > > >  unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
> > > > > > >                                       unsigned int target_freq)
> > > > > > >  {
> > > > > > > +     unsigned int freq;
> > > > > > > +
> > > > > > >       target_freq = clamp_val(target_freq, policy->min, policy->max);
> > > > > > >
> > > > > > > -     return cpufreq_driver->fast_switch(policy, target_freq);
> > > > > > > +     freq = cpufreq_driver->fast_switch(policy, target_freq);
> > > > > > > +     if (freq)
> > > > > > > +             cpufreq_stats_record_transition(policy, freq);
> > > > > > > +
> > > > > > > +     return freq;
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
> > > > > > >
> > > > > > > diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> > > > > > > index 1572129844a5b..21b919bfaeccf 100644
> > > > > > > --- a/drivers/cpufreq/cpufreq_stats.c
> > > > > > > +++ b/drivers/cpufreq/cpufreq_stats.c
> > > > > > > @@ -30,11 +30,12 @@ struct cpufreq_stats {
> > > > > > >  static void cpufreq_stats_update(struct cpufreq_stats *stats)
> > > > > > >  {
> > > > > > >       unsigned long long cur_time = get_jiffies_64();
> > > > > > > +     unsigned long flags;
> > > > > > >
> > > > > > > -     spin_lock(&cpufreq_stats_lock);
> > > > > > > +     spin_lock_irqsave(&cpufreq_stats_lock, flags);
> > > > > > >       stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
> > > > > > >       stats->last_time = cur_time;
> > > > > > > -     spin_unlock(&cpufreq_stats_lock);
> > > > > > > +     spin_unlock_irqrestore(&cpufreq_stats_lock, flags);
> > > > > > >  }
> > > > > >
> > > > > > The only problem that I can think of (or recall) is that this routine
> > > > > > also gets called when time_in_state sysfs file is read and that can
> > > > > > end up taking lock which the scheduler's hotpath will wait for.
> > > > >
> > > > > What about the extra locking overhead in the scheduler context?
> > > >
> > > > What about using READ_ONCE/WRITE_ONCE here ? Not sure if we really
> > > > need locking in this particular case.
> > > 
> > > If that works, then fine, but ISTR some synchronization issues related to that.
> > 
> > I also think there would be synchronization issues :(
> > 
> > Is your main concern with the spin lock the contention case or the
> > general overhead of locking?
> 
> The general overhead is bad enough.  The contention case would be a
> disaster.
> 
> > It would be really nice to have cpufreq stats with schedutil. We
> > initially considered a sysfs attribute to allow to temporarily disable
> > fast switching, but at closer sight this seems messy (would require
> > quite some rework in cpufreq_schedutil.c), besides not recording the
> > actual behavior.
> > 
> > If another (rarely and only shortly held) lock in scheduler context
> 
> This is a global spinlock and you'd like to take it on every frequency
> change for each policy.  On x86, as a rule, there is a policy per logical
> CPU and systems with hundreds of these are not uncommon.  Come on.

Thanks for helping me to get a better understanding of the problem. If
the global spinlock was the main issue, this could be fixed by having
a per stats/policy lock, but it seems there's more than that.

> > is a no-go deferred recording could be an option, if that can be
> > implemented without locks in scheduler context.
> 
> Why do you need the stats at all in the fast switch case?

For the same reason as in the non-fast switch case, easy access to the
stats with existing tooling (or no tooling at all).

> There is the cpu_frequency tracepoint that can be used to collect
> all data that you need.  Why can't that be used?

It could be used, but requires non-standard tooling to process the
data and tracing must be enabled.

Could a CONFIG option make sense to enable it (off by default),
or is the overhead (with a per stats lock) so high that it would be
unreasonable to use it (I really don't have a good sense on this)?

Thanks

Matthias
Danny Lin July 3, 2020, 1 a.m. UTC | #9
On Thu, Jan 31, 2019 at 2:14 AM, Rafael J. Wysocki wrote:
> On Thu, Jan 31, 2019 at 11:07 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >
> > On 31-01-19, 11:03, Rafael J. Wysocki wrote:
> > > On Thu, Jan 31, 2019 at 9:30 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> > > >
> > > > The only problem that I can think of (or recall) is that this routine
> > > > also gets called when time_in_state sysfs file is read and that can
> > > > end up taking lock which the scheduler's hotpath will wait for.
> > >
> > > What about the extra locking overhead in the scheduler context?
> >
> > What about using READ_ONCE/WRITE_ONCE here ? Not sure if we really
> > need locking in this particular case.
> 
> If that works, then fine, but ISTR some synchronization issues related to that.

Maybe using READ/WRITE_ONCE for time_in_state is problematic, but is
there any reason why atomics wouldn't work for this? As far as I can
tell, atomics are necessary to protect time_in_state due to its
multi-step add operation, and READ/WRITE_ONCE can be used for last_time
because all operations on it are single-op sets/gets.

I've been using the setup described above on a downstream arm64 4.14
kernel for nearly a year with no issues. I haven't noticed any
significant anomalies in the stats so far. The system in question has 8
CPUs split into 3 cpufreq policies and fast switch is used with the
schedutil governor, so it should be exercising the stats update path
enough.

Sorry for bumping an old thread.
diff mbox series

Patch

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index e35a886e00bcf..63aadb0bbddfe 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1857,9 +1857,15 @@  EXPORT_SYMBOL(cpufreq_unregister_notifier);
 unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
 					unsigned int target_freq)
 {
+	unsigned int freq;
+
 	target_freq = clamp_val(target_freq, policy->min, policy->max);
 
-	return cpufreq_driver->fast_switch(policy, target_freq);
+	freq = cpufreq_driver->fast_switch(policy, target_freq);
+	if (freq)
+		cpufreq_stats_record_transition(policy, freq);
+
+	return freq;
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
 
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index 1572129844a5b..21b919bfaeccf 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -30,11 +30,12 @@  struct cpufreq_stats {
 static void cpufreq_stats_update(struct cpufreq_stats *stats)
 {
 	unsigned long long cur_time = get_jiffies_64();
+	unsigned long flags;
 
-	spin_lock(&cpufreq_stats_lock);
+	spin_lock_irqsave(&cpufreq_stats_lock, flags);
 	stats->time_in_state[stats->last_index] += cur_time - stats->last_time;
 	stats->last_time = cur_time;
-	spin_unlock(&cpufreq_stats_lock);
+	spin_unlock_irqrestore(&cpufreq_stats_lock, flags);
 }
 
 static void cpufreq_stats_clear_table(struct cpufreq_stats *stats)
@@ -58,9 +59,6 @@  static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
 	ssize_t len = 0;
 	int i;
 
-	if (policy->fast_switch_enabled)
-		return 0;
-
 	cpufreq_stats_update(stats);
 	for (i = 0; i < stats->state_num; i++) {
 		len += sprintf(buf + len, "%u %llu\n", stats->freq_table[i],
@@ -84,9 +82,6 @@  static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf)
 	ssize_t len = 0;
 	int i, j;
 
-	if (policy->fast_switch_enabled)
-		return 0;
-
 	len += snprintf(buf + len, PAGE_SIZE - len, "   From  :    To\n");
 	len += snprintf(buf + len, PAGE_SIZE - len, "         : ");
 	for (i = 0; i < stats->state_num; i++) {