diff mbox

[1/5] cpufreq: handle SW coordinated CPUs

Message ID 1356620142-8680-2-git-send-email-fabio.baltieri@linaro.org (mailing list archive)
State Accepted, archived
Headers show

Commit Message

Fabio Baltieri Dec. 27, 2012, 2:55 p.m. UTC
From: Rickard Andersson <rickard.andersson@stericsson.com>

This patch fixes a bug that occurred when we had load on a secondary CPU
and the primary CPU was sleeping. Only one sampling timer was spawned
and it was spawned as a deferred timer on the primary CPU, so when a
secondary CPU had a change in load this was not detected by the cpufreq
governor (both ondemand and conservative).

This patch makes sure that deferred timers are run on all CPUs in the
case of software controlled CPUs that run on the same frequency.

Signed-off-by: Rickard Andersson <rickard.andersson@stericsson.com>
Signed-off-by: Fabio Baltieri <fabio.baltieri@linaro.org>
---
 drivers/cpufreq/cpufreq_conservative.c |  3 ++-
 drivers/cpufreq/cpufreq_governor.c     | 44 +++++++++++++++++++++++++++++-----
 drivers/cpufreq/cpufreq_governor.h     |  1 +
 drivers/cpufreq/cpufreq_ondemand.c     |  3 ++-
 4 files changed, 43 insertions(+), 8 deletions(-)

Comments

Viresh Kumar Jan. 30, 2013, 7:03 a.m. UTC | #1
I am starting to follow cpufreq patches religiously now and so have to come
back to this old thread due to some crash we got :)

It's still not pushed upstream, so better to get it resolved before 3.9.

On Thu, Dec 27, 2012 at 8:25 PM, Fabio Baltieri
<fabio.baltieri@linaro.org> wrote:

> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c

>  static inline void dbs_timer_init(struct dbs_data *dbs_data,
> -               struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate)
> +                                 struct cpu_dbs_common_info *cdbs,
> +                                 unsigned int sampling_rate,
> +                                 int cpu)
>  {
>         int delay = delay_for_sampling_rate(sampling_rate);
> +       struct cpu_dbs_common_info *cdbs_local = dbs_data->get_cpu_cdbs(cpu);

I couldn't understand the real need for this, as it should really give back
the same pointer as cdbs, and hence there is no need for cpu in the params
either.

I may be wrong here :)

>
> -       INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer);
> -       schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay);
> +       schedule_delayed_work_on(cpu, &cdbs_local->work, delay);
>  }
>
>  static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs)
> @@ -217,6 +227,10 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data,
>                         if (ignore_nice)
>                                 j_cdbs->prev_cpu_nice =
>                                         kcpustat_cpu(j).cpustat[CPUTIME_NICE];
> +
> +                       mutex_init(&j_cdbs->timer_mutex);
> +                       INIT_DEFERRABLE_WORK(&j_cdbs->work,
> +                                            dbs_data->gov_dbs_timer);
>                 }
>
>                 /*
> @@ -275,15 +289,33 @@ second_time:
>                 }
>                 mutex_unlock(&dbs_data->mutex);
>
> -               mutex_init(&cpu_cdbs->timer_mutex);
> -               dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate);
> +               if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
> +                       for_each_cpu(j, policy->cpus) {
> +                               struct cpu_dbs_common_info *j_cdbs;
> +
> +                               j_cdbs = dbs_data->get_cpu_cdbs(j);
> +                               dbs_timer_init(dbs_data, j_cdbs,
> +                                              *sampling_rate, j);
> +                       }
> +               } else {
> +                       dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate, cpu);
> +               }

Do you really need this else part? Even on uniprocessor systems, the if
block should be enough, shouldn't it?

>                 break;
>
>         case CPUFREQ_GOV_STOP:
>                 if (dbs_data->governor == GOV_CONSERVATIVE)
>                         cs_dbs_info->enable = 0;
>
> -               dbs_timer_exit(cpu_cdbs);
> +               if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
> +                       for_each_cpu(j, policy->cpus) {
> +                               struct cpu_dbs_common_info *j_cdbs;
> +
> +                               j_cdbs = dbs_data->get_cpu_cdbs(j);
> +                               dbs_timer_exit(j_cdbs);
> +                       }
> +               } else {
> +                       dbs_timer_exit(cpu_cdbs);
> +               }

ditto.

>                 mutex_lock(&dbs_data->mutex);
>                 mutex_destroy(&cpu_cdbs->timer_mutex);
> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
> index f661654..5bf6fb8 100644
> --- a/drivers/cpufreq/cpufreq_governor.h
> +++ b/drivers/cpufreq/cpufreq_governor.h
> @@ -171,6 +171,7 @@ static inline int delay_for_sampling_rate(unsigned int sampling_rate)
>
>  u64 get_cpu_idle_time(unsigned int cpu, u64 *wall);
>  void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
> +bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs);
>  int cpufreq_governor_dbs(struct dbs_data *dbs_data,
>                 struct cpufreq_policy *policy, unsigned int event);
>  #endif /* _CPUFREQ_GOVERNER_H */
> diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
> index 7731f7c..93bb56d 100644
> --- a/drivers/cpufreq/cpufreq_ondemand.c
> +++ b/drivers/cpufreq/cpufreq_ondemand.c
> @@ -243,7 +243,8 @@ static void od_dbs_timer(struct work_struct *work)
>                 }
>         }
>
> -       schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
> +       schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work,
> +                       delay);
>         mutex_unlock(&dbs_info->cdbs.timer_mutex);
>  }
>
> --
> 1.7.12.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe cpufreq" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Fabio Baltieri Jan. 30, 2013, 9:14 a.m. UTC | #2
Hello Viresh,

On Wed, Jan 30, 2013 at 12:33:40PM +0530, Viresh Kumar wrote:
> I am starting to follow cpufreq patches religiously now and so have to come
> back to this old thread due to some crash we got :)
> 
> Its still not pushed upstream, so better to get it resolved before 3.9.

Definitely, that's what we have -next for!

> On Thu, Dec 27, 2012 at 8:25 PM, Fabio Baltieri
> <fabio.baltieri@linaro.org> wrote:
> 
> > diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> 
> >  static inline void dbs_timer_init(struct dbs_data *dbs_data,
> > -               struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate)
> > +                                 struct cpu_dbs_common_info *cdbs,
> > +                                 unsigned int sampling_rate,
> > +                                 int cpu)
> >  {
> >         int delay = delay_for_sampling_rate(sampling_rate);
> > +       struct cpu_dbs_common_info *cdbs_local = dbs_data->get_cpu_cdbs(cpu);
> 
> I couldn't understand the real need for this, as it should really give
> back the same
> pointer pointed out by: cdbs and hence no need of cpu in params too..
> 
> I may be wrong here :)

You are actually right.  This comes from the first version of the patch
(I basically rewrote it after the common code refactoring), and cdbs was
meant to be always the one for the master CPU while cpu should indicate
the one being initialized.  Then the thing turned out as:

A - I dropped the code specific for master cdbs here as it was already
there on another code path following the refactoring.
B - I passed j_cdbs = dbs_data->get_cpu_cdbs(j) in the init cycle while
it was really meant to be get_cpu_cdbs(cpu).

> >
> > -       INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer);
> > -       schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay);
> > +       schedule_delayed_work_on(cpu, &cdbs_local->work, delay);
> >  }
> >
> >  static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs)
> > @@ -217,6 +227,10 @@ int cpufreq_governor_dbs(struct dbs_data *dbs_data,
> >                         if (ignore_nice)
> >                                 j_cdbs->prev_cpu_nice =
> >                                         kcpustat_cpu(j).cpustat[CPUTIME_NICE];
> > +
> > +                       mutex_init(&j_cdbs->timer_mutex);
> > +                       INIT_DEFERRABLE_WORK(&j_cdbs->work,
> > +                                            dbs_data->gov_dbs_timer);
> >                 }
> >
> >                 /*
> > @@ -275,15 +289,33 @@ second_time:
> >                 }
> >                 mutex_unlock(&dbs_data->mutex);
> >
> > -               mutex_init(&cpu_cdbs->timer_mutex);
> > -               dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate);
> > +               if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
> > +                       for_each_cpu(j, policy->cpus) {
> > +                               struct cpu_dbs_common_info *j_cdbs;
> > +
> > +                               j_cdbs = dbs_data->get_cpu_cdbs(j);
> > +                               dbs_timer_init(dbs_data, j_cdbs,
> > +                                              *sampling_rate, j);
> > +                       }
> > +               } else {
> > +                       dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate, cpu);
> > +               }
> 
> do you really need this else part? In case of uniprocessor systems also, the if
> block should be enough. Isn't it?

Same reason, get_cpu_cdbs(j) was meant to be get_cpu_cdbs(cpu) but
that's not used anymore in the last version of the patch, and the same
for the last hunk.

I'll send a patch to clean this up, thanks for spotting it!

Fabio

> >                 break;
> >
> >         case CPUFREQ_GOV_STOP:
> >                 if (dbs_data->governor == GOV_CONSERVATIVE)
> >                         cs_dbs_info->enable = 0;
> >
> > -               dbs_timer_exit(cpu_cdbs);
> > +               if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
> > +                       for_each_cpu(j, policy->cpus) {
> > +                               struct cpu_dbs_common_info *j_cdbs;
> > +
> > +                               j_cdbs = dbs_data->get_cpu_cdbs(j);
> > +                               dbs_timer_exit(j_cdbs);
> > +                       }
> > +               } else {
> > +                       dbs_timer_exit(cpu_cdbs);
> > +               }
> 
> ditto.
Fabio Baltieri Jan. 30, 2013, 11:04 a.m. UTC | #3
On Wed, Jan 30, 2013 at 10:14:53AM +0100, Fabio Baltieri wrote:
> Hello Viresh,
> 
> On Wed, Jan 30, 2013 at 12:33:40PM +0530, Viresh Kumar wrote:
> > I am starting to follow cpufreq patches religiously now and so have to come
> > back to this old thread due to some crash we got :)
> > 
> > Its still not pushed upstream, so better to get it resolved before 3.9.
> 
> Definitely, that's what we have -next for!
> 
> > On Thu, Dec 27, 2012 at 8:25 PM, Fabio Baltieri
> > <fabio.baltieri@linaro.org> wrote:
> > 
> > > diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> > 
> > >  static inline void dbs_timer_init(struct dbs_data *dbs_data,
> > > -               struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate)
> > > +                                 struct cpu_dbs_common_info *cdbs,
> > > +                                 unsigned int sampling_rate,
> > > +                                 int cpu)
> > >  {
> > >         int delay = delay_for_sampling_rate(sampling_rate);
> > > +       struct cpu_dbs_common_info *cdbs_local = dbs_data->get_cpu_cdbs(cpu);
> > 
> > I couldn't understand the real need for this, as it should really give
> > back the same
> > pointer pointed out by: cdbs and hence no need of cpu in params too..

Small sidenote: actually, what I'm going to drop here is *cdbs, as I need
cpu for schedule_delayed_work_on and can't use cdbs->cpu for that as
it's the master's one.

Fabio

> > 
> > I may be wrong here :)
> 
> You are actually right.  This comes from the first version of the patch
> (I basically rewrote it after the common code rafactoring), and cdbs was
> meant to be always the one for the master CPU while cpu should indicate
> the one being initialized.  Then the thing turned out as:
> 
> A - I dropped the code specific for master cdbs here as it was already
> there on another code path following the rafactoring.
> B - I passed j_cdbs = dbs_data->get_cpu_cdbs(j) in the init cycle while
> it was really meant to be get_cpu_cdbs(cpu).
> 
> > >
> > > -       INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer);
> > > -       schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay);
> > > +       schedule_delayed_work_on(cpu, &cdbs_local->work, delay);
> > >  }
Viresh Kumar Jan. 30, 2013, 11:17 a.m. UTC | #4
On 30 January 2013 16:34, Fabio Baltieri <fabio.baltieri@linaro.org> wrote:
> Small sidenote, actually what I'm going to drop here i *cdbs, as I need
> cpu for schedule_delayed_work_on and can't use cdbs->cpu for that as
> it's the master's one.

I can't find code which would do j_cdbs->cpu = j and so j_cdbs->cpu is
un-initialized. So, if that's true, you can initialize that and drop
cpu param too.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Fabio Baltieri Jan. 30, 2013, 11:42 a.m. UTC | #5
On Wed, Jan 30, 2013 at 04:47:58PM +0530, Viresh Kumar wrote:
> On 30 January 2013 16:34, Fabio Baltieri <fabio.baltieri@linaro.org> wrote:
> > Small sidenote, actually what I'm going to drop here i *cdbs, as I need
> > cpu for schedule_delayed_work_on and can't use cdbs->cpu for that as
> > it's the master's one.
> 
> I can't find code which would do j_cdbs->cpu = j and so j_cdbs->cpu is
> un-initialized. So, if that's true, you can initialize that and drop
> cpu param too.

Right, it looks like cdbs->cpu is not really needed anymore.  Anyway I'm
quite happy with how it came out passing cpu number, code is a bit more
compact, so I'm sending that version and then we can discuss whether to
recycle cdbs->cpu or just drop it.

Fabio
diff mbox

Patch

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 64ef737..b9d7f14 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -122,7 +122,8 @@  static void cs_dbs_timer(struct work_struct *work)
 
 	dbs_check_cpu(&cs_dbs_data, cpu);
 
-	schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
+	schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work,
+			delay);
 	mutex_unlock(&dbs_info->cdbs.timer_mutex);
 }
 
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 6c5f1d3..b0e4506 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -161,13 +161,23 @@  void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
 }
 EXPORT_SYMBOL_GPL(dbs_check_cpu);
 
+bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs)
+{
+	struct cpufreq_policy *policy = cdbs->cur_policy;
+
+	return cpumask_weight(policy->cpus) > 1;
+}
+EXPORT_SYMBOL_GPL(dbs_sw_coordinated_cpus);
+
 static inline void dbs_timer_init(struct dbs_data *dbs_data,
-		struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate)
+				  struct cpu_dbs_common_info *cdbs,
+				  unsigned int sampling_rate,
+				  int cpu)
 {
 	int delay = delay_for_sampling_rate(sampling_rate);
+	struct cpu_dbs_common_info *cdbs_local = dbs_data->get_cpu_cdbs(cpu);
 
-	INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer);
-	schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay);
+	schedule_delayed_work_on(cpu, &cdbs_local->work, delay);
 }
 
 static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs)
@@ -217,6 +227,10 @@  int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 			if (ignore_nice)
 				j_cdbs->prev_cpu_nice =
 					kcpustat_cpu(j).cpustat[CPUTIME_NICE];
+
+			mutex_init(&j_cdbs->timer_mutex);
+			INIT_DEFERRABLE_WORK(&j_cdbs->work,
+					     dbs_data->gov_dbs_timer);
 		}
 
 		/*
@@ -275,15 +289,33 @@  second_time:
 		}
 		mutex_unlock(&dbs_data->mutex);
 
-		mutex_init(&cpu_cdbs->timer_mutex);
-		dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate);
+		if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
+			for_each_cpu(j, policy->cpus) {
+				struct cpu_dbs_common_info *j_cdbs;
+
+				j_cdbs = dbs_data->get_cpu_cdbs(j);
+				dbs_timer_init(dbs_data, j_cdbs,
+					       *sampling_rate, j);
+			}
+		} else {
+			dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate, cpu);
+		}
 		break;
 
 	case CPUFREQ_GOV_STOP:
 		if (dbs_data->governor == GOV_CONSERVATIVE)
 			cs_dbs_info->enable = 0;
 
-		dbs_timer_exit(cpu_cdbs);
+		if (dbs_sw_coordinated_cpus(cpu_cdbs)) {
+			for_each_cpu(j, policy->cpus) {
+				struct cpu_dbs_common_info *j_cdbs;
+
+				j_cdbs = dbs_data->get_cpu_cdbs(j);
+				dbs_timer_exit(j_cdbs);
+			}
+		} else {
+			dbs_timer_exit(cpu_cdbs);
+		}
 
 		mutex_lock(&dbs_data->mutex);
 		mutex_destroy(&cpu_cdbs->timer_mutex);
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index f661654..5bf6fb8 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -171,6 +171,7 @@  static inline int delay_for_sampling_rate(unsigned int sampling_rate)
 
 u64 get_cpu_idle_time(unsigned int cpu, u64 *wall);
 void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
+bool dbs_sw_coordinated_cpus(struct cpu_dbs_common_info *cdbs);
 int cpufreq_governor_dbs(struct dbs_data *dbs_data,
 		struct cpufreq_policy *policy, unsigned int event);
 #endif /* _CPUFREQ_GOVERNER_H */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 7731f7c..93bb56d 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -243,7 +243,8 @@  static void od_dbs_timer(struct work_struct *work)
 		}
 	}
 
-	schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay);
+	schedule_delayed_work_on(smp_processor_id(), &dbs_info->cdbs.work,
+			delay);
 	mutex_unlock(&dbs_info->cdbs.timer_mutex);
 }