
[V3,4/5] cpufreq: governor: Quit work-handlers early if governor is stopped

Message ID 1e579d2bf8dbee09295725cda37bd92222fe61fb.1444723240.git.viresh.kumar@linaro.org (mailing list archive)
State Changes Requested, archived

Commit Message

Viresh Kumar Oct. 13, 2015, 8:09 a.m. UTC
cpufreq_governor_lock is abused by being used outside of the cpufreq
core, i.e. in the cpufreq governors. But we didn't have a better
solution to the problem (described later) at that time, and the
following was the only acceptable solution:

6f1e4efd882e ("cpufreq: Fix timer/workqueue corruption by protecting
reading governor_enabled")

The cpufreq governor core is fixed against possible races now and things
are in much better shape.

The original problem:

When a CPU is hot unplugged, we cancel delayed works for all
policy->cpus via gov_cancel_work(). If the work is already running on
any CPU, the workqueue code will wait for the work to finish, to prevent
the work items from re-queuing themselves.

This works most of the time, except for the case where the work handler
determines that it should adjust the delay for all the other CPUs that
the policy is managing. When this happens, the canceling CPU can cancel
its own work, but the handler that is still running can queue up works
on the other policy->cpus again.

For example, consider CPUs 0-4 in a policy for which we called
gov_cancel_work(). The workqueue core has canceled the works for CPUs
0-3 and is waiting for the handler to finish on CPU 4. At that point the
handler on CPU 4 can restart the works on CPUs 0-3, which makes CPUs 0-3
run works that the governor core believes have been canceled.

To fix that in a different (non-hacky) way, set shared->policy to NULL
before trying to cancel the work. The update must be done under
timer_mutex, which prevents the work-handlers from making progress. Once
a work-handler finds that we are already trying to stop the governor, it
will exit early, and that prevents the works from being queued again as
well.
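
For clarity, here is a condensed sketch of the two paths after this change
(the *_sketch functions exist only for this illustration; the real code is
in the diff below):

/* Work-handler side: bail out early if the governor is being stopped. */
static void dbs_timer_sketch(struct cpu_common_dbs_info *shared)
{
	mutex_lock(&shared->timer_mutex);
	if (!shared->policy) {
		/* Governor already disabled, queue nothing. */
		mutex_unlock(&shared->timer_mutex);
		return;
	}
	/* ... evaluate load and re-queue works on policy->cpus ... */
	mutex_unlock(&shared->timer_mutex);
}

/* Stop side: clear the pointer first, then drain, then destroy. */
static void governor_stop_sketch(struct dbs_data *dbs_data,
				 struct cpufreq_policy *policy,
				 struct cpu_common_dbs_info *shared)
{
	mutex_lock(&shared->timer_mutex);
	shared->policy = NULL;			/* handlers now exit early */
	mutex_unlock(&shared->timer_mutex);

	gov_cancel_work(dbs_data, policy);	/* wait for running handlers */
	mutex_destroy(&shared->timer_mutex);	/* safe: no users left */
}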

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq_governor.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

Comments

Rafael J. Wysocki Oct. 28, 2015, 7:10 a.m. UTC | #1
On Tuesday, October 13, 2015 01:39:04 PM Viresh Kumar wrote:
> cpufreq_governor_lock is abused by being used outside of the cpufreq
> core, i.e. in the cpufreq governors. But we didn't have a better
> solution to the problem (described later) at that time, and the
> following was the only acceptable solution:
> 
> 6f1e4efd882e ("cpufreq: Fix timer/workqueue corruption by protecting
> reading governor_enabled")
> 
> The cpufreq governor core is fixed against possible races now and things
> are in much better shape.
> 
> The original problem:
> 
> When a CPU is hot unplugged, we cancel delayed works for all
> policy->cpus via gov_cancel_work(). If the work is already running on
> any CPU, the workqueue code will wait for the work to finish, to prevent
> the work items from re-queuing themselves.
> 
> This works most of the time, except for the case where the work handler
> determines that it should adjust the delay for all the other CPUs that
> the policy is managing. When this happens, the canceling CPU can cancel
> its own work, but the handler that is still running can queue up works
> on the other policy->cpus again.
> 
> For example, consider CPUs 0-4 in a policy for which we called
> gov_cancel_work(). The workqueue core has canceled the works for CPUs
> 0-3 and is waiting for the handler to finish on CPU 4. At that point the
> handler on CPU 4 can restart the works on CPUs 0-3, which makes CPUs 0-3
> run works that the governor core believes have been canceled.
> 
> To fix that in a different (non-hacky) way, set shared->policy to NULL
> before trying to cancel the work. The update must be done under
> timer_mutex, which prevents the work-handlers from making progress. Once
> a work-handler finds that we are already trying to stop the governor, it
> will exit early, and that prevents the works from being queued again as
> well.
> 
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

I have a hard time figuring out what the patch is supposed to achieve from
the above.

Do we eventually want to get rid of cpufreq_governor_lock and that's why we're
doing this?

> ---
>  drivers/cpufreq/cpufreq_governor.c | 33 +++++++++++++++++++++++----------
>  1 file changed, 23 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> index 750626d8fb03..931424ca96d9 100644
> --- a/drivers/cpufreq/cpufreq_governor.c
> +++ b/drivers/cpufreq/cpufreq_governor.c
> @@ -171,10 +171,6 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>  {
>  	int i;
>  
> -	mutex_lock(&cpufreq_governor_lock);
> -	if (!policy->governor_enabled)
> -		goto out_unlock;
> -
>  	if (!all_cpus) {
>  		/*
>  		 * Use raw_smp_processor_id() to avoid preemptible warnings.
> @@ -188,9 +184,6 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>  		for_each_cpu(i, policy->cpus)
>  			__gov_queue_work(i, dbs_data, delay);
>  	}
> -
> -out_unlock:
> -	mutex_unlock(&cpufreq_governor_lock);
>  }
>  EXPORT_SYMBOL_GPL(gov_queue_work);
>  
> @@ -229,13 +222,24 @@ static void dbs_timer(struct work_struct *work)
>  	struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info,
>  						 dwork.work);
>  	struct cpu_common_dbs_info *shared = cdbs->shared;
> -	struct cpufreq_policy *policy = shared->policy;
> -	struct dbs_data *dbs_data = policy->governor_data;
> +	struct cpufreq_policy *policy;
> +	struct dbs_data *dbs_data;
>  	unsigned int sampling_rate, delay;
>  	bool modify_all = true;
>  
>  	mutex_lock(&shared->timer_mutex);
>  
> +	policy = shared->policy;
> +
> +	/*
> +	 * Governor might already be disabled and there is no point continuing
> +	 * with the work-handler.
> +	 */
> +	if (!policy)
> +		goto unlock;
> +
> +	dbs_data = policy->governor_data;
> +
>  	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
>  		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
>  
> @@ -252,6 +256,7 @@ static void dbs_timer(struct work_struct *work)
>  	delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all);
>  	gov_queue_work(dbs_data, policy, delay, modify_all);
>  
> +unlock:
>  	mutex_unlock(&shared->timer_mutex);
>  }
>  
> @@ -488,9 +493,17 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy,
>  	if (!shared || !shared->policy)
>  		return -EBUSY;
>  
> +	/*
> +	 * Work-handler must see this updated, as it should not proceed any
> +	 * further after governor is disabled. And so timer_mutex is taken while
> +	 * updating this value.
> +	 */
> +	mutex_lock(&shared->timer_mutex);
> +	shared->policy = NULL;
> +	mutex_unlock(&shared->timer_mutex);

So this assumes that dbs_timer() will acquire the mutex and see that
shared->policy is now NULL, so it will bail out immediately, but ->

> +
>  	gov_cancel_work(dbs_data, policy);
>  
> -	shared->policy = NULL;
>  	mutex_destroy(&shared->timer_mutex);

-> the mutex is destroyed here, so what's the guarantee that the mutex
will still be around when dbs_timer() runs?

>  	return 0;
>  }
> 

Thanks,
Rafael

Viresh Kumar Oct. 28, 2015, 8:25 a.m. UTC | #2
On 28-10-15, 08:10, Rafael J. Wysocki wrote:
> I have a hard time figuring out what the patch is supposed to achieve from
> the above.

We had a problem earlier, where even after stopping the governor for a
policy, the work was still queued for some of its CPUs.

We failed to understand the real problem then, and so abused the wider
cpufreq_governor_lock.

I understand the problem much better now, and I have a straightforward
and precise solution for it.

> Do we eventually want to get rid of cpufreq_governor_lock and that's why we're
> doing this?

That's another benefit we get out of this change.

> > +	mutex_lock(&shared->timer_mutex);
> > +	shared->policy = NULL;
> > +	mutex_unlock(&shared->timer_mutex);

Right.

> So this assumes that dbs_timer() will acquire the mutex and see that
> shared->policy is now NULL, so it will bail out immediately, but ->
> 
> > +
> >  	gov_cancel_work(dbs_data, policy);
> >  
> > -	shared->policy = NULL;
> >  	mutex_destroy(&shared->timer_mutex);
> 
> -> the mutex is destroyed here, so what the guarantee that the mutex will
> still be around when dbs_timer() runs?

You really got me worried for a few minutes :)

The earlier update of shared->policy to NULL makes sure that no
work-handler can start any real work. After the unlock, the work
handlers may start executing, but they will return early.

We also have gov_cancel_work(), which will wait until all the
currently running handlers have finished executing and no work is
queued.

And so we are sure that there are no users of the mutex when it is
destroyed.
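
The ordering can also be demonstrated with a self-contained user-space
sketch (pthreads stand in for the workqueue and pthread_join() for the
drain done by gov_cancel_work(); all names in it are made up for
illustration):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t timer_mutex = PTHREAD_MUTEX_INITIALIZER;
static int policy_object;
static void *policy = &policy_object;	/* non-NULL while the "governor" runs */

static void *work_handler(void *unused)
{
	(void)unused;

	pthread_mutex_lock(&timer_mutex);
	if (!policy) {
		/* Governor already stopped: return without re-queuing. */
		pthread_mutex_unlock(&timer_mutex);
		return NULL;
	}
	/* ... the periodic work and re-arming would happen here ... */
	pthread_mutex_unlock(&timer_mutex);
	return NULL;
}

int main(void)
{
	pthread_t worker;

	pthread_create(&worker, NULL, work_handler, NULL);

	/* Stop path: clear the pointer under the mutex first ... */
	pthread_mutex_lock(&timer_mutex);
	policy = NULL;
	pthread_mutex_unlock(&timer_mutex);

	/* ... then drain (join plays the role of gov_cancel_work()) ... */
	pthread_join(worker, NULL);

	/* ... and only now is it safe to destroy the mutex. */
	pthread_mutex_destroy(&timer_mutex);
	printf("stopped cleanly\n");
	return 0;
}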
Viresh Kumar Oct. 28, 2015, 2:46 p.m. UTC | #3
On 28-10-15, 16:12, Rafael J. Wysocki wrote:
> So this is a changelog matching your patch:
> 
> "gov_queue_work() acquires cpufreq_governor_lock to allow cpufreq_governor_stop()
> to drain delayed work items possibly scheduled on CPUs that share the policy with
> a CPU being taken offline.
> 
> However, the same goal may be achieved in a more straightforward way if the
> policy pointer in the struct cpu_dbs_info matching the policy CPU is reset
> upfront by cpufreq_governor_stop() under the timer_mutex belonging to it and
> checked against NULL, under the same lock, at the beginning of dbs_timer().
> 
> In that case every instance of dbs_timer() run for a struct cpu_dbs_info
> sharing the policy pointer in question after cpufreq_governor_stop() has started
> will notice that that pointer is NULL and bail out immediately without queuing up
> any new work items.  In turn, gov_cancel_work() called by cpufreq_governor_stop()
> before destroying timer_mutex will wait for all of the delayed work items
> currently running on the CPUs sharing the policy to drop the mutex, so it may
> be destroyed safely.
> 
> Make cpufreq_governor_stop() and dbs_timer() work as described and modify
> gov_queue_work() so it does not acquire cpufreq_governor_lock any more."

Looks far better, thanks :)
Rafael J. Wysocki Oct. 28, 2015, 3:12 p.m. UTC | #4
On Wednesday, October 28, 2015 01:55:59 PM Viresh Kumar wrote:
> On 28-10-15, 08:10, Rafael J. Wysocki wrote:
> > I have a hard time figuring out what the patch is supposed to achieve from
> > the above.
> 
> We had a problem earlier, where even after stopping the governor for a
> policy, the work was still queued for some of its CPUs.
> 
> We failed to understand the real problem then, and so abused the wider
> cpufreq_governor_lock.
> 
> I understand the problem much better now, and I have a straightforward
> and precise solution for it.
> 
> > Do we eventually want to get rid of cpufreq_governor_lock and that's why we're
> > doing this?
> 
> That's another benefit we get out of this change.
> 
> > > +	mutex_lock(&shared->timer_mutex);
> > > +	shared->policy = NULL;
> > > +	mutex_unlock(&shared->timer_mutex);
> 
> Right.
> 
> > So this assumes that dbs_timer() will acquire the mutex and see that
> > shared->policy is now NULL, so it will bail out immediately, but ->
> > 
> > > +
> > >  	gov_cancel_work(dbs_data, policy);
> > >  
> > > -	shared->policy = NULL;
> > >  	mutex_destroy(&shared->timer_mutex);
> > 
> > -> the mutex is destroyed here, so what's the guarantee that the mutex
> > will still be around when dbs_timer() runs?
> 
> You really got me worried for a few minutes :)
> 
> The earlier update of shared->policy to NULL makes sure that no
> work-handler can start any real work. After the unlock, the work
> handlers may start executing, but they will return early.

That's not sufficient, because it doesn't guarantee that the lock will be
dropped before we destroy it.

> We also have gov_cancel_work(), which will wait until all the
> currently running handlers have finished executing and no work is
> queued.
> 
> And so we are sure that there are no users of the mutex when it is
> destroyed.

OK, so the gov_cancel_work() provides the guarantee.

So this is a changelog matching your patch:

"gov_queue_work() acquires cpufreq_governor_lock to allow cpufreq_governor_stop()
to drain delayed work items possibly scheduled on CPUs that share the policy with
a CPU being taken offline.

However, the same goal may be achieved in a more straightforward way if the
policy pointer in the struct cpu_dbs_info matching the policy CPU is reset
upfront by cpufreq_governor_stop() under the timer_mutex belonging to it and
checked against NULL, under the same lock, at the beginning of dbs_timer().

In that case every instance of dbs_timer() run for a struct cpu_dbs_info
sharing the policy pointer in question after cpufreq_governor_stop() has started
will notice that that pointer is NULL and bail out immediately without queuing up
any new work items.  In turn, gov_cancel_work() called by cpufreq_governor_stop()
before destroying timer_mutex will wait for all of the delayed work items
currently running on the CPUs sharing the policy to drop the mutex, so it may
be destroyed safely.

Make cpufreq_governor_stop() and dbs_timer() work as described and modify
gov_queue_work() so it does not acquire cpufreq_governor_lock any more."

Thanks,
Rafael


Patch

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 750626d8fb03..931424ca96d9 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -171,10 +171,6 @@  void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
 {
 	int i;
 
-	mutex_lock(&cpufreq_governor_lock);
-	if (!policy->governor_enabled)
-		goto out_unlock;
-
 	if (!all_cpus) {
 		/*
 		 * Use raw_smp_processor_id() to avoid preemptible warnings.
@@ -188,9 +184,6 @@  void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
 		for_each_cpu(i, policy->cpus)
 			__gov_queue_work(i, dbs_data, delay);
 	}
-
-out_unlock:
-	mutex_unlock(&cpufreq_governor_lock);
 }
 EXPORT_SYMBOL_GPL(gov_queue_work);
 
@@ -229,13 +222,24 @@  static void dbs_timer(struct work_struct *work)
 	struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info,
 						 dwork.work);
 	struct cpu_common_dbs_info *shared = cdbs->shared;
-	struct cpufreq_policy *policy = shared->policy;
-	struct dbs_data *dbs_data = policy->governor_data;
+	struct cpufreq_policy *policy;
+	struct dbs_data *dbs_data;
 	unsigned int sampling_rate, delay;
 	bool modify_all = true;
 
 	mutex_lock(&shared->timer_mutex);
 
+	policy = shared->policy;
+
+	/*
+	 * Governor might already be disabled and there is no point continuing
+	 * with the work-handler.
+	 */
+	if (!policy)
+		goto unlock;
+
+	dbs_data = policy->governor_data;
+
 	if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
 		struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 
@@ -252,6 +256,7 @@  static void dbs_timer(struct work_struct *work)
 	delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all);
 	gov_queue_work(dbs_data, policy, delay, modify_all);
 
+unlock:
 	mutex_unlock(&shared->timer_mutex);
 }
 
@@ -488,9 +493,17 @@  static int cpufreq_governor_stop(struct cpufreq_policy *policy,
 	if (!shared || !shared->policy)
 		return -EBUSY;
 
+	/*
+	 * Work-handler must see this updated, as it should not proceed any
+	 * further after governor is disabled. And so timer_mutex is taken while
+	 * updating this value.
+	 */
+	mutex_lock(&shared->timer_mutex);
+	shared->policy = NULL;
+	mutex_unlock(&shared->timer_mutex);
+
 	gov_cancel_work(dbs_data, policy);
 
-	shared->policy = NULL;
 	mutex_destroy(&shared->timer_mutex);
 	return 0;
 }