diff mbox

cpufreq: Avoid a couple of races related to cpufreq_cpu_get()

Message ID 6245332.6H0YK9yF0m@vostro.rjw.lan (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Rafael J. Wysocki Nov. 14, 2016, 9:51 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

The cpumask_test_cpu() check in cpufreq_cpu_get_raw() is sort of
pointless, because it may be racy with respect to CPU online/offline
which sets/clears the policy->cpus mask.

For this reason, it is better to reserve cpufreq_cpu_get_raw() for
the ondemand governor, which calls it for online CPUs only with CPU
online/offline locked anyway, and move the cpumask_test_cpu() up the
call chain.

Moreover, the callers of cpufreq_cpu_get() that really care about
avoiding races with CPU online/offline should better carry out that
check under policy->rwsem.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c |   46 +++++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 15 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Viresh Kumar Nov. 15, 2016, 4:09 a.m. UTC | #1
On 14-11-16, 22:51, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> The cpumask_test_cpu() check in cpufreq_cpu_get_raw() is sort of
> pointless, because it may be racy with respect to CPU online/offline
> which sets/clears the policy->cpus mask.
> 
> For this reason, it is better to reserve cpufreq_cpu_get_raw() for
> the ondemand governor, which calls it for online CPUs only with CPU
> online/offline locked anyway, and move the cpumask_test_cpu() up the
> call chain.
> 
> Moreover, the callers of cpufreq_cpu_get() that really care about
> avoiding races with CPU online/offline should better carry out that
> check under policy->rwsem.
> 
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>  drivers/cpufreq/cpufreq.c |   46 +++++++++++++++++++++++++++++++---------------
>  1 file changed, 31 insertions(+), 15 deletions(-)
> 
> Index: linux-pm/drivers/cpufreq/cpufreq.c
> ===================================================================
> --- linux-pm.orig/drivers/cpufreq/cpufreq.c
> +++ linux-pm/drivers/cpufreq/cpufreq.c
> @@ -65,6 +65,12 @@ static struct cpufreq_driver *cpufreq_dr
>  static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
>  static DEFINE_RWLOCK(cpufreq_driver_lock);
>  
> +struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
> +{
> +	return per_cpu(cpufreq_cpu_data, cpu);
> +}
> +EXPORT_SYMBOL_GPL(cpufreq_cpu_get_raw);
> +

It may be better to move this to cpufreq.h and make it inline instead
as this is really light weight now.

>  /* Flag to suspend/resume CPUFreq governors */
>  static bool cpufreq_suspended;
>  
> @@ -192,19 +198,12 @@ int cpufreq_generic_init(struct cpufreq_
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_generic_init);
>  
> -struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
> -{
> -	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
> -
> -	return policy && cpumask_test_cpu(cpu, policy->cpus) ? policy : NULL;
> -}
> -EXPORT_SYMBOL_GPL(cpufreq_cpu_get_raw);
> -
>  unsigned int cpufreq_generic_get(unsigned int cpu)
>  {
> -	struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu);
> +	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
>  
> -	if (!policy || IS_ERR(policy->clk)) {
> +	if (!policy || !cpumask_test_cpu(cpu, policy->cpus) ||

The race you described in commit log is still valid at this point,
isn't it ?

> +	    IS_ERR(policy->clk)) {
>  		pr_err("%s: No %s associated to cpu: %d\n",
>  		       __func__, policy ? "clk" : "policy", cpu);
>  		return 0;
> @@ -240,7 +239,7 @@ struct cpufreq_policy *cpufreq_cpu_get(u
>  
>  	if (cpufreq_driver) {
>  		/* get the CPU */
> -		policy = cpufreq_cpu_get_raw(cpu);
> +		policy = per_cpu(cpufreq_cpu_data, cpu);

This changes the expectations of the users of cpufreq_cpu_get() as
this will return policy for non policy->cpus as well now. I am sure
this will break some of the assumptions at the callers and we need to
visit all the sites to make sure it is fine.

>  		if (policy)
>  			kobject_get(&policy->kobj);
>  	}
> @@ -1328,7 +1327,7 @@ static int cpufreq_offline(unsigned int
>  
>  	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>  
> -	policy = cpufreq_cpu_get_raw(cpu);
> +	policy = per_cpu(cpufreq_cpu_data, cpu);

I think we can keep cpufreq_cpu_get_raw() here instead, as that will
do exactly this. Also we need the policy->cpus test here.

>  	if (!policy) {
>  		pr_debug("%s: No cpu_data found\n", __func__);
>  		return 0;
> @@ -1455,7 +1454,9 @@ unsigned int cpufreq_quick_get(unsigned
>  
>  	policy = cpufreq_cpu_get(cpu);
>  	if (policy) {
> -		ret_freq = policy->cur;
> +		if (cpumask_test_cpu(cpu, policy->cpus))

We can still have the race here, isn't it ?

> +			ret_freq = policy->cur;
> +
>  		cpufreq_cpu_put(policy);
>  	}
>  
> @@ -1475,7 +1476,9 @@ unsigned int cpufreq_quick_get_max(unsig
>  	unsigned int ret_freq = 0;
>  
>  	if (policy) {
> -		ret_freq = policy->max;
> +		if (cpumask_test_cpu(cpu, policy->cpus))

Same here..

> +			ret_freq = policy->max;
> +
>  		cpufreq_cpu_put(policy);
>  	}
>  
> @@ -1526,7 +1529,10 @@ unsigned int cpufreq_get(unsigned int cp
>  
>  	if (policy) {
>  		down_read(&policy->rwsem);
> -		ret_freq = __cpufreq_get(policy);
> +
> +		if (cpumask_test_cpu(cpu, policy->cpus))
> +			ret_freq = __cpufreq_get(policy);
> +

We don't have the race here ..

>  		up_read(&policy->rwsem);
>  
>  		cpufreq_cpu_put(policy);
> @@ -2142,6 +2148,11 @@ int cpufreq_get_policy(struct cpufreq_po
>  	if (!cpu_policy)
>  		return -EINVAL;
>  
> +	if (!cpumask_test_cpu(cpu, policy->cpus)) {

This is still racy..

> +		cpufreq_cpu_put(cpu_policy);
> +		return -EINVAL;
> +	}
> +
>  	memcpy(policy, cpu_policy, sizeof(*policy));
>  
>  	cpufreq_cpu_put(cpu_policy);
> @@ -2265,6 +2276,11 @@ int cpufreq_update_policy(unsigned int c
>  
>  	down_write(&policy->rwsem);
>  
> +	if (!cpumask_test_cpu(cpu, policy->cpus)) {

This is not racy.

> +		ret = -ENODEV;
> +		goto unlock;
> +	}
> +
>  	pr_debug("updating policy for CPU %u\n", cpu);
>  	memcpy(&new_policy, policy, sizeof(*policy));
>  	new_policy.min = policy->user_policy.min;
Rafael J. Wysocki Nov. 15, 2016, 12:59 p.m. UTC | #2
On Tue, Nov 15, 2016 at 5:09 AM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 14-11-16, 22:51, Rafael J. Wysocki wrote:
>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>
>> The cpumask_test_cpu() check in cpufreq_cpu_get_raw() is sort of
>> pointless, because it may be racy with respect to CPU online/offline
>> which sets/clears the policy->cpus mask.
>>
>> For this reason, it is better to reserve cpufreq_cpu_get_raw() for
>> the ondemand governor, which calls it for online CPUs only with CPU
>> online/offline locked anyway, and move the cpumask_test_cpu() up the
>> call chain.
>>
>> Moreover, the callers of cpufreq_cpu_get() that really care about
>> avoiding races with CPU online/offline should better carry out that
>> check under policy->rwsem.
>>
>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>> ---
>>  drivers/cpufreq/cpufreq.c |   46 +++++++++++++++++++++++++++++++---------------
>>  1 file changed, 31 insertions(+), 15 deletions(-)
>>
>> Index: linux-pm/drivers/cpufreq/cpufreq.c
>> ===================================================================
>> --- linux-pm.orig/drivers/cpufreq/cpufreq.c
>> +++ linux-pm/drivers/cpufreq/cpufreq.c
>> @@ -65,6 +65,12 @@ static struct cpufreq_driver *cpufreq_dr
>>  static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
>>  static DEFINE_RWLOCK(cpufreq_driver_lock);
>>
>> +struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
>> +{
>> +     return per_cpu(cpufreq_cpu_data, cpu);
>> +}
>> +EXPORT_SYMBOL_GPL(cpufreq_cpu_get_raw);
>> +
>
> It may be better to move this to cpufreq.h and make it inline instead
> as this is really light weight now.

I wanted to avoid exporting cpufreq_cpu_data.

>>  /* Flag to suspend/resume CPUFreq governors */
>>  static bool cpufreq_suspended;
>>
>> @@ -192,19 +198,12 @@ int cpufreq_generic_init(struct cpufreq_
>>  }
>>  EXPORT_SYMBOL_GPL(cpufreq_generic_init);
>>
>> -struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
>> -{
>> -     struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
>> -
>> -     return policy && cpumask_test_cpu(cpu, policy->cpus) ? policy : NULL;
>> -}
>> -EXPORT_SYMBOL_GPL(cpufreq_cpu_get_raw);
>> -
>>  unsigned int cpufreq_generic_get(unsigned int cpu)
>>  {
>> -     struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu);
>> +     struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
>>
>> -     if (!policy || IS_ERR(policy->clk)) {
>> +     if (!policy || !cpumask_test_cpu(cpu, policy->cpus) ||
>
> The race you described in commit log is still valid at this point,
> isn't it ?

Yes, it is.

This just preserves the current behaviour and it only is racy during
CPU online/offline (if you are unlucky enough to invoke this at a
wrong time).  It does the right thing for offline CPUs, though.

>> +         IS_ERR(policy->clk)) {
>>               pr_err("%s: No %s associated to cpu: %d\n",
>>                      __func__, policy ? "clk" : "policy", cpu);
>>               return 0;
>> @@ -240,7 +239,7 @@ struct cpufreq_policy *cpufreq_cpu_get(u
>>
>>       if (cpufreq_driver) {
>>               /* get the CPU */
>> -             policy = cpufreq_cpu_get_raw(cpu);
>> +             policy = per_cpu(cpufreq_cpu_data, cpu);
>
> This changes the expectations of the users of cpufreq_cpu_get() as
> this will return policy for non policy->cpus as well now. I am sure
> this will break some of the assumptions at the callers and we need to
> visit all the sites to make sure it is fine.

Yes.  That's what the rest of the patch is for, pretty much. :-)

>>               if (policy)
>>                       kobject_get(&policy->kobj);
>>       }
>> @@ -1328,7 +1327,7 @@ static int cpufreq_offline(unsigned int
>>
>>       pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>>
>> -     policy = cpufreq_cpu_get_raw(cpu);
>> +     policy = per_cpu(cpufreq_cpu_data, cpu);
>
> I think we can keep cpufreq_cpu_get_raw() here instead, as that will
> do exactly this. Also we need the policy->cpus test here.

No and not quite.

First, if someone wants to understand the code, it is easier to do
that if the per-CPU variable is accessed directly.

Second, I forgot about the driver unregistration case.  In that case
we need the check, but it needs to go under policy->rwsem.

>>       if (!policy) {
>>               pr_debug("%s: No cpu_data found\n", __func__);
>>               return 0;
>> @@ -1455,7 +1454,9 @@ unsigned int cpufreq_quick_get(unsigned
>>
>>       policy = cpufreq_cpu_get(cpu);
>>       if (policy) {
>> -             ret_freq = policy->cur;
>> +             if (cpumask_test_cpu(cpu, policy->cpus))
>
> We can still have the race here, isn't it ?

Yes.  That's to preserve the current behaviour.

>> +                     ret_freq = policy->cur;
>> +
>>               cpufreq_cpu_put(policy);
>>       }
>>
>> @@ -1475,7 +1476,9 @@ unsigned int cpufreq_quick_get_max(unsig
>>       unsigned int ret_freq = 0;
>>
>>       if (policy) {
>> -             ret_freq = policy->max;
>> +             if (cpumask_test_cpu(cpu, policy->cpus))
>
> Same here..

Right.

>> +                     ret_freq = policy->max;
>> +
>>               cpufreq_cpu_put(policy);
>>       }
>>
>> @@ -1526,7 +1529,10 @@ unsigned int cpufreq_get(unsigned int cp
>>
>>       if (policy) {
>>               down_read(&policy->rwsem);
>> -             ret_freq = __cpufreq_get(policy);
>> +
>> +             if (cpumask_test_cpu(cpu, policy->cpus))
>> +                     ret_freq = __cpufreq_get(policy);
>> +
>
> We don't have the race here ..

Well, what about the tegra-devfreq.c call site?

And I'm not sure what you mean.

Without the patch, CPU offline may take place after the policy check
above and before the down_read() AFAICS in which case we'll end up
invoking ->get() from the cpufreq driver on an offline CPU, which may
not be a good idea.

>>               up_read(&policy->rwsem);
>>
>>               cpufreq_cpu_put(policy);
>> @@ -2142,6 +2148,11 @@ int cpufreq_get_policy(struct cpufreq_po
>>       if (!cpu_policy)
>>               return -EINVAL;
>>
>> +     if (!cpumask_test_cpu(cpu, policy->cpus)) {
>
> This is still racy..

Yes.  Same comment as before: current behaviour.

>> +             cpufreq_cpu_put(cpu_policy);
>> +             return -EINVAL;
>> +     }
>> +
>>       memcpy(policy, cpu_policy, sizeof(*policy));
>>
>>       cpufreq_cpu_put(cpu_policy);
>> @@ -2265,6 +2276,11 @@ int cpufreq_update_policy(unsigned int c
>>
>>       down_write(&policy->rwsem);
>>
>> +     if (!cpumask_test_cpu(cpu, policy->cpus)) {
>
> This is not racy.

It is without the patch.

Again, the CPU may go offline between the policy check and the
down_write().  If that happens, we'll end up calling low-level stuff
on an offline CPU.

>> +             ret = -ENODEV;
>> +             goto unlock;
>> +     }
>> +
>>       pr_debug("updating policy for CPU %u\n", cpu);
>>       memcpy(&new_policy, policy, sizeof(*policy));
>>       new_policy.min = policy->user_policy.min;
>
> --

Thanks,
Rafael
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

Index: linux-pm/drivers/cpufreq/cpufreq.c
===================================================================
--- linux-pm.orig/drivers/cpufreq/cpufreq.c
+++ linux-pm/drivers/cpufreq/cpufreq.c
@@ -65,6 +65,12 @@  static struct cpufreq_driver *cpufreq_dr
 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
 static DEFINE_RWLOCK(cpufreq_driver_lock);
 
+struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
+{
+	return per_cpu(cpufreq_cpu_data, cpu);
+}
+EXPORT_SYMBOL_GPL(cpufreq_cpu_get_raw);
+
 /* Flag to suspend/resume CPUFreq governors */
 static bool cpufreq_suspended;
 
@@ -192,19 +198,12 @@  int cpufreq_generic_init(struct cpufreq_
 }
 EXPORT_SYMBOL_GPL(cpufreq_generic_init);
 
-struct cpufreq_policy *cpufreq_cpu_get_raw(unsigned int cpu)
-{
-	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
-
-	return policy && cpumask_test_cpu(cpu, policy->cpus) ? policy : NULL;
-}
-EXPORT_SYMBOL_GPL(cpufreq_cpu_get_raw);
-
 unsigned int cpufreq_generic_get(unsigned int cpu)
 {
-	struct cpufreq_policy *policy = cpufreq_cpu_get_raw(cpu);
+	struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
 
-	if (!policy || IS_ERR(policy->clk)) {
+	if (!policy || !cpumask_test_cpu(cpu, policy->cpus) ||
+	    IS_ERR(policy->clk)) {
 		pr_err("%s: No %s associated to cpu: %d\n",
 		       __func__, policy ? "clk" : "policy", cpu);
 		return 0;
@@ -240,7 +239,7 @@  struct cpufreq_policy *cpufreq_cpu_get(u
 
 	if (cpufreq_driver) {
 		/* get the CPU */
-		policy = cpufreq_cpu_get_raw(cpu);
+		policy = per_cpu(cpufreq_cpu_data, cpu);
 		if (policy)
 			kobject_get(&policy->kobj);
 	}
@@ -1328,7 +1327,7 @@  static int cpufreq_offline(unsigned int
 
 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
-	policy = cpufreq_cpu_get_raw(cpu);
+	policy = per_cpu(cpufreq_cpu_data, cpu);
 	if (!policy) {
 		pr_debug("%s: No cpu_data found\n", __func__);
 		return 0;
@@ -1455,7 +1454,9 @@  unsigned int cpufreq_quick_get(unsigned
 
 	policy = cpufreq_cpu_get(cpu);
 	if (policy) {
-		ret_freq = policy->cur;
+		if (cpumask_test_cpu(cpu, policy->cpus))
+			ret_freq = policy->cur;
+
 		cpufreq_cpu_put(policy);
 	}
 
@@ -1475,7 +1476,9 @@  unsigned int cpufreq_quick_get_max(unsig
 	unsigned int ret_freq = 0;
 
 	if (policy) {
-		ret_freq = policy->max;
+		if (cpumask_test_cpu(cpu, policy->cpus))
+			ret_freq = policy->max;
+
 		cpufreq_cpu_put(policy);
 	}
 
@@ -1526,7 +1529,10 @@  unsigned int cpufreq_get(unsigned int cp
 
 	if (policy) {
 		down_read(&policy->rwsem);
-		ret_freq = __cpufreq_get(policy);
+
+		if (cpumask_test_cpu(cpu, policy->cpus))
+			ret_freq = __cpufreq_get(policy);
+
 		up_read(&policy->rwsem);
 
 		cpufreq_cpu_put(policy);
@@ -2142,6 +2148,11 @@  int cpufreq_get_policy(struct cpufreq_po
 	if (!cpu_policy)
 		return -EINVAL;
 
+	if (!cpumask_test_cpu(cpu, policy->cpus)) {
+		cpufreq_cpu_put(cpu_policy);
+		return -EINVAL;
+	}
+
 	memcpy(policy, cpu_policy, sizeof(*policy));
 
 	cpufreq_cpu_put(cpu_policy);
@@ -2265,6 +2276,11 @@  int cpufreq_update_policy(unsigned int c
 
 	down_write(&policy->rwsem);
 
+	if (!cpumask_test_cpu(cpu, policy->cpus)) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+
 	pr_debug("updating policy for CPU %u\n", cpu);
 	memcpy(&new_policy, policy, sizeof(*policy));
 	new_policy.min = policy->user_policy.min;