diff mbox

mutex warning in cpufreq + RFC patch

Message ID 20130828025721.GA19754@codeaurora.org (mailing list archive)
State RFC, archived
Headers show

Commit Message

Stephen Boyd Aug. 28, 2013, 2:57 a.m. UTC
I'm running this simple test code in a shell on my 3.10 kernel and running
into this warning rather quickly.

	cd /sys/devices/system/cpu/cpu1
	while true
	do
	echo 0 > online
	echo 1 > online
	done &
	while true
	do
	echo 300000 > cpufreq/scaling_min_freq
	echo 1000000 > cpufreq/scaling_min_freq
	done

(Note you should place valid values for min/max freq in the example
above.)

WARNING: at kernel/mutex.c:341 __mutex_lock_slowpath+0x14c/0x410()
DEBUG_LOCKS_WARN_ON(l->magic != l)
Modules linked in:
CPU: 0 PID: 1960 Comm: sh Tainted: G        W    3.10.0 #32
[<c010c178>] (unwind_backtrace+0x0/0x11c) from [<c0109dec>] (show_stack+0x10/0x14)
[<c0109dec>] (show_stack+0x10/0x14) from [<c01904cc>] (warn_slowpath_common+0x4c/0x6c)
[<c01904cc>] (warn_slowpath_common+0x4c/0x6c) from [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c)
[<c019056c>] (warn_slowpath_fmt+0x2c/0x3c) from [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410)
[<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410) from [<c08a0618>] (mutex_lock+0x20/0x3c)
[<c08a0618>] (mutex_lock+0x20/0x3c) from [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8)
[<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8) from [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4)
[<c06325b0>] (__cpufreq_governor+0xdc/0x1a4) from [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0)
[<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0) from [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c)
[<c0632ea0>] (store_scaling_min_freq+0x80/0x9c) from [<c0633ae4>] (store+0x58/0x90)
[<c0633ae4>] (store+0x58/0x90) from [<c02a69d4>] (sysfs_write_file+0x100/0x148)
[<c02a69d4>] (sysfs_write_file+0x100/0x148) from [<c0255c18>] (vfs_write+0xcc/0x174)
[<c0255c18>] (vfs_write+0xcc/0x174) from [<c0255f70>] (SyS_write+0x38/0x64)
[<c0255f70>] (SyS_write+0x38/0x64) from [<c0106120>] (ret_fast_syscall+0x0/0x30)

This is happening because the governor is stopped via hotplug
while we're in the middle of touching the scaling_min_freq file.
When the governor is stopped we destroy the timer_mutex that the
scaling_min_freq thread is just about to acquire. From what I can
tell, we shouldn't be stopping the governor until after the
kobjects go away or we should start and stop the governor while
holding the policy semaphore otherwise userspace can come in and
use uninitialized things. I have this hack which seems to mostly
work. Thoughts?

----8<----

Comments

Viresh Kumar Aug. 28, 2013, 6:58 a.m. UTC | #1
Hi Stephen,

On 28 August 2013 08:27, Stephen Boyd <sboyd@codeaurora.org> wrote:
> I'm running this simple test code in a shell on my 3.10 kernel and running
> into this warning rather quickly.
>
>         cd /sys/devices/system/cpu/cpu1
>         while true
>         do
>         echo 0 > online
>         echo 1 > online
>         done &
>         while true
>         do
>         echo 300000 > cpufreq/scaling_min_freq
>         echo 1000000 > cpufreq/scaling_min_freq
>         done
>
> (Note you should place valid values for min/max freq in the example
> above.)
>
> WARNING: at kernel/mutex.c:341 __mutex_lock_slowpath+0x14c/0x410()
> DEBUG_LOCKS_WARN_ON(l->magic != l)
> Modules linked in:
> CPU: 0 PID: 1960 Comm: sh Tainted: G        W    3.10.0 #32
> [<c010c178>] (unwind_backtrace+0x0/0x11c) from [<c0109dec>] (show_stack+0x10/0x14)
> [<c0109dec>] (show_stack+0x10/0x14) from [<c01904cc>] (warn_slowpath_common+0x4c/0x6c)
> [<c01904cc>] (warn_slowpath_common+0x4c/0x6c) from [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c)
> [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c) from [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410)
> [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410) from [<c08a0618>] (mutex_lock+0x20/0x3c)
> [<c08a0618>] (mutex_lock+0x20/0x3c) from [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8)
> [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8) from [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4)
> [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4) from [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0)
> [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0) from [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c)
> [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c) from [<c0633ae4>] (store+0x58/0x90)
> [<c0633ae4>] (store+0x58/0x90) from [<c02a69d4>] (sysfs_write_file+0x100/0x148)
> [<c02a69d4>] (sysfs_write_file+0x100/0x148) from [<c0255c18>] (vfs_write+0xcc/0x174)
> [<c0255c18>] (vfs_write+0xcc/0x174) from [<c0255f70>] (SyS_write+0x38/0x64)
> [<c0255f70>] (SyS_write+0x38/0x64) from [<c0106120>] (ret_fast_syscall+0x0/0x30)
>
> This is happening because the governor is stopped via hotplug and
> while we're in the middle of touching the scaling_min_freq file.
> When the governor is stopped we destroy the timer_mutex that the
> scaling_min_freq thread is just about to acquire. From what I can
> tell, we shouldn't be stopping the governor until after the
> kobjects go away or we should start and stop the governor while
> holding the policy semaphore otherwise userspace can come in and
> use uninitialized things. I have this hack which seems to mostly
> work. Thoughts?

I haven't gone through the hack yet, but I am trying to understand the
problem first. There has been some work in the past around this
kind of scenario:

commit 95731ebb114c5f0c028459388560fc2a72fe5049
Author: Xiaoguang Chen <chenxg@marvell.com>
Date:   Wed Jun 19 15:00:07 2013 +0800

    cpufreq: Fix governor start/stop race condition


The problem is probably poor error checking, which is still present in
a few places in the __cpufreq_set_policy() routine.

Can you try again after fixing them? Something similar to this has to be done:

commit 3de9bdeb28638e164d1f0eb38dd68e3f5d2ac95c
Author: Viresh Kumar <viresh.kumar@linaro.org>
Date:   Tue Aug 6 22:53:13 2013 +0530

    cpufreq: improve error checking on return values of __cpufreq_governor()
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Boyd Aug. 28, 2013, 4:52 p.m. UTC | #2
On 08/27/13 23:58, Viresh Kumar wrote:
> I haven't gone through the hack yet, but I am trying to understand the
> problem first.. There had been some work in the past around this
> kind of scenarios..
>
> commit 95731ebb114c5f0c028459388560fc2a72fe5049
> Author: Xiaoguang Chen <chenxg@marvell.com>
> Date:   Wed Jun 19 15:00:07 2013 +0800
>
>     cpufreq: Fix governor start/stop race condition
>
>
> The problem probably is poor error checking which is still present at
> few places, in __cpufreq_set_policy() routine..
>
> Can you try after fixing them? Something similar has to be done..
>
> commit 3de9bdeb28638e164d1f0eb38dd68e3f5d2ac95c
> Author: Viresh Kumar <viresh.kumar@linaro.org>
> Date:   Tue Aug 6 22:53:13 2013 +0530
>
>     cpufreq: improve error checking on return values of __cpufreq_governor()

No, the problem isn't poor error checking. The problem is that between
gov_stop and gov_start, userspace can come in and write scaling_min_freq,
which will try to acquire the mutex (sorry, the copy-paste of the error
got messed up, so I've repasted it).

WARNING: at kernel/mutex.c:341 __mutex_lock_slowpath+0x14c/0x410()
DEBUG_LOCKS_WARN_ON(l->magic != l)
Modules linked in:
CPU: 0 PID: 1960 Comm: sh Tainted: G        W    3.10.0 #32
[<c010c178>] (unwind_backtrace+0x0/0x11c) from [<c0109dec>] (show_stack+0x10/0x14)
[<c0109dec>] (show_stack+0x10/0x14) from [<c01904cc>] (warn_slowpath_common+0x4c/0x6c)
[<c01904cc>] (warn_slowpath_common+0x4c/0x6c) from [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c)
[<c019056c>] (warn_slowpath_fmt+0x2c/0x3c) from [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410)
[<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410) from [<c08a0618>] (mutex_lock+0x20/0x3c)
[<c08a0618>] (mutex_lock+0x20/0x3c) from [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8)
[<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8) from [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4)
[<c06325b0>] (__cpufreq_governor+0xdc/0x1a4) from [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0)
[<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0) from [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c)
[<c0632ea0>] (store_scaling_min_freq+0x80/0x9c) from [<c0633ae4>] (store+0x58/0x90)
[<c0633ae4>] (store+0x58/0x90) from [<c02a69d4>] (sysfs_write_file+0x100/0x148)
[<c02a69d4>] (sysfs_write_file+0x100/0x148) from [<c0255c18>] (vfs_write+0xcc/0x174)
[<c0255c18>] (vfs_write+0xcc/0x174) from [<c0255f70>] (SyS_write+0x38/0x64)
[<c0255f70>] (SyS_write+0x38/0x64) from [<c0106120>] (ret_fast_syscall+0x0/0x30)


I've applied these patches on top of v3.10

f51e1eb63d9c28cec188337ee656a13be6980cfd (cpufreq: Fix cpufreq regression after suspend/resume)
aae760ed21cd690fe8a6db9f3a177ad55d7e12ab (cpufreq: Revert commit a66b2e to fix suspend/resume regression)
e8d05276f236ee6435e78411f62be9714e0b9377 (cpufreq: Revert commit 2f7021a8 to fix CPU hotplug regression) 
2a99859932281ed6c2ecdd988855f8f6838f6743 (cpufreq: Fix cpufreq driver module refcount balance after suspend/resume)
419e172145cf6c51d436a8bf4afcd17511f0ff79 (cpufreq: don't leave stale policy pointer in cdbs->cur_policy)
95731ebb114c5f0c028459388560fc2a72fe5049 (cpufreq: Fix governor start/stop race condition)

That second to last one causes a NULL pointer exception after the mutex
warning above because the limits case does

    if (policy->max < cpu_cdbs->cur_policy->cur)

and that dereferences a NULL cur_policy pointer.

Are there any fixes that I'm missing? I see that some things are
changing in linux-next but they don't look like fixes, more like
optimizations.
Viresh Kumar Aug. 29, 2013, 8:37 a.m. UTC | #3
On 28 August 2013 22:22, Stephen Boyd <sboyd@codeaurora.org> wrote:
> On 08/27/13 23:58, Viresh Kumar wrote:
>> I haven't gone through the hack yet, but I am trying to understand the
>> problem first.. There had been some work in the past around this
>> kind of scenarios..
>>
>> commit 95731ebb114c5f0c028459388560fc2a72fe5049
>> Author: Xiaoguang Chen <chenxg@marvell.com>
>> Date:   Wed Jun 19 15:00:07 2013 +0800
>>
>>     cpufreq: Fix governor start/stop race condition
>>
>>
>> The problem probably is poor error checking which is still present at
>> few places, in __cpufreq_set_policy() routine..
>>
>> Can you try after fixing them? Something similar has to be done..
>>
>> commit 3de9bdeb28638e164d1f0eb38dd68e3f5d2ac95c
>> Author: Viresh Kumar <viresh.kumar@linaro.org>
>> Date:   Tue Aug 6 22:53:13 2013 +0530
>>
>>     cpufreq: improve error checking on return values of __cpufreq_governor()
>
> No the problem isn't poor error checking. The problem is between
> gov_stop and gov_start userspace can come in and write scaling_min_freq
> which will try to acquire the mutex (sorry the copy paste of the error
> got messed up so I've repasted it).
>
> WARNING: at kernel/mutex.c:341 __mutex_lock_slowpath+0x14c/0x410()
> DEBUG_LOCKS_WARN_ON(l->magic != l)
> Modules linked in:                                                              CPU: 0 PID: 1960 Comm: sh Tainted: G        W    3.10.0 #32
> [<c010c178>] (unwind_backtrace+0x0/0x11c) from [<c0109dec>] (show_stack+0x10/0x14)
> [<c0109dec>] (show_stack+0x10/0x14) from [<c01904cc>] (warn_slowpath_common+0x4c/0x6c)
> [<c01904cc>] (warn_slowpath_common+0x4c/0x6c) from [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c)
> [<c019056c>] (warn_slowpath_fmt+0x2c/0x3c) from [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410)
> [<c08a0334>] (__mutex_lock_slowpath+0x14c/0x410) from [<c08a0618>] (mutex_lock+0x20/0x3c)
> [<c08a0618>] (mutex_lock+0x20/0x3c) from [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8)
> [<c0636114>] (cpufreq_governor_dbs+0x568/0x5f8) from [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4)
> [<c06325b0>] (__cpufreq_governor+0xdc/0x1a4) from [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0)
> [<c06328f0>] (__cpufreq_set_policy+0x278/0x2c0) from [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c)
> [<c0632ea0>] (store_scaling_min_freq+0x80/0x9c) from [<c0633ae4>] (store+0x58/0x90)
> [<c0633ae4>] (store+0x58/0x90) from [<c02a69d4>] (sysfs_write_file+0x100/0x148)
> [<c02a69d4>] (sysfs_write_file+0x100/0x148) from [<c0255c18>] (vfs_write+0xcc/0x174)
> [<c0255c18>] (vfs_write+0xcc/0x174) from [<c0255f70>] (SyS_write+0x38/0x64)
> [<c0255f70>] (SyS_write+0x38/0x64) from [<c0106120>] (ret_fast_syscall+0x0/0x30)
>
>
> I've applied these patches on top of v3.10
>
> f51e1eb63d9c28cec188337ee656a13be6980cfd (cpufreq: Fix cpufreq regression after suspend/resume
> aae760ed21cd690fe8a6db9f3a177ad55d7e12ab (cpufreq: Revert commit a66b2e to fix suspend/resume regression)
> e8d05276f236ee6435e78411f62be9714e0b9377 (cpufreq: Revert commit 2f7021a8 to fix CPU hotplug regression)
> 2a99859932281ed6c2ecdd988855f8f6838f6743 (cpufreq: Fix cpufreq driver module refcount balance after suspend/resume)
> 419e172145cf6c51d436a8bf4afcd17511f0ff79 (cpufreq: don't leave stale policy pointer in cdbs->cur_policy)
> 95731ebb114c5f0c028459388560fc2a72fe5049 (cpufreq: Fix governor start/stop race condition)
>
> That second to last one causes a NULL pointer exception after the mutex
> warning above because the limits case does
>
>     if (policy->max < cpu_cdbs->cur_policy->cur)
>
> and that dereferences a NULL cur_policy pointer.

I have seen something similar, and the error checking patch that
I mentioned earlier came as a solution to exactly that.

> Are there any fixes that I'm missing? I see that some things are
> changing in linux-next but they don't look like fixes, more like
> optimizations.

Getting patches on top of 3.10 would be tricky. You are two kernel
versions back and that's not going to help much. There are too many
patches in between linux-next and 3.10.

I really can't tell you which specific ones to include, as I am lost in them :)

Probably try to get all of them in, i.e. all patches touching drivers/cpufreq
and include/linux/cpufreq.h.

I have got an Arndale (Samsung Exynos) board where offlining CPUs is broken.
@Kukjin: Can you please try to get it fixed? It leads to crashes.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar Aug. 29, 2013, 8:39 a.m. UTC | #4
On 29 August 2013 14:07, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 28 August 2013 22:22, Stephen Boyd <sboyd@codeaurora.org> wrote:
>> On 08/27/13 23:58, Viresh Kumar wrote:
>>> I haven't gone through the hack yet, but I am trying to understand the
>>> problem first.. There had been some work in the past around this
>>> kind of scenarios..
>>>
>>> commit 95731ebb114c5f0c028459388560fc2a72fe5049
>>> Author: Xiaoguang Chen <chenxg@marvell.com>
>>> Date:   Wed Jun 19 15:00:07 2013 +0800
>>>
>>>     cpufreq: Fix governor start/stop race condition
>>>
>>>
>>> The problem probably is poor error checking which is still present at
>>> few places, in __cpufreq_set_policy() routine..
>>>
>>> Can you try after fixing them? Something similar has to be done..
>>>
>>> commit 3de9bdeb28638e164d1f0eb38dd68e3f5d2ac95c
>>> Author: Viresh Kumar <viresh.kumar@linaro.org>
>>> Date:   Tue Aug 6 22:53:13 2013 +0530
>>>
>>>     cpufreq: improve error checking on return values of __cpufreq_governor()
>>
>> No the problem isn't poor error checking. The problem is between
>> gov_stop and gov_start userspace can come in and write scaling_min_freq
>> which will try to acquire the mutex (sorry the copy paste of the error
>> got messed up so I've repasted it).

I forgot to answer here :(

I would still say this could be a problem. Suppose one thread tried to STOP
the governor and stopped it. Now another one came and tried to STOP it; it failed,
but due to poor error checking, it went ahead to the next step, and then they
got into issues.

I still see this as a potential issue in this case.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index cbfe3c1..134004b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -823,11 +823,11 @@  static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling,
 	policy = cpufreq_cpu_get(sibling);
 	WARN_ON(!policy);
 
+	lock_policy_rwsem_write(sibling);
+
 	if (has_target)
 		__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 
-	lock_policy_rwsem_write(sibling);
-
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 
 	cpumask_set_cpu(cpu, policy->cpus);
@@ -835,12 +835,11 @@  static int cpufreq_add_policy_cpu(unsigned int cpu, unsigned int sibling,
 	per_cpu(cpufreq_cpu_data, cpu) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-	unlock_policy_rwsem_write(sibling);
-
 	if (has_target) {
 		__cpufreq_governor(policy, CPUFREQ_GOV_START);
 		__cpufreq_governor(policy, CPUFREQ_GOV_LIMITS);
 	}
+	unlock_policy_rwsem_write(sibling);
 
 	ret = sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
 	if (ret) {
@@ -1037,9 +1036,6 @@  static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 		return -EINVAL;
 	}
 
-	if (cpufreq_driver->target)
-		__cpufreq_governor(data, CPUFREQ_GOV_STOP);
-
 #ifdef CONFIG_HOTPLUG_CPU
 	if (!cpufreq_driver->setpolicy)
 		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
@@ -1048,9 +1044,6 @@  static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 
 	WARN_ON(lock_policy_rwsem_write(cpu));
 	cpus = cpumask_weight(data->cpus);
-
-	if (cpus > 1)
-		cpumask_clear_cpu(cpu, data->cpus);
 	unlock_policy_rwsem_write(cpu);
 
 	if (cpu != data->cpu) {
@@ -1086,9 +1079,6 @@  static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 
 	/* If cpu is last user of policy, free policy */
 	if (cpus == 1) {
-		if (cpufreq_driver->target)
-			__cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
-
 		lock_policy_rwsem_read(cpu);
 		kobj = &data->kobj;
 		cmp = &data->kobj_unregister;
@@ -1103,6 +1093,11 @@  static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 		wait_for_completion(cmp);
 		pr_debug("wait complete\n");
 
+		if (cpufreq_driver->target) {
+			__cpufreq_governor(data, CPUFREQ_GOV_STOP);
+			__cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
+		}
+
 		if (cpufreq_driver->exit)
 			cpufreq_driver->exit(data);
 
@@ -1113,8 +1108,13 @@  static int __cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif
 		pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
 		cpufreq_cpu_put(data);
 		if (cpufreq_driver->target) {
+			WARN_ON(lock_policy_rwsem_write(cpu));
+			__cpufreq_governor(data, CPUFREQ_GOV_STOP);
+			if (cpus > 1)
+				cpumask_clear_cpu(cpu, data->cpus);
 			__cpufreq_governor(data, CPUFREQ_GOV_START);
 			__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+			unlock_policy_rwsem_write(cpu);
 		}
 	}