diff mbox

[LOCKDEP] cpufreq: possible circular locking dependency detected

Message ID 51DE1BE1.3090707@linux.vnet.ibm.com (mailing list archive)
State Changes Requested, archived
Headers show

Commit Message

Michael Wang July 11, 2013, 2:43 a.m. UTC
Hi, Sergey

On 07/11/2013 07:13 AM, Sergey Senozhatsky wrote:
[snip]
> 
> 
> Please kindly review the following patch.
> 
> 
> 
> Remove cpu device only upon succesful cpu down on CPU_POST_DEAD event,
> so we can kill off CPU_DOWN_FAILED case and eliminate potential extra
> remove/add path:
> 	
> 	hotplug lock
> 		CPU_DOWN_PREPARE: __cpufreq_remove_dev
> 		CPU_DOWN_FAILED: cpufreq_add_dev
> 	hotplug unlock
> 
> Since cpu still present on CPU_DEAD event, cpu stats table should be
> kept longer and removed later on CPU_POST_DEAD as well.
> 
> Because CPU_POST_DEAD action performed with hotplug lock released, CPU_DOWN
> might block existing gov_queue_work() user (blocked on get_online_cpus())
> and unblock it with one of policy->cpus offlined, thus cpu_is_offline()
> check is performed in __gov_queue_work().
> 
> Besides, existing gov_queue_work() hotplug guard extended to protect all
> __gov_queue_work() calls: for both all_cpus and !all_cpus cases.
> 
> CPUFREQ_GOV_START performs direct __gov_queue_work() call because hotplug
> lock already held there, opposing to previous gov_queue_work() and nested
> get/put_online_cpus().

Nice to know you have some idea on solving the issue ;-)

I'm not sure whether I catch the idea, but seems like you are trying
to re-organize the timing of add/remove device.

I'm sure that we have more than one way to solve the issues, but what
we need is the cure of root...

As Srivatsa discovered, the root issue may be:
	gov_cancel_work() failed to stop all the work after it's return.

And Viresh also confirmed that this is not by-designed.

Which means gov_queue_work() invoked by od_dbs_timer() is supposed to
never happen after CPUFREQ_GOV_STOP notify, the whole policy should
stop working at that time.

But it failed to, and the work concurrent with cpu dying caused the
first problem.

Thus I think we should focus on this and suggested below fix, I'd like
to know your opinions :)

Regards,
Michael Wang


> 
> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> 
> ---
> 
>  drivers/cpufreq/cpufreq.c          |  5 +----
>  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
>  drivers/cpufreq/cpufreq_stats.c    |  2 +-
>  3 files changed, 13 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 6a015ad..f8aacf1 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
>  		case CPU_ONLINE:
>  			cpufreq_add_dev(dev, NULL);
>  			break;
> -		case CPU_DOWN_PREPARE:
> +		case CPU_POST_DEAD:
>  		case CPU_UP_CANCELED_FROZEN:
>  			__cpufreq_remove_dev(dev, NULL);
>  			break;
> -		case CPU_DOWN_FAILED:
> -			cpufreq_add_dev(dev, NULL);
> -			break;
>  		}
>  	}
>  	return NOTIFY_OK;
> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> index 4645876..681d5d6 100644
> --- a/drivers/cpufreq/cpufreq_governor.c
> +++ b/drivers/cpufreq/cpufreq_governor.c
> @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
>  		unsigned int delay)
>  {
>  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> -
> +	/* cpu offline might block existing gov_queue_work() user,
> +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
> +	 * thus potentially we can hit offlined CPU */
> +	if (unlikely(cpu_is_offline(cpu)))
> +		return;
>  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
>  }
> 
> @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>  		unsigned int delay, bool all_cpus)
>  {
>  	int i;
> -
> +	get_online_cpus();
>  	if (!all_cpus) {
>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>  	} else {
> -		get_online_cpus();
>  		for_each_cpu(i, policy->cpus)
>  			__gov_queue_work(i, dbs_data, delay);
> -		put_online_cpus();
>  	}
> +	put_online_cpus();
>  }
>  EXPORT_SYMBOL_GPL(gov_queue_work);
> 
> @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
>  		/* Initiate timer time stamp */
>  		cpu_cdbs->time_stamp = ktime_get();
> 
> -		gov_queue_work(dbs_data, policy,
> -				delay_for_sampling_rate(sampling_rate), true);
> +		/* hotplug lock already held */
> +		for_each_cpu(j, policy->cpus)
> +			__gov_queue_work(j, dbs_data,
> +				delay_for_sampling_rate(sampling_rate));
>  		break;
> 
>  	case CPUFREQ_GOV_STOP:
> diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> index cd9e817..833816e 100644
> --- a/drivers/cpufreq/cpufreq_stats.c
> +++ b/drivers/cpufreq/cpufreq_stats.c
> @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
>  	case CPU_DOWN_PREPARE:
>  		cpufreq_stats_free_sysfs(cpu);
>  		break;
> -	case CPU_DEAD:
> +	case CPU_POST_DEAD:
>  		cpufreq_stats_free_table(cpu);
>  		break;
>  	case CPU_UP_CANCELED_FROZEN:
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Sergey Senozhatsky July 11, 2013, 8:22 a.m. UTC | #1
On (07/11/13 10:43), Michael Wang wrote:
> Hi, Sergey
> 
> On 07/11/2013 07:13 AM, Sergey Senozhatsky wrote:
> [snip]
> > 
> > 
> > Please kindly review the following patch.
> > 
> > 
> > 
> > Remove cpu device only upon succesful cpu down on CPU_POST_DEAD event,
> > so we can kill off CPU_DOWN_FAILED case and eliminate potential extra
> > remove/add path:
> > 	
> > 	hotplug lock
> > 		CPU_DOWN_PREPARE: __cpufreq_remove_dev
> > 		CPU_DOWN_FAILED: cpufreq_add_dev
> > 	hotplug unlock
> > 
> > Since cpu still present on CPU_DEAD event, cpu stats table should be
> > kept longer and removed later on CPU_POST_DEAD as well.
> > 
> > Because CPU_POST_DEAD action performed with hotplug lock released, CPU_DOWN
> > might block existing gov_queue_work() user (blocked on get_online_cpus())
> > and unblock it with one of policy->cpus offlined, thus cpu_is_offline()
> > check is performed in __gov_queue_work().
> > 
> > Besides, existing gov_queue_work() hotplug guard extended to protect all
> > __gov_queue_work() calls: for both all_cpus and !all_cpus cases.
> > 
> > CPUFREQ_GOV_START performs direct __gov_queue_work() call because hotplug
> > lock already held there, opposing to previous gov_queue_work() and nested
> > get/put_online_cpus().
> 
> Nice to know you have some idea on solving the issue ;-)
> 
> I'm not sure whether I catch the idea, but seems like you are trying
> to re-organize the timing of add/remove device.
> 
> I'm sure that we have more than one way to solve the issues, but what
> we need is the cure of root...
> 
> As Srivatsa discovered, the root issue may be:
> 	gov_cancel_work() failed to stop all the work after it's return.
> 
> And Viresh also confirmed that this is not by-designed.
> 
> Which means gov_queue_work() invoked by od_dbs_timer() is supposed to
> never happen after CPUFREQ_GOV_STOP notify, the whole policy should
> stop working at that time.
> 
> But it failed to, and the work concurrent with cpu dying caused the
> first problem.
> 
> Thus I think we should focus on this and suggested below fix, I'd like
> to know your opinions :)
>

Hello Michael,
nice job! works fine for me.

Reported-and-Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>


	-ss

> Regards,
> Michael Wang
> 
> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> index dc9b72e..a64b544 100644
> --- a/drivers/cpufreq/cpufreq_governor.c
> +++ b/drivers/cpufreq/cpufreq_governor.c
> @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>  {
>  	int i;
>  
> +	if (dbs_data->queue_stop)
> +		return;
> +
>  	if (!all_cpus) {
>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>  	} else {
> -		get_online_cpus();
>  		for_each_cpu(i, policy->cpus)
>  			__gov_queue_work(i, dbs_data, delay);
> -		put_online_cpus();
>  	}
>  }
>  EXPORT_SYMBOL_GPL(gov_queue_work);
> @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
>  		struct cpufreq_policy *policy)
>  {
>  	struct cpu_dbs_common_info *cdbs;
> -	int i;
> +	int i, round = 2;
>  
> +	dbs_data->queue_stop = 1;
> +redo:
> +	round--;
>  	for_each_cpu(i, policy->cpus) {
>  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
>  		cancel_delayed_work_sync(&cdbs->work);
>  	}
> +
> +	/*
> +	 * Since there is no lock to prvent re-queue the
> +	 * cancelled work, some early cancelled work might
> +	 * have been queued again by later cancelled work.
> +	 *
> +	 * Flush the work again with dbs_data->queue_stop
> +	 * enabled, this time there will be no survivors.
> +	 */
> +	if (round)
> +		goto redo;
> +	dbs_data->queue_stop = 0;
>  }
>  
>  /* Will return if we need to evaluate cpu load again or not */
> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
> index e16a961..9116135 100644
> --- a/drivers/cpufreq/cpufreq_governor.h
> +++ b/drivers/cpufreq/cpufreq_governor.h
> @@ -213,6 +213,7 @@ struct dbs_data {
>  	unsigned int min_sampling_rate;
>  	int usage_count;
>  	void *tuners;
> +	int queue_stop;
>  
>  	/* dbs_mutex protects dbs_enable in governor start/stop */
>  	struct mutex mutex;
> 
> > 
> > Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> > 
> > ---
> > 
> >  drivers/cpufreq/cpufreq.c          |  5 +----
> >  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
> >  drivers/cpufreq/cpufreq_stats.c    |  2 +-
> >  3 files changed, 13 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > index 6a015ad..f8aacf1 100644
> > --- a/drivers/cpufreq/cpufreq.c
> > +++ b/drivers/cpufreq/cpufreq.c
> > @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
> >  		case CPU_ONLINE:
> >  			cpufreq_add_dev(dev, NULL);
> >  			break;
> > -		case CPU_DOWN_PREPARE:
> > +		case CPU_POST_DEAD:
> >  		case CPU_UP_CANCELED_FROZEN:
> >  			__cpufreq_remove_dev(dev, NULL);
> >  			break;
> > -		case CPU_DOWN_FAILED:
> > -			cpufreq_add_dev(dev, NULL);
> > -			break;
> >  		}
> >  	}
> >  	return NOTIFY_OK;
> > diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> > index 4645876..681d5d6 100644
> > --- a/drivers/cpufreq/cpufreq_governor.c
> > +++ b/drivers/cpufreq/cpufreq_governor.c
> > @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
> >  		unsigned int delay)
> >  {
> >  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> > -
> > +	/* cpu offline might block existing gov_queue_work() user,
> > +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
> > +	 * thus potentially we can hit offlined CPU */
> > +	if (unlikely(cpu_is_offline(cpu)))
> > +		return;
> >  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
> >  }
> > 
> > @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
> >  		unsigned int delay, bool all_cpus)
> >  {
> >  	int i;
> > -
> > +	get_online_cpus();
> >  	if (!all_cpus) {
> >  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
> >  	} else {
> > -		get_online_cpus();
> >  		for_each_cpu(i, policy->cpus)
> >  			__gov_queue_work(i, dbs_data, delay);
> > -		put_online_cpus();
> >  	}
> > +	put_online_cpus();
> >  }
> >  EXPORT_SYMBOL_GPL(gov_queue_work);
> > 
> > @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
> >  		/* Initiate timer time stamp */
> >  		cpu_cdbs->time_stamp = ktime_get();
> > 
> > -		gov_queue_work(dbs_data, policy,
> > -				delay_for_sampling_rate(sampling_rate), true);
> > +		/* hotplug lock already held */
> > +		for_each_cpu(j, policy->cpus)
> > +			__gov_queue_work(j, dbs_data,
> > +				delay_for_sampling_rate(sampling_rate));
> >  		break;
> > 
> >  	case CPUFREQ_GOV_STOP:
> > diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> > index cd9e817..833816e 100644
> > --- a/drivers/cpufreq/cpufreq_stats.c
> > +++ b/drivers/cpufreq/cpufreq_stats.c
> > @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
> >  	case CPU_DOWN_PREPARE:
> >  		cpufreq_stats_free_sysfs(cpu);
> >  		break;
> > -	case CPU_DEAD:
> > +	case CPU_POST_DEAD:
> >  		cpufreq_stats_free_table(cpu);
> >  		break;
> >  	case CPU_UP_CANCELED_FROZEN:
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> > 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Wang July 11, 2013, 8:47 a.m. UTC | #2
On 07/11/2013 04:22 PM, Sergey Senozhatsky wrote:
[snip]
>>
> 
> Hello Michael,
> nice job! works fine for me.
> 
> Reported-and-Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>

Thanks for the test :)

Borislav may also doing some testing, let's wait for few days and see
whether there are any point we missed.

And we should also thanks Srivatsa for catching the root issue ;-)

Regards,
Michael Wang

> 
> 
> 	-ss
> 
>> Regards,
>> Michael Wang
>>
>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
>> index dc9b72e..a64b544 100644
>> --- a/drivers/cpufreq/cpufreq_governor.c
>> +++ b/drivers/cpufreq/cpufreq_governor.c
>> @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>>  {
>>  	int i;
>>  
>> +	if (dbs_data->queue_stop)
>> +		return;
>> +
>>  	if (!all_cpus) {
>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>>  	} else {
>> -		get_online_cpus();
>>  		for_each_cpu(i, policy->cpus)
>>  			__gov_queue_work(i, dbs_data, delay);
>> -		put_online_cpus();
>>  	}
>>  }
>>  EXPORT_SYMBOL_GPL(gov_queue_work);
>> @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
>>  		struct cpufreq_policy *policy)
>>  {
>>  	struct cpu_dbs_common_info *cdbs;
>> -	int i;
>> +	int i, round = 2;
>>  
>> +	dbs_data->queue_stop = 1;
>> +redo:
>> +	round--;
>>  	for_each_cpu(i, policy->cpus) {
>>  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
>>  		cancel_delayed_work_sync(&cdbs->work);
>>  	}
>> +
>> +	/*
>> +	 * Since there is no lock to prvent re-queue the
>> +	 * cancelled work, some early cancelled work might
>> +	 * have been queued again by later cancelled work.
>> +	 *
>> +	 * Flush the work again with dbs_data->queue_stop
>> +	 * enabled, this time there will be no survivors.
>> +	 */
>> +	if (round)
>> +		goto redo;
>> +	dbs_data->queue_stop = 0;
>>  }
>>  
>>  /* Will return if we need to evaluate cpu load again or not */
>> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
>> index e16a961..9116135 100644
>> --- a/drivers/cpufreq/cpufreq_governor.h
>> +++ b/drivers/cpufreq/cpufreq_governor.h
>> @@ -213,6 +213,7 @@ struct dbs_data {
>>  	unsigned int min_sampling_rate;
>>  	int usage_count;
>>  	void *tuners;
>> +	int queue_stop;
>>  
>>  	/* dbs_mutex protects dbs_enable in governor start/stop */
>>  	struct mutex mutex;
>>
>>>
>>> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
>>>
>>> ---
>>>
>>>  drivers/cpufreq/cpufreq.c          |  5 +----
>>>  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
>>>  drivers/cpufreq/cpufreq_stats.c    |  2 +-
>>>  3 files changed, 13 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>>> index 6a015ad..f8aacf1 100644
>>> --- a/drivers/cpufreq/cpufreq.c
>>> +++ b/drivers/cpufreq/cpufreq.c
>>> @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
>>>  		case CPU_ONLINE:
>>>  			cpufreq_add_dev(dev, NULL);
>>>  			break;
>>> -		case CPU_DOWN_PREPARE:
>>> +		case CPU_POST_DEAD:
>>>  		case CPU_UP_CANCELED_FROZEN:
>>>  			__cpufreq_remove_dev(dev, NULL);
>>>  			break;
>>> -		case CPU_DOWN_FAILED:
>>> -			cpufreq_add_dev(dev, NULL);
>>> -			break;
>>>  		}
>>>  	}
>>>  	return NOTIFY_OK;
>>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
>>> index 4645876..681d5d6 100644
>>> --- a/drivers/cpufreq/cpufreq_governor.c
>>> +++ b/drivers/cpufreq/cpufreq_governor.c
>>> @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
>>>  		unsigned int delay)
>>>  {
>>>  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
>>> -
>>> +	/* cpu offline might block existing gov_queue_work() user,
>>> +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
>>> +	 * thus potentially we can hit offlined CPU */
>>> +	if (unlikely(cpu_is_offline(cpu)))
>>> +		return;
>>>  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
>>>  }
>>>
>>> @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>>>  		unsigned int delay, bool all_cpus)
>>>  {
>>>  	int i;
>>> -
>>> +	get_online_cpus();
>>>  	if (!all_cpus) {
>>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>>>  	} else {
>>> -		get_online_cpus();
>>>  		for_each_cpu(i, policy->cpus)
>>>  			__gov_queue_work(i, dbs_data, delay);
>>> -		put_online_cpus();
>>>  	}
>>> +	put_online_cpus();
>>>  }
>>>  EXPORT_SYMBOL_GPL(gov_queue_work);
>>>
>>> @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
>>>  		/* Initiate timer time stamp */
>>>  		cpu_cdbs->time_stamp = ktime_get();
>>>
>>> -		gov_queue_work(dbs_data, policy,
>>> -				delay_for_sampling_rate(sampling_rate), true);
>>> +		/* hotplug lock already held */
>>> +		for_each_cpu(j, policy->cpus)
>>> +			__gov_queue_work(j, dbs_data,
>>> +				delay_for_sampling_rate(sampling_rate));
>>>  		break;
>>>
>>>  	case CPUFREQ_GOV_STOP:
>>> diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
>>> index cd9e817..833816e 100644
>>> --- a/drivers/cpufreq/cpufreq_stats.c
>>> +++ b/drivers/cpufreq/cpufreq_stats.c
>>> @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
>>>  	case CPU_DOWN_PREPARE:
>>>  		cpufreq_stats_free_sysfs(cpu);
>>>  		break;
>>> -	case CPU_DEAD:
>>> +	case CPU_POST_DEAD:
>>>  		cpufreq_stats_free_table(cpu);
>>>  		break;
>>>  	case CPU_UP_CANCELED_FROZEN:
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>> Please read the FAQ at  http://www.tux.org/lkml/
>>>
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Wang July 11, 2013, 8:48 a.m. UTC | #3
On 07/11/2013 04:47 PM, Michael Wang wrote:
> On 07/11/2013 04:22 PM, Sergey Senozhatsky wrote:
> [snip]
>>>
>>
>> Hello Michael,
>> nice job! works fine for me.
>>
>> Reported-and-Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> 
> Thanks for the test :)
> 
> Borislav may also doing some testing, let's wait for few days and see
> whether there are any point we missed.

s /Borislav/Bartlomiej

> 
> And we should also thanks Srivatsa for catching the root issue ;-)
> 
> Regards,
> Michael Wang
> 
>>
>>
>> 	-ss
>>
>>> Regards,
>>> Michael Wang
>>>
>>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
>>> index dc9b72e..a64b544 100644
>>> --- a/drivers/cpufreq/cpufreq_governor.c
>>> +++ b/drivers/cpufreq/cpufreq_governor.c
>>> @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>>>  {
>>>  	int i;
>>>  
>>> +	if (dbs_data->queue_stop)
>>> +		return;
>>> +
>>>  	if (!all_cpus) {
>>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>>>  	} else {
>>> -		get_online_cpus();
>>>  		for_each_cpu(i, policy->cpus)
>>>  			__gov_queue_work(i, dbs_data, delay);
>>> -		put_online_cpus();
>>>  	}
>>>  }
>>>  EXPORT_SYMBOL_GPL(gov_queue_work);
>>> @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
>>>  		struct cpufreq_policy *policy)
>>>  {
>>>  	struct cpu_dbs_common_info *cdbs;
>>> -	int i;
>>> +	int i, round = 2;
>>>  
>>> +	dbs_data->queue_stop = 1;
>>> +redo:
>>> +	round--;
>>>  	for_each_cpu(i, policy->cpus) {
>>>  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
>>>  		cancel_delayed_work_sync(&cdbs->work);
>>>  	}
>>> +
>>> +	/*
>>> +	 * Since there is no lock to prvent re-queue the
>>> +	 * cancelled work, some early cancelled work might
>>> +	 * have been queued again by later cancelled work.
>>> +	 *
>>> +	 * Flush the work again with dbs_data->queue_stop
>>> +	 * enabled, this time there will be no survivors.
>>> +	 */
>>> +	if (round)
>>> +		goto redo;
>>> +	dbs_data->queue_stop = 0;
>>>  }
>>>  
>>>  /* Will return if we need to evaluate cpu load again or not */
>>> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
>>> index e16a961..9116135 100644
>>> --- a/drivers/cpufreq/cpufreq_governor.h
>>> +++ b/drivers/cpufreq/cpufreq_governor.h
>>> @@ -213,6 +213,7 @@ struct dbs_data {
>>>  	unsigned int min_sampling_rate;
>>>  	int usage_count;
>>>  	void *tuners;
>>> +	int queue_stop;
>>>  
>>>  	/* dbs_mutex protects dbs_enable in governor start/stop */
>>>  	struct mutex mutex;
>>>
>>>>
>>>> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
>>>>
>>>> ---
>>>>
>>>>  drivers/cpufreq/cpufreq.c          |  5 +----
>>>>  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
>>>>  drivers/cpufreq/cpufreq_stats.c    |  2 +-
>>>>  3 files changed, 13 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>>>> index 6a015ad..f8aacf1 100644
>>>> --- a/drivers/cpufreq/cpufreq.c
>>>> +++ b/drivers/cpufreq/cpufreq.c
>>>> @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
>>>>  		case CPU_ONLINE:
>>>>  			cpufreq_add_dev(dev, NULL);
>>>>  			break;
>>>> -		case CPU_DOWN_PREPARE:
>>>> +		case CPU_POST_DEAD:
>>>>  		case CPU_UP_CANCELED_FROZEN:
>>>>  			__cpufreq_remove_dev(dev, NULL);
>>>>  			break;
>>>> -		case CPU_DOWN_FAILED:
>>>> -			cpufreq_add_dev(dev, NULL);
>>>> -			break;
>>>>  		}
>>>>  	}
>>>>  	return NOTIFY_OK;
>>>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
>>>> index 4645876..681d5d6 100644
>>>> --- a/drivers/cpufreq/cpufreq_governor.c
>>>> +++ b/drivers/cpufreq/cpufreq_governor.c
>>>> @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
>>>>  		unsigned int delay)
>>>>  {
>>>>  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
>>>> -
>>>> +	/* cpu offline might block existing gov_queue_work() user,
>>>> +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
>>>> +	 * thus potentially we can hit offlined CPU */
>>>> +	if (unlikely(cpu_is_offline(cpu)))
>>>> +		return;
>>>>  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
>>>>  }
>>>>
>>>> @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>>>>  		unsigned int delay, bool all_cpus)
>>>>  {
>>>>  	int i;
>>>> -
>>>> +	get_online_cpus();
>>>>  	if (!all_cpus) {
>>>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>>>>  	} else {
>>>> -		get_online_cpus();
>>>>  		for_each_cpu(i, policy->cpus)
>>>>  			__gov_queue_work(i, dbs_data, delay);
>>>> -		put_online_cpus();
>>>>  	}
>>>> +	put_online_cpus();
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(gov_queue_work);
>>>>
>>>> @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
>>>>  		/* Initiate timer time stamp */
>>>>  		cpu_cdbs->time_stamp = ktime_get();
>>>>
>>>> -		gov_queue_work(dbs_data, policy,
>>>> -				delay_for_sampling_rate(sampling_rate), true);
>>>> +		/* hotplug lock already held */
>>>> +		for_each_cpu(j, policy->cpus)
>>>> +			__gov_queue_work(j, dbs_data,
>>>> +				delay_for_sampling_rate(sampling_rate));
>>>>  		break;
>>>>
>>>>  	case CPUFREQ_GOV_STOP:
>>>> diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
>>>> index cd9e817..833816e 100644
>>>> --- a/drivers/cpufreq/cpufreq_stats.c
>>>> +++ b/drivers/cpufreq/cpufreq_stats.c
>>>> @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
>>>>  	case CPU_DOWN_PREPARE:
>>>>  		cpufreq_stats_free_sysfs(cpu);
>>>>  		break;
>>>> -	case CPU_DEAD:
>>>> +	case CPU_POST_DEAD:
>>>>  		cpufreq_stats_free_table(cpu);
>>>>  		break;
>>>>  	case CPU_UP_CANCELED_FROZEN:
>>>> --
>>>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>>>> the body of a message to majordomo@vger.kernel.org
>>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>> Please read the FAQ at  http://www.tux.org/lkml/
>>>>
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
>>
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sergey Senozhatsky July 11, 2013, 9:01 a.m. UTC | #4
On (07/11/13 16:47), Michael Wang wrote:
> On 07/11/2013 04:22 PM, Sergey Senozhatsky wrote:
> [snip]
> >>
> > 
> > Hello Michael,
> > nice job! works fine for me.
> > 
> > Reported-and-Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> 
> Thanks for the test :)
> 
> Borislav may also doing some testing, let's wait for few days and see
> whether there are any point we missed.
> 
> And we should also thanks Srivatsa for catching the root issue ;-)

Sure, many thanks to everyone.
I'll perform some additional testing.

	-ss

> Regards,
> Michael Wang
> 
> > 
> > 
> > 	-ss
> > 
> >> Regards,
> >> Michael Wang
> >>
> >> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> >> index dc9b72e..a64b544 100644
> >> --- a/drivers/cpufreq/cpufreq_governor.c
> >> +++ b/drivers/cpufreq/cpufreq_governor.c
> >> @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
> >>  {
> >>  	int i;
> >>  
> >> +	if (dbs_data->queue_stop)
> >> +		return;
> >> +
> >>  	if (!all_cpus) {
> >>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
> >>  	} else {
> >> -		get_online_cpus();
> >>  		for_each_cpu(i, policy->cpus)
> >>  			__gov_queue_work(i, dbs_data, delay);
> >> -		put_online_cpus();
> >>  	}
> >>  }
> >>  EXPORT_SYMBOL_GPL(gov_queue_work);
> >> @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
> >>  		struct cpufreq_policy *policy)
> >>  {
> >>  	struct cpu_dbs_common_info *cdbs;
> >> -	int i;
> >> +	int i, round = 2;
> >>  
> >> +	dbs_data->queue_stop = 1;
> >> +redo:
> >> +	round--;
> >>  	for_each_cpu(i, policy->cpus) {
> >>  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
> >>  		cancel_delayed_work_sync(&cdbs->work);
> >>  	}
> >> +
> >> +	/*
> >> +	 * Since there is no lock to prvent re-queue the
> >> +	 * cancelled work, some early cancelled work might
> >> +	 * have been queued again by later cancelled work.
> >> +	 *
> >> +	 * Flush the work again with dbs_data->queue_stop
> >> +	 * enabled, this time there will be no survivors.
> >> +	 */
> >> +	if (round)
> >> +		goto redo;
> >> +	dbs_data->queue_stop = 0;
> >>  }
> >>  
> >>  /* Will return if we need to evaluate cpu load again or not */
> >> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
> >> index e16a961..9116135 100644
> >> --- a/drivers/cpufreq/cpufreq_governor.h
> >> +++ b/drivers/cpufreq/cpufreq_governor.h
> >> @@ -213,6 +213,7 @@ struct dbs_data {
> >>  	unsigned int min_sampling_rate;
> >>  	int usage_count;
> >>  	void *tuners;
> >> +	int queue_stop;
> >>  
> >>  	/* dbs_mutex protects dbs_enable in governor start/stop */
> >>  	struct mutex mutex;
> >>
> >>>
> >>> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> >>>
> >>> ---
> >>>
> >>>  drivers/cpufreq/cpufreq.c          |  5 +----
> >>>  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
> >>>  drivers/cpufreq/cpufreq_stats.c    |  2 +-
> >>>  3 files changed, 13 insertions(+), 11 deletions(-)
> >>>
> >>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> >>> index 6a015ad..f8aacf1 100644
> >>> --- a/drivers/cpufreq/cpufreq.c
> >>> +++ b/drivers/cpufreq/cpufreq.c
> >>> @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
> >>>  		case CPU_ONLINE:
> >>>  			cpufreq_add_dev(dev, NULL);
> >>>  			break;
> >>> -		case CPU_DOWN_PREPARE:
> >>> +		case CPU_POST_DEAD:
> >>>  		case CPU_UP_CANCELED_FROZEN:
> >>>  			__cpufreq_remove_dev(dev, NULL);
> >>>  			break;
> >>> -		case CPU_DOWN_FAILED:
> >>> -			cpufreq_add_dev(dev, NULL);
> >>> -			break;
> >>>  		}
> >>>  	}
> >>>  	return NOTIFY_OK;
> >>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> >>> index 4645876..681d5d6 100644
> >>> --- a/drivers/cpufreq/cpufreq_governor.c
> >>> +++ b/drivers/cpufreq/cpufreq_governor.c
> >>> @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
> >>>  		unsigned int delay)
> >>>  {
> >>>  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> >>> -
> >>> +	/* cpu offline might block existing gov_queue_work() user,
> >>> +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
> >>> +	 * thus potentially we can hit offlined CPU */
> >>> +	if (unlikely(cpu_is_offline(cpu)))
> >>> +		return;
> >>>  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
> >>>  }
> >>>
> >>> @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
> >>>  		unsigned int delay, bool all_cpus)
> >>>  {
> >>>  	int i;
> >>> -
> >>> +	get_online_cpus();
> >>>  	if (!all_cpus) {
> >>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
> >>>  	} else {
> >>> -		get_online_cpus();
> >>>  		for_each_cpu(i, policy->cpus)
> >>>  			__gov_queue_work(i, dbs_data, delay);
> >>> -		put_online_cpus();
> >>>  	}
> >>> +	put_online_cpus();
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(gov_queue_work);
> >>>
> >>> @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
> >>>  		/* Initiate timer time stamp */
> >>>  		cpu_cdbs->time_stamp = ktime_get();
> >>>
> >>> -		gov_queue_work(dbs_data, policy,
> >>> -				delay_for_sampling_rate(sampling_rate), true);
> >>> +		/* hotplug lock already held */
> >>> +		for_each_cpu(j, policy->cpus)
> >>> +			__gov_queue_work(j, dbs_data,
> >>> +				delay_for_sampling_rate(sampling_rate));
> >>>  		break;
> >>>
> >>>  	case CPUFREQ_GOV_STOP:
> >>> diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> >>> index cd9e817..833816e 100644
> >>> --- a/drivers/cpufreq/cpufreq_stats.c
> >>> +++ b/drivers/cpufreq/cpufreq_stats.c
> >>> @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
> >>>  	case CPU_DOWN_PREPARE:
> >>>  		cpufreq_stats_free_sysfs(cpu);
> >>>  		break;
> >>> -	case CPU_DEAD:
> >>> +	case CPU_POST_DEAD:
> >>>  		cpufreq_stats_free_table(cpu);
> >>>  		break;
> >>>  	case CPU_UP_CANCELED_FROZEN:
> >>> --
> >>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> >>> the body of a message to majordomo@vger.kernel.org
> >>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> >>> Please read the FAQ at  http://www.tux.org/lkml/
> >>>
> >>
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> > 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bartlomiej Zolnierkiewicz July 11, 2013, 11:47 a.m. UTC | #5
Hi,

On Thursday, July 11, 2013 04:48:51 PM Michael Wang wrote:
> On 07/11/2013 04:47 PM, Michael Wang wrote:
> > On 07/11/2013 04:22 PM, Sergey Senozhatsky wrote:
> > [snip]
> >>>
> >>
> >> Hello Michael,
> >> nice job! works fine for me.
> >>
> >> Reported-and-Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> > 
> > Thanks for the test :)
> > 
> > Borislav may also doing some testing, let's wait for few days and see
> > whether there are any point we missed.
> 
> s /Borislav/Bartlomiej

Michael's patch also works for me. Thanks to everyone involved!
(My only nitpick for the patch is that ->queue_stop can be made bool.)

Reported-and-Tested-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>

I think that it would also be helpful if Jiri or Borislav could test
the patch and see if it really works for them and fixes the original
warning they were experiencing on x86.

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics

> > And we should also thanks Srivatsa for catching the root issue ;-)
> > 
> > Regards,
> > Michael Wang
> > 
> >>
> >>
> >> 	-ss
> >>
> >>> Regards,
> >>> Michael Wang
> >>>
> >>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> >>> index dc9b72e..a64b544 100644
> >>> --- a/drivers/cpufreq/cpufreq_governor.c
> >>> +++ b/drivers/cpufreq/cpufreq_governor.c
> >>> @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
> >>>  {
> >>>  	int i;
> >>>  
> >>> +	if (dbs_data->queue_stop)
> >>> +		return;
> >>> +
> >>>  	if (!all_cpus) {
> >>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
> >>>  	} else {
> >>> -		get_online_cpus();
> >>>  		for_each_cpu(i, policy->cpus)
> >>>  			__gov_queue_work(i, dbs_data, delay);
> >>> -		put_online_cpus();
> >>>  	}
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(gov_queue_work);
> >>> @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
> >>>  		struct cpufreq_policy *policy)
> >>>  {
> >>>  	struct cpu_dbs_common_info *cdbs;
> >>> -	int i;
> >>> +	int i, round = 2;
> >>>  
> >>> +	dbs_data->queue_stop = 1;
> >>> +redo:
> >>> +	round--;
> >>>  	for_each_cpu(i, policy->cpus) {
> >>>  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
> >>>  		cancel_delayed_work_sync(&cdbs->work);
> >>>  	}
> >>> +
> >>> +	/*
> >>> +	 * Since there is no lock to prvent re-queue the
> >>> +	 * cancelled work, some early cancelled work might
> >>> +	 * have been queued again by later cancelled work.
> >>> +	 *
> >>> +	 * Flush the work again with dbs_data->queue_stop
> >>> +	 * enabled, this time there will be no survivors.
> >>> +	 */
> >>> +	if (round)
> >>> +		goto redo;
> >>> +	dbs_data->queue_stop = 0;
> >>>  }
> >>>  
> >>>  /* Will return if we need to evaluate cpu load again or not */
> >>> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
> >>> index e16a961..9116135 100644
> >>> --- a/drivers/cpufreq/cpufreq_governor.h
> >>> +++ b/drivers/cpufreq/cpufreq_governor.h
> >>> @@ -213,6 +213,7 @@ struct dbs_data {
> >>>  	unsigned int min_sampling_rate;
> >>>  	int usage_count;
> >>>  	void *tuners;
> >>> +	int queue_stop;
> >>>  
> >>>  	/* dbs_mutex protects dbs_enable in governor start/stop */
> >>>  	struct mutex mutex;
> >>>
> >>>>
> >>>> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> >>>>
> >>>> ---
> >>>>
> >>>>  drivers/cpufreq/cpufreq.c          |  5 +----
> >>>>  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
> >>>>  drivers/cpufreq/cpufreq_stats.c    |  2 +-
> >>>>  3 files changed, 13 insertions(+), 11 deletions(-)
> >>>>
> >>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> >>>> index 6a015ad..f8aacf1 100644
> >>>> --- a/drivers/cpufreq/cpufreq.c
> >>>> +++ b/drivers/cpufreq/cpufreq.c
> >>>> @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
> >>>>  		case CPU_ONLINE:
> >>>>  			cpufreq_add_dev(dev, NULL);
> >>>>  			break;
> >>>> -		case CPU_DOWN_PREPARE:
> >>>> +		case CPU_POST_DEAD:
> >>>>  		case CPU_UP_CANCELED_FROZEN:
> >>>>  			__cpufreq_remove_dev(dev, NULL);
> >>>>  			break;
> >>>> -		case CPU_DOWN_FAILED:
> >>>> -			cpufreq_add_dev(dev, NULL);
> >>>> -			break;
> >>>>  		}
> >>>>  	}
> >>>>  	return NOTIFY_OK;
> >>>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> >>>> index 4645876..681d5d6 100644
> >>>> --- a/drivers/cpufreq/cpufreq_governor.c
> >>>> +++ b/drivers/cpufreq/cpufreq_governor.c
> >>>> @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
> >>>>  		unsigned int delay)
> >>>>  {
> >>>>  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> >>>> -
> >>>> +	/* cpu offline might block existing gov_queue_work() user,
> >>>> +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
> >>>> +	 * thus potentially we can hit offlined CPU */
> >>>> +	if (unlikely(cpu_is_offline(cpu)))
> >>>> +		return;
> >>>>  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
> >>>>  }
> >>>>
> >>>> @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
> >>>>  		unsigned int delay, bool all_cpus)
> >>>>  {
> >>>>  	int i;
> >>>> -
> >>>> +	get_online_cpus();
> >>>>  	if (!all_cpus) {
> >>>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
> >>>>  	} else {
> >>>> -		get_online_cpus();
> >>>>  		for_each_cpu(i, policy->cpus)
> >>>>  			__gov_queue_work(i, dbs_data, delay);
> >>>> -		put_online_cpus();
> >>>>  	}
> >>>> +	put_online_cpus();
> >>>>  }
> >>>>  EXPORT_SYMBOL_GPL(gov_queue_work);
> >>>>
> >>>> @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
> >>>>  		/* Initiate timer time stamp */
> >>>>  		cpu_cdbs->time_stamp = ktime_get();
> >>>>
> >>>> -		gov_queue_work(dbs_data, policy,
> >>>> -				delay_for_sampling_rate(sampling_rate), true);
> >>>> +		/* hotplug lock already held */
> >>>> +		for_each_cpu(j, policy->cpus)
> >>>> +			__gov_queue_work(j, dbs_data,
> >>>> +				delay_for_sampling_rate(sampling_rate));
> >>>>  		break;
> >>>>
> >>>>  	case CPUFREQ_GOV_STOP:
> >>>> diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> >>>> index cd9e817..833816e 100644
> >>>> --- a/drivers/cpufreq/cpufreq_stats.c
> >>>> +++ b/drivers/cpufreq/cpufreq_stats.c
> >>>> @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
> >>>>  	case CPU_DOWN_PREPARE:
> >>>>  		cpufreq_stats_free_sysfs(cpu);
> >>>>  		break;
> >>>> -	case CPU_DEAD:
> >>>> +	case CPU_POST_DEAD:
> >>>>  		cpufreq_stats_free_table(cpu);
> >>>>  		break;
> >>>>  	case CPU_UP_CANCELED_FROZEN:
> >>>> --

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Wang July 12, 2013, 2:19 a.m. UTC | #6
On 07/11/2013 07:47 PM, Bartlomiej Zolnierkiewicz wrote:
[snip]
> 
> Michael's patch also works for me. Thanks to everyone involved!
> (My only nitpick for the patch is that ->queue_stop can be made bool.)
> 
> Reported-and-Tested-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
> 
> I think that it would also be helpful if Jiri or Borislav could test
> the patch and see if it really works for them and fixes the original
> warning they were experiencing on x86.

Thanks for the testing :)

I plan to send out the formal patch next week, so Jiri and Borislav
would have chance to join the discussion.

Regards,
Michael Wang

> 
> Best regards,
> --
> Bartlomiej Zolnierkiewicz
> Samsung R&D Institute Poland
> Samsung Electronics
> 
>>> And we should also thanks Srivatsa for catching the root issue ;-)
>>>
>>> Regards,
>>> Michael Wang
>>>
>>>>
>>>>
>>>> 	-ss
>>>>
>>>>> Regards,
>>>>> Michael Wang
>>>>>
>>>>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
>>>>> index dc9b72e..a64b544 100644
>>>>> --- a/drivers/cpufreq/cpufreq_governor.c
>>>>> +++ b/drivers/cpufreq/cpufreq_governor.c
>>>>> @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>>>>>  {
>>>>>  	int i;
>>>>>  
>>>>> +	if (dbs_data->queue_stop)
>>>>> +		return;
>>>>> +
>>>>>  	if (!all_cpus) {
>>>>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>>>>>  	} else {
>>>>> -		get_online_cpus();
>>>>>  		for_each_cpu(i, policy->cpus)
>>>>>  			__gov_queue_work(i, dbs_data, delay);
>>>>> -		put_online_cpus();
>>>>>  	}
>>>>>  }
>>>>>  EXPORT_SYMBOL_GPL(gov_queue_work);
>>>>> @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
>>>>>  		struct cpufreq_policy *policy)
>>>>>  {
>>>>>  	struct cpu_dbs_common_info *cdbs;
>>>>> -	int i;
>>>>> +	int i, round = 2;
>>>>>  
>>>>> +	dbs_data->queue_stop = 1;
>>>>> +redo:
>>>>> +	round--;
>>>>>  	for_each_cpu(i, policy->cpus) {
>>>>>  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
>>>>>  		cancel_delayed_work_sync(&cdbs->work);
>>>>>  	}
>>>>> +
>>>>> +	/*
>>>>> +	 * Since there is no lock to prvent re-queue the
>>>>> +	 * cancelled work, some early cancelled work might
>>>>> +	 * have been queued again by later cancelled work.
>>>>> +	 *
>>>>> +	 * Flush the work again with dbs_data->queue_stop
>>>>> +	 * enabled, this time there will be no survivors.
>>>>> +	 */
>>>>> +	if (round)
>>>>> +		goto redo;
>>>>> +	dbs_data->queue_stop = 0;
>>>>>  }
>>>>>  
>>>>>  /* Will return if we need to evaluate cpu load again or not */
>>>>> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
>>>>> index e16a961..9116135 100644
>>>>> --- a/drivers/cpufreq/cpufreq_governor.h
>>>>> +++ b/drivers/cpufreq/cpufreq_governor.h
>>>>> @@ -213,6 +213,7 @@ struct dbs_data {
>>>>>  	unsigned int min_sampling_rate;
>>>>>  	int usage_count;
>>>>>  	void *tuners;
>>>>> +	int queue_stop;
>>>>>  
>>>>>  	/* dbs_mutex protects dbs_enable in governor start/stop */
>>>>>  	struct mutex mutex;
>>>>>
>>>>>>
>>>>>> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
>>>>>>
>>>>>> ---
>>>>>>
>>>>>>  drivers/cpufreq/cpufreq.c          |  5 +----
>>>>>>  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
>>>>>>  drivers/cpufreq/cpufreq_stats.c    |  2 +-
>>>>>>  3 files changed, 13 insertions(+), 11 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>>>>>> index 6a015ad..f8aacf1 100644
>>>>>> --- a/drivers/cpufreq/cpufreq.c
>>>>>> +++ b/drivers/cpufreq/cpufreq.c
>>>>>> @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
>>>>>>  		case CPU_ONLINE:
>>>>>>  			cpufreq_add_dev(dev, NULL);
>>>>>>  			break;
>>>>>> -		case CPU_DOWN_PREPARE:
>>>>>> +		case CPU_POST_DEAD:
>>>>>>  		case CPU_UP_CANCELED_FROZEN:
>>>>>>  			__cpufreq_remove_dev(dev, NULL);
>>>>>>  			break;
>>>>>> -		case CPU_DOWN_FAILED:
>>>>>> -			cpufreq_add_dev(dev, NULL);
>>>>>> -			break;
>>>>>>  		}
>>>>>>  	}
>>>>>>  	return NOTIFY_OK;
>>>>>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
>>>>>> index 4645876..681d5d6 100644
>>>>>> --- a/drivers/cpufreq/cpufreq_governor.c
>>>>>> +++ b/drivers/cpufreq/cpufreq_governor.c
>>>>>> @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
>>>>>>  		unsigned int delay)
>>>>>>  {
>>>>>>  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
>>>>>> -
>>>>>> +	/* cpu offline might block existing gov_queue_work() user,
>>>>>> +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
>>>>>> +	 * thus potentially we can hit offlined CPU */
>>>>>> +	if (unlikely(cpu_is_offline(cpu)))
>>>>>> +		return;
>>>>>>  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
>>>>>>  }
>>>>>>
>>>>>> @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>>>>>>  		unsigned int delay, bool all_cpus)
>>>>>>  {
>>>>>>  	int i;
>>>>>> -
>>>>>> +	get_online_cpus();
>>>>>>  	if (!all_cpus) {
>>>>>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>>>>>>  	} else {
>>>>>> -		get_online_cpus();
>>>>>>  		for_each_cpu(i, policy->cpus)
>>>>>>  			__gov_queue_work(i, dbs_data, delay);
>>>>>> -		put_online_cpus();
>>>>>>  	}
>>>>>> +	put_online_cpus();
>>>>>>  }
>>>>>>  EXPORT_SYMBOL_GPL(gov_queue_work);
>>>>>>
>>>>>> @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
>>>>>>  		/* Initiate timer time stamp */
>>>>>>  		cpu_cdbs->time_stamp = ktime_get();
>>>>>>
>>>>>> -		gov_queue_work(dbs_data, policy,
>>>>>> -				delay_for_sampling_rate(sampling_rate), true);
>>>>>> +		/* hotplug lock already held */
>>>>>> +		for_each_cpu(j, policy->cpus)
>>>>>> +			__gov_queue_work(j, dbs_data,
>>>>>> +				delay_for_sampling_rate(sampling_rate));
>>>>>>  		break;
>>>>>>
>>>>>>  	case CPUFREQ_GOV_STOP:
>>>>>> diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
>>>>>> index cd9e817..833816e 100644
>>>>>> --- a/drivers/cpufreq/cpufreq_stats.c
>>>>>> +++ b/drivers/cpufreq/cpufreq_stats.c
>>>>>> @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
>>>>>>  	case CPU_DOWN_PREPARE:
>>>>>>  		cpufreq_stats_free_sysfs(cpu);
>>>>>>  		break;
>>>>>> -	case CPU_DEAD:
>>>>>> +	case CPU_POST_DEAD:
>>>>>>  		cpufreq_stats_free_table(cpu);
>>>>>>  		break;
>>>>>>  	case CPU_UP_CANCELED_FROZEN:
>>>>>> --
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sergey Senozhatsky July 14, 2013, 11:47 a.m. UTC | #7
On (07/11/13 10:43), Michael Wang wrote:
> [..]
> Nice to know you have some idea on solving the issue ;-)
> 
> I'm not sure whether I catch the idea, but seems like you are trying
> to re-organize the timing of add/remove device.
> 
> I'm sure that we have more than one way to solve the issues, but what
> we need is the cure of root...
> 
> As Srivatsa discovered, the root issue may be:
> 	gov_cancel_work() failed to stop all the work after it's return.
> 
> And Viresh also confirmed that this is not by-designed.
> 
> Which means gov_queue_work() invoked by od_dbs_timer() is supposed to
> never happen after CPUFREQ_GOV_STOP notify, the whole policy should
> stop working at that time.
> 
> But it failed to, and the work concurrent with cpu dying caused the
> first problem.
> 
> Thus I think we should focus on this and suggested below fix, I'd like
> to know your opinions :)
>

Hello,

I just realized that lockdep was disabling itself at startup (after recent AMD
radeon patch set) due to radeon kms error:

[    4.790019] [drm] Loading CEDAR Microcode
[    4.790943] r600_cp: Failed to load firmware "radeon/CEDAR_smc.bin"
[    4.791152] [drm:evergreen_startup] *ERROR* Failed to load firmware!
[    4.791330] radeon 0000:01:00.0: disabling GPU acceleration
[    4.792633] INFO: trying to register non-static key.
[    4.792792] the code is fine but needs lockdep annotation.
[    4.792953] turning off the locking correctness validator.


Now, as I fixed radeon kms, I can see:

[  806.660530] ------------[ cut here ]------------
[  806.660539] WARNING: CPU: 0 PID: 2389 at arch/x86/kernel/smp.c:124
native_smp_send_reschedule+0x57/0x60()

[  806.660572] Workqueue: events od_dbs_timer
[  806.660575]  0000000000000009 ffff8801531cfbd8 ffffffff816044ee
0000000000000000
[  806.660577]  ffff8801531cfc10 ffffffff8104995d 0000000000000003
ffff8801531f8000
[  806.660579]  000000010001ee39 0000000000000003 0000000000000003
ffff8801531cfc20
[  806.660580] Call Trace:
[  806.660587]  [<ffffffff816044ee>] dump_stack+0x4e/0x82
[  806.660591]  [<ffffffff8104995d>] warn_slowpath_common+0x7d/0xa0
[  806.660593]  [<ffffffff81049a3a>] warn_slowpath_null+0x1a/0x20
[  806.660595]  [<ffffffff8102ca07>] native_smp_send_reschedule+0x57/0x60
[  806.660599]  [<ffffffff81085211>] wake_up_nohz_cpu+0x61/0xb0
[  806.660603]  [<ffffffff8105cb6d>] add_timer_on+0x8d/0x1e0
[  806.660607]  [<ffffffff8106cc66>] __queue_delayed_work+0x166/0x1a0
[  806.660609]  [<ffffffff8106d6a9>] ? try_to_grab_pending+0xd9/0x1a0
[  806.660611]  [<ffffffff8106d7bf>] mod_delayed_work_on+0x4f/0x90
[  806.660613]  [<ffffffff8150f436>] gov_queue_work+0x56/0xd0
[  806.660615]  [<ffffffff8150e740>] od_dbs_timer+0xc0/0x160
[  806.660617]  [<ffffffff8106dbcd>] process_one_work+0x1cd/0x6a0
[  806.660619]  [<ffffffff8106db63>] ? process_one_work+0x163/0x6a0
[  806.660622]  [<ffffffff8106e8d1>] worker_thread+0x121/0x3a0
[  806.660627]  [<ffffffff810b668d>] ? trace_hardirqs_on+0xd/0x10
[  806.660629]  [<ffffffff8106e7b0>] ? manage_workers.isra.24+0x2a0/0x2a0
[  806.660633]  [<ffffffff810760cb>] kthread+0xdb/0xe0
[  806.660635]  [<ffffffff81075ff0>] ? insert_kthread_work+0x70/0x70
[  806.660639]  [<ffffffff8160de6c>] ret_from_fork+0x7c/0xb0
[  806.660640]  [<ffffffff81075ff0>] ? insert_kthread_work+0x70/0x70
[  806.660642] ---[ end trace 01ae278488a0ad6d ]---


The same problem why get/put_online_cpus() has been added to  __gov_queue_work()

commit 2f7021a815f20f3481c10884fe9735ce2a56db35
Author: Michael Wang

    cpufreq: protect 'policy->cpus' from offlining during
    __gov_queue_work()

	-ss

> Regards,
> Michael Wang
> 
> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> index dc9b72e..a64b544 100644
> --- a/drivers/cpufreq/cpufreq_governor.c
> +++ b/drivers/cpufreq/cpufreq_governor.c
> @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>  {
>  	int i;
>  
> +	if (dbs_data->queue_stop)
> +		return;
> +
>  	if (!all_cpus) {
>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>  	} else {
> -		get_online_cpus();
>  		for_each_cpu(i, policy->cpus)
>  			__gov_queue_work(i, dbs_data, delay);
> -		put_online_cpus();
>  	}
>  }
>  EXPORT_SYMBOL_GPL(gov_queue_work);
> @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
>  		struct cpufreq_policy *policy)
>  {
>  	struct cpu_dbs_common_info *cdbs;
> -	int i;
> +	int i, round = 2;
>  
> +	dbs_data->queue_stop = 1;
> +redo:
> +	round--;
>  	for_each_cpu(i, policy->cpus) {
>  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
>  		cancel_delayed_work_sync(&cdbs->work);
>  	}
> +
> +	/*
> +	 * Since there is no lock to prvent re-queue the
> +	 * cancelled work, some early cancelled work might
> +	 * have been queued again by later cancelled work.
> +	 *
> +	 * Flush the work again with dbs_data->queue_stop
> +	 * enabled, this time there will be no survivors.
> +	 */
> +	if (round)
> +		goto redo;
> +	dbs_data->queue_stop = 0;
>  }
>  
>  /* Will return if we need to evaluate cpu load again or not */
> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
> index e16a961..9116135 100644
> --- a/drivers/cpufreq/cpufreq_governor.h
> +++ b/drivers/cpufreq/cpufreq_governor.h
> @@ -213,6 +213,7 @@ struct dbs_data {
>  	unsigned int min_sampling_rate;
>  	int usage_count;
>  	void *tuners;
> +	int queue_stop;
>  
>  	/* dbs_mutex protects dbs_enable in governor start/stop */
>  	struct mutex mutex;
> 
> > 
> > Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> > 
> > ---
> > 
> >  drivers/cpufreq/cpufreq.c          |  5 +----
> >  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
> >  drivers/cpufreq/cpufreq_stats.c    |  2 +-
> >  3 files changed, 13 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > index 6a015ad..f8aacf1 100644
> > --- a/drivers/cpufreq/cpufreq.c
> > +++ b/drivers/cpufreq/cpufreq.c
> > @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
> >  		case CPU_ONLINE:
> >  			cpufreq_add_dev(dev, NULL);
> >  			break;
> > -		case CPU_DOWN_PREPARE:
> > +		case CPU_POST_DEAD:
> >  		case CPU_UP_CANCELED_FROZEN:
> >  			__cpufreq_remove_dev(dev, NULL);
> >  			break;
> > -		case CPU_DOWN_FAILED:
> > -			cpufreq_add_dev(dev, NULL);
> > -			break;
> >  		}
> >  	}
> >  	return NOTIFY_OK;
> > diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> > index 4645876..681d5d6 100644
> > --- a/drivers/cpufreq/cpufreq_governor.c
> > +++ b/drivers/cpufreq/cpufreq_governor.c
> > @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
> >  		unsigned int delay)
> >  {
> >  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> > -
> > +	/* cpu offline might block existing gov_queue_work() user,
> > +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
> > +	 * thus potentially we can hit offlined CPU */
> > +	if (unlikely(cpu_is_offline(cpu)))
> > +		return;
> >  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
> >  }
> > 
> > @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
> >  		unsigned int delay, bool all_cpus)
> >  {
> >  	int i;
> > -
> > +	get_online_cpus();
> >  	if (!all_cpus) {
> >  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
> >  	} else {
> > -		get_online_cpus();
> >  		for_each_cpu(i, policy->cpus)
> >  			__gov_queue_work(i, dbs_data, delay);
> > -		put_online_cpus();
> >  	}
> > +	put_online_cpus();
> >  }
> >  EXPORT_SYMBOL_GPL(gov_queue_work);
> > 
> > @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
> >  		/* Initiate timer time stamp */
> >  		cpu_cdbs->time_stamp = ktime_get();
> > 
> > -		gov_queue_work(dbs_data, policy,
> > -				delay_for_sampling_rate(sampling_rate), true);
> > +		/* hotplug lock already held */
> > +		for_each_cpu(j, policy->cpus)
> > +			__gov_queue_work(j, dbs_data,
> > +				delay_for_sampling_rate(sampling_rate));
> >  		break;
> > 
> >  	case CPUFREQ_GOV_STOP:
> > diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> > index cd9e817..833816e 100644
> > --- a/drivers/cpufreq/cpufreq_stats.c
> > +++ b/drivers/cpufreq/cpufreq_stats.c
> > @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
> >  	case CPU_DOWN_PREPARE:
> >  		cpufreq_stats_free_sysfs(cpu);
> >  		break;
> > -	case CPU_DEAD:
> > +	case CPU_POST_DEAD:
> >  		cpufreq_stats_free_table(cpu);
> >  		break;
> >  	case CPU_UP_CANCELED_FROZEN:
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/
> > 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sergey Senozhatsky July 14, 2013, 12:06 p.m. UTC | #8
On (07/14/13 14:47), Sergey Senozhatsky wrote:
> 
> Now, as I fixed radeon kms, I can see:
> 
> [  806.660530] ------------[ cut here ]------------
> [  806.660539] WARNING: CPU: 0 PID: 2389 at arch/x86/kernel/smp.c:124
> native_smp_send_reschedule+0x57/0x60()

Well, this one is obviously not a lockdep error, I meant that previous
tests with disabled lockdep were invalid. Will re-do.

	-ss

> > Regards,
> > Michael Wang
> > 
> > diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> > index dc9b72e..a64b544 100644
> > --- a/drivers/cpufreq/cpufreq_governor.c
> > +++ b/drivers/cpufreq/cpufreq_governor.c
> > @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
> >  {
> >  	int i;
> >  
> > +	if (dbs_data->queue_stop)
> > +		return;
> > +
> >  	if (!all_cpus) {
> >  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
> >  	} else {
> > -		get_online_cpus();
> >  		for_each_cpu(i, policy->cpus)
> >  			__gov_queue_work(i, dbs_data, delay);
> > -		put_online_cpus();
> >  	}
> >  }
> >  EXPORT_SYMBOL_GPL(gov_queue_work);
> > @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
> >  		struct cpufreq_policy *policy)
> >  {
> >  	struct cpu_dbs_common_info *cdbs;
> > -	int i;
> > +	int i, round = 2;
> >  
> > +	dbs_data->queue_stop = 1;
> > +redo:
> > +	round--;
> >  	for_each_cpu(i, policy->cpus) {
> >  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
> >  		cancel_delayed_work_sync(&cdbs->work);
> >  	}
> > +
> > +	/*
> > +	 * Since there is no lock to prvent re-queue the
> > +	 * cancelled work, some early cancelled work might
> > +	 * have been queued again by later cancelled work.
> > +	 *
> > +	 * Flush the work again with dbs_data->queue_stop
> > +	 * enabled, this time there will be no survivors.
> > +	 */
> > +	if (round)
> > +		goto redo;
> > +	dbs_data->queue_stop = 0;
> >  }
> >  
> >  /* Will return if we need to evaluate cpu load again or not */
> > diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
> > index e16a961..9116135 100644
> > --- a/drivers/cpufreq/cpufreq_governor.h
> > +++ b/drivers/cpufreq/cpufreq_governor.h
> > @@ -213,6 +213,7 @@ struct dbs_data {
> >  	unsigned int min_sampling_rate;
> >  	int usage_count;
> >  	void *tuners;
> > +	int queue_stop;
> >  
> >  	/* dbs_mutex protects dbs_enable in governor start/stop */
> >  	struct mutex mutex;
> > 
> > > 
> > > Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> > > 
> > > ---
> > > 
> > >  drivers/cpufreq/cpufreq.c          |  5 +----
> > >  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
> > >  drivers/cpufreq/cpufreq_stats.c    |  2 +-
> > >  3 files changed, 13 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> > > index 6a015ad..f8aacf1 100644
> > > --- a/drivers/cpufreq/cpufreq.c
> > > +++ b/drivers/cpufreq/cpufreq.c
> > > @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
> > >  		case CPU_ONLINE:
> > >  			cpufreq_add_dev(dev, NULL);
> > >  			break;
> > > -		case CPU_DOWN_PREPARE:
> > > +		case CPU_POST_DEAD:
> > >  		case CPU_UP_CANCELED_FROZEN:
> > >  			__cpufreq_remove_dev(dev, NULL);
> > >  			break;
> > > -		case CPU_DOWN_FAILED:
> > > -			cpufreq_add_dev(dev, NULL);
> > > -			break;
> > >  		}
> > >  	}
> > >  	return NOTIFY_OK;
> > > diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> > > index 4645876..681d5d6 100644
> > > --- a/drivers/cpufreq/cpufreq_governor.c
> > > +++ b/drivers/cpufreq/cpufreq_governor.c
> > > @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
> > >  		unsigned int delay)
> > >  {
> > >  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
> > > -
> > > +	/* cpu offline might block existing gov_queue_work() user,
> > > +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
> > > +	 * thus potentially we can hit offlined CPU */
> > > +	if (unlikely(cpu_is_offline(cpu)))
> > > +		return;
> > >  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
> > >  }
> > > 
> > > @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
> > >  		unsigned int delay, bool all_cpus)
> > >  {
> > >  	int i;
> > > -
> > > +	get_online_cpus();
> > >  	if (!all_cpus) {
> > >  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
> > >  	} else {
> > > -		get_online_cpus();
> > >  		for_each_cpu(i, policy->cpus)
> > >  			__gov_queue_work(i, dbs_data, delay);
> > > -		put_online_cpus();
> > >  	}
> > > +	put_online_cpus();
> > >  }
> > >  EXPORT_SYMBOL_GPL(gov_queue_work);
> > > 
> > > @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
> > >  		/* Initiate timer time stamp */
> > >  		cpu_cdbs->time_stamp = ktime_get();
> > > 
> > > -		gov_queue_work(dbs_data, policy,
> > > -				delay_for_sampling_rate(sampling_rate), true);
> > > +		/* hotplug lock already held */
> > > +		for_each_cpu(j, policy->cpus)
> > > +			__gov_queue_work(j, dbs_data,
> > > +				delay_for_sampling_rate(sampling_rate));
> > >  		break;
> > > 
> > >  	case CPUFREQ_GOV_STOP:
> > > diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
> > > index cd9e817..833816e 100644
> > > --- a/drivers/cpufreq/cpufreq_stats.c
> > > +++ b/drivers/cpufreq/cpufreq_stats.c
> > > @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
> > >  	case CPU_DOWN_PREPARE:
> > >  		cpufreq_stats_free_sysfs(cpu);
> > >  		break;
> > > -	case CPU_DEAD:
> > > +	case CPU_POST_DEAD:
> > >  		cpufreq_stats_free_table(cpu);
> > >  		break;
> > >  	case CPU_UP_CANCELED_FROZEN:
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > > Please read the FAQ at  http://www.tux.org/lkml/
> > > 
> > 
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rafael Wysocki July 14, 2013, 3:56 p.m. UTC | #9
On Thursday, July 11, 2013 10:43:45 AM Michael Wang wrote:
> Hi, Sergey
> 
> On 07/11/2013 07:13 AM, Sergey Senozhatsky wrote:
> [snip]
> > 
> > 
> > Please kindly review the following patch.
> > 
> > 
> > 
> > Remove cpu device only upon succesful cpu down on CPU_POST_DEAD event,
> > so we can kill off CPU_DOWN_FAILED case and eliminate potential extra
> > remove/add path:
> > 	
> > 	hotplug lock
> > 		CPU_DOWN_PREPARE: __cpufreq_remove_dev
> > 		CPU_DOWN_FAILED: cpufreq_add_dev
> > 	hotplug unlock
> > 
> > Since cpu still present on CPU_DEAD event, cpu stats table should be
> > kept longer and removed later on CPU_POST_DEAD as well.
> > 
> > Because CPU_POST_DEAD action performed with hotplug lock released, CPU_DOWN
> > might block existing gov_queue_work() user (blocked on get_online_cpus())
> > and unblock it with one of policy->cpus offlined, thus cpu_is_offline()
> > check is performed in __gov_queue_work().
> > 
> > Besides, existing gov_queue_work() hotplug guard extended to protect all
> > __gov_queue_work() calls: for both all_cpus and !all_cpus cases.
> > 
> > CPUFREQ_GOV_START performs direct __gov_queue_work() call because hotplug
> > lock already held there, opposing to previous gov_queue_work() and nested
> > get/put_online_cpus().
> 
> Nice to know you have some idea on solving the issue ;-)
> 
> I'm not sure whether I catch the idea, but seems like you are trying
> to re-organize the timing of add/remove device.
> 
> I'm sure that we have more than one way to solve the issues, but what
> we need is the cure of root...
> 
> As Srivatsa discovered, the root issue may be:
> 	gov_cancel_work() failed to stop all the work after it's return.
> 
> And Viresh also confirmed that this is not by-designed.
> 
> Which means gov_queue_work() invoked by od_dbs_timer() is supposed to
> never happen after CPUFREQ_GOV_STOP notify, the whole policy should
> stop working at that time.
> 
> But it failed to, and the work concurrent with cpu dying caused the
> first problem.
> 
> Thus I think we should focus on this and suggested below fix, I'd like
> to know your opinions :)
> 
> Regards,
> Michael Wang
> 
> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> index dc9b72e..a64b544 100644
> --- a/drivers/cpufreq/cpufreq_governor.c
> +++ b/drivers/cpufreq/cpufreq_governor.c
> @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>  {
>  	int i;
>  
> +	if (dbs_data->queue_stop)
> +		return;
> +
>  	if (!all_cpus) {
>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>  	} else {
> -		get_online_cpus();
>  		for_each_cpu(i, policy->cpus)
>  			__gov_queue_work(i, dbs_data, delay);
> -		put_online_cpus();
>  	}
>  }
>  EXPORT_SYMBOL_GPL(gov_queue_work);
> @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
>  		struct cpufreq_policy *policy)
>  {
>  	struct cpu_dbs_common_info *cdbs;
> -	int i;
> +	int i, round = 2;
>  
> +	dbs_data->queue_stop = 1;
> +redo:
> +	round--;
>  	for_each_cpu(i, policy->cpus) {
>  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
>  		cancel_delayed_work_sync(&cdbs->work);
>  	}
> +
> +	/*
> +	 * Since there is no lock to prvent re-queue the
> +	 * cancelled work, some early cancelled work might
> +	 * have been queued again by later cancelled work.
> +	 *
> +	 * Flush the work again with dbs_data->queue_stop
> +	 * enabled, this time there will be no survivors.
> +	 */
> +	if (round)
> +		goto redo;

Well, what about doing:

	for (round = 2; round; round--)
	  	for_each_cpu(i, policy->cpus) {
	  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
	  		cancel_delayed_work_sync(&cdbs->work);
	  	}

instead?

> +	dbs_data->queue_stop = 0;
>  }
>  
>  /* Will return if we need to evaluate cpu load again or not */
> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
> index e16a961..9116135 100644
> --- a/drivers/cpufreq/cpufreq_governor.h
> +++ b/drivers/cpufreq/cpufreq_governor.h
> @@ -213,6 +213,7 @@ struct dbs_data {
>  	unsigned int min_sampling_rate;
>  	int usage_count;
>  	void *tuners;
> +	int queue_stop;
>  
>  	/* dbs_mutex protects dbs_enable in governor start/stop */
>  	struct mutex mutex;
>
Michael Wang July 15, 2013, 2:42 a.m. UTC | #10
On 07/14/2013 07:47 PM, Sergey Senozhatsky wrote:
[snip]
> 
> Hello,
> 
> I just realized that lockdep was disabling itself at startup (after recent AMD
> radeon patch set) due to radeon kms error:
> 
> [    4.790019] [drm] Loading CEDAR Microcode
> [    4.790943] r600_cp: Failed to load firmware "radeon/CEDAR_smc.bin"
> [    4.791152] [drm:evergreen_startup] *ERROR* Failed to load firmware!
> [    4.791330] radeon 0000:01:00.0: disabling GPU acceleration
> [    4.792633] INFO: trying to register non-static key.
> [    4.792792] the code is fine but needs lockdep annotation.
> [    4.792953] turning off the locking correctness validator.
> 
> 
> Now, as I fixed radeon kms, I can see:
> 
> [  806.660530] ------------[ cut here ]------------
> [  806.660539] WARNING: CPU: 0 PID: 2389 at arch/x86/kernel/smp.c:124
> native_smp_send_reschedule+0x57/0x60()
> 
> [  806.660572] Workqueue: events od_dbs_timer
> [  806.660575]  0000000000000009 ffff8801531cfbd8 ffffffff816044ee
> 0000000000000000
> [  806.660577]  ffff8801531cfc10 ffffffff8104995d 0000000000000003
> ffff8801531f8000
> [  806.660579]  000000010001ee39 0000000000000003 0000000000000003
> ffff8801531cfc20
> [  806.660580] Call Trace:
> [  806.660587]  [<ffffffff816044ee>] dump_stack+0x4e/0x82
> [  806.660591]  [<ffffffff8104995d>] warn_slowpath_common+0x7d/0xa0
> [  806.660593]  [<ffffffff81049a3a>] warn_slowpath_null+0x1a/0x20
> [  806.660595]  [<ffffffff8102ca07>] native_smp_send_reschedule+0x57/0x60
> [  806.660599]  [<ffffffff81085211>] wake_up_nohz_cpu+0x61/0xb0
> [  806.660603]  [<ffffffff8105cb6d>] add_timer_on+0x8d/0x1e0
> [  806.660607]  [<ffffffff8106cc66>] __queue_delayed_work+0x166/0x1a0
> [  806.660609]  [<ffffffff8106d6a9>] ? try_to_grab_pending+0xd9/0x1a0
> [  806.660611]  [<ffffffff8106d7bf>] mod_delayed_work_on+0x4f/0x90
> [  806.660613]  [<ffffffff8150f436>] gov_queue_work+0x56/0xd0
> [  806.660615]  [<ffffffff8150e740>] od_dbs_timer+0xc0/0x160
> [  806.660617]  [<ffffffff8106dbcd>] process_one_work+0x1cd/0x6a0
> [  806.660619]  [<ffffffff8106db63>] ? process_one_work+0x163/0x6a0
> [  806.660622]  [<ffffffff8106e8d1>] worker_thread+0x121/0x3a0
> [  806.660627]  [<ffffffff810b668d>] ? trace_hardirqs_on+0xd/0x10
> [  806.660629]  [<ffffffff8106e7b0>] ? manage_workers.isra.24+0x2a0/0x2a0
> [  806.660633]  [<ffffffff810760cb>] kthread+0xdb/0xe0
> [  806.660635]  [<ffffffff81075ff0>] ? insert_kthread_work+0x70/0x70
> [  806.660639]  [<ffffffff8160de6c>] ret_from_fork+0x7c/0xb0
> [  806.660640]  [<ffffffff81075ff0>] ? insert_kthread_work+0x70/0x70
> [  806.660642] ---[ end trace 01ae278488a0ad6d ]---

So it back again...

Currently I have some assumptions in my mind:
1. we still failed to stop od_dbs_timer after STOP notify.
2. there is some code else which restart the work after STOP notify.
3. policy->cpus is broken.

I think we need a more detail investigation this time, let's catch the
mouse out ;-)

Regards,
Michael Wang

> 
> 
> The same problem why get/put_online_cpus() has been added to  __gov_queue_work()
> 
> commit 2f7021a815f20f3481c10884fe9735ce2a56db35
> Author: Michael Wang
> 
>     cpufreq: protect 'policy->cpus' from offlining during
>     __gov_queue_work()
> 
> 	-ss
> 
>> Regards,
>> Michael Wang
>>
>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
>> index dc9b72e..a64b544 100644
>> --- a/drivers/cpufreq/cpufreq_governor.c
>> +++ b/drivers/cpufreq/cpufreq_governor.c
>> @@ -178,13 +178,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>>  {
>>  	int i;
>>  
>> +	if (dbs_data->queue_stop)
>> +		return;
>> +
>>  	if (!all_cpus) {
>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>>  	} else {
>> -		get_online_cpus();
>>  		for_each_cpu(i, policy->cpus)
>>  			__gov_queue_work(i, dbs_data, delay);
>> -		put_online_cpus();
>>  	}
>>  }
>>  EXPORT_SYMBOL_GPL(gov_queue_work);
>> @@ -193,12 +194,27 @@ static inline void gov_cancel_work(struct dbs_data *dbs_data,
>>  		struct cpufreq_policy *policy)
>>  {
>>  	struct cpu_dbs_common_info *cdbs;
>> -	int i;
>> +	int i, round = 2;
>>  
>> +	dbs_data->queue_stop = 1;
>> +redo:
>> +	round--;
>>  	for_each_cpu(i, policy->cpus) {
>>  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
>>  		cancel_delayed_work_sync(&cdbs->work);
>>  	}
>> +
>> +	/*
>> +	 * Since there is no lock to prvent re-queue the
>> +	 * cancelled work, some early cancelled work might
>> +	 * have been queued again by later cancelled work.
>> +	 *
>> +	 * Flush the work again with dbs_data->queue_stop
>> +	 * enabled, this time there will be no survivors.
>> +	 */
>> +	if (round)
>> +		goto redo;
>> +	dbs_data->queue_stop = 0;
>>  }
>>  
>>  /* Will return if we need to evaluate cpu load again or not */
>> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
>> index e16a961..9116135 100644
>> --- a/drivers/cpufreq/cpufreq_governor.h
>> +++ b/drivers/cpufreq/cpufreq_governor.h
>> @@ -213,6 +213,7 @@ struct dbs_data {
>>  	unsigned int min_sampling_rate;
>>  	int usage_count;
>>  	void *tuners;
>> +	int queue_stop;
>>  
>>  	/* dbs_mutex protects dbs_enable in governor start/stop */
>>  	struct mutex mutex;
>>
>>>
>>> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
>>>
>>> ---
>>>
>>>  drivers/cpufreq/cpufreq.c          |  5 +----
>>>  drivers/cpufreq/cpufreq_governor.c | 17 +++++++++++------
>>>  drivers/cpufreq/cpufreq_stats.c    |  2 +-
>>>  3 files changed, 13 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>>> index 6a015ad..f8aacf1 100644
>>> --- a/drivers/cpufreq/cpufreq.c
>>> +++ b/drivers/cpufreq/cpufreq.c
>>> @@ -1943,13 +1943,10 @@ static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
>>>  		case CPU_ONLINE:
>>>  			cpufreq_add_dev(dev, NULL);
>>>  			break;
>>> -		case CPU_DOWN_PREPARE:
>>> +		case CPU_POST_DEAD:
>>>  		case CPU_UP_CANCELED_FROZEN:
>>>  			__cpufreq_remove_dev(dev, NULL);
>>>  			break;
>>> -		case CPU_DOWN_FAILED:
>>> -			cpufreq_add_dev(dev, NULL);
>>> -			break;
>>>  		}
>>>  	}
>>>  	return NOTIFY_OK;
>>> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
>>> index 4645876..681d5d6 100644
>>> --- a/drivers/cpufreq/cpufreq_governor.c
>>> +++ b/drivers/cpufreq/cpufreq_governor.c
>>> @@ -125,7 +125,11 @@ static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
>>>  		unsigned int delay)
>>>  {
>>>  	struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
>>> -
>>> +	/* cpu offline might block existing gov_queue_work() user,
>>> +	 * unblocking it after CPU_DEAD and before CPU_POST_DEAD.
>>> +	 * thus potentially we can hit offlined CPU */
>>> +	if (unlikely(cpu_is_offline(cpu)))
>>> +		return;
>>>  	mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay);
>>>  }
>>>
>>> @@ -133,15 +137,14 @@ void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
>>>  		unsigned int delay, bool all_cpus)
>>>  {
>>>  	int i;
>>> -
>>> +	get_online_cpus();
>>>  	if (!all_cpus) {
>>>  		__gov_queue_work(smp_processor_id(), dbs_data, delay);
>>>  	} else {
>>> -		get_online_cpus();
>>>  		for_each_cpu(i, policy->cpus)
>>>  			__gov_queue_work(i, dbs_data, delay);
>>> -		put_online_cpus();
>>>  	}
>>> +	put_online_cpus();
>>>  }
>>>  EXPORT_SYMBOL_GPL(gov_queue_work);
>>>
>>> @@ -354,8 +357,10 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy,
>>>  		/* Initiate timer time stamp */
>>>  		cpu_cdbs->time_stamp = ktime_get();
>>>
>>> -		gov_queue_work(dbs_data, policy,
>>> -				delay_for_sampling_rate(sampling_rate), true);
>>> +		/* hotplug lock already held */
>>> +		for_each_cpu(j, policy->cpus)
>>> +			__gov_queue_work(j, dbs_data,
>>> +				delay_for_sampling_rate(sampling_rate));
>>>  		break;
>>>
>>>  	case CPUFREQ_GOV_STOP:
>>> diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
>>> index cd9e817..833816e 100644
>>> --- a/drivers/cpufreq/cpufreq_stats.c
>>> +++ b/drivers/cpufreq/cpufreq_stats.c
>>> @@ -355,7 +355,7 @@ static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb,
>>>  	case CPU_DOWN_PREPARE:
>>>  		cpufreq_stats_free_sysfs(cpu);
>>>  		break;
>>> -	case CPU_DEAD:
>>> +	case CPU_POST_DEAD:
>>>  		cpufreq_stats_free_table(cpu);
>>>  		break;
>>>  	case CPU_UP_CANCELED_FROZEN:
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>> Please read the FAQ at  http://www.tux.org/lkml/
>>>
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Wang July 15, 2013, 2:46 a.m. UTC | #11
On 07/14/2013 11:56 PM, Rafael J. Wysocki wrote:
[snip]
>> +
>> +	/*
>> +	 * Since there is no lock to prvent re-queue the
>> +	 * cancelled work, some early cancelled work might
>> +	 * have been queued again by later cancelled work.
>> +	 *
>> +	 * Flush the work again with dbs_data->queue_stop
>> +	 * enabled, this time there will be no survivors.
>> +	 */
>> +	if (round)
>> +		goto redo;
> 
> Well, what about doing:
> 
> 	for (round = 2; round; round--)
> 	  	for_each_cpu(i, policy->cpus) {
> 	  		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
> 	  		cancel_delayed_work_sync(&cdbs->work);
> 	  	}
> 
> instead?
> 

It could works, while I was a little dislike to use nested 'for' logical...

Anyway, seems like we have not solved the issue yet, so let's put these
down and focus on the fix firstly ;-)

Regards,
Michael Wang

>> +	dbs_data->queue_stop = 0;
>>  }
>>  
>>  /* Will return if we need to evaluate cpu load again or not */
>> diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
>> index e16a961..9116135 100644
>> --- a/drivers/cpufreq/cpufreq_governor.h
>> +++ b/drivers/cpufreq/cpufreq_governor.h
>> @@ -213,6 +213,7 @@ struct dbs_data {
>>  	unsigned int min_sampling_rate;
>>  	int usage_count;
>>  	void *tuners;
>> +	int queue_stop;
>>  
>>  	/* dbs_mutex protects dbs_enable in governor start/stop */
>>  	struct mutex mutex;
>>
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index dc9b72e..a64b544 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -178,13 +178,14 @@  void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
 {
 	int i;
 
+	if (dbs_data->queue_stop)
+		return;
+
 	if (!all_cpus) {
 		__gov_queue_work(smp_processor_id(), dbs_data, delay);
 	} else {
-		get_online_cpus();
 		for_each_cpu(i, policy->cpus)
 			__gov_queue_work(i, dbs_data, delay);
-		put_online_cpus();
 	}
 }
 EXPORT_SYMBOL_GPL(gov_queue_work);
@@ -193,12 +194,27 @@  static inline void gov_cancel_work(struct dbs_data *dbs_data,
 		struct cpufreq_policy *policy)
 {
 	struct cpu_dbs_common_info *cdbs;
-	int i;
+	int i, round = 2;
 
+	dbs_data->queue_stop = 1;
+redo:
+	round--;
 	for_each_cpu(i, policy->cpus) {
 		cdbs = dbs_data->cdata->get_cpu_cdbs(i);
 		cancel_delayed_work_sync(&cdbs->work);
 	}
+
+	/*
+	 * Since there is no lock to prvent re-queue the
+	 * cancelled work, some early cancelled work might
+	 * have been queued again by later cancelled work.
+	 *
+	 * Flush the work again with dbs_data->queue_stop
+	 * enabled, this time there will be no survivors.
+	 */
+	if (round)
+		goto redo;
+	dbs_data->queue_stop = 0;
 }
 
 /* Will return if we need to evaluate cpu load again or not */
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index e16a961..9116135 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -213,6 +213,7 @@  struct dbs_data {
 	unsigned int min_sampling_rate;
 	int usage_count;
 	void *tuners;
+	int queue_stop;
 
 	/* dbs_mutex protects dbs_enable in governor start/stop */
 	struct mutex mutex;