diff mbox

[v4,3/5] cpufreq: Don't destroy/realloc policy/sysfs on hotplug/suspend

Message ID 1406250448-470-4-git-send-email-skannan@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Saravana Kannan July 25, 2014, 1:07 a.m. UTC
This patch simplifies a lot of the hotplug/suspend code by not
adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
the cpufreq directory and policy in place irrespective of whether the CPUs
are ONLINE/OFFLINE.

Leaving the policy, sysfs and kobject in place also brings these additional
benefits:
* Faster suspend/resume
* Faster hotplug
* Sysfs file permissions maintained across hotplug
* Policy settings and governor tunables maintained across hotplug
* Cpufreq stats would be maintained across hotplug for all CPUs and can be
  queried even after CPU goes OFFLINE

Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
---
 drivers/cpufreq/cpufreq.c | 83 ++++++++++++++++-------------------------------
 1 file changed, 28 insertions(+), 55 deletions(-)

Comments

Rafael J. Wysocki July 31, 2014, 9:56 p.m. UTC | #1
On Thursday, July 24, 2014 06:07:26 PM Saravana Kannan wrote:
> This patch simplifies a lot of the hotplug/suspend code by not
> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
> the cpufreq directory and policy in place irrespective of whether the CPUs
> are ONLINE/OFFLINE.

I'm still quite unsure how this is going to work with the real CPU hot-remove
that makes the entire sysfs cpu directories go away.  Can you please explain
that?

> Leaving the policy, sysfs and kobject in place also brings these additional
> benefits:
> * Faster suspend/resume
> * Faster hotplug
> * Sysfs file permissions maintained across hotplug
> * Policy settings and governor tunables maintained across hotplug
> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>   queried even after CPU goes OFFLINE
> 
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>  drivers/cpufreq/cpufreq.c | 83 ++++++++++++++++-------------------------------
>  1 file changed, 28 insertions(+), 55 deletions(-)
> 
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index af4f291..d9fc6e5 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -865,7 +865,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>  	unsigned int j;
>  	int ret = 0;
>  
> -	for_each_cpu(j, policy->cpus) {
> +	for_each_cpu(j, policy->related_cpus) {
>  		struct device *cpu_dev;
>  
>  		if (j == policy->kobj_cpu)
> @@ -968,7 +968,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>  	int ret = 0;
>  	unsigned long flags;
>  
> -	if (has_target()) {
> +	if (cpumask_weight(policy->cpus) && has_target()) {
>  		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>  		if (ret) {
>  			pr_err("%s: Failed to stop governor\n", __func__);
> @@ -997,7 +997,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>  		}
>  	}
>  
> -	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
> +	return 0;
>  }
>  #endif
>  
> @@ -1100,9 +1100,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  	struct cpufreq_policy *policy;
>  	unsigned long flags;
>  	bool recover_policy = cpufreq_suspended;
> -#ifdef CONFIG_HOTPLUG_CPU
> -	struct cpufreq_policy *tpolicy;
> -#endif
>  
>  	if (cpu_is_offline(cpu))
>  		return 0;
> @@ -1113,28 +1110,22 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  	/* check whether a different CPU already registered this
>  	 * CPU because it is in the same boat. */
>  	policy = cpufreq_cpu_get(cpu);
> -	if (unlikely(policy)) {
> +	if (policy) {
> +		if (!cpumask_test_cpu(cpu, policy->cpus))
> +			ret = cpufreq_add_policy_cpu(policy, cpu, dev);
> +		else
> +			ret = 0;
>  		cpufreq_cpu_put(policy);
> -		return 0;
> +		return ret;
>  	}
>  #endif
>  
>  	if (!down_read_trylock(&cpufreq_rwsem))
>  		return 0;
>  
> -#ifdef CONFIG_HOTPLUG_CPU
> -	/* Check if this cpu was hot-unplugged earlier and has siblings */
> -	read_lock_irqsave(&cpufreq_driver_lock, flags);
> -	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
> -		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
> -			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
> -			up_read(&cpufreq_rwsem);
> -			return ret;
> -		}
> -	}
> -	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -#endif
> +	/* If we get this far, this is the first time we are adding the
> +	 * policy */
> +	recover_policy = false;
>  
>  	/*
>  	 * Restore the saved policy when doing light-weight init and fall back
> @@ -1189,7 +1180,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  
>  	down_write(&policy->rwsem);
>  	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -	for_each_cpu(j, policy->cpus)
> +	for_each_cpu(j, policy->related_cpus)
>  		per_cpu(cpufreq_cpu_data, j) = policy;
>  	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>  
> @@ -1274,7 +1265,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>  err_out_unregister:
>  err_get_freq:
>  	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -	for_each_cpu(j, policy->cpus)
> +	for_each_cpu(j, policy->related_cpus)
>  		per_cpu(cpufreq_cpu_data, j) = NULL;
>  	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>  
> @@ -1340,21 +1331,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>  					struct subsys_interface *sif)
>  {
>  	unsigned int cpu = dev->id, cpus;
> -	int new_cpu, ret;
> +	int new_cpu, ret = 0;
>  	unsigned long flags;
>  	struct cpufreq_policy *policy;
>  
>  	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>  
> -	write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> +	read_lock_irqsave(&cpufreq_driver_lock, flags);
>  	policy = per_cpu(cpufreq_cpu_data, cpu);
> -
> -	/* Save the policy somewhere when doing a light-weight tear-down */
> -	if (cpufreq_suspended)
> -		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
> -
> -	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>  
>  	if (!policy) {
>  		pr_debug("%s: No cpu_data found\n", __func__);
> @@ -1369,24 +1354,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>  		}
>  	}
>  
> -	if (!cpufreq_driver->setpolicy)
> -		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
> -			policy->governor->name, CPUFREQ_NAME_LEN);
> -
>  	down_read(&policy->rwsem);
>  	cpus = cpumask_weight(policy->cpus);
>  	up_read(&policy->rwsem);
>  
> -	if (cpu != policy->cpu) {
> -		sysfs_remove_link(&dev->kobj, "cpufreq");
> -	} else if (cpus > 1) {
> -		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
> -		if (new_cpu >= 0) {
> -			update_policy_cpu(policy, new_cpu);
> -
> -			if (!cpufreq_suspended)
> -				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
> -					 __func__, new_cpu, cpu);
> +	if (cpus > 1) {
> +		if (cpu == policy->cpu) {
> +			new_cpu = cpumask_any_but(policy->cpus, cpu);
> +			if (new_cpu >= 0)
> +				update_policy_cpu(policy, new_cpu);
>  		}
>  	} else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
>  		cpufreq_driver->stop_cpu(policy);
> @@ -1431,6 +1407,9 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>  	cpus = cpumask_weight(policy->cpus);
>  	up_read(&policy->rwsem);
>  
> +	if (cpu != policy->kobj_cpu)
> +		sysfs_remove_link(&dev->kobj, "cpufreq");
> +
>  	/* If cpu is last user of policy, free policy */
>  	if (cpus == 0) {
>  		if (has_target()) {
> @@ -1475,12 +1454,10 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>  static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>  {
>  	unsigned int cpu = dev->id;
> -	int ret;
> -
> -	if (cpu_is_offline(cpu))
> -		return 0;
> +	int ret = 0;
>  
> -	ret = __cpufreq_remove_dev_prepare(dev, sif);
> +	if (cpu_online(cpu))
> +		ret = __cpufreq_remove_dev_prepare(dev, sif);
>  
>  	if (!ret)
>  		ret = __cpufreq_remove_dev_finish(dev, sif);
> @@ -2307,10 +2284,6 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>  			__cpufreq_remove_dev_prepare(dev, NULL);
>  			break;
>  
> -		case CPU_POST_DEAD:
> -			__cpufreq_remove_dev_finish(dev, NULL);
> -			break;
> -
>  		case CPU_DOWN_FAILED:
>  			__cpufreq_add_dev(dev, NULL);
>  			break;
>
Saravana Kannan July 31, 2014, 10:15 p.m. UTC | #2
On 07/31/2014 02:56 PM, Rafael J. Wysocki wrote:
> On Thursday, July 24, 2014 06:07:26 PM Saravana Kannan wrote:
>> This patch simplifies a lot of the hotplug/suspend code by not
>> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
>> the cpufreq directory and policy in place irrespective of whether the CPUs
>> are ONLINE/OFFLINE.
>
> I'm still quite unsure how this is going to work with the real CPU hot-remove
> that makes the entire sysfs cpu directories go away.  Can you please explain
> that?

With this patch it won't work correctly. 4/5 fixes it to work correctly. 
Just keeping them separate to make it easy to review.

We can squash 3/5 and 4/5 later if people prefer it that way.

-Saravana
Saravana Kannan July 31, 2014, 11:48 p.m. UTC | #3
On 07/31/2014 02:56 PM, Rafael J. Wysocki wrote:
> On Thursday, July 24, 2014 06:07:26 PM Saravana Kannan wrote:
>> This patch simplifies a lot of the hotplug/suspend code by not
>> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
>> the cpufreq directory and policy in place irrespective of whether the CPUs
>> are ONLINE/OFFLINE.
>
> I'm still quite unsure how this is going to work with the real CPU hot-remove
> that makes the entire sysfs cpu directories go away.  Can you please explain
> that?

Sure. Not a problem. I just wanted to make sure you had a chance to look 
at the code first.

Physical hot-remove triggers a "remove" for all the  registered 
subsys_interfaces for that CPU (after going through a couple of 
functions). So, when that happens, the cpufreq subsys_interface remove 
for that CPU gets called. At that point, I clean up that CPU's SW states 
as if it was never plugged in from the start. If that CPU was the owner 
of the sysfs directory, I move it over to a different CPU.

-Saravana
Viresh Kumar Aug. 7, 2014, 10:48 a.m. UTC | #4
On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
> This patch simplifies a lot of the hotplug/suspend code by not
> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
> the cpufreq directory and policy in place irrespective of whether the CPUs
> are ONLINE/OFFLINE.
>
> Leaving the policy, sysfs and kobject in place also brings these additional
> benefits:
> * Faster suspend/resume
> * Faster hotplug
> * Sysfs file permissions maintained across hotplug
> * Policy settings and governor tunables maintained across hotplug
> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>   queried even after CPU goes OFFLINE
>
> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
> ---
>  drivers/cpufreq/cpufreq.c | 83 ++++++++++++++++-------------------------------
>  1 file changed, 28 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index af4f291..d9fc6e5 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -865,7 +865,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>         unsigned int j;
>         int ret = 0;
>
> -       for_each_cpu(j, policy->cpus) {
> +       for_each_cpu(j, policy->related_cpus) {
>                 struct device *cpu_dev;
>
>                 if (j == policy->kobj_cpu)
> @@ -968,7 +968,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>         int ret = 0;
>         unsigned long flags;
>
> -       if (has_target()) {
> +       if (cpumask_weight(policy->cpus) && has_target()) {

Probably cpumask_empty() would be more readable here.

>                 ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>                 if (ret) {
>                         pr_err("%s: Failed to stop governor\n", __func__);
> @@ -997,7 +997,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>                 }
>         }
>
> -       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
> +       return 0;
>  }
>  #endif
>
> @@ -1100,9 +1100,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         struct cpufreq_policy *policy;
>         unsigned long flags;
>         bool recover_policy = cpufreq_suspended;
> -#ifdef CONFIG_HOTPLUG_CPU
> -       struct cpufreq_policy *tpolicy;
> -#endif
>
>         if (cpu_is_offline(cpu))
>                 return 0;
> @@ -1113,28 +1110,22 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>         /* check whether a different CPU already registered this
>          * CPU because it is in the same boat. */
>         policy = cpufreq_cpu_get(cpu);
> -       if (unlikely(policy)) {
> +       if (policy) {
> +               if (!cpumask_test_cpu(cpu, policy->cpus))
> +                       ret = cpufreq_add_policy_cpu(policy, cpu, dev);
> +               else
> +                       ret = 0;
>                 cpufreq_cpu_put(policy);
> -               return 0;
> +               return ret;
>         }
>  #endif
>
>         if (!down_read_trylock(&cpufreq_rwsem))
>                 return 0;
>
> -#ifdef CONFIG_HOTPLUG_CPU
> -       /* Check if this cpu was hot-unplugged earlier and has siblings */
> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
> -       list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
> -               if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
> -                       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -                       ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
> -                       up_read(&cpufreq_rwsem);
> -                       return ret;
> -               }
> -       }
> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
> -#endif
> +       /* If we get this far, this is the first time we are adding the
> +        * policy */

I think I have already asked you to use proper comment style?

> +       recover_policy = false;

For this patch, probably it will work fine but I hope you will get rid of
this variable completely in next patches..


> @@ -1340,21 +1331,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>                                         struct subsys_interface *sif)
>  {
>         unsigned int cpu = dev->id, cpus;
> -       int new_cpu, ret;
> +       int new_cpu, ret = 0;

Why?

>         unsigned long flags;
>         struct cpufreq_policy *policy;
>
>         pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>
> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
> -
> +       read_lock_irqsave(&cpufreq_driver_lock, flags);
>         policy = per_cpu(cpufreq_cpu_data, cpu);
> -
> -       /* Save the policy somewhere when doing a light-weight tear-down */
> -       if (cpufreq_suspended)
> -               per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
> -
> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> +       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>
>         if (!policy) {
>                 pr_debug("%s: No cpu_data found\n", __func__);
> @@ -1369,24 +1354,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>                 }
>         }
>
> -       if (!cpufreq_driver->setpolicy)
> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
> -                       policy->governor->name, CPUFREQ_NAME_LEN);
> -

Why? Probably I did mention this earlier as well?

>         down_read(&policy->rwsem);
>         cpus = cpumask_weight(policy->cpus);
>         up_read(&policy->rwsem);
>
> -       if (cpu != policy->cpu) {
> -               sysfs_remove_link(&dev->kobj, "cpufreq");
> -       } else if (cpus > 1) {
> -               new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
> -               if (new_cpu >= 0) {
> -                       update_policy_cpu(policy, new_cpu);
> -
> -                       if (!cpufreq_suspended)
> -                               pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
> -                                        __func__, new_cpu, cpu);
> +       if (cpus > 1) {
> +               if (cpu == policy->cpu) {
> +                       new_cpu = cpumask_any_but(policy->cpus, cpu);
> +                       if (new_cpu >= 0)

Can this ever be false?

> +                               update_policy_cpu(policy, new_cpu);
>                 }
>         } else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
>                 cpufreq_driver->stop_cpu(policy);
> @@ -1431,6 +1407,9 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>         cpus = cpumask_weight(policy->cpus);
>         up_read(&policy->rwsem);
>
> +       if (cpu != policy->kobj_cpu)
> +               sysfs_remove_link(&dev->kobj, "cpufreq");
> +

Why?

>         /* If cpu is last user of policy, free policy */
>         if (cpus == 0) {
>                 if (has_target()) {
> @@ -1475,12 +1454,10 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>  static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>  {
>         unsigned int cpu = dev->id;
> -       int ret;
> -
> -       if (cpu_is_offline(cpu))
> -               return 0;
> +       int ret = 0;
>
> -       ret = __cpufreq_remove_dev_prepare(dev, sif);
> +       if (cpu_online(cpu))
> +               ret = __cpufreq_remove_dev_prepare(dev, sif);

Why do you need a change here?

>         if (!ret)
>                 ret = __cpufreq_remove_dev_finish(dev, sif);
> @@ -2307,10 +2284,6 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>                         __cpufreq_remove_dev_prepare(dev, NULL);
>                         break;
>
> -               case CPU_POST_DEAD:
> -                       __cpufreq_remove_dev_finish(dev, NULL);
> -                       break;
> -

Sure? Who will call dev_finish() now?

>                 case CPU_DOWN_FAILED:
>                         __cpufreq_add_dev(dev, NULL);
>                         break;
> --
> 1.8.2.1
>
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> hosted by The Linux Foundation
Viresh Kumar Aug. 7, 2014, 10:51 a.m. UTC | #5
On 1 August 2014 03:26, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
> I'm still quite unsure how this is going to work with the real CPU hot-remove
> that makes the entire sysfs cpu directories go away.  Can you please explain
> that?

I have little less knowledge on this kind of hotplugs, can you please enlighten
me with some info about this?

Are we talking about big servers which are actually a combination of multiple
motherboards (with SoC's), and any motherboard can be plugged out at
run time. Obviously a single kernel would be running for all these motherboards.

I don't know if we already support that.. Sorry for my lack of
knowledge on this..

--
viresh
Saravana Kannan Aug. 11, 2014, 10:13 p.m. UTC | #6
On 08/07/2014 03:48 AM, Viresh Kumar wrote:
> On 25 July 2014 06:37, Saravana Kannan <skannan@codeaurora.org> wrote:
>> This patch simplifies a lot of the hotplug/suspend code by not
>> adding/removing/moving the policy/sysfs/kobj during hotplug and just leaves
>> the cpufreq directory and policy in place irrespective of whether the CPUs
>> are ONLINE/OFFLINE.
>>
>> Leaving the policy, sysfs and kobject in place also brings these additional
>> benefits:
>> * Faster suspend/resume
>> * Faster hotplug
>> * Sysfs file permissions maintained across hotplug
>> * Policy settings and governor tunables maintained across hotplug
>> * Cpufreq stats would be maintained across hotplug for all CPUs and can be
>>    queried even after CPU goes OFFLINE
>>
>> Signed-off-by: Saravana Kannan <skannan@codeaurora.org>
>> ---
>>   drivers/cpufreq/cpufreq.c | 83 ++++++++++++++++-------------------------------
>>   1 file changed, 28 insertions(+), 55 deletions(-)
>>
>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>> index af4f291..d9fc6e5 100644
>> --- a/drivers/cpufreq/cpufreq.c
>> +++ b/drivers/cpufreq/cpufreq.c
>> @@ -865,7 +865,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
>>          unsigned int j;
>>          int ret = 0;
>>
>> -       for_each_cpu(j, policy->cpus) {
>> +       for_each_cpu(j, policy->related_cpus) {
>>                  struct device *cpu_dev;
>>
>>                  if (j == policy->kobj_cpu)
>> @@ -968,7 +968,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>>          int ret = 0;
>>          unsigned long flags;
>>
>> -       if (has_target()) {
>> +       if (cpumask_weight(policy->cpus) && has_target()) {
>
> Probably cpumask_empty() would be more readable here.

Agreed.

>
>>                  ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
>>                  if (ret) {
>>                          pr_err("%s: Failed to stop governor\n", __func__);
>> @@ -997,7 +997,7 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
>>                  }
>>          }
>>
>> -       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
>> +       return 0;
>>   }
>>   #endif
>>
>> @@ -1100,9 +1100,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          struct cpufreq_policy *policy;
>>          unsigned long flags;
>>          bool recover_policy = cpufreq_suspended;
>> -#ifdef CONFIG_HOTPLUG_CPU
>> -       struct cpufreq_policy *tpolicy;
>> -#endif
>>
>>          if (cpu_is_offline(cpu))
>>                  return 0;
>> @@ -1113,28 +1110,22 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
>>          /* check whether a different CPU already registered this
>>           * CPU because it is in the same boat. */
>>          policy = cpufreq_cpu_get(cpu);
>> -       if (unlikely(policy)) {
>> +       if (policy) {
>> +               if (!cpumask_test_cpu(cpu, policy->cpus))
>> +                       ret = cpufreq_add_policy_cpu(policy, cpu, dev);
>> +               else
>> +                       ret = 0;
>>                  cpufreq_cpu_put(policy);
>> -               return 0;
>> +               return ret;
>>          }
>>   #endif
>>
>>          if (!down_read_trylock(&cpufreq_rwsem))
>>                  return 0;
>>
>> -#ifdef CONFIG_HOTPLUG_CPU
>> -       /* Check if this cpu was hot-unplugged earlier and has siblings */
>> -       read_lock_irqsave(&cpufreq_driver_lock, flags);
>> -       list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
>> -               if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
>> -                       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> -                       ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
>> -                       up_read(&cpufreq_rwsem);
>> -                       return ret;
>> -               }
>> -       }
>> -       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> -#endif
>> +       /* If we get this far, this is the first time we are adding the
>> +        * policy */
>
> I think I have already asked you to use proper comment style?

I did. Then I think I noticed some of the existing comments did keep the 
/* in its own line even for multiline comments. So, I got confused. Will 
fix.

>
>> +       recover_policy = false;
>
> For this patch, probably it will work fine but I hope you will get rid of
> this variable completely in next patches..
>

Yup. In 5/5

>
>> @@ -1340,21 +1331,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>>                                          struct subsys_interface *sif)
>>   {
>>          unsigned int cpu = dev->id, cpus;
>> -       int new_cpu, ret;
>> +       int new_cpu, ret = 0;
>
> Why?

Apparently for no good reason :) Probably some stale change when I was 
splitting up the patches. I'll double check and remove this.


>>          unsigned long flags;
>>          struct cpufreq_policy *policy;
>>
>>          pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
>>
>> -       write_lock_irqsave(&cpufreq_driver_lock, flags);
>> -
>> +       read_lock_irqsave(&cpufreq_driver_lock, flags);
>>          policy = per_cpu(cpufreq_cpu_data, cpu);
>> -
>> -       /* Save the policy somewhere when doing a light-weight tear-down */
>> -       if (cpufreq_suspended)
>> -               per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
>> -
>> -       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
>> +       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
>>
>>          if (!policy) {
>>                  pr_debug("%s: No cpu_data found\n", __func__);
>> @@ -1369,24 +1354,15 @@ static int __cpufreq_remove_dev_prepare(struct device *dev,
>>                  }
>>          }
>>
>> -       if (!cpufreq_driver->setpolicy)
>> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
>> -                       policy->governor->name, CPUFREQ_NAME_LEN);
>> -
>
> Why? Probably I did mention this earlier as well?

This code is saving the governor name here to restore it when the policy 
is created again after suspend/resume or hotplug of all CPUs. Since we 
no longer throw away the policy struct, there's no point in doing this.

I should remove this per cpu variable though. Will do it in v5.

>
>>          down_read(&policy->rwsem);
>>          cpus = cpumask_weight(policy->cpus);
>>          up_read(&policy->rwsem);
>>
>> -       if (cpu != policy->cpu) {
>> -               sysfs_remove_link(&dev->kobj, "cpufreq");
>> -       } else if (cpus > 1) {
>> -               new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
>> -               if (new_cpu >= 0) {
>> -                       update_policy_cpu(policy, new_cpu);
>> -
>> -                       if (!cpufreq_suspended)
>> -                               pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
>> -                                        __func__, new_cpu, cpu);
>> +       if (cpus > 1) {
>> +               if (cpu == policy->cpu) {
>> +                       new_cpu = cpumask_any_but(policy->cpus, cpu);
>> +                       if (new_cpu >= 0)
>
> Can this ever be false?

If this is the last CPU going down. This part of the code didn't really 
change. I just moved the cpumask_any_but() from nominate policy to here 
since I'm not longer moving the kobj around.

>
>> +                               update_policy_cpu(policy, new_cpu);
>>                  }
>>          } else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
>>                  cpufreq_driver->stop_cpu(policy);

>> @@ -1431,6 +1407,9 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>>          cpus = cpumask_weight(policy->cpus);
>>          up_read(&policy->rwsem);
>>
>> +       if (cpu != policy->kobj_cpu)
>> +               sysfs_remove_link(&dev->kobj, "cpufreq");
>> +
>
> Why?

For the physical hot-remove case or when the cpufreq driver is unregistered.

>
>>          /* If cpu is last user of policy, free policy */
>>          if (cpus == 0) {
>>                  if (has_target()) {
>> @@ -1475,12 +1454,10 @@ static int __cpufreq_remove_dev_finish(struct device *dev,
>>   static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
>>   {
>>          unsigned int cpu = dev->id;
>> -       int ret;
>> -
>> -       if (cpu_is_offline(cpu))
>> -               return 0;
>> +       int ret = 0;
>>
>> -       ret = __cpufreq_remove_dev_prepare(dev, sif);
>> +       if (cpu_online(cpu))
>> +               ret = __cpufreq_remove_dev_prepare(dev, sif);
>
> Why do you need a change here?

Since we no longer do remove_dev_finish during hotplug, we can't just 
short circuit the entire function. We have to finish the remove when the 
CPU is hot-removed or when the cpufreq driver is unregistered.

>
>>          if (!ret)
>>                  ret = __cpufreq_remove_dev_finish(dev, sif);
>> @@ -2307,10 +2284,6 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
>>                          __cpufreq_remove_dev_prepare(dev, NULL);
>>                          break;
>>
>> -               case CPU_POST_DEAD:
>> -                       __cpufreq_remove_dev_finish(dev, NULL);
>> -                       break;
>> -
>
> Sure? Who will call dev_finish() now?

At this point, all remove_dev_finish() does is remove the sysfs links 
and destroy the policy. So, it never needs to be called for hotplug. 
Only during physical hot-remove or during cpufreq driver unregister.

>
>>                  case CPU_DOWN_FAILED:
>>                          __cpufreq_add_dev(dev, NULL);
>>                          break;
>> --
>> 1.8.2.1
>>
>> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
>> hosted by The Linux Foundation

-Saravana
Viresh Kumar Aug. 12, 2014, 8:51 a.m. UTC | #7
On 12 August 2014 03:43, Saravana Kannan <skannan@codeaurora.org> wrote:
> On 08/07/2014 03:48 AM, Viresh Kumar wrote:

>>> @@ -1369,24 +1354,15 @@ static int __cpufreq_remove_dev_prepare(struct
>>> device *dev,
>>>                  }
>>>          }
>>>
>>> -       if (!cpufreq_driver->setpolicy)
>>> -               strncpy(per_cpu(cpufreq_cpu_governor, cpu),
>>> -                       policy->governor->name, CPUFREQ_NAME_LEN);
>>> -
>>
>>
>> Why? Probably I did mention this earlier as well?
>
>
> This code is saving the governor name here to restore it when the policy is
> created again after suspend/resume or hotplug of all CPUs. Since we no
> longer throw away the policy struct, there's no point in doing this.
>
> I should remove this per cpu variable though. Will do it in v5.

Hmm, makes sense. So probably keep this code in this patch and get rid
of all uses of 'cpufreq_cpu_governor' in a separate patch.

>>> +       if (cpus > 1) {
>>> +               if (cpu == policy->cpu) {
>>> +                       new_cpu = cpumask_any_but(policy->cpus, cpu);
>>> +                       if (new_cpu >= 0)
>>
>>
>> Can this ever be false?
>
>
> If this is the last CPU going down.

Can that be true? Its present in (cpus > 1) block :)

>>>   static int cpufreq_remove_dev(struct device *dev, struct
>>> subsys_interface *sif)
>>>   {
>>>          unsigned int cpu = dev->id;
>>> -       int ret;
>>> -
>>> -       if (cpu_is_offline(cpu))
>>> -               return 0;
>>> +       int ret = 0;
>>>
>>> -       ret = __cpufreq_remove_dev_prepare(dev, sif);
>>> +       if (cpu_online(cpu))
>>> +               ret = __cpufreq_remove_dev_prepare(dev, sif);
>>
>>
>> Why do you need a change here?
>
>
> Since we no longer do remove_dev_finish during hotplug, we can't just short
> circuit the entire function. We have to finish the remove when the CPU is
> hot-removed or when the cpufreq driver is unregistered.

I think this is tricky and we must have a clear comment here..
I missed this on my initial reviews..
Viresh Kumar Aug. 12, 2014, 9:17 a.m. UTC | #8
On 7 August 2014 16:21, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 1 August 2014 03:26, Rafael J. Wysocki <rjw@rjwysocki.net> wrote:
>> I'm still quite unsure how this is going to work with the real CPU hot-remove
>> that makes the entire sysfs cpu directories go away.  Can you please explain
>> that?
>
> I have little less knowledge on this kind of hotplugs, can you please enlighten
> me with some info about this?
>
> Are we talking about big servers which are actually a combination of multiple
> motherboards (with SoC's), and any motherboard can be plugged out at
> run time. Obviously a single kernel would be running for all these motherboards.
>
> I don't know if we already support that.. Sorry for my lack of
> knowledge on this..

Ping!!
diff mbox

Patch

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index af4f291..d9fc6e5 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -865,7 +865,7 @@  static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
 	unsigned int j;
 	int ret = 0;
 
-	for_each_cpu(j, policy->cpus) {
+	for_each_cpu(j, policy->related_cpus) {
 		struct device *cpu_dev;
 
 		if (j == policy->kobj_cpu)
@@ -968,7 +968,7 @@  static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 	int ret = 0;
 	unsigned long flags;
 
-	if (has_target()) {
+	if (cpumask_weight(policy->cpus) && has_target()) {
 		ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
 		if (ret) {
 			pr_err("%s: Failed to stop governor\n", __func__);
@@ -997,7 +997,7 @@  static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy,
 		}
 	}
 
-	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
+	return 0;
 }
 #endif
 
@@ -1100,9 +1100,6 @@  static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	struct cpufreq_policy *policy;
 	unsigned long flags;
 	bool recover_policy = cpufreq_suspended;
-#ifdef CONFIG_HOTPLUG_CPU
-	struct cpufreq_policy *tpolicy;
-#endif
 
 	if (cpu_is_offline(cpu))
 		return 0;
@@ -1113,28 +1110,22 @@  static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 	/* check whether a different CPU already registered this
 	 * CPU because it is in the same boat. */
 	policy = cpufreq_cpu_get(cpu);
-	if (unlikely(policy)) {
+	if (policy) {
+		if (!cpumask_test_cpu(cpu, policy->cpus))
+			ret = cpufreq_add_policy_cpu(policy, cpu, dev);
+		else
+			ret = 0;
 		cpufreq_cpu_put(policy);
-		return 0;
+		return ret;
 	}
 #endif
 
 	if (!down_read_trylock(&cpufreq_rwsem))
 		return 0;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	/* Check if this cpu was hot-unplugged earlier and has siblings */
-	read_lock_irqsave(&cpufreq_driver_lock, flags);
-	list_for_each_entry(tpolicy, &cpufreq_policy_list, policy_list) {
-		if (cpumask_test_cpu(cpu, tpolicy->related_cpus)) {
-			read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-			ret = cpufreq_add_policy_cpu(tpolicy, cpu, dev);
-			up_read(&cpufreq_rwsem);
-			return ret;
-		}
-	}
-	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
-#endif
+	/* If we get this far, this is the first time we are adding the
+	 * policy */
+	recover_policy = false;
 
 	/*
 	 * Restore the saved policy when doing light-weight init and fall back
@@ -1189,7 +1180,7 @@  static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 
 	down_write(&policy->rwsem);
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
-	for_each_cpu(j, policy->cpus)
+	for_each_cpu(j, policy->related_cpus)
 		per_cpu(cpufreq_cpu_data, j) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
@@ -1274,7 +1265,7 @@  static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 err_out_unregister:
 err_get_freq:
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
-	for_each_cpu(j, policy->cpus)
+	for_each_cpu(j, policy->related_cpus)
 		per_cpu(cpufreq_cpu_data, j) = NULL;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
@@ -1340,21 +1331,15 @@  static int __cpufreq_remove_dev_prepare(struct device *dev,
 					struct subsys_interface *sif)
 {
 	unsigned int cpu = dev->id, cpus;
-	int new_cpu, ret;
+	int new_cpu, ret = 0;
 	unsigned long flags;
 	struct cpufreq_policy *policy;
 
 	pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
-	write_lock_irqsave(&cpufreq_driver_lock, flags);
-
+	read_lock_irqsave(&cpufreq_driver_lock, flags);
 	policy = per_cpu(cpufreq_cpu_data, cpu);
-
-	/* Save the policy somewhere when doing a light-weight tear-down */
-	if (cpufreq_suspended)
-		per_cpu(cpufreq_cpu_data_fallback, cpu) = policy;
-
-	write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+	read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
 	if (!policy) {
 		pr_debug("%s: No cpu_data found\n", __func__);
@@ -1369,24 +1354,15 @@  static int __cpufreq_remove_dev_prepare(struct device *dev,
 		}
 	}
 
-	if (!cpufreq_driver->setpolicy)
-		strncpy(per_cpu(cpufreq_cpu_governor, cpu),
-			policy->governor->name, CPUFREQ_NAME_LEN);
-
 	down_read(&policy->rwsem);
 	cpus = cpumask_weight(policy->cpus);
 	up_read(&policy->rwsem);
 
-	if (cpu != policy->cpu) {
-		sysfs_remove_link(&dev->kobj, "cpufreq");
-	} else if (cpus > 1) {
-		new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu);
-		if (new_cpu >= 0) {
-			update_policy_cpu(policy, new_cpu);
-
-			if (!cpufreq_suspended)
-				pr_debug("%s: policy Kobject moved to cpu: %d from: %d\n",
-					 __func__, new_cpu, cpu);
+	if (cpus > 1) {
+		if (cpu == policy->cpu) {
+			new_cpu = cpumask_any_but(policy->cpus, cpu);
+			if (new_cpu >= 0)
+				update_policy_cpu(policy, new_cpu);
 		}
 	} else if (cpufreq_driver->stop_cpu && cpufreq_driver->setpolicy) {
 		cpufreq_driver->stop_cpu(policy);
@@ -1431,6 +1407,9 @@  static int __cpufreq_remove_dev_finish(struct device *dev,
 	cpus = cpumask_weight(policy->cpus);
 	up_read(&policy->rwsem);
 
+	if (cpu != policy->kobj_cpu)
+		sysfs_remove_link(&dev->kobj, "cpufreq");
+
 	/* If cpu is last user of policy, free policy */
 	if (cpus == 0) {
 		if (has_target()) {
@@ -1475,12 +1454,10 @@  static int __cpufreq_remove_dev_finish(struct device *dev,
 static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
 	unsigned int cpu = dev->id;
-	int ret;
-
-	if (cpu_is_offline(cpu))
-		return 0;
+	int ret = 0;
 
-	ret = __cpufreq_remove_dev_prepare(dev, sif);
+	if (cpu_online(cpu))
+		ret = __cpufreq_remove_dev_prepare(dev, sif);
 
 	if (!ret)
 		ret = __cpufreq_remove_dev_finish(dev, sif);
@@ -2307,10 +2284,6 @@  static int cpufreq_cpu_callback(struct notifier_block *nfb,
 			__cpufreq_remove_dev_prepare(dev, NULL);
 			break;
 
-		case CPU_POST_DEAD:
-			__cpufreq_remove_dev_finish(dev, NULL);
-			break;
-
 		case CPU_DOWN_FAILED:
 			__cpufreq_add_dev(dev, NULL);
 			break;