diff mbox

[3/3] thermal/intel_powerclamp: Convert to CPU hotplug state

Message ID 1476707572-32215-4-git-send-email-pmladek@suse.com (mailing list archive)
State Changes Requested
Delegated to: Zhang Rui
Headers show

Commit Message

Petr Mladek Oct. 17, 2016, 12:32 p.m. UTC
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

This is a conversation to the new hotplug state machine with
the difference that CPU_DEAD becomes CPU_PREDOWN.

At the same time it makes the handling of the two states symmetrical.
stop_power_clamp_worker() is called unconditionally and the controversial
error message is removed.

Finally, the hotplug state callbacks are removed after the powerclamping
is stopped to avoid a potential race.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
[pmladek@suse.com: Fixed the possible race in powerclamp_exit()]
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 drivers/thermal/intel_powerclamp.c | 69 +++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 34 deletions(-)

Comments

Jacob Pan Oct. 21, 2016, 8:21 p.m. UTC | #1
On Mon, 17 Oct 2016 14:32:52 +0200
Petr Mladek <pmladek@suse.com> wrote:

> From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> 
Hi Sebastian,
I applied this patchset on 4.9-rc1 and run some cpu online/offline
loops test while injecting idle, e.g. 25%. I got system hang after a
few cycles. Still looking into root cause.

Thanks,

Jacob
> This is a conversation to the new hotplug state machine with
> the difference that CPU_DEAD becomes CPU_PREDOWN.
> 
> At the same time it makes the handling of the two states symmetrical.
> stop_power_clamp_worker() is called unconditionally and the
> controversial error message is removed.
> 
> Finally, the hotplug state callbacks are removed after the
> powerclamping is stopped to avoid a potential race.
> 
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> [pmladek@suse.com: Fixed the possible race in powerclamp_exit()]
> Signed-off-by: Petr Mladek <pmladek@suse.com>
> ---
>  drivers/thermal/intel_powerclamp.c | 69
> +++++++++++++++++++------------------- 1 file changed, 35
> insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/thermal/intel_powerclamp.c
> b/drivers/thermal/intel_powerclamp.c index a94f7c849a4e..390e50b97324
> 100644 --- a/drivers/thermal/intel_powerclamp.c
> +++ b/drivers/thermal/intel_powerclamp.c
> @@ -622,43 +622,35 @@ static void end_power_clamp(void)
>  	}
>  }
>  
> -static int powerclamp_cpu_callback(struct notifier_block *nfb,
> -				unsigned long action, void *hcpu)
> +static int powerclamp_cpu_online(unsigned int cpu)
>  {
> -	unsigned long cpu = (unsigned long)hcpu;
> +	if (clamping == false)
> +		return 0;
> +	start_power_clamp_worker(cpu);
> +	/* prefer BSP as controlling CPU */
> +	if (cpu == 0) {
> +		control_cpu = 0;
> +		smp_mb();
> +	}
> +	return 0;
> +}
>  
> -	if (false == clamping)
> -		goto exit_ok;
> +static int powerclamp_cpu_predown(unsigned int cpu)
> +{
> +	if (clamping == false)
> +		return 0;
>  
> -	switch (action) {
> -	case CPU_ONLINE:
> -		start_power_clamp_worker(cpu);
> -		/* prefer BSP as controlling CPU */
> -		if (cpu == 0) {
> -			control_cpu = 0;
> -			smp_mb();
> -		}
> -		break;
> -	case CPU_DEAD:
> -		if (test_bit(cpu, cpu_clamping_mask)) {
> -			pr_err("cpu %lu dead but powerclamping
> thread is not\n",
> -				cpu);
> -			stop_power_clamp_worker(cpu);
> -		}
> -		if (cpu == control_cpu) {
> -			control_cpu = smp_processor_id();
> -			smp_mb();
> -		}
> -	}
> +	stop_power_clamp_worker(cpu);
> +	if (cpu != control_cpu)
> +		return 0;
>  
> -exit_ok:
> -	return NOTIFY_OK;
> +	control_cpu = cpumask_first(cpu_online_mask);
> +	if (control_cpu == cpu)
> +		control_cpu = cpumask_next(cpu, cpu_online_mask);
> +	smp_mb();
> +	return 0;
>  }
>  
> -static struct notifier_block powerclamp_cpu_notifier = {
> -	.notifier_call = powerclamp_cpu_callback,
> -};
> -
>  static int powerclamp_get_max_state(struct thermal_cooling_device
> *cdev, unsigned long *state)
>  {
> @@ -788,6 +780,8 @@ static inline void
> powerclamp_create_debug_files(void)
> debugfs_remove_recursive(debug_dir); }
>  
> +static enum cpuhp_state hp_state;
> +
>  static int __init powerclamp_init(void)
>  {
>  	int retval;
> @@ -805,7 +799,14 @@ static int __init powerclamp_init(void)
>  
>  	/* set default limit, maybe adjusted during runtime based on
> feedback */ window_size = 2;
> -	register_hotcpu_notifier(&powerclamp_cpu_notifier);
> +	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> +
> "thermal/intel_powerclamp:online",
> +					   powerclamp_cpu_online,
> +					   powerclamp_cpu_predown);
> +	if (retval < 0)
> +		goto exit_free;
> +
> +	hp_state = retval;
>  
>  	worker_data = alloc_percpu(struct powerclamp_worker_data);
>  	if (!worker_data) {
> @@ -830,7 +831,7 @@ static int __init powerclamp_init(void)
>  exit_free_thread:
>  	free_percpu(worker_data);
>  exit_unregister:
> -	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
> +	cpuhp_remove_state_nocalls(hp_state);
>  exit_free:
>  	kfree(cpu_clamping_mask);
>  	return retval;
> @@ -839,8 +840,8 @@ static int __init powerclamp_init(void)
>  
>  static void __exit powerclamp_exit(void)
>  {
> -	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
>  	end_power_clamp();
> +	cpuhp_remove_state_nocalls(hp_state);
>  	free_percpu(worker_data);
>  	thermal_cooling_device_unregister(cooling_dev);
>  	kfree(cpu_clamping_mask);

[Jacob Pan]
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Petr Mladek Oct. 24, 2016, 3:48 p.m. UTC | #2
On Fri 2016-10-21 13:21:18, Jacob Pan wrote:
> On Mon, 17 Oct 2016 14:32:52 +0200
> Petr Mladek <pmladek@suse.com> wrote:
> 
> > From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > 
> Hi Sebastian,
> I applied this patchset on 4.9-rc1 and run some cpu online/offline
> loops test while injecting idle, e.g. 25%. I got system hang after a
> few cycles. Still looking into root cause.

You might need the patch
("sched/fair: Fix sched domains NULL deference in select_idle_sibling()")
from linux-tip, see
https://lkml.kernel.org/r/tip-9cfb38a7ba5a9c27c1af8093fb1af4b699c0a441@git.kernel.org

I have mentioned it in the cover letter. I am sorry if it was not
visible enough.

Best Regards,
Petr
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jacob Pan Oct. 24, 2016, 4:55 p.m. UTC | #3
On Mon, 24 Oct 2016 17:48:07 +0200
Petr Mladek <pmladek@suse.com> wrote:

> On Fri 2016-10-21 13:21:18, Jacob Pan wrote:
> > On Mon, 17 Oct 2016 14:32:52 +0200
> > Petr Mladek <pmladek@suse.com> wrote:
> > 
> > > From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > 
> > Hi Sebastian,
> > I applied this patchset on 4.9-rc1 and run some cpu online/offline
> > loops test while injecting idle, e.g. 25%. I got system hang after a
> > few cycles. Still looking into root cause.
> 
> You might need the patch
> ("sched/fair: Fix sched domains NULL deference in
> select_idle_sibling()") from linux-tip, see
> https://lkml.kernel.org/r/tip-9cfb38a7ba5a9c27c1af8093fb1af4b699c0a441@git.kernel.org
> 
> I have mentioned it in the cover letter. I am sorry if it was not
> visible enough.
Thanks for point it out again, I missed it.

Now rc2 has this fix so I tried again cpuhp seems to work well now.

However, I found suspend to ram does not work with this patchset. Have
you  tested it with "echo mem > /sys/power/state" while doing idle
injection?
It fails to enter s3 on my system.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Petr Mladek Oct. 27, 2016, 2:53 p.m. UTC | #4
On Mon 2016-10-24 09:55:29, Jacob Pan wrote:
> On Mon, 24 Oct 2016 17:48:07 +0200
> Petr Mladek <pmladek@suse.com> wrote:
> 
> > On Fri 2016-10-21 13:21:18, Jacob Pan wrote:
> > > On Mon, 17 Oct 2016 14:32:52 +0200
> > > Petr Mladek <pmladek@suse.com> wrote:
> > > 
> > > > From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> > > > 
> > > Hi Sebastian,
> > > I applied this patchset on 4.9-rc1 and run some cpu online/offline
> > > loops test while injecting idle, e.g. 25%. I got system hang after a
> > > few cycles. Still looking into root cause.
> > 
> > You might need the patch
> > ("sched/fair: Fix sched domains NULL deference in
> > select_idle_sibling()") from linux-tip, see
> > https://lkml.kernel.org/r/tip-9cfb38a7ba5a9c27c1af8093fb1af4b699c0a441@git.kernel.org
> > 
> > I have mentioned it in the cover letter. I am sorry if it was not
> > visible enough.
> Thanks for point it out again, I missed it.
> 
> Now rc2 has this fix so I tried again cpuhp seems to work well now.
> 
> However, I found suspend to ram does not work with this patchset. Have
> you  tested it with "echo mem > /sys/power/state" while doing idle
> injection?
> It fails to enter s3 on my system.

Hmm, I haven't tested the system suspend. I am sorry but I am snowed
under some other tasks this week and will be on the Plumbers
conference the next week. I hope that I will have some time to look
at it then.

In each case, I wonder if the problem is caused by the conversion
to the kthread worker or by the CPU hotplug state conversion.

Best Regards,
Petr
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sebastian Andrzej Siewior Oct. 27, 2016, 3:17 p.m. UTC | #5
On 2016-10-27 16:53:48 [+0200], Petr Mladek wrote:
> 
> In each case, I wonder if the problem is caused by the conversion
> to the kthread worker or by the CPU hotplug state conversion.

drop the hotplug patch and you will see.

> Best Regards,
> Petr

Sebastian
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index a94f7c849a4e..390e50b97324 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -622,43 +622,35 @@  static void end_power_clamp(void)
 	}
 }
 
-static int powerclamp_cpu_callback(struct notifier_block *nfb,
-				unsigned long action, void *hcpu)
+static int powerclamp_cpu_online(unsigned int cpu)
 {
-	unsigned long cpu = (unsigned long)hcpu;
+	if (clamping == false)
+		return 0;
+	start_power_clamp_worker(cpu);
+	/* prefer BSP as controlling CPU */
+	if (cpu == 0) {
+		control_cpu = 0;
+		smp_mb();
+	}
+	return 0;
+}
 
-	if (false == clamping)
-		goto exit_ok;
+static int powerclamp_cpu_predown(unsigned int cpu)
+{
+	if (clamping == false)
+		return 0;
 
-	switch (action) {
-	case CPU_ONLINE:
-		start_power_clamp_worker(cpu);
-		/* prefer BSP as controlling CPU */
-		if (cpu == 0) {
-			control_cpu = 0;
-			smp_mb();
-		}
-		break;
-	case CPU_DEAD:
-		if (test_bit(cpu, cpu_clamping_mask)) {
-			pr_err("cpu %lu dead but powerclamping thread is not\n",
-				cpu);
-			stop_power_clamp_worker(cpu);
-		}
-		if (cpu == control_cpu) {
-			control_cpu = smp_processor_id();
-			smp_mb();
-		}
-	}
+	stop_power_clamp_worker(cpu);
+	if (cpu != control_cpu)
+		return 0;
 
-exit_ok:
-	return NOTIFY_OK;
+	control_cpu = cpumask_first(cpu_online_mask);
+	if (control_cpu == cpu)
+		control_cpu = cpumask_next(cpu, cpu_online_mask);
+	smp_mb();
+	return 0;
 }
 
-static struct notifier_block powerclamp_cpu_notifier = {
-	.notifier_call = powerclamp_cpu_callback,
-};
-
 static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
 				 unsigned long *state)
 {
@@ -788,6 +780,8 @@  static inline void powerclamp_create_debug_files(void)
 	debugfs_remove_recursive(debug_dir);
 }
 
+static enum cpuhp_state hp_state;
+
 static int __init powerclamp_init(void)
 {
 	int retval;
@@ -805,7 +799,14 @@  static int __init powerclamp_init(void)
 
 	/* set default limit, maybe adjusted during runtime based on feedback */
 	window_size = 2;
-	register_hotcpu_notifier(&powerclamp_cpu_notifier);
+	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					   "thermal/intel_powerclamp:online",
+					   powerclamp_cpu_online,
+					   powerclamp_cpu_predown);
+	if (retval < 0)
+		goto exit_free;
+
+	hp_state = retval;
 
 	worker_data = alloc_percpu(struct powerclamp_worker_data);
 	if (!worker_data) {
@@ -830,7 +831,7 @@  static int __init powerclamp_init(void)
 exit_free_thread:
 	free_percpu(worker_data);
 exit_unregister:
-	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
+	cpuhp_remove_state_nocalls(hp_state);
 exit_free:
 	kfree(cpu_clamping_mask);
 	return retval;
@@ -839,8 +840,8 @@  static int __init powerclamp_init(void)
 
 static void __exit powerclamp_exit(void)
 {
-	unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
 	end_power_clamp();
+	cpuhp_remove_state_nocalls(hp_state);
 	free_percpu(worker_data);
 	thermal_cooling_device_unregister(cooling_dev);
 	kfree(cpu_clamping_mask);