diff mbox

cpufreq, highbank: enable ECME thermal notifications

Message ID 1382226889-11062-1-git-send-email-mark.langsdorf@calxeda.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mark Langsdorf Oct. 19, 2013, 11:54 p.m. UTC
The ECME sends thermal messages with a maximum and minimum allowed
frequency when the SoC status reaches certain trip points known to the
ECME. Use a notifier function to capture those messages and pass them
to a work-queued function that can trigger a policy re-evaluation by
cpufreq, capping the allowable frequencies.

The core of the policy adjusting code was taken from
drivers/thermal/cpu_cooling.c.

Signed-off-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
---
 drivers/cpufreq/highbank-cpufreq.c | 117 +++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)

Comments

Mark Langsdorf Nov. 19, 2013, 4:31 p.m. UTC | #1
Would it be possible to get some kind of ACK or NAK for this patch?
Thank you.

--Mark Langsdorf
Calxeda, Inc.


On 10/19/2013 06:54 PM, Mark Langsdorf wrote:
> The ECME sends thermal messages with a maximum and minimum allowed
> frequency when the SoC status reaches certain trip points known to the
> ECME. Use a notifier function to capture those messages and pass them
> to a work-queued function that can trigger a policy re-evaluation by
> cpufreq, capping the allowable frequencies.
> 
> The core of the policy adjusting code was taken from
> drivers/thermal/cpu_cooling.c.
> 
> Signed-off-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
> ---
>  drivers/cpufreq/highbank-cpufreq.c | 117 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 117 insertions(+)
> 
> diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c
> index b61b5a3..f0521d3 100644
> --- a/drivers/cpufreq/highbank-cpufreq.c
> +++ b/drivers/cpufreq/highbank-cpufreq.c
> @@ -17,15 +17,32 @@
>  #include <linux/module.h>
>  #include <linux/clk.h>
>  #include <linux/cpu.h>
> +#include <linux/cpufreq.h>
>  #include <linux/err.h>
>  #include <linux/of.h>
>  #include <linux/mailbox.h>
>  #include <linux/platform_device.h>
> +#include <linux/cpumask.h>
> +#include <linux/workqueue.h>
>  
>  #define HB_CPUFREQ_CHANGE_NOTE	0x80000001
> +#define HB_CPUFREQ_HEALTH_NOTE  0x00001001
> +#define HB_CPUFREQ_TPS_REPORT	1
>  #define HB_CPUFREQ_IPC_LEN	7
>  #define HB_CPUFREQ_VOLT_RETRIES	15
>  
> +#define NOTIFY_INVALID NULL
> +
> +struct hb_notify_device {
> +	unsigned long max_freq;
> +	unsigned long min_freq;
> +	unsigned long thermal_state;
> +	struct cpumask *allowed_cpus;
> +};
> +
> +static struct hb_notify_device hb_records, *notify_device = NOTIFY_INVALID;
> +static struct work_struct hb_thermal_wq;
> +
>  static int hb_voltage_change(unsigned int freq)
>  {
>  	u32 msg[HB_CPUFREQ_IPC_LEN] = {HB_CPUFREQ_CHANGE_NOTE, freq / 1000000};
> @@ -33,6 +50,89 @@ static int hb_voltage_change(unsigned int freq)
>  	return pl320_ipc_transmit(msg);
>  }
>  
> +static int hb_thermal_cpufreq_notify(struct notifier_block *nb,
> +				    unsigned long event, void *data)
> +{
> +	struct cpufreq_policy *policy = data;
> +	unsigned long max_freq = 0, min_freq = 0;
> +
> +	if (event != CPUFREQ_ADJUST || notify_device == NOTIFY_INVALID)
> +		return 0;
> +
> +	if (cpumask_test_cpu(policy->cpu, notify_device->allowed_cpus)) {
> +		max_freq = notify_device->max_freq;
> +		min_freq = notify_device->min_freq;
> +	}
> +
> +	/* Never exceed user_policy.max */
> +	if (max_freq > policy->user_policy.max)
> +		max_freq = policy->user_policy.max;
> +	if (min_freq < policy->user_policy.min)
> +		min_freq = policy->user_policy.min;
> +
> +	if ((policy->max != max_freq) || (policy->min != min_freq))
> +		cpufreq_verify_within_limits(policy, min_freq, max_freq);
> +
> +	return 0;
> +}
> +
> +static struct notifier_block hb_thermal_cpufreq_nb = {
> +	.notifier_call = hb_thermal_cpufreq_notify,
> +};
> +
> +/*
> + * We can't call cpufreq_adjust_policy from inside a notifier, so
> + * do it from inside a workqueue
> + */
> +static void hb_thermal_wq_task(struct work_struct *work)
> +{
> +	unsigned int cpuid;
> +	struct cpumask *mask = hb_records.allowed_cpus;
> +
> +	notify_device = &hb_records;
> +
> +	for_each_cpu(cpuid, mask) {
> +		struct cpufreq_policy policy;
> +		if (cpufreq_get_policy(&policy, cpuid) == 0) {
> +			cpufreq_update_policy(cpuid);
> +			break;
> +		}
> +	}
> +	notify_device = NOTIFY_INVALID;
> +}
> +
> +static int hb_thermal_tps_notify(struct notifier_block *nb,
> +				unsigned long action, void *mailbox)
> +{
> +	unsigned long *data = (unsigned long *) mailbox;
> +
> +	if ((action != HB_CPUFREQ_HEALTH_NOTE) &&
> +			(data[0] != HB_CPUFREQ_TPS_REPORT))
> +		return 0;
> +
> +	/*
> +	 * If we're already at this thermal state, or if ECME isn't
> +	 * sending frequency data, there's nothing to do
> +	 */
> +	if ((hb_records.thermal_state == data[1]) || !data[2] || !data[3])
> +		return NOTIFY_DONE;
> +
> +	hb_records.thermal_state = data[1];
> +	hb_records.min_freq = data[2];
> +	hb_records.max_freq = data[3];
> +	notify_device = &hb_records;
> +
> +	schedule_work(&hb_thermal_wq);
> +
> +	pr_warn("ECME TPS event: frequencies limited to %lu-%lu MHz\n",
> +	       hb_records.min_freq, hb_records.max_freq);
> +	return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block hb_thermal_tps_nb = {
> +	.notifier_call = hb_thermal_tps_notify,
> +};
> +
>  static int hb_cpufreq_clk_notify(struct notifier_block *nb,
>  				unsigned long action, void *hclk)
>  {
> @@ -100,6 +200,23 @@ static int hb_cpufreq_driver_init(void)
>  		goto out_put_node;
>  	}
>  
> +	/*
> +	 * Set up thermal state notifications from ECME, but continue
> +	 * running even if they don't work.
> +	 */
> +	ret = pl320_ipc_register_notifier(&hb_thermal_tps_nb);
> +	if (!ret) {
> +		ret = cpufreq_register_notifier(&hb_thermal_cpufreq_nb,
> +				CPUFREQ_POLICY_NOTIFIER);
> +		if (ret)
> +			pl320_ipc_unregister_notifier(&hb_thermal_tps_nb);
> +		else {
> +			hb_records.allowed_cpus = (struct cpumask *)
> +						cpu_possible_mask;
> +			INIT_WORK(&hb_thermal_wq, hb_thermal_wq_task);
> +		}
> +	}
> +
>  	/* Instantiate cpufreq-cpu0 */
>  	platform_device_register_full(&devinfo);
>  
>
Viresh Kumar Nov. 28, 2013, 6:36 a.m. UTC | #2
Hi Mark,

Sorry for a months delay.. I would suggest adding specific people in --to
field whom you want to review your patches. That's always helpful.

On Sun, Oct 20, 2013 at 5:24 AM, Mark Langsdorf
<mark.langsdorf@calxeda.com> wrote:
> The ECME sends thermal messages with a maximum and minimum allowed
> frequency when the SoC status reaches certain trip points known to the
> ECME. Use a notifier function to capture those messages and pass them
> to a work-queued function that can trigger a policy re-evaluation by
> cpufreq, capping the allowable frequencies.
>
> The core of the policy adjusting code was taken from
> drivers/thermal/cpu_cooling.c.
>
> Signed-off-by: Mark Langsdorf <mark.langsdorf@calxeda.com>
> ---
>  drivers/cpufreq/highbank-cpufreq.c | 117 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 117 insertions(+)
>
> diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c
> index b61b5a3..f0521d3 100644
> --- a/drivers/cpufreq/highbank-cpufreq.c
> +++ b/drivers/cpufreq/highbank-cpufreq.c
> @@ -17,15 +17,32 @@
>  #include <linux/module.h>
>  #include <linux/clk.h>
>  #include <linux/cpu.h>
> +#include <linux/cpufreq.h>

How was this missing earlier :)

>  #include <linux/err.h>
>  #include <linux/of.h>
>  #include <linux/mailbox.h>
>  #include <linux/platform_device.h>
> +#include <linux/cpumask.h>

Please add this after cpu.h to keep them in ascending order..
Also rearrange module and kernel.h to make this list sorted out in the
same patch..

> +#include <linux/workqueue.h>
>
>  #define HB_CPUFREQ_CHANGE_NOTE 0x80000001
> +#define HB_CPUFREQ_HEALTH_NOTE  0x00001001
> +#define HB_CPUFREQ_TPS_REPORT  1
>  #define HB_CPUFREQ_IPC_LEN     7
>  #define HB_CPUFREQ_VOLT_RETRIES        15
>
> +#define NOTIFY_INVALID NULL

Not actually required. Just use NULL everywhere you want this..

> +struct hb_notify_device {
> +       unsigned long max_freq;
> +       unsigned long min_freq;
> +       unsigned long thermal_state;
> +       struct cpumask *allowed_cpus;

Because this is always equal to cpu_possible_mask, you
don't actually need it..

> +};
> +
> +static struct hb_notify_device hb_records, *notify_device = NOTIFY_INVALID;
> +static struct work_struct hb_thermal_wq;
> +
>  static int hb_voltage_change(unsigned int freq)
>  {
>         u32 msg[HB_CPUFREQ_IPC_LEN] = {HB_CPUFREQ_CHANGE_NOTE, freq / 1000000};
> @@ -33,6 +50,89 @@ static int hb_voltage_change(unsigned int freq)
>         return pl320_ipc_transmit(msg);
>  }
>
> +static int hb_thermal_cpufreq_notify(struct notifier_block *nb,
> +                                   unsigned long event, void *data)
> +{
> +       struct cpufreq_policy *policy = data;
> +       unsigned long max_freq = 0, min_freq = 0;
> +
> +       if (event != CPUFREQ_ADJUST || notify_device == NOTIFY_INVALID)

I believe you should check notify_device == NOTIFY_INVALID first and then
value of event.

> +               return 0;
> +
> +       if (cpumask_test_cpu(policy->cpu, notify_device->allowed_cpus)) {

What's your system configuration (Just for my understanding)? How many
CPUs you have? They are sharing clock lines?

And anyway how can policy->cpu be out of cpu_possible_mask ?

> +               max_freq = notify_device->max_freq;
> +               min_freq = notify_device->min_freq;
> +       }
> +
> +       /* Never exceed user_policy.max */
> +       if (max_freq > policy->user_policy.max)
> +               max_freq = policy->user_policy.max;
> +       if (min_freq < policy->user_policy.min)
> +               min_freq = policy->user_policy.min;
> +
> +       if ((policy->max != max_freq) || (policy->min != min_freq))

Probably just call cpufreq_verify_within_limits() directly as we have
similar checks in there, it isn't too heavy ?

> +               cpufreq_verify_within_limits(policy, min_freq, max_freq);
> +
> +       return 0;
> +}
> +
> +static struct notifier_block hb_thermal_cpufreq_nb = {
> +       .notifier_call = hb_thermal_cpufreq_notify,
> +};
> +
> +/*
> + * We can't call cpufreq_adjust_policy from inside a notifier, so
> + * do it from inside a workqueue
> + */

Why exactly? Adding that into comment would be more useful..

> +static void hb_thermal_wq_task(struct work_struct *work)
> +{
> +       unsigned int cpuid;
> +       struct cpumask *mask = hb_records.allowed_cpus;
> +
> +       notify_device = &hb_records;
> +
> +       for_each_cpu(cpuid, mask) {

That's not a optimal solution. We have CPUs in groups normally
(i.e. sharing clock lines) and so each policy structure might be
common for few.. And so we need to do below only for policy->cpu
and no other CPUs from policy->affected_cpus.

> +               struct cpufreq_policy policy;
> +               if (cpufreq_get_policy(&policy, cpuid) == 0) {

Do you expect any CPUs to exist without a policy? If not, then
this check is just not required (Its heavy, memcpy)..

Otherwise, also just call cpufreq_update_policy() and check
return type for -ENODEV, which is returned if there is no
policy associated with it..

> +                       cpufreq_update_policy(cpuid);
> +                       break;
> +               }
> +       }
> +       notify_device = NOTIFY_INVALID;
> +}
diff mbox

Patch

diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c
index b61b5a3..f0521d3 100644
--- a/drivers/cpufreq/highbank-cpufreq.c
+++ b/drivers/cpufreq/highbank-cpufreq.c
@@ -17,15 +17,32 @@ 
 #include <linux/module.h>
 #include <linux/clk.h>
 #include <linux/cpu.h>
+#include <linux/cpufreq.h>
 #include <linux/err.h>
 #include <linux/of.h>
 #include <linux/mailbox.h>
 #include <linux/platform_device.h>
+#include <linux/cpumask.h>
+#include <linux/workqueue.h>
 
 #define HB_CPUFREQ_CHANGE_NOTE	0x80000001
+#define HB_CPUFREQ_HEALTH_NOTE  0x00001001
+#define HB_CPUFREQ_TPS_REPORT	1
 #define HB_CPUFREQ_IPC_LEN	7
 #define HB_CPUFREQ_VOLT_RETRIES	15
 
+#define NOTIFY_INVALID NULL
+
+struct hb_notify_device {
+	unsigned long max_freq;
+	unsigned long min_freq;
+	unsigned long thermal_state;
+	struct cpumask *allowed_cpus;
+};
+
+static struct hb_notify_device hb_records, *notify_device = NOTIFY_INVALID;
+static struct work_struct hb_thermal_wq;
+
 static int hb_voltage_change(unsigned int freq)
 {
 	u32 msg[HB_CPUFREQ_IPC_LEN] = {HB_CPUFREQ_CHANGE_NOTE, freq / 1000000};
@@ -33,6 +50,89 @@  static int hb_voltage_change(unsigned int freq)
 	return pl320_ipc_transmit(msg);
 }
 
+static int hb_thermal_cpufreq_notify(struct notifier_block *nb,
+				    unsigned long event, void *data)
+{
+	struct cpufreq_policy *policy = data;
+	unsigned long max_freq = 0, min_freq = 0;
+
+	if (event != CPUFREQ_ADJUST || notify_device == NOTIFY_INVALID)
+		return 0;
+
+	if (cpumask_test_cpu(policy->cpu, notify_device->allowed_cpus)) {
+		max_freq = notify_device->max_freq;
+		min_freq = notify_device->min_freq;
+	}
+
+	/* Never exceed user_policy.max */
+	if (max_freq > policy->user_policy.max)
+		max_freq = policy->user_policy.max;
+	if (min_freq < policy->user_policy.min)
+		min_freq = policy->user_policy.min;
+
+	if ((policy->max != max_freq) || (policy->min != min_freq))
+		cpufreq_verify_within_limits(policy, min_freq, max_freq);
+
+	return 0;
+}
+
+static struct notifier_block hb_thermal_cpufreq_nb = {
+	.notifier_call = hb_thermal_cpufreq_notify,
+};
+
+/*
+ * We can't call cpufreq_adjust_policy from inside a notifier, so
+ * do it from inside a workqueue
+ */
+static void hb_thermal_wq_task(struct work_struct *work)
+{
+	unsigned int cpuid;
+	struct cpumask *mask = hb_records.allowed_cpus;
+
+	notify_device = &hb_records;
+
+	for_each_cpu(cpuid, mask) {
+		struct cpufreq_policy policy;
+		if (cpufreq_get_policy(&policy, cpuid) == 0) {
+			cpufreq_update_policy(cpuid);
+			break;
+		}
+	}
+	notify_device = NOTIFY_INVALID;
+}
+
+static int hb_thermal_tps_notify(struct notifier_block *nb,
+				unsigned long action, void *mailbox)
+{
+	unsigned long *data = (unsigned long *) mailbox;
+
+	if ((action != HB_CPUFREQ_HEALTH_NOTE) &&
+			(data[0] != HB_CPUFREQ_TPS_REPORT))
+		return 0;
+
+	/*
+	 * If we're already at this thermal state, or if ECME isn't
+	 * sending frequency data, there's nothing to do
+	 */
+	if ((hb_records.thermal_state == data[1]) || !data[2] || !data[3])
+		return NOTIFY_DONE;
+
+	hb_records.thermal_state = data[1];
+	hb_records.min_freq = data[2];
+	hb_records.max_freq = data[3];
+	notify_device = &hb_records;
+
+	schedule_work(&hb_thermal_wq);
+
+	pr_warn("ECME TPS event: frequencies limited to %lu-%lu MHz\n",
+	       hb_records.min_freq, hb_records.max_freq);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block hb_thermal_tps_nb = {
+	.notifier_call = hb_thermal_tps_notify,
+};
+
 static int hb_cpufreq_clk_notify(struct notifier_block *nb,
 				unsigned long action, void *hclk)
 {
@@ -100,6 +200,23 @@  static int hb_cpufreq_driver_init(void)
 		goto out_put_node;
 	}
 
+	/*
+	 * Set up thermal state notifications from ECME, but continue
+	 * running even if they don't work.
+	 */
+	ret = pl320_ipc_register_notifier(&hb_thermal_tps_nb);
+	if (!ret) {
+		ret = cpufreq_register_notifier(&hb_thermal_cpufreq_nb,
+				CPUFREQ_POLICY_NOTIFIER);
+		if (ret)
+			pl320_ipc_unregister_notifier(&hb_thermal_tps_nb);
+		else {
+			hb_records.allowed_cpus = (struct cpumask *)
+						cpu_possible_mask;
+			INIT_WORK(&hb_thermal_wq, hb_thermal_wq_task);
+		}
+	}
+
 	/* Instantiate cpufreq-cpu0 */
 	platform_device_register_full(&devinfo);