Message ID | 1382226889-11062-1-git-send-email-mark.langsdorf@calxeda.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Would it be possible to get some kind of ACK or NAK for this patch? Thank you. --Mark Langsdorf Calxeda, Inc. On 10/19/2013 06:54 PM, Mark Langsdorf wrote: > The ECME sends thermal messages with a maximum and minimum allowed > frequency when the SoC status reaches certain trip points known to the > ECME. Use a notifier function to capture those messages and pass them > to a work-queued function that can trigger a policy re-evaluation by > cpufreq, capping the allowable frequencies. > > The core of the policy adjusting code was taken from > drivers/thermal/cpu_cooling.c. > > Signed-off-by: Mark Langsdorf <mark.langsdorf@calxeda.com> > --- > drivers/cpufreq/highbank-cpufreq.c | 117 +++++++++++++++++++++++++++++++++++++ > 1 file changed, 117 insertions(+) > > diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c > index b61b5a3..f0521d3 100644 > --- a/drivers/cpufreq/highbank-cpufreq.c > +++ b/drivers/cpufreq/highbank-cpufreq.c > @@ -17,15 +17,32 @@ > #include <linux/module.h> > #include <linux/clk.h> > #include <linux/cpu.h> > +#include <linux/cpufreq.h> > #include <linux/err.h> > #include <linux/of.h> > #include <linux/mailbox.h> > #include <linux/platform_device.h> > +#include <linux/cpumask.h> > +#include <linux/workqueue.h> > > #define HB_CPUFREQ_CHANGE_NOTE 0x80000001 > +#define HB_CPUFREQ_HEALTH_NOTE 0x00001001 > +#define HB_CPUFREQ_TPS_REPORT 1 > #define HB_CPUFREQ_IPC_LEN 7 > #define HB_CPUFREQ_VOLT_RETRIES 15 > > +#define NOTIFY_INVALID NULL > + > +struct hb_notify_device { > + unsigned long max_freq; > + unsigned long min_freq; > + unsigned long thermal_state; > + struct cpumask *allowed_cpus; > +}; > + > +static struct hb_notify_device hb_records, *notify_device = NOTIFY_INVALID; > +static struct work_struct hb_thermal_wq; > + > static int hb_voltage_change(unsigned int freq) > { > u32 msg[HB_CPUFREQ_IPC_LEN] = {HB_CPUFREQ_CHANGE_NOTE, freq / 1000000}; > @@ -33,6 +50,89 @@ static int hb_voltage_change(unsigned int freq) > return pl320_ipc_transmit(msg); > } > > +static int hb_thermal_cpufreq_notify(struct notifier_block *nb, > + unsigned long event, void *data) > +{ > + struct cpufreq_policy *policy = data; > + unsigned long max_freq = 0, min_freq = 0; > + > + if (event != CPUFREQ_ADJUST || notify_device == NOTIFY_INVALID) > + return 0; > + > + if (cpumask_test_cpu(policy->cpu, notify_device->allowed_cpus)) { > + max_freq = notify_device->max_freq; > + min_freq = notify_device->min_freq; > + } > + > + /* Never exceed user_policy.max */ > + if (max_freq > policy->user_policy.max) > + max_freq = policy->user_policy.max; > + if (min_freq < policy->user_policy.min) > + min_freq = policy->user_policy.min; > + > + if ((policy->max != max_freq) || (policy->min != min_freq)) > + cpufreq_verify_within_limits(policy, min_freq, max_freq); > + > + return 0; > +} > + > +static struct notifier_block hb_thermal_cpufreq_nb = { > + .notifier_call = hb_thermal_cpufreq_notify, > +}; > + > +/* > + * We can't call cpufreq_adjust_policy from inside a notifier, so > + * do it from inside a workqueue > + */ > +static void hb_thermal_wq_task(struct work_struct *work) > +{ > + unsigned int cpuid; > + struct cpumask *mask = hb_records.allowed_cpus; > + > + notify_device = &hb_records; > + > + for_each_cpu(cpuid, mask) { > + struct cpufreq_policy policy; > + if (cpufreq_get_policy(&policy, cpuid) == 0) { > + cpufreq_update_policy(cpuid); > + break; > + } > + } > + notify_device = NOTIFY_INVALID; > +} > + > +static int hb_thermal_tps_notify(struct notifier_block *nb, > + unsigned long action, void *mailbox) > +{ > + unsigned long *data = (unsigned long *) mailbox; > + > + if ((action != HB_CPUFREQ_HEALTH_NOTE) && > + (data[0] != HB_CPUFREQ_TPS_REPORT)) > + return 0; > + > + /* > + * If we're already at this thermal state, or if ECME isn't > + * sending frequency data, there's nothing to do > + */ > + if ((hb_records.thermal_state == data[1]) || !data[2] || !data[3]) > + return NOTIFY_DONE; > + > + hb_records.thermal_state = data[1]; > + hb_records.min_freq = data[2]; > + hb_records.max_freq = data[3]; > + notify_device = &hb_records; > + > + schedule_work(&hb_thermal_wq); > + > + pr_warn("ECME TPS event: frequencies limited to %lu-%lu MHz\n", > + hb_records.min_freq, hb_records.max_freq); > + return NOTIFY_DONE; > +} > + > +static struct notifier_block hb_thermal_tps_nb = { > + .notifier_call = hb_thermal_tps_notify, > +}; > + > static int hb_cpufreq_clk_notify(struct notifier_block *nb, > unsigned long action, void *hclk) > { > @@ -100,6 +200,23 @@ static int hb_cpufreq_driver_init(void) > goto out_put_node; > } > > + /* > + * Set up thermal state notifications from ECME, but continue > + * running even if they don't work. > + */ > + ret = pl320_ipc_register_notifier(&hb_thermal_tps_nb); > + if (!ret) { > + ret = cpufreq_register_notifier(&hb_thermal_cpufreq_nb, > + CPUFREQ_POLICY_NOTIFIER); > + if (ret) > + pl320_ipc_unregister_notifier(&hb_thermal_tps_nb); > + else { > + hb_records.allowed_cpus = (struct cpumask *) > + cpu_possible_mask; > + INIT_WORK(&hb_thermal_wq, hb_thermal_wq_task); > + } > + } > + > /* Instantiate cpufreq-cpu0 */ > platform_device_register_full(&devinfo); > >
Hi Mark, Sorry for a months delay.. I would suggest adding specific people in --to field whom you want to review your patches. That's always helpful. On Sun, Oct 20, 2013 at 5:24 AM, Mark Langsdorf <mark.langsdorf@calxeda.com> wrote: > The ECME sends thermal messages with a maximum and minimum allowed > frequency when the SoC status reaches certain trip points known to the > ECME. Use a notifier function to capture those messages and pass them > to a work-queued function that can trigger a policy re-evaluation by > cpufreq, capping the allowable frequencies. > > The core of the policy adjusting code was taken from > drivers/thermal/cpu_cooling.c. > > Signed-off-by: Mark Langsdorf <mark.langsdorf@calxeda.com> > --- > drivers/cpufreq/highbank-cpufreq.c | 117 +++++++++++++++++++++++++++++++++++++ > 1 file changed, 117 insertions(+) > > diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c > index b61b5a3..f0521d3 100644 > --- a/drivers/cpufreq/highbank-cpufreq.c > +++ b/drivers/cpufreq/highbank-cpufreq.c > @@ -17,15 +17,32 @@ > #include <linux/module.h> > #include <linux/clk.h> > #include <linux/cpu.h> > +#include <linux/cpufreq.h> How was this missing earlier :) > #include <linux/err.h> > #include <linux/of.h> > #include <linux/mailbox.h> > #include <linux/platform_device.h> > +#include <linux/cpumask.h> Please add this after cpu.h to keep them in ascending order.. Also rearrange module and kernel.h to make this list sorted out in the same patch.. > +#include <linux/workqueue.h> > > #define HB_CPUFREQ_CHANGE_NOTE 0x80000001 > +#define HB_CPUFREQ_HEALTH_NOTE 0x00001001 > +#define HB_CPUFREQ_TPS_REPORT 1 > #define HB_CPUFREQ_IPC_LEN 7 > #define HB_CPUFREQ_VOLT_RETRIES 15 > > +#define NOTIFY_INVALID NULL Not actually required. Just use NULL everywhere you want this.. > +struct hb_notify_device { > + unsigned long max_freq; > + unsigned long min_freq; > + unsigned long thermal_state; > + struct cpumask *allowed_cpus; Because this is always equal to cpu_possible_mask, you don't actually need it.. > +}; > + > +static struct hb_notify_device hb_records, *notify_device = NOTIFY_INVALID; > +static struct work_struct hb_thermal_wq; > + > static int hb_voltage_change(unsigned int freq) > { > u32 msg[HB_CPUFREQ_IPC_LEN] = {HB_CPUFREQ_CHANGE_NOTE, freq / 1000000}; > @@ -33,6 +50,89 @@ static int hb_voltage_change(unsigned int freq) > return pl320_ipc_transmit(msg); > } > > +static int hb_thermal_cpufreq_notify(struct notifier_block *nb, > + unsigned long event, void *data) > +{ > + struct cpufreq_policy *policy = data; > + unsigned long max_freq = 0, min_freq = 0; > + > + if (event != CPUFREQ_ADJUST || notify_device == NOTIFY_INVALID) I believe you should check notify_device == NOTIFY_INVALID first and then value of event. > + return 0; > + > + if (cpumask_test_cpu(policy->cpu, notify_device->allowed_cpus)) { What's your system configuration (Just for my understanding)? How many CPUs you have? They are sharing clock lines? And anyway how can policy->cpu be out of cpu_possible_mask ? > + max_freq = notify_device->max_freq; > + min_freq = notify_device->min_freq; > + } > + > + /* Never exceed user_policy.max */ > + if (max_freq > policy->user_policy.max) > + max_freq = policy->user_policy.max; > + if (min_freq < policy->user_policy.min) > + min_freq = policy->user_policy.min; > + > + if ((policy->max != max_freq) || (policy->min != min_freq)) Probably just call cpufreq_verify_within_limits() directly as we have similar checks in there, it isn't too heavy ? > + cpufreq_verify_within_limits(policy, min_freq, max_freq); > + > + return 0; > +} > + > +static struct notifier_block hb_thermal_cpufreq_nb = { > + .notifier_call = hb_thermal_cpufreq_notify, > +}; > + > +/* > + * We can't call cpufreq_adjust_policy from inside a notifier, so > + * do it from inside a workqueue > + */ Why exactly? Adding that into comment would be more useful.. > +static void hb_thermal_wq_task(struct work_struct *work) > +{ > + unsigned int cpuid; > + struct cpumask *mask = hb_records.allowed_cpus; > + > + notify_device = &hb_records; > + > + for_each_cpu(cpuid, mask) { That's not a optimal solution. We have CPUs in groups normally (i.e. sharing clock lines) and so each policy structure might be common for few.. And so we need to do below only for policy->cpu and no other CPUs from policy->affected_cpus. > + struct cpufreq_policy policy; > + if (cpufreq_get_policy(&policy, cpuid) == 0) { Do you expect any CPUs to exist without a policy? If not, then this check is just not required (Its heavy, memcpy).. Otherwise, also just call cpufreq_update_policy() and check return type for -ENODEV, which is returned if there is no policy associated with it.. > + cpufreq_update_policy(cpuid); > + break; > + } > + } > + notify_device = NOTIFY_INVALID; > +}
diff --git a/drivers/cpufreq/highbank-cpufreq.c b/drivers/cpufreq/highbank-cpufreq.c index b61b5a3..f0521d3 100644 --- a/drivers/cpufreq/highbank-cpufreq.c +++ b/drivers/cpufreq/highbank-cpufreq.c @@ -17,15 +17,32 @@ #include <linux/module.h> #include <linux/clk.h> #include <linux/cpu.h> +#include <linux/cpufreq.h> #include <linux/err.h> #include <linux/of.h> #include <linux/mailbox.h> #include <linux/platform_device.h> +#include <linux/cpumask.h> +#include <linux/workqueue.h> #define HB_CPUFREQ_CHANGE_NOTE 0x80000001 +#define HB_CPUFREQ_HEALTH_NOTE 0x00001001 +#define HB_CPUFREQ_TPS_REPORT 1 #define HB_CPUFREQ_IPC_LEN 7 #define HB_CPUFREQ_VOLT_RETRIES 15 +#define NOTIFY_INVALID NULL + +struct hb_notify_device { + unsigned long max_freq; + unsigned long min_freq; + unsigned long thermal_state; + struct cpumask *allowed_cpus; +}; + +static struct hb_notify_device hb_records, *notify_device = NOTIFY_INVALID; +static struct work_struct hb_thermal_wq; + static int hb_voltage_change(unsigned int freq) { u32 msg[HB_CPUFREQ_IPC_LEN] = {HB_CPUFREQ_CHANGE_NOTE, freq / 1000000}; @@ -33,6 +50,89 @@ static int hb_voltage_change(unsigned int freq) return pl320_ipc_transmit(msg); } +static int hb_thermal_cpufreq_notify(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct cpufreq_policy *policy = data; + unsigned long max_freq = 0, min_freq = 0; + + if (event != CPUFREQ_ADJUST || notify_device == NOTIFY_INVALID) + return 0; + + if (cpumask_test_cpu(policy->cpu, notify_device->allowed_cpus)) { + max_freq = notify_device->max_freq; + min_freq = notify_device->min_freq; + } + + /* Never exceed user_policy.max */ + if (max_freq > policy->user_policy.max) + max_freq = policy->user_policy.max; + if (min_freq < policy->user_policy.min) + min_freq = policy->user_policy.min; + + if ((policy->max != max_freq) || (policy->min != min_freq)) + cpufreq_verify_within_limits(policy, min_freq, max_freq); + + return 0; +} + +static struct notifier_block hb_thermal_cpufreq_nb = { + .notifier_call = hb_thermal_cpufreq_notify, +}; + +/* + * We can't call cpufreq_adjust_policy from inside a notifier, so + * do it from inside a workqueue + */ +static void hb_thermal_wq_task(struct work_struct *work) +{ + unsigned int cpuid; + struct cpumask *mask = hb_records.allowed_cpus; + + notify_device = &hb_records; + + for_each_cpu(cpuid, mask) { + struct cpufreq_policy policy; + if (cpufreq_get_policy(&policy, cpuid) == 0) { + cpufreq_update_policy(cpuid); + break; + } + } + notify_device = NOTIFY_INVALID; +} + +static int hb_thermal_tps_notify(struct notifier_block *nb, + unsigned long action, void *mailbox) +{ + unsigned long *data = (unsigned long *) mailbox; + + if ((action != HB_CPUFREQ_HEALTH_NOTE) && + (data[0] != HB_CPUFREQ_TPS_REPORT)) + return 0; + + /* + * If we're already at this thermal state, or if ECME isn't + * sending frequency data, there's nothing to do + */ + if ((hb_records.thermal_state == data[1]) || !data[2] || !data[3]) + return NOTIFY_DONE; + + hb_records.thermal_state = data[1]; + hb_records.min_freq = data[2]; + hb_records.max_freq = data[3]; + notify_device = &hb_records; + + schedule_work(&hb_thermal_wq); + + pr_warn("ECME TPS event: frequencies limited to %lu-%lu MHz\n", + hb_records.min_freq, hb_records.max_freq); + return NOTIFY_DONE; +} + +static struct notifier_block hb_thermal_tps_nb = { + .notifier_call = hb_thermal_tps_notify, +}; + static int hb_cpufreq_clk_notify(struct notifier_block *nb, unsigned long action, void *hclk) { @@ -100,6 +200,23 @@ static int hb_cpufreq_driver_init(void) goto out_put_node; } + /* + * Set up thermal state notifications from ECME, but continue + * running even if they don't work. + */ + ret = pl320_ipc_register_notifier(&hb_thermal_tps_nb); + if (!ret) { + ret = cpufreq_register_notifier(&hb_thermal_cpufreq_nb, + CPUFREQ_POLICY_NOTIFIER); + if (ret) + pl320_ipc_unregister_notifier(&hb_thermal_tps_nb); + else { + hb_records.allowed_cpus = (struct cpumask *) + cpu_possible_mask; + INIT_WORK(&hb_thermal_wq, hb_thermal_wq_task); + } + } + /* Instantiate cpufreq-cpu0 */ platform_device_register_full(&devinfo);
The ECME sends thermal messages with a maximum and minimum allowed frequency when the SoC status reaches certain trip points known to the ECME. Use a notifier function to capture those messages and pass them to a work-queued function that can trigger a policy re-evaluation by cpufreq, capping the allowable frequencies. The core of the policy adjusting code was taken from drivers/thermal/cpu_cooling.c. Signed-off-by: Mark Langsdorf <mark.langsdorf@calxeda.com> --- drivers/cpufreq/highbank-cpufreq.c | 117 +++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+)