diff mbox

[3/3] cpufreq: Add a generic cpufreq-cpu0 driver

Message ID 1342713281-31114-4-git-send-email-shawn.guo@linaro.org (mailing list archive)
State New, archived
Headers show

Commit Message

Shawn Guo July 19, 2012, 3:54 p.m. UTC
It adds a generic cpufreq driver for CPU0 frequency management based on
clk, regulator, OPP and device tree support.  It can support both
uniprocessor (UP) and those symmetric multiprocessor (SMP) systems which
share clock and voltage across all CPUs.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
---
 .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |   58 +++++
 drivers/cpufreq/Kconfig                            |   11 +
 drivers/cpufreq/Makefile                           |    2 +
 drivers/cpufreq/cpufreq-cpu0.c                     |  235 ++++++++++++++++++++
 4 files changed, 306 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
 create mode 100644 drivers/cpufreq/cpufreq-cpu0.c

Comments

Santosh Shilimkar July 20, 2012, 6:52 a.m. UTC | #1
Shawn,

On Thu, Jul 19, 2012 at 9:24 PM, Shawn Guo <shawn.guo@linaro.org> wrote:
> It adds a generic cpufreq driver for CPU0 frequency management based on
> clk, regulator, OPP and device tree support.  It can support both
> uniprocessor (UP) and those symmetric multiprocessor (SMP) systems which
> share clock and voltage across all CPUs.
>
> Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
> ---

[...]

> diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
> index e24a2a1..e77a1e0 100644
> --- a/drivers/cpufreq/Kconfig
> +++ b/drivers/cpufreq/Kconfig
> @@ -179,6 +179,17 @@ config CPU_FREQ_GOV_CONSERVATIVE
>
>           If in doubt, say N.
>
> +config GENERIC_CPUFREQ_CPU0
> +       bool "Generic cpufreq driver for CPU0"
> +       depends on HAVE_CLK && REGULATOR && PM_OPP && OF
> +       select CPU_FREQ_TABLE
> +       help
> +         This adds a generic cpufreq driver for CPU0 frequency management.
> +         It supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
> +         systems which share clock and voltage across all CPUs.
> +
> +         If in doubt, say N.
> +
This *_CPU0 doesn't seems to be appropriate since this infrastructure will be
used on SMP systems where CPUs shares clock/voltage rails. May be you can
get rid of that unless there is a strong need.

[...]

> +++ b/drivers/cpufreq/cpufreq-cpu0.c
> @@ -0,0 +1,235 @@
> +/*
> + * Copyright (C) 2012 Freescale Semiconductor, Inc.
> + *
> + * Reuse some codes from drivers/cpufreq/omap-cpufreq.c
> + *
> + * The code contained herein is licensed under the GNU General Public
> + * License. You may obtain a copy of the GNU General Public License
> + * Version 2 or later at the following locations:
> + *
> + * http://www.opensource.org/licenses/gpl-license.html
> + * http://www.gnu.org/copyleft/gpl.html
> + */
> +
> +#define pr_fmt(fmt)    KBUILD_MODNAME ": " fmt
> +
> +#include <linux/clk.h>
> +#include <linux/cpu.h>
> +#include <linux/cpufreq.h>
> +#include <linux/err.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/opp.h>
> +#include <linux/regulator/consumer.h>
> +#include <linux/slab.h>
> +
> +/* voltage tolerance in percentage */
> +static unsigned int voltage_tolerance;
> +
> +static struct device *cpu_dev;
> +static struct clk *cpu_clk;
> +static struct regulator *cpu_reg;
> +static struct cpufreq_frequency_table *freq_table;
> +
> +static int cpu0_verify_speed(struct cpufreq_policy *policy)
> +{
> +       return cpufreq_frequency_table_verify(policy, freq_table);
> +}
> +
> +static unsigned int cpu0_get_speed(unsigned int cpu)
> +{
> +       return clk_get_rate(cpu_clk) / 1000;
> +}
> +
> +static int cpu0_set_target(struct cpufreq_policy *policy,
> +                          unsigned int target_freq, unsigned int relation)
> +{
> +       struct cpufreq_freqs freqs;
> +       struct opp *opp;
> +       unsigned long freq_Hz, volt = 0, volt_old = 0, tol = 0;
> +       unsigned int index, cpu;
> +       int ret = 0;
> +
> +       ret = cpufreq_frequency_table_target(policy, freq_table, target_freq,
> +                                            relation, &index);
> +       if (ret) {
> +               pr_err("failed to match target freqency %d: %d\n",
> +                      target_freq, ret);
> +               return ret;
> +       }
> +
> +       freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
> +       if (freq_Hz < 0)
> +               freq_Hz = freq_table[index].frequency * 1000;
> +       freqs.new = freq_Hz / 1000;
> +       freqs.old = clk_get_rate(cpu_clk) / 1000;
> +
> +       if (freqs.old == freqs.new)
> +               return 0;
> +
> +       for_each_cpu(cpu, policy->cpus) {
You need for_each_online_cpu() here o.w you will end up calling the
notifier on CPU which is are hot-plugged out or not online yet.

> +               freqs.cpu = cpu;
> +               cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
> +       }
> +
> +       if (cpu_reg) {
> +               opp = opp_find_freq_ceil(cpu_dev, &freq_Hz);
> +               if (IS_ERR(opp)) {
> +                       pr_err("failed to find OPP for %ld\n", freq_Hz);
> +                       return PTR_ERR(opp);
> +               }
> +               volt = opp_get_voltage(opp);
> +               tol = volt * voltage_tolerance / 100;
> +               volt_old = regulator_get_voltage(cpu_reg);
> +       }
> +
> +       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
> +                freqs.old / 1000, volt_old ? volt_old / 1000 : -1,
> +                freqs.new / 1000, volt ? volt / 1000 : -1);
> +
> +       /* scaling up?  scale voltage before frequency */
> +       if (cpu_reg && freqs.new > freqs.old) {
> +               if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {
> +                       pr_warn("Unable to scale voltage up\n");
> +                       freqs.new = freqs.old;
> +                       goto done;
Do you need a POST notifier in case of the failure ?

> +               }
> +       }
> +
> +       ret = clk_set_rate(cpu_clk, freqs.new * 1000);
> +
> +       /* scaling down?  scale voltage after frequency */
> +       if (cpu_reg && (freqs.new < freqs.old)) {
> +               if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {
> +                       pr_warn("Unable to scale voltage down\n");
> +                       ret = clk_set_rate(cpu_clk, freqs.old * 1000);
> +                       freqs.new = freqs.old;
> +                       goto done;
Here too.

> +               }
> +       }
> +
> +done:
> +       for_each_cpu(cpu, policy->cpus) {
Same as first comment.

> +               freqs.cpu = cpu;
> +               cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
> +       }
> +
> +       return ret;
> +}
> +
> +static int cpu0_cpufreq_init(struct cpufreq_policy *policy)
> +{
> +       struct device_node *np;
> +       int ret;
> +
> +       /* Initialize resources via CPU0 */
> +       if (policy->cpu != 0)
> +               return -EINVAL;
> +
> +       np = of_find_node_by_path("/cpus/cpu@0");
> +       if (!np) {
> +               pr_err("failed to find cpu0 node\n");
> +               return -ENOENT;
> +       }
> +
> +       cpu_dev = get_cpu_device(0);
> +       if (!cpu_dev) {
> +               pr_err("failed to get cpu0 device\n");
> +               ret = -ENODEV;
> +               goto out_put_node;
> +       }
> +
> +       cpu_dev->of_node = np;
> +       ret = of_init_opp_table(cpu_dev);
> +       if (ret) {
> +               pr_err("failed to init OPP table\n");
> +               goto out_put_node;
> +       }
> +
> +       ret = opp_init_cpufreq_table(cpu_dev, &freq_table);
> +       if (ret) {
> +               pr_err("failed to init cpufreq table\n");
> +               goto out_put_node;
> +       }
> +
> +       ret = cpufreq_frequency_table_cpuinfo(policy, freq_table);
> +       if (ret) {
> +               pr_err("invalid frequency table for cpu0\n");
> +               goto out_free_table;
> +       }
> +
> +       if (of_property_read_u32(np, "transition-latency",
> +                                &policy->cpuinfo.transition_latency))
> +               policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
> +
> +       of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
> +
> +       cpu_clk = clk_get(cpu_dev, NULL);
> +       if (IS_ERR(cpu_clk)) {
> +               pr_err("failed to get cpu0 clock\n");
> +               ret = PTR_ERR(cpu_clk);
> +               goto out_free_table;
> +       }
> +       policy->cur = clk_get_rate(cpu_clk) / 1000;
> +
> +       cpu_reg = regulator_get(cpu_dev, "cpu0");
> +       if (IS_ERR(cpu_reg)) {
> +               pr_warn("failed to get cpu0 regulator\n");
> +               cpu_reg = NULL;
> +       }
> +
> +       /*
> +        * The driver only supports the SMP configuartion where all processors
> +        * share the clock and voltage and clock.  Use cpufreq affected_cpus
> +        * interface to have all CPUs scaled together.
> +        */
> +       policy->shared_type = CPUFREQ_SHARED_TYPE_ANY;
> +       cpumask_setall(policy->cpus);
> +
> +       cpufreq_frequency_table_get_attr(freq_table, policy->cpu);
> +
> +       of_node_put(np);
> +       return 0;
> +
> +out_free_table:
> +       opp_free_cpufreq_table(cpu_dev, &freq_table);
> +out_put_node:
> +       of_node_put(np);
> +       return ret;
> +}
> +
> +static int cpu0_cpufreq_exit(struct cpufreq_policy *policy)
> +{
> +       cpufreq_frequency_table_put_attr(policy->cpu);
> +       regulator_put(cpu_reg);
> +       clk_put(cpu_clk);
> +       opp_free_cpufreq_table(cpu_dev, &freq_table);
> +
> +       return 0;
> +}
> +
> +static struct freq_attr *cpu0_cpufreq_attr[] = {
> +       &cpufreq_freq_attr_scaling_available_freqs,
> +       NULL,
> +};
> +
> +static struct cpufreq_driver cpu0_cpufreq_driver = {
> +       .flags = CPUFREQ_STICKY,
> +       .verify = cpu0_verify_speed,
> +       .target = cpu0_set_target,
> +       .get = cpu0_get_speed,
> +       .init = cpu0_cpufreq_init,
> +       .exit = cpu0_cpufreq_exit,
> +       .name = "generic_cpu0",
> +       .attr = cpu0_cpufreq_attr,
> +};
> +
> +static int __devinit cpu0_cpufreq_driver_init(void)
> +{
> +       return cpufreq_register_driver(&cpu0_cpufreq_driver);
> +}
> +late_initcall(cpu0_cpufreq_driver_init);

Can you also add support to build this as loadable module ?

Regards
santosh
Shawn Guo July 20, 2012, 12:33 p.m. UTC | #2
Thanks for the reviewing, Santosh.

On Fri, Jul 20, 2012 at 12:22:14PM +0530, Shilimkar, Santosh wrote:
> > +config GENERIC_CPUFREQ_CPU0
> > +       bool "Generic cpufreq driver for CPU0"
> > +       depends on HAVE_CLK && REGULATOR && PM_OPP && OF
> > +       select CPU_FREQ_TABLE
> > +       help
> > +         This adds a generic cpufreq driver for CPU0 frequency management.
> > +         It supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
> > +         systems which share clock and voltage across all CPUs.
> > +
> > +         If in doubt, say N.
> > +
> This *_CPU0 doesn't seems to be appropriate since this infrastructure will be
> used on SMP systems where CPUs shares clock/voltage rails. May be you can
> get rid of that unless there is a strong need.
> 
All the resource handlers, clk, regulator, opp, DT node, are retrieved
from CPU0 device even for SMP.  I hope the naming of the driver could
tell:

- The driver supports UP
- The driver supports SMP where CPUs shares clock/voltage rails by
  managing CPU0 (resource)
- The driver does not support SMP where individual CPU can scale
  clock/voltage independently.

I also thought about naming the driver cpufreq-coupled, but the name
misses the implication of UP support somehow, so I chose cpufreq-cpu0.

> > +static int cpu0_set_target(struct cpufreq_policy *policy,
> > +                          unsigned int target_freq, unsigned int relation)
> > +{
> > +       struct cpufreq_freqs freqs;
> > +       struct opp *opp;
> > +       unsigned long freq_Hz, volt = 0, volt_old = 0, tol = 0;
> > +       unsigned int index, cpu;
> > +       int ret = 0;
> > +
> > +       ret = cpufreq_frequency_table_target(policy, freq_table, target_freq,
> > +                                            relation, &index);
> > +       if (ret) {
> > +               pr_err("failed to match target freqency %d: %d\n",
> > +                      target_freq, ret);
> > +               return ret;
> > +       }
> > +
> > +       freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
> > +       if (freq_Hz < 0)
> > +               freq_Hz = freq_table[index].frequency * 1000;
> > +       freqs.new = freq_Hz / 1000;
> > +       freqs.old = clk_get_rate(cpu_clk) / 1000;
> > +
> > +       if (freqs.old == freqs.new)
> > +               return 0;
> > +
> > +       for_each_cpu(cpu, policy->cpus) {
> You need for_each_online_cpu() here o.w you will end up calling the
> notifier on CPU which is are hot-plugged out or not online yet.
> 
Yes, that's sensible.  Since we have all the CPUs set in policy->cpus
in this driver, we do not have to enumerate policy->cpus, and checking
online CPUs should be better for the cases you mentioned.

> > +               freqs.cpu = cpu;
> > +               cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
> > +       }
> > +
> > +       if (cpu_reg) {
> > +               opp = opp_find_freq_ceil(cpu_dev, &freq_Hz);
> > +               if (IS_ERR(opp)) {
> > +                       pr_err("failed to find OPP for %ld\n", freq_Hz);
> > +                       return PTR_ERR(opp);
> > +               }
> > +               volt = opp_get_voltage(opp);
> > +               tol = volt * voltage_tolerance / 100;
> > +               volt_old = regulator_get_voltage(cpu_reg);
> > +       }
> > +
> > +       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
> > +                freqs.old / 1000, volt_old ? volt_old / 1000 : -1,
> > +                freqs.new / 1000, volt ? volt / 1000 : -1);
> > +
> > +       /* scaling up?  scale voltage before frequency */
> > +       if (cpu_reg && freqs.new > freqs.old) {
> > +               if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {
> > +                       pr_warn("Unable to scale voltage up\n");
> > +                       freqs.new = freqs.old;
> > +                       goto done;
> Do you need a POST notifier in case of the failure ?
> 
Honestly, I'm not quite sure about that.  This is a piece of code
reused from omap-cpufreq driver.  Looking at cpufreq_notify_transition,
it seems to me that the POST notifier is not really necessary for
failure case.

> > +static struct cpufreq_driver cpu0_cpufreq_driver = {
> > +       .flags = CPUFREQ_STICKY,
> > +       .verify = cpu0_verify_speed,
> > +       .target = cpu0_set_target,
> > +       .get = cpu0_get_speed,
> > +       .init = cpu0_cpufreq_init,
> > +       .exit = cpu0_cpufreq_exit,
> > +       .name = "generic_cpu0",
> > +       .attr = cpu0_cpufreq_attr,
> > +};
> > +
> > +static int __devinit cpu0_cpufreq_driver_init(void)
> > +{
> > +       return cpufreq_register_driver(&cpu0_cpufreq_driver);
> > +}
> > +late_initcall(cpu0_cpufreq_driver_init);
> 
> Can you also add support to build this as loadable module ?
> 
I'm just following the common pattern of ARM cpufreq drivers to have
CPUFREQ_STICKY set in cpufreq_driver.flag and use "bool" for the driver
Kconfig.  I'm not sure about the necessity of building this as a module.
Richard Zhao July 20, 2012, 12:51 p.m. UTC | #3
Hi, Shawn,

I find you create a new driver rather than work on my generic cpufreq driver based on clk and regulator.
Is there any reason besides adding opp support?

Thanks
Richard
Shawn Guo July 20, 2012, 1:15 p.m. UTC | #4
On Fri, Jul 20, 2012 at 08:51:17PM +0800, Richard Zhao wrote:
> Hi, Shawn,
> 
> I find you create a new driver rather than work on my generic cpufreq driver based on clk and regulator.
> Is there any reason besides adding opp support?
> 
Adopting OPP support forces me to look at omap-cpufreq driver, hence
makes me create a new driver by referencing to omap-cpufreq.
Mike Turquette July 20, 2012, 3:50 p.m. UTC | #5
On Fri, Jul 20, 2012 at 5:33 AM, Shawn Guo <shawn.guo@linaro.org> wrote:
> On Fri, Jul 20, 2012 at 12:22:14PM +0530, Shilimkar, Santosh wrote:
>> This *_CPU0 doesn't seems to be appropriate since this infrastructure will be
>> used on SMP systems where CPUs shares clock/voltage rails. May be you can
>> get rid of that unless there is a strong need.
>>
> All the resource handlers, clk, regulator, opp, DT node, are retrieved
> from CPU0 device even for SMP.  I hope the naming of the driver could
> tell:
>
> - The driver supports UP
> - The driver supports SMP where CPUs shares clock/voltage rails by
>   managing CPU0 (resource)
> - The driver does not support SMP where individual CPU can scale
>   clock/voltage independently.
>

How about cpufreq-single-thread.c and CONFIG_CPUFREQ_SINGLE_THREAD?
That makes sense for both UP and SMP.

Regards,
Mike
Santosh Shilimkar July 21, 2012, 5:04 a.m. UTC | #6
On Fri, Jul 20, 2012 at 9:20 PM, Turquette, Mike <mturquette@ti.com> wrote:
> On Fri, Jul 20, 2012 at 5:33 AM, Shawn Guo <shawn.guo@linaro.org> wrote:
>> On Fri, Jul 20, 2012 at 12:22:14PM +0530, Shilimkar, Santosh wrote:
>>> This *_CPU0 doesn't seems to be appropriate since this infrastructure will be
>>> used on SMP systems where CPUs shares clock/voltage rails. May be you can
>>> get rid of that unless there is a strong need.
>>>
>> All the resource handlers, clk, regulator, opp, DT node, are retrieved
>> from CPU0 device even for SMP.  I hope the naming of the driver could
>> tell:
>>
>> - The driver supports UP
>> - The driver supports SMP where CPUs shares clock/voltage rails by
>>   managing CPU0 (resource)
>> - The driver does not support SMP where individual CPU can scale
>>   clock/voltage independently.
>>
>
> How about cpufreq-single-thread.c and CONFIG_CPUFREQ_SINGLE_THREAD?
> That makes sense for both UP and SMP.
>
Indeed. This sounds more appropriate and also reflects what actually happens
with a UP or shared clock SMP case.

Regards
Santosh
Shawn Guo July 21, 2012, 6:38 a.m. UTC | #7
On Sat, Jul 21, 2012 at 10:34:29AM +0530, Shilimkar, Santosh wrote:
> On Fri, Jul 20, 2012 at 9:20 PM, Turquette, Mike <mturquette@ti.com> wrote:
> > How about cpufreq-single-thread.c and CONFIG_CPUFREQ_SINGLE_THREAD?
> > That makes sense for both UP and SMP.
> >
> Indeed. This sounds more appropriate and also reflects what actually happens
> with a UP or shared clock SMP case.
> 
While I agree with this observation, the suggested naming does not
reflect the rationale of CPU0 though, which is really important for
driver to work, and is exactly the thing I like about *_CPU0 naming.

The driver needs the following stuff around CPU0 to be functional.

- Device tree node /cpus/cpu@0
- clk lookup with dev_id being "cpu0"
- regulator with id being "cpu0"

I think the *_CPU0 naming does a better job on emphasising those in the
first place.
Mark Brown July 26, 2012, 1:11 p.m. UTC | #8
On Thu, Jul 19, 2012 at 11:54:41PM +0800, Shawn Guo wrote:

> +Optional properties:
> +- transition-latency: Specify the possible maximum transition latency,
> +  in unit of nanoseconds.

This should make it clear that the transition latency being documented
here is just that for the core clock change itself, there may be other
sources of latency like the regulator ramp time or reprogramming PLLs.

> +	if (cpu_reg) {
> +		opp = opp_find_freq_ceil(cpu_dev, &freq_Hz);
> +		if (IS_ERR(opp)) {
> +			pr_err("failed to find OPP for %ld\n", freq_Hz);
> +			return PTR_ERR(opp);
> +		}
> +		volt = opp_get_voltage(opp);
> +		tol = volt * voltage_tolerance / 100;
> +		volt_old = regulator_get_voltage(cpu_reg);

regulator_get_voltage() might be a bit expensive, many regulators will
query the hardware every time.  Since it's just for diagnostic purposes
it'd be better to not bother and let people read the historical
information from the logs.

> +	/* scaling up?  scale voltage before frequency */
> +	if (cpu_reg && freqs.new > freqs.old) {
> +		if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {

This is a totally sane and sensible use case for specifying a voltage
range, we should move it into the regulator core for other users so you
can just specify the voltage and the tolerance directly.

It is a little sad here as it means that we loose the ability to do
frequency only scaling if there's no regulator which is nice.
Richard Zhao July 27, 2012, 2:04 a.m. UTC | #9
On Sat, Jul 21, 2012 at 02:38:13PM +0800, Shawn Guo wrote:
> On Sat, Jul 21, 2012 at 10:34:29AM +0530, Shilimkar, Santosh wrote:
> > On Fri, Jul 20, 2012 at 9:20 PM, Turquette, Mike <mturquette@ti.com> wrote:
> > > How about cpufreq-single-thread.c and CONFIG_CPUFREQ_SINGLE_THREAD?
> > > That makes sense for both UP and SMP.
> > >
> > Indeed. This sounds more appropriate and also reflects what actually happens
> > with a UP or shared clock SMP case.
> > 
> While I agree with this observation, the suggested naming does not
> reflect the rationale of CPU0 though, which is really important for
> driver to work, and is exactly the thing I like about *_CPU0 naming.
> 
> The driver needs the following stuff around CPU0 to be functional.
> 
> - Device tree node /cpus/cpu@0
> - clk lookup with dev_id being "cpu0"
> - regulator with id being "cpu0"
> 
> I think the *_CPU0 naming does a better job on emphasising those in the
> first place.
Are we going to support the case in the future that cores has
independent freq? If yes, maybe we should have a more common name.

Thanks
Richard
> 
> -- 
> Regards,
> Shawn
Richard Zhao July 27, 2012, 2:13 a.m. UTC | #10
On Thu, Jul 26, 2012 at 02:11:21PM +0100, Mark Brown wrote:
> On Thu, Jul 19, 2012 at 11:54:41PM +0800, Shawn Guo wrote:
> 
> > +Optional properties:
> > +- transition-latency: Specify the possible maximum transition latency,
> > +  in unit of nanoseconds.
> 
> This should make it clear that the transition latency being documented
> here is just that for the core clock change itself, there may be other
> sources of latency like the regulator ramp time or reprogramming PLLs.
I think it's the total time and board dts can over-write it if it
needs. Different transitions between different operating points may
differ, and regulator may be able to indicate the transition time but
clk don't have such api, and probably not worth to have.
 
Thanks
Richard
Richard Zhao July 27, 2012, 6:33 a.m. UTC | #11
On Thu, Jul 19, 2012 at 11:54:41PM +0800, Shawn Guo wrote:
> It adds a generic cpufreq driver for CPU0 frequency management based on
> clk, regulator, OPP and device tree support.  It can support both
> uniprocessor (UP) and those symmetric multiprocessor (SMP) systems which
> share clock and voltage across all CPUs.
> 
> Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
> ---
>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |   58 +++++
>  drivers/cpufreq/Kconfig                            |   11 +
>  drivers/cpufreq/Makefile                           |    2 +
>  drivers/cpufreq/cpufreq-cpu0.c                     |  235 ++++++++++++++++++++
>  4 files changed, 306 insertions(+), 0 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>  create mode 100644 drivers/cpufreq/cpufreq-cpu0.c
> 
> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> new file mode 100644
> index 0000000..94799bc
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> @@ -0,0 +1,58 @@
> +Generic cpufreq driver for CPU0
It's going to have generic name if it will become more generic.

> +
> +It is a generic cpufreq driver for CPU0 frequency management.  It
> +supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
> +systems which share clock and voltage across all CPUs.
> +
> +Both required and optional properties listed below must be defined
> +under node /cpus/cpu@0.
> +
> +Required properties:
> +- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt
> +  for details
> +
> +Optional properties:
> +- transition-latency: Specify the possible maximum transition latency,
> +  in unit of nanoseconds.
> +- voltage-tolerance: Specify the CPU voltage tolerance in percentage.
Why do we have the same tolerance for all points? I think you can
either remove tolerance or add it to opps.
> +
> +Examples:
> +
> +cpus {
> +	#address-cells = <1>;
> +	#size-cells = <0>;
> +
> +	cpu@0 {
> +		compatible = "arm,cortex-a9";
> +		reg = <0>;
> +		next-level-cache = <&L2>;
> +		operating-points = <
> +			/* kHz    uV    en */
> +			1200000 1275000 0
> +			996000  1225000 1
> +			792000  1100000 1
> +			672000  1100000 0
> +			396000  950000  1
> +			198000  850000  1
> +		>;
> +		transition-latency = <61036>; /* two CLK32 periods */
> +	};
> +
> +	cpu@1 {
> +		compatible = "arm,cortex-a9";
> +		reg = <1>;
> +		next-level-cache = <&L2>;
> +	};
> +
> +	cpu@2 {
> +		compatible = "arm,cortex-a9";
> +		reg = <2>;
> +		next-level-cache = <&L2>;
> +	};
> +
> +	cpu@3 {
> +		compatible = "arm,cortex-a9";
> +		reg = <3>;
> +		next-level-cache = <&L2>;
> +	};
> +};
> diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
> index e24a2a1..e77a1e0 100644
> --- a/drivers/cpufreq/Kconfig
> +++ b/drivers/cpufreq/Kconfig
> @@ -179,6 +179,17 @@ config CPU_FREQ_GOV_CONSERVATIVE
>  
>  	  If in doubt, say N.
>  
> +config GENERIC_CPUFREQ_CPU0
> +	bool "Generic cpufreq driver for CPU0"
> +	depends on HAVE_CLK && REGULATOR && PM_OPP && OF
> +	select CPU_FREQ_TABLE
> +	help
> +	  This adds a generic cpufreq driver for CPU0 frequency management.
> +	  It supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
> +	  systems which share clock and voltage across all CPUs.
> +
> +	  If in doubt, say N.
> +
>  menu "x86 CPU frequency scaling drivers"
>  depends on X86
>  source "drivers/cpufreq/Kconfig.x86"
> diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
> index 9531fc2..a378ed2 100644
> --- a/drivers/cpufreq/Makefile
> +++ b/drivers/cpufreq/Makefile
> @@ -13,6 +13,8 @@ obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
>  # CPUfreq cross-arch helpers
>  obj-$(CONFIG_CPU_FREQ_TABLE)		+= freq_table.o
>  
> +obj-$(CONFIG_GENERIC_CPUFREQ_CPU0)	+= cpufreq-cpu0.o
> +
>  ##################################################################################
>  # x86 drivers.
>  # Link order matters. K8 is preferred to ACPI because of firmware bugs in early
> diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
> new file mode 100644
> index 0000000..f1bb7f37
> --- /dev/null
> +++ b/drivers/cpufreq/cpufreq-cpu0.c
> @@ -0,0 +1,235 @@
> +/*
> + * Copyright (C) 2012 Freescale Semiconductor, Inc.
> + *
> + * Reuse some codes from drivers/cpufreq/omap-cpufreq.c
> + *
> + * The code contained herein is licensed under the GNU General Public
> + * License. You may obtain a copy of the GNU General Public License
> + * Version 2 or later at the following locations:
> + *
> + * http://www.opensource.org/licenses/gpl-license.html
> + * http://www.gnu.org/copyleft/gpl.html
> + */
> +
> +#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
> +
> +#include <linux/clk.h>
> +#include <linux/cpu.h>
> +#include <linux/cpufreq.h>
> +#include <linux/err.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/opp.h>
> +#include <linux/regulator/consumer.h>
> +#include <linux/slab.h>
> +
> +/* voltage tolerance in percentage */
> +static unsigned int voltage_tolerance;
> +
> +static struct device *cpu_dev;
> +static struct clk *cpu_clk;
> +static struct regulator *cpu_reg;
> +static struct cpufreq_frequency_table *freq_table;
> +
> +static int cpu0_verify_speed(struct cpufreq_policy *policy)
> +{
> +	return cpufreq_frequency_table_verify(policy, freq_table);
> +}
> +
> +static unsigned int cpu0_get_speed(unsigned int cpu)
> +{
> +	return clk_get_rate(cpu_clk) / 1000;
> +}
> +
> +static int cpu0_set_target(struct cpufreq_policy *policy,
> +			   unsigned int target_freq, unsigned int relation)
> +{
> +	struct cpufreq_freqs freqs;
> +	struct opp *opp;
> +	unsigned long freq_Hz, volt = 0, volt_old = 0, tol = 0;
> +	unsigned int index, cpu;
> +	int ret = 0;
> +
> +	ret = cpufreq_frequency_table_target(policy, freq_table, target_freq,
> +					     relation, &index);
> +	if (ret) {
> +		pr_err("failed to match target freqency %d: %d\n",
> +		       target_freq, ret);
> +		return ret;
> +	}
> +
> +	freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
> +	if (freq_Hz < 0)
> +		freq_Hz = freq_table[index].frequency * 1000;
> +	freqs.new = freq_Hz / 1000;
> +	freqs.old = clk_get_rate(cpu_clk) / 1000;
> +
> +	if (freqs.old == freqs.new)
> +		return 0;
> +
> +	for_each_cpu(cpu, policy->cpus) {
> +		freqs.cpu = cpu;
> +		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
> +	}
> +
> +	if (cpu_reg) {
> +		opp = opp_find_freq_ceil(cpu_dev, &freq_Hz);
> +		if (IS_ERR(opp)) {
> +			pr_err("failed to find OPP for %ld\n", freq_Hz);
> +			return PTR_ERR(opp);
> +		}
> +		volt = opp_get_voltage(opp);
> +		tol = volt * voltage_tolerance / 100;
> +		volt_old = regulator_get_voltage(cpu_reg);
> +	}
> +
> +	pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
> +		 freqs.old / 1000, volt_old ? volt_old / 1000 : -1,
> +		 freqs.new / 1000, volt ? volt / 1000 : -1);
> +
> +	/* scaling up?  scale voltage before frequency */
> +	if (cpu_reg && freqs.new > freqs.old) {
> +		if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {
> +			pr_warn("Unable to scale voltage up\n");
> +			freqs.new = freqs.old;
> +			goto done;
> +		}
> +	}
> +
> +	ret = clk_set_rate(cpu_clk, freqs.new * 1000);
Check return value and fall back to previous point if it needs?
> +
> +	/* scaling down?  scale voltage after frequency */
> +	if (cpu_reg && (freqs.new < freqs.old)) {
> +		if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {
> +			pr_warn("Unable to scale voltage down\n");
> +			ret = clk_set_rate(cpu_clk, freqs.old * 1000);
> +			freqs.new = freqs.old;
> +			goto done;
> +		}
> +	}
> +
> +done:
> +	for_each_cpu(cpu, policy->cpus) {
> +		freqs.cpu = cpu;
> +		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
> +	}
> +
> +	return ret;
> +}
> +
> +static int cpu0_cpufreq_init(struct cpufreq_policy *policy)
> +{
> +	struct device_node *np;
> +	int ret;
> +
> +	/* Initialize resources via CPU0 */
> +	if (policy->cpu != 0)
> +		return -EINVAL;
> +
> +	np = of_find_node_by_path("/cpus/cpu@0");
> +	if (!np) {
> +		pr_err("failed to find cpu0 node\n");
> +		return -ENOENT;
> +	}
> +
> +	cpu_dev = get_cpu_device(0);
> +	if (!cpu_dev) {
> +		pr_err("failed to get cpu0 device\n");
> +		ret = -ENODEV;
> +		goto out_put_node;
> +	}
> +
> +	cpu_dev->of_node = np;
hmm.. sys dev can not set of_node when populate it?
And why not do it in module init? 
> +	ret = of_init_opp_table(cpu_dev);
> +	if (ret) {
> +		pr_err("failed to init OPP table\n");
> +		goto out_put_node;
> +	}
> +
> +	ret = opp_init_cpufreq_table(cpu_dev, &freq_table);
> +	if (ret) {
> +		pr_err("failed to init cpufreq table\n");
> +		goto out_put_node;
> +	}
> +
> +	ret = cpufreq_frequency_table_cpuinfo(policy, freq_table);
> +	if (ret) {
> +		pr_err("invalid frequency table for cpu0\n");
> +		goto out_free_table;
> +	}
> +
> +	if (of_property_read_u32(np, "transition-latency",
> +				 &policy->cpuinfo.transition_latency))
> +		policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
> +
> +	of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
> +
> +	cpu_clk = clk_get(cpu_dev, NULL);
> +	if (IS_ERR(cpu_clk)) {
> +		pr_err("failed to get cpu0 clock\n");
> +		ret = PTR_ERR(cpu_clk);
> +		goto out_free_table;
> +	}
> +	policy->cur = clk_get_rate(cpu_clk) / 1000;
> +
> +	cpu_reg = regulator_get(cpu_dev, "cpu0");
> +	if (IS_ERR(cpu_reg)) {
> +		pr_warn("failed to get cpu0 regulator\n");
> +		cpu_reg = NULL;
> +	}
> +
> +	/*
> +	 * The driver only supports the SMP configuartion where all processors
> +	 * share the clock and voltage and clock.  Use cpufreq affected_cpus
> +	 * interface to have all CPUs scaled together.
> +	 */
> +	policy->shared_type = CPUFREQ_SHARED_TYPE_ANY;
> +	cpumask_setall(policy->cpus);
> +
> +	cpufreq_frequency_table_get_attr(freq_table, policy->cpu);
> +
> +	of_node_put(np);
> +	return 0;
> +
> +out_free_table:
> +	opp_free_cpufreq_table(cpu_dev, &freq_table);
> +out_put_node:
> +	of_node_put(np);
> +	return ret;
> +}
> +
> +static int cpu0_cpufreq_exit(struct cpufreq_policy *policy)
> +{
> +	cpufreq_frequency_table_put_attr(policy->cpu);
> +	regulator_put(cpu_reg);
> +	clk_put(cpu_clk);
> +	opp_free_cpufreq_table(cpu_dev, &freq_table);
> +
> +	return 0;
> +}
> +
> +static struct freq_attr *cpu0_cpufreq_attr[] = {
> +	&cpufreq_freq_attr_scaling_available_freqs,
> +	NULL,
> +};
> +
> +static struct cpufreq_driver cpu0_cpufreq_driver = {
> +	.flags = CPUFREQ_STICKY,
> +	.verify = cpu0_verify_speed,
> +	.target = cpu0_set_target,
> +	.get = cpu0_get_speed,
> +	.init = cpu0_cpufreq_init,
> +	.exit = cpu0_cpufreq_exit,
> +	.name = "generic_cpu0",
> +	.attr = cpu0_cpufreq_attr,
> +};
static u32 max_freq = UINT_MAX / 1000; /* kHz */
module_param(max_freq, uint, 0);
MODULE_PARM_DESC(max_freq, "max cpu frequency in unit of kHz");

It's for debug. Make sense?

Thanks
Richard
> +
> +static int __devinit cpu0_cpufreq_driver_init(void)
> +{
> +	return cpufreq_register_driver(&cpu0_cpufreq_driver);
> +}
> +late_initcall(cpu0_cpufreq_driver_init);
> +
> +MODULE_AUTHOR("Shawn Guo <shawn.guo@linaro.org>");
> +MODULE_DESCRIPTION("Generic cpufreq driver for CPU0");
> +MODULE_LICENSE("GPL");
> -- 
> 1.7.5.4
>
Mark Brown July 27, 2012, 10:08 a.m. UTC | #12
On Fri, Jul 27, 2012 at 10:13:04AM +0800, Richard Zhao wrote:
> On Thu, Jul 26, 2012 at 02:11:21PM +0100, Mark Brown wrote:
> > On Thu, Jul 19, 2012 at 11:54:41PM +0800, Shawn Guo wrote:

> > > +Optional properties:
> > > +- transition-latency: Specify the possible maximum transition latency,
> > > +  in unit of nanoseconds.

> > This should make it clear that the transition latency being documented
> > here is just that for the core clock change itself, there may be other
> > sources of latency like the regulator ramp time or reprogramming PLLs.

> I think it's the total time and board dts can over-write it if it
> needs. Different transitions between different operating points may
> differ, and regulator may be able to indicate the transition time but
> clk don't have such api, and probably not worth to have.

That's going to be awfully manual if every board has to tweak values
(though obviously the main effect is just going to be bad decisions
rather than breakage).  I've seen several systems where the clock could
provide useful timing input here - the usual pattern is that you've got
a PLL which you can use as well as some dividers.  Transitions that only
need a divider update are extremely quick but transitions that change
the PLL setup can take much longer.  It seems better to just allow the
board maintainer to plug everything together rather than having to work
out their specific latencies to squeeze the performance out of the
system.
Shawn Guo July 30, 2012, 4:57 a.m. UTC | #13
On Fri, Jul 27, 2012 at 10:04:34AM +0800, Richard Zhao wrote:
> Are we going to support the case in the future that cores has
> independent freq? If yes, maybe we should have a more common name.
> 
I do not think we should support that with this driver.  That's one of
the reasons I named the driver this way.
Shawn Guo July 30, 2012, 6:52 a.m. UTC | #14
On Thu, Jul 26, 2012 at 02:11:21PM +0100, Mark Brown wrote:
> On Thu, Jul 19, 2012 at 11:54:41PM +0800, Shawn Guo wrote:
> 
> > +Optional properties:
> > +- transition-latency: Specify the possible maximum transition latency,
> > +  in unit of nanoseconds.
> 
> This should make it clear that the transition latency being documented
> here is just that for the core clock change itself, there may be other
> sources of latency like the regulator ramp time or reprogramming PLLs.
> 
I intended to make it be the total latency from whatever sources that
should be counted on particular system.  Rather than adding complexity
for the driver to figure out these latencies from every single source,
I would expect that people know the possible maximum latency in total
for their systems and specify it in device tree.

> > +	if (cpu_reg) {
> > +		opp = opp_find_freq_ceil(cpu_dev, &freq_Hz);
> > +		if (IS_ERR(opp)) {
> > +			pr_err("failed to find OPP for %ld\n", freq_Hz);
> > +			return PTR_ERR(opp);
> > +		}
> > +		volt = opp_get_voltage(opp);
> > +		tol = volt * voltage_tolerance / 100;
> > +		volt_old = regulator_get_voltage(cpu_reg);
> 
> regulator_get_voltage() might be a bit expensive, many regulators will
> query the hardware every time.  Since it's just for diagnostic purposes
> it'd be better to not bother and let people read the historical
> information from the logs.
> 
Ok, makes sense to me.

> > +	/* scaling up?  scale voltage before frequency */
> > +	if (cpu_reg && freqs.new > freqs.old) {
> > +		if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {
> 
> This is a totally sane and sensible use case for specifying a voltage
> range, we should move it into the regulator core for other users so you
> can just specify the voltage and the tolerance directly.
> 
Are you asking for a change on regulator_set_voltage API?  While I agree
with your comment, it's not a thing we need to necessarily do in this
series.  The change on the API requires a touch on all the existing
users.  I do not think it's nice to carry such a patch in this series.

> It is a little sad here as it means that we loose the ability to do
> frequency only scaling if there's no regulator which is nice.

I do not understand it.  The regulator is optional for the driver, and
we can still scale frequency even if there is no regulator.
Shawn Guo July 30, 2012, 8:17 a.m. UTC | #15
On Fri, Jul 27, 2012 at 02:33:35PM +0800, Richard Zhao wrote:
> > +Generic cpufreq driver for CPU0
> It's going to have generic name if it will become more generic.
> 
I'm not in the position to say that it will become even more generic.

> > +- voltage-tolerance: Specify the CPU voltage tolerance in percentage.
> Why do we have the same tolerance for all points?

Because I haven't seen any case that needs different tolerance for
different operating points.

> I think you can
> either remove tolerance or add it to opps.

I'm not going to remove it, because I have seen potential cpufreq-cpu0
candidates, e.g. omap-cpufreq, need it.  It's also improper to encode
it in operating-points, since OPP library does not have it.

> > +	ret = clk_set_rate(cpu_clk, freqs.new * 1000);
> Check return value and fall back to previous point if it needs?

Right, the voltage should be reverted back if clk_set_rate fails.

> > +	cpu_dev->of_node = np;
> hmm.. sys dev can not set of_node when populate it?

Since the sys dev is not populated from device tree, the of_node is
not set, and we have to do it here on our own.

> And why not do it in module init? 

What's the advantage of doing it in module init over here?

> static u32 max_freq = UINT_MAX / 1000; /* kHz */
> module_param(max_freq, uint, 0);
> MODULE_PARM_DESC(max_freq, "max cpu frequency in unit of kHz");
> 
> It's for debug. Make sense?
> 
It does not look so useful to me, as it never came to me when I was
debugging the driver.
Shawn Guo July 30, 2012, 8:20 a.m. UTC | #16
On Mon, Jul 30, 2012 at 02:52:20PM +0800, Shawn Guo wrote:
> On Thu, Jul 26, 2012 at 02:11:21PM +0100, Mark Brown wrote:
> > regulator_get_voltage() might be a bit expensive, many regulators will
> > query the hardware every time.  Since it's just for diagnostic purposes
> > it'd be better to not bother and let people read the historical
> > information from the logs.
> > 
> Ok, makes sense to me.
> 
In the reply to one of Richard's comment, the volt_old will be used
not only for diagnostic purpose but also for reverting voltage back
when clk_set_rate fails.
Richard Zhao July 30, 2012, 8:50 a.m. UTC | #17
On Mon, Jul 30, 2012 at 04:17:53PM +0800, Shawn Guo wrote:
> On Fri, Jul 27, 2012 at 02:33:35PM +0800, Richard Zhao wrote:
> > > +Generic cpufreq driver for CPU0
> > It's going to have generic name if it will become more generic.
> > 
> I'm not in the position to say that it will become even more generic.
> 
> > > +- voltage-tolerance: Specify the CPU voltage tolerance in percentage.
> > Why do we have the same tolerance for all points?
> 
> Because I haven't seen any case that needs different tolerance for
> different operating points.
> 
> > I think you can
> > either remove tolerance or add it to opps.
> 
> I'm not going to remove it, because I have seen potential cpufreq-cpu0
> candidates, e.g. omap-cpufreq, need it.  It's also improper to encode
> it in operating-points, since OPP library does not have it.
I think the real problem here is OPP only provide exact voltage but
regulator api needs a range.

> 
> > > +	ret = clk_set_rate(cpu_clk, freqs.new * 1000);
> > Check return value and fall back to previous point if it needs?
> 
> Right, the voltage should be reverted back if clk_set_rate fails.
> 
> > > +	cpu_dev->of_node = np;
> > hmm.. sys dev can not set of_node when populate it?
> 
> Since the sys dev is not populated from device tree, the of_node is
> not set, and we have to do it here on our own.
It sounds strange. But I think it's ok.
> 
> > And why not do it in module init? 
> 
> What's the advantage of doing it in module init over here?
IIRC, cpufreq driver init is called every time a new cpu get online.
So things that only needs to be done once can be put in module init.
> 
> > static u32 max_freq = UINT_MAX / 1000; /* kHz */
> > module_param(max_freq, uint, 0);
> > MODULE_PARM_DESC(max_freq, "max cpu frequency in unit of kHz");
> > 
> > It's for debug. Make sense?
> > 
> It does not look so useful to me, as it never came to me when I was
> debugging the driver.
Alright.

Thanks
Richard
> 
> -- 
> Regards,
> Shawn
Shawn Guo July 30, 2012, 9:24 a.m. UTC | #18
On Mon, Jul 30, 2012 at 04:50:41PM +0800, Richard Zhao wrote:
> I think the real problem here is OPP only provide exact voltage but
> regulator api needs a range.
> 
Maybe OPP will get improved on that when the real use cases occur.

> IIRC, cpufreq driver init is called every time a new cpu get online.
> So things that only needs to be done once can be put in module init.
> 
Ah, right.  Something should be moved to module init then.
Mark Brown July 30, 2012, 6:53 p.m. UTC | #19
On Mon, Jul 30, 2012 at 02:52:20PM +0800, Shawn Guo wrote:
> On Thu, Jul 26, 2012 at 02:11:21PM +0100, Mark Brown wrote:

> > This should make it clear that the transition latency being documented
> > here is just that for the core clock change itself, there may be other
> > sources of latency like the regulator ramp time or reprogramming PLLs.

> I intended to make it be the total latency from whatever sources that
> should be counted on particular system.  Rather than adding complexity
> for the driver to figure out these latencies from every single source,
> I would expect that people know the possible maximum latency in total
> for their systems and specify it in device tree.

Quite honestly this seems totally unrealistic for the majority of users,
especially given the very poor documentation for this stuff which SoC
vendors typically provide.  It's a reasonable amount of work to go back
and figure this stuff out (especially given that it should be varying
depending on the transition in question), and it's going to give us a
bunch of magic numbers in people's bindings.

> > > +	if (cpu_reg && freqs.new > freqs.old) {
> > > +		if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {

> > This is a totally sane and sensible use case for specifying a voltage
> > range, we should move it into the regulator core for other users so you
> > can just specify the voltage and the tolerance directly.

> Are you asking for a change on regulator_set_voltage API?  While I agree
> with your comment, it's not a thing we need to necessarily do in this
> series.  The change on the API requires a touch on all the existing
> users.  I do not think it's nice to carry such a patch in this series.

No, add a new API.

> > It is a little sad here as it means that we loose the ability to do
> > frequency only scaling if there's no regulator which is nice.

> I do not understand it.  The regulator is optional for the driver, and
> we can still scale frequency even if there is no regulator.

As soon as a regulator is available your code will insist that we're
able to set the voltage specified to set the frequency, and since it's
using tolerances not ranges to whatever the chip maximum is that'll mean
it'll stop doing frequency scaling at all.
Mark Brown July 30, 2012, 6:55 p.m. UTC | #20
On Mon, Jul 30, 2012 at 04:20:54PM +0800, Shawn Guo wrote:
> On Mon, Jul 30, 2012 at 02:52:20PM +0800, Shawn Guo wrote:

> > Ok, makes sense to me.

> In the reply to one of Richard's comment, the volt_old will be used
> not only for diagnostic purpose but also for reverting voltage back
> when clk_set_rate fails.

We should be able to figure out what operating point we set without
having to read it back from the hardware like this.
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
new file mode 100644
index 0000000..94799bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
@@ -0,0 +1,58 @@ 
+Generic cpufreq driver for CPU0
+
+It is a generic cpufreq driver for CPU0 frequency management.  It
+supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
+systems which share clock and voltage across all CPUs.
+
+Both required and optional properties listed below must be defined
+under node /cpus/cpu@0.
+
+Required properties:
+- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt
+  for details
+
+Optional properties:
+- transition-latency: Specify the possible maximum transition latency,
+  in unit of nanoseconds.
+- voltage-tolerance: Specify the CPU voltage tolerance in percentage.
+
+Examples:
+
+cpus {
+	#address-cells = <1>;
+	#size-cells = <0>;
+
+	cpu@0 {
+		compatible = "arm,cortex-a9";
+		reg = <0>;
+		next-level-cache = <&L2>;
+		operating-points = <
+			/* kHz    uV    en */
+			1200000 1275000 0
+			996000  1225000 1
+			792000  1100000 1
+			672000  1100000 0
+			396000  950000  1
+			198000  850000  1
+		>;
+		transition-latency = <61036>; /* two CLK32 periods */
+	};
+
+	cpu@1 {
+		compatible = "arm,cortex-a9";
+		reg = <1>;
+		next-level-cache = <&L2>;
+	};
+
+	cpu@2 {
+		compatible = "arm,cortex-a9";
+		reg = <2>;
+		next-level-cache = <&L2>;
+	};
+
+	cpu@3 {
+		compatible = "arm,cortex-a9";
+		reg = <3>;
+		next-level-cache = <&L2>;
+	};
+};
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index e24a2a1..e77a1e0 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -179,6 +179,17 @@  config CPU_FREQ_GOV_CONSERVATIVE
 
 	  If in doubt, say N.
 
+config GENERIC_CPUFREQ_CPU0
+	bool "Generic cpufreq driver for CPU0"
+	depends on HAVE_CLK && REGULATOR && PM_OPP && OF
+	select CPU_FREQ_TABLE
+	help
+	  This adds a generic cpufreq driver for CPU0 frequency management.
+	  It supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
+	  systems which share clock and voltage across all CPUs.
+
+	  If in doubt, say N.
+
 menu "x86 CPU frequency scaling drivers"
 depends on X86
 source "drivers/cpufreq/Kconfig.x86"
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 9531fc2..a378ed2 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -13,6 +13,8 @@  obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE)	+= cpufreq_conservative.o
 # CPUfreq cross-arch helpers
 obj-$(CONFIG_CPU_FREQ_TABLE)		+= freq_table.o
 
+obj-$(CONFIG_GENERIC_CPUFREQ_CPU0)	+= cpufreq-cpu0.o
+
 ##################################################################################
 # x86 drivers.
 # Link order matters. K8 is preferred to ACPI because of firmware bugs in early
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
new file mode 100644
index 0000000..f1bb7f37
--- /dev/null
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -0,0 +1,235 @@ 
+/*
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ *
+ * Reuse some codes from drivers/cpufreq/omap-cpufreq.c
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include <linux/clk.h>
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/opp.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+/* voltage tolerance in percentage */
+static unsigned int voltage_tolerance;
+
+static struct device *cpu_dev;
+static struct clk *cpu_clk;
+static struct regulator *cpu_reg;
+static struct cpufreq_frequency_table *freq_table;
+
+static int cpu0_verify_speed(struct cpufreq_policy *policy)
+{
+	return cpufreq_frequency_table_verify(policy, freq_table);
+}
+
+static unsigned int cpu0_get_speed(unsigned int cpu)
+{
+	return clk_get_rate(cpu_clk) / 1000;
+}
+
+static int cpu0_set_target(struct cpufreq_policy *policy,
+			   unsigned int target_freq, unsigned int relation)
+{
+	struct cpufreq_freqs freqs;
+	struct opp *opp;
+	unsigned long freq_Hz, volt = 0, volt_old = 0, tol = 0;
+	unsigned int index, cpu;
+	int ret = 0;
+
+	ret = cpufreq_frequency_table_target(policy, freq_table, target_freq,
+					     relation, &index);
+	if (ret) {
+		pr_err("failed to match target freqency %d: %d\n",
+		       target_freq, ret);
+		return ret;
+	}
+
+	freq_Hz = clk_round_rate(cpu_clk, freq_table[index].frequency * 1000);
+	if (freq_Hz < 0)
+		freq_Hz = freq_table[index].frequency * 1000;
+	freqs.new = freq_Hz / 1000;
+	freqs.old = clk_get_rate(cpu_clk) / 1000;
+
+	if (freqs.old == freqs.new)
+		return 0;
+
+	for_each_cpu(cpu, policy->cpus) {
+		freqs.cpu = cpu;
+		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+	}
+
+	if (cpu_reg) {
+		opp = opp_find_freq_ceil(cpu_dev, &freq_Hz);
+		if (IS_ERR(opp)) {
+			pr_err("failed to find OPP for %ld\n", freq_Hz);
+			return PTR_ERR(opp);
+		}
+		volt = opp_get_voltage(opp);
+		tol = volt * voltage_tolerance / 100;
+		volt_old = regulator_get_voltage(cpu_reg);
+	}
+
+	pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
+		 freqs.old / 1000, volt_old ? volt_old / 1000 : -1,
+		 freqs.new / 1000, volt ? volt / 1000 : -1);
+
+	/* scaling up?  scale voltage before frequency */
+	if (cpu_reg && freqs.new > freqs.old) {
+		if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {
+			pr_warn("Unable to scale voltage up\n");
+			freqs.new = freqs.old;
+			goto done;
+		}
+	}
+
+	ret = clk_set_rate(cpu_clk, freqs.new * 1000);
+
+	/* scaling down?  scale voltage after frequency */
+	if (cpu_reg && (freqs.new < freqs.old)) {
+		if (regulator_set_voltage(cpu_reg, volt - tol, volt + tol)) {
+			pr_warn("Unable to scale voltage down\n");
+			ret = clk_set_rate(cpu_clk, freqs.old * 1000);
+			freqs.new = freqs.old;
+			goto done;
+		}
+	}
+
+done:
+	for_each_cpu(cpu, policy->cpus) {
+		freqs.cpu = cpu;
+		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+	}
+
+	return ret;
+}
+
+static int cpu0_cpufreq_init(struct cpufreq_policy *policy)
+{
+	struct device_node *np;
+	int ret;
+
+	/* Initialize resources via CPU0 */
+	if (policy->cpu != 0)
+		return -EINVAL;
+
+	np = of_find_node_by_path("/cpus/cpu@0");
+	if (!np) {
+		pr_err("failed to find cpu0 node\n");
+		return -ENOENT;
+	}
+
+	cpu_dev = get_cpu_device(0);
+	if (!cpu_dev) {
+		pr_err("failed to get cpu0 device\n");
+		ret = -ENODEV;
+		goto out_put_node;
+	}
+
+	cpu_dev->of_node = np;
+	ret = of_init_opp_table(cpu_dev);
+	if (ret) {
+		pr_err("failed to init OPP table\n");
+		goto out_put_node;
+	}
+
+	ret = opp_init_cpufreq_table(cpu_dev, &freq_table);
+	if (ret) {
+		pr_err("failed to init cpufreq table\n");
+		goto out_put_node;
+	}
+
+	ret = cpufreq_frequency_table_cpuinfo(policy, freq_table);
+	if (ret) {
+		pr_err("invalid frequency table for cpu0\n");
+		goto out_free_table;
+	}
+
+	if (of_property_read_u32(np, "transition-latency",
+				 &policy->cpuinfo.transition_latency))
+		policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
+
+	of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
+
+	cpu_clk = clk_get(cpu_dev, NULL);
+	if (IS_ERR(cpu_clk)) {
+		pr_err("failed to get cpu0 clock\n");
+		ret = PTR_ERR(cpu_clk);
+		goto out_free_table;
+	}
+	policy->cur = clk_get_rate(cpu_clk) / 1000;
+
+	cpu_reg = regulator_get(cpu_dev, "cpu0");
+	if (IS_ERR(cpu_reg)) {
+		pr_warn("failed to get cpu0 regulator\n");
+		cpu_reg = NULL;
+	}
+
+	/*
+	 * The driver only supports the SMP configuartion where all processors
+	 * share the clock and voltage and clock.  Use cpufreq affected_cpus
+	 * interface to have all CPUs scaled together.
+	 */
+	policy->shared_type = CPUFREQ_SHARED_TYPE_ANY;
+	cpumask_setall(policy->cpus);
+
+	cpufreq_frequency_table_get_attr(freq_table, policy->cpu);
+
+	of_node_put(np);
+	return 0;
+
+out_free_table:
+	opp_free_cpufreq_table(cpu_dev, &freq_table);
+out_put_node:
+	of_node_put(np);
+	return ret;
+}
+
+static int cpu0_cpufreq_exit(struct cpufreq_policy *policy)
+{
+	cpufreq_frequency_table_put_attr(policy->cpu);
+	regulator_put(cpu_reg);
+	clk_put(cpu_clk);
+	opp_free_cpufreq_table(cpu_dev, &freq_table);
+
+	return 0;
+}
+
+static struct freq_attr *cpu0_cpufreq_attr[] = {
+	&cpufreq_freq_attr_scaling_available_freqs,
+	NULL,
+};
+
+static struct cpufreq_driver cpu0_cpufreq_driver = {
+	.flags = CPUFREQ_STICKY,
+	.verify = cpu0_verify_speed,
+	.target = cpu0_set_target,
+	.get = cpu0_get_speed,
+	.init = cpu0_cpufreq_init,
+	.exit = cpu0_cpufreq_exit,
+	.name = "generic_cpu0",
+	.attr = cpu0_cpufreq_attr,
+};
+
+static int __devinit cpu0_cpufreq_driver_init(void)
+{
+	return cpufreq_register_driver(&cpu0_cpufreq_driver);
+}
+late_initcall(cpu0_cpufreq_driver_init);
+
+MODULE_AUTHOR("Shawn Guo <shawn.guo@linaro.org>");
+MODULE_DESCRIPTION("Generic cpufreq driver for CPU0");
+MODULE_LICENSE("GPL");