diff mbox

[v2,1/7] cpufreq: cpufreq-cpu0: allow optional safe voltage during frequency transitions

Message ID 1390047057-2239-2-git-send-email-thomas.ab@samsung.com (mailing list archive)
State New, archived
Headers show

Commit Message

Thomas Abraham Jan. 18, 2014, 12:10 p.m. UTC
From: Thomas Abraham <thomas.ab@samsung.com>

On some platforms such as the Samsung Exynos, changing the frequency
of the CPU clock requires changing the frequency of the PLL that is
supplying the CPU clock. To change the frequency of the PLL, the CPU
clock is temporarily reparented to another parent clock.

The clock frequency of this temporary parent clock could be much higher
than the clock frequency of the PLL at the time of reparenting. Due
to the temporary increase in the CPU clock speed, the CPU (and any other
components in the CPU clock domain such as dividers, mux, etc.) have to
be operated at a higher voltage level, called the safe voltage level.
This patch adds optional support to temporarily switch to a safe voltage
level during CPU frequency transitions.

Cc: Shawn Guo <shawn.guo@linaro.org>
Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
---
 .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
 drivers/cpufreq/cpufreq-cpu0.c                     |   37 +++++++++++++++++--
 2 files changed, 40 insertions(+), 4 deletions(-)

Comments

Lukasz Majewski Jan. 20, 2014, 8:09 a.m. UTC | #1
Hi Thomas,

> From: Thomas Abraham <thomas.ab@samsung.com>
> 
> On some platforms such as the Samsung Exynos, changing the frequency
> of the CPU clock requires changing the frequency of the PLL that is
> supplying the CPU clock. To change the frequency of the PLL, the CPU
> clock is temporarily reparented to another parent clock.
> 
> The clock frequency of this temporary parent clock could be much
> higher than the clock frequency of the PLL at the time of
> reparenting. Due to the temporary increase in the CPU clock speed,
> the CPU (and any other components in the CPU clock domain such as
> dividers, mux, etc.) have to to be operated at a higher voltage
> level, called the safe voltage level. This patch adds optional
> support to temporarily switch to a safe voltage level during CPU
> frequency transitions.
> 
> Cc: Shawn Guo <shawn.guo@linaro.org>
> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> ---
>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
>  drivers/cpufreq/cpufreq-cpu0.c                     |   37
> +++++++++++++++++-- 2 files changed, 40 insertions(+), 4 deletions(-)
> 
> diff --git
> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt index
> f055515..37453ab 100644 ---
> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt +++
> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt @@ -19,6
> +19,12 @@ Optional properties:
>  - cooling-min-level:
>  - cooling-max-level:
>       Please refer to
> Documentation/devicetree/bindings/thermal/thermal.txt. +- safe-opp:
> Certain platforms require that during a opp transition,
> +  a system should not go below a particular opp level. For such
> systems,
> +  this property specifies the minimum opp to be maintained during the
> +  opp transitions. The safe-opp value is a tuple with first element
> +  representing the safe frequency and the second element
> representing the
> +  safe voltage.
>  
>  Examples:
>  
> @@ -36,6 +42,7 @@ cpus {
>  			396000  950000
>  			198000  850000
>  		>;
> +		safe-opp = <396000 950000>
>  		clock-latency = <61036>; /* two CLK32 periods */
>  		#cooling-cells = <2>;
>  		cooling-min-level = <0>;
> diff --git a/drivers/cpufreq/cpufreq-cpu0.c
> b/drivers/cpufreq/cpufreq-cpu0.c index 0c12ffc..075d3d1 100644
> --- a/drivers/cpufreq/cpufreq-cpu0.c
> +++ b/drivers/cpufreq/cpufreq-cpu0.c
> @@ -27,6 +27,8 @@
>  
>  static unsigned int transition_latency;
>  static unsigned int voltage_tolerance; /* in percentage */
> +static unsigned long safe_frequency;
> +static unsigned long safe_voltage;
>  
>  static struct device *cpu_dev;
>  static struct clk *cpu_clk;
> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct cpufreq_policy
> *policy, unsigned int index) volt_old =
> regulator_get_voltage(cpu_reg); }
>  
> -	pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
> +	pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
>  		 old_freq / 1000, volt_old ? volt_old / 1000 : -1,
>  		 new_freq / 1000, volt ? volt / 1000 : -1);
>  
>  	/* scaling up?  scale voltage before frequency */
> -	if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
> +	if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
> +				new_freq >= safe_frequency) {
>  		ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>  		if (ret) {
>  			pr_err("failed to scale voltage up: %d\n",
> ret); return ret;
>  		}
> +	} else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
> +		/*
> +		 * the scaled up voltage level for the new_freq is
> lower
> +		 * than the safe voltage level. so set safe_voltage
> +		 * as the intermediate voltage level and revert it
> +		 * back after the frequency has been changed.
> +		 */
> +		ret = regulator_set_voltage_tol(cpu_reg,
> safe_voltage, tol);
> +		if (ret) {
> +			pr_err("failed to set safe voltage: %d\n",
> ret);
> +			return ret;
> +		}
>  	}
>  
>  	ret = clk_set_rate(cpu_clk, freq_exact);
> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct cpufreq_policy
> *policy, unsigned int index) }
>  
>  	/* scaling down?  scale voltage after frequency */
> -	if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
> +	if (!IS_ERR(cpu_reg) &&
> +			(new_freq < old_freq || new_freq <
> safe_frequency)) { ret = regulator_set_voltage_tol(cpu_reg, volt,
> tol); if (ret) {
>  			pr_err("failed to scale voltage down: %d\n",
> ret); @@ -116,6 +132,8 @@ static struct cpufreq_driver
> cpu0_cpufreq_driver = { 
>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
>  {
> +	const struct property *prop;
> +	struct dev_pm_opp *opp;
>  	struct device_node *np;
>  	int ret;
>  
> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct
> platform_device *pdev) goto out_put_node;
>  	}
>  
> +	prop = of_find_property(np, "safe-opp", NULL);
> +	if (prop) {
> +		if (prop->value && (prop->length / sizeof(u32)) ==
> 2) {
> +			const __be32 *val;
> +			val = prop->value;
> +			safe_frequency = be32_to_cpup(val++);
> +			safe_voltage = be32_to_cpup(val);
> +		} else {
> +			pr_err("invalid safe-opp level specified\n");
> +		}
> +	}
> +
>  	of_property_read_u32(np, "voltage-tolerance",
> &voltage_tolerance); 
>  	if (of_property_read_u32(np, "clock-latency",
> &transition_latency)) transition_latency = CPUFREQ_ETERNAL;
>  
>  	if (!IS_ERR(cpu_reg)) {
> -		struct dev_pm_opp *opp;
>  		unsigned long min_uV, max_uV;
>  		int i;
>  

Reviewed-by: Lukasz Majewski <l.majewski@samsung.com>
Shawn Guo Jan. 27, 2014, 7:16 a.m. UTC | #2
On Sat, Jan 18, 2014 at 05:40:51PM +0530, Thomas Abraham wrote:
> From: Thomas Abraham <thomas.ab@samsung.com>
> 
> On some platforms such as the Samsung Exynos, changing the frequency
> of the CPU clock requires changing the frequency of the PLL that is
> supplying the CPU clock. To change the frequency of the PLL, the CPU
> clock is temporarily reparented to another parent clock.
> 
> The clock frequency of this temporary parent clock could be much higher
> than the clock frequency of the PLL at the time of reparenting. Due
> to the temporary increase in the CPU clock speed, the CPU (and any other
> components in the CPU clock domain such as dividers, mux, etc.) have to
> to be operated at a higher voltage level, called the safe voltage level.
> This patch adds optional support to temporarily switch to a safe voltage
> level during CPU frequency transitions.
> 
> Cc: Shawn Guo <shawn.guo@linaro.org>
> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> ---
>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++

The devicetree list should be copied for this change.

>  drivers/cpufreq/cpufreq-cpu0.c                     |   37 +++++++++++++++++--
>  2 files changed, 40 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> index f055515..37453ab 100644
> --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> @@ -19,6 +19,12 @@ Optional properties:
>  - cooling-min-level:
>  - cooling-max-level:
>       Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
> +- safe-opp: Certain platforms require that during a opp transition,
> +  a system should not go below a particular opp level. For such systems,
> +  this property specifies the minimum opp to be maintained during the
> +  opp transitions. The safe-opp value is a tuple with first element
> +  representing the safe frequency and the second element representing the
> +  safe voltage.
>  
>  Examples:
>  
> @@ -36,6 +42,7 @@ cpus {
>  			396000  950000
>  			198000  850000
>  		>;
> +		safe-opp = <396000 950000>
>  		clock-latency = <61036>; /* two CLK32 periods */
>  		#cooling-cells = <2>;
>  		cooling-min-level = <0>;
> diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
> index 0c12ffc..075d3d1 100644
> --- a/drivers/cpufreq/cpufreq-cpu0.c
> +++ b/drivers/cpufreq/cpufreq-cpu0.c
> @@ -27,6 +27,8 @@
>  
>  static unsigned int transition_latency;
>  static unsigned int voltage_tolerance; /* in percentage */
> +static unsigned long safe_frequency;
> +static unsigned long safe_voltage;
>  
>  static struct device *cpu_dev;
>  static struct clk *cpu_clk;
> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>  		volt_old = regulator_get_voltage(cpu_reg);
>  	}
>  
> -	pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
> +	pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",

This is an unnecessary change?

Otherwise,

Acked-by: Shawn Guo <shawn.guo@linaro.org>

Shawn

>  		 old_freq / 1000, volt_old ? volt_old / 1000 : -1,
>  		 new_freq / 1000, volt ? volt / 1000 : -1);
>  
>  	/* scaling up?  scale voltage before frequency */
> -	if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
> +	if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
> +				new_freq >= safe_frequency) {
>  		ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>  		if (ret) {
>  			pr_err("failed to scale voltage up: %d\n", ret);
>  			return ret;
>  		}
> +	} else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
> +		/*
> +		 * the scaled up voltage level for the new_freq is lower
> +		 * than the safe voltage level. so set safe_voltage
> +		 * as the intermediate voltage level and revert it
> +		 * back after the frequency has been changed.
> +		 */
> +		ret = regulator_set_voltage_tol(cpu_reg, safe_voltage, tol);
> +		if (ret) {
> +			pr_err("failed to set safe voltage: %d\n", ret);
> +			return ret;
> +		}
>  	}
>  
>  	ret = clk_set_rate(cpu_clk, freq_exact);
> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>  	}
>  
>  	/* scaling down?  scale voltage after frequency */
> -	if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
> +	if (!IS_ERR(cpu_reg) &&
> +			(new_freq < old_freq || new_freq < safe_frequency)) {
>  		ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>  		if (ret) {
>  			pr_err("failed to scale voltage down: %d\n", ret);
> @@ -116,6 +132,8 @@ static struct cpufreq_driver cpu0_cpufreq_driver = {
>  
>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
>  {
> +	const struct property *prop;
> +	struct dev_pm_opp *opp;
>  	struct device_node *np;
>  	int ret;
>  
> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
>  		goto out_put_node;
>  	}
>  
> +	prop = of_find_property(np, "safe-opp", NULL);
> +	if (prop) {
> +		if (prop->value && (prop->length / sizeof(u32)) == 2) {
> +			const __be32 *val;
> +			val = prop->value;
> +			safe_frequency = be32_to_cpup(val++);
> +			safe_voltage = be32_to_cpup(val);
> +		} else {
> +			pr_err("invalid safe-opp level specified\n");
> +		}
> +	}
> +
>  	of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
>  
>  	if (of_property_read_u32(np, "clock-latency", &transition_latency))
>  		transition_latency = CPUFREQ_ETERNAL;
>  
>  	if (!IS_ERR(cpu_reg)) {
> -		struct dev_pm_opp *opp;
>  		unsigned long min_uV, max_uV;
>  		int i;
>  
> -- 
> 1.6.6.rc2
>
Mike Turquette Jan. 27, 2014, 8:25 p.m. UTC | #3
Quoting Thomas Abraham (2014-01-18 04:10:51)
> From: Thomas Abraham <thomas.ab@samsung.com>
> 
> On some platforms such as the Samsung Exynos, changing the frequency
> of the CPU clock requires changing the frequency of the PLL that is
> supplying the CPU clock. To change the frequency of the PLL, the CPU
> clock is temporarily reparented to another parent clock.
> 
> The clock frequency of this temporary parent clock could be much higher
> than the clock frequency of the PLL at the time of reparenting. Due
> to the temporary increase in the CPU clock speed, the CPU (and any other
> components in the CPU clock domain such as dividers, mux, etc.) have to
> to be operated at a higher voltage level, called the safe voltage level.
> This patch adds optional support to temporarily switch to a safe voltage
> level during CPU frequency transitions.
> 
> Cc: Shawn Guo <shawn.guo@linaro.org>
> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>

I'm not a fan of this change. This corner case should be abstracted away
somehow. I had talked to Chander Kashyap previously about handling
voltage changes in clock notifier callbacks, which then renders any
voltage change as a trivial part of the clock rate transition. That
means that this "safe voltage" thing could be handled automagically
without any additional code in the CPUfreq driver.

There are two nice ways to do this with the clock framework. First is
explicit re-parenting with voltage scaling done in the clock rate-change
notifiers:

clk_set_parent(cpu_clk, temp_parent);
/* implicit voltage scaling to "safe voltage" happens above */
clk_set_rate(pll, some_rate);
clk_set_parent(cpu_clk, pll);
/* implicit voltage scaling to nominal OPP voltage happens above */

The above sequence would require a separate exynos CPUfreq driver, due
to the added clk_set_parent logic.

The second way to do this is to abstract the clk re-muxing logic out
into the clk driver, which would allow cpufreq-cpu0 to be used for the
exynos chips.

I'm more a fan of explicitly listing the Exact Steps for the cpu opp
transition in a separate exynos-specific CPUfreq driver, but that's
probably an unpopular view.

Regards,
Mike

> ---
>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
>  drivers/cpufreq/cpufreq-cpu0.c                     |   37 +++++++++++++++++--
>  2 files changed, 40 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> index f055515..37453ab 100644
> --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> @@ -19,6 +19,12 @@ Optional properties:
>  - cooling-min-level:
>  - cooling-max-level:
>       Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
> +- safe-opp: Certain platforms require that during a opp transition,
> +  a system should not go below a particular opp level. For such systems,
> +  this property specifies the minimum opp to be maintained during the
> +  opp transitions. The safe-opp value is a tuple with first element
> +  representing the safe frequency and the second element representing the
> +  safe voltage.
>  
>  Examples:
>  
> @@ -36,6 +42,7 @@ cpus {
>                         396000  950000
>                         198000  850000
>                 >;
> +               safe-opp = <396000 950000>
>                 clock-latency = <61036>; /* two CLK32 periods */
>                 #cooling-cells = <2>;
>                 cooling-min-level = <0>;
> diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
> index 0c12ffc..075d3d1 100644
> --- a/drivers/cpufreq/cpufreq-cpu0.c
> +++ b/drivers/cpufreq/cpufreq-cpu0.c
> @@ -27,6 +27,8 @@
>  
>  static unsigned int transition_latency;
>  static unsigned int voltage_tolerance; /* in percentage */
> +static unsigned long safe_frequency;
> +static unsigned long safe_voltage;
>  
>  static struct device *cpu_dev;
>  static struct clk *cpu_clk;
> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>                 volt_old = regulator_get_voltage(cpu_reg);
>         }
>  
> -       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
> +       pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
>                  old_freq / 1000, volt_old ? volt_old / 1000 : -1,
>                  new_freq / 1000, volt ? volt / 1000 : -1);
>  
>         /* scaling up?  scale voltage before frequency */
> -       if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
> +       if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
> +                               new_freq >= safe_frequency) {
>                 ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>                 if (ret) {
>                         pr_err("failed to scale voltage up: %d\n", ret);
>                         return ret;
>                 }
> +       } else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
> +               /*
> +                * the scaled up voltage level for the new_freq is lower
> +                * than the safe voltage level. so set safe_voltage
> +                * as the intermediate voltage level and revert it
> +                * back after the frequency has been changed.
> +                */
> +               ret = regulator_set_voltage_tol(cpu_reg, safe_voltage, tol);
> +               if (ret) {
> +                       pr_err("failed to set safe voltage: %d\n", ret);
> +                       return ret;
> +               }
>         }
>  
>         ret = clk_set_rate(cpu_clk, freq_exact);
> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>         }
>  
>         /* scaling down?  scale voltage after frequency */
> -       if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
> +       if (!IS_ERR(cpu_reg) &&
> +                       (new_freq < old_freq || new_freq < safe_frequency)) {
>                 ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>                 if (ret) {
>                         pr_err("failed to scale voltage down: %d\n", ret);
> @@ -116,6 +132,8 @@ static struct cpufreq_driver cpu0_cpufreq_driver = {
>  
>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
>  {
> +       const struct property *prop;
> +       struct dev_pm_opp *opp;
>         struct device_node *np;
>         int ret;
>  
> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
>                 goto out_put_node;
>         }
>  
> +       prop = of_find_property(np, "safe-opp", NULL);
> +       if (prop) {
> +               if (prop->value && (prop->length / sizeof(u32)) == 2) {
> +                       const __be32 *val;
> +                       val = prop->value;
> +                       safe_frequency = be32_to_cpup(val++);
> +                       safe_voltage = be32_to_cpup(val);
> +               } else {
> +                       pr_err("invalid safe-opp level specified\n");
> +               }
> +       }
> +
>         of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
>  
>         if (of_property_read_u32(np, "clock-latency", &transition_latency))
>                 transition_latency = CPUFREQ_ETERNAL;
>  
>         if (!IS_ERR(cpu_reg)) {
> -               struct dev_pm_opp *opp;
>                 unsigned long min_uV, max_uV;
>                 int i;
>  
> -- 
> 1.6.6.rc2
>
Thomas Abraham Jan. 28, 2014, 4:30 a.m. UTC | #4
Hi Shawn,

On Mon, Jan 27, 2014 at 12:46 PM, Shawn Guo <shawn.guo@linaro.org> wrote:
> On Sat, Jan 18, 2014 at 05:40:51PM +0530, Thomas Abraham wrote:
>> From: Thomas Abraham <thomas.ab@samsung.com>
>>
>> On some platforms such as the Samsung Exynos, changing the frequency
>> of the CPU clock requires changing the frequency of the PLL that is
>> supplying the CPU clock. To change the frequency of the PLL, the CPU
>> clock is temporarily reparented to another parent clock.
>>
>> The clock frequency of this temporary parent clock could be much higher
>> than the clock frequency of the PLL at the time of reparenting. Due
>> to the temporary increase in the CPU clock speed, the CPU (and any other
>> components in the CPU clock domain such as dividers, mux, etc.) have to
>> to be operated at a higher voltage level, called the safe voltage level.
>> This patch adds optional support to temporarily switch to a safe voltage
>> level during CPU frequency transitions.
>>
>> Cc: Shawn Guo <shawn.guo@linaro.org>
>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>> ---
>>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
>
> The devicetree list should be copied for this change.

Okay, will do in the next version.

>
>>  drivers/cpufreq/cpufreq-cpu0.c                     |   37 +++++++++++++++++--
>>  2 files changed, 40 insertions(+), 4 deletions(-)
>>
>> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>> index f055515..37453ab 100644
>> --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>> @@ -19,6 +19,12 @@ Optional properties:
>>  - cooling-min-level:
>>  - cooling-max-level:
>>       Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
>> +- safe-opp: Certain platforms require that during a opp transition,
>> +  a system should not go below a particular opp level. For such systems,
>> +  this property specifies the minimum opp to be maintained during the
>> +  opp transitions. The safe-opp value is a tuple with first element
>> +  representing the safe frequency and the second element representing the
>> +  safe voltage.
>>
>>  Examples:
>>
>> @@ -36,6 +42,7 @@ cpus {
>>                       396000  950000
>>                       198000  850000
>>               >;
>> +             safe-opp = <396000 950000>
>>               clock-latency = <61036>; /* two CLK32 periods */
>>               #cooling-cells = <2>;
>>               cooling-min-level = <0>;
>> diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
>> index 0c12ffc..075d3d1 100644
>> --- a/drivers/cpufreq/cpufreq-cpu0.c
>> +++ b/drivers/cpufreq/cpufreq-cpu0.c
>> @@ -27,6 +27,8 @@
>>
>>  static unsigned int transition_latency;
>>  static unsigned int voltage_tolerance; /* in percentage */
>> +static unsigned long safe_frequency;
>> +static unsigned long safe_voltage;
>>
>>  static struct device *cpu_dev;
>>  static struct clk *cpu_clk;
>> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>>               volt_old = regulator_get_voltage(cpu_reg);
>>       }
>>
>> -     pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
>> +     pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
>
> This is an unnecessary change?

Yes, sorry, I missed that.

>
> Otherwise,
>
> Acked-by: Shawn Guo <shawn.guo@linaro.org>

Thanks for your review.

Regards,
Thomas.

>
> Shawn
>
>>                old_freq / 1000, volt_old ? volt_old / 1000 : -1,
>>                new_freq / 1000, volt ? volt / 1000 : -1);
>>
>>       /* scaling up?  scale voltage before frequency */
>> -     if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
>> +     if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
>> +                             new_freq >= safe_frequency) {
>>               ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>>               if (ret) {
>>                       pr_err("failed to scale voltage up: %d\n", ret);
>>                       return ret;
>>               }
>> +     } else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
>> +             /*
>> +              * the scaled up voltage level for the new_freq is lower
>> +              * than the safe voltage level. so set safe_voltage
>> +              * as the intermediate voltage level and revert it
>> +              * back after the frequency has been changed.
>> +              */
>> +             ret = regulator_set_voltage_tol(cpu_reg, safe_voltage, tol);
>> +             if (ret) {
>> +                     pr_err("failed to set safe voltage: %d\n", ret);
>> +                     return ret;
>> +             }
>>       }
>>
>>       ret = clk_set_rate(cpu_clk, freq_exact);
>> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>>       }
>>
>>       /* scaling down?  scale voltage after frequency */
>> -     if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
>> +     if (!IS_ERR(cpu_reg) &&
>> +                     (new_freq < old_freq || new_freq < safe_frequency)) {
>>               ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>>               if (ret) {
>>                       pr_err("failed to scale voltage down: %d\n", ret);
>> @@ -116,6 +132,8 @@ static struct cpufreq_driver cpu0_cpufreq_driver = {
>>
>>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
>>  {
>> +     const struct property *prop;
>> +     struct dev_pm_opp *opp;
>>       struct device_node *np;
>>       int ret;
>>
>> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
>>               goto out_put_node;
>>       }
>>
>> +     prop = of_find_property(np, "safe-opp", NULL);
>> +     if (prop) {
>> +             if (prop->value && (prop->length / sizeof(u32)) == 2) {
>> +                     const __be32 *val;
>> +                     val = prop->value;
>> +                     safe_frequency = be32_to_cpup(val++);
>> +                     safe_voltage = be32_to_cpup(val);
>> +             } else {
>> +                     pr_err("invalid safe-opp level specified\n");
>> +             }
>> +     }
>> +
>>       of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
>>
>>       if (of_property_read_u32(np, "clock-latency", &transition_latency))
>>               transition_latency = CPUFREQ_ETERNAL;
>>
>>       if (!IS_ERR(cpu_reg)) {
>> -             struct dev_pm_opp *opp;
>>               unsigned long min_uV, max_uV;
>>               int i;
>>
>> --
>> 1.6.6.rc2
>>
>
Thomas Abraham Jan. 28, 2014, 5:30 a.m. UTC | #5
Hi Mike,

On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette <mturquette@linaro.org> wrote:
> Quoting Thomas Abraham (2014-01-18 04:10:51)
>> From: Thomas Abraham <thomas.ab@samsung.com>
>>
>> On some platforms such as the Samsung Exynos, changing the frequency
>> of the CPU clock requires changing the frequency of the PLL that is
>> supplying the CPU clock. To change the frequency of the PLL, the CPU
>> clock is temporarily reparented to another parent clock.
>>
>> The clock frequency of this temporary parent clock could be much higher
>> than the clock frequency of the PLL at the time of reparenting. Due
>> to the temporary increase in the CPU clock speed, the CPU (and any other
>> components in the CPU clock domain such as dividers, mux, etc.) have to
>> to be operated at a higher voltage level, called the safe voltage level.
>> This patch adds optional support to temporarily switch to a safe voltage
>> level during CPU frequency transitions.
>>
>> Cc: Shawn Guo <shawn.guo@linaro.org>
>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>
> I'm not a fan of this change. This corner case should be abstracted away
> somehow. I had talked to Chander Kayshap previously about handling
> voltage changes in clock notifier callbacks, which then renders any
> voltage change as a trivial part of the clock rate transition. That
> means that this "safe voltage" thing could be handled automagically
> without any additional code in the CPUfreq driver.
>
> There are two nice ways to do this with the clock framework. First is
> explicit re-parenting with voltage scaling done in the clock rate-change
> notifiers:
>
> clk_set_parent(cpu_clk, temp_parent);
> /* implicit voltage scaling to "safe voltage" happens above */
> clk_set_rate(pll, some_rate);
> clk_set_parent(cpu_clk, pll);
> /* implicit voltage scaling to nominal OPP voltage happens above */
>
> The above sequence would require a separate exnyos CPUfreq driver, due
> to the added clk_set_parent logic.
>
> The second way to do this is to abstract the clk re-muxing logic out
> into the clk driver, which would allow cpufreq-cpu0 to be used for the
> exynos chips.

This is the approach this patch series takes (patch 2/7). The clock
re-muxing logic is handled by the clock driver code. The difference from
what you suggested is that the safe voltage (which may optionally be
required before doing the re-muxing) is handled here in the cpufreq-cpu0
driver.

The safe voltage setup can be done in the notifier as you suggested.
But, doing that in cpufreq-cpu0 driver will help other platforms reuse
this feature if required. Also, if done here, the regulator handling
is localized in this driver which otherwise would need to be handled
in two places, cpufreq-cpu0 driver and the clock notifier.

So I tend to prefer the approach in this patch but I am willing to
consider any suggestions. Shawn, it would be helpful if you could let
us know your thoughts on this. I am almost done with testing the v3 of
this series and want to post it so if there are any objections to the
changes in this patch, please let me know.

Thanks,
Thomas.

>
> I'm more a fan of explicitly listing the Exact Steps for the cpu opp
> transition in a separate exynos-specific CPUfreq driver, but that's
> probably an unpopular view.
>
> Regards,
> Mike
>
>> ---
>>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
>>  drivers/cpufreq/cpufreq-cpu0.c                     |   37 +++++++++++++++++--
>>  2 files changed, 40 insertions(+), 4 deletions(-)
>>
>> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>> index f055515..37453ab 100644
>> --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>> @@ -19,6 +19,12 @@ Optional properties:
>>  - cooling-min-level:
>>  - cooling-max-level:
>>       Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
>> +- safe-opp: Certain platforms require that during a opp transition,
>> +  a system should not go below a particular opp level. For such systems,
>> +  this property specifies the minimum opp to be maintained during the
>> +  opp transitions. The safe-opp value is a tuple with first element
>> +  representing the safe frequency and the second element representing the
>> +  safe voltage.
>>
>>  Examples:
>>
>> @@ -36,6 +42,7 @@ cpus {
>>                         396000  950000
>>                         198000  850000
>>                 >;
>> +               safe-opp = <396000 950000>
>>                 clock-latency = <61036>; /* two CLK32 periods */
>>                 #cooling-cells = <2>;
>>                 cooling-min-level = <0>;
>> diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
>> index 0c12ffc..075d3d1 100644
>> --- a/drivers/cpufreq/cpufreq-cpu0.c
>> +++ b/drivers/cpufreq/cpufreq-cpu0.c
>> @@ -27,6 +27,8 @@
>>
>>  static unsigned int transition_latency;
>>  static unsigned int voltage_tolerance; /* in percentage */
>> +static unsigned long safe_frequency;
>> +static unsigned long safe_voltage;
>>
>>  static struct device *cpu_dev;
>>  static struct clk *cpu_clk;
>> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>>                 volt_old = regulator_get_voltage(cpu_reg);
>>         }
>>
>> -       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
>> +       pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
>>                  old_freq / 1000, volt_old ? volt_old / 1000 : -1,
>>                  new_freq / 1000, volt ? volt / 1000 : -1);
>>
>>         /* scaling up?  scale voltage before frequency */
>> -       if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
>> +       if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
>> +                               new_freq >= safe_frequency) {
>>                 ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>>                 if (ret) {
>>                         pr_err("failed to scale voltage up: %d\n", ret);
>>                         return ret;
>>                 }
>> +       } else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
>> +               /*
>> +                * the scaled up voltage level for the new_freq is lower
>> +                * than the safe voltage level. so set safe_voltage
>> +                * as the intermediate voltage level and revert it
>> +                * back after the frequency has been changed.
>> +                */
>> +               ret = regulator_set_voltage_tol(cpu_reg, safe_voltage, tol);
>> +               if (ret) {
>> +                       pr_err("failed to set safe voltage: %d\n", ret);
>> +                       return ret;
>> +               }
>>         }
>>
>>         ret = clk_set_rate(cpu_clk, freq_exact);
>> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>>         }
>>
>>         /* scaling down?  scale voltage after frequency */
>> -       if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
>> +       if (!IS_ERR(cpu_reg) &&
>> +                       (new_freq < old_freq || new_freq < safe_frequency)) {
>>                 ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>>                 if (ret) {
>>                         pr_err("failed to scale voltage down: %d\n", ret);
>> @@ -116,6 +132,8 @@ static struct cpufreq_driver cpu0_cpufreq_driver = {
>>
>>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
>>  {
>> +       const struct property *prop;
>> +       struct dev_pm_opp *opp;
>>         struct device_node *np;
>>         int ret;
>>
>> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
>>                 goto out_put_node;
>>         }
>>
>> +       prop = of_find_property(np, "safe-opp", NULL);
>> +       if (prop) {
>> +               if (prop->value && (prop->length / sizeof(u32)) == 2) {
>> +                       const __be32 *val;
>> +                       val = prop->value;
>> +                       safe_frequency = be32_to_cpup(val++);
>> +                       safe_voltage = be32_to_cpup(val);
>> +               } else {
>> +                       pr_err("invalid safe-opp level specified\n");
>> +               }
>> +       }
>> +
>>         of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
>>
>>         if (of_property_read_u32(np, "clock-latency", &transition_latency))
>>                 transition_latency = CPUFREQ_ETERNAL;
>>
>>         if (!IS_ERR(cpu_reg)) {
>> -               struct dev_pm_opp *opp;
>>                 unsigned long min_uV, max_uV;
>>                 int i;
>>
>> --
>> 1.6.6.rc2
>>
Lukasz Majewski Jan. 28, 2014, 8:17 a.m. UTC | #6
Hi Thomas, Mike

> Hi Mike,
> 
> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette
> <mturquette@linaro.org> wrote:
> > Quoting Thomas Abraham (2014-01-18 04:10:51)
> >> From: Thomas Abraham <thomas.ab@samsung.com>
> >>
> >> On some platforms such as the Samsung Exynos, changing the
> >> frequency of the CPU clock requires changing the frequency of the
> >> PLL that is supplying the CPU clock. To change the frequency of
> >> the PLL, the CPU clock is temporarily reparented to another parent
> >> clock.
> >>
> >> The clock frequency of this temporary parent clock could be much
> >> higher than the clock frequency of the PLL at the time of
> >> reparenting. Due to the temporary increase in the CPU clock speed,
> >> the CPU (and any other components in the CPU clock domain such as
> >> dividers, mux, etc.) have to to be operated at a higher voltage
> >> level, called the safe voltage level. This patch adds optional
> >> support to temporarily switch to a safe voltage level during CPU
> >> frequency transitions.
> >>
> >> Cc: Shawn Guo <shawn.guo@linaro.org>
> >> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> >
> > I'm not a fan of this change. This corner case should be abstracted
> > away somehow. I had talked to Chander Kayshap previously about
> > handling voltage changes in clock notifier callbacks, which then
> > renders any voltage change as a trivial part of the clock rate
> > transition. That means that this "safe voltage" thing could be
> > handled automagically without any additional code in the CPUfreq
> > driver.
> >
> > There are two nice ways to do this with the clock framework. First
> > is explicit re-parenting with voltage scaling done in the clock
> > rate-change notifiers:
> >
> > clk_set_parent(cpu_clk, temp_parent);
> > /* implicit voltage scaling to "safe voltage" happens above */
> > clk_set_rate(pll, some_rate);
> > clk_set_parent(cpu_clk, pll);
> > /* implicit voltage scaling to nominal OPP voltage happens above */
> >

I must agree with Mike here. In my opinion the above approach is more
compliant with CCF (as I pointed out in my other comment - the
cpu_clk has more than one parent and we could switch between them when
needed).

> > The above sequence would require a separate exnyos CPUfreq driver,
> > due to the added clk_set_parent logic.
> >
> > The second way to do this is to abstract the clk re-muxing logic out
> > into the clk driver, which would allow cpufreq-cpu0 to be used for
> > the exynos chips.
> 
> This is the approach this patch series takes (patch 2/7). The clock
> re-muxing logic is handled by a clock driver code. The difference from
> what you suggested is that the safe voltage (that may be optionally)
> required before doing the re-muxing is handled here in cpufreq-cpu0
> driver.
> 
> The safe voltage setup can be done in the notifier as you suggested.

If the clk_set_parent() approach is not suitable, then can't we
consider using the one from highbank-cpufreq.c?

Here we have cpufreq-cpu0.c which sets voltage of the cpu_clk. 
In the highbank-cpufreq.c there are clock notifiers to change the
voltage.

Can't Exynos reuse that approach? Why should we pollute cpufreq-cpu0.c
with another solution?

> But, doing that in cpufreq-cpu0 driver will help other platforms reuse
> this feature if required. Also, if done here, the regulator handling
> is localized in this driver which otherwise would need to be handled
> in two places, cpufreq-cpu0 driver and the clock notifier.

I think that there is a logical distinction between setting voltage for
cpufreq-cpu0 related clock and increasing voltage of reparented clock.

The former fits naturally into cpufreq-cpu0, whereas the latter seems like
some corner case (as Mike pointed out) for Exynos.

> 
> So I tend to prefer the approach in this patch but I am willing to
> consider any suggestions. 

Thomas, what do you think about the highbank-cpufreq.c approach (i.e.
using clock notifiers)?

Do you think it is feasible to reuse it with Exynos?

> Shawn, it would be helpful if you could let
> us know your thoughts on this. I am almost done with testing the v3 of
> this series and want to post it so if there are any objections to the
> changes in this patch, please let me know.
> 
> Thanks,
> Thomas.
> 
> >
> > I'm more a fan of explicitly listing the Exact Steps for the cpu opp
> > transition in a separate exynos-specific CPUfreq driver, but that's
> > probably an unpopular view.
> >
> > Regards,
> > Mike
> >
> >> ---
> >>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
> >>  drivers/cpufreq/cpufreq-cpu0.c                     |   37
> >> +++++++++++++++++-- 2 files changed, 40 insertions(+), 4
> >> deletions(-)
> >>
> >> diff --git
> >> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> >> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt index
> >> f055515..37453ab 100644 ---
> >> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt +++
> >> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt @@
> >> -19,6 +19,12 @@ Optional properties:
> >>  - cooling-min-level:
> >>  - cooling-max-level:
> >>       Please refer to
> >> Documentation/devicetree/bindings/thermal/thermal.txt. +-
> >> safe-opp: Certain platforms require that during a opp transition,
> >> +  a system should not go below a particular opp level. For such
> >> systems,
> >> +  this property specifies the minimum opp to be maintained during
> >> the
> >> +  opp transitions. The safe-opp value is a tuple with first
> >> element
> >> +  representing the safe frequency and the second element
> >> representing the
> >> +  safe voltage.
> >>
> >>  Examples:
> >>
> >> @@ -36,6 +42,7 @@ cpus {
> >>                         396000  950000
> >>                         198000  850000
> >>                 >;
> >> +               safe-opp = <396000 950000>
> >>                 clock-latency = <61036>; /* two CLK32 periods */
> >>                 #cooling-cells = <2>;
> >>                 cooling-min-level = <0>;
> >> diff --git a/drivers/cpufreq/cpufreq-cpu0.c
> >> b/drivers/cpufreq/cpufreq-cpu0.c index 0c12ffc..075d3d1 100644
> >> --- a/drivers/cpufreq/cpufreq-cpu0.c
> >> +++ b/drivers/cpufreq/cpufreq-cpu0.c
> >> @@ -27,6 +27,8 @@
> >>
> >>  static unsigned int transition_latency;
> >>  static unsigned int voltage_tolerance; /* in percentage */
> >> +static unsigned long safe_frequency;
> >> +static unsigned long safe_voltage;
> >>
> >>  static struct device *cpu_dev;
> >>  static struct clk *cpu_clk;
> >> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct
> >> cpufreq_policy *policy, unsigned int index) volt_old =
> >> regulator_get_voltage(cpu_reg); }
> >>
> >> -       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
> >> +       pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
> >>                  old_freq / 1000, volt_old ? volt_old / 1000 : -1,
> >>                  new_freq / 1000, volt ? volt / 1000 : -1);
> >>
> >>         /* scaling up?  scale voltage before frequency */
> >> -       if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
> >> +       if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
> >> +                               new_freq >= safe_frequency) {
> >>                 ret = regulator_set_voltage_tol(cpu_reg, volt,
> >> tol); if (ret) {
> >>                         pr_err("failed to scale voltage up: %d\n",
> >> ret); return ret;
> >>                 }
> >> +       } else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
> >> +               /*
> >> +                * the scaled up voltage level for the new_freq is
> >> lower
> >> +                * than the safe voltage level. so set safe_voltage
> >> +                * as the intermediate voltage level and revert it
> >> +                * back after the frequency has been changed.
> >> +                */
> >> +               ret = regulator_set_voltage_tol(cpu_reg,
> >> safe_voltage, tol);
> >> +               if (ret) {
> >> +                       pr_err("failed to set safe voltage: %d\n",
> >> ret);
> >> +                       return ret;
> >> +               }
> >>         }
> >>
> >>         ret = clk_set_rate(cpu_clk, freq_exact);
> >> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct
> >> cpufreq_policy *policy, unsigned int index) }
> >>
> >>         /* scaling down?  scale voltage after frequency */
> >> -       if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
> >> +       if (!IS_ERR(cpu_reg) &&
> >> +                       (new_freq < old_freq || new_freq <
> >> safe_frequency)) { ret = regulator_set_voltage_tol(cpu_reg, volt,
> >> tol); if (ret) {
> >>                         pr_err("failed to scale voltage down:
> >> %d\n", ret); @@ -116,6 +132,8 @@ static struct cpufreq_driver
> >> cpu0_cpufreq_driver = {
> >>
> >>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
> >>  {
> >> +       const struct property *prop;
> >> +       struct dev_pm_opp *opp;
> >>         struct device_node *np;
> >>         int ret;
> >>
> >> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct
> >> platform_device *pdev) goto out_put_node;
> >>         }
> >>
> >> +       prop = of_find_property(np, "safe-opp", NULL);
> >> +       if (prop) {
> >> +               if (prop->value && (prop->length / sizeof(u32)) ==
> >> 2) {
> >> +                       const __be32 *val;
> >> +                       val = prop->value;
> >> +                       safe_frequency = be32_to_cpup(val++);
> >> +                       safe_voltage = be32_to_cpup(val);
> >> +               } else {
> >> +                       pr_err("invalid safe-opp level
> >> specified\n");
> >> +               }
> >> +       }
> >> +
> >>         of_property_read_u32(np, "voltage-tolerance",
> >> &voltage_tolerance);
> >>
> >>         if (of_property_read_u32(np, "clock-latency",
> >> &transition_latency)) transition_latency = CPUFREQ_ETERNAL;
> >>
> >>         if (!IS_ERR(cpu_reg)) {
> >> -               struct dev_pm_opp *opp;
> >>                 unsigned long min_uV, max_uV;
> >>                 int i;
> >>
> >> --
> >> 1.6.6.rc2
> >>
Thomas Abraham Jan. 28, 2014, 11:36 a.m. UTC | #7
Hi Lukasz,

On Tue, Jan 28, 2014 at 1:47 PM, Lukasz Majewski <l.majewski@samsung.com> wrote:
> Hi Thomas, Mike
>
>> Hi Mike,
>>
>> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette
>> <mturquette@linaro.org> wrote:
>> > Quoting Thomas Abraham (2014-01-18 04:10:51)
>> >> From: Thomas Abraham <thomas.ab@samsung.com>
>> >>
>> >> On some platforms such as the Samsung Exynos, changing the
>> >> frequency of the CPU clock requires changing the frequency of the
>> >> PLL that is supplying the CPU clock. To change the frequency of
>> >> the PLL, the CPU clock is temporarily reparented to another parent
>> >> clock.
>> >>
>> >> The clock frequency of this temporary parent clock could be much
>> >> higher than the clock frequency of the PLL at the time of
>> >> reparenting. Due to the temporary increase in the CPU clock speed,
>> >> the CPU (and any other components in the CPU clock domain such as
>> >> dividers, mux, etc.) have to to be operated at a higher voltage
>> >> level, called the safe voltage level. This patch adds optional
>> >> support to temporarily switch to a safe voltage level during CPU
>> >> frequency transitions.
>> >>
>> >> Cc: Shawn Guo <shawn.guo@linaro.org>
>> >> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>> >
>> > I'm not a fan of this change. This corner case should be abstracted
>> > away somehow. I had talked to Chander Kayshap previously about
>> > handling voltage changes in clock notifier callbacks, which then
>> > renders any voltage change as a trivial part of the clock rate
>> > transition. That means that this "safe voltage" thing could be
>> > handled automagically without any additional code in the CPUfreq
>> > driver.
>> >
>> > There are two nice ways to do this with the clock framework. First
>> > is explicit re-parenting with voltage scaling done in the clock
>> > rate-change notifiers:
>> >
>> > clk_set_parent(cpu_clk, temp_parent);
>> > /* implicit voltage scaling to "safe voltage" happens above */
>> > clk_set_rate(pll, some_rate);
>> > clk_set_parent(cpu_clk, pll);
>> > /* implicit voltage scaling to nominal OPP voltage happens above */
>> >
>
> I must agree with Mike here. In my opinion the above approach is more
> compliant with CCF (as I've pointed it out in my other comment - the
> cpu_clk has more than one parent and we could switch between them when
> needed).

The mux which is used to re-parent is part of the larger opaque cpu
clock type (more like the composite clock). So I am not sure how this
isn't compliant with ccf.

>
>> > The above sequence would require a separate exnyos CPUfreq driver,
>> > due to the added clk_set_parent logic.
>> >
>> > The second way to do this is to abstract the clk re-muxing logic out
>> > into the clk driver, which would allow cpufreq-cpu0 to be used for
>> > the exynos chips.
>>
>> This is the approach this patch series takes (patch 2/7). The clock
>> re-muxing logic is handled by a clock driver code. The difference from
>> what you suggested is that the safe voltage (that may be optionally)
>> required before doing the re-muxing is handled here in cpufreq-cpu0
>> driver.
>>
>> The safe voltage setup can be done in the notifier as you suggested.
>
> If the clk_set_parent() approach is not suitable, then cannot we
> consider using the one from highbank-cpufreq.c?
>
> Here we have cpufreq-cpu0.c which sets voltage of the cpu_clk.
> In the highbank-cpufreq.c there are clock notifiers to change the
> voltage.
>
> Cannot Exynos reuse such approach? Why shall we pollute cpufreq-cpu0.c
> with another solution?

The highbank-cpufreq.c file was introduced because platforms using
this driver did not have the usual regulator to control the voltage.
The first commit of this driver explains this (copied below).

"Highbank processors depend on the external ECME to perform voltage
management based on a requested frequency. Communication between the
A9 cores and the ECME happens over the pl320 IPC channel."

So those platforms had no choice but to use an alternative approach to
control the voltage (and reuse cpufreq-cpu0 as much as possible). The
case with exynos is a different one. cpufreq-cpu0 is fully re-usable
for exynos with the additional support for "safe voltage". If we agree
that there might be existing or future platforms with single
clock/voltage rail that require the "safe voltage" feature, then
adding "safe voltage" support in cpufreq-cpu0 driver seems to be the
right approach.

>
>> But, doing that in cpufreq-cpu0 driver will help other platforms reuse
>> this feature if required. Also, if done here, the regulator handling
>> is localized in this driver which otherwise would need to be handled
>> in two places, cpufreq-cpu0 driver and the clock notifier.
>
> I think that there is a logical distinction between setting voltage for
> cpufreq-cpu0 related clock and increasing voltage of reparented clock.
>
> The former fits naturally to cpufreq-cpu0, when the latter seems like
> some corner case (as Mike pointed out) for Exynos.

Agreed, it is a corner case. But for this corner case, we are
performing some additional actions on the same regulator which is used
in the normal functioning of the driver.

>
>>
>> So I tend to prefer the approach in this patch but I am willing to
>> consider any suggestions.
>
> Thomas, what do you think about highbank-cpufreq.c approach (with
> using clock notifiers)?

I have made a related comment on this above.

>
> Do you think, that it is feasible to reuse it with Exynos?

The highbank cpufreq driver is intended for a different purpose, so I
don't think it can be reused for exynos. Yes, we could add
exynos-specific hacks to the highbank driver, but how would that be
better than the approach in this patch?

>
>> Shawn, it would be helpful if you could let
>> us know your thoughts on this. I am almost done with testing the v3 of
>> this series and want to post it so if there are any objections to the
>> changes in this patch, please let me know.
>>
>> Thanks,
>> Thomas.
>>
>> >
>> > I'm more a fan of explicitly listing the Exact Steps for the cpu opp
>> > transition in a separate exynos-specific CPUfreq driver, but that's
>> > probably an unpopular view.
>> >
>> > Regards,
>> > Mike
>> >
>> >> ---
>> >>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
>> >>  drivers/cpufreq/cpufreq-cpu0.c                     |   37
>> >> +++++++++++++++++-- 2 files changed, 40 insertions(+), 4
>> >> deletions(-)
>> >>
>> >> diff --git
>> >> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>> >> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt index
>> >> f055515..37453ab 100644 ---
>> >> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt +++
>> >> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt @@
>> >> -19,6 +19,12 @@ Optional properties:
>> >>  - cooling-min-level:
>> >>  - cooling-max-level:
>> >>       Please refer to
>> >> Documentation/devicetree/bindings/thermal/thermal.txt. +-
>> >> safe-opp: Certain platforms require that during a opp transition,
>> >> +  a system should not go below a particular opp level. For such
>> >> systems,
>> >> +  this property specifies the minimum opp to be maintained during
>> >> the
>> >> +  opp transitions. The safe-opp value is a tuple with first
>> >> element
>> >> +  representing the safe frequency and the second element
>> >> representing the
>> >> +  safe voltage.
>> >>
>> >>  Examples:
>> >>
>> >> @@ -36,6 +42,7 @@ cpus {
>> >>                         396000  950000
>> >>                         198000  850000
>> >>                 >;
>> >> +               safe-opp = <396000 950000>
>> >>                 clock-latency = <61036>; /* two CLK32 periods */
>> >>                 #cooling-cells = <2>;
>> >>                 cooling-min-level = <0>;
>> >> diff --git a/drivers/cpufreq/cpufreq-cpu0.c
>> >> b/drivers/cpufreq/cpufreq-cpu0.c index 0c12ffc..075d3d1 100644
>> >> --- a/drivers/cpufreq/cpufreq-cpu0.c
>> >> +++ b/drivers/cpufreq/cpufreq-cpu0.c
>> >> @@ -27,6 +27,8 @@
>> >>
>> >>  static unsigned int transition_latency;
>> >>  static unsigned int voltage_tolerance; /* in percentage */
>> >> +static unsigned long safe_frequency;
>> >> +static unsigned long safe_voltage;
>> >>
>> >>  static struct device *cpu_dev;
>> >>  static struct clk *cpu_clk;
>> >> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct
>> >> cpufreq_policy *policy, unsigned int index) volt_old =
>> >> regulator_get_voltage(cpu_reg); }
>> >>
>> >> -       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
>> >> +       pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
>> >>                  old_freq / 1000, volt_old ? volt_old / 1000 : -1,
>> >>                  new_freq / 1000, volt ? volt / 1000 : -1);
>> >>
>> >>         /* scaling up?  scale voltage before frequency */
>> >> -       if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
>> >> +       if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
>> >> +                               new_freq >= safe_frequency) {
>> >>                 ret = regulator_set_voltage_tol(cpu_reg, volt,
>> >> tol); if (ret) {
>> >>                         pr_err("failed to scale voltage up: %d\n",
>> >> ret); return ret;
>> >>                 }
>> >> +       } else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
>> >> +               /*
>> >> +                * the scaled up voltage level for the new_freq is
>> >> lower
>> >> +                * than the safe voltage level. so set safe_voltage
>> >> +                * as the intermediate voltage level and revert it
>> >> +                * back after the frequency has been changed.
>> >> +                */
>> >> +               ret = regulator_set_voltage_tol(cpu_reg,
>> >> safe_voltage, tol);
>> >> +               if (ret) {
>> >> +                       pr_err("failed to set safe voltage: %d\n",
>> >> ret);
>> >> +                       return ret;
>> >> +               }
>> >>         }
>> >>
>> >>         ret = clk_set_rate(cpu_clk, freq_exact);
>> >> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct
>> >> cpufreq_policy *policy, unsigned int index) }
>> >>
>> >>         /* scaling down?  scale voltage after frequency */
>> >> -       if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
>> >> +       if (!IS_ERR(cpu_reg) &&
>> >> +                       (new_freq < old_freq || new_freq <
>> >> safe_frequency)) { ret = regulator_set_voltage_tol(cpu_reg, volt,
>> >> tol); if (ret) {
>> >>                         pr_err("failed to scale voltage down:
>> >> %d\n", ret); @@ -116,6 +132,8 @@ static struct cpufreq_driver
>> >> cpu0_cpufreq_driver = {
>> >>
>> >>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
>> >>  {
>> >> +       const struct property *prop;
>> >> +       struct dev_pm_opp *opp;
>> >>         struct device_node *np;
>> >>         int ret;
>> >>
>> >> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct
>> >> platform_device *pdev) goto out_put_node;
>> >>         }
>> >>
>> >> +       prop = of_find_property(np, "safe-opp", NULL);
>> >> +       if (prop) {
>> >> +               if (prop->value && (prop->length / sizeof(u32)) ==
>> >> 2) {
>> >> +                       const __be32 *val;
>> >> +                       val = prop->value;
>> >> +                       safe_frequency = be32_to_cpup(val++);
>> >> +                       safe_voltage = be32_to_cpup(val);
>> >> +               } else {
>> >> +                       pr_err("invalid safe-opp level
>> >> specified\n");
>> >> +               }
>> >> +       }
>> >> +
>> >>         of_property_read_u32(np, "voltage-tolerance",
>> >> &voltage_tolerance);
>> >>
>> >>         if (of_property_read_u32(np, "clock-latency",
>> >> &transition_latency)) transition_latency = CPUFREQ_ETERNAL;
>> >>
>> >>         if (!IS_ERR(cpu_reg)) {
>> >> -               struct dev_pm_opp *opp;
>> >>                 unsigned long min_uV, max_uV;
>> >>                 int i;
>> >>
>> >> --
>> >> 1.6.6.rc2
>> >>
>
>
>
> --
> Best regards,
>
> Lukasz Majewski
>
> Samsung R&D Institute Poland (SRPOL) | Linux Platform Group

Thanks for your comments Lukasz.

Regards,
Thomas.
Shawn Guo Jan. 28, 2014, 11:49 a.m. UTC | #8
On Tue, Jan 28, 2014 at 11:00:29AM +0530, Thomas Abraham wrote:
> Hi Mike,
> 
> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette <mturquette@linaro.org> wrote:
> > Quoting Thomas Abraham (2014-01-18 04:10:51)
> >> From: Thomas Abraham <thomas.ab@samsung.com>
> >>
> >> On some platforms such as the Samsung Exynos, changing the frequency
> >> of the CPU clock requires changing the frequency of the PLL that is
> >> supplying the CPU clock. To change the frequency of the PLL, the CPU
> >> clock is temporarily reparented to another parent clock.
> >>
> >> The clock frequency of this temporary parent clock could be much higher
> >> than the clock frequency of the PLL at the time of reparenting. Due
> >> to the temporary increase in the CPU clock speed, the CPU (and any other
> >> components in the CPU clock domain such as dividers, mux, etc.) have to
> >> to be operated at a higher voltage level, called the safe voltage level.
> >> This patch adds optional support to temporarily switch to a safe voltage
> >> level during CPU frequency transitions.
> >>
> >> Cc: Shawn Guo <shawn.guo@linaro.org>
> >> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> >
> > I'm not a fan of this change. This corner case should be abstracted away
> > somehow. I had talked to Chander Kayshap previously about handling
> > voltage changes in clock notifier callbacks, which then renders any
> > voltage change as a trivial part of the clock rate transition. That
> > means that this "safe voltage" thing could be handled automagically
> > without any additional code in the CPUfreq driver.
> >
> > There are two nice ways to do this with the clock framework. First is
> > explicit re-parenting with voltage scaling done in the clock rate-change
> > notifiers:
> >
> > clk_set_parent(cpu_clk, temp_parent);
> > /* implicit voltage scaling to "safe voltage" happens above */
> > clk_set_rate(pll, some_rate);
> > clk_set_parent(cpu_clk, pll);
> > /* implicit voltage scaling to nominal OPP voltage happens above */
> >
> > The above sequence would require a separate exnyos CPUfreq driver, due
> > to the added clk_set_parent logic.
> >
> > The second way to do this is to abstract the clk re-muxing logic out
> > into the clk driver, which would allow cpufreq-cpu0 to be used for the
> > exynos chips.
> 
> This is the approach this patch series takes (patch 2/7). The clock
> re-muxing logic is handled by a clock driver code. The difference from
> what you suggested is that the safe voltage (that may be optionally)
> required before doing the re-muxing is handled here in cpufreq-cpu0
> driver.
> 
> The safe voltage setup can be done in the notifier as you suggested.
> But, doing that in cpufreq-cpu0 driver will help other platforms reuse
> this feature if required. Also, if done here, the regulator handling
> is localized in this driver which otherwise would need to be handled
> in two places, cpufreq-cpu0 driver and the clock notifier.
> 
> So I tend to prefer the approach in this patch but I am willing to
> consider any suggestions. Shawn, it would be helpful if you could let
> us know your thoughts on this. I am almost done with testing the v3 of
> this series and want to post it so if there are any objections to the
> changes in this patch, please let me know.

To me, it would be best if we reuse cpufreq-cpu0 for exynos without any
changes to the cpufreq-cpu0 driver ;)

Shawn
Thomas Abraham Jan. 28, 2014, 12:47 p.m. UTC | #9
On Tue, Jan 28, 2014 at 5:19 PM, Shawn Guo <shawn.guo@linaro.org> wrote:
> On Tue, Jan 28, 2014 at 11:00:29AM +0530, Thomas Abraham wrote:
>> Hi Mike,
>>
>> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette <mturquette@linaro.org> wrote:
>> > Quoting Thomas Abraham (2014-01-18 04:10:51)
>> >> From: Thomas Abraham <thomas.ab@samsung.com>
>> >>
>> >> On some platforms such as the Samsung Exynos, changing the frequency
>> >> of the CPU clock requires changing the frequency of the PLL that is
>> >> supplying the CPU clock. To change the frequency of the PLL, the CPU
>> >> clock is temporarily reparented to another parent clock.
>> >>
>> >> The clock frequency of this temporary parent clock could be much higher
>> >> than the clock frequency of the PLL at the time of reparenting. Due
>> >> to the temporary increase in the CPU clock speed, the CPU (and any other
>> >> components in the CPU clock domain such as dividers, mux, etc.) have to
>> >> to be operated at a higher voltage level, called the safe voltage level.
>> >> This patch adds optional support to temporarily switch to a safe voltage
>> >> level during CPU frequency transitions.
>> >>
>> >> Cc: Shawn Guo <shawn.guo@linaro.org>
>> >> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>> >
>> > I'm not a fan of this change. This corner case should be abstracted away
>> > somehow. I had talked to Chander Kayshap previously about handling
>> > voltage changes in clock notifier callbacks, which then renders any
>> > voltage change as a trivial part of the clock rate transition. That
>> > means that this "safe voltage" thing could be handled automagically
>> > without any additional code in the CPUfreq driver.
>> >
>> > There are two nice ways to do this with the clock framework. First is
>> > explicit re-parenting with voltage scaling done in the clock rate-change
>> > notifiers:
>> >
>> > clk_set_parent(cpu_clk, temp_parent);
>> > /* implicit voltage scaling to "safe voltage" happens above */
>> > clk_set_rate(pll, some_rate);
>> > clk_set_parent(cpu_clk, pll);
>> > /* implicit voltage scaling to nominal OPP voltage happens above */
>> >
>> > The above sequence would require a separate exnyos CPUfreq driver, due
>> > to the added clk_set_parent logic.
>> >
>> > The second way to do this is to abstract the clk re-muxing logic out
>> > into the clk driver, which would allow cpufreq-cpu0 to be used for the
>> > exynos chips.
>>
>> This is the approach this patch series takes (patch 2/7). The clock
>> re-muxing logic is handled by a clock driver code. The difference from
>> what you suggested is that the safe voltage (that may be optionally)
>> required before doing the re-muxing is handled here in cpufreq-cpu0
>> driver.
>>
>> The safe voltage setup can be done in the notifier as you suggested.
>> But, doing that in cpufreq-cpu0 driver will help other platforms reuse
>> this feature if required. Also, if done here, the regulator handling
>> is localized in this driver which otherwise would need to be handled
>> in two places, cpufreq-cpu0 driver and the clock notifier.
>>
>> So I tend to prefer the approach in this patch but I am willing to
>> consider any suggestions. Shawn, it would be helpful if you could let
>> us know your thoughts on this. I am almost done with testing the v3 of
>> this series and want to post it so if there are any objections to the
>> changes in this patch, please let me know.
>
> To me, it's the best that we reuse cpufreq-cpu0 for exynos without any
> changes on cpufreq-cpu0 driver ;)

Okay, so that leaves us with only one approach: handling "safe
voltage" using clock notifier callbacks, as suggested by Mike. There
are two ways to do that - a Samsung-specific cpufreq driver handling the
clock notifiers (and reusing the cpufreq-cpu0 driver), or the Samsung clock
driver handling the clock notifiers (again reusing the cpufreq-cpu0 driver).

With the second option, the clock driver will have to handle the
regulator lookup from the cpu node, deferring regulator lookup until
the cpu and regulator devices are registered and using regulator api
inside clock driver. This seems like too much code to just manage the
"safe voltage".

So how about the first option, a Samsung-specific cpufreq driver? If
there are any other alternatives, please let me know.

Thanks,
Thomas.

>
> Shawn
>
Lukasz Majewski Jan. 28, 2014, 3:06 p.m. UTC | #10
Hi Thomas,

> Hi Lukasz,
> 
> On Tue, Jan 28, 2014 at 1:47 PM, Lukasz Majewski
> <l.majewski@samsung.com> wrote:
> > Hi Thomas, Mike
> >
> >> Hi Mike,
> >>
> >> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette
> >> <mturquette@linaro.org> wrote:
> >> > Quoting Thomas Abraham (2014-01-18 04:10:51)
> >> >> From: Thomas Abraham <thomas.ab@samsung.com>
> >> >>
> >> >> On some platforms such as the Samsung Exynos, changing the
> >> >> frequency of the CPU clock requires changing the frequency of
> >> >> the PLL that is supplying the CPU clock. To change the
> >> >> frequency of the PLL, the CPU clock is temporarily reparented
> >> >> to another parent clock.
> >> >>
> >> >> The clock frequency of this temporary parent clock could be much
> >> >> higher than the clock frequency of the PLL at the time of
> >> >> reparenting. Due to the temporary increase in the CPU clock
> >> >> speed, the CPU (and any other components in the CPU clock
> >> >> domain such as dividers, mux, etc.) have to be operated at a
> >> >> higher voltage level, called the safe voltage level. This patch
> >> >> adds optional support to temporarily switch to a safe voltage
> >> >> level during CPU frequency transitions.
> >> >>
> >> >> Cc: Shawn Guo <shawn.guo@linaro.org>
> >> >> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> >> >
> >> > I'm not a fan of this change. This corner case should be
> >> > abstracted away somehow. I had talked to Chander Kayshap
> >> > previously about handling voltage changes in clock notifier
> >> > callbacks, which then renders any voltage change as a trivial
> >> > part of the clock rate transition. That means that this "safe
> >> > voltage" thing could be handled automagically without any
> >> > additional code in the CPUfreq driver.
> >> >
> >> > There are two nice ways to do this with the clock framework.
> >> > First is explicit re-parenting with voltage scaling done in the
> >> > clock rate-change notifiers:
> >> >
> >> > clk_set_parent(cpu_clk, temp_parent);
> >> > /* implicit voltage scaling to "safe voltage" happens above */
> >> > clk_set_rate(pll, some_rate);
> >> > clk_set_parent(cpu_clk, pll);
> >> > /* implicit voltage scaling to nominal OPP voltage happens above
> >> > */
> >> >
> >
> > I must agree with Mike here. In my opinion the above approach is
> > more compliant with CCF (as I've pointed it out in my other comment
> > - the cpu_clk has more than one parent and we could switch between
> > them when needed).
> 
> The mux which is used to re-parent is part of the larger opaque cpu
> clock type (more like the composite clock). So I am not sure how this
> isn't compliant with ccf.

My point here is to use the clk_set_parent() explicitly instead of
changing the mux with writing values directly to registers.

However, I'm also aware that we must reparent quickly, so I'm OK
with your approach.

> 
> >
> >> > The above sequence would require a separate exynos CPUfreq
> >> > driver, due to the added clk_set_parent logic.
> >> >
> >> > The second way to do this is to abstract the clk re-muxing logic
> >> > out into the clk driver, which would allow cpufreq-cpu0 to be
> >> > used for the exynos chips.
> >>
> >> This is the approach this patch series takes (patch 2/7). The clock
> >> re-muxing logic is handled by a clock driver code. The difference
> >> from what you suggested is that the safe voltage (that may be
> >> optionally) required before doing the re-muxing is handled here in
> >> cpufreq-cpu0 driver.
> >>
> >> The safe voltage setup can be done in the notifier as you
> >> suggested.
> >
> > If the clk_set_parent() approach is not suitable, then cannot we
> > consider using the one from highbank-cpufreq.c?
> >
> > Here we have cpufreq-cpu0.c which sets voltage of the cpu_clk.
> > In the highbank-cpufreq.c there are clock notifiers to change the
> > voltage.
> >
> > Cannot Exynos reuse such approach? Why shall we pollute
> > cpufreq-cpu0.c with another solution?
> 
> The highbank-cpufreq.c file was introduced because platforms using
> this driver did not have the usual regulator to control the voltage.
> The first commit of this driver explains this (copied below).
> 
> "Highbank processors depend on the external ECME to perform voltage
> management based on a requested frequency. Communication between the
> A9 cores and the ECME happens over the pl320 IPC channel."
> 
> So those platforms had no choice but to use an alternative approach to
> control the voltage (and reuse cpufreq-cpu0 as much as possible). The
> case with exynos is a different one.

Highbank needs to set voltage via IPC, Exynos needs to adjust voltage
when reparenting.

Both can be recognized as unusual cases. That is why I asked if we
could reuse the same approach for Exynos.

> cpufreq-cpu0 is fully re-usable
> for exynos with the additional support for "safe voltage". If we agree
> that there might be existing or future platforms with single
> clock/voltage rail that require the "safe voltage" feature, then
> adding "safe voltage" support in cpufreq-cpu0 driver seems to be the
> right approach.

I think that Shawn's opinion will be final here.

> 
> >
> >> But, doing that in cpufreq-cpu0 driver will help other platforms
> >> reuse this feature if required. Also, if done here, the regulator
> >> handling is localized in this driver which otherwise would need to
> >> be handled in two places, cpufreq-cpu0 driver and the clock
> >> notifier.
> >
> > I think that there is a logical distinction between setting voltage
> > for cpufreq-cpu0 related clock and increasing voltage of reparented
> > clock.
> >
> > The former fits naturally to cpufreq-cpu0, when the latter seems
> > like some corner case (as Mike pointed out) for Exynos.
> 
> Agreed, it is a corner case. But for this corner case, we are
> performing some additional actions on the same regulator which is used
> in the normal functioning of the driver.
> 
> >
> >>
> >> So I tend to prefer the approach in this patch but I am willing to
> >> consider any suggestions.
> >
> > Thomas, what do you think about highbank-cpufreq.c approach (with
> > using clock notifiers)?
> 
> I have made a related comment on this above.
> 
> >
> > Do you think, that it is feasible to reuse it with Exynos?
> 
> highbank cpufreq driver is intended for a different purpose so I don't
> think it can be reused for exynos. Yes, we can make exynos specific
> hacks into highbank driver but how would that be better over the
> approach in this patch?

I think that I was misunderstood here. I wanted to ask if we could
reuse the clk notifier approach.

> 
> >
> >> Shawn, it would be helpful if you could let
> >> us know your thoughts on this. I am almost done with testing the
> >> v3 of this series and want to post it so if there are any
> >> objections to the changes in this patch, please let me know.
> >>
> >> Thanks,
> >> Thomas.
> >>
> >> >
> >> > I'm more a fan of explicitly listing the Exact Steps for the cpu
> >> > opp transition in a separate exynos-specific CPUfreq driver, but
> >> > that's probably an unpopular view.
> >> >
> >> > Regards,
> >> > Mike
> >> >
> >> >> ---
> >> >>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
> >> >>  drivers/cpufreq/cpufreq-cpu0.c                     |   37
> >> >> +++++++++++++++++-- 2 files changed, 40 insertions(+), 4
> >> >> deletions(-)
> >> >>
> >> >> diff --git
> >> >> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> >> >> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
> >> >> index f055515..37453ab 100644 ---
> >> >> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt +++
> >> >> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt @@
> >> >> -19,6 +19,12 @@ Optional properties:
> >> >>  - cooling-min-level:
> >> >>  - cooling-max-level:
> >> >>       Please refer to
> >> >> Documentation/devicetree/bindings/thermal/thermal.txt. +-
> >> >> safe-opp: Certain platforms require that during a opp
> >> >> transition,
> >> >> +  a system should not go below a particular opp level. For such
> >> >> systems,
> >> >> +  this property specifies the minimum opp to be maintained
> >> >> during the
> >> >> +  opp transitions. The safe-opp value is a tuple with first
> >> >> element
> >> >> +  representing the safe frequency and the second element
> >> >> representing the
> >> >> +  safe voltage.
> >> >>
> >> >>  Examples:
> >> >>
> >> >> @@ -36,6 +42,7 @@ cpus {
> >> >>                         396000  950000
> >> >>                         198000  850000
> >> >>                 >;
> >> >> +               safe-opp = <396000 950000>
> >> >>                 clock-latency = <61036>; /* two CLK32 periods */
> >> >>                 #cooling-cells = <2>;
> >> >>                 cooling-min-level = <0>;
> >> >> diff --git a/drivers/cpufreq/cpufreq-cpu0.c
> >> >> b/drivers/cpufreq/cpufreq-cpu0.c index 0c12ffc..075d3d1 100644
> >> >> --- a/drivers/cpufreq/cpufreq-cpu0.c
> >> >> +++ b/drivers/cpufreq/cpufreq-cpu0.c
> >> >> @@ -27,6 +27,8 @@
> >> >>
> >> >>  static unsigned int transition_latency;
> >> >>  static unsigned int voltage_tolerance; /* in percentage */
> >> >> +static unsigned long safe_frequency;
> >> >> +static unsigned long safe_voltage;
> >> >>
> >> >>  static struct device *cpu_dev;
> >> >>  static struct clk *cpu_clk;
> >> >> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct
> >> >> cpufreq_policy *policy, unsigned int index) volt_old =
> >> >> regulator_get_voltage(cpu_reg); }
> >> >>
> >> >> -       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
> >> >> +       pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
> >> >>                  old_freq / 1000, volt_old ? volt_old / 1000 :
> >> >> -1, new_freq / 1000, volt ? volt / 1000 : -1);
> >> >>
> >> >>         /* scaling up?  scale voltage before frequency */
> >> >> -       if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
> >> >> +       if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
> >> >> +                               new_freq >= safe_frequency) {
> >> >>                 ret = regulator_set_voltage_tol(cpu_reg, volt,
> >> >> tol); if (ret) {
> >> >>                         pr_err("failed to scale voltage up:
> >> >> %d\n", ret); return ret;
> >> >>                 }
> >> >> +       } else if (!IS_ERR(cpu_reg) && old_freq <
> >> >> safe_frequency) {
> >> >> +               /*
> >> >> +                * the scaled up voltage level for the new_freq
> >> >> is lower
> >> >> +                * than the safe voltage level. so set
> >> >> safe_voltage
> >> >> +                * as the intermediate voltage level and revert
> >> >> it
> >> >> +                * back after the frequency has been changed.
> >> >> +                */
> >> >> +               ret = regulator_set_voltage_tol(cpu_reg,
> >> >> safe_voltage, tol);
> >> >> +               if (ret) {
> >> >> +                       pr_err("failed to set safe voltage:
> >> >> %d\n", ret);
> >> >> +                       return ret;
> >> >> +               }
> >> >>         }
> >> >>
> >> >>         ret = clk_set_rate(cpu_clk, freq_exact);
> >> >> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct
> >> >> cpufreq_policy *policy, unsigned int index) }
> >> >>
> >> >>         /* scaling down?  scale voltage after frequency */
> >> >> -       if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
> >> >> +       if (!IS_ERR(cpu_reg) &&
> >> >> +                       (new_freq < old_freq || new_freq <
> >> >> safe_frequency)) { ret = regulator_set_voltage_tol(cpu_reg,
> >> >> volt, tol); if (ret) {
> >> >>                         pr_err("failed to scale voltage down:
> >> >> %d\n", ret); @@ -116,6 +132,8 @@ static struct cpufreq_driver
> >> >> cpu0_cpufreq_driver = {
> >> >>
> >> >>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
> >> >>  {
> >> >> +       const struct property *prop;
> >> >> +       struct dev_pm_opp *opp;
> >> >>         struct device_node *np;
> >> >>         int ret;
> >> >>
> >> >> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct
> >> >> platform_device *pdev) goto out_put_node;
> >> >>         }
> >> >>
> >> >> +       prop = of_find_property(np, "safe-opp", NULL);
> >> >> +       if (prop) {
> >> >> +               if (prop->value && (prop->length / sizeof(u32))
> >> >> == 2) {
> >> >> +                       const __be32 *val;
> >> >> +                       val = prop->value;
> >> >> +                       safe_frequency = be32_to_cpup(val++);
> >> >> +                       safe_voltage = be32_to_cpup(val);
> >> >> +               } else {
> >> >> +                       pr_err("invalid safe-opp level
> >> >> specified\n");
> >> >> +               }
> >> >> +       }
> >> >> +
> >> >>         of_property_read_u32(np, "voltage-tolerance",
> >> >> &voltage_tolerance);
> >> >>
> >> >>         if (of_property_read_u32(np, "clock-latency",
> >> >> &transition_latency)) transition_latency = CPUFREQ_ETERNAL;
> >> >>
> >> >>         if (!IS_ERR(cpu_reg)) {
> >> >> -               struct dev_pm_opp *opp;
> >> >>                 unsigned long min_uV, max_uV;
> >> >>                 int i;
> >> >>
> >> >> --
> >> >> 1.6.6.rc2
> >> >>
> >
> >
> >
> > --
> > Best regards,
> >
> > Lukasz Majewski
> >
> > Samsung R&D Institute Poland (SRPOL) | Linux Platform Group
> 
> Thanks for your comments Lukasz.
> 
> Regards,
> Thomas.
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Thomas Abraham Jan. 28, 2014, 3:15 p.m. UTC | #11
Hi Lukasz,

On Tue, Jan 28, 2014 at 8:36 PM, Lukasz Majewski <l.majewski@samsung.com> wrote:
> Hi Thomas,
>
>> Hi Lukasz,
>>
>> On Tue, Jan 28, 2014 at 1:47 PM, Lukasz Majewski
>> <l.majewski@samsung.com> wrote:
>> > Hi Thomas, Mike
>> >
>> >> Hi Mike,
>> >>
>> >> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette
>> >> <mturquette@linaro.org> wrote:
>> >> > Quoting Thomas Abraham (2014-01-18 04:10:51)
>> >> >> From: Thomas Abraham <thomas.ab@samsung.com>
>> >> >>
>> >> >> On some platforms such as the Samsung Exynos, changing the
>> >> >> frequency of the CPU clock requires changing the frequency of
>> >> >> the PLL that is supplying the CPU clock. To change the
>> >> >> frequency of the PLL, the CPU clock is temporarily reparented
>> >> >> to another parent clock.
>> >> >>
>> >> >> The clock frequency of this temporary parent clock could be much
>> >> >> higher than the clock frequency of the PLL at the time of
>> >> >> reparenting. Due to the temporary increase in the CPU clock
>> >> >> speed, the CPU (and any other components in the CPU clock
>> >> >> domain such as dividers, mux, etc.) have to be operated at a
>> >> >> higher voltage level, called the safe voltage level. This patch
>> >> >> adds optional support to temporarily switch to a safe voltage
>> >> >> level during CPU frequency transitions.
>> >> >>
>> >> >> Cc: Shawn Guo <shawn.guo@linaro.org>
>> >> >> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>> >> >
>> >> > I'm not a fan of this change. This corner case should be
>> >> > abstracted away somehow. I had talked to Chander Kayshap
>> >> > previously about handling voltage changes in clock notifier
>> >> > callbacks, which then renders any voltage change as a trivial
>> >> > part of the clock rate transition. That means that this "safe
>> >> > voltage" thing could be handled automagically without any
>> >> > additional code in the CPUfreq driver.
>> >> >
>> >> > There are two nice ways to do this with the clock framework.
>> >> > First is explicit re-parenting with voltage scaling done in the
>> >> > clock rate-change notifiers:
>> >> >
>> >> > clk_set_parent(cpu_clk, temp_parent);
>> >> > /* implicit voltage scaling to "safe voltage" happens above */
>> >> > clk_set_rate(pll, some_rate);
>> >> > clk_set_parent(cpu_clk, pll);
>> >> > /* implicit voltage scaling to nominal OPP voltage happens above
>> >> > */
>> >> >
>> >
>> > I must agree with Mike here. In my opinion the above approach is
>> > more compliant with CCF (as I've pointed it out in my other comment
>> > - the cpu_clk has more than one parent and we could switch between
>> > them when needed).
>>
>> The mux which is used to re-parent is part of the larger opaque cpu
>> clock type (more like the composite clock). So I am not sure how this
>> isn't compliant with ccf.
>
> My point here is to use the clk_set_parent() explicitly instead of
> changing the mux with writing values directly to registers.
>
> However, I'm also aware, that we must reparent quickly. so I'm OK
> with your approach.

Okay.

>
>>
>> >
>> >> > The above sequence would require a separate exynos CPUfreq
>> >> > driver, due to the added clk_set_parent logic.
>> >> >
>> >> > The second way to do this is to abstract the clk re-muxing logic
>> >> > out into the clk driver, which would allow cpufreq-cpu0 to be
>> >> > used for the exynos chips.
>> >>
>> >> This is the approach this patch series takes (patch 2/7). The clock
>> >> re-muxing logic is handled by a clock driver code. The difference
>> >> from what you suggested is that the safe voltage (that may be
>> >> optionally) required before doing the re-muxing is handled here in
>> >> cpufreq-cpu0 driver.
>> >>
>> >> The safe voltage setup can be done in the notifier as you
>> >> suggested.
>> >
>> > If the clk_set_parent() approach is not suitable, then cannot we
>> > consider using the one from highbank-cpufreq.c?
>> >
>> > Here we have cpufreq-cpu0.c which sets voltage of the cpu_clk.
>> > In the highbank-cpufreq.c there are clock notifiers to change the
>> > voltage.
>> >
>> > Cannot Exynos reuse such approach? Why shall we pollute
>> > cpufreq-cpu0.c with another solution?
>>
>> The highbank-cpufreq.c file was introduced because platforms using
>> this driver did not have the usual regulator to control the voltage.
>> The first commit of this driver explains this (copied below).
>>
>> "Highbank processors depend on the external ECME to perform voltage
>> management based on a requested frequency. Communication between the
>> A9 cores and the ECME happens over the pl320 IPC channel."
>>
>> So those platforms had no choice but to use an alternative approach to
>> control the voltage (and reuse cpufreq-cpu0 as much as possible).
>> The
>> case with exynos is a different one.
>
> Highbank needs to set voltage via IPC, Exynos needs to adjust voltage
> when reparenting.
>
> Both can be recognized as unusual cases. That is why I asked if we
> could reuse the same approach for Exynos.

Okay.

>
>> cpufreq-cpu0 is fully re-usable
>> for exynos with the additional support for "safe voltage". If we agree
>> that there might be existing or future platforms with single
>> clock/voltage rail that require the "safe voltage" feature, then
>> adding "safe voltage" support in cpufreq-cpu0 driver seems to be the
>> right approach.
>
> I think that Shawn's opinion will be final here.
>
>>
>> >
>> >> But, doing that in cpufreq-cpu0 driver will help other platforms
>> >> reuse this feature if required. Also, if done here, the regulator
>> >> handling is localized in this driver which otherwise would need to
>> >> be handled in two places, cpufreq-cpu0 driver and the clock
>> >> notifier.
>> >
>> > I think that there is a logical distinction between setting voltage
>> > for cpufreq-cpu0 related clock and increasing voltage of reparented
>> > clock.
>> >
>> > The former fits naturally to cpufreq-cpu0, when the latter seems
>> > like some corner case (as Mike pointed out) for Exynos.
>>
>> Agreed, it is a corner case. But for this corner case, we are
>> performing some additional actions on the same regulator which is used
>> in the normal functioning of the driver.
>>
>> >
>> >>
>> >> So I tend to prefer the approach in this patch but I am willing to
>> >> consider any suggestions.
>> >
>> > Thomas, what do you think about highbank-cpufreq.c approach (with
>> > using clock notifiers)?
>>
>> I have made a related comment on this above.
>>
>> >
>> > Do you think, that it is feasible to reuse it with Exynos?
>>
>> highbank cpufreq driver is intended for a different purpose so I don't
>> think it can be reused for exynos. Yes, we can make exynos specific
>> hacks into highbank driver but how would that be better over the
>> approach in this patch?
>
> I think, that I was misunderstood here. I wanted to ask if we could
> reuse the clk notifier approach.

Okay, I misunderstood your comment. We could do something similar to
highbank cpufreq driver for exynos as well. Anyways, Shawn prefers not
to add "safe voltage" support into cpufreq-cpu0 driver. So we need to
look for other options.

Thanks,
Thomas.

>
>>
>> >
>> >> Shawn, it would be helpful if you could let
>> >> us know your thoughts on this. I am almost done with testing the
>> >> v3 of this series and want to post it so if there are any
>> >> objections to the changes in this patch, please let me know.
>> >>
>> >> Thanks,
>> >> Thomas.
>> >>
>> >> >
>> >> > I'm more a fan of explicitly listing the Exact Steps for the cpu
>> >> > opp transition in a separate exynos-specific CPUfreq driver, but
>> >> > that's probably an unpopular view.
>> >> >
>> >> > Regards,
>> >> > Mike
>> >> >
>> >> >> ---
>> >> >>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
>> >> >>  drivers/cpufreq/cpufreq-cpu0.c                     |   37
>> >> >> +++++++++++++++++-- 2 files changed, 40 insertions(+), 4
>> >> >> deletions(-)
>> >> >>
>> >> >> diff --git
>> >> >> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>> >> >> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>> >> >> index f055515..37453ab 100644 ---
>> >> >> a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt +++
>> >> >> b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt @@
>> >> >> -19,6 +19,12 @@ Optional properties:
>> >> >>  - cooling-min-level:
>> >> >>  - cooling-max-level:
>> >> >>       Please refer to
>> >> >> Documentation/devicetree/bindings/thermal/thermal.txt. +-
>> >> >> safe-opp: Certain platforms require that during a opp
>> >> >> transition,
>> >> >> +  a system should not go below a particular opp level. For such
>> >> >> systems,
>> >> >> +  this property specifies the minimum opp to be maintained
>> >> >> during the
>> >> >> +  opp transitions. The safe-opp value is a tuple with first
>> >> >> element
>> >> >> +  representing the safe frequency and the second element
>> >> >> representing the
>> >> >> +  safe voltage.
>> >> >>
>> >> >>  Examples:
>> >> >>
>> >> >> @@ -36,6 +42,7 @@ cpus {
>> >> >>                         396000  950000
>> >> >>                         198000  850000
>> >> >>                 >;
>> >> >> +               safe-opp = <396000 950000>
>> >> >>                 clock-latency = <61036>; /* two CLK32 periods */
>> >> >>                 #cooling-cells = <2>;
>> >> >>                 cooling-min-level = <0>;
>> >> >> diff --git a/drivers/cpufreq/cpufreq-cpu0.c
>> >> >> b/drivers/cpufreq/cpufreq-cpu0.c index 0c12ffc..075d3d1 100644
>> >> >> --- a/drivers/cpufreq/cpufreq-cpu0.c
>> >> >> +++ b/drivers/cpufreq/cpufreq-cpu0.c
>> >> >> @@ -27,6 +27,8 @@
>> >> >>
>> >> >>  static unsigned int transition_latency;
>> >> >>  static unsigned int voltage_tolerance; /* in percentage */
>> >> >> +static unsigned long safe_frequency;
>> >> >> +static unsigned long safe_voltage;
>> >> >>
>> >> >>  static struct device *cpu_dev;
>> >> >>  static struct clk *cpu_clk;
>> >> >> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct
>> >> >> cpufreq_policy *policy, unsigned int index) volt_old =
>> >> >> regulator_get_voltage(cpu_reg); }
>> >> >>
>> >> >> -       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
>> >> >> +       pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
>> >> >>                  old_freq / 1000, volt_old ? volt_old / 1000 :
>> >> >> -1, new_freq / 1000, volt ? volt / 1000 : -1);
>> >> >>
>> >> >>         /* scaling up?  scale voltage before frequency */
>> >> >> -       if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
>> >> >> +       if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
>> >> >> +                               new_freq >= safe_frequency) {
>> >> >>                 ret = regulator_set_voltage_tol(cpu_reg, volt,
>> >> >> tol); if (ret) {
>> >> >>                         pr_err("failed to scale voltage up:
>> >> >> %d\n", ret); return ret;
>> >> >>                 }
>> >> >> +       } else if (!IS_ERR(cpu_reg) && old_freq <
>> >> >> safe_frequency) {
>> >> >> +               /*
>> >> >> +                * the scaled up voltage level for the new_freq
>> >> >> is lower
>> >> >> +                * than the safe voltage level. so set
>> >> >> safe_voltage
>> >> >> +                * as the intermediate voltage level and revert
>> >> >> it
>> >> >> +                * back after the frequency has been changed.
>> >> >> +                */
>> >> >> +               ret = regulator_set_voltage_tol(cpu_reg,
>> >> >> safe_voltage, tol);
>> >> >> +               if (ret) {
>> >> >> +                       pr_err("failed to set safe voltage:
>> >> >> %d\n", ret);
>> >> >> +                       return ret;
>> >> >> +               }
>> >> >>         }
>> >> >>
>> >> >>         ret = clk_set_rate(cpu_clk, freq_exact);
>> >> >> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct
>> >> >> cpufreq_policy *policy, unsigned int index) }
>> >> >>
>> >> >>         /* scaling down?  scale voltage after frequency */
>> >> >> -       if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
>> >> >> +       if (!IS_ERR(cpu_reg) &&
>> >> >> +                       (new_freq < old_freq || new_freq <
>> >> >> safe_frequency)) { ret = regulator_set_voltage_tol(cpu_reg,
>> >> >> volt, tol); if (ret) {
>> >> >>                         pr_err("failed to scale voltage down:
>> >> >> %d\n", ret); @@ -116,6 +132,8 @@ static struct cpufreq_driver
>> >> >> cpu0_cpufreq_driver = {
>> >> >>
>> >> >>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
>> >> >>  {
>> >> >> +       const struct property *prop;
>> >> >> +       struct dev_pm_opp *opp;
>> >> >>         struct device_node *np;
>> >> >>         int ret;
>> >> >>
>> >> >> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct
>> >> >> platform_device *pdev) goto out_put_node;
>> >> >>         }
>> >> >>
>> >> >> +       prop = of_find_property(np, "safe-opp", NULL);
>> >> >> +       if (prop) {
>> >> >> +               if (prop->value && (prop->length / sizeof(u32))
>> >> >> == 2) {
>> >> >> +                       const __be32 *val;
>> >> >> +                       val = prop->value;
>> >> >> +                       safe_frequency = be32_to_cpup(val++);
>> >> >> +                       safe_voltage = be32_to_cpup(val);
>> >> >> +               } else {
>> >> >> +                       pr_err("invalid safe-opp level
>> >> >> specified\n");
>> >> >> +               }
>> >> >> +       }
>> >> >> +
>> >> >>         of_property_read_u32(np, "voltage-tolerance",
>> >> >> &voltage_tolerance);
>> >> >>
>> >> >>         if (of_property_read_u32(np, "clock-latency",
>> >> >> &transition_latency)) transition_latency = CPUFREQ_ETERNAL;
>> >> >>
>> >> >>         if (!IS_ERR(cpu_reg)) {
>> >> >> -               struct dev_pm_opp *opp;
>> >> >>                 unsigned long min_uV, max_uV;
>> >> >>                 int i;
>> >> >>
>> >> >> --
>> >> >> 1.6.6.rc2
>> >> >>
>> >
>> >
>> >
>> > --
>> > Best regards,
>> >
>> > Lukasz Majewski
>> >
>> > Samsung R&D Institute Poland (SRPOL) | Linux Platform Group
>>
>> Thanks for your comments Lukasz.
>>
>> Regards,
>> Thomas.
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> linux-arm-kernel@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
>
>
> --
> Best regards,
>
> Lukasz Majewski
>
> Samsung R&D Institute Poland (SRPOL) | Linux Platform Group
Mike Turquette Jan. 28, 2014, 6:47 p.m. UTC | #12
On Mon, Jan 27, 2014 at 9:30 PM, Thomas Abraham <ta.omasab@gmail.com> wrote:
> Hi Mike,
>
> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette <mturquette@linaro.org> wrote:
>> Quoting Thomas Abraham (2014-01-18 04:10:51)
>>> From: Thomas Abraham <thomas.ab@samsung.com>
>>>
>>> On some platforms such as the Samsung Exynos, changing the frequency
>>> of the CPU clock requires changing the frequency of the PLL that is
>>> supplying the CPU clock. To change the frequency of the PLL, the CPU
>>> clock is temporarily reparented to another parent clock.
>>>
>>> The clock frequency of this temporary parent clock could be much higher
>>> than the clock frequency of the PLL at the time of reparenting. Due
>>> to the temporary increase in the CPU clock speed, the CPU (and any other
>>> components in the CPU clock domain such as dividers, mux, etc.) have to
>>> be operated at a higher voltage level, called the safe voltage level.
>>> This patch adds optional support to temporarily switch to a safe voltage
>>> level during CPU frequency transitions.
>>>
>>> Cc: Shawn Guo <shawn.guo@linaro.org>
>>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>>
>> I'm not a fan of this change. This corner case should be abstracted away
>> somehow. I had talked to Chander Kayshap previously about handling
>> voltage changes in clock notifier callbacks, which then renders any
>> voltage change as a trivial part of the clock rate transition. That
>> means that this "safe voltage" thing could be handled automagically
>> without any additional code in the CPUfreq driver.
>>
>> There are two nice ways to do this with the clock framework. First is
>> explicit re-parenting with voltage scaling done in the clock rate-change
>> notifiers:
>>
>> clk_set_parent(cpu_clk, temp_parent);
>> /* implicit voltage scaling to "safe voltage" happens above */
>> clk_set_rate(pll, some_rate);
>> clk_set_parent(cpu_clk, pll);
>> /* implicit voltage scaling to nominal OPP voltage happens above */
>>
>> The above sequence would require a separate exynos CPUfreq driver, due
>> to the added clk_set_parent logic.
>>
>> The second way to do this is to abstract the clk re-muxing logic out
>> into the clk driver, which would allow cpufreq-cpu0 to be used for the
>> exynos chips.
>
> This is the approach this patch series takes (patch 2/7). The clock
> re-muxing logic is handled by a clock driver code. The difference from
> what you suggested is that the safe voltage (that may be optionally)
> required before doing the re-muxing is handled here in cpufreq-cpu0
> driver.

Right, I understand the approach taken in this series and I'm not sure
it is the right one. Why does the clock driver handle the remuxing if
it is a functional dependency of the ARM core? As far as I can tell
the remux does not happen because it is necessary to generate the
required clock rate, but because we don't want to run the ARM core out
of spec for a short time while the PLL relocks. Assuming I have that
part of it right, I prefer for the parent mux operation to be a part
of the CPUfreq driver's .target callback instead of hidden away in the
clock driver.

A common pattern I'm seeing for the last 18 months is code
consolidation for the sake of code consolidation and it is not always
a good thing. Having hardware-specific machine drivers under
drivers/cpufreq/ is the right way to go, and we should only
consolidate a driver to cpufreq-cpu0 if it makes sense.

>
> The safe voltage setup can be done in the notifier as you suggested.
> But, doing that in cpufreq-cpu0 driver will help other platforms reuse
> this feature if required. Also, if done here, the regulator handling
> is localized in this driver which otherwise would need to be handled
> in two places, cpufreq-cpu0 driver and the clock notifier.

The notifiers are reusable across other platforms. And the notifier
can be entirely set up within the cpufreq driver. Code location is not
a problem. See this RFC series:
https://lkml.org/lkml/2013/7/7/110

>
> So I tend to prefer the approach in this patch but I am willing to
> consider any suggestions. Shawn, it would be helpful if you could let
> us know your thoughts on this. I am almost done with testing the v3 of
> this series and want to post it so if there are any objections to the
> changes in this patch, please let me know.

Well I wonder if the whole approach could be more generalized. The DT
bindings for CPU frequencies could be used by any platform instead of
being Exynos-specific. We could construct a binding which captures an
arbitrary clock sub-tree snapshot. By that I mean a DT binding in
which any number of clocks and their parents and rates could be
specified in a table. Separately we could have a binding that links a
given clock at a given rate to some specified regulator and voltage.
So in this way the bindings are re-usable.

These DT ideas should be considered separately from the CPUfreq notes
outlined above, and I will respond to patch #3 in this series once I
have a chance.

Thanks,
Mike

>
> Thanks,
> Thomas.
>
>>
>> I'm more a fan of explicitly listing the Exact Steps for the cpu opp
>> transition in a separate exynos-specific CPUfreq driver, but that's
>> probably an unpopular view.
>>
>> Regards,
>> Mike
>>
>>> ---
>>>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
>>>  drivers/cpufreq/cpufreq-cpu0.c                     |   37 +++++++++++++++++--
>>>  2 files changed, 40 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>>> index f055515..37453ab 100644
>>> --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>>> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>>> @@ -19,6 +19,12 @@ Optional properties:
>>>  - cooling-min-level:
>>>  - cooling-max-level:
>>>       Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
>>> +- safe-opp: Certain platforms require that during a opp transition,
>>> +  a system should not go below a particular opp level. For such systems,
>>> +  this property specifies the minimum opp to be maintained during the
>>> +  opp transitions. The safe-opp value is a tuple with first element
>>> +  representing the safe frequency and the second element representing the
>>> +  safe voltage.
>>>
>>>  Examples:
>>>
>>> @@ -36,6 +42,7 @@ cpus {
>>>                         396000  950000
>>>                         198000  850000
>>>                 >;
>>> +               safe-opp = <396000 950000>
>>>                 clock-latency = <61036>; /* two CLK32 periods */
>>>                 #cooling-cells = <2>;
>>>                 cooling-min-level = <0>;
>>> diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
>>> index 0c12ffc..075d3d1 100644
>>> --- a/drivers/cpufreq/cpufreq-cpu0.c
>>> +++ b/drivers/cpufreq/cpufreq-cpu0.c
>>> @@ -27,6 +27,8 @@
>>>
>>>  static unsigned int transition_latency;
>>>  static unsigned int voltage_tolerance; /* in percentage */
>>> +static unsigned long safe_frequency;
>>> +static unsigned long safe_voltage;
>>>
>>>  static struct device *cpu_dev;
>>>  static struct clk *cpu_clk;
>>> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>>>                 volt_old = regulator_get_voltage(cpu_reg);
>>>         }
>>>
>>> -       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
>>> +       pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
>>>                  old_freq / 1000, volt_old ? volt_old / 1000 : -1,
>>>                  new_freq / 1000, volt ? volt / 1000 : -1);
>>>
>>>         /* scaling up?  scale voltage before frequency */
>>> -       if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
>>> +       if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
>>> +                               new_freq >= safe_frequency) {
>>>                 ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>>>                 if (ret) {
>>>                         pr_err("failed to scale voltage up: %d\n", ret);
>>>                         return ret;
>>>                 }
>>> +       } else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
>>> +               /*
>>> +                * the scaled up voltage level for the new_freq is lower
>>> +                * than the safe voltage level. so set safe_voltage
>>> +                * as the intermediate voltage level and revert it
>>> +                * back after the frequency has been changed.
>>> +                */
>>> +               ret = regulator_set_voltage_tol(cpu_reg, safe_voltage, tol);
>>> +               if (ret) {
>>> +                       pr_err("failed to set safe voltage: %d\n", ret);
>>> +                       return ret;
>>> +               }
>>>         }
>>>
>>>         ret = clk_set_rate(cpu_clk, freq_exact);
>>> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>>>         }
>>>
>>>         /* scaling down?  scale voltage after frequency */
>>> -       if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
>>> +       if (!IS_ERR(cpu_reg) &&
>>> +                       (new_freq < old_freq || new_freq < safe_frequency)) {
>>>                 ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>>>                 if (ret) {
>>>                         pr_err("failed to scale voltage down: %d\n", ret);
>>> @@ -116,6 +132,8 @@ static struct cpufreq_driver cpu0_cpufreq_driver = {
>>>
>>>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
>>>  {
>>> +       const struct property *prop;
>>> +       struct dev_pm_opp *opp;
>>>         struct device_node *np;
>>>         int ret;
>>>
>>> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
>>>                 goto out_put_node;
>>>         }
>>>
>>> +       prop = of_find_property(np, "safe-opp", NULL);
>>> +       if (prop) {
>>> +               if (prop->value && (prop->length / sizeof(u32)) == 2) {
>>> +                       const __be32 *val;
>>> +                       val = prop->value;
>>> +                       safe_frequency = be32_to_cpup(val++);
>>> +                       safe_voltage = be32_to_cpup(val);
>>> +               } else {
>>> +                       pr_err("invalid safe-opp level specified\n");
>>> +               }
>>> +       }
>>> +
>>>         of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
>>>
>>>         if (of_property_read_u32(np, "clock-latency", &transition_latency))
>>>                 transition_latency = CPUFREQ_ETERNAL;
>>>
>>>         if (!IS_ERR(cpu_reg)) {
>>> -               struct dev_pm_opp *opp;
>>>                 unsigned long min_uV, max_uV;
>>>                 int i;
>>>
>>> --
>>> 1.6.6.rc2
>>>
Thomas Abraham Jan. 30, 2014, 12:53 p.m. UTC | #13
Hi Mike,

On Wed, Jan 29, 2014 at 12:17 AM, Mike Turquette <mturquette@linaro.org> wrote:
> On Mon, Jan 27, 2014 at 9:30 PM, Thomas Abraham <ta.omasab@gmail.com> wrote:
>> Hi Mike,
>>
>> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette <mturquette@linaro.org> wrote:
>>> Quoting Thomas Abraham (2014-01-18 04:10:51)
>>>> From: Thomas Abraham <thomas.ab@samsung.com>
>>>>
>>>> On some platforms such as the Samsung Exynos, changing the frequency
>>>> of the CPU clock requires changing the frequency of the PLL that is
>>>> supplying the CPU clock. To change the frequency of the PLL, the CPU
>>>> clock is temporarily reparented to another parent clock.
>>>>
>>>> The clock frequency of this temporary parent clock could be much higher
>>>> than the clock frequency of the PLL at the time of reparenting. Due
>>>> to the temporary increase in the CPU clock speed, the CPU (and any other
>>>> components in the CPU clock domain such as dividers, mux, etc.) have to
>>>> to be operated at a higher voltage level, called the safe voltage level.
>>>> This patch adds optional support to temporarily switch to a safe voltage
>>>> level during CPU frequency transitions.
>>>>
>>>> Cc: Shawn Guo <shawn.guo@linaro.org>
>>>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>>>
>>> I'm not a fan of this change. This corner case should be abstracted away
>>> somehow. I had talked to Chander Kayshap previously about handling
>>> voltage changes in clock notifier callbacks, which then renders any
>>> voltage change as a trivial part of the clock rate transition. That
>>> means that this "safe voltage" thing could be handled automagically
>>> without any additional code in the CPUfreq driver.
>>>
>>> There are two nice ways to do this with the clock framework. First is
>>> explicit re-parenting with voltage scaling done in the clock rate-change
>>> notifiers:
>>>
>>> clk_set_parent(cpu_clk, temp_parent);
>>> /* implicit voltage scaling to "safe voltage" happens above */
>>> clk_set_rate(pll, some_rate);
>>> clk_set_parent(cpu_clk, pll);
>>> /* implicit voltage scaling to nominal OPP voltage happens above */
>>>
>>> The above sequence would require a separate exnyos CPUfreq driver, due
>>> to the added clk_set_parent logic.
>>>
>>> The second way to do this is to abstract the clk re-muxing logic out
>>> into the clk driver, which would allow cpufreq-cpu0 to be used for the
>>> exynos chips.
>>
>> This is the approach this patch series takes (patch 2/7). The clock
>> re-muxing logic is handled by a clock driver code. The difference from
>> what you suggested is that the safe voltage (that may be optionally)
>> required before doing the re-muxing is handled here in cpufreq-cpu0
>> driver.
>
> Right, I understand the approach taken in this series and I'm not sure
> it is the right one. Why does the clock driver handle the remuxing if
> it is a functional dependency of the ARM core?

The output of the PLL is the input to a tree of clock nodes. One of
the outputs from this tree is the clock to ARM. And the other outputs
do not serve as parents to any other clocks in the system but are used
internally in the bus interconnect. In addition to that, there are
clock speed restrictions for the clock outputs from this tree with
respect to speeds of other clocks in this clock tree.

Hence, this entire clock tree has been merged into a single composite
clock which includes mux and dividers. So the clock tree now looks
like PLL Output -> Custom Composite Clock -> ARM clock output.

But there is a problem when changing the ARM clock speed which in turn
causes change in PLL clock speed. When PLL clock speed has to be
changed, the PLL has to be first turned off. Which means the clock to
ARM core is cut-off. To avoid that, the Custom Composite Clock now has
to get its clock from another source until the PLL is ready to operate
again. So this composite clock does an automatic re-parenting (the mux
is within this composite clock) because it knows that it is its
responsibility to keep the ARM core clocked all the time.


> As far as I can tell
> the remux does not happen because it is necessary to generate the
> required clock rate, but because we don't want to run the ARM core out
> of spec for a short time while the PLL relocks. Assuming I have that
> part of it right, I prefer for the parent mux operation to be a part
> of the CPUfreq driver's .target callback instead of hidden away in the
> clock driver.

The re-parenting is mostly done to keep the ARM CPU clocked while the
PLL is stopped, reprogrammed and restarted. One of the side effects of
that is, the clock speed of the temporary parent could be higher than
what is allowed due to the ARM voltage at the time of re-parenting.
That is the reason to use the safe voltage.

>
> A common pattern I'm seeing for the last 18 months is code
> consolidation for the sake of code consolidation and it is not always
> a good thing. Having hardware-specific machine drivers under
> drivers/cpufreq/ is the right way to go, and we should only
> consolidate a driver to cpufreq-cpu0 if it makes sense.

Okay. I agree. And I did feel that adding the optional "safe voltage"
feature in cpufreq-cpu0 would help Exynos platforms reuse this driver.
And adding this feature in cpufreq-cpu0 did not feel like some
orthogonal approach but a logical extension.

>
>>
>> The safe voltage setup can be done in the notifier as you suggested.
>> But, doing that in cpufreq-cpu0 driver will help other platforms reuse
>> this feature if required. Also, if done here, the regulator handling
>> is localized in this driver which otherwise would need to be handled
>> in two places, cpufreq-cpu0 driver and the clock notifier.
>
> The notifiers are reusable across other platforms. And the notifier
> can be entirely set up within the cpufreq driver. Code location is not
> a problem. See this RFC series:
> https://lkml.org/lkml/2013/7/7/110

Okay.

>
>>
>> So I tend to prefer the approach in this patch but I am willing to
>> consider any suggestions. Shawn, it would be helpful if you could let
>> us know your thoughts on this. I am almost done with testing the v3 of
>> this series and want to post it so if there are any objections to the
>> changes in this patch, please let me know.
>
> Well I wonder if the whole approach could be more generalized. The DT
> bindings for CPU frequencies could be used by any platform instead of
> being Exynos-specific. We could construct a binding which captures an
> arbitrary clock sub-tree snapshot. By that I mean a DT binding in
> which any number of clocks and their parents and rates could be
> specified in a table. Separately we could have a binding that links a
> given clock at a given rate to some specified regulator and voltage.
> So in this way the bindings are re-usable.
>
> These DT ideas should be considered separately from the CPUfreq notes
> outlined above, and I will respond to patch #3 in this series once I
> have a chance.

Okay. Thanks Mike for your comments. And sorry for the delay in my reply.

Regards,
Thomas.

>
> Thanks,
> Mike
>
>>
>> Thanks,
>> Thomas.
>>
>>>
>>> I'm more a fan of explicitly listing the Exact Steps for the cpu opp
>>> transition in a separate exynos-specific CPUfreq driver, but that's
>>> probably an unpopular view.
>>>
>>> Regards,
>>> Mike
>>>
>>>> ---
>>>>  .../devicetree/bindings/cpufreq/cpufreq-cpu0.txt   |    7 ++++
>>>>  drivers/cpufreq/cpufreq-cpu0.c                     |   37 +++++++++++++++++--
>>>>  2 files changed, 40 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>>>> index f055515..37453ab 100644
>>>> --- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>>>> +++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
>>>> @@ -19,6 +19,12 @@ Optional properties:
>>>>  - cooling-min-level:
>>>>  - cooling-max-level:
>>>>       Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
>>>> +- safe-opp: Certain platforms require that during a opp transition,
>>>> +  a system should not go below a particular opp level. For such systems,
>>>> +  this property specifies the minimum opp to be maintained during the
>>>> +  opp transitions. The safe-opp value is a tuple with first element
>>>> +  representing the safe frequency and the second element representing the
>>>> +  safe voltage.
>>>>
>>>>  Examples:
>>>>
>>>> @@ -36,6 +42,7 @@ cpus {
>>>>                         396000  950000
>>>>                         198000  850000
>>>>                 >;
>>>> +               safe-opp = <396000 950000>
>>>>                 clock-latency = <61036>; /* two CLK32 periods */
>>>>                 #cooling-cells = <2>;
>>>>                 cooling-min-level = <0>;
>>>> diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
>>>> index 0c12ffc..075d3d1 100644
>>>> --- a/drivers/cpufreq/cpufreq-cpu0.c
>>>> +++ b/drivers/cpufreq/cpufreq-cpu0.c
>>>> @@ -27,6 +27,8 @@
>>>>
>>>>  static unsigned int transition_latency;
>>>>  static unsigned int voltage_tolerance; /* in percentage */
>>>> +static unsigned long safe_frequency;
>>>> +static unsigned long safe_voltage;
>>>>
>>>>  static struct device *cpu_dev;
>>>>  static struct clk *cpu_clk;
>>>> @@ -64,17 +66,30 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>>>>                 volt_old = regulator_get_voltage(cpu_reg);
>>>>         }
>>>>
>>>> -       pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
>>>> +       pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
>>>>                  old_freq / 1000, volt_old ? volt_old / 1000 : -1,
>>>>                  new_freq / 1000, volt ? volt / 1000 : -1);
>>>>
>>>>         /* scaling up?  scale voltage before frequency */
>>>> -       if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
>>>> +       if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
>>>> +                               new_freq >= safe_frequency) {
>>>>                 ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>>>>                 if (ret) {
>>>>                         pr_err("failed to scale voltage up: %d\n", ret);
>>>>                         return ret;
>>>>                 }
>>>> +       } else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
>>>> +               /*
>>>> +                * the scaled up voltage level for the new_freq is lower
>>>> +                * than the safe voltage level. so set safe_voltage
>>>> +                * as the intermediate voltage level and revert it
>>>> +                * back after the frequency has been changed.
>>>> +                */
>>>> +               ret = regulator_set_voltage_tol(cpu_reg, safe_voltage, tol);
>>>> +               if (ret) {
>>>> +                       pr_err("failed to set safe voltage: %d\n", ret);
>>>> +                       return ret;
>>>> +               }
>>>>         }
>>>>
>>>>         ret = clk_set_rate(cpu_clk, freq_exact);
>>>> @@ -86,7 +101,8 @@ static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
>>>>         }
>>>>
>>>>         /* scaling down?  scale voltage after frequency */
>>>> -       if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
>>>> +       if (!IS_ERR(cpu_reg) &&
>>>> +                       (new_freq < old_freq || new_freq < safe_frequency)) {
>>>>                 ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
>>>>                 if (ret) {
>>>>                         pr_err("failed to scale voltage down: %d\n", ret);
>>>> @@ -116,6 +132,8 @@ static struct cpufreq_driver cpu0_cpufreq_driver = {
>>>>
>>>>  static int cpu0_cpufreq_probe(struct platform_device *pdev)
>>>>  {
>>>> +       const struct property *prop;
>>>> +       struct dev_pm_opp *opp;
>>>>         struct device_node *np;
>>>>         int ret;
>>>>
>>>> @@ -165,13 +183,24 @@ static int cpu0_cpufreq_probe(struct platform_device *pdev)
>>>>                 goto out_put_node;
>>>>         }
>>>>
>>>> +       prop = of_find_property(np, "safe-opp", NULL);
>>>> +       if (prop) {
>>>> +               if (prop->value && (prop->length / sizeof(u32)) == 2) {
>>>> +                       const __be32 *val;
>>>> +                       val = prop->value;
>>>> +                       safe_frequency = be32_to_cpup(val++);
>>>> +                       safe_voltage = be32_to_cpup(val);
>>>> +               } else {
>>>> +                       pr_err("invalid safe-opp level specified\n");
>>>> +               }
>>>> +       }
>>>> +
>>>>         of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
>>>>
>>>>         if (of_property_read_u32(np, "clock-latency", &transition_latency))
>>>>                 transition_latency = CPUFREQ_ETERNAL;
>>>>
>>>>         if (!IS_ERR(cpu_reg)) {
>>>> -               struct dev_pm_opp *opp;
>>>>                 unsigned long min_uV, max_uV;
>>>>                 int i;
>>>>
>>>> --
>>>> 1.6.6.rc2
>>>>
Heiko Stuebner Jan. 30, 2014, 3:09 p.m. UTC | #14
On Thursday, 30. January 2014 18:23:44 Thomas Abraham wrote:
> Hi Mike,
> 
> On Wed, Jan 29, 2014 at 12:17 AM, Mike Turquette <mturquette@linaro.org> 
wrote:
> > On Mon, Jan 27, 2014 at 9:30 PM, Thomas Abraham <ta.omasab@gmail.com> 
wrote:
> >> Hi Mike,
> >> 
> >> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette <mturquette@linaro.org> 
wrote:
> >>> Quoting Thomas Abraham (2014-01-18 04:10:51)
> >>> 
> > As far as I can tell
> > the remux does not happen because it is necessary to generate the
> > required clock rate, but because we don't want to run the ARM core out
> > of spec for a short time while the PLL relocks. Assuming I have that
> > part of it right, I prefer for the parent mux operation to be a part
> > of the CPUfreq driver's .target callback instead of hidden away in the
> > clock driver.
> 
> The re-parenting is mostly done to keep the ARM CPU clocked while the
> PLL is stopped, reprogrammed and restarted. One of the side effects of
> that is, the clock speed of the temporary parent could be higher then
> what is allowed due to the ARM voltage at the time of re-parenting.
> That is the reason to use the safe voltage.

The Rockchip-SoCs use something similar, so I'm following quite closely what
Thomas is trying to do here, as a similar solution would also solve this issue
for me.

On some Rockchip-SoCs even stuff like pclk and hclk seems to be sourced from 
the divided armclk, creating additional constraints.

But on the RKs (at least in the upstream sources) the armclk is simply equal 
to the pll output. A divider exists, but is only used to make sure that the 
armclk stays below the original rate when sourced from the temp-parent, like

	if (clk_get_rate(temp_parent) > clk_get_rate(main_parent)
		set_divider(something so that rate(temp) <= rate(main)
	clk_set_parent(...)

Isn't there a similar possibility on your platform, as it would remove the need
for the safe-voltage?


In general I also like the approach of hiding the rate-change logic inside 
this composite clock, as the depending clocks can be easily kept in sync. As 
with the Rockchips the depending clocks are different for each of the three 
Cortex-A9 SoCs I looked at, it would be "interesting" if all of this would 
need to be done in a cpufreq driver.


Heiko
Mike Turquette Feb. 1, 2014, 4:10 a.m. UTC | #15
Quoting Heiko Stübner (2014-01-30 07:09:04)
> On Thursday, 30. January 2014 18:23:44 Thomas Abraham wrote:
> > Hi Mike,
> > 
> > On Wed, Jan 29, 2014 at 12:17 AM, Mike Turquette <mturquette@linaro.org> 
> wrote:
> > > On Mon, Jan 27, 2014 at 9:30 PM, Thomas Abraham <ta.omasab@gmail.com> 
> wrote:
> > >> Hi Mike,
> > >> 
> > >> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette <mturquette@linaro.org> 
> wrote:
> > >>> Quoting Thomas Abraham (2014-01-18 04:10:51)
> > >>> 
> > > As far as I can tell
> > > the remux does not happen because it is necessary to generate the
> > > required clock rate, but because we don't want to run the ARM core out
> > > of spec for a short time while the PLL relocks. Assuming I have that
> > > part of it right, I prefer for the parent mux operation to be a part
> > > of the CPUfreq driver's .target callback instead of hidden away in the
> > > clock driver.
> > 
> > The re-parenting is mostly done to keep the ARM CPU clocked while the
> > PLL is stopped, reprogrammed and restarted. One of the side effects of
> > that is, the clock speed of the temporary parent could be higher then
> > what is allowed due to the ARM voltage at the time of re-parenting.
> > That is the reason to use the safe voltage.
> 
> The Rockchip-SoCs use something similar, so I'm following quite closely what 
> Thomas is trying to do here, as similar solution would also solve this issue 
> for me.
> 
> On some Rockchip-SoCs even stuff like pclk and hclk seems to be sourced from 
> the divided armclk, creating additional constraints.
> 
> But on the RKs (at least in the upstream sources) the armclk is simply equal 
> to the pll output. A divider exists, but is only used to make sure that the 
> armclk stays below the original rate when sourced from the temp-parent, like
> 
>         if (clk_get_rate(temp_parent) > clk_get_rate(main_parent)
>                 set_divider(something so that rate(temp) <= rate(main)
>         clk_set_parent(...)
> 
> Isn't there a similar possiblity on your platform, as it would remove the need 
> for the safe-voltage?
> 
> 
> In general I also like the approach of hiding the rate-change logic inside 
> this composite clock, as the depending clocks can be easily kept in sync. As 
> with the Rockchips the depending clocks are different for each of the three 
> Cortex-A9 SoCs I looked at, it would be "interesting" if all of this would 
> need to be done in a cpufreq driver.

I wonder if hiding these details inside of the composite clock
implementation indicates the lack of some needed feature in the clk
core? I've discussed the idea of "coordinated rate changes" before. E.g:
Thomas Abraham Feb. 3, 2014, 4:06 p.m. UTC | #16
On Thu, Jan 30, 2014 at 8:39 PM, Heiko Stübner <heiko@sntech.de> wrote:
> On Thursday, 30. January 2014 18:23:44 Thomas Abraham wrote:
>> Hi Mike,
>>
>> On Wed, Jan 29, 2014 at 12:17 AM, Mike Turquette <mturquette@linaro.org>
> wrote:
>> > On Mon, Jan 27, 2014 at 9:30 PM, Thomas Abraham <ta.omasab@gmail.com>
> wrote:
>> >> Hi Mike,
>> >>
>> >> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette <mturquette@linaro.org>
> wrote:
>> >>> Quoting Thomas Abraham (2014-01-18 04:10:51)
>> >>>
>> > As far as I can tell
>> > the remux does not happen because it is necessary to generate the
>> > required clock rate, but because we don't want to run the ARM core out
>> > of spec for a short time while the PLL relocks. Assuming I have that
>> > part of it right, I prefer for the parent mux operation to be a part
>> > of the CPUfreq driver's .target callback instead of hidden away in the
>> > clock driver.
>>
>> The re-parenting is mostly done to keep the ARM CPU clocked while the
>> PLL is stopped, reprogrammed and restarted. One of the side effects of
>> that is, the clock speed of the temporary parent could be higher then
>> what is allowed due to the ARM voltage at the time of re-parenting.
>> That is the reason to use the safe voltage.
>
> The Rockchip-SoCs use something similar, so I'm following quite closely what
> Thomas is trying to do here, as similar solution would also solve this issue
> for me.
>
> On some Rockchip-SoCs even stuff like pclk and hclk seems to be sourced from
> the divided armclk, creating additional constraints.
>
> But on the RKs (at least in the upstream sources) the armclk is simply equal
> to the pll output. A divider exists, but is only used to make sure that the
> armclk stays below the original rate when sourced from the temp-parent, like
>
>         if (clk_get_rate(temp_parent) > clk_get_rate(main_parent)
>                 set_divider(something so that rate(temp) <= rate(main)
>         clk_set_parent(...)
>
> Isn't there a similar possiblity on your platform, as it would remove the need
> for the safe-voltage?

Hi Heiko,

Yes, this works too! I have tested this method on Exynos4210,
Exynos4412 and Exynos5250 and it works fine without any need for safe
voltage. This is much better than using safe voltage. Thank you for
suggesting this.

Regards,
Thomas.

>
>
> In general I also like the approach of hiding the rate-change logic inside
> this composite clock, as the depending clocks can be easily kept in sync. As
> with the Rockchips the depending clocks are different for each of the three
> Cortex-A9 SoCs I looked at, it would be "interesting" if all of this would
> need to be done in a cpufreq driver.
>
>
> Heiko
>
Thomas Abraham Feb. 3, 2014, 4:06 p.m. UTC | #17
On Sat, Feb 1, 2014 at 9:40 AM, Mike Turquette <mturquette@linaro.org> wrote:
> Quoting Heiko Stübner (2014-01-30 07:09:04)
>> On Thursday, 30. January 2014 18:23:44 Thomas Abraham wrote:
>> > Hi Mike,
>> >
>> > On Wed, Jan 29, 2014 at 12:17 AM, Mike Turquette <mturquette@linaro.org>
>> wrote:
>> > > On Mon, Jan 27, 2014 at 9:30 PM, Thomas Abraham <ta.omasab@gmail.com>
>> wrote:
>> > >> Hi Mike,
>> > >>
>> > >> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette <mturquette@linaro.org>
>> wrote:
>> > >>> Quoting Thomas Abraham (2014-01-18 04:10:51)
>> > >>>
>> > > As far as I can tell
>> > > the remux does not happen because it is necessary to generate the
>> > > required clock rate, but because we don't want to run the ARM core out
>> > > of spec for a short time while the PLL relocks. Assuming I have that
>> > > part of it right, I prefer for the parent mux operation to be a part
>> > > of the CPUfreq driver's .target callback instead of hidden away in the
>> > > clock driver.
>> >
>> > The re-parenting is mostly done to keep the ARM CPU clocked while the
>> > PLL is stopped, reprogrammed and restarted. One of the side effects of
>> > that is, the clock speed of the temporary parent could be higher then
>> > what is allowed due to the ARM voltage at the time of re-parenting.
>> > That is the reason to use the safe voltage.
>>
>> The Rockchip-SoCs use something similar, so I'm following quite closely what
>> Thomas is trying to do here, as similar solution would also solve this issue
>> for me.
>>
>> On some Rockchip-SoCs even stuff like pclk and hclk seems to be sourced from
>> the divided armclk, creating additional constraints.
>>
>> But on the RKs (at least in the upstream sources) the armclk is simply equal
>> to the pll output. A divider exists, but is only used to make sure that the
>> armclk stays below the original rate when sourced from the temp-parent, like
>>
>>         if (clk_get_rate(temp_parent) > clk_get_rate(main_parent)
>>                 set_divider(something so that rate(temp) <= rate(main)
>>         clk_set_parent(...)
>>
>> Isn't there a similar possiblity on your platform, as it would remove the need
>> for the safe-voltage?
>>
>>
>> In general I also like the approach of hiding the rate-change logic inside
>> this composite clock, as the depending clocks can be easily kept in sync. As
>> with the Rockchips the depending clocks are different for each of the three
>> Cortex-A9 SoCs I looked at, it would be "interesting" if all of this would
>> need to be done in a cpufreq driver.
>
> I wonder if hiding these details inside of the composite clock
> implementation indicates the lack of some needed feature in the clk
> core? I've discussed the idea of "coordinated rate changes" before. E.g:
>
> _________________________________________________________
> |  clk  |  opp-low      |  opp-mid      |  opp-fast     |
> |       |               |               |               |
> |pll    | 300000        |  600000       |  600000       |
> |       |               |               |               |
> |div    | 150000        |  300000       |  600000       |
> |       |               |               |               |
> |mpu_clk| 150000        |  300000       |  600000       |
> |       |               |               |               |
> |periph | 150000        |  150000       |  300000       |
> ---------------------------------------------------------
>
> A call to clk_set_rate() against any of those clocks will result in all
> of their dividers being updated. At the implementation level this might
> look something like this extremely simplified pseudocode:
>
> int clk_set_rate(struct clk* clk, unsigned long rate)
> {
>         /* trap clks that support coordinated rate changes */
>         if (clk->ops->coordinate_rate)
>                 return clk->ops->coordinate_rate(clk->hw, rate);
>         ...
> }
>
> and,
>
> struct coord_rates {
>         struct clk_hw *hw;
>         struct clk *parent;
>         struct clk *rate;
> };
>
> and in the clock driver,
>
> #define PLL 0
> #define DIV 1
> #define MPU 2
> #define PER 3
>
> #define NR_OPP 4
> #define NR_CLK 4
>
> struct coord_rates my_opps[NR_OPP][NR_CLK]; // populated from DT data
>
> int my_coordinate_rate_callback(struct clk_hw *hw, unsigned long rate)
> {
>         struct coord_rate **selected_opp;
>
>         for(i = 0; i < NR_OPP; i++) {
>                 for(j = 0; j < NR_CLK; j++) {
>                         if (my_opps[i][j]->hw == hw &&
>                                 my_opps[i][j]->rate == rate)
>                                 selected_opp = my_opps[i];
>                                 break;
>                 }
>         }
>
>         /*
>          * order of operations is specific to my hardware and should be
>          * managed by my clock driver, not generic code
>          */
>
>         __clk_set_parent(selected_opp[DIV]->hw, temp_parent);
>         __clk_set_rate(selected_opp[PLL]->hw, selected_opp[PLL]->rate);
>         __clk_set_parent(selected_opp[DIV]->hw,
>                                 selected_opp[PLL]->hw->clk);
>         ...
>
>         /*
>          * note that the above could be handled by a switch-case or
>          * something else
>          */
> }
>
> Thoughts? Please forgive any gaps in my logic or abuse of C.
>
> I have long thought that something like the above would someday go into
> a generic dvfs layer instead of the clock framework, but maybe starting
> with the clk framework makes more sense?

Hi Mike,

Yes, this will be very helpful for atomically controlling the rates of
a group of clocks. This coordinated rate change method can be used
during the armclk rate changes on Samsung platforms.

Thanks,
Thomas.

>
> Regards,
> Mike
>
>>
>>
>> Heiko
>>
Heiko Stuebner Feb. 5, 2014, 9:53 a.m. UTC | #18
Am Freitag, 31. Januar 2014, 20:10:51 schrieb Mike Turquette:
> Quoting Heiko Stübner (2014-01-30 07:09:04)
> 
> > On Thursday, 30. January 2014 18:23:44 Thomas Abraham wrote:
> > > Hi Mike,
> > > 
> > > On Wed, Jan 29, 2014 at 12:17 AM, Mike Turquette <mturquette@linaro.org>
> > 
> > wrote:
> > > > On Mon, Jan 27, 2014 at 9:30 PM, Thomas Abraham <ta.omasab@gmail.com>
> > 
> > wrote:
> > > >> Hi Mike,
> > > >> 
> > > >> On Tue, Jan 28, 2014 at 1:55 AM, Mike Turquette
> > > >> <mturquette@linaro.org>
> > 
> > wrote:
> > > >>> Quoting Thomas Abraham (2014-01-18 04:10:51)
> > > > 
> > > > As far as I can tell
> > > > the remux does not happen because it is necessary to generate the
> > > > required clock rate, but because we don't want to run the ARM core out
> > > > of spec for a short time while the PLL relocks. Assuming I have that
> > > > part of it right, I prefer for the parent mux operation to be a part
> > > > of the CPUfreq driver's .target callback instead of hidden away in the
> > > > clock driver.
> > > 
> > > The re-parenting is mostly done to keep the ARM CPU clocked while the
> > > PLL is stopped, reprogrammed and restarted. One of the side effects of
> > > that is, the clock speed of the temporary parent could be higher than
> > > what is allowed due to the ARM voltage at the time of re-parenting.
> > > That is the reason to use the safe voltage.
> > 
> > The Rockchip-SoCs use something similar, so I'm following quite closely
> > what Thomas is trying to do here, as similar solution would also solve
> > this issue for me.
> > 
> > On some Rockchip-SoCs even stuff like pclk and hclk seems to be sourced
> > from the divided armclk, creating additional constraints.
> > 
> > But on the RKs (at least in the upstream sources) the armclk is simply
> > equal to the pll output. A divider exists, but is only used to make sure
> > that the armclk stays below the original rate when sourced from the
> > temp-parent, like
> > 
> >         if (clk_get_rate(temp_parent) > clk_get_rate(main_parent)
> >                 set_divider(something so that rate(temp) <= rate(main)
> >         clk_set_parent(...)
> > 
> > Isn't there a similar possibility on your platform, as it would remove the
> > need for the safe-voltage?
> > 
> > 
> > In general I also like the approach of hiding the rate-change logic inside
> > this composite clock, as the depending clocks can be easily kept in sync.
> > As with the Rockchips the depending clocks are different for each of the
> > three Cortex-A9 SoCs I looked at, it would be "interesting" if all of
> > this would need to be done in a cpufreq driver.
> 
> I wonder if hiding these details inside of the composite clock
> implementation indicates the lack of some needed feature in the clk
> core? I've discussed the idea of "coordinated rate changes" before. E.g:
> 
> _________________________________________________________
> |  clk  |  opp-low      |  opp-mid      |  opp-fast     |
> |       |               |               |               |
> |pll    | 300000        |  600000       |  600000       |
> |       |               |               |               |
> |div    | 150000        |  300000       |  600000       |
> |       |               |               |               |
> |mpu_clk| 150000        |  300000       |  600000       |
> |       |               |               |               |
> |periph | 150000        |  150000       |  300000       |
> ---------------------------------------------------------
> 
> A call to clk_set_rate() against any of those clocks will result in all
> of their dividers being updated. At the implementation level this might
> look something like this extremely simplified pseudocode:
> 
> int clk_set_rate(struct clk* clk, unsigned long rate)
> {
> 	/* trap clks that support coordinated rate changes */
> 	if (clk->ops->coordinate_rate)
> 		return clk->ops->coordinate_rate(clk->hw, rate);
> 	...
> }
> 
> and,
> 
> struct coord_rates {
> 	struct clk_hw *hw;
> 	struct clk *parent;
> 	struct clk *rate;
> };
> 
> and in the clock driver,
> 
> #define PLL 0
> #define DIV 1
> #define MPU 2
> #define PER 3
> 
> #define NR_OPP 4
> #define NR_CLK 4
> 
> struct coord_rates my_opps[NR_OPP][NR_CLK]; // populated from DT data
> 
> int my_coordinate_rate_callback(struct clk_hw *hw, unsigned long rate)
> {
> 	struct coord_rate **selected_opp;
> 
> 	for(i = 0; i < NR_OPP; i++) {
> 		for(j = 0; j < NR_CLK; j++) {
> 			if (my_opps[i][j]->hw == hw &&
> 				my_opps[i][j]->rate == rate)
> 				selected_opp = my_opps[i];
> 				break;
> 		}
> 	}
> 
> 	/*
> 	 * order of operations is specific to my hardware and should be
> 	 * managed by my clock driver, not generic code
> 	 */
> 
> 	__clk_set_parent(selected_opp[DIV]->hw, temp_parent);
> 	__clk_set_rate(selected_opp[PLL]->hw, selected_opp[PLL]->rate);
> 	__clk_set_parent(selected_opp[DIV]->hw,
> 				selected_opp[PLL]->hw->clk);
> 	...
> 
> 	/*
> 	 * note that the above could be handled by a switch-case or
> 	 * something else
> 	 */
> }
> 
> Thoughts? Please forgive any gaps in my logic or abuse of C.
> 
> I have long thought that something like the above would someday go into
> a generic dvfs layer instead of the clock framework, but maybe starting
> with the clk framework makes more sense?

Similar to Thomas, this looks like the thing I'd need for my core clocks.

Also to me this really looks like something belonging to the clock framework, 
as we at this point really only have some clocks that in all cases need to be 
set together, independent of it being embedded in some scaling context or 
something else.


Heiko
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
index f055515..37453ab 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-cpu0.txt
@@ -19,6 +19,12 @@  Optional properties:
 - cooling-min-level:
 - cooling-max-level:
      Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
+- safe-opp: Certain platforms require that during an opp transition,
+  a system should not go below a particular opp level. For such systems,
+  this property specifies the minimum opp to be maintained during the
+  opp transitions. The safe-opp value is a tuple with first element
+  representing the safe frequency and the second element representing the
+  safe voltage.
 
 Examples:
 
@@ -36,6 +42,7 @@  cpus {
 			396000  950000
 			198000  850000
 		>;
+		safe-opp = <396000 950000>;
 		clock-latency = <61036>; /* two CLK32 periods */
 		#cooling-cells = <2>;
 		cooling-min-level = <0>;
diff --git a/drivers/cpufreq/cpufreq-cpu0.c b/drivers/cpufreq/cpufreq-cpu0.c
index 0c12ffc..075d3d1 100644
--- a/drivers/cpufreq/cpufreq-cpu0.c
+++ b/drivers/cpufreq/cpufreq-cpu0.c
@@ -27,6 +27,8 @@ 
 
 static unsigned int transition_latency;
 static unsigned int voltage_tolerance; /* in percentage */
+static unsigned long safe_frequency;
+static unsigned long safe_voltage;
 
 static struct device *cpu_dev;
 static struct clk *cpu_clk;
@@ -64,17 +66,30 @@  static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
 		volt_old = regulator_get_voltage(cpu_reg);
 	}
 
-	pr_debug("%u MHz, %ld mV --> %u MHz, %ld mV\n",
+	pr_debug("\n\n%u MHz, %ld mV --> %u MHz, %ld mV\n",
 		 old_freq / 1000, volt_old ? volt_old / 1000 : -1,
 		 new_freq / 1000, volt ? volt / 1000 : -1);
 
 	/* scaling up?  scale voltage before frequency */
-	if (!IS_ERR(cpu_reg) && new_freq > old_freq) {
+	if (!IS_ERR(cpu_reg) && new_freq > old_freq &&
+				new_freq >= safe_frequency) {
 		ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
 		if (ret) {
 			pr_err("failed to scale voltage up: %d\n", ret);
 			return ret;
 		}
+	} else if (!IS_ERR(cpu_reg) && old_freq < safe_frequency) {
+		/*
+		 * the scaled up voltage level for the new_freq is lower
+		 * than the safe voltage level. so set safe_voltage
+		 * as the intermediate voltage level and revert it
+		 * back after the frequency has been changed.
+		 */
+		ret = regulator_set_voltage_tol(cpu_reg, safe_voltage, tol);
+		if (ret) {
+			pr_err("failed to set safe voltage: %d\n", ret);
+			return ret;
+		}
 	}
 
 	ret = clk_set_rate(cpu_clk, freq_exact);
@@ -86,7 +101,8 @@  static int cpu0_set_target(struct cpufreq_policy *policy, unsigned int index)
 	}
 
 	/* scaling down?  scale voltage after frequency */
-	if (!IS_ERR(cpu_reg) && new_freq < old_freq) {
+	if (!IS_ERR(cpu_reg) &&
+			(new_freq < old_freq || new_freq < safe_frequency)) {
 		ret = regulator_set_voltage_tol(cpu_reg, volt, tol);
 		if (ret) {
 			pr_err("failed to scale voltage down: %d\n", ret);
@@ -116,6 +132,8 @@  static struct cpufreq_driver cpu0_cpufreq_driver = {
 
 static int cpu0_cpufreq_probe(struct platform_device *pdev)
 {
+	const struct property *prop;
+	struct dev_pm_opp *opp;
 	struct device_node *np;
 	int ret;
 
@@ -165,13 +183,24 @@  static int cpu0_cpufreq_probe(struct platform_device *pdev)
 		goto out_put_node;
 	}
 
+	prop = of_find_property(np, "safe-opp", NULL);
+	if (prop) {
+		if (prop->value && (prop->length / sizeof(u32)) == 2) {
+			const __be32 *val;
+			val = prop->value;
+			safe_frequency = be32_to_cpup(val++);
+			safe_voltage = be32_to_cpup(val);
+		} else {
+			pr_err("invalid safe-opp level specified\n");
+		}
+	}
+
 	of_property_read_u32(np, "voltage-tolerance", &voltage_tolerance);
 
 	if (of_property_read_u32(np, "clock-latency", &transition_latency))
 		transition_latency = CPUFREQ_ETERNAL;
 
 	if (!IS_ERR(cpu_reg)) {
-		struct dev_pm_opp *opp;
 		unsigned long min_uV, max_uV;
 		int i;