diff mbox

[v2,06/14] ARM: sun8i: clk: Add clk-factor rate application method

Message ID 20160625034511.7966-7-megous@megous.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ondřej Jirman June 25, 2016, 3:45 a.m. UTC
From: Ondrej Jirman <megous@megous.com>

PLL1 on H3 requires special factors application algorithm,
when the rate is changed. This algorithm was extracted
from the arisc code that handles frequency scaling
in the BSP kernel.

This commit adds optional apply function to
struct factors_data, that can implement non-trivial
factors application method, when necessary.

Also struct clk_factors_config is extended with position
of the PLL lock flag.

Signed-off-by: Ondrej Jirman <megous@megous.com>
---
 Documentation/devicetree/bindings/clock/sunxi.txt |  1 +
 drivers/clk/sunxi/clk-factors.c                   | 34 +++++------
 drivers/clk/sunxi/clk-factors.h                   | 12 ++++
 drivers/clk/sunxi/clk-sunxi.c                     | 72 ++++++++++++++++++++++-
 4 files changed, 98 insertions(+), 21 deletions(-)

Comments

Maxime Ripard June 30, 2016, 8:40 p.m. UTC | #1
Hi,

On Sat, Jun 25, 2016 at 05:45:03AM +0200, megous@megous.com wrote:
> From: Ondrej Jirman <megous@megous.com>
> 
> PLL1 on H3 requires special factors application algorithm,
> when the rate is changed. This algorithm was extracted
> from the arisc code that handles frequency scaling
> in the BSP kernel.
> 
> This commit adds optional apply function to
> struct factors_data, that can implement non-trivial
> factors application method, when necessary.
> 
> Also struct clk_factors_config is extended with position
> of the PLL lock flag.

Have you tested the current implementation, and found that it was not
working, or did you duplicate the arisc code directly?

>  /**
> + * sun8i_h3_apply_pll1_factors() - applies n, k, m, p factors to the
> + * register using an algorithm that tries to reserve the PLL lock
> + */
> +
> +static void sun8i_h3_apply_pll1_factors(struct clk_factors *factors, struct factors_request *req)
> +{
> +	const struct clk_factors_config *config = factors->config;
> +	u32 reg;
> +
> +	/* Fetch the register value */
> +	reg = readl(factors->reg);
> +
> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) < req->p) {
> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
> +
> +		writel(reg, factors->reg);
> +		__delay(2000);
> +	}

So there was some doubts about the fact that P was being used, or at
least that it was useful.

> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) < req->m) {
> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
> +
> +		writel(reg, factors->reg);
> +		__delay(2000);
> +	}
> +
> +	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req->n);
> +	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req->k);
> +
> +	writel(reg, factors->reg);
> +	__delay(20);
> +
> +	while (!(readl(factors->reg) & (1 << config->lock)));

So, they are applying the dividers first, and then applying the
multipliers, and then wait for the PLL to stabilize.

> +
> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) > req->m) {
> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
> +
> +		writel(reg, factors->reg);
> +		__delay(2000);
> +	}
> +
> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) > req->p) {
> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
> +
> +		writel(reg, factors->reg);
> +		__delay(2000);
> +	}

However, this is kind of weird, why would you need to re-apply the
dividers? Nothing really changes. Have you tried without that part?

Since this is really specific, I guess you could simply make the
clk_ops for the nkmp clocks public, and just re-implement set_rate
using that logic.

You might also need to set an upper limit on P, since the last value
(4) is not a valid one.

I guess you could do that by adding a max field in the __ccu_div
structure.

Maxime
Ondřej Jirman July 1, 2016, 12:50 a.m. UTC | #2
Hi,

On 30.6.2016 22:40, Maxime Ripard wrote:
> Hi,
> 
> On Sat, Jun 25, 2016 at 05:45:03AM +0200, megous@megous.com wrote:
>> From: Ondrej Jirman <megous@megous.com>
>>
>> PLL1 on H3 requires special factors application algorithm,
>> when the rate is changed. This algorithm was extracted
>> from the arisc code that handles frequency scaling
>> in the BSP kernel.
>>
>> This commit adds optional apply function to
>> struct factors_data, that can implement non-trivial
>> factors application method, when necessary.
>>
>> Also struct clk_factors_config is extended with position
>> of the PLL lock flag.
> 
> Have you tested the current implementation, and found that it was not
> working, or did you duplicate the arisc code directly?

I have tested the current implementation, and it was not working. It
depended on some other factors, like the initial setup done by u-boot.
It didn't work reliably.

Then I reverse engineered arisc, in an effort to see what's the
difference, between mainline and BSP code.

>>  /**
>> + * sun8i_h3_apply_pll1_factors() - applies n, k, m, p factors to the
>> + * register using an algorithm that tries to reserve the PLL lock
>> + */
>> +
>> +static void sun8i_h3_apply_pll1_factors(struct clk_factors *factors, struct factors_request *req)
>> +{
>> +	const struct clk_factors_config *config = factors->config;
>> +	u32 reg;
>> +
>> +	/* Fetch the register value */
>> +	reg = readl(factors->reg);
>> +
>> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) < req->p) {
>> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
>> +
>> +		writel(reg, factors->reg);
>> +		__delay(2000);
>> +	}
> 
> So there was some doubts about the fact that P was being used, or at
> least that it was useful.

p is necessary to reduce frequencies below 288 MHz according to the
datasheet.

>> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) < req->m) {
>> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
>> +
>> +		writel(reg, factors->reg);
>> +		__delay(2000);
>> +	}
>> +
>> +	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req->n);
>> +	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req->k);
>> +
>> +	writel(reg, factors->reg);
>> +	__delay(20);
>> +
>> +	while (!(readl(factors->reg) & (1 << config->lock)));
> 
> So, they are applying the dividers first, and then applying the
> multipliers, and then wait for the PLL to stabilize.

Not exactly, first we are increasing dividers if the new dividers are
higher that that what's already set. This ensures that because
application of dividers is immediate by the design of the PLL, the
application of multipliers isn't. So the VCO would still run at the same
frequency for a while gradually rising to a new value for example,
while the dividers would be reduced immediately. Leading to crash.

PLL
--------------------------
PRE DIV(f0) -> VCO(f1) -> POST DIV(f2)
   P             K,N           M

Example: (we set all factors at once, reducing dividers and multipliers
at the same time at 0ms - this should lead to no change in the output
frequency, but...)

-1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
 0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 2GHz       - boom
 1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 1.5GHz
 2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz

The current code crashes exactly at boom, you don't get any more
instructions to execute.

See.

So this patch first increases dividers (only if necessary), changes
multipliers and waits for change to happen (takes around 2000 cycles),
and then decreases dividers (only if necessary).

So we get:

-1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
 0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz   - no boom, multiplier
                                             reduced
 1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 0.75GHz
1.9ms: f0 = 24MHz, f1 = 1GHz,   f2 = 0.5GHz - we got PLL sync
 2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz   - and here we reduce divider
at last

>> +
>> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) > req->m) {
>> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
>> +
>> +		writel(reg, factors->reg);
>> +		__delay(2000);
>> +	}
>> +
>> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) > req->p) {
>> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
>> +
>> +		writel(reg, factors->reg);
>> +		__delay(2000);
>> +	}
> 
> However, this is kind of weird, why would you need to re-apply the
> dividers? Nothing really changes. Have you tried without that part?

See above, we either increase before PLL change, or reduce dividers
after the change. Nothing is re-applied.

> Since this is really specific, I guess you could simply make the
> clk_ops for the nkmp clocks public, and just re-implement set_rate
> using that logic.

I would argue that this may be necessary for other PLL clocks too, if
you can get out of bounds output frequency, by changing the dividers too
early or too late. So perhaps this code should be generalized for other
PLL clocks too, instead.

> 
> You might also need to set an upper limit on P, since the last value
> (4) is not a valid one.

I think, that should be done by the factors calculation function already.

> I guess you could do that by adding a max field in the __ccu_div
> structure.
> 
> Maxime
> 

regards,
  Ondrej
Ondřej Jirman July 1, 2016, 12:53 a.m. UTC | #3
On 30.6.2016 22:40, Maxime Ripard wrote:
> Hi,
> 
> On Sat, Jun 25, 2016 at 05:45:03AM +0200, megous@megous.com wrote:
>> From: Ondrej Jirman <megous@megous.com>
>>
>> PLL1 on H3 requires special factors application algorithm,
>> when the rate is changed. This algorithm was extracted
>> from the arisc code that handles frequency scaling
>> in the BSP kernel.
>>
>> This commit adds optional apply function to
>> struct factors_data, that can implement non-trivial
>> factors application method, when necessary.
>>
>> Also struct clk_factors_config is extended with position
>> of the PLL lock flag.
> 
> Have you tested the current implementation, and found that it was not
> working, or did you duplicate the arisc code directly?

Also of note is that similar code probably doesn't crash in u-boot,
because there, before changing the PLL1 clock, the cpu is switched to
24MHz osc, so it is not overclocked, even if factors align in such a way
that you'd get the behavior I described in the other email.

>>  /**
>> + * sun8i_h3_apply_pll1_factors() - applies n, k, m, p factors to the
>> + * register using an algorithm that tries to reserve the PLL lock
>> + */
>> +
>> +static void sun8i_h3_apply_pll1_factors(struct clk_factors *factors, struct factors_request *req)
>> +{
>> +	const struct clk_factors_config *config = factors->config;
>> +	u32 reg;
>> +
>> +	/* Fetch the register value */
>> +	reg = readl(factors->reg);
>> +
>> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) < req->p) {
>> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
>> +
>> +		writel(reg, factors->reg);
>> +		__delay(2000);
>> +	}
> 
> So there was some doubts about the fact that P was being used, or at
> least that it was useful.
> 
>> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) < req->m) {
>> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
>> +
>> +		writel(reg, factors->reg);
>> +		__delay(2000);
>> +	}
>> +
>> +	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req->n);
>> +	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req->k);
>> +
>> +	writel(reg, factors->reg);
>> +	__delay(20);
>> +
>> +	while (!(readl(factors->reg) & (1 << config->lock)));
> 
> So, they are applying the dividers first, and then applying the
> multipliers, and then wait for the PLL to stabilize.
> 
>> +
>> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) > req->m) {
>> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
>> +
>> +		writel(reg, factors->reg);
>> +		__delay(2000);
>> +	}
>> +
>> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) > req->p) {
>> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
>> +
>> +		writel(reg, factors->reg);
>> +		__delay(2000);
>> +	}
> 
> However, this is kind of weird, why would you need to re-apply the
> dividers? Nothing really changes. Have you tried without that part?
> 
> Since this is really specific, I guess you could simply make the
> clk_ops for the nkmp clocks public, and just re-implement set_rate
> using that logic.
> 
> You might also need to set an upper limit on P, since the last value
> (4) is not a valid one.
> 
> I guess you could do that by adding a max field in the __ccu_div
> structure.
> 
> Maxime
>
Jean-Francois Moine July 1, 2016, 5:37 a.m. UTC | #4
On Fri, 1 Jul 2016 02:50:57 +0200
Ondřej Jirman <megous@megous.com> wrote:

> > Since this is really specific, I guess you could simply make the
> > clk_ops for the nkmp clocks public, and just re-implement set_rate
> > using that logic.
> 
> I would argue that this may be necessary for other PLL clocks too, if
> you can get out of bounds output frequency, by changing the dividers too
> early or too late. So perhaps this code should be generalized for other
> PLL clocks too, instead.

The documentation says that only the CPU and DDR PLLs can be dynamically
changed after boot.
Ondřej Jirman July 1, 2016, 6:34 a.m. UTC | #5
On 1.7.2016 07:37, Jean-Francois Moine wrote:
> On Fri, 1 Jul 2016 02:50:57 +0200
> Ondřej Jirman <megous@megous.com> wrote:
> 
>>> Since this is really specific, I guess you could simply make the
>>> clk_ops for the nkmp clocks public, and just re-implement set_rate
>>> using that logic.
>>
>> I would argue that this may be necessary for other PLL clocks too, if
>> you can get out of bounds output frequency, by changing the dividers too
>> early or too late. So perhaps this code should be generalized for other
>> PLL clocks too, instead.
> 
> The documentation says that only the CPU and DDR PLLs can be dynamically
> changed after boot.

The question is what exactly is meant by after boot. :) Anyway, if the
kernel has no business changing some other PLLs, if there's code for
changing them, should it be dropped?

regards,
  Ondrej
Jean-Francois Moine July 1, 2016, 7:47 a.m. UTC | #6
On Fri, 1 Jul 2016 08:34:21 +0200
Ondřej Jirman <megous@megous.com> wrote:

> > The documentation says that only the CPU and DDR PLLs can be dynamically
> > changed after boot.
> 
> The question is what exactly is meant by after boot. :) Anyway, if the
> kernel has no business changing some other PLLs, if there's code for
> changing them, should it be dropped?

No, because all the other PLLs may not be initialized by the U-boot
(audio, video, gpu...), and also, their rate may be changed safely by
stopping them (gate).
Maxime Ripard July 15, 2016, 8:19 a.m. UTC | #7
On Fri, Jul 01, 2016 at 02:53:52AM +0200, Ondřej Jirman wrote:
> On 30.6.2016 22:40, Maxime Ripard wrote:
> > Hi,
> > 
> > On Sat, Jun 25, 2016 at 05:45:03AM +0200, megous@megous.com wrote:
> >> From: Ondrej Jirman <megous@megous.com>
> >>
> >> PLL1 on H3 requires special factors application algorithm,
> >> when the rate is changed. This algorithm was extracted
> >> from the arisc code that handles frequency scaling
> >> in the BSP kernel.
> >>
> >> This commit adds optional apply function to
> >> struct factors_data, that can implement non-trivial
> >> factors application method, when necessary.
> >>
> >> Also struct clk_factors_config is extended with position
> >> of the PLL lock flag.
> > 
> > Have you tested the current implementation, and found that it was not
> > working, or did you duplicate the arisc code directly?
> 
> Also of note is that similar code probably doesn't crash in u-boot,
> because there, before changing the PLL1 clock, the cpu is switched to
> 24MHz osc, so it is not overclocked, even if factors align in such a way
> that you'd get the behavior I described in the other email.

That's also something that we can do.

See Meson's clk-cpu clock notifiers for example.

Maxime
Maxime Ripard July 15, 2016, 8:53 a.m. UTC | #8
On Fri, Jul 01, 2016 at 02:50:57AM +0200, Ondřej Jirman wrote:
> >>  /**
> >> + * sun8i_h3_apply_pll1_factors() - applies n, k, m, p factors to the
> >> + * register using an algorithm that tries to reserve the PLL lock
> >> + */
> >> +
> >> +static void sun8i_h3_apply_pll1_factors(struct clk_factors *factors, struct factors_request *req)
> >> +{
> >> +	const struct clk_factors_config *config = factors->config;
> >> +	u32 reg;
> >> +
> >> +	/* Fetch the register value */
> >> +	reg = readl(factors->reg);
> >> +
> >> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) < req->p) {
> >> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
> >> +
> >> +		writel(reg, factors->reg);
> >> +		__delay(2000);
> >> +	}
> > 
> > So there was some doubts about the fact that P was being used, or at
> > least that it was useful.
> 
> p is necessary to reduce frequencies below 288 MHz according to the
> datasheet.

Yes, but you could reach those frequencies without P, too, and it's
not part of any OPP provided by Allwinner.

> >> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) < req->m) {
> >> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
> >> +
> >> +		writel(reg, factors->reg);
> >> +		__delay(2000);
> >> +	}
> >> +
> >> +	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req->n);
> >> +	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req->k);
> >> +
> >> +	writel(reg, factors->reg);
> >> +	__delay(20);
> >> +
> >> +	while (!(readl(factors->reg) & (1 << config->lock)));
> > 
> > So, they are applying the dividers first, and then applying the
> > multipliers, and then wait for the PLL to stabilize.
> 
> Not exactly, first we are increasing dividers if the new dividers are
> higher that that what's already set. This ensures that because
> application of dividers is immediate by the design of the PLL, the
> application of multipliers isn't. So the VCO would still run at the same
> frequency for a while gradually rising to a new value for example,
> while the dividers would be reduced immediately. Leading to crash.
> 
> PLL
> --------------------------
> PRE DIV(f0) -> VCO(f1) -> POST DIV(f2)
>    P             K,N           M
> 
> Example: (we set all factors at once, reducing dividers and multipliers
> at the same time at 0ms - this should lead to no change in the output
> frequency, but...)
> 
> -1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
>  0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 2GHz       - boom
>  1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 1.5GHz
>  2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz
> 
> The current code crashes exactly at boom, you don't get any more
> instructions to execute.
> 
> See.
> 
> So this patch first increases dividers (only if necessary), changes
> multipliers and waits for change to happen (takes around 2000 cycles),
> and then decreases dividers (only if necessary).
> 
> So we get:
> 
> -1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
>  0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz   - no boom, multiplier
>                                              reduced
>  1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 0.75GHz
> 1.9ms: f0 = 24MHz, f1 = 1GHz,   f2 = 0.5GHz - we got PLL sync
>  2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz   - and here we reduce divider
> at last

Awesome explanation, thanks!

So I guess it really all boils down to the fact that the CPU is
clocked way outside of it's operating frequency while the PLL
stabilizes, right?

If so, then yes, trying to switch to the 24MHz oscillator before
applying the factors, and then switching back when the PLL is stable
would be a nice solution.

I just checked, and all the SoCs we've had so far have that
possibility, so if it works, for now, I'd like to stick to that.

> >> +
> >> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) > req->m) {
> >> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
> >> +
> >> +		writel(reg, factors->reg);
> >> +		__delay(2000);
> >> +	}
> >> +
> >> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) > req->p) {
> >> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
> >> +
> >> +		writel(reg, factors->reg);
> >> +		__delay(2000);
> >> +	}
> > 
> > However, this is kind of weird, why would you need to re-apply the
> > dividers? Nothing really changes. Have you tried without that part?
> 
> See above, we either increase before PLL change, or reduce dividers
> after the change. Nothing is re-applied.
> 
> > Since this is really specific, I guess you could simply make the
> > clk_ops for the nkmp clocks public, and just re-implement set_rate
> > using that logic.
> 
> I would argue that this may be necessary for other PLL clocks too, if
> you can get out of bounds output frequency, by changing the dividers too
> early or too late. So perhaps this code should be generalized for other
> PLL clocks too, instead.

We can scale down the problem a bit. The only PLL we modify are the
CPU, audio and video ones.

The CPU should definitely be addressed, but what would be the outcome
of an out of bounds audio pll for example? Is it just taking more time
to stabilize, or would it hurt the system?

In the former case, then we can just wait. In the latter, we of course
need to come up with a solution.

Maxime
Ondřej Jirman July 15, 2016, 10:38 a.m. UTC | #9
On 15.7.2016 10:53, Maxime Ripard wrote:
> On Fri, Jul 01, 2016 at 02:50:57AM +0200, Ondřej Jirman wrote:
>>>>  /**
>>>> + * sun8i_h3_apply_pll1_factors() - applies n, k, m, p factors to the
>>>> + * register using an algorithm that tries to reserve the PLL lock
>>>> + */
>>>> +
>>>> +static void sun8i_h3_apply_pll1_factors(struct clk_factors *factors, struct factors_request *req)
>>>> +{
>>>> +	const struct clk_factors_config *config = factors->config;
>>>> +	u32 reg;
>>>> +
>>>> +	/* Fetch the register value */
>>>> +	reg = readl(factors->reg);
>>>> +
>>>> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) < req->p) {
>>>> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
>>>> +
>>>> +		writel(reg, factors->reg);
>>>> +		__delay(2000);
>>>> +	}
>>>
>>> So there was some doubts about the fact that P was being used, or at
>>> least that it was useful.
>>
>> p is necessary to reduce frequencies below 288 MHz according to the
>> datasheet.
> 
> Yes, but you could reach those frequencies without P, too, and it's
> not part of any OPP provided by Allwinner.

The arisc firmware for H3 contains table of factors for frequences from
0 to 2GHz and, P is used below 240MHz. M is never used, BTW. (other
datasheets specify M as for testing use only, whatever that means - not
H3, but it seems it's the same PLL block)


... snip ...
	{ .n = 9, .k = 0, .m = 0, .p = 2 }, // 60 => 60 MHz
	{ .n = 10, .k = 0, .m = 0, .p = 2 }, // 66 => 66 MHz
	{ .n = 11, .k = 0, .m = 0, .p = 2 }, // 72 => 72 MHz
	{ .n = 12, .k = 0, .m = 0, .p = 2 }, // 78 => 78 MHz
	{ .n = 13, .k = 0, .m = 0, .p = 2 }, // 84 => 84 MHz
	{ .n = 14, .k = 0, .m = 0, .p = 2 }, // 90 => 90 MHz
	{ .n = 15, .k = 0, .m = 0, .p = 2 }, // 96 => 96 MHz
	{ .n = 16, .k = 0, .m = 0, .p = 2 }, // 102 => 102 MHz
	{ .n = 17, .k = 0, .m = 0, .p = 2 }, // 108 => 108 MHz
	{ .n = 18, .k = 0, .m = 0, .p = 2 }, // 114 => 114 MHz
	{ .n = 9, .k = 0, .m = 0, .p = 1 }, // 120 => 120 MHz
	{ .n = 10, .k = 0, .m = 0, .p = 1 }, // 126 => 132 MHz
	{ .n = 10, .k = 0, .m = 0, .p = 1 }, // 132 => 132 MHz
	{ .n = 11, .k = 0, .m = 0, .p = 1 }, // 138 => 144 MHz
	{ .n = 11, .k = 0, .m = 0, .p = 1 }, // 144 => 144 MHz
	{ .n = 12, .k = 0, .m = 0, .p = 1 }, // 150 => 156 MHz
	{ .n = 12, .k = 0, .m = 0, .p = 1 }, // 156 => 156 MHz
	{ .n = 13, .k = 0, .m = 0, .p = 1 }, // 162 => 168 MHz
	{ .n = 13, .k = 0, .m = 0, .p = 1 }, // 168 => 168 MHz
	{ .n = 14, .k = 0, .m = 0, .p = 1 }, // 174 => 180 MHz
	{ .n = 14, .k = 0, .m = 0, .p = 1 }, // 180 => 180 MHz
	{ .n = 15, .k = 0, .m = 0, .p = 1 }, // 186 => 192 MHz
	{ .n = 15, .k = 0, .m = 0, .p = 1 }, // 192 => 192 MHz
	{ .n = 16, .k = 0, .m = 0, .p = 1 }, // 198 => 204 MHz
	{ .n = 16, .k = 0, .m = 0, .p = 1 }, // 204 => 204 MHz
	{ .n = 17, .k = 0, .m = 0, .p = 1 }, // 210 => 216 MHz
	{ .n = 17, .k = 0, .m = 0, .p = 1 }, // 216 => 216 MHz
	{ .n = 18, .k = 0, .m = 0, .p = 1 }, // 222 => 228 MHz
	{ .n = 18, .k = 0, .m = 0, .p = 1 }, // 228 => 228 MHz
	{ .n = 9, .k = 0, .m = 0, .p = 0 }, // 234 => 240 MHz
	{ .n = 9, .k = 0, .m = 0, .p = 0 }, // 240 => 240 MHz
	{ .n = 10, .k = 0, .m = 0, .p = 0 }, // 246 => 264 MHz
	{ .n = 10, .k = 0, .m = 0, .p = 0 }, // 252 => 264 MHz
	{ .n = 10, .k = 0, .m = 0, .p = 0 }, // 258 => 264 MHz
	{ .n = 10, .k = 0, .m = 0, .p = 0 }, // 264 => 264 MHz
	{ .n = 11, .k = 0, .m = 0, .p = 0 }, // 270 => 288 MHz
	{ .n = 11, .k = 0, .m = 0, .p = 0 }, // 276 => 288 MHz
... snip ...


>>>> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) < req->m) {
>>>> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
>>>> +
>>>> +		writel(reg, factors->reg);
>>>> +		__delay(2000);
>>>> +	}
>>>> +
>>>> +	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req->n);
>>>> +	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req->k);
>>>> +
>>>> +	writel(reg, factors->reg);
>>>> +	__delay(20);
>>>> +
>>>> +	while (!(readl(factors->reg) & (1 << config->lock)));
>>>
>>> So, they are applying the dividers first, and then applying the
>>> multipliers, and then wait for the PLL to stabilize.
>>
>> Not exactly, first we are increasing dividers if the new dividers are
>> higher that that what's already set. This ensures that because
>> application of dividers is immediate by the design of the PLL, the
>> application of multipliers isn't. So the VCO would still run at the same
>> frequency for a while gradually rising to a new value for example,
>> while the dividers would be reduced immediately. Leading to crash.
>>
>> PLL
>> --------------------------
>> PRE DIV(f0) -> VCO(f1) -> POST DIV(f2)
>>    P             K,N           M
>>
>> Example: (we set all factors at once, reducing dividers and multipliers
>> at the same time at 0ms - this should lead to no change in the output
>> frequency, but...)
>>
>> -1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
>>  0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 2GHz       - boom
>>  1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 1.5GHz
>>  2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz
>>
>> The current code crashes exactly at boom, you don't get any more
>> instructions to execute.
>>
>> See.
>>
>> So this patch first increases dividers (only if necessary), changes
>> multipliers and waits for change to happen (takes around 2000 cycles),
>> and then decreases dividers (only if necessary).
>>
>> So we get:
>>
>> -1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
>>  0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz   - no boom, multiplier
>>                                              reduced
>>  1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 0.75GHz
>> 1.9ms: f0 = 24MHz, f1 = 1GHz,   f2 = 0.5GHz - we got PLL sync
>>  2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz   - and here we reduce divider
>> at last
> 
> Awesome explanation, thanks!
> 
> So I guess it really all boils down to the fact that the CPU is
> clocked way outside of it's operating frequency while the PLL
> stabilizes, right?

It may be, depending on the factors before and after change. I haven't
tested what factors the mainline kernel calculates for each frequency.
The arisc never uses M, and only uses P at frequencies that would not
allow for this behavior.

I created a test program for arisc that runs a loop on the main CPU
using msgbox to send pings to the arisc CPU, and the vary the PLL1
randomly from the arisc, and haven't been able to lockup the main CPU
yet with either method.

There's also AXI clock, that depends on PLL1. Arisc firmware uses the
same method to change it while changing CPUX clock. If the clock would
rise above certain frequency, AXI divider is increased before the PLL1
change. If it would fall below certain frequency it is decreased after
the PLL1 change.

Though, aw kernel has axi divider set to 3 for all PLL1 frequencies, so
this has no effect in practice.

It's all smoke and mirrors. :D

> If so, then yes, trying to switch to the 24MHz oscillator before
> applying the factors, and then switching back when the PLL is stable
> would be a nice solution.
> 
> I just checked, and all the SoCs we've had so far have that
> possibility, so if it works, for now, I'd like to stick to that.

It would need to be tested. U-boot does the change only once, while the
kernel would be doing it all the time and between various frequencies
and PLL settings. So the issues may show up with this solution too.

>>>> +
>>>> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) > req->m) {
>>>> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
>>>> +
>>>> +		writel(reg, factors->reg);
>>>> +		__delay(2000);
>>>> +	}
>>>> +
>>>> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) > req->p) {
>>>> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
>>>> +
>>>> +		writel(reg, factors->reg);
>>>> +		__delay(2000);
>>>> +	}
>>>
>>> However, this is kind of weird, why would you need to re-apply the
>>> dividers? Nothing really changes. Have you tried without that part?
>>
>> See above, we either increase before PLL change, or reduce dividers
>> after the change. Nothing is re-applied.
>>
>>> Since this is really specific, I guess you could simply make the
>>> clk_ops for the nkmp clocks public, and just re-implement set_rate
>>> using that logic.
>>
>> I would argue that this may be necessary for other PLL clocks too, if
>> you can get out of bounds output frequency, by changing the dividers too
>> early or too late. So perhaps this code should be generalized for other
>> PLL clocks too, instead.
> 
> We can scale down the problem a bit. The only PLL we modify are the
> CPU, audio and video ones.
> 
> The CPU should definitely be addressed, but what would be the outcome
> of an out of bounds audio pll for example? Is it just taking more time
> to stabilize, or would it hurt the system?

I have no idea. Given the low frequencies, you'll probably just get some
transient audio noise.

> In the former case, then we can just wait. In the latter, we of course
> need to come up with a solution.
> 
> Maxime
> 
>
Jean-Francois Moine July 15, 2016, 1:27 p.m. UTC | #10
On Fri, 15 Jul 2016 12:38:54 +0200
Ondřej Jirman <megous@megous.com> wrote:

> > If so, then yes, trying to switch to the 24MHz oscillator before
> > applying the factors, and then switching back when the PLL is stable
> > would be a nice solution.
> > 
> > I just checked, and all the SoCs we've had so far have that
> > possibility, so if it works, for now, I'd like to stick to that.
> 
> It would need to be tested. U-boot does the change only once, while the
> kernel would be doing it all the time and between various frequencies
> and PLL settings. So the issues may show up with this solution too.

I don't think this is a good idea: the CPU clock may be changed at any
time with the CPUFreq governor. I don't see the system moving from
1008MHz to 24MHz and then to 1200MHz when some computation is needed!

BTW, Ondřej, in my BPi M2+, I tried to change the CPU clock with your
code at kernel start time from 792MHz to 1008MHz, but the hardware
(arisc?) set an other value, and the system speed was lower than before
(the PLL-CPUx register is 0x90031521 on boot, I want to set it to
xxxx1410 and I read 0x91031f33 - sorry, I did not have a look at the
CPU SD pattern). Do you know why?
Ondřej Jirman July 15, 2016, 1:48 p.m. UTC | #11
On 15.7.2016 15:27, Jean-Francois Moine wrote:
> On Fri, 15 Jul 2016 12:38:54 +0200
> Ondřej Jirman <megous@megous.com> wrote:
> 
>>> If so, then yes, trying to switch to the 24MHz oscillator before
>>> applying the factors, and then switching back when the PLL is stable
>>> would be a nice solution.
>>>
>>> I just checked, and all the SoCs we've had so far have that
>>> possibility, so if it works, for now, I'd like to stick to that.
>>
>> It would need to be tested. U-boot does the change only once, while the
>> kernel would be doing it all the time and between various frequencies
>> and PLL settings. So the issues may show up with this solution too.
> 
> I don't think this is a good idea: the CPU clock may be changed at any
> time with the CPUFreq governor. I don't see the system moving from
> 1008MHz to 24MHz and then to 1200MHz when some computation is needed!

PLL lock time is around 10-20us, I'd guess based on the number of loops
in the PLL lock wait loop. So unless you'll be switching frequencies
many times per second, this should be barely noticeable.

But I'd like a different solution too.

> BTW, Ondřej, in my BPi M2+, I tried to change the CPU clock with your
> code at kernel start time from 792MHz to 1008MHz, but the hardware
> (arisc?) set an other value, and the system speed was lower than before
> (the PLL-CPUx register is 0x90031521 on boot, I want to set it to
> xxxx1410 and I read 0x91031f33 - sorry, I did not have a look at the
> CPU SD pattern). Do you know why?

No idea. Arisc shouldn't do anything, unless you load some firmware into
it, and release its reset line.
Michal Suchanek July 15, 2016, 2:22 p.m. UTC | #12
Hello,

On 15 July 2016 at 15:48, Ondřej Jirman <megous@megous.com> wrote:
>
>
> On 15.7.2016 15:27, Jean-Francois Moine wrote:
>> On Fri, 15 Jul 2016 12:38:54 +0200
>> Ondřej Jirman <megous@megous.com> wrote:
>>
>>>> If so, then yes, trying to switch to the 24MHz oscillator before
>>>> applying the factors, and then switching back when the PLL is stable
>>>> would be a nice solution.
>>>>
>>>> I just checked, and all the SoCs we've had so far have that
>>>> possibility, so if it works, for now, I'd like to stick to that.
>>>
>>> It would need to be tested. U-boot does the change only once, while the
>>> kernel would be doing it all the time and between various frequencies
>>> and PLL settings. So the issues may show up with this solution too.
>>
>> I don't think this is a good idea: the CPU clock may be changed at any
>> time with the CPUFreq governor. I don't see the system moving from
>> 1008MHz to 24MHz and then to 1200MHz when some computation is needed!
>
> PLL lock time is around 10-20us, I'd guess based on the number of loops
> in the PLL lock wait loop. So unless you'll be switching frequencies
> many times per second, this should be barely noticeable.
>
> But I'd like a different solution too.

Do you have a patch to test this?

For me changing CPU frequency on Orange Pi One always locks up the
system. I keep running it on the u-boot setup 1.08GHz at 1.1V

Thanks

Michal
Ondřej Jirman July 15, 2016, 4:33 p.m. UTC | #13
On 15.7.2016 16:22, Michal Suchanek wrote:
> Hello,
> 
> On 15 July 2016 at 15:48, Ondřej Jirman <megous@megous.com> wrote:
>>
>>
>> On 15.7.2016 15:27, Jean-Francois Moine wrote:
>>> On Fri, 15 Jul 2016 12:38:54 +0200
>>> Ondřej Jirman <megous@megous.com> wrote:
>>>
>>>>> If so, then yes, trying to switch to the 24MHz oscillator before
>>>>> applying the factors, and then switching back when the PLL is stable
>>>>> would be a nice solution.
>>>>>
>>>>> I just checked, and all the SoCs we've had so far have that
>>>>> possibility, so if it works, for now, I'd like to stick to that.
>>>>
>>>> It would need to be tested. U-boot does the change only once, while the
>>>> kernel would be doing it all the time and between various frequencies
>>>> and PLL settings. So the issues may show up with this solution too.
>>>
>>> I don't think this is a good idea: the CPU clock may be changed at any
>>> time with the CPUFreq governor. I don't see the system moving from
>>> 1008MHz to 24MHz and then to 1200MHz when some computation is needed!
>>
>> PLL lock time is around 10-20us, I'd guess based on the number of loops
>> in the PLL lock wait loop. So unless you'll be switching frequencies
>> many times per second, this should be barely noticeable.
>>
>> But I'd like a different solution too.
> 
> Do you have a patch to test this?
> 
> For me changing CPU frequency on Orange Pi One always locks up the
> system. I keep running it on the u-boot setup 1.08GHz at 1.1V

Not anymore. But it's quite simple to hack it into the
drivers/clk/sunxi/clk-factors.c:clk_factors_set_rate()

You can look at u-boot arch/arm/mach-sunxi/clock_sun6i.c:clock_set_pll1
for how to change the CPU clock source.

> Thanks
> 
> Michal
>
Maxime Ripard July 21, 2016, 9:48 a.m. UTC | #14
On Fri, Jul 15, 2016 at 12:38:54PM +0200, Ondřej Jirman wrote:
> On 15.7.2016 10:53, Maxime Ripard wrote:
> > On Fri, Jul 01, 2016 at 02:50:57AM +0200, Ondřej Jirman wrote:
> >>>>  /**
> >>>> + * sun8i_h3_apply_pll1_factors() - applies n, k, m, p factors to the
> >>>> + * register using an algorithm that tries to reserve the PLL lock
> >>>> + */
> >>>> +
> >>>> +static void sun8i_h3_apply_pll1_factors(struct clk_factors *factors, struct factors_request *req)
> >>>> +{
> >>>> +	const struct clk_factors_config *config = factors->config;
> >>>> +	u32 reg;
> >>>> +
> >>>> +	/* Fetch the register value */
> >>>> +	reg = readl(factors->reg);
> >>>> +
> >>>> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) < req->p) {
> >>>> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
> >>>> +
> >>>> +		writel(reg, factors->reg);
> >>>> +		__delay(2000);
> >>>> +	}
> >>>
> >>> So there was some doubts about the fact that P was being used, or at
> >>> least that it was useful.
> >>
> >> p is necessary to reduce frequencies below 288 MHz according to the
> >> datasheet.
> > 
> > Yes, but you could reach those frequencies without P, too, and it's
> > not part of any OPP provided by Allwinner.
> 
> The arisc firmware for H3 contains table of factors for frequences from
> 0 to 2GHz and, P is used below 240MHz. M is never used, BTW. (other
> datasheets specify M as for testing use only, whatever that means - not
> H3, but it seems it's the same PLL block)

Interesting. Which SoCs in particular?

> >>>> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) < req->m) {
> >>>> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
> >>>> +
> >>>> +		writel(reg, factors->reg);
> >>>> +		__delay(2000);
> >>>> +	}
> >>>> +
> >>>> +	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req->n);
> >>>> +	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req->k);
> >>>> +
> >>>> +	writel(reg, factors->reg);
> >>>> +	__delay(20);
> >>>> +
> >>>> +	while (!(readl(factors->reg) & (1 << config->lock)));
> >>>
> >>> So, they are applying the dividers first, and then applying the
> >>> multipliers, and then wait for the PLL to stabilize.
> >>
> >> Not exactly, first we are increasing dividers if the new dividers are
> >> higher that that what's already set. This ensures that because
> >> application of dividers is immediate by the design of the PLL, the
> >> application of multipliers isn't. So the VCO would still run at the same
> >> frequency for a while gradually rising to a new value for example,
> >> while the dividers would be reduced immediately. Leading to crash.
> >>
> >> PLL
> >> --------------------------
> >> PRE DIV(f0) -> VCO(f1) -> POST DIV(f2)
> >>    P             K,N           M
> >>
> >> Example: (we set all factors at once, reducing dividers and multipliers
> >> at the same time at 0ms - this should lead to no change in the output
> >> frequency, but...)
> >>
> >> -1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
> >>  0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 2GHz       - boom
> >>  1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 1.5GHz
> >>  2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz
> >>
> >> The current code crashes exactly at boom, you don't get any more
> >> instructions to execute.
> >>
> >> See.
> >>
> >> So this patch first increases dividers (only if necessary), changes
> >> multipliers and waits for change to happen (takes around 2000 cycles),
> >> and then decreases dividers (only if necessary).
> >>
> >> So we get:
> >>
> >> -1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
> >>  0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz   - no boom, multiplier
> >>                                              reduced
> >>  1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 0.75GHz
> >> 1.9ms: f0 = 24MHz, f1 = 1GHz,   f2 = 0.5GHz - we got PLL sync
> >>  2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz   - and here we reduce divider
> >> at last
> > 
> > Awesome explanation, thanks!
> > 
> > So I guess it really all boils down to the fact that the CPU is
> > clocked way outside of it's operating frequency while the PLL
> > stabilizes, right?
> 
> It may be, depending on the factors before and after change. I haven't
> tested what factors the mainline kernel calculates for each frequency.
> The arisc never uses M, and only uses P at frequencies that would not
> allow for this behavior.
> 
> I created a test program for arisc that runs a loop on the main CPU
> using msgbox to send pings to the arisc CPU, and the vary the PLL1
> randomly from the arisc, and haven't been able to lockup the main CPU
> yet with either method.
> 
> There's also AXI clock, that depends on PLL1. Arisc firmware uses the
> same method to change it while changing CPUX clock. If the clock would
> rise above certain frequency, AXI divider is increased before the PLL1
> change. If it would fall below certain frequency it is decreased after
> the PLL1 change.

If we ever need to change that, we can always rely on a clock notifier
to adjust the divider before the change happen (and support all the
scenarios, like the clock change has been aborted).

> > If so, then yes, trying to switch to the 24MHz oscillator before
> > applying the factors, and then switching back when the PLL is stable
> > would be a nice solution.
> > 
> > I just checked, and all the SoCs we've had so far have that
> > possibility, so if it works, for now, I'd like to stick to that.
> 
> It would need to be tested. U-boot does the change only once, while the
> kernel would be doing it all the time and between various frequencies
> and PLL settings. So the issues may show up with this solution too.

That would have the benefit of being quite easy to document, not be a
huge amount of code and it would work on all the CPUs PLLs we have so
far, so still, a pretty big win. If it doesn't, of course, we don't
really have the choice.

Maxime
Maxime Ripard July 21, 2016, 9:51 a.m. UTC | #15
On Fri, Jul 15, 2016 at 03:27:56PM +0200, Jean-Francois Moine wrote:
> On Fri, 15 Jul 2016 12:38:54 +0200
> Ondřej Jirman <megous@megous.com> wrote:
> 
> > > If so, then yes, trying to switch to the 24MHz oscillator before
> > > applying the factors, and then switching back when the PLL is stable
> > > would be a nice solution.
> > > 
> > > I just checked, and all the SoCs we've had so far have that
> > > possibility, so if it works, for now, I'd like to stick to that.
> > 
> > It would need to be tested. U-boot does the change only once, while the
> > kernel would be doing it all the time and between various frequencies
> > and PLL settings. So the issues may show up with this solution too.
> 
> I don't think this is a good idea: the CPU clock may be changed at any
> time with the CPUFreq governor. I don't see the system moving from
> 1008MHz to 24MHz and then to 1200MHz when some computation is needed!

It wouldn't happen that often. The sampling rate for the governor is
1000 times the latency, so, at most, 0.1% of the time would be spent
at 24MHz.

And if you're really concerned about performances, you won't enable
cpufreq anyway.

Maxime
Ondřej Jirman July 21, 2016, 9:52 a.m. UTC | #16
On 21.7.2016 11:48, Maxime Ripard wrote:
> On Fri, Jul 15, 2016 at 12:38:54PM +0200, Ondřej Jirman wrote:
>> On 15.7.2016 10:53, Maxime Ripard wrote:
>>> On Fri, Jul 01, 2016 at 02:50:57AM +0200, Ondřej Jirman wrote:
>>>>>>  /**
>>>>>> + * sun8i_h3_apply_pll1_factors() - applies n, k, m, p factors to the
>>>>>> + * register using an algorithm that tries to reserve the PLL lock
>>>>>> + */
>>>>>> +
>>>>>> +static void sun8i_h3_apply_pll1_factors(struct clk_factors *factors, struct factors_request *req)
>>>>>> +{
>>>>>> +	const struct clk_factors_config *config = factors->config;
>>>>>> +	u32 reg;
>>>>>> +
>>>>>> +	/* Fetch the register value */
>>>>>> +	reg = readl(factors->reg);
>>>>>> +
>>>>>> +	if (FACTOR_GET(config->pshift, config->pwidth, reg) < req->p) {
>>>>>> +		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
>>>>>> +
>>>>>> +		writel(reg, factors->reg);
>>>>>> +		__delay(2000);
>>>>>> +	}
>>>>>
>>>>> So there was some doubts about the fact that P was being used, or at
>>>>> least that it was useful.
>>>>
>>>> p is necessary to reduce frequencies below 288 MHz according to the
>>>> datasheet.
>>>
>>> Yes, but you could reach those frequencies without P, too, and it's
>>> not part of any OPP provided by Allwinner.
>>
>> The arisc firmware for H3 contains table of factors for frequences from
>> 0 to 2GHz and, P is used below 240MHz. M is never used, BTW. (other
>> datasheets specify M as for testing use only, whatever that means - not
>> H3, but it seems it's the same PLL block)
> 
> Interesting. Which SoCs in particular?
> 
>>>>>> +	if (FACTOR_GET(config->mshift, config->mwidth, reg) < req->m) {
>>>>>> +		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
>>>>>> +
>>>>>> +		writel(reg, factors->reg);
>>>>>> +		__delay(2000);
>>>>>> +	}
>>>>>> +
>>>>>> +	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req->n);
>>>>>> +	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req->k);
>>>>>> +
>>>>>> +	writel(reg, factors->reg);
>>>>>> +	__delay(20);
>>>>>> +
>>>>>> +	while (!(readl(factors->reg) & (1 << config->lock)));
>>>>>
>>>>> So, they are applying the dividers first, and then applying the
>>>>> multipliers, and then wait for the PLL to stabilize.
>>>>
>>>> Not exactly, first we are increasing dividers if the new dividers are
>>>> higher that that what's already set. This ensures that because
>>>> application of dividers is immediate by the design of the PLL, the
>>>> application of multipliers isn't. So the VCO would still run at the same
>>>> frequency for a while gradually rising to a new value for example,
>>>> while the dividers would be reduced immediately. Leading to crash.
>>>>
>>>> PLL
>>>> --------------------------
>>>> PRE DIV(f0) -> VCO(f1) -> POST DIV(f2)
>>>>    P             K,N           M
>>>>
>>>> Example: (we set all factors at once, reducing dividers and multipliers
>>>> at the same time at 0ms - this should lead to no change in the output
>>>> frequency, but...)
>>>>
>>>> -1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
>>>>  0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 2GHz       - boom
>>>>  1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 1.5GHz
>>>>  2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz
>>>>
>>>> The current code crashes exactly at boom, you don't get any more
>>>> instructions to execute.
>>>>
>>>> See.
>>>>
>>>> So this patch first increases dividers (only if necessary), changes
>>>> multipliers and waits for change to happen (takes around 2000 cycles),
>>>> and then decreases dividers (only if necessary).
>>>>
>>>> So we get:
>>>>
>>>> -1ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz
>>>>  0ms: f0 = 24MHz, f1 = 2GHz,   f2 = 1GHz   - no boom, multiplier
>>>>                                              reduced
>>>>  1ms: f0 = 24MHz, f1 = 1.5GHz, f2 = 0.75GHz
>>>> 1.9ms: f0 = 24MHz, f1 = 1GHz,   f2 = 0.5GHz - we got PLL sync
>>>>  2ms: f0 = 24MHz, f1 = 1GHz,   f2 = 1GHz   - and here we reduce divider
>>>> at last
>>>
>>> Awesome explanation, thanks!
>>>
>>> So I guess it really all boils down to the fact that the CPU is
>>> clocked way outside of it's operating frequency while the PLL
>>> stabilizes, right?
>>
>> It may be, depending on the factors before and after change. I haven't
>> tested what factors the mainline kernel calculates for each frequency.
>> The arisc never uses M, and only uses P at frequencies that would not
>> allow for this behavior.
>>
>> I created a test program for arisc that runs a loop on the main CPU
>> using msgbox to send pings to the arisc CPU, and the vary the PLL1
>> randomly from the arisc, and haven't been able to lockup the main CPU
>> yet with either method.
>>
>> There's also AXI clock, that depends on PLL1. Arisc firmware uses the
>> same method to change it while changing CPUX clock. If the clock would
>> rise above certain frequency, AXI divider is increased before the PLL1
>> change. If it would fall below certain frequency it is decreased after
>> the PLL1 change.
> 
> If we ever need to change that, we can always rely on a clock notifier
> to adjust the divider before the change happen (and support all the
> scenarios, like the clock change has been aborted).
> 
>>> If so, then yes, trying to switch to the 24MHz oscillator before
>>> applying the factors, and then switching back when the PLL is stable
>>> would be a nice solution.
>>>
>>> I just checked, and all the SoCs we've had so far have that
>>> possibility, so if it works, for now, I'd like to stick to that.
>>
>> It would need to be tested. U-boot does the change only once, while the
>> kernel would be doing it all the time and between various frequencies
>> and PLL settings. So the issues may show up with this solution too.
> 
> That would have the benefit of being quite easy to document, not be a
> huge amount of code and it would work on all the CPUs PLLs we have so
> far, so still, a pretty big win. If it doesn't, of course, we don't
> really have the choice.

It's probably more code though. It has to access different register from
the one that is already defined in dts, which would add a lot of code
and require dts changes. The original patch I sent is simpler than that.

regards,
  Ondrej

> Maxime
>
Maxime Ripard July 26, 2016, 6:32 a.m. UTC | #17
On Thu, Jul 21, 2016 at 11:52:15AM +0200, Ondřej Jirman wrote:
> >>> If so, then yes, trying to switch to the 24MHz oscillator before
> >>> applying the factors, and then switching back when the PLL is stable
> >>> would be a nice solution.
> >>>
> >>> I just checked, and all the SoCs we've had so far have that
> >>> possibility, so if it works, for now, I'd like to stick to that.
> >>
> >> It would need to be tested. U-boot does the change only once, while the
> >> kernel would be doing it all the time and between various frequencies
> >> and PLL settings. So the issues may show up with this solution too.
> > 
> > That would have the benefit of being quite easy to document, not be a
> > huge amount of code and it would work on all the CPUs PLLs we have so
> > far, so still, a pretty big win. If it doesn't, of course, we don't
> > really have the choice.
> 
> It's probably more code though. It has to access different register from
> the one that is already defined in dts, which would add a lot of code
> and require dts changes. The original patch I sent is simpler than that.

Why?

You can use container_of to retrieve the parent structure of the clock
notifier, and then you get a ccu_common structure pointer, with the
CCU base address, the clock register, its lock, etc.

Look at what is done in drivers/clk/meson/clk-cpu.c. It's like 20 LoC.

I don't really get why anything should be changed in the DT, or why it
would add a lot of code. Or maybe we're not talking about the same
thing?

Maxime
Ondřej Jirman July 28, 2016, 11:27 a.m. UTC | #18
Hi Maxime,

I don't have your sunxi-ng clock patches in my mailbox, so I'm replying
to this.

On 26.7.2016 08:32, Maxime Ripard wrote:
> On Thu, Jul 21, 2016 at 11:52:15AM +0200, Ondřej Jirman wrote:
>>>>> If so, then yes, trying to switch to the 24MHz oscillator before
>>>>> applying the factors, and then switching back when the PLL is stable
>>>>> would be a nice solution.
>>>>>
>>>>> I just checked, and all the SoCs we've had so far have that
>>>>> possibility, so if it works, for now, I'd like to stick to that.
>>>>
>>>> It would need to be tested. U-boot does the change only once, while the
>>>> kernel would be doing it all the time and between various frequencies
>>>> and PLL settings. So the issues may show up with this solution too.
>>>
>>> That would have the benefit of being quite easy to document, not be a
>>> huge amount of code and it would work on all the CPUs PLLs we have so
>>> far, so still, a pretty big win. If it doesn't, of course, we don't
>>> really have the choice.
>>
>> It's probably more code though. It has to access different register from
>> the one that is already defined in dts, which would add a lot of code
>> and require dts changes. The original patch I sent is simpler than that.
> 
> Why?
> 
> You can use container_of to retrieve the parent structure of the clock
> notifier, and then you get a ccu_common structure pointer, with the
> CCU base address, the clock register, its lock, etc.
> 
> Look at what is done in drivers/clk/meson/clk-cpu.c. It's like 20 LoC.
> 
> I don't really get why anything should be changed in the DT, or why it
> would add a lot of code. Or maybe we're not talking about the same
> thing?

I've looked at the new CCU code, particularly ccu_nkmp.c, and found that
it very liberally uses divider parameters, even in situations that are
out of spec compared to the current code in the kernel.

In the current code and especially in the original vendor code, divider
parameters are used as last resort only. Presumably because, of the
inherent trouble in changing them, as I described to you in other email.

The new ccu code uses dividers often and even at very high frequencies,
which goes against the spec.

In the vendor code M is never anything else but 0, and P is used only
for frequencies below 288MHz, which matches the H3 datasheet, which says:

"The P factor only use in the condition that PLL output less than 288
MHz."

Also other datasheets of similar socs from Allwinner state that M should
not be used in production code. So it may be that they either forgot to
state it in the H3 datasheet, or it can be used. In any case, they never
use M in their code, so it may be wise to keep to that.

When I boot with the new CCU code I get this:

PLL_CPUX = 0x00001B21 : M = 2, K = 3, N = 28, P = 1, EN = 0
PLL_CPUX : freq = 1008MHz

Mathematically it works, but it is against the spec.

Also, this:

analyzing CPU 0:
  driver: cpufreq-dt
  CPUs which run at the same hardware frequency: 0 1 2 3
  CPUs which need to have their frequency coordinated by software: 0 1 2 3
  maximum transition latency: 1.74 ms
  hardware limits: 120 MHz - 1.37 GHz
  available frequency steps:  120 MHz, 240 MHz, 480 MHz, 648 MHz, 816
MHz, 960 MHz, 1.01 GHz, 1.06 GHz, 1.10 GHz, 1.15 GHz, 1.20 GHz, 1.22
GHz, 1.25 GHz, 1.30 GHz, 1.34 GHz, 1.37 GHz
  available cpufreq governors: conservative ondemand userspace powersave
performance
  current policy: frequency should be within 240 MHz and 240 MHz.
                  The governor "performance" may decide which speed to use
                  within this range.
  current CPU frequency: 24.0 MHz (asserted by call to hardware)


Somehow, the new CCU code sets the CPUX to 24MHz no matter what.

I'm using your pen/clk-rework branch without any other patches that were
later sent to the mailing list.

regards,
  Ondrej

> 
> Maxime
>
Chen-Yu Tsai July 28, 2016, 11:38 a.m. UTC | #19
Hi,

On Thu, Jul 28, 2016 at 7:27 PM, Ondřej Jirman <megous@megous.com> wrote:
> Hi Maxime,
>
> I don't have your sunxi-ng clock patches in my mailbox, so I'm replying
> to this.
>
> On 26.7.2016 08:32, Maxime Ripard wrote:
>> On Thu, Jul 21, 2016 at 11:52:15AM +0200, Ondřej Jirman wrote:
>>>>>> If so, then yes, trying to switch to the 24MHz oscillator before
>>>>>> applying the factors, and then switching back when the PLL is stable
>>>>>> would be a nice solution.
>>>>>>
>>>>>> I just checked, and all the SoCs we've had so far have that
>>>>>> possibility, so if it works, for now, I'd like to stick to that.
>>>>>
>>>>> It would need to be tested. U-boot does the change only once, while the
>>>>> kernel would be doing it all the time and between various frequencies
>>>>> and PLL settings. So the issues may show up with this solution too.
>>>>
>>>> That would have the benefit of being quite easy to document, not be a
>>>> huge amount of code and it would work on all the CPUs PLLs we have so
>>>> far, so still, a pretty big win. If it doesn't, of course, we don't
>>>> really have the choice.
>>>
>>> It's probably more code though. It has to access different register from
>>> the one that is already defined in dts, which would add a lot of code
>>> and require dts changes. The original patch I sent is simpler than that.
>>
>> Why?
>>
>> You can use container_of to retrieve the parent structure of the clock
>> notifier, and then you get a ccu_common structure pointer, with the
>> CCU base address, the clock register, its lock, etc.
>>
>> Look at what is done in drivers/clk/meson/clk-cpu.c. It's like 20 LoC.
>>
>> I don't really get why anything should be changed in the DT, or why it
>> would add a lot of code. Or maybe we're not talking about the same
>> thing?
>
> I've looked at the new CCU code, particularly ccu_nkmp.c, and found that
> it very liberally uses divider parameters, even in situations that are
> out of spec compared to the current code in the kernel.
>
> In the current code and especially in the original vendor code, divider
> parameters are used as last resort only. Presumably because, of the
> inherent trouble in changing them, as I described to you in other email.
>
> The new ccu code uses dividers often and even at very high frequencies,
> which goes against the spec.
>
> In the vendor code M is never anything else but 0, and P is used only
> for frequencies below 288MHz, which matches the H3 datasheet, which says:
>
> "The P factor only use in the condition that PLL output less than 288
> MHz."
>
> Also other datasheets of similar socs from Allwinner state that M should
> not be used in production code. So it may be that they either forgot to
> state it in the H3 datasheet, or it can be used. In any case, they never
> use M in their code, so it may be wise to keep to that.
>
> When I boot with the new CCU code I get this:
>
> PLL_CPUX = 0x00001B21 : M = 2, K = 3, N = 28, P = 1, EN = 0
> PLL_CPUX : freq = 1008MHz
>
> Mathematically it works, but it is against the spec.
>
> Also, this:
>
> analyzing CPU 0:
>   driver: cpufreq-dt
>   CPUs which run at the same hardware frequency: 0 1 2 3
>   CPUs which need to have their frequency coordinated by software: 0 1 2 3
>   maximum transition latency: 1.74 ms
>   hardware limits: 120 MHz - 1.37 GHz
>   available frequency steps:  120 MHz, 240 MHz, 480 MHz, 648 MHz, 816
> MHz, 960 MHz, 1.01 GHz, 1.06 GHz, 1.10 GHz, 1.15 GHz, 1.20 GHz, 1.22
> GHz, 1.25 GHz, 1.30 GHz, 1.34 GHz, 1.37 GHz
>   available cpufreq governors: conservative ondemand userspace powersave
> performance
>   current policy: frequency should be within 240 MHz and 240 MHz.
>                   The governor "performance" may decide which speed to use
>                   within this range.
>   current CPU frequency: 24.0 MHz (asserted by call to hardware)
>
>
> Somehow, the new CCU code sets the CPUX to 24MHz no matter what.
>
> I'm using your pen/clk-rework branch without any other patches that were
> later sent to the mailing list.

The H3 CPUX clock does not have the CLK_SET_RATE_PARENT flag set, which
means cpufreq's attempts to set the clk rate will, in some cases, be
ignored as the change is not propagated up to the parent PLL. In the
worst case it will select the 24 MHz oscillator. The fix is to add
CLK_SET_RATE_PARENT. And then you'll have to deal with the PLL potentially
going too high, as you stated in your other mail, and can be mitigated
with my clk notifier patch.

ChenYu

> regards,
>   Ondrej
>
>>
>> Maxime
>>
>
> --
> You received this message because you are subscribed to the Google Groups "linux-sunxi" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to linux-sunxi+unsubscribe@googlegroups.com.
> For more options, visit https://groups.google.com/d/optout.
Maxime Ripard July 28, 2016, 9 p.m. UTC | #20
Hi Ondrej,

On Thu, Jul 28, 2016 at 01:27:05PM +0200, Ondřej Jirman wrote:
> Hi Maxime,
> 
> I don't have your sunxi-ng clock patches in my mailbox, so I'm replying
> to this.

You can find it in the clock maintainers tree:
https://git.kernel.org/cgit/linux/kernel/git/clk/linux.git/log/?h=clk-sunxi-ng

> On 26.7.2016 08:32, Maxime Ripard wrote:
> > On Thu, Jul 21, 2016 at 11:52:15AM +0200, Ondřej Jirman wrote:
> >>>>> If so, then yes, trying to switch to the 24MHz oscillator before
> >>>>> applying the factors, and then switching back when the PLL is stable
> >>>>> would be a nice solution.
> >>>>>
> >>>>> I just checked, and all the SoCs we've had so far have that
> >>>>> possibility, so if it works, for now, I'd like to stick to that.
> >>>>
> >>>> It would need to be tested. U-boot does the change only once, while the
> >>>> kernel would be doing it all the time and between various frequencies
> >>>> and PLL settings. So the issues may show up with this solution too.
> >>>
> >>> That would have the benefit of being quite easy to document, not be a
> >>> huge amount of code and it would work on all the CPUs PLLs we have so
> >>> far, so still, a pretty big win. If it doesn't, of course, we don't
> >>> really have the choice.
> >>
> >> It's probably more code though. It has to access different register from
> >> the one that is already defined in dts, which would add a lot of code
> >> and require dts changes. The original patch I sent is simpler than that.
> > 
> > Why?
> > 
> > You can use container_of to retrieve the parent structure of the clock
> > notifier, and then you get a ccu_common structure pointer, with the
> > CCU base address, the clock register, its lock, etc.
> > 
> > Look at what is done in drivers/clk/meson/clk-cpu.c. It's like 20 LoC.
> > 
> > I don't really get why anything should be changed in the DT, or why it
> > would add a lot of code. Or maybe we're not talking about the same
> > thing?
> 
> I've looked at the new CCU code, particularly ccu_nkmp.c, and found that
> it very liberally uses divider parameters, even in situations that are
> out of spec compared to the current code in the kernel.
> 
> In the current code and especially in the original vendor code, divider
> parameters are used as last resort only. Presumably because, of the
> inherent trouble in changing them, as I described to you in other email.
> 
> The new ccu code uses dividers often and even at very high frequencies,
> which goes against the spec.
> 
> In the vendor code M is never anything else but 0, and P is used only
> for frequencies below 288MHz, which matches the H3 datasheet, which says:

In the vendor code, P is never used either. All the boards we had so
far don't go that low, so we cannot make any of these assumptions,
especially since the vendor code has had the bad habit of doing
something wrong and / or useless in the past.

However, this implementation is a new thing, and it was using the H3
precisely because of its early stage of support to use it as a testbed
for the more established.

If you feel like we should use a different formula to favour the
multipliers over the dividers (or want to change the class of the CPU
PLL to an NKM or something else, this is totally doable.

> "The P factor only use in the condition that PLL output less than 288
> MHz."

And the datasheet also had some issues, either misleading or wrong
comments in the past. Don't get me wrong, I'm not saying that this is
wrong, just that we should not follow it religiously, and that we
should trust more the experiments than the datasheet.

> Also other datasheets of similar socs from Allwinner state that M should
> not be used in production code.

Which ones specifically?

> So it may be that they either forgot to state it in the H3
> datasheet, or it can be used. In any case, they never use M in their
> code, so it may be wise to keep to that.
> 
> When I boot with the new CCU code I get this:
> 
> PLL_CPUX = 0x00001B21 : M = 2, K = 3, N = 28, P = 1, EN = 0
> PLL_CPUX : freq = 1008MHz
> 
> Mathematically it works, but it is against the spec.
> 
> Also, this:
> 
> analyzing CPU 0:
>   driver: cpufreq-dt
>   CPUs which run at the same hardware frequency: 0 1 2 3
>   CPUs which need to have their frequency coordinated by software: 0 1 2 3
>   maximum transition latency: 1.74 ms
>   hardware limits: 120 MHz - 1.37 GHz
>   available frequency steps:  120 MHz, 240 MHz, 480 MHz, 648 MHz, 816
> MHz, 960 MHz, 1.01 GHz, 1.06 GHz, 1.10 GHz, 1.15 GHz, 1.20 GHz, 1.22
> GHz, 1.25 GHz, 1.30 GHz, 1.34 GHz, 1.37 GHz
>   available cpufreq governors: conservative ondemand userspace powersave
> performance
>   current policy: frequency should be within 240 MHz and 240 MHz.
>                   The governor "performance" may decide which speed to use
>                   within this range.
>   current CPU frequency: 24.0 MHz (asserted by call to hardware)
> 
> 
> Somehow, the new CCU code sets the CPUX to 24MHz no matter what.
> 
> I'm using your pen/clk-rework branch without any other patches that were
> later sent to the mailing list.

It's probably because of CLK_SET_RATE_PARENT on the CPU clock missing,
as Chen-Yu suggested.

Maxime
Ondřej Jirman July 28, 2016, 10:01 p.m. UTC | #21
On 28.7.2016 23:00, Maxime Ripard wrote:
> Hi Ondrej,
> 
> On Thu, Jul 28, 2016 at 01:27:05PM +0200, Ondřej Jirman wrote:
>> Hi Maxime,
>>
>> I don't have your sunxi-ng clock patches in my mailbox, so I'm replying
>> to this.
> 
> You can find it in the clock maintainers tree:
> https://git.kernel.org/cgit/linux/kernel/git/clk/linux.git/log/?h=clk-sunxi-ng
> 
>> On 26.7.2016 08:32, Maxime Ripard wrote:
>>> On Thu, Jul 21, 2016 at 11:52:15AM +0200, Ondřej Jirman wrote:
>>>>>>> If so, then yes, trying to switch to the 24MHz oscillator before
>>>>>>> applying the factors, and then switching back when the PLL is stable
>>>>>>> would be a nice solution.
>>>>>>>
>>>>>>> I just checked, and all the SoCs we've had so far have that
>>>>>>> possibility, so if it works, for now, I'd like to stick to that.
>>>>>>
>>>>>> It would need to be tested. U-boot does the change only once, while the
>>>>>> kernel would be doing it all the time and between various frequencies
>>>>>> and PLL settings. So the issues may show up with this solution too.
>>>>>
>>>>> That would have the benefit of being quite easy to document, not be a
>>>>> huge amount of code and it would work on all the CPUs PLLs we have so
>>>>> far, so still, a pretty big win. If it doesn't, of course, we don't
>>>>> really have the choice.
>>>>
>>>> It's probably more code though. It has to access different register from
>>>> the one that is already defined in dts, which would add a lot of code
>>>> and require dts changes. The original patch I sent is simpler than that.
>>>
>>> Why?
>>>
>>> You can use container_of to retrieve the parent structure of the clock
>>> notifier, and then you get a ccu_common structure pointer, with the
>>> CCU base address, the clock register, its lock, etc.
>>>
>>> Look at what is done in drivers/clk/meson/clk-cpu.c. It's like 20 LoC.
>>>
>>> I don't really get why anything should be changed in the DT, or why it
>>> would add a lot of code. Or maybe we're not talking about the same
>>> thing?
>>
>> I've looked at the new CCU code, particularly ccu_nkmp.c, and found that
>> it very liberally uses divider parameters, even in situations that are
>> out of spec compared to the current code in the kernel.
>>
>> In the current code and especially in the original vendor code, divider
>> parameters are used as last resort only. Presumably because, of the
>> inherent trouble in changing them, as I described to you in other email.
>>
>> The new ccu code uses dividers often and even at very high frequencies,
>> which goes against the spec.
>>
>> In the vendor code M is never anything else but 0, and P is used only
>> for frequencies below 288MHz, which matches the H3 datasheet, which says:
> 
> In the vendor code, P is never used either. All the boards we had so
> far don't go that low, so we cannot make any of these assumptions,
> especially since the vendor code has had the bad habit of doing
> something wrong and / or useless in the past.

P is used in the arisc firmware according to the spec for the lower
frequencies.

> However, this implementation is a new thing, and it was using the H3
> precisely because of its early stage of support to use it as a testbed
> for the more established.
> 
> If you feel like we should use a different formula to favour the
> multipliers over the dividers (or want to change the class of the CPU
> PLL to an NKM or something else, this is totally doable.

I think the original formula that's currently in the mainline kernel is
better and avoids fiddling with dividers too much.

>> "The P factor only use in the condition that PLL output less than 288
>> MHz."
> 
> And the datasheet also had some issues, either misleading or wrong
> comments in the past. Don't get me wrong, I'm not saying that this is
> wrong, just that we should not follow it religiously, and that we
> should trust more the experiments than the datasheet.

I can believe that. :) Regardless, I think the reasons given for
avoiding dividers are quite reasonable. It's based on how PLL block
works, not what manual says.

>> Also other datasheets of similar socs from Allwinner state that M should
>> not be used in production code.
> 
> Which ones specifically?

A83T for example. You can search for "only for test" phrase.

https://github.com/allwinner-zh/documents/blob/master/A83T/A83T_User_Manual_v1.5.1_20150513.pdf

Those PLLs are a bit different though.


regards,
  Ondrej

>> So it may be that they either forgot to state it in the H3
>> datasheet, or it can be used. In any case, they never use M in their
>> code, so it may be wise to keep to that.
>>
>> When I boot with the new CCU code I get this:
>>
>> PLL_CPUX = 0x00001B21 : M = 2, K = 3, N = 28, P = 1, EN = 0
>> PLL_CPUX : freq = 1008MHz
>>
>> Mathematically it works, but it is against the spec.
>>
>> Also, this:
>>
>> analyzing CPU 0:
>>   driver: cpufreq-dt
>>   CPUs which run at the same hardware frequency: 0 1 2 3
>>   CPUs which need to have their frequency coordinated by software: 0 1 2 3
>>   maximum transition latency: 1.74 ms
>>   hardware limits: 120 MHz - 1.37 GHz
>>   available frequency steps:  120 MHz, 240 MHz, 480 MHz, 648 MHz, 816
>> MHz, 960 MHz, 1.01 GHz, 1.06 GHz, 1.10 GHz, 1.15 GHz, 1.20 GHz, 1.22
>> GHz, 1.25 GHz, 1.30 GHz, 1.34 GHz, 1.37 GHz
>>   available cpufreq governors: conservative ondemand userspace powersave
>> performance
>>   current policy: frequency should be within 240 MHz and 240 MHz.
>>                   The governor "performance" may decide which speed to use
>>                   within this range.
>>   current CPU frequency: 24.0 MHz (asserted by call to hardware)
>>
>>
>> Somehow, the new CCU code sets the CPUX to 24MHz no matter what.
>>
>> I'm using your pen/clk-rework branch without any other patches that were
>> later sent to the mailing list.
> 
> It's probably because of CLK_SET_RATE_PARENT on the CPU clock missing,
> as Chen-Yu suggested.
> 
> Maxime
>
Maxime Ripard July 31, 2016, 10:31 a.m. UTC | #22
Hi,

On Fri, Jul 29, 2016 at 12:01:09AM +0200, Ondřej Jirman wrote:
> On 28.7.2016 23:00, Maxime Ripard wrote:
> > Hi Ondrej,
> > 
> > On Thu, Jul 28, 2016 at 01:27:05PM +0200, Ondřej Jirman wrote:
> >> Hi Maxime,
> >>
> >> I don't have your sunxi-ng clock patches in my mailbox, so I'm replying
> >> to this.
> > 
> > You can find it in the clock maintainers tree:
> > https://git.kernel.org/cgit/linux/kernel/git/clk/linux.git/log/?h=clk-sunxi-ng
> > 
> >> On 26.7.2016 08:32, Maxime Ripard wrote:
> >>> On Thu, Jul 21, 2016 at 11:52:15AM +0200, Ondřej Jirman wrote:
> >>>>>>> If so, then yes, trying to switch to the 24MHz oscillator before
> >>>>>>> applying the factors, and then switching back when the PLL is stable
> >>>>>>> would be a nice solution.
> >>>>>>>
> >>>>>>> I just checked, and all the SoCs we've had so far have that
> >>>>>>> possibility, so if it works, for now, I'd like to stick to that.
> >>>>>>
> >>>>>> It would need to be tested. U-boot does the change only once, while the
> >>>>>> kernel would be doing it all the time and between various frequencies
> >>>>>> and PLL settings. So the issues may show up with this solution too.
> >>>>>
> >>>>> That would have the benefit of being quite easy to document, not be a
> >>>>> huge amount of code and it would work on all the CPUs PLLs we have so
> >>>>> far, so still, a pretty big win. If it doesn't, of course, we don't
> >>>>> really have the choice.
> >>>>
> >>>> It's probably more code though. It has to access different register from
> >>>> the one that is already defined in dts, which would add a lot of code
> >>>> and require dts changes. The original patch I sent is simpler than that.
> >>>
> >>> Why?
> >>>
> >>> You can use container_of to retrieve the parent structure of the clock
> >>> notifier, and then you get a ccu_common structure pointer, with the
> >>> CCU base address, the clock register, its lock, etc.
> >>>
> >>> Look at what is done in drivers/clk/meson/clk-cpu.c. It's like 20 LoC.
> >>>
> >>> I don't really get why anything should be changed in the DT, or why it
> >>> would add a lot of code. Or maybe we're not talking about the same
> >>> thing?
> >>
> >> I've looked at the new CCU code, particularly ccu_nkmp.c, and found that
> >> it very liberally uses divider parameters, even in situations that are
> >> out of spec compared to the current code in the kernel.
> >>
> >> In the current code and especially in the original vendor code, divider
> >> parameters are used as last resort only. Presumably because, of the
> >> inherent trouble in changing them, as I described to you in other email.
> >>
> >> The new ccu code uses dividers often and even at very high frequencies,
> >> which goes against the spec.
> >>
> >> In the vendor code M is never anything else but 0, and P is used only
> >> for frequencies below 288MHz, which matches the H3 datasheet, which says:
> > 
> > In the vendor code, P is never used either. All the boards we had so
> > far don't go that low, so we cannot make any of these assumptions,
> > especially since the vendor code has had the bad habit of doing
> > something wrong and / or useless in the past.
> 
> P is used in the arisc firmware according to the spec for the lower
> frequencies.

Yes, but has anyone actually tested those frequencies? Judging from
the FEX files I could gather, cpufreq never actually goes lower than
480 MHz.

> > However, this implementation is a new thing, and it was using the H3
> > precisely because of its early stage of support to use it as a testbed
> > for the more established.
> > 
> > If you feel like we should use a different formula to favour the
> > multipliers over the dividers (or want to change the class of the CPU
> > PLL to an NKM or something else, this is totally doable.
> 
> I think the original formula that's currently in the mainline kernel is
> better and avoids fiddling with dividers too much.

Yeah, but the older formula is not generic at all. The whole rework
was precisely to avoid doing the whole one driver per clock that was
just becoming a nightmare to maintain, and a pain to add support for
new SoCs. That code will be used for A10's CPU and VE PLLs too for
example. And they probably have the same constraints, but with
different variations (available values of each factors for example).

> >> "The P factor only use in the condition that PLL output less than 288
> >> MHz."
> > 
> > And the datasheet also had some issues, either misleading or wrong
> > comments in the past. Don't get me wrong, I'm not saying that this is
> > wrong, just that we should not follow it religiously, and that we
> > should trust more the experiments than the datasheet.
> 
> I can believe that. :) Regardless, I think the reasons given for
> avoiding dividers are quite reasonable. It's based on how PLL block
> works, not what manual says.

Yes, indeed.

Would replacing the current factors computation function by something
like:

for (m = 1; m < max_m; m++)
    for (p = 1; p < max_p; p++)
        for (n = 1; n < max_n; n++)
	     for (k = 1; k < max_k; k++)
	         if rate == computed rate
		     break;

work for you?

That way, we will favor the multipliers over the dividers, and we can
always "blacklist" p or m (or both) by setting their maximum to 1
(this would need an extra field in _ccu_div though).

> >> Also other datasheets of similar socs from Allwinner state that M should
> >> not be used in production code.
> > 
> > Which ones specifically?
> 
> A83T for example. You can search for "only for test" phrase.
> 
> https://github.com/allwinner-zh/documents/blob/master/A83T/A83T_User_Manual_v1.5.1_20150513.pdf
> 
> Those PLLs are a bit different though.

Thanks,
Maxime
Ondřej Jirman July 31, 2016, 10:01 p.m. UTC | #23
Hi,

On 31.7.2016 12:31, Maxime Ripard wrote:
> Hi,
> 
> On Fri, Jul 29, 2016 at 12:01:09AM +0200, Ondřej Jirman wrote:
>> On 28.7.2016 23:00, Maxime Ripard wrote:
>>> Hi Ondrej,
>>>
>>> On Thu, Jul 28, 2016 at 01:27:05PM +0200, Ondřej Jirman wrote:
>>>> Hi Maxime,
>>>>
>>>> I don't have your sunxi-ng clock patches in my mailbox, so I'm replying
>>>> to this.
>>>
>>> You can find it in the clock maintainers tree:
>>> https://git.kernel.org/cgit/linux/kernel/git/clk/linux.git/log/?h=clk-sunxi-ng
>>>
>>>> On 26.7.2016 08:32, Maxime Ripard wrote:
>>>>> On Thu, Jul 21, 2016 at 11:52:15AM +0200, Ondřej Jirman wrote:
>>>>>>>>> If so, then yes, trying to switch to the 24MHz oscillator before
>>>>>>>>> applying the factors, and then switching back when the PLL is stable
>>>>>>>>> would be a nice solution.
>>>>>>>>>
>>>>>>>>> I just checked, and all the SoCs we've had so far have that
>>>>>>>>> possibility, so if it works, for now, I'd like to stick to that.
>>>>>>>>
>>>>>>>> It would need to be tested. U-boot does the change only once, while the
>>>>>>>> kernel would be doing it all the time and between various frequencies
>>>>>>>> and PLL settings. So the issues may show up with this solution too.
>>>>>>>
>>>>>>> That would have the benefit of being quite easy to document, not be a
>>>>>>> huge amount of code and it would work on all the CPUs PLLs we have so
>>>>>>> far, so still, a pretty big win. If it doesn't, of course, we don't
>>>>>>> really have the choice.
>>>>>>
>>>>>> It's probably more code though. It has to access different register from
>>>>>> the one that is already defined in dts, which would add a lot of code
>>>>>> and require dts changes. The original patch I sent is simpler than that.
>>>>>
>>>>> Why?
>>>>>
>>>>> You can use container_of to retrieve the parent structure of the clock
>>>>> notifier, and then you get a ccu_common structure pointer, with the
>>>>> CCU base address, the clock register, its lock, etc.
>>>>>
>>>>> Look at what is done in drivers/clk/meson/clk-cpu.c. It's like 20 LoC.
>>>>>
>>>>> I don't really get why anything should be changed in the DT, or why it
>>>>> would add a lot of code. Or maybe we're not talking about the same
>>>>> thing?
>>>>
>>>> I've looked at the new CCU code, particularly ccu_nkmp.c, and found that
>>>> it very liberally uses divider parameters, even in situations that are
>>>> out of spec compared to the current code in the kernel.
>>>>
>>>> In the current code and especially in the original vendor code, divider
>>>> parameters are used as last resort only. Presumably because, of the
>>>> inherent trouble in changing them, as I described to you in other email.
>>>>
>>>> The new ccu code uses dividers often and even at very high frequencies,
>>>> which goes against the spec.
>>>>
>>>> In the vendor code M is never anything else but 0, and P is used only
>>>> for frequencies below 288MHz, which matches the H3 datasheet, which says:
>>>
>>> In the vendor code, P is never used either. All the boards we had so
>>> far don't go that low, so we cannot make any of these assumptions,
>>> especially since the vendor code has had the bad habit of doing
>>> something wrong and / or useless in the past.
>>
>> P is used in the arisc firmware according to the spec for the lower
>> frequencies.
> 
> Yes, but has anyone actually tested those frequencies? Judging from
> the FEX files I could gather, cpufreq never actually goes lower than
> 480 MHz.
> 

I tested it. It works well down to 60MHz. You can even run on 24MHz if
you run directly from 24mhz osc. It's terribly slow, but it works ok.

I've rebased my working branch over the mainline kernel, which now
contains the sunxi-ng, and tested it. Cpufreq seems to work on orange pi
pc without any changes discussed in this thread. I didn't do any
extensive testing though. But it doesn't hang on boot or cpufreq config
changes.

You can see it here:

https://github.com/megous/linux/commits/orange-pi-4.8

>>> However, this implementation is a new thing, and it was using the H3
>>> precisely because of its early stage of support to use it as a testbed
>>> for the more established.
>>>
>>> If you feel like we should use a different formula to favour the
>>> multipliers over the dividers (or want to change the class of the CPU
>>> PLL to an NKM or something else, this is totally doable.
>>
>> I think the original formula that's currently in the mainline kernel is
>> better and avoids fiddling with dividers too much.
> 
> Yeah, but the older formula is not generic at all. The whole rework
> was precisely to avoid doing the whole one driver per clock that was
> just becoming a nightmare to maintain, and a pain to add support for
> new SoCs. That code will be used for A10's CPU and VE PLLs too for
> example. And they probably have the same constraints, but with
> different variations (available values of each factors for example).

Fair enough.

>>>> "The P factor only use in the condition that PLL output less than 288
>>>> MHz."
>>>
>>> And the datasheet also had some issues, either misleading or wrong
>>> comments in the past. Don't get me wrong, I'm not saying that this is
>>> wrong, just that we should not follow it religiously, and that we
>>> should trust more the experiments than the datasheet.
>>
>> I can believe that. :) Regardless, I think the reasons given for
>> avoiding dividers are quite reasonable. It's based on how PLL block
>> works, not what manual says.
> 
> Yes, indeed.
> 
> Would replacing the current factors computation function by something
> like:
> 
> for (m = 1; m < max_m; m++)
>     for (p = 1; p < max_p; p++)
>         for (n = 1; n < max_n; n++)
> 	     for (k = 1; k < max_k; k++)
> 	         if rate == computed rate
> 		     break;
> 
> work for you?

This would be better.

thank you and regards,
  Ondrej

> That way, we will favor the multipliers over the dividers, and we can
> always "blacklist" p or m (or both) by setting their maximum to 1
> (this would need an extra field in _ccu_div though).
> 
>>>> Also other datasheets of similar socs from Allwinner state that M should
>>>> not be used in production code.
>>>
>>> Which ones specifically?
>>
>> A83T for example. You can search for "only for test" phrase.
>>
>> https://github.com/allwinner-zh/documents/blob/master/A83T/A83T_User_Manual_v1.5.1_20150513.pdf
>>
>> Those PLLs are a bit different though.
> 
> Thanks,
> Maxime
>
Maxime Ripard Aug. 31, 2016, 8:25 p.m. UTC | #24
Hi Ondrej,

Sorry for the (very) belated reply.

On Mon, Aug 01, 2016 at 12:01:39AM +0200, Ondřej Jirman wrote:
> >>>> In the vendor code M is never anything else but 0, and P is used only
> >>>> for frequencies below 288MHz, which matches the H3 datasheet, which says:
> >>>
> >>> In the vendor code, P is never used either. All the boards we had so
> >>> far don't go that low, so we cannot make any of these assumptions,
> >>> especially since the vendor code has had the bad habit of doing
> >>> something wrong and / or useless in the past.
> >>
> >> P is used in the arisc firmware according to the spec for the lower
> >> frequencies.
> > 
> > Yes, but has anyone actually tested those frequencies? Judging from
> > the FEX files I could gather, cpufreq never actually goes lower than
> > 480 MHz.
> > 
> 
> I tested it. It works well down to 60MHz. You can even run on 24MHz if
> you run directly from 24mhz osc. It's terribly slow, but it works ok.

Ok, it's good to know (and yeah, I wouldn't expect a CPU at 24MHz to
be very fast :))

> I've rebased my working branch over the mainline kernel, which now
> contains the sunxi-ng, and tested it. Cpufreq seems to work on orange pi
> pc without any changes discussed in this thread. I didn't do any
> extensive testing though. But it doesn't hang on boot or cpufreq config
> changes.
> 
> You can see it here:
> 
> https://github.com/megous/linux/commits/orange-pi-4.8

Thanks a lot. That's good to know as well.

> >>>> "The P factor only use in the condition that PLL output less than 288
> >>>> MHz."
> >>>
> >>> And the datasheet also had some issues, either misleading or wrong
> >>> comments in the past. Don't get me wrong, I'm not saying that this is
> >>> wrong, just that we should not follow it religiously, and that we
> >>> should trust more the experiments than the datasheet.
> >>
> >> I can believe that. :) Regardless, I think the reasons given for
> >> avoiding dividers are quite reasonable. It's based on how PLL block
> >> works, not what manual says.
> > 
> > Yes, indeed.
> > 
> > Would replacing the current factors computation function by something
> > like:
> > 
> > for (m = 1; m < max_m; m++)
> >     for (p = 1; p < max_p; p++)
> >         for (n = 1; n < max_n; n++)
> > 	     for (k = 1; k < max_k; k++)
> > 	         if rate == computed rate
> > 		     break;
> > 
> > work for you?
> 
> This would be better.

Ok, I'll try to cook something up when I get the time. If you feel
like it's not done fast enough, feel free to do it.

Note that we also ended up merging for the A31 some code that reparent
CPUx temporarily while setting the PLL-CPUX rate, and it should be
somewhat generic. But of course, it's not exclusive.

Thanks again for all your tests, it's very appreciated.

Maxime
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt
index 5faae05..774500c 100644
--- a/Documentation/devicetree/bindings/clock/sunxi.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi.txt
@@ -10,6 +10,7 @@  Required properties:
 	"allwinner,sun4i-a10-pll1-clk" - for the main PLL clock and PLL4
 	"allwinner,sun6i-a31-pll1-clk" - for the main PLL clock on A31
 	"allwinner,sun8i-a23-pll1-clk" - for the main PLL clock on A23
+	"allwinner,sun8i-h3-pll1-clk" - for the main PLL clock on H3
 	"allwinner,sun4i-a10-pll3-clk" - for the video PLL clock on A10
 	"allwinner,sun9i-a80-pll4-clk" - for the peripheral PLLs on A80
 	"allwinner,sun4i-a10-pll5-clk" - for the PLL5 clock
diff --git a/drivers/clk/sunxi/clk-factors.c b/drivers/clk/sunxi/clk-factors.c
index ddefe96..7c165db 100644
--- a/drivers/clk/sunxi/clk-factors.c
+++ b/drivers/clk/sunxi/clk-factors.c
@@ -34,13 +34,6 @@ 
 
 #define FACTORS_MAX_PARENTS		5
 
-#define SETMASK(len, pos)		(((1U << (len)) - 1) << (pos))
-#define CLRMASK(len, pos)		(~(SETMASK(len, pos)))
-#define FACTOR_GET(bit, len, reg)	(((reg) & SETMASK(len, bit)) >> (bit))
-
-#define FACTOR_SET(bit, len, reg, val) \
-	(((reg) & CLRMASK(len, bit)) | (val << (bit)))
-
 static unsigned long clk_factors_recalc_rate(struct clk_hw *hw,
 					     unsigned long parent_rate)
 {
@@ -150,20 +143,24 @@  static int clk_factors_set_rate(struct clk_hw *hw, unsigned long rate,
 	if (factors->lock)
 		spin_lock_irqsave(factors->lock, flags);
 
-	/* Fetch the register value */
-	reg = readl(factors->reg);
+	if (factors->apply) {
+		factors->apply(factors, &req);
+	} else {
+		/* Fetch the register value */
+		reg = readl(factors->reg);
 
-	/* Set up the new factors - macros do not do anything if width is 0 */
-	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req.n);
-	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req.k);
-	reg = FACTOR_SET(config->mshift, config->mwidth, reg, req.m);
-	reg = FACTOR_SET(config->pshift, config->pwidth, reg, req.p);
+		/* Set up the new factors - macros do not do anything if width is 0 */
+		reg = FACTOR_SET(config->nshift, config->nwidth, reg, req.n);
+		reg = FACTOR_SET(config->kshift, config->kwidth, reg, req.k);
+		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req.m);
+		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req.p);
 
-	/* Apply them now */
-	writel(reg, factors->reg);
+		/* Apply them now */
+		writel(reg, factors->reg);
 
-	/* delay 500us so pll stabilizes */
-	__delay((rate >> 20) * 500 / 2);
+		/* delay 500us so pll stabilizes */
+		__delay((rate >> 20) * 500 / 2);
+	}
 
 	if (factors->lock)
 		spin_unlock_irqrestore(factors->lock, flags);
@@ -213,6 +210,7 @@  struct clk *sunxi_factors_register(struct device_node *node,
 	factors->config = data->table;
 	factors->get_factors = data->getter;
 	factors->recalc = data->recalc;
+	factors->apply = data->apply;
 	factors->lock = lock;
 
 	/* Add a gate if this factor clock can be gated */
diff --git a/drivers/clk/sunxi/clk-factors.h b/drivers/clk/sunxi/clk-factors.h
index 1e63c5b..661a45a 100644
--- a/drivers/clk/sunxi/clk-factors.h
+++ b/drivers/clk/sunxi/clk-factors.h
@@ -6,6 +6,13 @@ 
 
 #define SUNXI_FACTORS_NOT_APPLICABLE	(0)
 
+#define SETMASK(len, pos)		(((1U << (len)) - 1) << (pos))
+#define CLRMASK(len, pos)		(~(SETMASK(len, pos)))
+#define FACTOR_GET(bit, len, reg)	(((reg) & SETMASK(len, bit)) >> (bit))
+
+#define FACTOR_SET(bit, len, reg, val) \
+	(((reg) & CLRMASK(len, bit)) | (val << (bit)))
+
 struct clk_factors_config {
 	u8 nshift;
 	u8 nwidth;
@@ -16,6 +23,7 @@  struct clk_factors_config {
 	u8 pshift;
 	u8 pwidth;
 	u8 n_start;
+	u8 lock;
 };
 
 struct factors_request {
@@ -28,6 +36,8 @@  struct factors_request {
 	u8 p;
 };
 
+struct clk_factors;
+
 struct factors_data {
 	int enable;
 	int mux;
@@ -35,6 +45,7 @@  struct factors_data {
 	const struct clk_factors_config *table;
 	void (*getter)(struct factors_request *req);
 	void (*recalc)(struct factors_request *req);
+	void (*apply)(struct clk_factors *factors, struct factors_request *req);
 	const char *name;
 };
 
@@ -44,6 +55,7 @@  struct clk_factors {
 	const struct clk_factors_config *config;
 	void (*get_factors)(struct factors_request *req);
 	void (*recalc)(struct factors_request *req);
+	void (*apply)(struct clk_factors *factors, struct factors_request *req);
 	spinlock_t *lock;
 	/* for cleanup */
 	struct clk_mux *mux;
diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index 838b22a..e4bb908 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c
@@ -23,6 +23,7 @@ 
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/log2.h>
+#include <linux/delay.h>
 
 #include "clk-factors.h"
 
@@ -200,6 +201,56 @@  static void sun8i_a23_get_pll1_factors(struct factors_request *req)
 }
 
 /**
+ * sun8i_h3_apply_pll1_factors() - applies n, k, m, p factors to the
+ * register using an algorithm that tries to reserve the PLL lock
+ */
+
+static void sun8i_h3_apply_pll1_factors(struct clk_factors *factors, struct factors_request *req)
+{
+	const struct clk_factors_config *config = factors->config;
+	u32 reg;
+
+	/* Fetch the register value */
+	reg = readl(factors->reg);
+
+	if (FACTOR_GET(config->pshift, config->pwidth, reg) < req->p) {
+		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
+
+		writel(reg, factors->reg);
+		__delay(2000);
+	}
+
+	if (FACTOR_GET(config->mshift, config->mwidth, reg) < req->m) {
+		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
+
+		writel(reg, factors->reg);
+		__delay(2000);
+	}
+
+	reg = FACTOR_SET(config->nshift, config->nwidth, reg, req->n);
+	reg = FACTOR_SET(config->kshift, config->kwidth, reg, req->k);
+
+	writel(reg, factors->reg);
+	__delay(20);
+
+	while (!(readl(factors->reg) & (1 << config->lock)));
+
+	if (FACTOR_GET(config->mshift, config->mwidth, reg) > req->m) {
+		reg = FACTOR_SET(config->mshift, config->mwidth, reg, req->m);
+
+		writel(reg, factors->reg);
+		__delay(2000);
+	}
+
+	if (FACTOR_GET(config->pshift, config->pwidth, reg) > req->p) {
+		reg = FACTOR_SET(config->pshift, config->pwidth, reg, req->p);
+
+		writel(reg, factors->reg);
+		__delay(2000);
+	}
+}
+
+/**
  * sun4i_get_pll5_factors() - calculates n, k factors for PLL5
  * PLL5 rate is calculated as follows
  * rate = parent_rate * n * (k + 1)
@@ -451,6 +502,7 @@  static const struct clk_factors_config sun8i_a23_pll1_config = {
 	.pshift = 16,
 	.pwidth = 2,
 	.n_start = 1,
+	.lock = 28
 };
 
 static const struct clk_factors_config sun4i_pll5_config = {
@@ -513,6 +565,13 @@  static const struct factors_data sun8i_a23_pll1_data __initconst = {
 	.getter = sun8i_a23_get_pll1_factors,
 };
 
+static const struct factors_data sun8i_h3_pll1_data __initconst = {
+	.enable = 31,
+	.table = &sun8i_a23_pll1_config,
+	.getter = sun8i_a23_get_pll1_factors,
+	.apply = sun8i_h3_apply_pll1_factors,
+};
+
 static const struct factors_data sun7i_a20_pll4_data __initconst = {
 	.enable = 31,
 	.table = &sun4i_pll5_config,
@@ -590,12 +649,19 @@  static void __init sun6i_pll1_clk_setup(struct device_node *node)
 CLK_OF_DECLARE(sun6i_pll1, "allwinner,sun6i-a31-pll1-clk",
 	       sun6i_pll1_clk_setup);
 
-static void __init sun8i_pll1_clk_setup(struct device_node *node)
+static void __init sun8i_a23_pll1_clk_setup(struct device_node *node)
 {
 	sunxi_factors_clk_setup(node, &sun8i_a23_pll1_data);
 }
-CLK_OF_DECLARE(sun8i_pll1, "allwinner,sun8i-a23-pll1-clk",
-	       sun8i_pll1_clk_setup);
+CLK_OF_DECLARE(sun8i_a23_pll1, "allwinner,sun8i-a23-pll1-clk",
+	       sun8i_a23_pll1_clk_setup);
+
+static void __init sun8i_h3_pll1_clk_setup(struct device_node *node)
+{
+	sunxi_factors_clk_setup(node, &sun8i_h3_pll1_data);
+}
+CLK_OF_DECLARE(sun8i_h3_pll1, "allwinner,sun8i-h3-pll1-clk",
+	       sun8i_h3_pll1_clk_setup);
 
 static void __init sun7i_pll4_clk_setup(struct device_node *node)
 {