diff mbox series

[v2,1/2] clk: sunxi-ng: nkm: consider alternative parent rates when finding rate

Message ID 20230611090143.132257-2-frank@oltmanns.dev (mailing list archive)
State Changes Requested, archived
Headers show
Series clk: sunxi-ng: Consider alternative parent rates when determining NKM clock rate | expand

Commit Message

Frank Oltmanns June 11, 2023, 9:01 a.m. UTC
In case the CLK_SET_RATE_PARENT flag is set, consider using a different
parent rate when determining a new rate.

To find the best match for the requested rate, perform the following
steps for each NKM combination:
 - calculate the optimal parent rate,
 - find the best parent rate that the parent clock actually supports
 - use that parent rate to calculate the effective rate.

In case the clk does not support setting the parent rate, use the same
algorithm as before.

Signed-off-by: Frank Oltmanns <frank@oltmanns.dev>
---
 drivers/clk/sunxi-ng/ccu_nkm.c | 66 +++++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 8 deletions(-)

Comments

Frank Oltmanns June 12, 2023, 8:51 a.m. UTC | #1
Hi all,

I just found in the Allwinner A64 User Manual V1.0, that there are
additional constraints on the combinations for pll-mipi, that are not
(and never were) enforced by by ccu_nkm.

On 2023-06-11 at 11:01:42 +0200, Frank Oltmanns <frank@oltmanns.dev> wrote:
> In case the CLK_SET_RATE_PARENT flag is set, consider using a different
> parent rate when determining a new rate.
>
> To find the best match for the requested rate, perform the following
> steps for each NKM combination:
>  - calculate the optimal parent rate,
>  - find the best parent rate that the parent clock actually supports
>  - use that parent rate to calculate the effective rate.
>
> In case the clk does not support setting the parent rate, use the same
> algorithm as before.
>
> Signed-off-by: Frank Oltmanns <frank@oltmanns.dev>
> ---
>  drivers/clk/sunxi-ng/ccu_nkm.c | 66 +++++++++++++++++++++++++++++-----
>  1 file changed, 58 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/clk/sunxi-ng/ccu_nkm.c b/drivers/clk/sunxi-ng/ccu_nkm.c
> index a0978a50edae..c49d5879fe73 100644
> --- a/drivers/clk/sunxi-ng/ccu_nkm.c
> +++ b/drivers/clk/sunxi-ng/ccu_nkm.c
> @@ -6,6 +6,7 @@
>
>  #include <linux/clk-provider.h>
>  #include <linux/io.h>
> +#include <linux/math.h>
>
>  #include "ccu_gate.h"
>  #include "ccu_nkm.h"
> @@ -16,10 +17,49 @@ struct _ccu_nkm {
>  	unsigned long	m, min_m, max_m;
>  };
>
> -static unsigned long ccu_nkm_find_best(unsigned long parent, unsigned long rate,
> -				       struct _ccu_nkm *nkm)
> +static unsigned long optimal_parent_rate(unsigned long rate, unsigned long n,
> +					 unsigned long k, unsigned long m)
>  {
> -	unsigned long best_rate = 0;
> +	unsigned long _rate, parent;
> +
> +	// We must first try to find the desired parent rate that is rounded up, because there are
> +	// cases where truncating makes us miss the requested rate.
> +	// E.g. rate=449035712, n=11, k=3, m=16
> +	// When truncating, we'd get parent=217714284 and calculating the rate from that would give
> +	// us 449035710. When rounding up, we get parent=217714285 which would give us the requested
> +	// rate of 449035712.
> +	parent = DIV_ROUND_UP(rate * m, n * k);
> +
> +	// But there are other cases, where rounding up the parent gives us a too high rate.
> +	// Therefore, we need to check for this case and, if necessary, truncate the parent rate
> +	// instead of rounding up.
> +	_rate = parent * n * k / m;
> +	if (_rate > rate)
> +		parent = rate * m / (n * k);
> +	return parent;
> +}
> +
> +/**
> + * ccu_nkm_find_best - Find the best nkm combination for a given rate
> + *
> + * @parent: rate of parent clock. This is used either as an input or out parameter:
> + *           - In cases where the parent clock can be set, this parameter will be updated to contain
> + *             the optimal rate for the parent to achieve the best rate for the nkm clock.
> + *           - In cases where the parent clock can not be set, this parameter must contain the
> + *             current rate of the parent, which is used to determine the best combination of n, k,
> + *             and m.
> + * @rate: requested rate.
> + * @nkm: Input/output parameter that contains the clocks constraints on the n, k, m combinations and
> + *       is updated in this function to contain the resulting best n, k, m combination.
> + * @parent_hw: parent clock. If set, this function assumes that the parent clock can be updated to a
> + *             rate that would be best to in order to get as close as possible to @rate. This
> + *             parameter must be set to NULL if this function shall not try to find the optimal
> + *             parent rate for the requested rate.
> + */
> +static unsigned long ccu_nkm_find_best(unsigned long *parent, unsigned long rate,
> +				       struct _ccu_nkm *nkm, struct clk_hw *parent_hw)
> +{
> +	unsigned long best_rate = 0, best_parent_rate = *parent, tmp_parent = *parent;
>  	unsigned long best_n = 0, best_k = 0, best_m = 0;
>  	unsigned long _n, _k, _m;
>
> @@ -28,12 +68,17 @@ static unsigned long ccu_nkm_find_best(unsigned long parent, unsigned long rate,
>  			for (_m = nkm->min_m; _m <= nkm->max_m; _m++) {

According to the manual M/N has to be <= 3. Therefore we need a
different maximum value for the _m-for-loop:

        unsigned long max_m = min(3 * _n, nkm->max_m);
        for (_m = nkm->min_m; _m <= max_m; _m++) {

I suggest that I add an optional member max_mn_ratio to the structs
ccu_nkm and _ccu_nkm. Optional meaning: Ignore if 0.

>  				unsigned long tmp_rate;
>
> -				tmp_rate = parent * _n * _k / _m;
> -
> +				if (parent_hw) {
> +					tmp_parent = optimal_parent_rate(rate, _n, _k, _m);
> +					tmp_parent = clk_hw_round_rate(parent_hw, tmp_parent);
> +				}

Another constraint is PLL-VIDEO0 rate / M >= 24 MHz. Therefore we also
need:
        if (tmp_parent < 24000000 * _m)
                continue;

So, we need another optional member min_m_times_parent or, for
shortness, maybe min_m_parent. I could use help finding a good name for
this.

I guess there needs to be a V3 of this patchset. :)

Regards,
  Frank

> +				tmp_rate = tmp_parent * _n * _k / _m;
>  				if (tmp_rate > rate)
>  					continue;
> +
>  				if ((rate - tmp_rate) < (rate - best_rate)) {
>  					best_rate = tmp_rate;
> +					best_parent_rate = tmp_parent;
>  					best_n = _n;
>  					best_k = _k;
>  					best_m = _m;
> @@ -46,6 +91,8 @@ static unsigned long ccu_nkm_find_best(unsigned long parent, unsigned long rate,
>  	nkm->k = best_k;
>  	nkm->m = best_m;
>
> +	*parent = best_parent_rate;
> +
>  	return best_rate;
>  }
>
> @@ -106,7 +153,7 @@ static unsigned long ccu_nkm_recalc_rate(struct clk_hw *hw,
>  }
>
>  static unsigned long ccu_nkm_round_rate(struct ccu_mux_internal *mux,
> -					struct clk_hw *hw,
> +					struct clk_hw *parent_hw,
>  					unsigned long *parent_rate,
>  					unsigned long rate,
>  					void *data)
> @@ -124,7 +171,10 @@ static unsigned long ccu_nkm_round_rate(struct ccu_mux_internal *mux,
>  	if (nkm->common.features & CCU_FEATURE_FIXED_POSTDIV)
>  		rate *= nkm->fixed_post_div;
>
> -	rate = ccu_nkm_find_best(*parent_rate, rate, &_nkm);
> +	if (!clk_hw_can_set_rate_parent(&nkm->common.hw))
> +		rate = ccu_nkm_find_best(parent_rate, rate, &_nkm, NULL);
> +	else
> +		rate = ccu_nkm_find_best(parent_rate, rate, &_nkm, parent_hw);
>
>  	if (nkm->common.features & CCU_FEATURE_FIXED_POSTDIV)
>  		rate /= nkm->fixed_post_div;
> @@ -159,7 +209,7 @@ static int ccu_nkm_set_rate(struct clk_hw *hw, unsigned long rate,
>  	_nkm.min_m = 1;
>  	_nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
>
> -	ccu_nkm_find_best(parent_rate, rate, &_nkm);
> +	ccu_nkm_find_best(&parent_rate, rate, &_nkm, NULL);
>
>  	spin_lock_irqsave(nkm->common.lock, flags);
Maxime Ripard June 12, 2023, 12:21 p.m. UTC | #2
Hi,

On Sun, Jun 11, 2023 at 11:01:42AM +0200, Frank Oltmanns wrote:
> In case the CLK_SET_RATE_PARENT flag is set, consider using a different
> parent rate when determining a new rate.
> 
> To find the best match for the requested rate, perform the following
> steps for each NKM combination:
>  - calculate the optimal parent rate,
>  - find the best parent rate that the parent clock actually supports
>  - use that parent rate to calculate the effective rate.
> 
> In case the clk does not support setting the parent rate, use the same
> algorithm as before.
> 
> Signed-off-by: Frank Oltmanns <frank@oltmanns.dev>

Most of the comments I had on v1 still apply there, so let's figure them
out before sending a v3.

Maxime
Maxime Ripard June 12, 2023, 12:31 p.m. UTC | #3
On Mon, Jun 12, 2023 at 10:51:52AM +0200, Frank Oltmanns wrote:
> > @@ -28,12 +68,17 @@ static unsigned long ccu_nkm_find_best(unsigned long parent, unsigned long rate,
> >  			for (_m = nkm->min_m; _m <= nkm->max_m; _m++) {
> 
> According to the manual M/N has to be <= 3. Therefore we need a
> different maximum value for the _m-for-loop:
> 
>         unsigned long max_m = min(3 * _n, nkm->max_m);
>         for (_m = nkm->min_m; _m <= max_m; _m++) {
> 
> I suggest that I add an optional member max_mn_ratio to the structs
> ccu_nkm and _ccu_nkm. Optional meaning: Ignore if 0.

Which workload is affected by this restriction?

> >  				unsigned long tmp_rate;
> >
> > -				tmp_rate = parent * _n * _k / _m;
> > -
> > +				if (parent_hw) {
> > +					tmp_parent = optimal_parent_rate(rate, _n, _k, _m);
> > +					tmp_parent = clk_hw_round_rate(parent_hw, tmp_parent);
> > +				}
> 
> Another constraint is PLL-VIDEO0 rate / M >= 24 MHz. Therefore we also need:
>         if (tmp_parent < 24000000 * _m)
>                 continue;
>
> So, we need another optional member min_m_times_parent or, for
> shortness, maybe min_m_parent. I could use help finding a good name for
> this.

Again, if it's not causing any harm I'd rather keep the code as simple
and maintainable as possible.

Maxime
Frank Oltmanns June 25, 2023, 10:45 a.m. UTC | #4
Hi Maxime,

On 2023-06-12 at 14:31:21 +0200, Maxime Ripard <maxime@cerno.tech> wrote:
> [[PGP Signed Part:Undecided]]
> On Mon, Jun 12, 2023 at 10:51:52AM +0200, Frank Oltmanns wrote:
>> > @@ -28,12 +68,17 @@ static unsigned long ccu_nkm_find_best(unsigned long parent, unsigned long rate,
>> >  			for (_m = nkm->min_m; _m <= nkm->max_m; _m++) {
>>
>> According to the manual M/N has to be <= 3. Therefore we need a
>> different maximum value for the _m-for-loop:
>>
>>         unsigned long max_m = min(3 * _n, nkm->max_m);
>>         for (_m = nkm->min_m; _m <= max_m; _m++) {
>>
>> I suggest that I add an optional member max_mn_ratio to the structs
>> ccu_nkm and _ccu_nkm. Optional meaning: Ignore if 0.
>
> Which workload is affected by this restriction?
>

Firstly, the restriction increases the minimum rate that pll-mipi of the
A64 SoC can use. The rate off pll-mipi is
        pll-video0 * K * N / M

The Allwinner's user manual ([1], p.94) states that the maximum ratio of
M/N (note how numerator and denominator changed) is 3. So, looking back
to the original formula, the N / M part can be at most 1/3. That
effectively limits the minimum rate that pll-mipi can provide to
        min(pll-video0) * 2 * 1 / 3

The minimum rate of pll-video0 is 192 MHz, i.e., the minimum rate for
pll-mipi becomes 128 MHz. Without the restriction, the minimum rate
currently is 24 MHz. It is my (albeit limited) understanding, that is no
real limitation because no panel would request such low rates. I should
also mention that Allwinner states in the user manual ([1], p. 94) that
the rate must be in the 500 MHz - 1.4 GHz range.

Secondly, it decreases the number of options for M for all N <= 6.
Therefore it reduces the number of meaningful NKM combinations from 275
(without the restriction) to 238. (With meaningful combinations, I mean
the combinations that result in a different rate for pll-mipi, e.g.,
K=2, M=1, N=2 is the same as K=4, M=1, N=1). The consequence is that the
precision of pll-mipi is slightly reduced. Note, however, that this loss
of precision is more than offset by the option that pll-mipi can now
"freely" choose its parent rate.

In conclusion, I don't see any real world limitation that this
restriction introduces.

>> >  				unsigned long tmp_rate;
>> >
>> > -				tmp_rate = parent * _n * _k / _m;
>> > -
>> > +				if (parent_hw) {
>> > +					tmp_parent = optimal_parent_rate(rate, _n, _k, _m);
>> > +					tmp_parent = clk_hw_round_rate(parent_hw, tmp_parent);
>> > +				}
>>
>> Another constraint is PLL-VIDEO0 rate / M >= 24 MHz. Therefore we also need:
>>         if (tmp_parent < 24000000 * _m)
>>                 continue;
>>
>> So, we need another optional member min_m_times_parent or, for
>> shortness, maybe min_m_parent. I could use help finding a good name for
>> this.
>
> Again, if it's not causing any harm I'd rather keep the code as simple
> and maintainable as possible.

Unfortunately, in Allwinner's User Manual, I could not find any
consequences of driving the SoC outside its specifications. Maybe
someone with more experience in that area could weigh in here.

I want to highlight, though, that currently (i.e., without this patch
series), the rate of pll-video0 is not set by the kernel. So in most
installations, it probably runs at 294 (u-boot) or 297 MHz (default
rate). That allows for M to be smaller than or equal to 12. But with
this patch series, we set pll-video0 to any rate that pll-mipi requests.
Where previously, people could have hand-crafted rates that were within
the SoC's specification - based on a well-known pll-video0 rate - this
no longer applies.

Therefore, I think we should be careful not to request rates from the
parent that drives pll-mipi outside its specification. What do you think
about implementing the min_m_parent functionality in a separate patch?
That would probably facilitate the assessment of the patch from a
simplicity and maintenance perspective. If we agree on a way forward, I
can still squash it with other patches in a later version of this patch
series. But if you are not convinced, I can simply drop it at any point
- including now ;).

In short, I don't know if violating the restriction causes any harm in a
real-world application. However, Allwinner may have reasons for listing
such limitations, even if they don't tell us the details. Those are my
two cents.

Thanks for your feedback,
  Frank

[1] https://github.com/OLIMEX/OLINUXINO/blob/027087da32d69651a58a4e6193bedadef9c036ec/DOCUMENTS/A64-PDFs/Allwinner%20A64%20User%20Manual%20v1.0.pdf

>
> Maxime
>
> [[End of PGP Signed Part]]
Maxime Ripard June 26, 2023, 4:50 p.m. UTC | #5
On Sun, Jun 25, 2023 at 12:45:45PM +0200, Frank Oltmanns wrote:
> Hi Maxime,
> 
> On 2023-06-12 at 14:31:21 +0200, Maxime Ripard <maxime@cerno.tech> wrote:
> > [[PGP Signed Part:Undecided]]
> > On Mon, Jun 12, 2023 at 10:51:52AM +0200, Frank Oltmanns wrote:
> >> > @@ -28,12 +68,17 @@ static unsigned long ccu_nkm_find_best(unsigned long parent, unsigned long rate,
> >> >  			for (_m = nkm->min_m; _m <= nkm->max_m; _m++) {
> >>
> >> According to the manual M/N has to be <= 3. Therefore we need a
> >> different maximum value for the _m-for-loop:
> >>
> >>         unsigned long max_m = min(3 * _n, nkm->max_m);
> >>         for (_m = nkm->min_m; _m <= max_m; _m++) {
> >>
> >> I suggest that I add an optional member max_mn_ratio to the structs
> >> ccu_nkm and _ccu_nkm. Optional meaning: Ignore if 0.
> >
> > Which workload is affected by this restriction?
> >
> 
> Firstly, the restriction increases the minimum rate that pll-mipi of the
> A64 SoC can use. The rate off pll-mipi is
>         pll-video0 * K * N / M
> 
> The Allwinner's user manual ([1], p.94) states that the maximum ratio of
> M/N (note how numerator and denominator changed) is 3. So, looking back
> to the original formula, the N / M part can be at most 1/3. That
> effectively limits the minimum rate that pll-mipi can provide to
>         min(pll-video0) * 2 * 1 / 3
> 
> The minimum rate of pll-video0 is 192 MHz, i.e., the minimum rate for
> pll-mipi becomes 128 MHz. Without the restriction, the minimum rate
> currently is 24 MHz. It is my (albeit limited) understanding, that is no
> real limitation because no panel would request such low rates. I should
> also mention that Allwinner states in the user manual ([1], p. 94) that
> the rate must be in the 500 MHz - 1.4 GHz range.
> 
> Secondly, it decreases the number of options for M for all N <= 6.
> Therefore it reduces the number of meaningful NKM combinations from 275
> (without the restriction) to 238. (With meaningful combinations, I mean
> the combinations that result in a different rate for pll-mipi, e.g.,
> K=2, M=1, N=2 is the same as K=4, M=1, N=1). The consequence is that the
> precision of pll-mipi is slightly reduced. Note, however, that this loss
> of precision is more than offset by the option that pll-mipi can now
> "freely" choose its parent rate.
> 
> In conclusion, I don't see any real world limitation that this
> restriction introduces.

If we want to go that way, I'd rather use a function that validates
whether or not the current set of parameter is valid.

That way, we can express pretty much any constraints without
special-casing the main structure too much.

Maxime
diff mbox series

Patch

diff --git a/drivers/clk/sunxi-ng/ccu_nkm.c b/drivers/clk/sunxi-ng/ccu_nkm.c
index a0978a50edae..c49d5879fe73 100644
--- a/drivers/clk/sunxi-ng/ccu_nkm.c
+++ b/drivers/clk/sunxi-ng/ccu_nkm.c
@@ -6,6 +6,7 @@ 
 
 #include <linux/clk-provider.h>
 #include <linux/io.h>
+#include <linux/math.h>
 
 #include "ccu_gate.h"
 #include "ccu_nkm.h"
@@ -16,10 +17,49 @@  struct _ccu_nkm {
 	unsigned long	m, min_m, max_m;
 };
 
-static unsigned long ccu_nkm_find_best(unsigned long parent, unsigned long rate,
-				       struct _ccu_nkm *nkm)
+static unsigned long optimal_parent_rate(unsigned long rate, unsigned long n,
+					 unsigned long k, unsigned long m)
 {
-	unsigned long best_rate = 0;
+	unsigned long _rate, parent;
+
+	// We must first try to find the desired parent rate that is rounded up, because there are
+	// cases where truncating makes us miss the requested rate.
+	// E.g. rate=449035712, n=11, k=3, m=16
+	// When truncating, we'd get parent=217714284 and calculating the rate from that would give
+	// us 449035710. When rounding up, we get parent=217714285 which would give us the requested
+	// rate of 449035712.
+	parent = DIV_ROUND_UP(rate * m, n * k);
+
+	// But there are other cases, where rounding up the parent gives us a too high rate.
+	// Therefore, we need to check for this case and, if necessary, truncate the parent rate
+	// instead of rounding up.
+	_rate = parent * n * k / m;
+	if (_rate > rate)
+		parent = rate * m / (n * k);
+	return parent;
+}
+
+/**
+ * ccu_nkm_find_best - Find the best nkm combination for a given rate
+ *
+ * @parent: rate of parent clock. This is used either as an input or out parameter:
+ *           - In cases where the parent clock can be set, this parameter will be updated to contain
+ *             the optimal rate for the parent to achieve the best rate for the nkm clock.
+ *           - In cases where the parent clock can not be set, this parameter must contain the
+ *             current rate of the parent, which is used to determine the best combination of n, k,
+ *             and m.
+ * @rate: requested rate.
+ * @nkm: Input/output parameter that contains the clocks constraints on the n, k, m combinations and
+ *       is updated in this function to contain the resulting best n, k, m combination.
+ * @parent_hw: parent clock. If set, this function assumes that the parent clock can be updated to a
+ *             rate that would be best to in order to get as close as possible to @rate. This
+ *             parameter must be set to NULL if this function shall not try to find the optimal
+ *             parent rate for the requested rate.
+ */
+static unsigned long ccu_nkm_find_best(unsigned long *parent, unsigned long rate,
+				       struct _ccu_nkm *nkm, struct clk_hw *parent_hw)
+{
+	unsigned long best_rate = 0, best_parent_rate = *parent, tmp_parent = *parent;
 	unsigned long best_n = 0, best_k = 0, best_m = 0;
 	unsigned long _n, _k, _m;
 
@@ -28,12 +68,17 @@  static unsigned long ccu_nkm_find_best(unsigned long parent, unsigned long rate,
 			for (_m = nkm->min_m; _m <= nkm->max_m; _m++) {
 				unsigned long tmp_rate;
 
-				tmp_rate = parent * _n * _k / _m;
-
+				if (parent_hw) {
+					tmp_parent = optimal_parent_rate(rate, _n, _k, _m);
+					tmp_parent = clk_hw_round_rate(parent_hw, tmp_parent);
+				}
+				tmp_rate = tmp_parent * _n * _k / _m;
 				if (tmp_rate > rate)
 					continue;
+
 				if ((rate - tmp_rate) < (rate - best_rate)) {
 					best_rate = tmp_rate;
+					best_parent_rate = tmp_parent;
 					best_n = _n;
 					best_k = _k;
 					best_m = _m;
@@ -46,6 +91,8 @@  static unsigned long ccu_nkm_find_best(unsigned long parent, unsigned long rate,
 	nkm->k = best_k;
 	nkm->m = best_m;
 
+	*parent = best_parent_rate;
+
 	return best_rate;
 }
 
@@ -106,7 +153,7 @@  static unsigned long ccu_nkm_recalc_rate(struct clk_hw *hw,
 }
 
 static unsigned long ccu_nkm_round_rate(struct ccu_mux_internal *mux,
-					struct clk_hw *hw,
+					struct clk_hw *parent_hw,
 					unsigned long *parent_rate,
 					unsigned long rate,
 					void *data)
@@ -124,7 +171,10 @@  static unsigned long ccu_nkm_round_rate(struct ccu_mux_internal *mux,
 	if (nkm->common.features & CCU_FEATURE_FIXED_POSTDIV)
 		rate *= nkm->fixed_post_div;
 
-	rate = ccu_nkm_find_best(*parent_rate, rate, &_nkm);
+	if (!clk_hw_can_set_rate_parent(&nkm->common.hw))
+		rate = ccu_nkm_find_best(parent_rate, rate, &_nkm, NULL);
+	else
+		rate = ccu_nkm_find_best(parent_rate, rate, &_nkm, parent_hw);
 
 	if (nkm->common.features & CCU_FEATURE_FIXED_POSTDIV)
 		rate /= nkm->fixed_post_div;
@@ -159,7 +209,7 @@  static int ccu_nkm_set_rate(struct clk_hw *hw, unsigned long rate,
 	_nkm.min_m = 1;
 	_nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
 
-	ccu_nkm_find_best(parent_rate, rate, &_nkm);
+	ccu_nkm_find_best(&parent_rate, rate, &_nkm, NULL);
 
 	spin_lock_irqsave(nkm->common.lock, flags);