diff mbox series

[6/6] pwm: renesas-tpu: Improve precision of period and duty_cycle calculation

Message ID 20220413085050.61144-6-u.kleine-koenig@pengutronix.de (mailing list archive)
State Superseded
Delegated to: Geert Uytterhoeven
Headers show
Series [1/6] pwm: renesas-tpu: Make use of dev_err_probe() | expand

Commit Message

Uwe Kleine-König April 13, 2022, 8:50 a.m. UTC
Dividing by the result of a division loses precision. Consider for example
clk_rate = 33000000 and period_ns = 500001. Then

	clk_rate / (NSEC_PER_SEC / period_ns)

has the exact value 16500.033, but in C this evaluates to 16508. It gets
worse for even bigger values of period_ns, so with period_ns = 500000001,
the exact result is 16500000.033 while in C we get 33000000.

For that reason use

	clk_rate * period_ns / NSEC_PER_SEC

instead which doesn't suffer from this problem. To ensure this doesn't
overflow add a safeguard check for clk_rate.

Incidentally this fixes a division by zero if period_ns > NSEC_PER_SEC.
Another side effect is that values bigger than INT_MAX for period and
duty_cycle are not wrongly discarded any more.

Fixes: 99b82abb0a35 ("pwm: Add Renesas TPU PWM driver")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
 drivers/pwm/pwm-renesas-tpu.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

Comments

Geert Uytterhoeven April 14, 2022, 10:27 a.m. UTC | #1
Hi Uwe,

On Wed, Apr 13, 2022 at 10:51 AM Uwe Kleine-König
<u.kleine-koenig@pengutronix.de> wrote:
> Dividing by the result of a division looses precision. Consider for example
> clk_rate = 33000000 and period_ns = 500001. Then
>
>         clk_rate / (NSEC_PER_SEC / period_ns)
>
> has the exact value 16500.033, but in C this evaluates to 16508. It gets
> worse for even bigger values of period_ns, so with period_ns = 500000001,
> the exact result is 16500000.033 while in C we get 33000000.
>
> For that reason use
>
>         clk_rate * period_ns / NSEC_PER_SEC
>
> instead which doesn't suffer from this problem. To ensure this doesn't
> overflow add a safeguard check for clk_rate.
>
> Incidentally this fixes a division by zero if period_ns > NSEC_PER_SEC.
> Another side effect is that values bigger than INT_MAX for period and
> duty_cyle are not wrongly discarded any more.

You forgot to mention that pwm_state.period is no longer truncated to u32.

>
> Fixes: 99b82abb0a35 ("pwm: Add Renesas TPU PWM driver")
> Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> ---
>  drivers/pwm/pwm-renesas-tpu.c | 34 ++++++++++++++++++++++------------
>  1 file changed, 22 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
> index fce7df418d62..c8c7a896fc55 100644
> --- a/drivers/pwm/pwm-renesas-tpu.c
> +++ b/drivers/pwm/pwm-renesas-tpu.c
> @@ -242,42 +242,52 @@ static void tpu_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
>  }
>
>  static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
> -                         int duty_ns, int period_ns, bool enabled)
> +                         u64 duty_ns, u64 period_ns, bool enabled)
>  {
>         struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
>         struct tpu_device *tpu = to_tpu_device(chip);
>         unsigned int prescaler;
>         bool duty_only = false;
>         u32 clk_rate;
> -       u32 period;
> +       u64 period;
>         u32 duty;
>         int ret;
>
>         clk_rate = clk_get_rate(tpu->clk);

As clk_get_rate() returns unsigned long, I think you should change
clk_rate from u32 to unsigned long, too.

> +       if (unlikely(clk_rate > 1000000000UL)) {

s/1000000000UL/NSEC_PER_SEC/

> +               /*
> +                * This won't happen in the nearer future, so this is only a
> +                * safeguard to prevent the following calculation from
> +                * overflowing. With this clk_rate * period_ns / NSEC_PER_SEC is
> +                * not greater than period_ns and so fits into an u64.
> +                */
> +               return -EINVAL;
> +       }
>
> -       period = clk_rate / (NSEC_PER_SEC / period_ns);
> +       period = mul_u64_u64_div_u64(clk_rate, period_ns, NSEC_PER_SEC);
>         if (period >= 64 * 0x10000 || period == 0)
>                 return -EINVAL;

Perhaps use "u64 period64" above, and

    /* We know period to fit into an u32 */
    period = (u32)period64;

to avoid introducing all casts below.

>
>         if (period < 0x10000)
>                 prescaler = 0;
>         else
> -               prescaler = ilog2(period / 0x10000) / 2 + 1;
> +               /*
> +                * We know period to fit into an u32, so cast accordingly to
> +                * make the division a bit cheaper
> +                */
> +               prescaler = ilog2((u32)period / 0x10000) / 2 + 1;

Using a loop would avoid the need for a division...

>
>         period >>= 2 * prescaler;
>
> -       if (duty_ns) {
> -               duty = (clk_rate >> 2 * prescaler)
> -                    / (NSEC_PER_SEC / duty_ns);
> -               if (duty > period)
> -                       return -EINVAL;
> -       } else {
> +       if (duty_ns)
> +               duty = mul_u64_u64_div_u64(clk_rate, duty_ns,
> +                                          (u64)NSEC_PER_SEC << (2 * prescaler));
> +       else
>                 duty = 0;
> -       }
>
>         dev_dbg(&tpu->pdev->dev,
>                 "rate %u, prescaler %u, period %u, duty %u\n",
> -               clk_rate, 1 << (2 * prescaler), period, duty);
> +               clk_rate, 1 << (2 * prescaler), (u32)period, duty);
>
>         if (tpd->prescaler == prescaler && tpd->period == period)
>                 duty_only = true;

With some (or all ;-) suggestions above taken into account:
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>

The display backlight still works fine on r8a7740/armadillo, so
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Geert Uytterhoeven April 19, 2022, 7:41 a.m. UTC | #2
Hi Uwe,

On Thu, Apr 14, 2022 at 12:27 PM Geert Uytterhoeven
<geert@linux-m68k.org> wrote:
> On Wed, Apr 13, 2022 at 10:51 AM Uwe Kleine-König
> <u.kleine-koenig@pengutronix.de> wrote:
> > Dividing by the result of a division looses precision. Consider for example
> > clk_rate = 33000000 and period_ns = 500001. Then
> >
> >         clk_rate / (NSEC_PER_SEC / period_ns)
> >
> > has the exact value 16500.033, but in C this evaluates to 16508. It gets
> > worse for even bigger values of period_ns, so with period_ns = 500000001,
> > the exact result is 16500000.033 while in C we get 33000000.
> >
> > For that reason use
> >
> >         clk_rate * period_ns / NSEC_PER_SEC
> >
> > instead which doesn't suffer from this problem. To ensure this doesn't
> > overflow add a safeguard check for clk_rate.
> >
> > Incidentally this fixes a division by zero if period_ns > NSEC_PER_SEC.
> > Another side effect is that values bigger than INT_MAX for period and
> > duty_cyle are not wrongly discarded any more.
>
> You forgot to mention that pwm_state.period is no longer truncated to u32.

Please ignore this bogus comment.
Sorry for the fuss.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Uwe Kleine-König April 19, 2022, 7:48 a.m. UTC | #3
Hello Geert,

first of all thanks for your review and testing. It's great to get some
feedback (even though it means some work for me :-)

On Thu, Apr 14, 2022 at 12:27:28PM +0200, Geert Uytterhoeven wrote:
> On Wed, Apr 13, 2022 at 10:51 AM Uwe Kleine-König
> <u.kleine-koenig@pengutronix.de> wrote:
> > Fixes: 99b82abb0a35 ("pwm: Add Renesas TPU PWM driver")
> > Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> > ---
> >  drivers/pwm/pwm-renesas-tpu.c | 34 ++++++++++++++++++++++------------
> >  1 file changed, 22 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
> > index fce7df418d62..c8c7a896fc55 100644
> > --- a/drivers/pwm/pwm-renesas-tpu.c
> > +++ b/drivers/pwm/pwm-renesas-tpu.c
> > @@ -242,42 +242,52 @@ static void tpu_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
> >  }
> >
> >  static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
> > -                         int duty_ns, int period_ns, bool enabled)
> > +                         u64 duty_ns, u64 period_ns, bool enabled)
> >  {
> >         struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
> >         struct tpu_device *tpu = to_tpu_device(chip);
> >         unsigned int prescaler;
> >         bool duty_only = false;
> >         u32 clk_rate;
> > -       u32 period;
> > +       u64 period;
> >         u32 duty;
> >         int ret;
> >
> >         clk_rate = clk_get_rate(tpu->clk);
> 
> As clk_get_rate() returns unsigned long, I think you should change
> clk_rate from u32 to unsigned long, too.

Yeah, could do that. I guess I didn't because in my bubble a long is 32
bits wide :-) IMHO fixing that is worth a separate patch.

> > +       if (unlikely(clk_rate > 1000000000UL)) {
> 
> s/1000000000UL/NSEC_PER_SEC/

ok

> > +               /*
> > +                * This won't happen in the nearer future, so this is only a
> > +                * safeguard to prevent the following calculation from
> > +                * overflowing. With this clk_rate * period_ns / NSEC_PER_SEC is
> > +                * not greater than period_ns and so fits into an u64.
> > +                */
> > +               return -EINVAL;
> > +       }
> >
> > -       period = clk_rate / (NSEC_PER_SEC / period_ns);
> > +       period = mul_u64_u64_div_u64(clk_rate, period_ns, NSEC_PER_SEC);
> >         if (period >= 64 * 0x10000 || period == 0)
> >                 return -EINVAL;
> 
> Perhaps use "u64 period64" above, and
> 
>     /* We know period to fit into an u32 */
>     period = (u32)period64;
> 
> to avoid introducing all casts below.

I first had it that way, but didn't like it. Yeah, it makes the patch a
bit smaller, but IMHO it adds some burden to understand the code flow
because for a reader having two variables for the same (semantic) value
is harder to understand.

> 
> >
> >         if (period < 0x10000)
> >                 prescaler = 0;
> >         else
> > -               prescaler = ilog2(period / 0x10000) / 2 + 1;
> > +               /*
> > +                * We know period to fit into an u32, so cast accordingly to
> > +                * make the division a bit cheaper
> > +                */
> > +               prescaler = ilog2((u32)period / 0x10000) / 2 + 1;
> 
> Using a loop would avoid the need for a division...

I would "fix" this differently, there isn't really a division; at least
I would expect (but didn't check) that the compiler uses a cheap shift
to implement "/ 0x10000" and "/ 2". ilog2 might become a bit cheaper for
a 32 bit value. So I would replace that by:

	/*
	 * ilog2 might be a bit cheaper for u32 than u64 and we know
	 * period to fit into a u32, so cast accordingly.
	 */

Best regards
Uwe
diff mbox series

Patch

diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
index fce7df418d62..c8c7a896fc55 100644
--- a/drivers/pwm/pwm-renesas-tpu.c
+++ b/drivers/pwm/pwm-renesas-tpu.c
@@ -242,42 +242,52 @@  static void tpu_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 }
 
 static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-			  int duty_ns, int period_ns, bool enabled)
+			  u64 duty_ns, u64 period_ns, bool enabled)
 {
 	struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
 	struct tpu_device *tpu = to_tpu_device(chip);
 	unsigned int prescaler;
 	bool duty_only = false;
 	u32 clk_rate;
-	u32 period;
+	u64 period;
 	u32 duty;
 	int ret;
 
 	clk_rate = clk_get_rate(tpu->clk);
+	if (unlikely(clk_rate > 1000000000UL)) {
+		/*
+		 * This won't happen in the nearer future, so this is only a
+		 * safeguard to prevent the following calculation from
+		 * overflowing. With this clk_rate * period_ns / NSEC_PER_SEC is
+		 * not greater than period_ns and so fits into an u64.
+		 */
+		return -EINVAL;
+	}
 
-	period = clk_rate / (NSEC_PER_SEC / period_ns);
+	period = mul_u64_u64_div_u64(clk_rate, period_ns, NSEC_PER_SEC);
 	if (period >= 64 * 0x10000 || period == 0)
 		return -EINVAL;
 
 	if (period < 0x10000)
 		prescaler = 0;
 	else
-		prescaler = ilog2(period / 0x10000) / 2 + 1;
+		/*
+		 * We know period to fit into an u32, so cast accordingly to
+		 * make the division a bit cheaper
+		 */
+		prescaler = ilog2((u32)period / 0x10000) / 2 + 1;
 
 	period >>= 2 * prescaler;
 
-	if (duty_ns) {
-		duty = (clk_rate >> 2 * prescaler)
-		     / (NSEC_PER_SEC / duty_ns);
-		if (duty > period)
-			return -EINVAL;
-	} else {
+	if (duty_ns)
+		duty = mul_u64_u64_div_u64(clk_rate, duty_ns,
+					   (u64)NSEC_PER_SEC << (2 * prescaler));
+	else
 		duty = 0;
-	}
 
 	dev_dbg(&tpu->pdev->dev,
 		"rate %u, prescaler %u, period %u, duty %u\n",
-		clk_rate, 1 << (2 * prescaler), period, duty);
+		clk_rate, 1 << (2 * prescaler), (u32)period, duty);
 
 	if (tpd->prescaler == prescaler && tpd->period == period)
 		duty_only = true;