diff mbox

[2/2] drm/amdgpu: Avoid overflows/divide-by-zero in latency_watermark calculations.

Message ID 1490818152-10891-3-git-send-email-mario.kleiner.de@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mario Kleiner March 29, 2017, 8:09 p.m. UTC
At dot clocks > approx. 250 MHz, some of these calculations will overflow
and cause miscalculation of latency watermarks, and some overflows also
cause a divide-by-zero driver crash ("divide error: 0000 [#1] PREEMPT SMP"
in "dce_v10_0_latency_watermark+0x12d/0x190").

This divide-by-zero happened, e.g., on an AMD Tonga Pro under DCE-10,
on a DisplayPort panel when trying to set a video mode of 2560x1440
at 165 Hz vrefresh with a dot clock of 635.540 MHz.

Refine calculations to avoid the overflows.

Tested for DCE-10 with R9 380 Tonga + ASUS ROG PG279 panel.

Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 19 +++----------------
 drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 19 +++----------------
 drivers/gpu/drm/amd/amdgpu/dce_v6_0.c  | 19 +++----------------
 drivers/gpu/drm/amd/amdgpu/dce_v8_0.c  | 19 +++----------------
 4 files changed, 12 insertions(+), 64 deletions(-)

Comments

Alex Deucher March 30, 2017, 7:24 p.m. UTC | #1
On Wed, Mar 29, 2017 at 4:09 PM, Mario Kleiner
<mario.kleiner.de@gmail.com> wrote:
> At dot clocks > approx. 250 Mhz, some of these calcs will overflow and
> cause miscalculation of latency watermarks, and for some overflows also
> divide-by-zero driver crash ("divide error: 0000 [#1] PREEMPT SMP" in
> "dce_v10_0_latency_watermark+0x12d/0x190").
>
> This zero-divide happened, e.g., on AMD Tonga Pro under DCE-10,
> on a Displayport panel when trying to set a video mode of 2560x1440
> at 165 Hz vrefresh with a dot clock of 635.540 Mhz.
>
> Refine calculations to avoid the overflows.
>
> Tested for DCE-10 with R9 380 Tonga + ASUS ROG PG279 panel.
>
> Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>

Applied the series.  thanks!

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 19 +++----------------
>  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 19 +++----------------
>  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c  | 19 +++----------------
>  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c  | 19 +++----------------
>  4 files changed, 12 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
> index d3db921..33541ac 100644
> --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
> @@ -1090,23 +1090,10 @@ static u32 dce_v10_0_latency_watermark(struct dce10_wm_params *wm)
>         a.full = dfixed_const(available_bandwidth);
>         b.full = dfixed_const(wm->num_heads);
>         a.full = dfixed_div(a, b);
> +       tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
> +       tmp = min(dfixed_trunc(a), tmp);
>
> -       b.full = dfixed_const(mc_latency + 512);
> -       c.full = dfixed_const(wm->disp_clk);
> -       b.full = dfixed_div(b, c);
> -
> -       c.full = dfixed_const(dmif_size);
> -       b.full = dfixed_div(c, b);
> -
> -       tmp = min(dfixed_trunc(a), dfixed_trunc(b));
> -
> -       b.full = dfixed_const(1000);
> -       c.full = dfixed_const(wm->disp_clk);
> -       b.full = dfixed_div(c, b);
> -       c.full = dfixed_const(wm->bytes_per_pixel);
> -       b.full = dfixed_mul(b, c);
> -
> -       lb_fill_bw = min(tmp, dfixed_trunc(b));
> +       lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
>
>         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
>         b.full = dfixed_const(1000);
> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
> index 15ee8eb..1388f8a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
> @@ -1059,23 +1059,10 @@ static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm)
>         a.full = dfixed_const(available_bandwidth);
>         b.full = dfixed_const(wm->num_heads);
>         a.full = dfixed_div(a, b);
> +       tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
> +       tmp = min(dfixed_trunc(a), tmp);
>
> -       b.full = dfixed_const(mc_latency + 512);
> -       c.full = dfixed_const(wm->disp_clk);
> -       b.full = dfixed_div(b, c);
> -
> -       c.full = dfixed_const(dmif_size);
> -       b.full = dfixed_div(c, b);
> -
> -       tmp = min(dfixed_trunc(a), dfixed_trunc(b));
> -
> -       b.full = dfixed_const(1000);
> -       c.full = dfixed_const(wm->disp_clk);
> -       b.full = dfixed_div(c, b);
> -       c.full = dfixed_const(wm->bytes_per_pixel);
> -       b.full = dfixed_mul(b, c);
> -
> -       lb_fill_bw = min(tmp, dfixed_trunc(b));
> +       lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
>
>         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
>         b.full = dfixed_const(1000);
> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> index cb9158b..bad52c0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
> @@ -861,23 +861,10 @@ static u32 dce_v6_0_latency_watermark(struct dce6_wm_params *wm)
>         a.full = dfixed_const(available_bandwidth);
>         b.full = dfixed_const(wm->num_heads);
>         a.full = dfixed_div(a, b);
> +       tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
> +       tmp = min(dfixed_trunc(a), tmp);
>
> -       b.full = dfixed_const(mc_latency + 512);
> -       c.full = dfixed_const(wm->disp_clk);
> -       b.full = dfixed_div(b, c);
> -
> -       c.full = dfixed_const(dmif_size);
> -       b.full = dfixed_div(c, b);
> -
> -       tmp = min(dfixed_trunc(a), dfixed_trunc(b));
> -
> -       b.full = dfixed_const(1000);
> -       c.full = dfixed_const(wm->disp_clk);
> -       b.full = dfixed_div(c, b);
> -       c.full = dfixed_const(wm->bytes_per_pixel);
> -       b.full = dfixed_mul(b, c);
> -
> -       lb_fill_bw = min(tmp, dfixed_trunc(b));
> +       lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
>
>         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
>         b.full = dfixed_const(1000);
> diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
> index d547bcf..e52fc92 100644
> --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
> @@ -974,23 +974,10 @@ static u32 dce_v8_0_latency_watermark(struct dce8_wm_params *wm)
>         a.full = dfixed_const(available_bandwidth);
>         b.full = dfixed_const(wm->num_heads);
>         a.full = dfixed_div(a, b);
> +       tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
> +       tmp = min(dfixed_trunc(a), tmp);
>
> -       b.full = dfixed_const(mc_latency + 512);
> -       c.full = dfixed_const(wm->disp_clk);
> -       b.full = dfixed_div(b, c);
> -
> -       c.full = dfixed_const(dmif_size);
> -       b.full = dfixed_div(c, b);
> -
> -       tmp = min(dfixed_trunc(a), dfixed_trunc(b));
> -
> -       b.full = dfixed_const(1000);
> -       c.full = dfixed_const(wm->disp_clk);
> -       b.full = dfixed_div(c, b);
> -       c.full = dfixed_const(wm->bytes_per_pixel);
> -       b.full = dfixed_mul(b, c);
> -
> -       lb_fill_bw = min(tmp, dfixed_trunc(b));
> +       lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
>
>         a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
>         b.full = dfixed_const(1000);
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index d3db921..33541ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1090,23 +1090,10 @@  static u32 dce_v10_0_latency_watermark(struct dce10_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 15ee8eb..1388f8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1059,23 +1059,10 @@  static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index cb9158b..bad52c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -861,23 +861,10 @@  static u32 dce_v6_0_latency_watermark(struct dce6_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index d547bcf..e52fc92 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -974,23 +974,10 @@  static u32 dce_v8_0_latency_watermark(struct dce8_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);