diff mbox series

[fbtest,12/17] drawops: Fix crash when drawing large ellipses

Message ID 20241215104508.191237-13-geert@linux-m68k.org (mailing list archive)
State New
Headers show
Series Export feature and large ellipses support | expand

Commit Message

Geert Uytterhoeven Dec. 15, 2024, 10:45 a.m. UTC
"test002" crashes when run with a display resolution of e.g. 2560x1440
pixels, due to 32-bit overflow in the ellipse drawing routine.

Fix this by creating a copy that uses 64-bit arithmetic.  Use a
heuristic to pick either the 32-bit or the 64-bit version, to avoid the
overhead of the 64-bit version on small systems with small displays.
Replace (unsigned) int by u32/s32 in the 32-bit version for clarity.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drawops/generic.c | 127 +++++++++++++++++++++++++++++++++++++++++-----
 include/types.h   |   3 ++
 2 files changed, 116 insertions(+), 14 deletions(-)

Comments

Helge Deller Dec. 15, 2024, 3:08 p.m. UTC | #1
On 12/15/24 11:45, Geert Uytterhoeven wrote:
> "test002" crashes when run with a display resolution of e.g. 2560x1440
> pixels, due to 32-bit overflow in the ellipse drawing routine.
>
> Fix this by creating a copy that uses 64-bit arithmetic.  Use a
> heuristic to pick either the 32-bit or the 64-bit version, to avoid the
> overhead of the 64-bit version on small systems with small displays.

I see you always build the 32- and 64-bit versions, so when you mean
overhead you mean runtime overhead, not compiled binary size overhead.
So, just wondering:
Did you maybe measured how much slower the 64-bit version is on slow 32-bit systems?
I'm fine with your decision to build both, but I'm wondering if it's really necessary
to keep two versions for a "test tool"?

Helge

> Replace (unsigned) int by u32/s32 in the 32-bit version for clarity.
>
> Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
> ---
>   drawops/generic.c | 127 +++++++++++++++++++++++++++++++++++++++++-----
>   include/types.h   |   3 ++
>   2 files changed, 116 insertions(+), 14 deletions(-)
>
> diff --git a/drawops/generic.c b/drawops/generic.c
> index 5fd971b59bc698fe..c4cfad3223773a23 100644
> --- a/drawops/generic.c
> +++ b/drawops/generic.c
> @@ -305,8 +305,98 @@ static void fill_ellipse_points(u32 cx, u32 cy, u32 x, u32 y, pixel_t pixel)
>       }
>   }
>
> -static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
> -		       draw_func_t draw_inner, draw_func_t draw_outer)
> +static void do_ellipse_32(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
> +			  draw_func_t draw_inner, draw_func_t draw_outer)
> +{
> +    u32 a2 = a*a;
> +    u32 b2 = b*b;
> +
> +    if (a <= b) {
> +	u32 x1 = 0;
> +	u32 y1 = b;
> +	s32 S = a2*(1-2*b)+2*b2;
> +	s32 T = b2-2*a2*(2*b-1);
> +	u32 dT1 = 4*b2;
> +	u32 dS1 = dT1+2*b2;
> +	s32 dS2 = -4*a2*(b-1);
> +	s32 dT2 = dS2+2*a2;
> +
> +	while (1) {
> +	    if (S < 0) {
> +		if (draw_inner)
> +		    draw_inner(x, y, x1, y1, pixel);
> +		S += dS1;
> +		T += dT1;
> +		dS1 += 4*b2;
> +		dT1 += 4*b2;
> +		x1++;
> +	    } else if (T < 0) {
> +		draw_outer(x, y, x1, y1, pixel);
> +		if (y1 == 0)
> +		    break;
> +		S += dS1+dS2;
> +		T += dT1+dT2;
> +		dS1 += 4*b2;
> +		dT1 += 4*b2;
> +		dS2 += 4*a2;
> +		dT2 += 4*a2;
> +		x1++;
> +		y1--;
> +	    } else {
> +		draw_outer(x, y, x1, y1, pixel);
> +		if (y1 == 0)
> +		    break;
> +		S += dS2;
> +		T += dT2;
> +		dS2 += 4*a2;
> +		dT2 += 4*a2;
> +		y1--;
> +	    }
> +	}
> +    } else {
> +	u32 x1 = a;
> +	u32 y1 = 0;
> +	s32 S = b2*(1-2*a)+2*a2;
> +	s32 T = a2-2*b2*(2*a-1);
> +	u32 dT1 = 4*a2;
> +	u32 dS1 = dT1+2*a2;
> +	s32 dS2 = -4*b2*(a-1);
> +	s32 dT2 = dS2+2*b2;
> +
> +	draw_outer(x, y, x1, y1, pixel);
> +	do {
> +	    if (S < 0) {
> +		S += dS1;
> +		T += dT1;
> +		dS1 += 4*a2;
> +		dT1 += 4*a2;
> +		y1++;
> +		draw_outer(x, y, x1, y1, pixel);
> +	    } else if (T < 0) {
> +		S += dS1+dS2;
> +		T += dT1+dT2;
> +		dS1 += 4*a2;
> +		dT1 += 4*a2;
> +		dS2 += 4*b2;
> +		dT2 += 4*b2;
> +		x1--;
> +		y1++;
> +		draw_outer(x, y, x1, y1, pixel);
> +	    } else {
> +		S += dS2;
> +		T += dT2;
> +		dS2 += 4*b2;
> +		dT2 += 4*b2;
> +		x1--;
> +		if (draw_inner)
> +		    draw_inner(x, y, x1, y1, pixel);
> +	    }
> +	} while (x1 > 0);
> +    }
> +}
> +
> +static void do_ellipse_64(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
> +			  draw_func_t draw_inner, draw_func_t draw_outer)
>   {
>       u32 a2 = a*a;
>       u32 b2 = b*b;
> @@ -314,12 +404,12 @@ static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
>       if (a <= b) {
>   	u32 x1 = 0;
>   	u32 y1 = b;
> -	int S = a2*(1-2*b)+2*b2;
> -	int T = b2-2*a2*(2*b-1);
> -	unsigned int dT1 = 4*b2;
> -	unsigned int dS1 = dT1+2*b2;
> -	int dS2 = -4*a2*(b-1);
> -	int dT2 = dS2+2*a2;
> +	s64 S = a2*(1-2LL*b)+2LL*b2;
> +	s64 T = b2-2LL*a2*(2LL*b-1);
> +	u64 dT1 = 4*b2;
> +	u64 dS1 = dT1+2*b2;
> +	s64 dS2 = -4LL*a2*(b-1);
> +	s64 dT2 = dS2+2*a2;
>
>   	while (1) {
>   	    if (S < 0) {
> @@ -356,12 +446,12 @@ static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
>       } else {
>   	u32 x1 = a;
>   	u32 y1 = 0;
> -	int S = b2*(1-2*a)+2*a2;
> -	int T = a2-2*b2*(2*a-1);
> -	unsigned int dT1 = 4*a2;
> -	unsigned int dS1 = dT1+2*a2;
> -	int dS2 = -4*b2*(a-1);
> -	int dT2 = dS2+2*b2;
> +	s64 S = b2*(1-2LL*a)+2LL*a2;
> +	s64 T = a2-2LL*b2*(2LL*a-1);
> +	u64 dT1 = 4*a2;
> +	u64 dS1 = dT1+2*a2;
> +	s64 dS2 = -4LL*b2*(a-1);
> +	s64 dT2 = dS2+2*b2;
>
>   	draw_outer(x, y, x1, y1, pixel);
>   	do {
> @@ -395,6 +485,15 @@ static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
>       }
>   }
>
> +static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
> +		       draw_func_t draw_inner, draw_func_t draw_outer)
> +{
> +    if ((a + 576) * (b + 576) < 1440000)
> +	do_ellipse_32(x, y, a, b, pixel, draw_inner, draw_outer);
> +    else
> +	do_ellipse_64(x, y, a, b, pixel, draw_inner, draw_outer);
> +}
> +
>   void generic_draw_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel)
>   {
>       if (a == b)
> diff --git a/include/types.h b/include/types.h
> index 9112ba6855b61eaa..0e3c76521469912f 100644
> --- a/include/types.h
> +++ b/include/types.h
> @@ -21,6 +21,9 @@ typedef unsigned short u16;
>   typedef unsigned int u32;
>   typedef unsigned long long u64;
>
> +typedef int s32;
> +typedef long long s64;
> +
>   #if defined(__LP64__) || defined(__alpha__) || defined(__ia64__) || \
>       defined(__mips64__) || defined(__powerpc64__) || defined(__s390x__) || \
>       defined(__sparc64__) || defined(__x86_64__) || defined(__hppa64__)
Geert Uytterhoeven Dec. 18, 2024, 4:29 p.m. UTC | #2
Hi Helge,

On Sun, Dec 15, 2024 at 4:08 PM Helge Deller <deller@gmx.de> wrote:
> On 12/15/24 11:45, Geert Uytterhoeven wrote:
> > "test002" crashes when run with a display resolution of e.g. 2560x1440
> > pixels, due to 32-bit overflow in the ellipse drawing routine.
> >
> > Fix this by creating a copy that uses 64-bit arithmetic.  Use a
> > heuristic to pick either the 32-bit or the 64-bit version, to avoid the
> > overhead of the 64-bit version on small systems with small displays.
>
> I see you always build the 32- and 64-bit versions, so when you mean
> overhead you mean runtime overhead, not compiled binary size overhead.

Exactly.

> So, just wondering:
> Did you maybe measured how much slower the 64-bit version is on slow 32-bit systems?
> I'm fine with your decision to build both, but I'm wondering if it's really necessary
> to keep two versions for a "test tool"?

On ARM Cortex-A9, draw_ellipse(400, 240, 300, 239, ...) with a
dummy (empty) set_pixel() method using the 64-bit version takes 44%
longer than the 32-bit version, so I think it is worthwhile to have
both versions.

Gr{oetje,eeting}s,

                        Geert
Helge Deller Dec. 18, 2024, 6 p.m. UTC | #3
On 12/18/24 17:29, Geert Uytterhoeven wrote:
> Hi Helge,
>
> On Sun, Dec 15, 2024 at 4:08 PM Helge Deller <deller@gmx.de> wrote:
>> On 12/15/24 11:45, Geert Uytterhoeven wrote:
>>> "test002" crashes when run with a display resolution of e.g. 2560x1440
>>> pixels, due to 32-bit overflow in the ellipse drawing routine.
>>>
>>> Fix this by creating a copy that uses 64-bit arithmetic.  Use a
>>> heuristic to pick either the 32-bit or the 64-bit version, to avoid the
>>> overhead of the 64-bit version on small systems with small displays.
>>
>> I see you always build the 32- and 64-bit versions, so when you mean
>> overhead you mean runtime overhead, not compiled binary size overhead.
>
> Exactly.
>
>> So, just wondering:
>> Did you maybe measured how much slower the 64-bit version is on slow 32-bit systems?
>> I'm fine with your decision to build both, but I'm wondering if it's really necessary
>> to keep two versions for a "test tool"?
>
> On ARM Cortex-A9, draw_ellipse(400, 240, 300, 239, ...) with a
> dummy (empty) set_pixel() method using the 64-bit version takes 44%
> longer than the 32-bit version, so I think it is worthwhile to have
> both versions.

Oh, I didn't expect that much.
I agree it's worthwhile to have both versions.

Helge
diff mbox series

Patch

diff --git a/drawops/generic.c b/drawops/generic.c
index 5fd971b59bc698fe..c4cfad3223773a23 100644
--- a/drawops/generic.c
+++ b/drawops/generic.c
@@ -305,8 +305,98 @@  static void fill_ellipse_points(u32 cx, u32 cy, u32 x, u32 y, pixel_t pixel)
     }
 }
 
-static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
-		       draw_func_t draw_inner, draw_func_t draw_outer)
+static void do_ellipse_32(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
+			  draw_func_t draw_inner, draw_func_t draw_outer)
+{
+    u32 a2 = a*a;
+    u32 b2 = b*b;
+
+    if (a <= b) {
+	u32 x1 = 0;
+	u32 y1 = b;
+	s32 S = a2*(1-2*b)+2*b2;
+	s32 T = b2-2*a2*(2*b-1);
+	u32 dT1 = 4*b2;
+	u32 dS1 = dT1+2*b2;
+	s32 dS2 = -4*a2*(b-1);
+	s32 dT2 = dS2+2*a2;
+
+	while (1) {
+	    if (S < 0) {
+		if (draw_inner)
+		    draw_inner(x, y, x1, y1, pixel);
+		S += dS1;
+		T += dT1;
+		dS1 += 4*b2;
+		dT1 += 4*b2;
+		x1++;
+	    } else if (T < 0) {
+		draw_outer(x, y, x1, y1, pixel);
+		if (y1 == 0)
+		    break;
+		S += dS1+dS2;
+		T += dT1+dT2;
+		dS1 += 4*b2;
+		dT1 += 4*b2;
+		dS2 += 4*a2;
+		dT2 += 4*a2;
+		x1++;
+		y1--;
+	    } else {
+		draw_outer(x, y, x1, y1, pixel);
+		if (y1 == 0)
+		    break;
+		S += dS2;
+		T += dT2;
+		dS2 += 4*a2;
+		dT2 += 4*a2;
+		y1--;
+	    }
+	}
+    } else {
+	u32 x1 = a;
+	u32 y1 = 0;
+	s32 S = b2*(1-2*a)+2*a2;
+	s32 T = a2-2*b2*(2*a-1);
+	u32 dT1 = 4*a2;
+	u32 dS1 = dT1+2*a2;
+	s32 dS2 = -4*b2*(a-1);
+	s32 dT2 = dS2+2*b2;
+
+	draw_outer(x, y, x1, y1, pixel);
+	do {
+	    if (S < 0) {
+		S += dS1;
+		T += dT1;
+		dS1 += 4*a2;
+		dT1 += 4*a2;
+		y1++;
+		draw_outer(x, y, x1, y1, pixel);
+	    } else if (T < 0) {
+		S += dS1+dS2;
+		T += dT1+dT2;
+		dS1 += 4*a2;
+		dT1 += 4*a2;
+		dS2 += 4*b2;
+		dT2 += 4*b2;
+		x1--;
+		y1++;
+		draw_outer(x, y, x1, y1, pixel);
+	    } else {
+		S += dS2;
+		T += dT2;
+		dS2 += 4*b2;
+		dT2 += 4*b2;
+		x1--;
+		if (draw_inner)
+		    draw_inner(x, y, x1, y1, pixel);
+	    }
+	} while (x1 > 0);
+    }
+}
+
+static void do_ellipse_64(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
+			  draw_func_t draw_inner, draw_func_t draw_outer)
 {
     u32 a2 = a*a;
     u32 b2 = b*b;
@@ -314,12 +404,12 @@  static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
     if (a <= b) {
 	u32 x1 = 0;
 	u32 y1 = b;
-	int S = a2*(1-2*b)+2*b2;
-	int T = b2-2*a2*(2*b-1);
-	unsigned int dT1 = 4*b2;
-	unsigned int dS1 = dT1+2*b2;
-	int dS2 = -4*a2*(b-1);
-	int dT2 = dS2+2*a2;
+	s64 S = a2*(1-2LL*b)+2LL*b2;
+	s64 T = b2-2LL*a2*(2LL*b-1);
+	u64 dT1 = 4*b2;
+	u64 dS1 = dT1+2*b2;
+	s64 dS2 = -4LL*a2*(b-1);
+	s64 dT2 = dS2+2*a2;
 
 	while (1) {
 	    if (S < 0) {
@@ -356,12 +446,12 @@  static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
     } else {
 	u32 x1 = a;
 	u32 y1 = 0;
-	int S = b2*(1-2*a)+2*a2;
-	int T = a2-2*b2*(2*a-1);
-	unsigned int dT1 = 4*a2;
-	unsigned int dS1 = dT1+2*a2;
-	int dS2 = -4*b2*(a-1);
-	int dT2 = dS2+2*b2;
+	s64 S = b2*(1-2LL*a)+2LL*a2;
+	s64 T = a2-2LL*b2*(2LL*a-1);
+	u64 dT1 = 4*a2;
+	u64 dS1 = dT1+2*a2;
+	s64 dS2 = -4LL*b2*(a-1);
+	s64 dT2 = dS2+2*b2;
 
 	draw_outer(x, y, x1, y1, pixel);
 	do {
@@ -395,6 +485,15 @@  static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
     }
 }
 
+static void do_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel,
+		       draw_func_t draw_inner, draw_func_t draw_outer)
+{
+    if ((a + 576) * (b + 576) < 1440000)
+	do_ellipse_32(x, y, a, b, pixel, draw_inner, draw_outer);
+    else
+	do_ellipse_64(x, y, a, b, pixel, draw_inner, draw_outer);
+}
+
 void generic_draw_ellipse(u32 x, u32 y, u32 a, u32 b, pixel_t pixel)
 {
     if (a == b)
diff --git a/include/types.h b/include/types.h
index 9112ba6855b61eaa..0e3c76521469912f 100644
--- a/include/types.h
+++ b/include/types.h
@@ -21,6 +21,9 @@  typedef unsigned short u16;
 typedef unsigned int u32;
 typedef unsigned long long u64;
 
+typedef int s32;
+typedef long long s64;
+
 #if defined(__LP64__) || defined(__alpha__) || defined(__ia64__) || \
     defined(__mips64__) || defined(__powerpc64__) || defined(__s390x__) || \
     defined(__sparc64__) || defined(__x86_64__) || defined(__hppa64__)