diff mbox series

[07/10] drm/format-helper: Add drm_fb_swab()

Message ID 20200429124830.27475-8-noralf@tronnes.org (mailing list archive)
State New, archived
Headers show
Series Generic USB Display driver | expand

Commit Message

Noralf Trønnes April 29, 2020, 12:48 p.m. UTC
This replaces drm_fb_swab16() with drm_fb_swab() supporting 16 and 32-bit.
Also make pixel line caching optional.

Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
---
 drivers/gpu/drm/drm_format_helper.c | 61 +++++++++++++++++++----------
 drivers/gpu/drm/drm_mipi_dbi.c      |  2 +-
 include/drm/drm_format_helper.h     |  4 +-
 3 files changed, 44 insertions(+), 23 deletions(-)

Comments

Sam Ravnborg May 3, 2020, 10:29 a.m. UTC | #1
Hi Noralf

On Wed, Apr 29, 2020 at 02:48:27PM +0200, Noralf Trønnes wrote:
> This replaces drm_fb_swab16() with drm_fb_swab() supporting 16 and 32-bit.
> Also make pixel line caching optional.
> 
> Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
A couple of nits, with these considered:
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>

> ---
>  drivers/gpu/drm/drm_format_helper.c | 61 +++++++++++++++++++----------
>  drivers/gpu/drm/drm_mipi_dbi.c      |  2 +-
>  include/drm/drm_format_helper.h     |  4 +-
>  3 files changed, 44 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
> index 0897cb9aeaff..5c147c49668c 100644
> --- a/drivers/gpu/drm/drm_format_helper.c
> +++ b/drivers/gpu/drm/drm_format_helper.c
> @@ -79,39 +79,60 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
>  
>  /**
> - * drm_fb_swab16 - Swap bytes into clip buffer
> - * @dst: RGB565 destination buffer
> - * @vaddr: RGB565 source buffer
> + * drm_fb_swab - Swap bytes into clip buffer
> + * @dst: Destination buffer
> + * @src: Source buffer
>   * @fb: DRM framebuffer
>   * @clip: Clip rectangle area to copy
> + * @cached: Source buffer is mapped cached (eg. not write-combined)
> + *
> + * If @cached is false a temporary buffer is used to cache one pixel line at a
> + * time to speed up slow uncached reads.
> + *
> + * This function does not apply clipping on dst, i.e. the destination
> + * is a small buffer containing the clip rect only.
>   */
> -void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
> -		   struct drm_rect *clip)
> +void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
> +		 struct drm_rect *clip, bool cached)
>  {
> -	size_t len = (clip->x2 - clip->x1) * sizeof(u16);
> +	u8 cpp = fb->format->cpp[0];
Use of format->cpp is deprecated, should be char_per_block according to
the comment in drm_fourcc.h

> +	size_t len = drm_rect_width(clip) * cpp;
> +	u16 *src16, *dst16 = dst;
> +	u32 *src32, *dst32 = dst;
>  	unsigned int x, y;
> -	u16 *src, *buf;
> +	void *buf = NULL;
>  
> -	/*
> -	 * The cma memory is write-combined so reads are uncached.
> -	 * Speed up by fetching one line at a time.
> -	 */
> -	buf = kmalloc(len, GFP_KERNEL);
> -	if (!buf)
> +	if (WARN_ON_ONCE(cpp == 1))
>  		return;
Or cpp != 2 && cpp != 4?
>  
> +	if (!cached)
> +		buf = kmalloc(len, GFP_KERNEL);
> +
> +	src += clip_offset(clip, fb->pitches[0], cpp);
Good that drm_rect_width() and clip_offset() are used,
replacing open-coded variants.

> +
>  	for (y = clip->y1; y < clip->y2; y++) {
> -		src = vaddr + (y * fb->pitches[0]);
> -		src += clip->x1;
> -		memcpy(buf, src, len);
> -		src = buf;
> -		for (x = clip->x1; x < clip->x2; x++)
> -			*dst++ = swab16(*src++);
> +		if (buf) {
> +			memcpy(buf, src, len);
> +			src16 = buf;
> +			src32 = buf;
> +		} else {
> +			src16 = src;
> +			src32 = src;
> +		}
> +
> +		for (x = clip->x1; x < clip->x2; x++) {
> +			if (cpp == 4)
> +				*dst32++ = swab32(*src32++);
> +			else
> +				*dst16++ = swab16(*src16++);
> +		}
> +
> +		src += fb->pitches[0];
>  	}
>  
>  	kfree(buf);
>  }
> -EXPORT_SYMBOL(drm_fb_swab16);
> +EXPORT_SYMBOL(drm_fb_swab);
>  
>  static void drm_fb_xrgb8888_to_rgb565_line(u16 *dbuf, u32 *sbuf,
>  					   unsigned int pixels,
> diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
> index 16bff1be4b8a..bfefbcb69287 100644
> --- a/drivers/gpu/drm/drm_mipi_dbi.c
> +++ b/drivers/gpu/drm/drm_mipi_dbi.c
> @@ -217,7 +217,7 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb,
>  	switch (fb->format->format) {
>  	case DRM_FORMAT_RGB565:
>  		if (swap)
> -			drm_fb_swab16(dst, src, fb, clip);
> +			drm_fb_swab(dst, src, fb, clip, !import_attach);
>  		else
>  			drm_fb_memcpy(dst, src, fb, clip);
>  		break;
> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
> index ac220aa1a245..5f9e37032468 100644
> --- a/include/drm/drm_format_helper.h
> +++ b/include/drm/drm_format_helper.h
> @@ -14,8 +14,8 @@ void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
>  void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>  			   struct drm_framebuffer *fb,
>  			   struct drm_rect *clip);
> -void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
> -		   struct drm_rect *clip);
> +void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
> +		 struct drm_rect *clip, bool cached);
>  void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr,
>  			       struct drm_framebuffer *fb,
>  			       struct drm_rect *clip, bool swab);
> -- 
> 2.23.0
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Noralf Trønnes May 3, 2020, 1:38 p.m. UTC | #2
Den 03.05.2020 12.29, skrev Sam Ravnborg:
> Hi Noralf
> 
> On Wed, Apr 29, 2020 at 02:48:27PM +0200, Noralf Trønnes wrote:
>> This replaces drm_fb_swab16() with drm_fb_swab() supporting 16 and 32-bit.
>> Also make pixel line caching optional.
>>
>> Signed-off-by: Noralf Trønnes <noralf@tronnes.org>
> A couple of nits, with these considered:
> Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
> 
>> ---
>>  drivers/gpu/drm/drm_format_helper.c | 61 +++++++++++++++++++----------
>>  drivers/gpu/drm/drm_mipi_dbi.c      |  2 +-
>>  include/drm/drm_format_helper.h     |  4 +-
>>  3 files changed, 44 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
>> index 0897cb9aeaff..5c147c49668c 100644
>> --- a/drivers/gpu/drm/drm_format_helper.c
>> +++ b/drivers/gpu/drm/drm_format_helper.c
>> @@ -79,39 +79,60 @@ void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>  EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
>>  
>>  /**
>> - * drm_fb_swab16 - Swap bytes into clip buffer
>> - * @dst: RGB565 destination buffer
>> - * @vaddr: RGB565 source buffer
>> + * drm_fb_swab - Swap bytes into clip buffer
>> + * @dst: Destination buffer
>> + * @src: Source buffer
>>   * @fb: DRM framebuffer
>>   * @clip: Clip rectangle area to copy
>> + * @cached: Source buffer is mapped cached (eg. not write-combined)
>> + *
>> + * If @cached is false a temporary buffer is used to cache one pixel line at a
>> + * time to speed up slow uncached reads.
>> + *
>> + * This function does not apply clipping on dst, i.e. the destination
>> + * is a small buffer containing the clip rect only.
>>   */
>> -void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
>> -		   struct drm_rect *clip)
>> +void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
>> +		 struct drm_rect *clip, bool cached)
>>  {
>> -	size_t len = (clip->x2 - clip->x1) * sizeof(u16);
>> +	u8 cpp = fb->format->cpp[0];
> Use of format->cpp is deprecated, should be char_per_block according to
> the comment in drm_fourcc.h

I ducked this because if I had to do it properly I would have to look at
block width/height as well and yes ensure that num_planes is 1. That
would leave me with writing a helper function for something I don't
really understand :-)

static inline bool
drm_format_info_is_WHAT_TO_CALL_THIS(const struct drm_format_info *info)
{
	return info->num_planes == 1 &&
	       drm_format_info_block_width(info, 0) == 1 &&
	       drm_format_info_block_height(info, 0) == 1;
}

Or I could ofc just spell out the full assert inside this function:

	info->num_planes == 1 &&
	drm_format_info_block_width(info, 0) == 1 &&
	drm_format_info_block_height(info, 0) == 1 &&
	info->char_per_block[0] == 2 &&
	info->char_per_block[0] == 4

That way I don't have to know what I'm actually checking.
I'm using drm_fb_swab() for RGB formats, but it can be used for any
format that meets the above criteria.

And maybe I should check .hsub and .vsub as well, I don't know.

cpp was such a nice simple concept :-) So I'll keep it unless someone
knowledgeable shines some light on this.

> 
>> +	size_t len = drm_rect_width(clip) * cpp;
>> +	u16 *src16, *dst16 = dst;
>> +	u32 *src32, *dst32 = dst;
>>  	unsigned int x, y;
>> -	u16 *src, *buf;
>> +	void *buf = NULL;
>>  
>> -	/*
>> -	 * The cma memory is write-combined so reads are uncached.
>> -	 * Speed up by fetching one line at a time.
>> -	 */
>> -	buf = kmalloc(len, GFP_KERNEL);
>> -	if (!buf)
>> +	if (WARN_ON_ONCE(cpp == 1))
>>  		return;
> Or cpp != 2 && cpp != 4?

Indeed, I agree.

Noralf.

>>  
>> +	if (!cached)
>> +		buf = kmalloc(len, GFP_KERNEL);
>> +
>> +	src += clip_offset(clip, fb->pitches[0], cpp);
> Good that drm_rect_width() and clip_offset() are used,
> replacing open-coded variants.
> 
>> +
>>  	for (y = clip->y1; y < clip->y2; y++) {
>> -		src = vaddr + (y * fb->pitches[0]);
>> -		src += clip->x1;
>> -		memcpy(buf, src, len);
>> -		src = buf;
>> -		for (x = clip->x1; x < clip->x2; x++)
>> -			*dst++ = swab16(*src++);
>> +		if (buf) {
>> +			memcpy(buf, src, len);
>> +			src16 = buf;
>> +			src32 = buf;
>> +		} else {
>> +			src16 = src;
>> +			src32 = src;
>> +		}
>> +
>> +		for (x = clip->x1; x < clip->x2; x++) {
>> +			if (cpp == 4)
>> +				*dst32++ = swab32(*src32++);
>> +			else
>> +				*dst16++ = swab16(*src16++);
>> +		}
>> +
>> +		src += fb->pitches[0];
>>  	}
>>  
>>  	kfree(buf);
>>  }
>> -EXPORT_SYMBOL(drm_fb_swab16);
>> +EXPORT_SYMBOL(drm_fb_swab);
>>  
>>  static void drm_fb_xrgb8888_to_rgb565_line(u16 *dbuf, u32 *sbuf,
>>  					   unsigned int pixels,
>> diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
>> index 16bff1be4b8a..bfefbcb69287 100644
>> --- a/drivers/gpu/drm/drm_mipi_dbi.c
>> +++ b/drivers/gpu/drm/drm_mipi_dbi.c
>> @@ -217,7 +217,7 @@ int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb,
>>  	switch (fb->format->format) {
>>  	case DRM_FORMAT_RGB565:
>>  		if (swap)
>> -			drm_fb_swab16(dst, src, fb, clip);
>> +			drm_fb_swab(dst, src, fb, clip, !import_attach);
>>  		else
>>  			drm_fb_memcpy(dst, src, fb, clip);
>>  		break;
>> diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
>> index ac220aa1a245..5f9e37032468 100644
>> --- a/include/drm/drm_format_helper.h
>> +++ b/include/drm/drm_format_helper.h
>> @@ -14,8 +14,8 @@ void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
>>  void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
>>  			   struct drm_framebuffer *fb,
>>  			   struct drm_rect *clip);
>> -void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
>> -		   struct drm_rect *clip);
>> +void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
>> +		 struct drm_rect *clip, bool cached);
>>  void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr,
>>  			       struct drm_framebuffer *fb,
>>  			       struct drm_rect *clip, bool swab);
>> -- 
>> 2.23.0
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
index 0897cb9aeaff..5c147c49668c 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -79,39 +79,60 @@  void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
 EXPORT_SYMBOL(drm_fb_memcpy_dstclip);
 
 /**
- * drm_fb_swab16 - Swap bytes into clip buffer
- * @dst: RGB565 destination buffer
- * @vaddr: RGB565 source buffer
+ * drm_fb_swab - Swap bytes into clip buffer
+ * @dst: Destination buffer
+ * @src: Source buffer
  * @fb: DRM framebuffer
  * @clip: Clip rectangle area to copy
+ * @cached: Source buffer is mapped cached (eg. not write-combined)
+ *
+ * If @cached is false a temporary buffer is used to cache one pixel line at a
+ * time to speed up slow uncached reads.
+ *
+ * This function does not apply clipping on dst, i.e. the destination
+ * is a small buffer containing the clip rect only.
  */
-void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
-		   struct drm_rect *clip)
+void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
+		 struct drm_rect *clip, bool cached)
 {
-	size_t len = (clip->x2 - clip->x1) * sizeof(u16);
+	u8 cpp = fb->format->cpp[0];
+	size_t len = drm_rect_width(clip) * cpp;
+	u16 *src16, *dst16 = dst;
+	u32 *src32, *dst32 = dst;
 	unsigned int x, y;
-	u16 *src, *buf;
+	void *buf = NULL;
 
-	/*
-	 * The cma memory is write-combined so reads are uncached.
-	 * Speed up by fetching one line at a time.
-	 */
-	buf = kmalloc(len, GFP_KERNEL);
-	if (!buf)
+	if (WARN_ON_ONCE(cpp == 1))
 		return;
 
+	if (!cached)
+		buf = kmalloc(len, GFP_KERNEL);
+
+	src += clip_offset(clip, fb->pitches[0], cpp);
+
 	for (y = clip->y1; y < clip->y2; y++) {
-		src = vaddr + (y * fb->pitches[0]);
-		src += clip->x1;
-		memcpy(buf, src, len);
-		src = buf;
-		for (x = clip->x1; x < clip->x2; x++)
-			*dst++ = swab16(*src++);
+		if (buf) {
+			memcpy(buf, src, len);
+			src16 = buf;
+			src32 = buf;
+		} else {
+			src16 = src;
+			src32 = src;
+		}
+
+		for (x = clip->x1; x < clip->x2; x++) {
+			if (cpp == 4)
+				*dst32++ = swab32(*src32++);
+			else
+				*dst16++ = swab16(*src16++);
+		}
+
+		src += fb->pitches[0];
 	}
 
 	kfree(buf);
 }
-EXPORT_SYMBOL(drm_fb_swab16);
+EXPORT_SYMBOL(drm_fb_swab);
 
 static void drm_fb_xrgb8888_to_rgb565_line(u16 *dbuf, u32 *sbuf,
 					   unsigned int pixels,
diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
index 16bff1be4b8a..bfefbcb69287 100644
--- a/drivers/gpu/drm/drm_mipi_dbi.c
+++ b/drivers/gpu/drm/drm_mipi_dbi.c
@@ -217,7 +217,7 @@  int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb,
 	switch (fb->format->format) {
 	case DRM_FORMAT_RGB565:
 		if (swap)
-			drm_fb_swab16(dst, src, fb, clip);
+			drm_fb_swab(dst, src, fb, clip, !import_attach);
 		else
 			drm_fb_memcpy(dst, src, fb, clip);
 		break;
diff --git a/include/drm/drm_format_helper.h b/include/drm/drm_format_helper.h
index ac220aa1a245..5f9e37032468 100644
--- a/include/drm/drm_format_helper.h
+++ b/include/drm/drm_format_helper.h
@@ -14,8 +14,8 @@  void drm_fb_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
 void drm_fb_memcpy_dstclip(void __iomem *dst, void *vaddr,
 			   struct drm_framebuffer *fb,
 			   struct drm_rect *clip);
-void drm_fb_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
-		   struct drm_rect *clip);
+void drm_fb_swab(void *dst, void *src, struct drm_framebuffer *fb,
+		 struct drm_rect *clip, bool cached);
 void drm_fb_xrgb8888_to_rgb565(void *dst, void *vaddr,
 			       struct drm_framebuffer *fb,
 			       struct drm_rect *clip, bool swab);