diff mbox series

[v4,5/5] drm/ofdrm: Support big-endian scanout buffers

Message ID 20220928105010.18880-6-tzimmermann@suse.de (mailing list archive)
State Not Applicable
Headers show
Series drm: Add driver for PowerPC OF displays | expand

Commit Message

Thomas Zimmermann Sept. 28, 2022, 10:50 a.m. UTC
All DRM formats assume little-endian byte order. On big-endian systems,
it is likely that the scanout buffer is in big endian as well. Update
the format accordingly and add endianess conversion to the format-helper
library. Also opt-in to allocated buffers in host format by default.

Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/drm_format_helper.c | 10 ++++++
 drivers/gpu/drm/tiny/ofdrm.c        | 55 +++++++++++++++++++++++++++--
 2 files changed, 63 insertions(+), 2 deletions(-)

Comments

Michal Suchánek Sept. 28, 2022, 11:12 a.m. UTC | #1
Hello,

On Wed, Sep 28, 2022 at 12:50:10PM +0200, Thomas Zimmermann wrote:
> All DRM formats assume little-endian byte order. On big-endian systems,
> it is likely that the scanout buffer is in big endian as well. Update
> the format accordingly and add endianess conversion to the format-helper
> library. Also opt-in to allocated buffers in host format by default.

This sounds backwards to me.

Skimming through the code it sounds like the buffer is in fact in the
same format all the time but when the CPU is switched to BE it sees the
data loaded from it differently.

Or am I missing something?

Thanks

Michal

> 
> Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
> ---
>  drivers/gpu/drm/drm_format_helper.c | 10 ++++++
>  drivers/gpu/drm/tiny/ofdrm.c        | 55 +++++++++++++++++++++++++++--
>  2 files changed, 63 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
> index 4afc4ac27342..fca7936db083 100644
> --- a/drivers/gpu/drm/drm_format_helper.c
> +++ b/drivers/gpu/drm/drm_format_helper.c
> @@ -659,6 +659,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
>  			drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, false);
>  			return 0;
>  		}
> +	} else if (dst_format == (DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN)) {
> +		if (fb_format == DRM_FORMAT_RGB565) {
> +			drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
> +			return 0;
> +		}
>  	} else if (dst_format == DRM_FORMAT_RGB888) {
>  		if (fb_format == DRM_FORMAT_XRGB8888) {
>  			drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip);
> @@ -677,6 +682,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
>  			drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip);
>  			return 0;
>  		}
> +	} else if (dst_format == DRM_FORMAT_BGRX8888) {
> +		if (fb_format == DRM_FORMAT_XRGB8888) {
> +			drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
> +			return 0;
> +		}
>  	}
>  
>  	drm_warn_once(fb->dev, "No conversion helper from %p4cc to %p4cc found.\n",
> diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
> index 0bf5eebf6678..6e100a7f5db7 100644
> --- a/drivers/gpu/drm/tiny/ofdrm.c
> +++ b/drivers/gpu/drm/tiny/ofdrm.c
> @@ -94,7 +94,7 @@ static int display_get_validated_int0(struct drm_device *dev, const char *name,
>  }
>  
>  static const struct drm_format_info *display_get_validated_format(struct drm_device *dev,
> -								  u32 depth)
> +								  u32 depth, bool big_endian)
>  {
>  	const struct drm_format_info *info;
>  	u32 format;
> @@ -115,6 +115,29 @@ static const struct drm_format_info *display_get_validated_format(struct drm_dev
>  		return ERR_PTR(-EINVAL);
>  	}
>  
> +	/*
> +	 * DRM formats assume little-endian byte order. Update the format
> +	 * if the scanout buffer uses big-endian ordering.
> +	 */
> +	if (big_endian) {
> +		switch (format) {
> +		case DRM_FORMAT_XRGB8888:
> +			format = DRM_FORMAT_BGRX8888;
> +			break;
> +		case DRM_FORMAT_ARGB8888:
> +			format = DRM_FORMAT_BGRA8888;
> +			break;
> +		case DRM_FORMAT_RGB565:
> +			format = DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN;
> +			break;
> +		case DRM_FORMAT_XRGB1555:
> +			format = DRM_FORMAT_XRGB1555 | DRM_FORMAT_BIG_ENDIAN;
> +			break;
> +		default:
> +			break;
> +		}
> +	}
> +
>  	info = drm_format_info(format);
>  	if (!info) {
>  		drm_err(dev, "cannot find framebuffer format for depth %u\n", depth);
> @@ -134,6 +157,23 @@ static int display_read_u32_of(struct drm_device *dev, struct device_node *of_no
>  	return ret;
>  }
>  
> +static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
> +{
> +	bool big_endian;
> +
> +#ifdef __BIG_ENDIAN
> +	big_endian = true;
> +	if (of_get_property(of_node, "little-endian", NULL))
> +		big_endian = false;
> +#else
> +	big_endian = false;
> +	if (of_get_property(of_node, "big-endian", NULL))
> +		big_endian = true;
> +#endif
> +
> +	return big_endian;
> +}
> +
>  static int display_get_width_of(struct drm_device *dev, struct device_node *of_node)
>  {
>  	u32 width;
> @@ -613,6 +653,7 @@ static void ofdrm_device_set_gamma_linear(struct ofdrm_device *odev,
>  
>  	switch (format->format) {
>  	case DRM_FORMAT_RGB565:
> +	case DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN:
>  		/* Use better interpolation, to take 32 values from 0 to 255 */
>  		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE / 8; i++) {
>  			unsigned char r = i * 8 + i / 4;
> @@ -631,6 +672,7 @@ static void ofdrm_device_set_gamma_linear(struct ofdrm_device *odev,
>  		}
>  		break;
>  	case DRM_FORMAT_XRGB8888:
> +	case DRM_FORMAT_BGRX8888:
>  		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE; i++)
>  			odev->funcs->cmap_write(odev, i, i, i, i);
>  		break;
> @@ -650,6 +692,7 @@ static void ofdrm_device_set_gamma(struct ofdrm_device *odev,
>  
>  	switch (format->format) {
>  	case DRM_FORMAT_RGB565:
> +	case DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN:
>  		/* Use better interpolation, to take 32 values from lut[0] to lut[255] */
>  		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE / 8; i++) {
>  			unsigned char r = lut[i * 8 + i / 4].red >> 8;
> @@ -668,6 +711,7 @@ static void ofdrm_device_set_gamma(struct ofdrm_device *odev,
>  		}
>  		break;
>  	case DRM_FORMAT_XRGB8888:
> +	case DRM_FORMAT_BGRX8888:
>  		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE; i++) {
>  			unsigned char r = lut[i].red >> 8;
>  			unsigned char g = lut[i].green >> 8;
> @@ -718,6 +762,9 @@ static const uint32_t ofdrm_primary_plane_formats[] = {
>  	DRM_FORMAT_RGB565,
>  	//DRM_FORMAT_XRGB1555,
>  	//DRM_FORMAT_C8,
> +	/* Big-endian formats below */
> +	DRM_FORMAT_BGRX8888,
> +	DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN,
>  };
>  
>  static const uint64_t ofdrm_primary_plane_format_modifiers[] = {
> @@ -1048,6 +1095,7 @@ static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
>  	struct ofdrm_device *odev;
>  	struct drm_device *dev;
>  	enum ofdrm_model model;
> +	bool big_endian;
>  	int width, height, depth, linebytes;
>  	const struct drm_format_info *format;
>  	u64 address;
> @@ -1109,6 +1157,8 @@ static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
>  		break;
>  	}
>  
> +	big_endian = display_get_big_endian_of(dev, of_node);
> +
>  	width = display_get_width_of(dev, of_node);
>  	if (width < 0)
>  		return ERR_PTR(width);
> @@ -1122,7 +1172,7 @@ static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
>  	if (linebytes < 0)
>  		return ERR_PTR(linebytes);
>  
> -	format = display_get_validated_format(dev, depth);
> +	format = display_get_validated_format(dev, depth, big_endian);
>  	if (IS_ERR(format))
>  		return ERR_CAST(format);
>  	if (!linebytes) {
> @@ -1234,6 +1284,7 @@ static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
>  		dev->mode_config.preferred_depth = depth;
>  		break;
>  	}
> +	dev->mode_config.quirk_addfb_prefer_host_byte_order = true;
>  
>  	/* Primary plane */
>  
> -- 
> 2.37.3
>
Thomas Zimmermann Sept. 28, 2022, 11:30 a.m. UTC | #2
Hi

Am 28.09.22 um 13:12 schrieb Michal Suchánek:
> Hello,
> 
> On Wed, Sep 28, 2022 at 12:50:10PM +0200, Thomas Zimmermann wrote:
>> All DRM formats assume little-endian byte order. On big-endian systems,
>> it is likely that the scanout buffer is in big endian as well. Update
>> the format accordingly and add endianess conversion to the format-helper
>> library. Also opt-in to allocated buffers in host format by default.
> 
> This sounds backwards to me.

In which way?

> 
> Skimming through the code it sounds like the buffer is in fact in the
> same format all the time but when the CPU is switched to BE it sees the
> data loaded from it differently.
> 
> Or am I missing something?

Which buffer do you mean? The scanout buffer coming from the firmware, 
or the GEM BOs that are allocated by renderers?

The scanout buffer is either in BE or LE format. According to the code 
in offb, it's signaled by a node in the device tree. I took that code 
into ofdrm and set the scanout format accordingly.

The GEM BO can be in any format. If necessary, ofdrm converts internally 
while copying it to the scanout buffer. The quirk we opt in, makes DRM 
prefer whatever default byteorder the host prefers (BE or LE). According 
to the docs, it's the right thing to do. But that only affects the GEM 
code, not the scanout buffer.

Best regards
Thomas

> 
> Thanks
> 
> Michal
> 
>>
>> Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
>> Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
>> ---
>>   drivers/gpu/drm/drm_format_helper.c | 10 ++++++
>>   drivers/gpu/drm/tiny/ofdrm.c        | 55 +++++++++++++++++++++++++++--
>>   2 files changed, 63 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
>> index 4afc4ac27342..fca7936db083 100644
>> --- a/drivers/gpu/drm/drm_format_helper.c
>> +++ b/drivers/gpu/drm/drm_format_helper.c
>> @@ -659,6 +659,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
>>   			drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, false);
>>   			return 0;
>>   		}
>> +	} else if (dst_format == (DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN)) {
>> +		if (fb_format == DRM_FORMAT_RGB565) {
>> +			drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
>> +			return 0;
>> +		}
>>   	} else if (dst_format == DRM_FORMAT_RGB888) {
>>   		if (fb_format == DRM_FORMAT_XRGB8888) {
>>   			drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip);
>> @@ -677,6 +682,11 @@ int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
>>   			drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip);
>>   			return 0;
>>   		}
>> +	} else if (dst_format == DRM_FORMAT_BGRX8888) {
>> +		if (fb_format == DRM_FORMAT_XRGB8888) {
>> +			drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
>> +			return 0;
>> +		}
>>   	}
>>   
>>   	drm_warn_once(fb->dev, "No conversion helper from %p4cc to %p4cc found.\n",
>> diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
>> index 0bf5eebf6678..6e100a7f5db7 100644
>> --- a/drivers/gpu/drm/tiny/ofdrm.c
>> +++ b/drivers/gpu/drm/tiny/ofdrm.c
>> @@ -94,7 +94,7 @@ static int display_get_validated_int0(struct drm_device *dev, const char *name,
>>   }
>>   
>>   static const struct drm_format_info *display_get_validated_format(struct drm_device *dev,
>> -								  u32 depth)
>> +								  u32 depth, bool big_endian)
>>   {
>>   	const struct drm_format_info *info;
>>   	u32 format;
>> @@ -115,6 +115,29 @@ static const struct drm_format_info *display_get_validated_format(struct drm_dev
>>   		return ERR_PTR(-EINVAL);
>>   	}
>>   
>> +	/*
>> +	 * DRM formats assume little-endian byte order. Update the format
>> +	 * if the scanout buffer uses big-endian ordering.
>> +	 */
>> +	if (big_endian) {
>> +		switch (format) {
>> +		case DRM_FORMAT_XRGB8888:
>> +			format = DRM_FORMAT_BGRX8888;
>> +			break;
>> +		case DRM_FORMAT_ARGB8888:
>> +			format = DRM_FORMAT_BGRA8888;
>> +			break;
>> +		case DRM_FORMAT_RGB565:
>> +			format = DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN;
>> +			break;
>> +		case DRM_FORMAT_XRGB1555:
>> +			format = DRM_FORMAT_XRGB1555 | DRM_FORMAT_BIG_ENDIAN;
>> +			break;
>> +		default:
>> +			break;
>> +		}
>> +	}
>> +
>>   	info = drm_format_info(format);
>>   	if (!info) {
>>   		drm_err(dev, "cannot find framebuffer format for depth %u\n", depth);
>> @@ -134,6 +157,23 @@ static int display_read_u32_of(struct drm_device *dev, struct device_node *of_no
>>   	return ret;
>>   }
>>   
>> +static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
>> +{
>> +	bool big_endian;
>> +
>> +#ifdef __BIG_ENDIAN
>> +	big_endian = true;
>> +	if (of_get_property(of_node, "little-endian", NULL))
>> +		big_endian = false;
>> +#else
>> +	big_endian = false;
>> +	if (of_get_property(of_node, "big-endian", NULL))
>> +		big_endian = true;
>> +#endif
>> +
>> +	return big_endian;
>> +}
>> +
>>   static int display_get_width_of(struct drm_device *dev, struct device_node *of_node)
>>   {
>>   	u32 width;
>> @@ -613,6 +653,7 @@ static void ofdrm_device_set_gamma_linear(struct ofdrm_device *odev,
>>   
>>   	switch (format->format) {
>>   	case DRM_FORMAT_RGB565:
>> +	case DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN:
>>   		/* Use better interpolation, to take 32 values from 0 to 255 */
>>   		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE / 8; i++) {
>>   			unsigned char r = i * 8 + i / 4;
>> @@ -631,6 +672,7 @@ static void ofdrm_device_set_gamma_linear(struct ofdrm_device *odev,
>>   		}
>>   		break;
>>   	case DRM_FORMAT_XRGB8888:
>> +	case DRM_FORMAT_BGRX8888:
>>   		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE; i++)
>>   			odev->funcs->cmap_write(odev, i, i, i, i);
>>   		break;
>> @@ -650,6 +692,7 @@ static void ofdrm_device_set_gamma(struct ofdrm_device *odev,
>>   
>>   	switch (format->format) {
>>   	case DRM_FORMAT_RGB565:
>> +	case DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN:
>>   		/* Use better interpolation, to take 32 values from lut[0] to lut[255] */
>>   		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE / 8; i++) {
>>   			unsigned char r = lut[i * 8 + i / 4].red >> 8;
>> @@ -668,6 +711,7 @@ static void ofdrm_device_set_gamma(struct ofdrm_device *odev,
>>   		}
>>   		break;
>>   	case DRM_FORMAT_XRGB8888:
>> +	case DRM_FORMAT_BGRX8888:
>>   		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE; i++) {
>>   			unsigned char r = lut[i].red >> 8;
>>   			unsigned char g = lut[i].green >> 8;
>> @@ -718,6 +762,9 @@ static const uint32_t ofdrm_primary_plane_formats[] = {
>>   	DRM_FORMAT_RGB565,
>>   	//DRM_FORMAT_XRGB1555,
>>   	//DRM_FORMAT_C8,
>> +	/* Big-endian formats below */
>> +	DRM_FORMAT_BGRX8888,
>> +	DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN,
>>   };
>>   
>>   static const uint64_t ofdrm_primary_plane_format_modifiers[] = {
>> @@ -1048,6 +1095,7 @@ static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
>>   	struct ofdrm_device *odev;
>>   	struct drm_device *dev;
>>   	enum ofdrm_model model;
>> +	bool big_endian;
>>   	int width, height, depth, linebytes;
>>   	const struct drm_format_info *format;
>>   	u64 address;
>> @@ -1109,6 +1157,8 @@ static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
>>   		break;
>>   	}
>>   
>> +	big_endian = display_get_big_endian_of(dev, of_node);
>> +
>>   	width = display_get_width_of(dev, of_node);
>>   	if (width < 0)
>>   		return ERR_PTR(width);
>> @@ -1122,7 +1172,7 @@ static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
>>   	if (linebytes < 0)
>>   		return ERR_PTR(linebytes);
>>   
>> -	format = display_get_validated_format(dev, depth);
>> +	format = display_get_validated_format(dev, depth, big_endian);
>>   	if (IS_ERR(format))
>>   		return ERR_CAST(format);
>>   	if (!linebytes) {
>> @@ -1234,6 +1284,7 @@ static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
>>   		dev->mode_config.preferred_depth = depth;
>>   		break;
>>   	}
>> +	dev->mode_config.quirk_addfb_prefer_host_byte_order = true;
>>   
>>   	/* Primary plane */
>>   
>> -- 
>> 2.37.3
>>
Javier Martinez Canillas Oct. 11, 2022, 7:46 a.m. UTC | #3
Hello Thomas,

On 9/28/22 12:50, Thomas Zimmermann wrote:
> All DRM formats assume little-endian byte order. On big-endian systems,
> it is likely that the scanout buffer is in big endian as well. Update

You say it is likely, not always then? Does it depend on whether the Open
Firmware is BE or LE ?

[...]

> +static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
> +{
> +	bool big_endian;
> +
> +#ifdef __BIG_ENDIAN
> +	big_endian = true;
> +	if (of_get_property(of_node, "little-endian", NULL))
> +		big_endian = false;
> +#else
> +	big_endian = false;
> +	if (of_get_property(of_node, "big-endian", NULL))
> +		big_endian = true;
> +#endif
> +
> +	return big_endian;
> +}
> +

Ah, I see. The heuristic then is whether the build is BE or LE or if the Device
Tree has an explicit node defining the endianess. The patch looks good to me:

Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Thomas Zimmermann Oct. 11, 2022, 11:30 a.m. UTC | #4
Hi

Am 11.10.22 um 09:46 schrieb Javier Martinez Canillas:
> Hello Thomas,
> 
> On 9/28/22 12:50, Thomas Zimmermann wrote:
>> All DRM formats assume little-endian byte order. On big-endian systems,
>> it is likely that the scanout buffer is in big endian as well. Update
> 
> You say it is likely, not always then? Does it depend on whether the Open
> Firmware is BE or LE ?

It's the endianess of the framebuffer. There's graphics hardware that 
can switch between the two or even support both at the same time 
(depending on the aperture range). I don't know the exact semantics when 
each is being used, but I suspect that it corresponds to host endianess.

> 
> [...]
> 
>> +static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
>> +{
>> +	bool big_endian;
>> +
>> +#ifdef __BIG_ENDIAN
>> +	big_endian = true;
>> +	if (of_get_property(of_node, "little-endian", NULL))
>> +		big_endian = false;
>> +#else
>> +	big_endian = false;
>> +	if (of_get_property(of_node, "big-endian", NULL))
>> +		big_endian = true;
>> +#endif
>> +
>> +	return big_endian;
>> +}
>> +
> 
> Ah, I see. The heuristic then is whether the build is BE or LE or if the Device
> Tree has an explicit node defining the endianess. The patch looks good to me:

Yes. I took this test from offb.

> 
> Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
> 

Thanks

Best regards
Thomas
Arnd Bergmann Oct. 11, 2022, 8:06 p.m. UTC | #5
On Tue, Oct 11, 2022, at 1:30 PM, Thomas Zimmermann wrote:
> Am 11.10.22 um 09:46 schrieb Javier Martinez Canillas:
>>> +static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
>>> +{
>>> +	bool big_endian;
>>> +
>>> +#ifdef __BIG_ENDIAN
>>> +	big_endian = true;
>>> +	if (of_get_property(of_node, "little-endian", NULL))
>>> +		big_endian = false;
>>> +#else
>>> +	big_endian = false;
>>> +	if (of_get_property(of_node, "big-endian", NULL))
>>> +		big_endian = true;
>>> +#endif
>>> +
>>> +	return big_endian;
>>> +}
>>> +
>> 
>> Ah, I see. The heuristic then is whether the build is BE or LE or if the Device
>> Tree has an explicit node defining the endianess. The patch looks good to me:
>
> Yes. I took this test from offb.

Has the driver been tested with little-endian kernels though? While
ppc32 kernels are always BE, you can build kernels as either big-endian
or little-endian for most (modern) powerpc64 and arm/arm64 hardware,
and I don't see why that should change the defaults of the driver
when describing the same framebuffer hardware.

I could understand having a default to e.g. big-endian on all powerpc and
a default for little-endian on all arm, but having it tied to the
way the kernel is built seems wrong, and doesn't make sense in a
DT binding either.

      Arnd
Michal Suchánek Oct. 11, 2022, 9:38 p.m. UTC | #6
On Tue, Oct 11, 2022 at 10:06:59PM +0200, Arnd Bergmann wrote:
> On Tue, Oct 11, 2022, at 1:30 PM, Thomas Zimmermann wrote:
> > Am 11.10.22 um 09:46 schrieb Javier Martinez Canillas:
> >>> +static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
> >>> +{
> >>> +	bool big_endian;
> >>> +
> >>> +#ifdef __BIG_ENDIAN
> >>> +	big_endian = true;
> >>> +	if (of_get_property(of_node, "little-endian", NULL))
> >>> +		big_endian = false;
> >>> +#else
> >>> +	big_endian = false;
> >>> +	if (of_get_property(of_node, "big-endian", NULL))
> >>> +		big_endian = true;
> >>> +#endif
> >>> +
> >>> +	return big_endian;
> >>> +}
> >>> +
> >> 
> >> Ah, I see. The heuristic then is whether the build is BE or LE or if the Device
> >> Tree has an explicit node defining the endianess. The patch looks good to me:
> >
> > Yes. I took this test from offb.
> 
> Has the driver been tested with little-endian kernels though? While
> ppc32 kernels are always BE, you can build kernels as either big-endian
> or little-endian for most (modern) powerpc64 and arm/arm64 hardware,
> and I don't see why that should change the defaults of the driver
> when describing the same framebuffer hardware.

The original code was added with
commit 7f29b87a7779 ("powerpc: offb: add support for foreign endianness")

The hardware is either big-endian or runtime-switchable-endian. It makes
sense to assume big-endian when runnig big-endian and the DT does not
specify endian which is likely on a historical system.

It also makes sense to assume that on system with
runtime-switchable-endian the DT specifies the framebuffer endian.

If systems that only do little-endian exist or emerge later then it also
makes sense to assume that the framebuffer matches the host if not
specified.

I don't really see a problem here.

BTW is this used on arm and on what platform?

I do not see any bindings in dts.

Thanks

Michal
Thomas Zimmermann Oct. 12, 2022, 6:46 a.m. UTC | #7
Hi

Am 11.10.22 um 22:06 schrieb Arnd Bergmann:
> On Tue, Oct 11, 2022, at 1:30 PM, Thomas Zimmermann wrote:
>> Am 11.10.22 um 09:46 schrieb Javier Martinez Canillas:
>>>> +static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
>>>> +{
>>>> +	bool big_endian;
>>>> +
>>>> +#ifdef __BIG_ENDIAN
>>>> +	big_endian = true;
>>>> +	if (of_get_property(of_node, "little-endian", NULL))
>>>> +		big_endian = false;
>>>> +#else
>>>> +	big_endian = false;
>>>> +	if (of_get_property(of_node, "big-endian", NULL))
>>>> +		big_endian = true;
>>>> +#endif
>>>> +
>>>> +	return big_endian;
>>>> +}
>>>> +
>>>
>>> Ah, I see. The heuristic then is whether the build is BE or LE or if the Device
>>> Tree has an explicit node defining the endianess. The patch looks good to me:
>>
>> Yes. I took this test from offb.
> 
> Has the driver been tested with little-endian kernels though? While
> ppc32 kernels are always BE, you can build kernels as either big-endian
> or little-endian for most (modern) powerpc64 and arm/arm64 hardware,
> and I don't see why that should change the defaults of the driver
> when describing the same framebuffer hardware.

Yes, I tested this on qemu's ppc64le and ppc64.

Best regards
Thomas

> 
> I could understand having a default to e.g. big-endian on all powerpc and
> a default for little-endian on all arm, but having it tied to the
> way the kernel is built seems wrong, and doesn't make sense in a
> DT binding either.
> 
>        Arnd
Arnd Bergmann Oct. 12, 2022, 7:17 a.m. UTC | #8
On Wed, Oct 12, 2022, at 8:46 AM, Thomas Zimmermann wrote:
> Am 11.10.22 um 22:06 schrieb Arnd Bergmann:
>> On Tue, Oct 11, 2022, at 1:30 PM, Thomas Zimmermann wrote:
>>> Am 11.10.22 um 09:46 schrieb Javier Martinez Canillas:
>>>>> +static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
>>>>> +{
>>>>> +	bool big_endian;
>>>>> +
>>>>> +#ifdef __BIG_ENDIAN
>>>>> +	big_endian = true;
>>>>> +	if (of_get_property(of_node, "little-endian", NULL))
>>>>> +		big_endian = false;
>>>>> +#else
>>>>> +	big_endian = false;
>>>>> +	if (of_get_property(of_node, "big-endian", NULL))
>>>>> +		big_endian = true;
>>>>> +#endif
>>>>> +
>>>>> +	return big_endian;
>>>>> +}
>>>>> +
>>>>
>>>> Ah, I see. The heuristic then is whether the build is BE or LE or if the Device
>>>> Tree has an explicit node defining the endianess. The patch looks good to me:
>>>
>>> Yes. I took this test from offb.
>> 
>> Has the driver been tested with little-endian kernels though? While
>> ppc32 kernels are always BE, you can build kernels as either big-endian
>> or little-endian for most (modern) powerpc64 and arm/arm64 hardware,
>> and I don't see why that should change the defaults of the driver
>> when describing the same framebuffer hardware.
>
> Yes, I tested this on qemu's ppc64le and ppc64.

Does qemu mark the device has having a particular endianess then, or
does it switch the layout of the framebuffer to match what the CPU
does?

I've seen other cases where devices in qemu were defined using an
arbitrary definition of "cpu-endian", which is generally not how
real hardware works.

    Arnd
Thomas Zimmermann Oct. 12, 2022, 7:40 a.m. UTC | #9
Hi

Am 12.10.22 um 09:17 schrieb Arnd Bergmann:
> On Wed, Oct 12, 2022, at 8:46 AM, Thomas Zimmermann wrote:
>> Am 11.10.22 um 22:06 schrieb Arnd Bergmann:
>>> On Tue, Oct 11, 2022, at 1:30 PM, Thomas Zimmermann wrote:
>>>> Am 11.10.22 um 09:46 schrieb Javier Martinez Canillas:
>>>>>> +static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
>>>>>> +{
>>>>>> +	bool big_endian;
>>>>>> +
>>>>>> +#ifdef __BIG_ENDIAN
>>>>>> +	big_endian = true;
>>>>>> +	if (of_get_property(of_node, "little-endian", NULL))
>>>>>> +		big_endian = false;
>>>>>> +#else
>>>>>> +	big_endian = false;
>>>>>> +	if (of_get_property(of_node, "big-endian", NULL))
>>>>>> +		big_endian = true;
>>>>>> +#endif
>>>>>> +
>>>>>> +	return big_endian;
>>>>>> +}
>>>>>> +
>>>>>
>>>>> Ah, I see. The heuristic then is whether the build is BE or LE or if the Device
>>>>> Tree has an explicit node defining the endianess. The patch looks good to me:
>>>>
>>>> Yes. I took this test from offb.
>>>
>>> Has the driver been tested with little-endian kernels though? While
>>> ppc32 kernels are always BE, you can build kernels as either big-endian
>>> or little-endian for most (modern) powerpc64 and arm/arm64 hardware,
>>> and I don't see why that should change the defaults of the driver
>>> when describing the same framebuffer hardware.
>>
>> Yes, I tested this on qemu's ppc64le and ppc64.
> 
> Does qemu mark the device has having a particular endianess then, or
> does it switch the layout of the framebuffer to match what the CPU
> does?

The latter. On neither architecture does qemu expose this flag. The 
default endianess corresponds to the host.

Best regards
Thomas

> 
> I've seen other cases where devices in qemu were defined using an
> arbitrary definition of "cpu-endian", which is generally not how
> real hardware works.
> 
>      Arnd
Arnd Bergmann Oct. 12, 2022, 7:44 a.m. UTC | #10
On Wed, Oct 12, 2022, at 9:40 AM, Thomas Zimmermann wrote:
> Am 12.10.22 um 09:17 schrieb Arnd Bergmann:
>> On Wed, Oct 12, 2022, at 8:46 AM, Thomas Zimmermann wrote:
>
>> Does qemu mark the device has having a particular endianess then, or
>> does it switch the layout of the framebuffer to match what the CPU
>> does?
>
> The latter. On neither architecture does qemu expose this flag. The 
> default endianess corresponds to the host.

"host" as in the machine that qemu runs on, or the machine that is
being emulated? I suppose it would be broken either way, but in the
latter case, we could get away with detecting that the machine is
running under qemu.

    Arnd
Thomas Zimmermann Oct. 12, 2022, 8:27 a.m. UTC | #11
Hi

Am 12.10.22 um 09:44 schrieb Arnd Bergmann:
> On Wed, Oct 12, 2022, at 9:40 AM, Thomas Zimmermann wrote:
>> Am 12.10.22 um 09:17 schrieb Arnd Bergmann:
>>> On Wed, Oct 12, 2022, at 8:46 AM, Thomas Zimmermann wrote:
>>
>>> Does qemu mark the device has having a particular endianess then, or
>>> does it switch the layout of the framebuffer to match what the CPU
>>> does?
>>
>> The latter. On neither architecture does qemu expose this flag. The
>> default endianess corresponds to the host.
> 
> "host" as in the machine that qemu runs on, or the machine that is
> being emulated? I suppose it would be broken either way, but in the
> latter case, we could get away with detecting that the machine is
> running under qemu.

Sorry, my mistake. I meant "guest": the endianess of the framebuffer 
corresponds to the endianess of the emulated machine.  Given that many 
graphics cards support LE and BE modes, I assume that this behavior 
mimics real-hardware systems.

Best regards
Thomas

> 
>      Arnd
Arnd Bergmann Oct. 12, 2022, 8:38 a.m. UTC | #12
On Wed, Oct 12, 2022, at 10:27 AM, Thomas Zimmermann wrote:
> Am 12.10.22 um 09:44 schrieb Arnd Bergmann:
>> On Wed, Oct 12, 2022, at 9:40 AM, Thomas Zimmermann wrote:
>>> Am 12.10.22 um 09:17 schrieb Arnd Bergmann:
>>>> On Wed, Oct 12, 2022, at 8:46 AM, Thomas Zimmermann wrote:
>>>
>>>> Does qemu mark the device has having a particular endianess then, or
>>>> does it switch the layout of the framebuffer to match what the CPU
>>>> does?
>>>
>>> The latter. On neither architecture does qemu expose this flag. The
>>> default endianess corresponds to the host.
>> 
>> "host" as in the machine that qemu runs on, or the machine that is
>> being emulated? I suppose it would be broken either way, but in the
>> latter case, we could get away with detecting that the machine is
>> running under qemu.
>
> Sorry, my mistake. I meant "guest": the endianess of the framebuffer 
> corresponds to the endianess of the emulated machine.  Given that many 
> graphics cards support LE and BE modes, I assume that this behavior 
> mimics real-hardware systems.

Not really: While the hardware may be able to switch between
the modes, something has to actively set some hardware registers up
that way, but the offb/ofdrm driver has no interface for interacting
with that register, and the bootloader or firmware code that knows
about the register has no information about what kernel it will
eventually run. This is a bit architecture dependent, as e.g. on
MIPS, a bi-endian hardware platform has to run a bootloader with the
same endianness as the kernel, but on arm and powerpc the bootloader
is usually fixed and the kernel switches to its configured endianness
in the first few instructions after it gets entered.

     Arnd
Thomas Zimmermann Oct. 12, 2022, noon UTC | #13
Hi

Am 12.10.22 um 10:38 schrieb Arnd Bergmann:
> On Wed, Oct 12, 2022, at 10:27 AM, Thomas Zimmermann wrote:
>> Am 12.10.22 um 09:44 schrieb Arnd Bergmann:
>>> On Wed, Oct 12, 2022, at 9:40 AM, Thomas Zimmermann wrote:
>>>> Am 12.10.22 um 09:17 schrieb Arnd Bergmann:
>>>>> On Wed, Oct 12, 2022, at 8:46 AM, Thomas Zimmermann wrote:
>>>>
>>>>> Does qemu mark the device has having a particular endianess then, or
>>>>> does it switch the layout of the framebuffer to match what the CPU
>>>>> does?
>>>>
>>>> The latter. On neither architecture does qemu expose this flag. The
>>>> default endianess corresponds to the host.
>>>
>>> "host" as in the machine that qemu runs on, or the machine that is
>>> being emulated? I suppose it would be broken either way, but in the
>>> latter case, we could get away with detecting that the machine is
>>> running under qemu.
>>
>> Sorry, my mistake. I meant "guest": the endianess of the framebuffer
>> corresponds to the endianess of the emulated machine.  Given that many
>> graphics cards support LE and BE modes, I assume that this behavior
>> mimics real-hardware systems.
> 
> Not really: While the hardware may be able to switch between
> the modes, something has to actively set some hardware registers up
> that way, but the offb/ofdrm driver has no interface for interacting
> with that register, and the bootloader or firmware code that knows
> about the register has no information about what kernel it will
> eventually run. This is a bit architecture dependent, as e.g. on
> MIPS, a bi-endian hardware platform has to run a bootloader with the
> same endianness as the kernel, but on arm and powerpc the bootloader
> is usually fixed and the kernel switches to its configured endianness
> in the first few instructions after it gets entered.

Could well be. But ofdrm intents to replace offb and this test has 
worked well in offb for almost 15 yrs. If there are bug reports, I'm 
happy to take patches, but until then I see no reason to change it.

Best regards
Thomas

> 
>       Arnd
Michal Suchánek Oct. 12, 2022, 12:07 p.m. UTC | #14
On Wed, Oct 12, 2022 at 10:38:29AM +0200, Arnd Bergmann wrote:
> On Wed, Oct 12, 2022, at 10:27 AM, Thomas Zimmermann wrote:
> > Am 12.10.22 um 09:44 schrieb Arnd Bergmann:
> >> On Wed, Oct 12, 2022, at 9:40 AM, Thomas Zimmermann wrote:
> >>> Am 12.10.22 um 09:17 schrieb Arnd Bergmann:
> >>>> On Wed, Oct 12, 2022, at 8:46 AM, Thomas Zimmermann wrote:
> >>>
> >>>> Does qemu mark the device has having a particular endianess then, or
> >>>> does it switch the layout of the framebuffer to match what the CPU
> >>>> does?
> >>>
> >>> The latter. On neither architecture does qemu expose this flag. The
> >>> default endianess corresponds to the host.
> >> 
> >> "host" as in the machine that qemu runs on, or the machine that is
> >> being emulated? I suppose it would be broken either way, but in the
> >> latter case, we could get away with detecting that the machine is
> >> running under qemu.
> >
> > Sorry, my mistake. I meant "guest": the endianess of the framebuffer 
> > corresponds to the endianess of the emulated machine.  Given that many 
> > graphics cards support LE and BE modes, I assume that this behavior 
> > mimics real-hardware systems.
> 
> Not really: While the hardware may be able to switch between
> the modes, something has to actively set some hardware registers up
> that way, but the offb/ofdrm driver has no interface for interacting
> with that register, and the bootloader or firmware code that knows
> about the register has no information about what kernel it will
> eventually run. This is a bit architecture dependent, as e.g. on
> MIPS, a bi-endian hardware platform has to run a bootloader with the
> same endianness as the kernel, but on arm and powerpc the bootloader
> is usually fixed and the kernel switches to its configured endianness
> in the first few instructions after it gets entered.

But then the firmware knows that the kernel can switch endian and the
endian information should be provided. And maybe that should be emulated
better by qemu. Unfortunately, modern Power servers rarely come with a
graphics card so it's hard to test on real hardware.

Thanks

Michal
Arnd Bergmann Oct. 12, 2022, 1:12 p.m. UTC | #15
On Wed, Oct 12, 2022, at 2:00 PM, Thomas Zimmermann wrote:
>
> Could well be. But ofdrm intents to replace offb and this test has 
> worked well in offb for almost 15 yrs. If there are bug reports, I'm 
> happy to take patches, but until then I see no reason to change it.

I wouldn't change the code in offb unless a user reports a bug,
but I don't see a point in adding the same mistake to ofdrm if we
know it can't work on real hardware.

I tried to find out where this is configured in qemu, but it seems
to depend on the framebuffer backend there: most are always little-endian,
ati/bochs/vga-pci/virtio-vga are configurable from the guest through
some register setting, but vga.c picks a default from the
'TARGET_WORDS_BIGENDIAN' macro, which I think is set differently
between qemu-system-ppc64le and qemu-system-ppc64.

If you are using the framebuffer code from vga.c, I would guess that
that you can run a big-endian kernel with qemu-system-ppc64,
or a little-endian kernel with qemu-system-ppc64le and get the
correct colors, while running a little-endian kernel with
qemu-system-ppc64 and vga.c, or using a different framebuffer
emulation on a big-endian kernel would give you the wrong colors.

Which combinations did you actually test?

     Arnd
Michal Suchánek Oct. 12, 2022, 2:27 p.m. UTC | #16
Hello,

On Wed, Oct 12, 2022 at 03:12:35PM +0200, Arnd Bergmann wrote:
> On Wed, Oct 12, 2022, at 2:00 PM, Thomas Zimmermann wrote:
> >
> > Could well be. But ofdrm intents to replace offb and this test has 
> > worked well in offb for almost 15 yrs. If there are bug reports, I'm 
> > happy to take patches, but until then I see no reason to change it.
> 
> I wouldn't change the code in offb unless a user reports a bug,
> but I don't see a point in adding the same mistake to ofdrm if we
> know it can't work on real hardware.
> 
> I tried to find out where this is configured in qemu, but it seems
> to depend on the framebuffer backend there: most are always little-endian,
> ati/bochs/vga-pci/virtio-vga are configurable from the guest through
> some register setting, but vga.c picks a default from the
> 'TARGET_WORDS_BIGENDIAN' macro, which I think is set differently
> between qemu-system-ppc64le and qemu-system-ppc64.
> 
> If you are using the framebuffer code from vga.c, I would guess that
> that you can run a big-endian kernel with qemu-system-ppc64,
> or a little-endian kernel with qemu-system-ppc64le and get the
> correct colors, while running a little-endian kernel with
> qemu-system-ppc64 and vga.c, or using a different framebuffer
> emulation on a big-endian kernel would give you the wrong colors.

Thanks for digging this up.

That makes one thing clear: qemu does not emulate this framebuffer
property correctly, and cannot be relied on for verification.

If you can provide test results from real hardware that show the current
logic as flawed it should be changed.

In absence of such test results I think the most reasonable thing is to
keep the logic that nobody complained about for 10+ years.

Thanks

Michal
Thomas Zimmermann Oct. 12, 2022, 2:31 p.m. UTC | #17
Hi

Am 12.10.22 um 15:12 schrieb Arnd Bergmann:
> On Wed, Oct 12, 2022, at 2:00 PM, Thomas Zimmermann wrote:
>>
>> Could well be. But ofdrm intents to replace offb and this test has
>> worked well in offb for almost 15 yrs. If there are bug reports, I'm
>> happy to take patches, but until then I see no reason to change it.
> 
> I wouldn't change the code in offb unless a user reports a bug,
> but I don't see a point in adding the same mistake to ofdrm if we
> know it can't work on real hardware.

As I said, this has worked with offb and apparently on real hardware. 
For all I know, ATI hardware (before it became AMD) was used in PPC 
Macintoshs and assumed big-endian access on those machines.

> I tried to find out where this is configured in qemu, but it seems
> to depend on the framebuffer backend there: most are always little-endian,
> ati/bochs/vga-pci/virtio-vga are configurable from the guest through
> some register setting, but vga.c picks a default from the
> 'TARGET_WORDS_BIGENDIAN' macro, which I think is set differently
> between qemu-system-ppc64le and qemu-system-ppc64.
> 
> If you are using the framebuffer code from vga.c, I would guess that
> that you can run a big-endian kernel with qemu-system-ppc64,
> or a little-endian kernel with qemu-system-ppc64le and get the
> correct colors, while running a little-endian kernel with
> qemu-system-ppc64 and vga.c, or using a different framebuffer
> emulation on a big-endian kernel would give you the wrong colors.

If qemu doesn't give us the necessary DT property, it's a qemu bug. In 
in the absence of the property, picking the kernel's endianess is a 
sensible choice.

Best regards
Thomas

> 
> Which combinations did you actually test?
> 
>       Arnd
Ville Syrjälä Oct. 12, 2022, 2:59 p.m. UTC | #18
On Wed, Oct 12, 2022 at 04:31:14PM +0200, Thomas Zimmermann wrote:
> Hi
> 
> Am 12.10.22 um 15:12 schrieb Arnd Bergmann:
> > On Wed, Oct 12, 2022, at 2:00 PM, Thomas Zimmermann wrote:
> >>
> >> Could well be. But ofdrm intents to replace offb and this test has
> >> worked well in offb for almost 15 yrs. If there are bug reports, I'm
> >> happy to take patches, but until then I see no reason to change it.
> > 
> > I wouldn't change the code in offb unless a user reports a bug,
> > but I don't see a point in adding the same mistake to ofdrm if we
> > know it can't work on real hardware.
> 
> As I said, this has worked with offb and apparently on real hardware. 
> For all I know, ATI hardware (before it became AMD) was used in PPC 
> Macintoshs and assumed big-endian access on those machines.

At least mach64 class hardware has two frame buffer apertures, and
byte swapping can be configured separately for each. But that means
you only get correct byte swapping for at most two bpps at the same
time (and that only if you know which aperture to access each time).
IIRC Rage 128 already has the surface register stuff where you
could byte swap a limited set of ranges independently. And old
mga hardware has just one byte swap setting for the whole frame
buffer aperture, so only one bpp at a time.

That kind of horrible limitations of the byte swappers is the
main reason why I wanted to make drm fourcc endianness explicit.
Simply assuming host endianness would end in tears on big endian
as soon as you need to access stuff with two bpps at the same time.
Much better to just switch off those useless byte swappers and
swap by hand when necessary.
Michal Suchánek Oct. 12, 2022, 4:01 p.m. UTC | #19
On Wed, Oct 12, 2022 at 05:59:45PM +0300, Ville Syrjälä wrote:
> On Wed, Oct 12, 2022 at 04:31:14PM +0200, Thomas Zimmermann wrote:
> > Hi
> > 
> > Am 12.10.22 um 15:12 schrieb Arnd Bergmann:
> > > On Wed, Oct 12, 2022, at 2:00 PM, Thomas Zimmermann wrote:
> > >>
> > >> Could well be. But ofdrm intents to replace offb and this test has
> > >> worked well in offb for almost 15 yrs. If there are bug reports, I'm
> > >> happy to take patches, but until then I see no reason to change it.
> > > 
> > > I wouldn't change the code in offb unless a user reports a bug,
> > > but I don't see a point in adding the same mistake to ofdrm if we
> > > know it can't work on real hardware.
> > 
> > As I said, this has worked with offb and apparently on real hardware. 
> > For all I know, ATI hardware (before it became AMD) was used in PPC 
> > Macintoshs and assumed big-endian access on those machines.
> 
> At least mach64 class hardware has two frame buffer apertures, and
> byte swapping can be configured separately for each. But that means
> you only get correct byte swapping for at most two bpps at the same
> time (and that only if you know which aperture to access each time).
> IIRC Rage 128 already has the surface register stuff where you
> could byte swap a limited set of ranges independently. And old
> mga hardware has just one byte swap setting for the whole frame
> buffer aperture, so only one bpp at a time.
> 
> That kind of horrible limitations of the byte swappers is the
> main reason why I wanted to make drm fourcc endianness explicit.
> Simply assuming host endianness would end in tears on big endian
> as soon as you need to access stuff with two bpps at the same time.
> Much better to just switch off those useless byte swappers and
> swap by hand when necessary.

If you have hardware-specific driver, sure.

This is firmware-provided framebuffer, though. You get one framebuffer
address, and one endian - whatever the firmware set up and described in
the DT.

Thanks

Michal
Javier Martinez Canillas Oct. 13, 2022, 7:39 a.m. UTC | #20
Hello,

On 10/12/22 16:27, Michal Suchánek wrote:

[...]

>>
>> If you are using the framebuffer code from vga.c, I would guess that
>> that you can run a big-endian kernel with qemu-system-ppc64,
>> or a little-endian kernel with qemu-system-ppc64le and get the
>> correct colors, while running a little-endian kernel with
>> qemu-system-ppc64 and vga.c, or using a different framebuffer
>> emulation on a big-endian kernel would give you the wrong colors.
> 
> Thanks for digging this up.
> 
> That makes one thing clear: qemu does not emulate this framebuffer
> property correctly, and cannot be relied on for verification.
> 
> If you can provide test results from real hardware that show the current
> logic as flawed it should be changed.
> 
> In absence of such test results I think the most reasonable thing is to
> keep the logic that nobody complained about for 10+ years.
> 

I agree with Michal and Thomas on this. I don't see a strong reason to not
use the same heuristic that the offb fbdev driver has been doing for this.

Otherwise if this turns out to be needed, it will cause a regression for a
user that switches to this driver instead. Specially since both fbdev and
DRM drivers match against the same "display" OF compatible string.
Benjamin Herrenschmidt Oct. 17, 2022, 6:44 a.m. UTC | #21
On Thu, 2022-10-13 at 09:39 +0200, Javier Martinez Canillas wrote:
> > In absence of such test results I think the most reasonable thing is to
> > keep the logic that nobody complained about for 10+ years.
> > 
> 
> I agree with Michal and Thomas on this. I don't see a strong reason to not
> use the same heuristic that the offb fbdev driver has been doing for this.
> 
> Otherwise if this turns out to be needed, it will cause a regression for a
> user that switches to this driver instead. Specially since both fbdev and
> DRM drivers match against the same "display" OF compatible string.

I agree.

In the end, what it boils down to is, we don't know, we should guess. The
endianness of the kernel is as good a guess as anything here.

If not that, then assume BE.

That code was originally written for old macs. Those could simply not boot
anything other than a BE kernel. OF would always setup a 8bpp fb (so endianness
is irrelevant) but BootX could setup something else.

There's almost no old LE powerpc system out there, and I'm reasonably sure
actually none of any relevance to this code.

That leaves us with newer systems capable of endian switching. Those should just
get the property added.

I don't know of many cases out there. There' SLOS on powerpc which still won't
set it (which is what qemu uses). That could just be fixed. In the meantime
it makes sense for the kernel to follow its existing behaviour.

Cheers,
Ben.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
index 4afc4ac27342..fca7936db083 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -659,6 +659,11 @@  int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
 			drm_fb_xrgb8888_to_rgb565(dst, dst_pitch, src, fb, clip, false);
 			return 0;
 		}
+	} else if (dst_format == (DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN)) {
+		if (fb_format == DRM_FORMAT_RGB565) {
+			drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
+			return 0;
+		}
 	} else if (dst_format == DRM_FORMAT_RGB888) {
 		if (fb_format == DRM_FORMAT_XRGB8888) {
 			drm_fb_xrgb8888_to_rgb888(dst, dst_pitch, src, fb, clip);
@@ -677,6 +682,11 @@  int drm_fb_blit(struct iosys_map *dst, const unsigned int *dst_pitch, uint32_t d
 			drm_fb_xrgb8888_to_xrgb2101010(dst, dst_pitch, src, fb, clip);
 			return 0;
 		}
+	} else if (dst_format == DRM_FORMAT_BGRX8888) {
+		if (fb_format == DRM_FORMAT_XRGB8888) {
+			drm_fb_swab(dst, dst_pitch, src, fb, clip, false);
+			return 0;
+		}
 	}
 
 	drm_warn_once(fb->dev, "No conversion helper from %p4cc to %p4cc found.\n",
diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index 0bf5eebf6678..6e100a7f5db7 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -94,7 +94,7 @@  static int display_get_validated_int0(struct drm_device *dev, const char *name,
 }
 
 static const struct drm_format_info *display_get_validated_format(struct drm_device *dev,
-								  u32 depth)
+								  u32 depth, bool big_endian)
 {
 	const struct drm_format_info *info;
 	u32 format;
@@ -115,6 +115,29 @@  static const struct drm_format_info *display_get_validated_format(struct drm_dev
 		return ERR_PTR(-EINVAL);
 	}
 
+	/*
+	 * DRM formats assume little-endian byte order. Update the format
+	 * if the scanout buffer uses big-endian ordering.
+	 */
+	if (big_endian) {
+		switch (format) {
+		case DRM_FORMAT_XRGB8888:
+			format = DRM_FORMAT_BGRX8888;
+			break;
+		case DRM_FORMAT_ARGB8888:
+			format = DRM_FORMAT_BGRA8888;
+			break;
+		case DRM_FORMAT_RGB565:
+			format = DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN;
+			break;
+		case DRM_FORMAT_XRGB1555:
+			format = DRM_FORMAT_XRGB1555 | DRM_FORMAT_BIG_ENDIAN;
+			break;
+		default:
+			break;
+		}
+	}
+
 	info = drm_format_info(format);
 	if (!info) {
 		drm_err(dev, "cannot find framebuffer format for depth %u\n", depth);
@@ -134,6 +157,23 @@  static int display_read_u32_of(struct drm_device *dev, struct device_node *of_no
 	return ret;
 }
 
+static bool display_get_big_endian_of(struct drm_device *dev, struct device_node *of_node)
+{
+	bool big_endian;
+
+#ifdef __BIG_ENDIAN
+	big_endian = true;
+	if (of_get_property(of_node, "little-endian", NULL))
+		big_endian = false;
+#else
+	big_endian = false;
+	if (of_get_property(of_node, "big-endian", NULL))
+		big_endian = true;
+#endif
+
+	return big_endian;
+}
+
 static int display_get_width_of(struct drm_device *dev, struct device_node *of_node)
 {
 	u32 width;
@@ -613,6 +653,7 @@  static void ofdrm_device_set_gamma_linear(struct ofdrm_device *odev,
 
 	switch (format->format) {
 	case DRM_FORMAT_RGB565:
+	case DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN:
 		/* Use better interpolation, to take 32 values from 0 to 255 */
 		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE / 8; i++) {
 			unsigned char r = i * 8 + i / 4;
@@ -631,6 +672,7 @@  static void ofdrm_device_set_gamma_linear(struct ofdrm_device *odev,
 		}
 		break;
 	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_BGRX8888:
 		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE; i++)
 			odev->funcs->cmap_write(odev, i, i, i, i);
 		break;
@@ -650,6 +692,7 @@  static void ofdrm_device_set_gamma(struct ofdrm_device *odev,
 
 	switch (format->format) {
 	case DRM_FORMAT_RGB565:
+	case DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN:
 		/* Use better interpolation, to take 32 values from lut[0] to lut[255] */
 		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE / 8; i++) {
 			unsigned char r = lut[i * 8 + i / 4].red >> 8;
@@ -668,6 +711,7 @@  static void ofdrm_device_set_gamma(struct ofdrm_device *odev,
 		}
 		break;
 	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_BGRX8888:
 		for (i = 0; i < OFDRM_GAMMA_LUT_SIZE; i++) {
 			unsigned char r = lut[i].red >> 8;
 			unsigned char g = lut[i].green >> 8;
@@ -718,6 +762,9 @@  static const uint32_t ofdrm_primary_plane_formats[] = {
 	DRM_FORMAT_RGB565,
 	//DRM_FORMAT_XRGB1555,
 	//DRM_FORMAT_C8,
+	/* Big-endian formats below */
+	DRM_FORMAT_BGRX8888,
+	DRM_FORMAT_RGB565 | DRM_FORMAT_BIG_ENDIAN,
 };
 
 static const uint64_t ofdrm_primary_plane_format_modifiers[] = {
@@ -1048,6 +1095,7 @@  static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
 	struct ofdrm_device *odev;
 	struct drm_device *dev;
 	enum ofdrm_model model;
+	bool big_endian;
 	int width, height, depth, linebytes;
 	const struct drm_format_info *format;
 	u64 address;
@@ -1109,6 +1157,8 @@  static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
 		break;
 	}
 
+	big_endian = display_get_big_endian_of(dev, of_node);
+
 	width = display_get_width_of(dev, of_node);
 	if (width < 0)
 		return ERR_PTR(width);
@@ -1122,7 +1172,7 @@  static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
 	if (linebytes < 0)
 		return ERR_PTR(linebytes);
 
-	format = display_get_validated_format(dev, depth);
+	format = display_get_validated_format(dev, depth, big_endian);
 	if (IS_ERR(format))
 		return ERR_CAST(format);
 	if (!linebytes) {
@@ -1234,6 +1284,7 @@  static struct ofdrm_device *ofdrm_device_create(struct drm_driver *drv,
 		dev->mode_config.preferred_depth = depth;
 		break;
 	}
+	dev->mode_config.quirk_addfb_prefer_host_byte_order = true;
 
 	/* Primary plane */