diff mbox series

[4/7] drm/meson: plane: add support for AFBC mode for OSD1 plane

Message ID 20191010092526.10419-5-narmstrong@baylibre.com (mailing list archive)
State New, archived
Headers show
Series drm/meson: add AFBC support | expand

Commit Message

Neil Armstrong Oct. 10, 2019, 9:25 a.m. UTC
This adds all the OSD configuration plumbing to support the AFBC decoders
path to display of the OSD1 plane.

The Amlogic GXM and G12A AFBC decoders are integrated very differently.

The Amlogic GXM has a direct output path to the OSD1 VIU pixel input,
because the GXM AFBC decoder seems to be a custom IP developed by Amlogic.

On the other side, the Amlogic G12A AFBC decoder seems to be an external
IP that emits pixels on an AXI master hooked to a "Mali Unpack" block
feeding the OSD1 VIU pixel input.
This uses a weird "0x1000000" internal HW physical address on both
sides to transfer the pixels.

For Amlogic GXM, the supported pixel formats are the same as the normal
linear OSD1 mode.

On the other side, Amlogic added support for all AFBC v1.2 formats for
the G12A AFBC integration.

For simplicity, we stick to the already supported formats for now.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
---
 drivers/gpu/drm/meson/meson_crtc.c  |   2 +
 drivers/gpu/drm/meson/meson_drv.h   |   4 +
 drivers/gpu/drm/meson/meson_plane.c | 215 ++++++++++++++++++++++++----
 3 files changed, 190 insertions(+), 31 deletions(-)

Comments

Ayan Halder Oct. 10, 2019, 1:26 p.m. UTC | #1
On Thu, Oct 10, 2019 at 11:25:23AM +0200, Neil Armstrong wrote:
> This adds all the OSD configuration plumbing to support the AFBC decoders
> path to display of the OSD1 plane.
> 
> The Amlogic GXM and G12A AFBC decoders are integrated very differently.
> 
> The Amlogic GXM has a direct output path to the OSD1 VIU pixel input,
> because the GXM AFBC decoder seem to be a custom IP developed by Amlogic.
> 
> On the other side, the Amlogic G12A AFBC decoder seems to be an external
> IP that emit pixels on an AXI master hooked to a "Mali Unpack" block
> feeding the OSD1 VIU pixel input.
> This uses a weird "0x1000000" internal HW physical address on both
> sides to transfer the pixels.
> 
> For Amlogic GXM, the supported pixel formats are the same as the normal
> linear OSD1 mode.
> 
> On the other side, Amlogic added support for all AFBC v1.2 formats for
> the G12A AFBC integration.
> 
> For simplicity, we stick to the already supported formats for now.
> 
> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
> ---
>  drivers/gpu/drm/meson/meson_crtc.c  |   2 +
>  drivers/gpu/drm/meson/meson_drv.h   |   4 +
>  drivers/gpu/drm/meson/meson_plane.c | 215 ++++++++++++++++++++++++----
>  3 files changed, 190 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
> index 57ae1c13d1e6..d478fa232951 100644
> --- a/drivers/gpu/drm/meson/meson_crtc.c
> +++ b/drivers/gpu/drm/meson/meson_crtc.c
> @@ -281,6 +281,8 @@ void meson_crtc_irq(struct meson_drm *priv)
>  	if (priv->viu.osd1_enabled && priv->viu.osd1_commit) {
>  		writel_relaxed(priv->viu.osd1_ctrl_stat,
>  				priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
> +		writel_relaxed(priv->viu.osd1_ctrl_stat2,
> +				priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
>  		writel_relaxed(priv->viu.osd1_blk0_cfg[0],
>  				priv->io_base + _REG(VIU_OSD1_BLK0_CFG_W0));
>  		writel_relaxed(priv->viu.osd1_blk0_cfg[1],
> diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
> index 60f13c6f34e5..de25349be8aa 100644
> --- a/drivers/gpu/drm/meson/meson_drv.h
> +++ b/drivers/gpu/drm/meson/meson_drv.h
> @@ -53,8 +53,12 @@ struct meson_drm {
>  		bool osd1_enabled;
>  		bool osd1_interlace;
>  		bool osd1_commit;
> +		bool osd1_afbcd;
>  		uint32_t osd1_ctrl_stat;
> +		uint32_t osd1_ctrl_stat2;
>  		uint32_t osd1_blk0_cfg[5];
> +		uint32_t osd1_blk1_cfg4;
> +		uint32_t osd1_blk2_cfg4;
>  		uint32_t osd1_addr;
>  		uint32_t osd1_stride;
>  		uint32_t osd1_height;
> diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
> index 5e798c276037..412941aa8402 100644
> --- a/drivers/gpu/drm/meson/meson_plane.c
> +++ b/drivers/gpu/drm/meson/meson_plane.c
> @@ -23,6 +23,7 @@
>  #include "meson_plane.h"
>  #include "meson_registers.h"
>  #include "meson_viu.h"
> +#include "meson_osd_afbcd.h"
>  
>  /* OSD_SCI_WH_M1 */
>  #define SCI_WH_M1_W(w)			FIELD_PREP(GENMASK(28, 16), w)
> @@ -92,12 +93,38 @@ static int meson_plane_atomic_check(struct drm_plane *plane,
>  						   false, true);
>  }
>  
> +#define MESON_MOD_AFBC_VALID_BITS (AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |	\
> +				   AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |	\
> +				   AFBC_FORMAT_MOD_YTR |		\
> +				   AFBC_FORMAT_MOD_SPARSE |		\
> +				   AFBC_FORMAT_MOD_SPLIT)
> +
>  /* Takes a fixed 16.16 number and converts it to integer. */
>  static inline int64_t fixed16_to_int(int64_t value)
>  {
>  	return value >> 16;
>  }
>  
> +static u32 meson_g12a_afbcd_line_stride(struct meson_drm *priv)
> +{
> +	u32 line_stride = 0;
> +
> +	switch (priv->afbcd.format) {
> +	case DRM_FORMAT_RGB565:
> +		line_stride = ((priv->viu.osd1_width << 4) + 127) >> 7;
> +		break;
> +	case DRM_FORMAT_RGB888:
> +	case DRM_FORMAT_XRGB8888:
> +	case DRM_FORMAT_ARGB8888:
> +	case DRM_FORMAT_XBGR8888:
> +	case DRM_FORMAT_ABGR8888:
Please have a look at
https://www.kernel.org/doc/html/latest/gpu/afbc.html for our
recommendation. We suggest that *X* formats are avoided.

Also, for interoperability and maximum compression efficiency (with
AFBC_FORMAT_MOD_YTR), we suggest the following component order:

        Component 0: R
        Component 1: G
        Component 2: B
        Component 3: A (if available)

Thus, only the DRM_FORMAT_ABGR* and DRM_FORMAT_BGR* formats should be allowed.
> +		line_stride = ((priv->viu.osd1_width << 5) + 127) >> 7;
> +		break;
> +	}
> +
> +	return ((line_stride + 1) >> 1) << 1;
> +}
> +
>  static void meson_plane_atomic_update(struct drm_plane *plane,
>  				      struct drm_plane_state *old_state)
>  {
> @@ -126,57 +153,88 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
>  	 */
>  	spin_lock_irqsave(&priv->drm->event_lock, flags);
>  
> +	/* Check if AFBC decoder is required for this buffer */
> +	if ((meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) ||
> +	     meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) &&
> +	    fb->modifier & DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
> +		priv->viu.osd1_afbcd = true;
> +	else
> +		priv->viu.osd1_afbcd = false;
> +
>  	/* Enable OSD and BLK0, set max global alpha */
>  	priv->viu.osd1_ctrl_stat = OSD_ENABLE |
>  				   (0xFF << OSD_GLOBAL_ALPHA_SHIFT) |
>  				   OSD_BLK0_ENABLE;
>  
> +	priv->viu.osd1_ctrl_stat2 = readl(priv->io_base +
> +					  _REG(VIU_OSD1_CTRL_STAT2));
> +
>  	canvas_id_osd1 = priv->canvas_id_osd1;
>  
>  	/* Set up BLK0 to point to the right canvas */
> -	priv->viu.osd1_blk0_cfg[0] = ((canvas_id_osd1 << OSD_CANVAS_SEL) |
> -				      OSD_ENDIANNESS_LE);
> +	priv->viu.osd1_blk0_cfg[0] = canvas_id_osd1 << OSD_CANVAS_SEL;
> +
> +	if (priv->viu.osd1_afbcd) {
> +		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) {
> +			/* This is the internal decoding memory address */
> +			priv->viu.osd1_blk1_cfg4 = MESON_G12A_AFBCD_OUT_ADDR;
> +			priv->viu.osd1_blk0_cfg[0] |= OSD_ENDIANNESS_BE;
> +			priv->viu.osd1_ctrl_stat2 |= OSD_PENDING_STAT_CLEAN;
> +		}
> +
> +		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM)) {
> +			priv->viu.osd1_blk0_cfg[0] |= OSD_ENDIANNESS_LE;
> +			priv->viu.osd1_ctrl_stat2 |= OSD_DPATH_MALI_AFBCD;
> +		}
> +	} else {
> +		priv->viu.osd1_blk0_cfg[0] |= OSD_ENDIANNESS_LE;
> +
> +		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
> +			priv->viu.osd1_ctrl_stat2 &= ~OSD_DPATH_MALI_AFBCD;
> +	}
>  
>  	/* On GXBB, Use the old non-HDR RGB2YUV converter */
>  	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB))
>  		priv->viu.osd1_blk0_cfg[0] |= OSD_OUTPUT_COLOR_RGB;
>  
> +	if (priv->viu.osd1_afbcd &&
> +	    meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) {
> +		priv->viu.osd1_blk0_cfg[0] |= OSD_MALI_SRC_EN |
> +			priv->afbcd.ops->fmt_to_blk_mode(fb->modifier,
> +							  fb->format->format);
> +	} else {
> +		switch (fb->format->format) {
> +		case DRM_FORMAT_XRGB8888:
> +		case DRM_FORMAT_ARGB8888:
> +			priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
> +						OSD_COLOR_MATRIX_32_ARGB;
> +			break;
> +		case DRM_FORMAT_XBGR8888:
> +		case DRM_FORMAT_ABGR8888:
> +			priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
> +						OSD_COLOR_MATRIX_32_ABGR;
> +			break;
> +		case DRM_FORMAT_RGB888:
> +			priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_24 |
> +						OSD_COLOR_MATRIX_24_RGB;
> +			break;
> +		case DRM_FORMAT_RGB565:
> +			priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_16 |
> +						OSD_COLOR_MATRIX_16_RGB565;
> +			break;
> +		};
> +	}
> +
>  	switch (fb->format->format) {
>  	case DRM_FORMAT_XRGB8888:
> -		/* For XRGB, replace the pixel's alpha by 0xFF */
> -		writel_bits_relaxed(OSD_REPLACE_EN, OSD_REPLACE_EN,
> -				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
> -		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
> -					      OSD_COLOR_MATRIX_32_ARGB;
> -		break;
>  	case DRM_FORMAT_XBGR8888:
>  		/* For XRGB, replace the pixel's alpha by 0xFF */
> -		writel_bits_relaxed(OSD_REPLACE_EN, OSD_REPLACE_EN,
> -				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
> -		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
> -					      OSD_COLOR_MATRIX_32_ABGR;
> +		priv->viu.osd1_ctrl_stat2 |= OSD_REPLACE_EN;
>  		break;
>  	case DRM_FORMAT_ARGB8888:
> -		/* For ARGB, use the pixel's alpha */
> -		writel_bits_relaxed(OSD_REPLACE_EN, 0,
> -				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
> -		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
> -					      OSD_COLOR_MATRIX_32_ARGB;
> -		break;
>  	case DRM_FORMAT_ABGR8888:
>  		/* For ARGB, use the pixel's alpha */
> -		writel_bits_relaxed(OSD_REPLACE_EN, 0,
> -				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
> -		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
> -					      OSD_COLOR_MATRIX_32_ABGR;
> -		break;
> -	case DRM_FORMAT_RGB888:
> -		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_24 |
> -					      OSD_COLOR_MATRIX_24_RGB;
> -		break;
> -	case DRM_FORMAT_RGB565:
> -		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_16 |
> -					      OSD_COLOR_MATRIX_16_RGB565;
> +		priv->viu.osd1_ctrl_stat2 &= ~OSD_REPLACE_EN;
>  		break;
>  	};
>  
> @@ -307,6 +365,16 @@ static void meson_plane_atomic_update(struct drm_plane *plane,
>  	priv->viu.osd1_height = fb->height;
>  	priv->viu.osd1_width = fb->width;
>  
> +	if (priv->viu.osd1_afbcd) {
> +		priv->afbcd.modifier = fb->modifier;
> +		priv->afbcd.format = fb->format->format;
> +
> +		/* Calculate decoder write stride */
> +		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> +			priv->viu.osd1_blk2_cfg4 =
> +				meson_g12a_afbcd_line_stride(priv);
> +	}
> +
>  	if (!meson_plane->enabled) {
>  		/* Reset OSD1 before enabling it on GXL+ SoCs */
>  		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) ||
> @@ -346,6 +414,42 @@ static const struct drm_plane_helper_funcs meson_plane_helper_funcs = {
>  	.prepare_fb	= drm_gem_fb_prepare_fb,
>  };
>  
> +static bool meson_plane_format_mod_supported(struct drm_plane *plane,
> +					     u32 format, u64 modifier)
> +{
> +	struct meson_plane *meson_plane = to_meson_plane(plane);
> +	struct meson_drm *priv = meson_plane->priv;
> +	int i;
> +
> +	if (modifier == DRM_FORMAT_MOD_INVALID)
> +		return false;
> +
> +	if (modifier == DRM_FORMAT_MOD_LINEAR)
> +		return true;
> +
> +	if (!meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) &&
> +	    !meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> +		return false;
> +
> +	if (modifier & ~DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
> +		return false;
> +
> +	for (i = 0 ; i < plane->modifier_count ; ++i)
> +		if (plane->modifiers[i] == modifier)
> +			break;
> +
> +	if (i == plane->modifier_count) {
> +		DRM_DEBUG_KMS("Unsupported modifier\n");
> +		return false;
> +	}
> +
> +	if (priv->afbcd.ops && priv->afbcd.ops->supported_fmt)
> +		return priv->afbcd.ops->supported_fmt(modifier, format);
> +
> +	DRM_DEBUG_KMS("AFBC Unsupported\n");
> +	return false;
> +}
> +
>  static const struct drm_plane_funcs meson_plane_funcs = {
>  	.update_plane		= drm_atomic_helper_update_plane,
>  	.disable_plane		= drm_atomic_helper_disable_plane,
> @@ -353,6 +457,7 @@ static const struct drm_plane_funcs meson_plane_funcs = {
>  	.reset			= drm_atomic_helper_plane_reset,
>  	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
>  	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
> +	.format_mod_supported   = meson_plane_format_mod_supported,
>  };
>  
>  static const uint32_t supported_drm_formats[] = {
> @@ -364,10 +469,53 @@ static const uint32_t supported_drm_formats[] = {
>  	DRM_FORMAT_RGB565,
>  };
>  
> +static const uint64_t format_modifiers_afbc_gxm[] = {
> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> +				AFBC_FORMAT_MOD_SPARSE |
> +				AFBC_FORMAT_MOD_YTR),
> +	/* SPLIT mandates SPARSE, RGB modes mandates YTR */
> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> +				AFBC_FORMAT_MOD_YTR |
> +				AFBC_FORMAT_MOD_SPARSE |
> +				AFBC_FORMAT_MOD_SPLIT),
> +	DRM_FORMAT_MOD_LINEAR,
> +	DRM_FORMAT_MOD_INVALID,
> +};
> +
> +static const uint64_t format_modifiers_afbc_g12a[] = {
> +	/*
> +	 * - TOFIX Support AFBC modifiers for YUV formats (16x16 + TILED)
> +	 * - AFBC_FORMAT_MOD_YTR is mandatory since we only support RGB
> +	 * - SPLIT is mandatory for performances reasons when in 16x16
> +	 *   block size
> +	 * - 32x8 block size + SPLIT is mandatory with 4K frame size
> +	 *   for performances reasons
> +	 */
> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> +				AFBC_FORMAT_MOD_YTR |
> +				AFBC_FORMAT_MOD_SPARSE |
> +				AFBC_FORMAT_MOD_SPLIT),
> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> +				AFBC_FORMAT_MOD_YTR |
> +				AFBC_FORMAT_MOD_SPARSE),
> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> +				AFBC_FORMAT_MOD_YTR |
> +				AFBC_FORMAT_MOD_SPARSE |
> +				AFBC_FORMAT_MOD_SPLIT),
> +	DRM_FORMAT_MOD_LINEAR,
> +	DRM_FORMAT_MOD_INVALID,
> +};
> +
> +static const uint64_t format_modifiers_default[] = {
> +	DRM_FORMAT_MOD_LINEAR,
> +	DRM_FORMAT_MOD_INVALID,
> +};
> +
>  int meson_plane_create(struct meson_drm *priv)
>  {
>  	struct meson_plane *meson_plane;
>  	struct drm_plane *plane;
> +	const uint64_t *format_modifiers = format_modifiers_default;
>  
>  	meson_plane = devm_kzalloc(priv->drm->dev, sizeof(*meson_plane),
>  				   GFP_KERNEL);
> @@ -377,11 +525,16 @@ int meson_plane_create(struct meson_drm *priv)
>  	meson_plane->priv = priv;
>  	plane = &meson_plane->base;
>  
> +	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
> +		format_modifiers = format_modifiers_afbc_gxm;
> +	else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> +		format_modifiers = format_modifiers_afbc_g12a;
> +
>  	drm_universal_plane_init(priv->drm, plane, 0xFF,
>  				 &meson_plane_funcs,
>  				 supported_drm_formats,
>  				 ARRAY_SIZE(supported_drm_formats),
> -				 NULL,
> +				 format_modifiers,
>  				 DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane");
>  
>  	drm_plane_helper_add(plane, &meson_plane_helper_funcs);
> -- 
> 2.22.0
Neil Armstrong Oct. 10, 2019, 1:41 p.m. UTC | #2
Hi Ayan,

On 10/10/2019 15:26, Ayan Halder wrote:
> On Thu, Oct 10, 2019 at 11:25:23AM +0200, Neil Armstrong wrote:
>> This adds all the OSD configuration plumbing to support the AFBC decoders
>> path to display of the OSD1 plane.
>>
>> The Amlogic GXM and G12A AFBC decoders are integrated very differently.
>>
>> The Amlogic GXM has a direct output path to the OSD1 VIU pixel input,
>> because the GXM AFBC decoder seem to be a custom IP developed by Amlogic.
>>
>> On the other side, the Amlogic G12A AFBC decoder seems to be an external
>> IP that emit pixels on an AXI master hooked to a "Mali Unpack" block
>> feeding the OSD1 VIU pixel input.
>> This uses a weird "0x1000000" internal HW physical address on both
>> sides to transfer the pixels.
>>
>> For Amlogic GXM, the supported pixel formats are the same as the normal
>> linear OSD1 mode.
>>
>> On the other side, Amlogic added support for all AFBC v1.2 formats for
>> the G12A AFBC integration.
>>
>> For simplicity, we stick to the already supported formats for now.
>>
>> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
>> ---
>>  drivers/gpu/drm/meson/meson_crtc.c  |   2 +
>>  drivers/gpu/drm/meson/meson_drv.h   |   4 +
>>  drivers/gpu/drm/meson/meson_plane.c | 215 ++++++++++++++++++++++++----
>>  3 files changed, 190 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
>> index 57ae1c13d1e6..d478fa232951 100644
>> --- a/drivers/gpu/drm/meson/meson_crtc.c
>> +++ b/drivers/gpu/drm/meson/meson_crtc.c
>> @@ -281,6 +281,8 @@ void meson_crtc_irq(struct meson_drm *priv)
>>  	if (priv->viu.osd1_enabled && priv->viu.osd1_commit) {
>>  		writel_relaxed(priv->viu.osd1_ctrl_stat,
>>  				priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
>> +		writel_relaxed(priv->viu.osd1_ctrl_stat2,
>> +				priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
>>  		writel_relaxed(priv->viu.osd1_blk0_cfg[0],
>>  				priv->io_base + _REG(VIU_OSD1_BLK0_CFG_W0));
>>  		writel_relaxed(priv->viu.osd1_blk0_cfg[1],
>> diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
>> index 60f13c6f34e5..de25349be8aa 100644
>> --- a/drivers/gpu/drm/meson/meson_drv.h
>> +++ b/drivers/gpu/drm/meson/meson_drv.h
>> @@ -53,8 +53,12 @@ struct meson_drm {
>>  		bool osd1_enabled;
>>  		bool osd1_interlace;
>>  		bool osd1_commit;
>> +		bool osd1_afbcd;
>>  		uint32_t osd1_ctrl_stat;
>> +		uint32_t osd1_ctrl_stat2;
>>  		uint32_t osd1_blk0_cfg[5];
>> +		uint32_t osd1_blk1_cfg4;
>> +		uint32_t osd1_blk2_cfg4;
>>  		uint32_t osd1_addr;
>>  		uint32_t osd1_stride;
>>  		uint32_t osd1_height;
>> diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
>> index 5e798c276037..412941aa8402 100644
>> --- a/drivers/gpu/drm/meson/meson_plane.c
>> +++ b/drivers/gpu/drm/meson/meson_plane.c
>> @@ -23,6 +23,7 @@
>>  #include "meson_plane.h"
>>  #include "meson_registers.h"
>>  #include "meson_viu.h"
>> +#include "meson_osd_afbcd.h"
>>  
>>  /* OSD_SCI_WH_M1 */
>>  #define SCI_WH_M1_W(w)			FIELD_PREP(GENMASK(28, 16), w)
>> @@ -92,12 +93,38 @@ static int meson_plane_atomic_check(struct drm_plane *plane,
>>  						   false, true);
>>  }
>>  
>> +#define MESON_MOD_AFBC_VALID_BITS (AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |	\
>> +				   AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |	\
>> +				   AFBC_FORMAT_MOD_YTR |		\
>> +				   AFBC_FORMAT_MOD_SPARSE |		\
>> +				   AFBC_FORMAT_MOD_SPLIT)
>> +
>>  /* Takes a fixed 16.16 number and converts it to integer. */
>>  static inline int64_t fixed16_to_int(int64_t value)
>>  {
>>  	return value >> 16;
>>  }
>>  
>> +static u32 meson_g12a_afbcd_line_stride(struct meson_drm *priv)
>> +{
>> +	u32 line_stride = 0;
>> +
>> +	switch (priv->afbcd.format) {
>> +	case DRM_FORMAT_RGB565:
>> +		line_stride = ((priv->viu.osd1_width << 4) + 127) >> 7;
>> +		break;
>> +	case DRM_FORMAT_RGB888:
>> +	case DRM_FORMAT_XRGB8888:
>> +	case DRM_FORMAT_ARGB8888:
>> +	case DRM_FORMAT_XBGR8888:
>> +	case DRM_FORMAT_ABGR8888:
> Please have a look at
> https://www.kernel.org/doc/html/latest/gpu/afbc.html for our
> recommendation. We suggest that *X* formats are avoided.
> 
> Also, for interoperability and maximum compression efficiency (with
> AFBC_FORMAT_MOD_YTR), we suggest the following order :-
> 
>         Component 0: R
>         Component 1: G
>         Component 2: B
>         Component 3: A (if available)


Sorry, I don't understand: are you asking me to limit AFBC to ABGR8888?

But why, if the HW (GPU and DPU) is capable of handling the other formats?

Isn't it a userspace choice? I understand XRGB8888 is a waste
of memory space and compression efficiency, but this is not the
kernel driver's decision to make, right?

For interoperability, I would understand recommending a minimal set
of modifiers and formats. But here, each platform is also limited
by its GPU capabilities.

Limiting to ABGR8888 would rule out nearly every non-Android renderer
using AFBC; I'm not sure that is the kernel driver's responsibility.

> 
> Thus, DRM_FORMAT_ABGR, DRM_FORMAT_BGR should only be allowed.
>> +		line_stride = ((priv->viu.osd1_width << 5) + 127) >> 7;
>> +		break;
>> +	}
>> +
>> +	return ((line_stride + 1) >> 1) << 1;
>> +}
>> +
>>  static void meson_plane_atomic_update(struct drm_plane *plane,
>>  				      struct drm_plane_state *old_state)
>>  {

[...]

>>  
>> +static bool meson_plane_format_mod_supported(struct drm_plane *plane,
>> +					     u32 format, u64 modifier)
>> +{
>> +	struct meson_plane *meson_plane = to_meson_plane(plane);
>> +	struct meson_drm *priv = meson_plane->priv;
>> +	int i;
>> +
>> +	if (modifier == DRM_FORMAT_MOD_INVALID)
>> +		return false;
>> +
>> +	if (modifier == DRM_FORMAT_MOD_LINEAR)
>> +		return true;
>> +
>> +	if (!meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) &&
>> +	    !meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
>> +		return false;
>> +
>> +	if (modifier & ~DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
>> +		return false;
>> +
>> +	for (i = 0 ; i < plane->modifier_count ; ++i)
>> +		if (plane->modifiers[i] == modifier)
>> +			break;
>> +
>> +	if (i == plane->modifier_count) {
>> +		DRM_DEBUG_KMS("Unsupported modifier\n");
>> +		return false;
>> +	}

I can add a warn_once here; would that be enough?

>> +
>> +	if (priv->afbcd.ops && priv->afbcd.ops->supported_fmt)
>> +		return priv->afbcd.ops->supported_fmt(modifier, format);
>> +
>> +	DRM_DEBUG_KMS("AFBC Unsupported\n");
>> +	return false;
>> +}
>> +
>>  static const struct drm_plane_funcs meson_plane_funcs = {
>>  	.update_plane		= drm_atomic_helper_update_plane,
>>  	.disable_plane		= drm_atomic_helper_disable_plane,
>> @@ -353,6 +457,7 @@ static const struct drm_plane_funcs meson_plane_funcs = {
>>  	.reset			= drm_atomic_helper_plane_reset,
>>  	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
>>  	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
>> +	.format_mod_supported   = meson_plane_format_mod_supported,
>>  };
>>  
>>  static const uint32_t supported_drm_formats[] = {
>> @@ -364,10 +469,53 @@ static const uint32_t supported_drm_formats[] = {
>>  	DRM_FORMAT_RGB565,
>>  };
>>  
>> +static const uint64_t format_modifiers_afbc_gxm[] = {
>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
>> +				AFBC_FORMAT_MOD_SPARSE |
>> +				AFBC_FORMAT_MOD_YTR),
>> +	/* SPLIT mandates SPARSE, RGB modes mandates YTR */
>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
>> +				AFBC_FORMAT_MOD_YTR |
>> +				AFBC_FORMAT_MOD_SPARSE |
>> +				AFBC_FORMAT_MOD_SPLIT),
>> +	DRM_FORMAT_MOD_LINEAR,
>> +	DRM_FORMAT_MOD_INVALID,
>> +};
>> +
>> +static const uint64_t format_modifiers_afbc_g12a[] = {
>> +	/*
>> +	 * - TOFIX Support AFBC modifiers for YUV formats (16x16 + TILED)
>> +	 * - AFBC_FORMAT_MOD_YTR is mandatory since we only support RGB
>> +	 * - SPLIT is mandatory for performances reasons when in 16x16
>> +	 *   block size
>> +	 * - 32x8 block size + SPLIT is mandatory with 4K frame size
>> +	 *   for performances reasons
>> +	 */
>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
>> +				AFBC_FORMAT_MOD_YTR |
>> +				AFBC_FORMAT_MOD_SPARSE |
>> +				AFBC_FORMAT_MOD_SPLIT),
>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
>> +				AFBC_FORMAT_MOD_YTR |
>> +				AFBC_FORMAT_MOD_SPARSE),
>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
>> +				AFBC_FORMAT_MOD_YTR |
>> +				AFBC_FORMAT_MOD_SPARSE |
>> +				AFBC_FORMAT_MOD_SPLIT),
>> +	DRM_FORMAT_MOD_LINEAR,
>> +	DRM_FORMAT_MOD_INVALID,
>> +};
>> +
>> +static const uint64_t format_modifiers_default[] = {
>> +	DRM_FORMAT_MOD_LINEAR,
>> +	DRM_FORMAT_MOD_INVALID,
>> +};
>> +
>>  int meson_plane_create(struct meson_drm *priv)
>>  {
>>  	struct meson_plane *meson_plane;
>>  	struct drm_plane *plane;
>> +	const uint64_t *format_modifiers = format_modifiers_default;
>>  
>>  	meson_plane = devm_kzalloc(priv->drm->dev, sizeof(*meson_plane),
>>  				   GFP_KERNEL);
>> @@ -377,11 +525,16 @@ int meson_plane_create(struct meson_drm *priv)
>>  	meson_plane->priv = priv;
>>  	plane = &meson_plane->base;
>>  
>> +	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
>> +		format_modifiers = format_modifiers_afbc_gxm;
>> +	else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
>> +		format_modifiers = format_modifiers_afbc_g12a;
>> +
>>  	drm_universal_plane_init(priv->drm, plane, 0xFF,
>>  				 &meson_plane_funcs,
>>  				 supported_drm_formats,
>>  				 ARRAY_SIZE(supported_drm_formats),
>> -				 NULL,
>> +				 format_modifiers,
>>  				 DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane");
>>  
>>  	drm_plane_helper_add(plane, &meson_plane_helper_funcs);
>> -- 
>> 2.22.0
Ayan Halder Oct. 10, 2019, 5:31 p.m. UTC | #3
On Thu, Oct 10, 2019 at 03:41:15PM +0200, Neil Armstrong wrote:
> Hi Ayan,
> 
> On 10/10/2019 15:26, Ayan Halder wrote:
> > On Thu, Oct 10, 2019 at 11:25:23AM +0200, Neil Armstrong wrote:
> >> This adds all the OSD configuration plumbing to support the AFBC decoders
> >> path to display of the OSD1 plane.
> >>
> >> The Amlogic GXM and G12A AFBC decoders are integrated very differently.
> >>
> >> The Amlogic GXM has a direct output path to the OSD1 VIU pixel input,
> >> because the GXM AFBC decoder seem to be a custom IP developed by Amlogic.
> >>
> >> On the other side, the Amlogic G12A AFBC decoder seems to be an external
> >> IP that emit pixels on an AXI master hooked to a "Mali Unpack" block
> >> feeding the OSD1 VIU pixel input.
> >> This uses a weird "0x1000000" internal HW physical address on both
> >> sides to transfer the pixels.
> >>
> >> For Amlogic GXM, the supported pixel formats are the same as the normal
> >> linear OSD1 mode.
> >>
> >> On the other side, Amlogic added support for all AFBC v1.2 formats for
> >> the G12A AFBC integration.
> >>
> >> For simplicity, we stick to the already supported formats for now.
> >>
> >> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
> >> ---
> >>  drivers/gpu/drm/meson/meson_crtc.c  |   2 +
> >>  drivers/gpu/drm/meson/meson_drv.h   |   4 +
> >>  drivers/gpu/drm/meson/meson_plane.c | 215 ++++++++++++++++++++++++----
> >>  3 files changed, 190 insertions(+), 31 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
> >> index 57ae1c13d1e6..d478fa232951 100644
> >> --- a/drivers/gpu/drm/meson/meson_crtc.c
> >> +++ b/drivers/gpu/drm/meson/meson_crtc.c
> >> @@ -281,6 +281,8 @@ void meson_crtc_irq(struct meson_drm *priv)
> >>  	if (priv->viu.osd1_enabled && priv->viu.osd1_commit) {
> >>  		writel_relaxed(priv->viu.osd1_ctrl_stat,
> >>  				priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
> >> +		writel_relaxed(priv->viu.osd1_ctrl_stat2,
> >> +				priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
> >>  		writel_relaxed(priv->viu.osd1_blk0_cfg[0],
> >>  				priv->io_base + _REG(VIU_OSD1_BLK0_CFG_W0));
> >>  		writel_relaxed(priv->viu.osd1_blk0_cfg[1],
> >> diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
> >> index 60f13c6f34e5..de25349be8aa 100644
> >> --- a/drivers/gpu/drm/meson/meson_drv.h
> >> +++ b/drivers/gpu/drm/meson/meson_drv.h
> >> @@ -53,8 +53,12 @@ struct meson_drm {
> >>  		bool osd1_enabled;
> >>  		bool osd1_interlace;
> >>  		bool osd1_commit;
> >> +		bool osd1_afbcd;
> >>  		uint32_t osd1_ctrl_stat;
> >> +		uint32_t osd1_ctrl_stat2;
> >>  		uint32_t osd1_blk0_cfg[5];
> >> +		uint32_t osd1_blk1_cfg4;
> >> +		uint32_t osd1_blk2_cfg4;
> >>  		uint32_t osd1_addr;
> >>  		uint32_t osd1_stride;
> >>  		uint32_t osd1_height;
> >> diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
> >> index 5e798c276037..412941aa8402 100644
> >> --- a/drivers/gpu/drm/meson/meson_plane.c
> >> +++ b/drivers/gpu/drm/meson/meson_plane.c
> >> @@ -23,6 +23,7 @@
> >>  #include "meson_plane.h"
> >>  #include "meson_registers.h"
> >>  #include "meson_viu.h"
> >> +#include "meson_osd_afbcd.h"
> >>  
> >>  /* OSD_SCI_WH_M1 */
> >>  #define SCI_WH_M1_W(w)			FIELD_PREP(GENMASK(28, 16), w)
> >> @@ -92,12 +93,38 @@ static int meson_plane_atomic_check(struct drm_plane *plane,
> >>  						   false, true);
> >>  }
> >>  
> >> +#define MESON_MOD_AFBC_VALID_BITS (AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |	\
> >> +				   AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |	\
> >> +				   AFBC_FORMAT_MOD_YTR |		\
> >> +				   AFBC_FORMAT_MOD_SPARSE |		\
> >> +				   AFBC_FORMAT_MOD_SPLIT)
> >> +
> >>  /* Takes a fixed 16.16 number and converts it to integer. */
> >>  static inline int64_t fixed16_to_int(int64_t value)
> >>  {
> >>  	return value >> 16;
> >>  }
> >>  
> >> +static u32 meson_g12a_afbcd_line_stride(struct meson_drm *priv)
> >> +{
> >> +	u32 line_stride = 0;
> >> +
> >> +	switch (priv->afbcd.format) {
> >> +	case DRM_FORMAT_RGB565:
> >> +		line_stride = ((priv->viu.osd1_width << 4) + 127) >> 7;
> >> +		break;
> >> +	case DRM_FORMAT_RGB888:
> >> +	case DRM_FORMAT_XRGB8888:
> >> +	case DRM_FORMAT_ARGB8888:
> >> +	case DRM_FORMAT_XBGR8888:
> >> +	case DRM_FORMAT_ABGR8888:
> > Please have a look at
> > https://www.kernel.org/doc/html/latest/gpu/afbc.html for our
> > recommendation. We suggest that *X* formats are avoided.
> > 
> > Also, for interoperability and maximum compression efficiency (with
> > AFBC_FORMAT_MOD_YTR), we suggest the following order :-
> > 
> >         Component 0: R
> >         Component 1: G
> >         Component 2: B
> >         Component 3: A (if available)
> 
> 
> Sorry I don't understand, you ask me to limit AFBC to ABGR8888 ?

Apologies for the confusion. As per the link, the formats which are
suggested with AFBC_FORMAT_MOD_YTR are the BGR/ABGR formats (as
listed in the 'AFBC formats' table).

Thus, any other permutation of the components might make it incompatible
with some other AFBC producers/consumers.

> 
> But why if the HW (GPU and DPU) is capable of ?
> 
> Isn't it an userspace choice ? I understand XRGB8888 is a waste
> of memory space and compression efficiency, but this is not the
> kernel driver's to decide this, right ?
It is a recommendation by the AFBC spec. As far as I understand, it
is up to the implementor of the AFBC spec (i.e. DPU, GPU, VPU, etc.)
to allow/disallow *X* formats for AFBC encoding/decoding.

> 
> For interoperability I'll understand recommending a minimal set
> of modifiers and formats. But here, each platform is also limited
> by it's GPU capabilites aswell.
Agreed

>
> Limiting to ABGR8888 would discard like every non-Android renderers,
> using AFBC, I'm not sure it's the kernels driver's responsibility.
I am not familiar with non-Android renderers.
> 
> > 
> > Thus, DRM_FORMAT_ABGR, DRM_FORMAT_BGR should only be allowed.
> >> +		line_stride = ((priv->viu.osd1_width << 5) + 127) >> 7;
> >> +		break;
> >> +	}
> >> +
> >> +	return ((line_stride + 1) >> 1) << 1;
> >> +}
> >> +
> >>  static void meson_plane_atomic_update(struct drm_plane *plane,
> >>  				      struct drm_plane_state *old_state)
> >>  {
> 
> [...]
> 
> >>  
> >> +static bool meson_plane_format_mod_supported(struct drm_plane *plane,
> >> +					     u32 format, u64 modifier)
> >> +{
> >> +	struct meson_plane *meson_plane = to_meson_plane(plane);
> >> +	struct meson_drm *priv = meson_plane->priv;
> >> +	int i;
> >> +
> >> +	if (modifier == DRM_FORMAT_MOD_INVALID)
> >> +		return false;
> >> +
> >> +	if (modifier == DRM_FORMAT_MOD_LINEAR)
> >> +		return true;
> >> +
> >> +	if (!meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) &&
> >> +	    !meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> >> +		return false;
> >> +
> >> +	if (modifier & ~DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
> >> +		return false;
> >> +
> >> +	for (i = 0 ; i < plane->modifier_count ; ++i)
> >> +		if (plane->modifiers[i] == modifier)
> >> +			break;
> >> +
> >> +	if (i == plane->modifier_count) {
> >> +		DRM_DEBUG_KMS("Unsupported modifier\n");
> >> +		return false;
> >> +	}
> 
> I can add a warn_once here, would it be enough ?
> 
> >> +
> >> +	if (priv->afbcd.ops && priv->afbcd.ops->supported_fmt)
> >> +		return priv->afbcd.ops->supported_fmt(modifier, format);
> >> +
> >> +	DRM_DEBUG_KMS("AFBC Unsupported\n");
> >> +	return false;
> >> +}
> >> +
> >>  static const struct drm_plane_funcs meson_plane_funcs = {
> >>  	.update_plane		= drm_atomic_helper_update_plane,
> >>  	.disable_plane		= drm_atomic_helper_disable_plane,
> >> @@ -353,6 +457,7 @@ static const struct drm_plane_funcs meson_plane_funcs = {
> >>  	.reset			= drm_atomic_helper_plane_reset,
> >>  	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
> >>  	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
> >> +	.format_mod_supported   = meson_plane_format_mod_supported,
> >>  };
> >>  
> >>  static const uint32_t supported_drm_formats[] = {
> >> @@ -364,10 +469,53 @@ static const uint32_t supported_drm_formats[] = {
> >>  	DRM_FORMAT_RGB565,
> >>  };
> >>  
> >> +static const uint64_t format_modifiers_afbc_gxm[] = {
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> >> +				AFBC_FORMAT_MOD_SPARSE |
> >> +				AFBC_FORMAT_MOD_YTR),
> >> +	/* SPLIT mandates SPARSE, RGB modes mandates YTR */
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> >> +				AFBC_FORMAT_MOD_YTR |
> >> +				AFBC_FORMAT_MOD_SPARSE |
> >> +				AFBC_FORMAT_MOD_SPLIT),
> >> +	DRM_FORMAT_MOD_LINEAR,
> >> +	DRM_FORMAT_MOD_INVALID,
> >> +};
> >> +
> >> +static const uint64_t format_modifiers_afbc_g12a[] = {
> >> +	/*
> >> +	 * - TOFIX Support AFBC modifiers for YUV formats (16x16 + TILED)
> >> +	 * - AFBC_FORMAT_MOD_YTR is mandatory since we only support RGB
> >> +	 * - SPLIT is mandatory for performances reasons when in 16x16
> >> +	 *   block size
> >> +	 * - 32x8 block size + SPLIT is mandatory with 4K frame size
> >> +	 *   for performances reasons
> >> +	 */
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> >> +				AFBC_FORMAT_MOD_YTR |
> >> +				AFBC_FORMAT_MOD_SPARSE |
> >> +				AFBC_FORMAT_MOD_SPLIT),
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> >> +				AFBC_FORMAT_MOD_YTR |
> >> +				AFBC_FORMAT_MOD_SPARSE),
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> >> +				AFBC_FORMAT_MOD_YTR |
> >> +				AFBC_FORMAT_MOD_SPARSE |
> >> +				AFBC_FORMAT_MOD_SPLIT),
> >> +	DRM_FORMAT_MOD_LINEAR,
> >> +	DRM_FORMAT_MOD_INVALID,
> >> +};
> >> +
> >> +static const uint64_t format_modifiers_default[] = {
> >> +	DRM_FORMAT_MOD_LINEAR,
> >> +	DRM_FORMAT_MOD_INVALID,
> >> +};
> >> +
> >>  int meson_plane_create(struct meson_drm *priv)
> >>  {
> >>  	struct meson_plane *meson_plane;
> >>  	struct drm_plane *plane;
> >> +	const uint64_t *format_modifiers = format_modifiers_default;
> >>  
> >>  	meson_plane = devm_kzalloc(priv->drm->dev, sizeof(*meson_plane),
> >>  				   GFP_KERNEL);
> >> @@ -377,11 +525,16 @@ int meson_plane_create(struct meson_drm *priv)
> >>  	meson_plane->priv = priv;
> >>  	plane = &meson_plane->base;
> >>  
> >> +	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
> >> +		format_modifiers = format_modifiers_afbc_gxm;
> >> +	else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> >> +		format_modifiers = format_modifiers_afbc_g12a;
> >> +
> >>  	drm_universal_plane_init(priv->drm, plane, 0xFF,
> >>  				 &meson_plane_funcs,
> >>  				 supported_drm_formats,
> >>  				 ARRAY_SIZE(supported_drm_formats),
> >> -				 NULL,
> >> +				 format_modifiers,
> >>  				 DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane");
> >>  
> >>  	drm_plane_helper_add(plane, &meson_plane_helper_funcs);
> >> -- 
> >> 2.22.0
Daniel Vetter Oct. 11, 2019, 7:46 a.m. UTC | #4
On Thu, Oct 10, 2019 at 7:32 PM Ayan Halder <Ayan.Halder@arm.com> wrote:
>
> On Thu, Oct 10, 2019 at 03:41:15PM +0200, Neil Armstrong wrote:
> > Hi Ayan,
> >
> > On 10/10/2019 15:26, Ayan Halder wrote:
> > > On Thu, Oct 10, 2019 at 11:25:23AM +0200, Neil Armstrong wrote:
> > >> This adds all the OSD configuration plumbing to support the AFBC decoders
> > >> path to display of the OSD1 plane.
> > >>
> > >> The Amlogic GXM and G12A AFBC decoders are integrated very differently.
> > >>
> > >> The Amlogic GXM has a direct output path to the OSD1 VIU pixel input,
> > >> because the GXM AFBC decoder seem to be a custom IP developed by Amlogic.
> > >>
> > >> On the other side, the Amlogic G12A AFBC decoder seems to be an external
> > >> IP that emit pixels on an AXI master hooked to a "Mali Unpack" block
> > >> feeding the OSD1 VIU pixel input.
> > >> This uses a weird "0x1000000" internal HW physical address on both
> > >> sides to transfer the pixels.
> > >>
> > >> For Amlogic GXM, the supported pixel formats are the same as the normal
> > >> linear OSD1 mode.
> > >>
> > >> On the other side, Amlogic added support for all AFBC v1.2 formats for
> > >> the G12A AFBC integration.
> > >>
> > >> For simplicity, we stick to the already supported formats for now.
> > >>
> > >> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
> > >> ---
> > >>  drivers/gpu/drm/meson/meson_crtc.c  |   2 +
> > >>  drivers/gpu/drm/meson/meson_drv.h   |   4 +
> > >>  drivers/gpu/drm/meson/meson_plane.c | 215 ++++++++++++++++++++++++----
> > >>  3 files changed, 190 insertions(+), 31 deletions(-)
> > >>
> > >> diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
> > >> index 57ae1c13d1e6..d478fa232951 100644
> > >> --- a/drivers/gpu/drm/meson/meson_crtc.c
> > >> +++ b/drivers/gpu/drm/meson/meson_crtc.c
> > >> @@ -281,6 +281,8 @@ void meson_crtc_irq(struct meson_drm *priv)
> > >>    if (priv->viu.osd1_enabled && priv->viu.osd1_commit) {
> > >>            writel_relaxed(priv->viu.osd1_ctrl_stat,
> > >>                            priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
> > >> +          writel_relaxed(priv->viu.osd1_ctrl_stat2,
> > >> +                          priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
> > >>            writel_relaxed(priv->viu.osd1_blk0_cfg[0],
> > >>                            priv->io_base + _REG(VIU_OSD1_BLK0_CFG_W0));
> > >>            writel_relaxed(priv->viu.osd1_blk0_cfg[1],
> > >> diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
> > >> index 60f13c6f34e5..de25349be8aa 100644
> > >> --- a/drivers/gpu/drm/meson/meson_drv.h
> > >> +++ b/drivers/gpu/drm/meson/meson_drv.h
> > >> @@ -53,8 +53,12 @@ struct meson_drm {
> > >>            bool osd1_enabled;
> > >>            bool osd1_interlace;
> > >>            bool osd1_commit;
> > >> +          bool osd1_afbcd;
> > >>            uint32_t osd1_ctrl_stat;
> > >> +          uint32_t osd1_ctrl_stat2;
> > >>            uint32_t osd1_blk0_cfg[5];
> > >> +          uint32_t osd1_blk1_cfg4;
> > >> +          uint32_t osd1_blk2_cfg4;
> > >>            uint32_t osd1_addr;
> > >>            uint32_t osd1_stride;
> > >>            uint32_t osd1_height;
> > >> diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
> > >> index 5e798c276037..412941aa8402 100644
> > >> --- a/drivers/gpu/drm/meson/meson_plane.c
> > >> +++ b/drivers/gpu/drm/meson/meson_plane.c
> > >> @@ -23,6 +23,7 @@
> > >>  #include "meson_plane.h"
> > >>  #include "meson_registers.h"
> > >>  #include "meson_viu.h"
> > >> +#include "meson_osd_afbcd.h"
> > >>
> > >>  /* OSD_SCI_WH_M1 */
> > >>  #define SCI_WH_M1_W(w)                    FIELD_PREP(GENMASK(28, 16), w)
> > >> @@ -92,12 +93,38 @@ static int meson_plane_atomic_check(struct drm_plane *plane,
> > >>                                               false, true);
> > >>  }
> > >>
> > >> +#define MESON_MOD_AFBC_VALID_BITS (AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |     \
> > >> +                             AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |    \
> > >> +                             AFBC_FORMAT_MOD_YTR |                \
> > >> +                             AFBC_FORMAT_MOD_SPARSE |             \
> > >> +                             AFBC_FORMAT_MOD_SPLIT)
> > >> +
> > >>  /* Takes a fixed 16.16 number and converts it to integer. */
> > >>  static inline int64_t fixed16_to_int(int64_t value)
> > >>  {
> > >>    return value >> 16;
> > >>  }
> > >>
> > >> +static u32 meson_g12a_afbcd_line_stride(struct meson_drm *priv)
> > >> +{
> > >> +  u32 line_stride = 0;
> > >> +
> > >> +  switch (priv->afbcd.format) {
> > >> +  case DRM_FORMAT_RGB565:
> > >> +          line_stride = ((priv->viu.osd1_width << 4) + 127) >> 7;
> > >> +          break;
> > >> +  case DRM_FORMAT_RGB888:
> > >> +  case DRM_FORMAT_XRGB8888:
> > >> +  case DRM_FORMAT_ARGB8888:
> > >> +  case DRM_FORMAT_XBGR8888:
> > >> +  case DRM_FORMAT_ABGR8888:
> > > Please have a look at
> > > https://www.kernel.org/doc/html/latest/gpu/afbc.html for our
> > > recommendation. We suggest that *X* formats are avoided.
> > >
> > > Also, for interoperability and maximum compression efficiency (with
> > > AFBC_FORMAT_MOD_YTR), we suggest the following order :-
> > >
> > >         Component 0: R
> > >         Component 1: G
> > >         Component 2: B
> > >         Component 3: A (if available)
> >
> >
> > Sorry I don't understand, you ask me to limit AFBC to ABGR8888 ?
>
> Apologies for the confusion, as per the link, the formats which are
> suggested with AFBC_FORMAT_MOD_YTR are the BGR/ABGR formats (as
> listed in the 'AFBC formats' table)
>
> Thus, any other permutation of the components might make it incompatible
> with some other AFBC producers/consumers.

Uh, that's not how this is supposed to be used. Drivers are meant to
expose _everything_ they support (bonus if you roughly sort it in
preference order). Userspace then computes the intersection of
modifiers/formats supported by all devices it needs to share a buffer
with. Allowing that was the entire point of modifiers; if we
artificially limit to the common denominator we're back to "only linear
works everywhere".
-Daniel

>
> >
> > But why if the HW (GPU and DPU) is capable of ?
> >
> > Isn't it an userspace choice ? I understand XRGB8888 is a waste
> > of memory space and compression efficiency, but this is not the
> > kernel driver's to decide this, right ?
> It is a recommendation by the AFBC spec. As far as I understand, it
> depends upon the implementor of the AFBC spec (i.e. DPU, GPU, VPU, etc.)
> to allow/disallow *X* formats for AFBC encoding/decoding.
>
> >
> > For interoperability I'll understand recommending a minimal set
> > of modifiers and formats. But here, each platform is also limited
> > by its GPU capabilities as well.
> Agreed
>
> >
> > Limiting to ABGR8888 would discard like every non-Android renderers,
> > using AFBC, I'm not sure it's the kernels driver's responsibility.
> I am not familiar with non-Android renderers.
> >
> > >
> > > Thus, DRM_FORMAT_ABGR, DRM_FORMAT_BGR should only be allowed.
> > >> +          line_stride = ((priv->viu.osd1_width << 5) + 127) >> 7;
> > >> +          break;
> > >> +  }
> > >> +
> > >> +  return ((line_stride + 1) >> 1) << 1;
> > >> +}
> > >> +
> > >>  static void meson_plane_atomic_update(struct drm_plane *plane,
> > >>                                  struct drm_plane_state *old_state)
> > >>  {
> >
> > [...]
> >
> > >>
> > >> +static bool meson_plane_format_mod_supported(struct drm_plane *plane,
> > >> +                                       u32 format, u64 modifier)
> > >> +{
> > >> +  struct meson_plane *meson_plane = to_meson_plane(plane);
> > >> +  struct meson_drm *priv = meson_plane->priv;
> > >> +  int i;
> > >> +
> > >> +  if (modifier == DRM_FORMAT_MOD_INVALID)
> > >> +          return false;
> > >> +
> > >> +  if (modifier == DRM_FORMAT_MOD_LINEAR)
> > >> +          return true;
> > >> +
> > >> +  if (!meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) &&
> > >> +      !meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> > >> +          return false;
> > >> +
> > >> +  if (modifier & ~DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
> > >> +          return false;
> > >> +
> > >> +  for (i = 0 ; i < plane->modifier_count ; ++i)
> > >> +          if (plane->modifiers[i] == modifier)
> > >> +                  break;
> > >> +
> > >> +  if (i == plane->modifier_count) {
> > >> +          DRM_DEBUG_KMS("Unsupported modifier\n");
> > >> +          return false;
> > >> +  }
> >
> > I can add a warn_once here, would it be enough ?
> >
> > >> +
> > >> +  if (priv->afbcd.ops && priv->afbcd.ops->supported_fmt)
> > >> +          return priv->afbcd.ops->supported_fmt(modifier, format);
> > >> +
> > >> +  DRM_DEBUG_KMS("AFBC Unsupported\n");
> > >> +  return false;
> > >> +}
> > >> +
> > >>  static const struct drm_plane_funcs meson_plane_funcs = {
> > >>    .update_plane           = drm_atomic_helper_update_plane,
> > >>    .disable_plane          = drm_atomic_helper_disable_plane,
> > >> @@ -353,6 +457,7 @@ static const struct drm_plane_funcs meson_plane_funcs = {
> > >>    .reset                  = drm_atomic_helper_plane_reset,
> > >>    .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
> > >>    .atomic_destroy_state   = drm_atomic_helper_plane_destroy_state,
> > >> +  .format_mod_supported   = meson_plane_format_mod_supported,
> > >>  };
> > >>
> > >>  static const uint32_t supported_drm_formats[] = {
> > >> @@ -364,10 +469,53 @@ static const uint32_t supported_drm_formats[] = {
> > >>    DRM_FORMAT_RGB565,
> > >>  };
> > >>
> > >> +static const uint64_t format_modifiers_afbc_gxm[] = {
> > >> +  DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> > >> +                          AFBC_FORMAT_MOD_SPARSE |
> > >> +                          AFBC_FORMAT_MOD_YTR),
> > >> +  /* SPLIT mandates SPARSE, RGB modes mandates YTR */
> > >> +  DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> > >> +                          AFBC_FORMAT_MOD_YTR |
> > >> +                          AFBC_FORMAT_MOD_SPARSE |
> > >> +                          AFBC_FORMAT_MOD_SPLIT),
> > >> +  DRM_FORMAT_MOD_LINEAR,
> > >> +  DRM_FORMAT_MOD_INVALID,
> > >> +};
> > >> +
> > >> +static const uint64_t format_modifiers_afbc_g12a[] = {
> > >> +  /*
> > >> +   * - TOFIX Support AFBC modifiers for YUV formats (16x16 + TILED)
> > >> +   * - AFBC_FORMAT_MOD_YTR is mandatory since we only support RGB
> > >> +   * - SPLIT is mandatory for performances reasons when in 16x16
> > >> +   *   block size
> > >> +   * - 32x8 block size + SPLIT is mandatory with 4K frame size
> > >> +   *   for performances reasons
> > >> +   */
> > >> +  DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> > >> +                          AFBC_FORMAT_MOD_YTR |
> > >> +                          AFBC_FORMAT_MOD_SPARSE |
> > >> +                          AFBC_FORMAT_MOD_SPLIT),
> > >> +  DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> > >> +                          AFBC_FORMAT_MOD_YTR |
> > >> +                          AFBC_FORMAT_MOD_SPARSE),
> > >> +  DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> > >> +                          AFBC_FORMAT_MOD_YTR |
> > >> +                          AFBC_FORMAT_MOD_SPARSE |
> > >> +                          AFBC_FORMAT_MOD_SPLIT),
> > >> +  DRM_FORMAT_MOD_LINEAR,
> > >> +  DRM_FORMAT_MOD_INVALID,
> > >> +};
> > >> +
> > >> +static const uint64_t format_modifiers_default[] = {
> > >> +  DRM_FORMAT_MOD_LINEAR,
> > >> +  DRM_FORMAT_MOD_INVALID,
> > >> +};
> > >> +
> > >>  int meson_plane_create(struct meson_drm *priv)
> > >>  {
> > >>    struct meson_plane *meson_plane;
> > >>    struct drm_plane *plane;
> > >> +  const uint64_t *format_modifiers = format_modifiers_default;
> > >>
> > >>    meson_plane = devm_kzalloc(priv->drm->dev, sizeof(*meson_plane),
> > >>                               GFP_KERNEL);
> > >> @@ -377,11 +525,16 @@ int meson_plane_create(struct meson_drm *priv)
> > >>    meson_plane->priv = priv;
> > >>    plane = &meson_plane->base;
> > >>
> > >> +  if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
> > >> +          format_modifiers = format_modifiers_afbc_gxm;
> > >> +  else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> > >> +          format_modifiers = format_modifiers_afbc_g12a;
> > >> +
> > >>    drm_universal_plane_init(priv->drm, plane, 0xFF,
> > >>                             &meson_plane_funcs,
> > >>                             supported_drm_formats,
> > >>                             ARRAY_SIZE(supported_drm_formats),
> > >> -                           NULL,
> > >> +                           format_modifiers,
> > >>                             DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane");
> > >>
> > >>    drm_plane_helper_add(plane, &meson_plane_helper_funcs);
> > >> --
> > >> 2.22.0
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Daniel Stone Oct. 11, 2019, 7:56 a.m. UTC | #5
Hi,

On Fri, 11 Oct 2019 at 08:46, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Thu, Oct 10, 2019 at 7:32 PM Ayan Halder <Ayan.Halder@arm.com> wrote:
> > On Thu, Oct 10, 2019 at 03:41:15PM +0200, Neil Armstrong wrote:
> > > Sorry I don't understand, you ask me to limit AFBC to ABGR8888 ?
> >
> > Apologies for the confusion, as per the link, the formats which are
> > suggested with AFBC_FORMAT_MOD_YTR are the BGR/ABGR formats (as
> > listed in the 'AFBC formats' table)
> >
> > Thus, any other permutation of the components might make it incompatible
> > with some other AFBC producers/consumers.
>
> Uh, that's not how this is supposed to be used. Drivers are meant to
> expose _everything_ they support (bonus if you roughly sort it in
> preference order). Userspace then computes the intersection of
> modifiers/formats supported by all devices it needs to share a buffer
> with. Allowing that was the entire point of modifiers, if we
> artificially limit to the common denominator we're back "only linear
> works everywhere".

Correct.

A lot of userspace will select for format first, then find a modifier
which can be used with that format. If userspace has specific
knowledge of AFBC and decides that it prefers to use AFBC, and so will
seek out an alpha format - and make sure everyone fills the channel
solid - then that's fine. But that's putting the cart before the horse.

Not exposing XRGB8888 on the primary plane will break a lot of
userspace, so in this case AFBC would just never really be used.

Cheers,
Daniel
Brian Starkey Oct. 11, 2019, 8:41 a.m. UTC | #6
Hi Neil,

On Thu, Oct 10, 2019 at 03:41:15PM +0200, Neil Armstrong wrote:
> Hi Ayan,
> 
> On 10/10/2019 15:26, Ayan Halder wrote:
> > On Thu, Oct 10, 2019 at 11:25:23AM +0200, Neil Armstrong wrote:
> >> This adds all the OSD configuration plumbing to support the AFBC decoders
> >> path to display of the OSD1 plane.
> >>
> >> The Amlogic GXM and G12A AFBC decoders are integrated very differently.
> >>
> >> The Amlogic GXM has a direct output path to the OSD1 VIU pixel input,
> >> because the GXM AFBC decoder seem to be a custom IP developed by Amlogic.
> >>
> >> On the other side, the Amlogic G12A AFBC decoder seems to be an external
> >> IP that emit pixels on an AXI master hooked to a "Mali Unpack" block
> >> feeding the OSD1 VIU pixel input.
> >> This uses a weird "0x1000000" internal HW physical address on both
> >> sides to transfer the pixels.
> >>
> >> For Amlogic GXM, the supported pixel formats are the same as the normal
> >> linear OSD1 mode.
> >>
> >> On the other side, Amlogic added support for all AFBC v1.2 formats for
> >> the G12A AFBC integration.
> >>
> >> For simplicity, we stick to the already supported formats for now.
> >>
> >> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
> >> ---
> >>  drivers/gpu/drm/meson/meson_crtc.c  |   2 +
> >>  drivers/gpu/drm/meson/meson_drv.h   |   4 +
> >>  drivers/gpu/drm/meson/meson_plane.c | 215 ++++++++++++++++++++++++----
> >>  3 files changed, 190 insertions(+), 31 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
> >> index 57ae1c13d1e6..d478fa232951 100644
> >> --- a/drivers/gpu/drm/meson/meson_crtc.c
> >> +++ b/drivers/gpu/drm/meson/meson_crtc.c
> >> @@ -281,6 +281,8 @@ void meson_crtc_irq(struct meson_drm *priv)
> >>  	if (priv->viu.osd1_enabled && priv->viu.osd1_commit) {
> >>  		writel_relaxed(priv->viu.osd1_ctrl_stat,
> >>  				priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
> >> +		writel_relaxed(priv->viu.osd1_ctrl_stat2,
> >> +				priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
> >>  		writel_relaxed(priv->viu.osd1_blk0_cfg[0],
> >>  				priv->io_base + _REG(VIU_OSD1_BLK0_CFG_W0));
> >>  		writel_relaxed(priv->viu.osd1_blk0_cfg[1],
> >> diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
> >> index 60f13c6f34e5..de25349be8aa 100644
> >> --- a/drivers/gpu/drm/meson/meson_drv.h
> >> +++ b/drivers/gpu/drm/meson/meson_drv.h
> >> @@ -53,8 +53,12 @@ struct meson_drm {
> >>  		bool osd1_enabled;
> >>  		bool osd1_interlace;
> >>  		bool osd1_commit;
> >> +		bool osd1_afbcd;
> >>  		uint32_t osd1_ctrl_stat;
> >> +		uint32_t osd1_ctrl_stat2;
> >>  		uint32_t osd1_blk0_cfg[5];
> >> +		uint32_t osd1_blk1_cfg4;
> >> +		uint32_t osd1_blk2_cfg4;
> >>  		uint32_t osd1_addr;
> >>  		uint32_t osd1_stride;
> >>  		uint32_t osd1_height;
> >> diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
> >> index 5e798c276037..412941aa8402 100644
> >> --- a/drivers/gpu/drm/meson/meson_plane.c
> >> +++ b/drivers/gpu/drm/meson/meson_plane.c
> >> @@ -23,6 +23,7 @@
> >>  #include "meson_plane.h"
> >>  #include "meson_registers.h"
> >>  #include "meson_viu.h"
> >> +#include "meson_osd_afbcd.h"
> >>  
> >>  /* OSD_SCI_WH_M1 */
> >>  #define SCI_WH_M1_W(w)			FIELD_PREP(GENMASK(28, 16), w)
> >> @@ -92,12 +93,38 @@ static int meson_plane_atomic_check(struct drm_plane *plane,
> >>  						   false, true);
> >>  }
> >>  
> >> +#define MESON_MOD_AFBC_VALID_BITS (AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |	\
> >> +				   AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |	\
> >> +				   AFBC_FORMAT_MOD_YTR |		\
> >> +				   AFBC_FORMAT_MOD_SPARSE |		\
> >> +				   AFBC_FORMAT_MOD_SPLIT)
> >> +
> >>  /* Takes a fixed 16.16 number and converts it to integer. */
> >>  static inline int64_t fixed16_to_int(int64_t value)
> >>  {
> >>  	return value >> 16;
> >>  }
> >>  
> >> +static u32 meson_g12a_afbcd_line_stride(struct meson_drm *priv)
> >> +{
> >> +	u32 line_stride = 0;
> >> +
> >> +	switch (priv->afbcd.format) {
> >> +	case DRM_FORMAT_RGB565:
> >> +		line_stride = ((priv->viu.osd1_width << 4) + 127) >> 7;
> >> +		break;
> >> +	case DRM_FORMAT_RGB888:
> >> +	case DRM_FORMAT_XRGB8888:
> >> +	case DRM_FORMAT_ARGB8888:
> >> +	case DRM_FORMAT_XBGR8888:
> >> +	case DRM_FORMAT_ABGR8888:
> > Please have a look at
> > https://www.kernel.org/doc/html/latest/gpu/afbc.html for our
> > recommendation. We suggest that *X* formats are avoided.
> > 
> > Also, for interoperability and maximum compression efficiency (with
> > AFBC_FORMAT_MOD_YTR), we suggest the following order :-
> > 
> >         Component 0: R
> >         Component 1: G
> >         Component 2: B
> >         Component 3: A (if available)
> 
> 
> Sorry I don't understand, you ask me to limit AFBC to ABGR8888 ?
> 
> But why if the HW (GPU and DPU) is capable of ?

AFBC doesn't have an in-memory component order in the traditional
sense (i.e. a bit-position to component mapping), so Arm
have decided to define the convention that DRM_FORMAT_ABGR8888
represents the AFBC layout with R in component 0.

Are you sure the GPU supports other orders? I think any Arm driver
will only be producing DRM_FORMATs with "BGR" order e.g. ABGR8888.

I'm not convinced the GPU HW actually supports any other order, but
it's all rather confusing with texture swizzling. What I can tell you
for sure is that it _does_ support BGR order (in DRM naming
convention).

If you do choose to expose orders other than BGR/ABGR, then you should
certainly not allow YTR to be used with any orders other than
BGR/ABGR. The AFBC spec defines YTR as using R in component 0, which
Arm has defined as DRM_FORMAT_*BGR* (component 0 in LE LSBs).

> 
> Isn't it an userspace choice ? I understand XRGB8888 is a waste
> of memory space and compression efficiency, but this is not the
> kernel driver's to decide this, right ?
> 

As long as it's agreed and understood what XRGB8888 means. It must be
an AFBC bitstream with 4-components, with B in component 0, G in
component 1, R in component 2 and 8 wasted bits in component 3.

I know of HW which treats "XBGR" with AFBC as a 3-component format,
which isn't correct but can easily lead to confusion and
incompatibility.

> For interoperability I'll understand recommending a minimal set
> of modifiers and formats. But here, each platform is also limited
> by its GPU capabilities as well.
> 

The (Arm) GPUs support ABGR ordering, so if everyone sticks to that we
can make sure everything's nice and compatible (until someone turns up
with HW which _doesn't_ support that ordering).

> Limiting to ABGR8888 would discard like every non-Android renderers,
> using AFBC, I'm not sure it's the kernels driver's responsibility.
> 

It prevents renderers with hard-coded pixel formats, perhaps. But
those are already fragile by nature, surely?

Cheers,
-Brian

> > 
> > Thus, DRM_FORMAT_ABGR, DRM_FORMAT_BGR should only be allowed.
> >> +		line_stride = ((priv->viu.osd1_width << 5) + 127) >> 7;
> >> +		break;
> >> +	}
> >> +
> >> +	return ((line_stride + 1) >> 1) << 1;
> >> +}
> >> +
> >>  static void meson_plane_atomic_update(struct drm_plane *plane,
> >>  				      struct drm_plane_state *old_state)
> >>  {
> 
> [...]
> 
> >>  
> >> +static bool meson_plane_format_mod_supported(struct drm_plane *plane,
> >> +					     u32 format, u64 modifier)
> >> +{
> >> +	struct meson_plane *meson_plane = to_meson_plane(plane);
> >> +	struct meson_drm *priv = meson_plane->priv;
> >> +	int i;
> >> +
> >> +	if (modifier == DRM_FORMAT_MOD_INVALID)
> >> +		return false;
> >> +
> >> +	if (modifier == DRM_FORMAT_MOD_LINEAR)
> >> +		return true;
> >> +
> >> +	if (!meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) &&
> >> +	    !meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> >> +		return false;
> >> +
> >> +	if (modifier & ~DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
> >> +		return false;
> >> +
> >> +	for (i = 0 ; i < plane->modifier_count ; ++i)
> >> +		if (plane->modifiers[i] == modifier)
> >> +			break;
> >> +
> >> +	if (i == plane->modifier_count) {
> >> +		DRM_DEBUG_KMS("Unsupported modifier\n");
> >> +		return false;
> >> +	}
> 
> I can add a warn_once here, would it be enough ?
> 
> >> +
> >> +	if (priv->afbcd.ops && priv->afbcd.ops->supported_fmt)
> >> +		return priv->afbcd.ops->supported_fmt(modifier, format);
> >> +
> >> +	DRM_DEBUG_KMS("AFBC Unsupported\n");
> >> +	return false;
> >> +}
> >> +
> >>  static const struct drm_plane_funcs meson_plane_funcs = {
> >>  	.update_plane		= drm_atomic_helper_update_plane,
> >>  	.disable_plane		= drm_atomic_helper_disable_plane,
> >> @@ -353,6 +457,7 @@ static const struct drm_plane_funcs meson_plane_funcs = {
> >>  	.reset			= drm_atomic_helper_plane_reset,
> >>  	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
> >>  	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
> >> +	.format_mod_supported   = meson_plane_format_mod_supported,
> >>  };
> >>  
> >>  static const uint32_t supported_drm_formats[] = {
> >> @@ -364,10 +469,53 @@ static const uint32_t supported_drm_formats[] = {
> >>  	DRM_FORMAT_RGB565,
> >>  };
> >>  
> >> +static const uint64_t format_modifiers_afbc_gxm[] = {
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> >> +				AFBC_FORMAT_MOD_SPARSE |
> >> +				AFBC_FORMAT_MOD_YTR),
> >> +	/* SPLIT mandates SPARSE, RGB modes mandates YTR */
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> >> +				AFBC_FORMAT_MOD_YTR |
> >> +				AFBC_FORMAT_MOD_SPARSE |
> >> +				AFBC_FORMAT_MOD_SPLIT),
> >> +	DRM_FORMAT_MOD_LINEAR,
> >> +	DRM_FORMAT_MOD_INVALID,
> >> +};
> >> +
> >> +static const uint64_t format_modifiers_afbc_g12a[] = {
> >> +	/*
> >> +	 * - TOFIX Support AFBC modifiers for YUV formats (16x16 + TILED)
> >> +	 * - AFBC_FORMAT_MOD_YTR is mandatory since we only support RGB
> >> +	 * - SPLIT is mandatory for performances reasons when in 16x16
> >> +	 *   block size
> >> +	 * - 32x8 block size + SPLIT is mandatory with 4K frame size
> >> +	 *   for performances reasons
> >> +	 */
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> >> +				AFBC_FORMAT_MOD_YTR |
> >> +				AFBC_FORMAT_MOD_SPARSE |
> >> +				AFBC_FORMAT_MOD_SPLIT),
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> >> +				AFBC_FORMAT_MOD_YTR |
> >> +				AFBC_FORMAT_MOD_SPARSE),
> >> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> >> +				AFBC_FORMAT_MOD_YTR |
> >> +				AFBC_FORMAT_MOD_SPARSE |
> >> +				AFBC_FORMAT_MOD_SPLIT),
> >> +	DRM_FORMAT_MOD_LINEAR,
> >> +	DRM_FORMAT_MOD_INVALID,
> >> +};
> >> +
> >> +static const uint64_t format_modifiers_default[] = {
> >> +	DRM_FORMAT_MOD_LINEAR,
> >> +	DRM_FORMAT_MOD_INVALID,
> >> +};
> >> +
> >>  int meson_plane_create(struct meson_drm *priv)
> >>  {
> >>  	struct meson_plane *meson_plane;
> >>  	struct drm_plane *plane;
> >> +	const uint64_t *format_modifiers = format_modifiers_default;
> >>  
> >>  	meson_plane = devm_kzalloc(priv->drm->dev, sizeof(*meson_plane),
> >>  				   GFP_KERNEL);
> >> @@ -377,11 +525,16 @@ int meson_plane_create(struct meson_drm *priv)
> >>  	meson_plane->priv = priv;
> >>  	plane = &meson_plane->base;
> >>  
> >> +	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
> >> +		format_modifiers = format_modifiers_afbc_gxm;
> >> +	else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> >> +		format_modifiers = format_modifiers_afbc_g12a;
> >> +
> >>  	drm_universal_plane_init(priv->drm, plane, 0xFF,
> >>  				 &meson_plane_funcs,
> >>  				 supported_drm_formats,
> >>  				 ARRAY_SIZE(supported_drm_formats),
> >> -				 NULL,
> >> +				 format_modifiers,
> >>  				 DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane");
> >>  
> >>  	drm_plane_helper_add(plane, &meson_plane_helper_funcs);
> >> -- 
> >> 2.22.0
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Neil Armstrong Oct. 11, 2019, 9:14 a.m. UTC | #7
Hi Brian,

On 11/10/2019 10:41, Brian Starkey wrote:
> Hi Neil,
> 
> On Thu, Oct 10, 2019 at 03:41:15PM +0200, Neil Armstrong wrote:
>> Hi Ayan,
>>
>> On 10/10/2019 15:26, Ayan Halder wrote:
>>> On Thu, Oct 10, 2019 at 11:25:23AM +0200, Neil Armstrong wrote:
>>>> This adds all the OSD configuration plumbing to support the AFBC decoders
>>>> path to display of the OSD1 plane.
>>>>
>>>> The Amlogic GXM and G12A AFBC decoders are integrated very differently.
>>>>
>>>> The Amlogic GXM has a direct output path to the OSD1 VIU pixel input,
>>>> because the GXM AFBC decoder seem to be a custom IP developed by Amlogic.
>>>>
>>>> On the other side, the Amlogic G12A AFBC decoder seems to be an external
>>>> IP that emit pixels on an AXI master hooked to a "Mali Unpack" block
>>>> feeding the OSD1 VIU pixel input.
>>>> This uses a weird "0x1000000" internal HW physical address on both
>>>> sides to transfer the pixels.
>>>>
>>>> For Amlogic GXM, the supported pixel formats are the same as the normal
>>>> linear OSD1 mode.
>>>>
>>>> On the other side, Amlogic added support for all AFBC v1.2 formats for
>>>> the G12A AFBC integration.
>>>>
>>>> For simplicity, we stick to the already supported formats for now.
>>>>
>>>> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
>>>> ---
>>>>  drivers/gpu/drm/meson/meson_crtc.c  |   2 +
>>>>  drivers/gpu/drm/meson/meson_drv.h   |   4 +
>>>>  drivers/gpu/drm/meson/meson_plane.c | 215 ++++++++++++++++++++++++----
>>>>  3 files changed, 190 insertions(+), 31 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
>>>> index 57ae1c13d1e6..d478fa232951 100644
>>>> --- a/drivers/gpu/drm/meson/meson_crtc.c
>>>> +++ b/drivers/gpu/drm/meson/meson_crtc.c
>>>> @@ -281,6 +281,8 @@ void meson_crtc_irq(struct meson_drm *priv)
>>>>  	if (priv->viu.osd1_enabled && priv->viu.osd1_commit) {
>>>>  		writel_relaxed(priv->viu.osd1_ctrl_stat,
>>>>  				priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
>>>> +		writel_relaxed(priv->viu.osd1_ctrl_stat2,
>>>> +				priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
>>>>  		writel_relaxed(priv->viu.osd1_blk0_cfg[0],
>>>>  				priv->io_base + _REG(VIU_OSD1_BLK0_CFG_W0));
>>>>  		writel_relaxed(priv->viu.osd1_blk0_cfg[1],
>>>> diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
>>>> index 60f13c6f34e5..de25349be8aa 100644
>>>> --- a/drivers/gpu/drm/meson/meson_drv.h
>>>> +++ b/drivers/gpu/drm/meson/meson_drv.h
>>>> @@ -53,8 +53,12 @@ struct meson_drm {
>>>>  		bool osd1_enabled;
>>>>  		bool osd1_interlace;
>>>>  		bool osd1_commit;
>>>> +		bool osd1_afbcd;
>>>>  		uint32_t osd1_ctrl_stat;
>>>> +		uint32_t osd1_ctrl_stat2;
>>>>  		uint32_t osd1_blk0_cfg[5];
>>>> +		uint32_t osd1_blk1_cfg4;
>>>> +		uint32_t osd1_blk2_cfg4;
>>>>  		uint32_t osd1_addr;
>>>>  		uint32_t osd1_stride;
>>>>  		uint32_t osd1_height;
>>>> diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
>>>> index 5e798c276037..412941aa8402 100644
>>>> --- a/drivers/gpu/drm/meson/meson_plane.c
>>>> +++ b/drivers/gpu/drm/meson/meson_plane.c
>>>> @@ -23,6 +23,7 @@
>>>>  #include "meson_plane.h"
>>>>  #include "meson_registers.h"
>>>>  #include "meson_viu.h"
>>>> +#include "meson_osd_afbcd.h"
>>>>  
>>>>  /* OSD_SCI_WH_M1 */
>>>>  #define SCI_WH_M1_W(w)			FIELD_PREP(GENMASK(28, 16), w)
>>>> @@ -92,12 +93,38 @@ static int meson_plane_atomic_check(struct drm_plane *plane,
>>>>  						   false, true);
>>>>  }
>>>>  
>>>> +#define MESON_MOD_AFBC_VALID_BITS (AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |	\
>>>> +				   AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |	\
>>>> +				   AFBC_FORMAT_MOD_YTR |		\
>>>> +				   AFBC_FORMAT_MOD_SPARSE |		\
>>>> +				   AFBC_FORMAT_MOD_SPLIT)
>>>> +
>>>>  /* Takes a fixed 16.16 number and converts it to integer. */
>>>>  static inline int64_t fixed16_to_int(int64_t value)
>>>>  {
>>>>  	return value >> 16;
>>>>  }
>>>>  
>>>> +static u32 meson_g12a_afbcd_line_stride(struct meson_drm *priv)
>>>> +{
>>>> +	u32 line_stride = 0;
>>>> +
>>>> +	switch (priv->afbcd.format) {
>>>> +	case DRM_FORMAT_RGB565:
>>>> +		line_stride = ((priv->viu.osd1_width << 4) + 127) >> 7;
>>>> +		break;
>>>> +	case DRM_FORMAT_RGB888:
>>>> +	case DRM_FORMAT_XRGB8888:
>>>> +	case DRM_FORMAT_ARGB8888:
>>>> +	case DRM_FORMAT_XBGR8888:
>>>> +	case DRM_FORMAT_ABGR8888:
>>> Please have a look at
>>> https://www.kernel.org/doc/html/latest/gpu/afbc.html for our
>>> recommendation. We suggest that *X* formats are avoided.
>>>
>>> Also, for interoperability and maximum compression efficiency (with
>>> AFBC_FORMAT_MOD_YTR), we suggest the following order :-
>>>
>>>         Component 0: R
>>>         Component 1: G
>>>         Component 2: B
>>>         Component 3: A (if available)
>>
>>
>> Sorry I don't understand, you ask me to limit AFBC to ABGR8888 ?
>>
>> But why if the HW (GPU and DPU) is capable of ?
> 
> AFBC doesn't have an in-memory component order in the traditional
> sense (i.e. a bit-position to component mapping), so Arm
> have decided to define the convention that DRM_FORMAT_ABGR8888
> represents the AFBC layout with R in component 0.

In this implementation, we handle the ARGB/ABGR as the same mode
since the AFBC can only represent the layout as "ABGR" anyway.

> 
> Are you sure the GPU supports other orders? I think any Arm driver
> will only be producing DRM_FORMATs with "BGR" order e.g. ABGR8888.
> 
> I'm not convinced the GPU HW actually supports any other order, but
> it's all rather confusing with texture swizzling. What I can tell you
> for sure is that it _does_ support BGR order (in DRM naming
> convention).

Well, since the Bifrost Mali blobs are closed-source and delivered
by licensees, it's hard to define what is supported from a closed
GPU HW, closed SW implementation to a closed pixel format implementation.

You'll have to tell us if the closed libMali handling AFBC would accept
ARGB8888 as format to render with AFBC enabled, if not you're right
I'll discard XRGB8888/ARGB8888 for AFBC buffers completely.

But if the libMali chooses to generate an ARGB8888 buffer whichever of
ARGB8888/XRGB8888/ABGR8888/XBGR8888 is asked for, then no, I'll keep it that way.

BTW I kept the vendor implementation here, which may be wrong but since
they have the AFBC IP license and Mali Bifrost GPU license...

> 
> If you do choose to expose orders other than BGR/ABGR, then you should
> certainly not allow YTR to be used with any orders other than
> BGR/ABGR. The AFBC spec defines YTR as using R in component 0, which
> Arm has defined as DRM_FORMAT_*BGR* (component 0 in LE LSBs).
> 

The MAFBC_FMT_RGBA8888 pixel format is defined in the AFBC decoder,
which seems to be an ARM IP, the registers documentation is in the
SoC datasheet at [1] and the formats bits are defined in the patch 3 at [2].

So it seems the decoder handles only a single type for 32bit RGB buffer
format, as Amlogic names it MAFBC_FMT_RGBA8888

For XRGB8888/XBGR8888 we simply "replace" the A component with a fixed
value in the pixel generator.

[1] https://dl.khadas.com/Hardware/VIM3/Datasheet/S905D3_datasheet_0.2_Wesion.pdf page 772
[2] https://patchwork.freedesktop.org/patch/335199/?series=67832&rev=1

>>
>> Isn't it an userspace choice ? I understand XRGB8888 is a waste
>> of memory space and compression efficiency, but this is not the
>> kernel driver's to decide this, right ?
>>
> 
> As long as it's agreed and understood what XRGB8888 means. It must be
> an AFBC bitstream with 4-components, with B in component 0, G in
> component 1, R in component 2 and 8 wasted bits in component 3.

Yes, but this is something userspace must assume, and it's already
wasted in the linear XRGB8888 format anyway.

> 
> I know of HW which treats "XBGR" with AFBC as a 3-component format,
> which isn't correct but can easily lead to confusion and
> incompatibility.

Seems it's not the case here, at least for the G12A SoC family.

> 
>> For interoperability I'll understand recommending a minimal set
>> of modifiers and formats. But here, each platform is also limited
>> by its GPU capabilities as well.
>>
> 
> The (Arm) GPUs support ABGR ordering, so if everyone sticks to that we
> can make sure everything's nice and compatible (until someone turns up
> with HW which _doesn't_ support that ordering).

This is not clear enough in the https://www.kernel.org/doc/html/latest/gpu/afbc.html
document. Since ARM is in control of the renderers, if the document stated that
AFBC does _not_ support any component order other than ABGR in all the
OpenGL ES/Vulkan implementations, it would be clear we couldn't render
anything using AFBC with ARGB.
But we hit the closed-source/closed-specifications here again.

> 
>> Limiting to ABGR8888 would discard like every non-Android renderers,
>> using AFBC, I'm not sure it's the kernels driver's responsibility.
>>
> 
> It prevents renderers with hard-coded pixel formats, perhaps. But
> those are already fragile by nature, surely?

Well, except Android, all the other renderers use ARGB8888/XRGB8888
as a fixed pixel format, which is quite a large amount of code.


Anyway, thanks for these technical clarifications, it makes things
much clearer.

Neil

> 
> Cheers,
> -Brian
> 
>>>
>>> Thus, DRM_FORMAT_ABGR, DRM_FORMAT_BGR should only be allowed.
>>>> +		line_stride = ((priv->viu.osd1_width << 5) + 127) >> 7;
>>>> +		break;
>>>> +	}
>>>> +
>>>> +	return ((line_stride + 1) >> 1) << 1;
>>>> +}
>>>> +
>>>>  static void meson_plane_atomic_update(struct drm_plane *plane,
>>>>  				      struct drm_plane_state *old_state)
>>>>  {
>>
>> [...]
>>
>>>>  
>>>> +static bool meson_plane_format_mod_supported(struct drm_plane *plane,
>>>> +					     u32 format, u64 modifier)
>>>> +{
>>>> +	struct meson_plane *meson_plane = to_meson_plane(plane);
>>>> +	struct meson_drm *priv = meson_plane->priv;
>>>> +	int i;
>>>> +
>>>> +	if (modifier == DRM_FORMAT_MOD_INVALID)
>>>> +		return false;
>>>> +
>>>> +	if (modifier == DRM_FORMAT_MOD_LINEAR)
>>>> +		return true;
>>>> +
>>>> +	if (!meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) &&
>>>> +	    !meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
>>>> +		return false;
>>>> +
>>>> +	if (modifier & ~DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
>>>> +		return false;
>>>> +
>>>> +	for (i = 0 ; i < plane->modifier_count ; ++i)
>>>> +		if (plane->modifiers[i] == modifier)
>>>> +			break;
>>>> +
>>>> +	if (i == plane->modifier_count) {
>>>> +		DRM_DEBUG_KMS("Unsupported modifier\n");
>>>> +		return false;
>>>> +	}
>>
>> I can add a warn_once here, would it be enough ?
>>
>>>> +
>>>> +	if (priv->afbcd.ops && priv->afbcd.ops->supported_fmt)
>>>> +		return priv->afbcd.ops->supported_fmt(modifier, format);
>>>> +
>>>> +	DRM_DEBUG_KMS("AFBC Unsupported\n");
>>>> +	return false;
>>>> +}
>>>> +
>>>>  static const struct drm_plane_funcs meson_plane_funcs = {
>>>>  	.update_plane		= drm_atomic_helper_update_plane,
>>>>  	.disable_plane		= drm_atomic_helper_disable_plane,
>>>> @@ -353,6 +457,7 @@ static const struct drm_plane_funcs meson_plane_funcs = {
>>>>  	.reset			= drm_atomic_helper_plane_reset,
>>>>  	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
>>>>  	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
>>>> +	.format_mod_supported   = meson_plane_format_mod_supported,
>>>>  };
>>>>  
>>>>  static const uint32_t supported_drm_formats[] = {
>>>> @@ -364,10 +469,53 @@ static const uint32_t supported_drm_formats[] = {
>>>>  	DRM_FORMAT_RGB565,
>>>>  };
>>>>  
>>>> +static const uint64_t format_modifiers_afbc_gxm[] = {
>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
>>>> +				AFBC_FORMAT_MOD_SPARSE |
>>>> +				AFBC_FORMAT_MOD_YTR),
>>>> +	/* SPLIT mandates SPARSE, RGB modes mandates YTR */
>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
>>>> +				AFBC_FORMAT_MOD_YTR |
>>>> +				AFBC_FORMAT_MOD_SPARSE |
>>>> +				AFBC_FORMAT_MOD_SPLIT),
>>>> +	DRM_FORMAT_MOD_LINEAR,
>>>> +	DRM_FORMAT_MOD_INVALID,
>>>> +};
>>>> +
>>>> +static const uint64_t format_modifiers_afbc_g12a[] = {
>>>> +	/*
>>>> +	 * - TOFIX Support AFBC modifiers for YUV formats (16x16 + TILED)
>>>> +	 * - AFBC_FORMAT_MOD_YTR is mandatory since we only support RGB
>>>> +	 * - SPLIT is mandatory for performances reasons when in 16x16
>>>> +	 *   block size
>>>> +	 * - 32x8 block size + SPLIT is mandatory with 4K frame size
>>>> +	 *   for performances reasons
>>>> +	 */
>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
>>>> +				AFBC_FORMAT_MOD_YTR |
>>>> +				AFBC_FORMAT_MOD_SPARSE |
>>>> +				AFBC_FORMAT_MOD_SPLIT),
>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
>>>> +				AFBC_FORMAT_MOD_YTR |
>>>> +				AFBC_FORMAT_MOD_SPARSE),
>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
>>>> +				AFBC_FORMAT_MOD_YTR |
>>>> +				AFBC_FORMAT_MOD_SPARSE |
>>>> +				AFBC_FORMAT_MOD_SPLIT),
>>>> +	DRM_FORMAT_MOD_LINEAR,
>>>> +	DRM_FORMAT_MOD_INVALID,
>>>> +};
>>>> +
>>>> +static const uint64_t format_modifiers_default[] = {
>>>> +	DRM_FORMAT_MOD_LINEAR,
>>>> +	DRM_FORMAT_MOD_INVALID,
>>>> +};
>>>> +
>>>>  int meson_plane_create(struct meson_drm *priv)
>>>>  {
>>>>  	struct meson_plane *meson_plane;
>>>>  	struct drm_plane *plane;
>>>> +	const uint64_t *format_modifiers = format_modifiers_default;
>>>>  
>>>>  	meson_plane = devm_kzalloc(priv->drm->dev, sizeof(*meson_plane),
>>>>  				   GFP_KERNEL);
>>>> @@ -377,11 +525,16 @@ int meson_plane_create(struct meson_drm *priv)
>>>>  	meson_plane->priv = priv;
>>>>  	plane = &meson_plane->base;
>>>>  
>>>> +	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
>>>> +		format_modifiers = format_modifiers_afbc_gxm;
>>>> +	else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
>>>> +		format_modifiers = format_modifiers_afbc_g12a;
>>>> +
>>>>  	drm_universal_plane_init(priv->drm, plane, 0xFF,
>>>>  				 &meson_plane_funcs,
>>>>  				 supported_drm_formats,
>>>>  				 ARRAY_SIZE(supported_drm_formats),
>>>> -				 NULL,
>>>> +				 format_modifiers,
>>>>  				 DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane");
>>>>  
>>>>  	drm_plane_helper_add(plane, &meson_plane_helper_funcs);
>>>> -- 
>>>> 2.22.0
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Brian Starkey Oct. 11, 2019, 10:56 a.m. UTC | #8
Hi,

On Fri, Oct 11, 2019 at 11:14:43AM +0200, Neil Armstrong wrote:
> Hi Brian,
> 
> On 11/10/2019 10:41, Brian Starkey wrote:
> > Hi Neil,
> > 
> > On Thu, Oct 10, 2019 at 03:41:15PM +0200, Neil Armstrong wrote:
> >> Hi Ayan,
> >>
> >> On 10/10/2019 15:26, Ayan Halder wrote:
> >>> On Thu, Oct 10, 2019 at 11:25:23AM +0200, Neil Armstrong wrote:
> >>>> This adds all the OSD configuration plumbing to support the AFBC decoders
> >>>> path to display of the OSD1 plane.
> >>>>
> >>>> The Amlogic GXM and G12A AFBC decoders are integrated very differently.
> >>>>
> >>>> The Amlogic GXM has a direct output path to the OSD1 VIU pixel input,
> >>>> because the GXM AFBC decoder seem to be a custom IP developed by Amlogic.
> >>>>
> >>>> On the other side, the Amlogic G12A AFBC decoder seems to be an external
> >>>> IP that emit pixels on an AXI master hooked to a "Mali Unpack" block
> >>>> feeding the OSD1 VIU pixel input.
> >>>> This uses a weird "0x1000000" internal HW physical address on both
> >>>> sides to transfer the pixels.
> >>>>
> >>>> For Amlogic GXM, the supported pixel formats are the same as the normal
> >>>> linear OSD1 mode.
> >>>>
> >>>> On the other side, Amlogic added support for all AFBC v1.2 formats for
> >>>> the G12A AFBC integration.
> >>>>
> >>>> For simplicity, we stick to the already supported formats for now.
> >>>>
> >>>> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
> >>>> ---
> >>>>  drivers/gpu/drm/meson/meson_crtc.c  |   2 +
> >>>>  drivers/gpu/drm/meson/meson_drv.h   |   4 +
> >>>>  drivers/gpu/drm/meson/meson_plane.c | 215 ++++++++++++++++++++++++----
> >>>>  3 files changed, 190 insertions(+), 31 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
> >>>> index 57ae1c13d1e6..d478fa232951 100644
> >>>> --- a/drivers/gpu/drm/meson/meson_crtc.c
> >>>> +++ b/drivers/gpu/drm/meson/meson_crtc.c
> >>>> @@ -281,6 +281,8 @@ void meson_crtc_irq(struct meson_drm *priv)
> >>>>  	if (priv->viu.osd1_enabled && priv->viu.osd1_commit) {
> >>>>  		writel_relaxed(priv->viu.osd1_ctrl_stat,
> >>>>  				priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
> >>>> +		writel_relaxed(priv->viu.osd1_ctrl_stat2,
> >>>> +				priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
> >>>>  		writel_relaxed(priv->viu.osd1_blk0_cfg[0],
> >>>>  				priv->io_base + _REG(VIU_OSD1_BLK0_CFG_W0));
> >>>>  		writel_relaxed(priv->viu.osd1_blk0_cfg[1],
> >>>> diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
> >>>> index 60f13c6f34e5..de25349be8aa 100644
> >>>> --- a/drivers/gpu/drm/meson/meson_drv.h
> >>>> +++ b/drivers/gpu/drm/meson/meson_drv.h
> >>>> @@ -53,8 +53,12 @@ struct meson_drm {
> >>>>  		bool osd1_enabled;
> >>>>  		bool osd1_interlace;
> >>>>  		bool osd1_commit;
> >>>> +		bool osd1_afbcd;
> >>>>  		uint32_t osd1_ctrl_stat;
> >>>> +		uint32_t osd1_ctrl_stat2;
> >>>>  		uint32_t osd1_blk0_cfg[5];
> >>>> +		uint32_t osd1_blk1_cfg4;
> >>>> +		uint32_t osd1_blk2_cfg4;
> >>>>  		uint32_t osd1_addr;
> >>>>  		uint32_t osd1_stride;
> >>>>  		uint32_t osd1_height;
> >>>> diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
> >>>> index 5e798c276037..412941aa8402 100644
> >>>> --- a/drivers/gpu/drm/meson/meson_plane.c
> >>>> +++ b/drivers/gpu/drm/meson/meson_plane.c
> >>>> @@ -23,6 +23,7 @@
> >>>>  #include "meson_plane.h"
> >>>>  #include "meson_registers.h"
> >>>>  #include "meson_viu.h"
> >>>> +#include "meson_osd_afbcd.h"
> >>>>  
> >>>>  /* OSD_SCI_WH_M1 */
> >>>>  #define SCI_WH_M1_W(w)			FIELD_PREP(GENMASK(28, 16), w)
> >>>> @@ -92,12 +93,38 @@ static int meson_plane_atomic_check(struct drm_plane *plane,
> >>>>  						   false, true);
> >>>>  }
> >>>>  
> >>>> +#define MESON_MOD_AFBC_VALID_BITS (AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |	\
> >>>> +				   AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |	\
> >>>> +				   AFBC_FORMAT_MOD_YTR |		\
> >>>> +				   AFBC_FORMAT_MOD_SPARSE |		\
> >>>> +				   AFBC_FORMAT_MOD_SPLIT)
> >>>> +
> >>>>  /* Takes a fixed 16.16 number and converts it to integer. */
> >>>>  static inline int64_t fixed16_to_int(int64_t value)
> >>>>  {
> >>>>  	return value >> 16;
> >>>>  }
> >>>>  
> >>>> +static u32 meson_g12a_afbcd_line_stride(struct meson_drm *priv)
> >>>> +{
> >>>> +	u32 line_stride = 0;
> >>>> +
> >>>> +	switch (priv->afbcd.format) {
> >>>> +	case DRM_FORMAT_RGB565:
> >>>> +		line_stride = ((priv->viu.osd1_width << 4) + 127) >> 7;
> >>>> +		break;
> >>>> +	case DRM_FORMAT_RGB888:
> >>>> +	case DRM_FORMAT_XRGB8888:
> >>>> +	case DRM_FORMAT_ARGB8888:
> >>>> +	case DRM_FORMAT_XBGR8888:
> >>>> +	case DRM_FORMAT_ABGR8888:
> >>> Please have a look at
> >>> https://www.kernel.org/doc/html/latest/gpu/afbc.html for our
> >>> recommendation. We suggest that *X* formats are avoided.
> >>>
> >>> Also, for interoperability and maximum compression efficiency (with
> >>> AFBC_FORMAT_MOD_YTR), we suggest the following order :-
> >>>
> >>>         Component 0: R
> >>>         Component 1: G
> >>>         Component 2: B
> >>>         Component 3: A (if available)
> >>
> >>
> >> Sorry I don't understand, you ask me to limit AFBC to ABGR8888 ?
> >>
> >> But why if the HW (GPU and DPU) is capable of ?
> > 
> > AFBC doesn't have an in-memory component order in the traditional
> > sense (i.e. a bit-position to component mapping), so Arm
> > have decided to define the convention that DRM_FORMAT_ABGR8888
> > represents the AFBC layout with R in component 0.
> 
> In this implementation, we handle the ARGB/ABGR as the same mode
> since the AFBC can only represent the layout as "ABGR" anyway.
> 

In this case, with the external AFBC IP, there's a whole extra layer
of potential confusion :-(

The decoder only needs to know the number of components - so
irrespective of what color channel is mapped to what component, it can
always be configured with the same mode for 4-component 32-bit
formats.

For everything to work correctly with YTR, the thing consuming the
output from the decoder must treat component 0 as 'R', but otherwise
it doesn't matter.

Is your HW able to treat the decoder output in different ways? e.g.
mapping component 0 to 'B'? If that's the case, then exposing the
different orders is valid - but only ABGR should allow YTR.

> > 
> > Are you sure the GPU supports other orders? I think any Arm driver
> > will only be producing DRM_FORMATs with "BGR" order e.g. ABGR8888.
> > 
> > I'm not convinced the GPU HW actually supports any other order, but
> > it's all rather confusing with texture swizzling. What I can tell you
> > for sure is that it _does_ support BGR order (in DRM naming
> > convention).
> 
> Well, since the Bifrost Mali blobs are closed-source and delivered
> by licensees, it's hard to define what is supported from a closed
> GPU HW, closed SW implementation to a closed pixel format implementation.
> 

I hear you. IMO the only way to make any of this clear is to publish
reference data and tests which make sure implementations match each
other. It's something I'm trying to make happen.

> You'll have to tell us if the closed libMali handling AFBC would accept
> ARGB8888 as format to render with AFBC enabled, if not you're right
> I'll discard XRGB8888/ARGB8888 for AFBC buffers completely.
> 
> But if the libMali chooses to generate an ARGB8888 buffer whichever of
> ARGB8888/XRGB8888/ABGR8888/XBGR8888 is asked for, then no, I'll keep it that way.
> 

Yeah, I'll try and get clarity on this. It's not at all clear to me
either. When you say "accept ARGB8888 as format to render with AFBC
enabled", which API are you referring to, just so I can be clear? Do
you have an example of some code you're using to render AFBC with the
GPU blob?

In many APIs, there's no real expectation on in-memory component
order, so perhaps treating them all as the same is acceptable there.

However, fourcc + AFBC modifier is explicit in terms of component
order, and so IMO it's very harmful to "ignore" component order in
interfaces using fourcc + AFBC modifier.

There are implementations which support other orders, so ignoring
order will break those implementations. In some cases (Android, maybe
GL), this can be hidden behind "driver magic", but if the API is
fourcc + AFBC modifier, IMO it had better be completely explicit with
no tricks - irrespective of whatever other less-prescriptive APIs do.

> BTW I kept the vendor implementation here, which may be wrong but since
> they have the AFBC IP license and Mali Bifrost GPU license...
> 
> > 
> > If you do choose to expose orders other than BGR/ABGR, then you should
> > certainly not allow YTR to be used with any orders other than
> > BGR/ABGR. The AFBC spec defines YTR as using R in component 0, which
> > Arm has defined as DRM_FORMAT_*BGR* (component 0 in LE LSBs).
> > 
> 
> The MAFBC_FMT_RGBA8888 pixel format is defined in the AFBC decoder,
> which seems to be an ARM IP, the registers documentation is in the
> SoC datasheet at [1] and the formats bits are defined in the patch 3 at [2].
> 
> So it seems the decoder handles only a single type for 32bit RGB buffer
> format, as Amlogic names it MAFBC_FMT_RGBA8888
> 

Hopefully my comments at the beginning of this mail help clear this
part up a bit.

> For XRGB8888/XBGR8888 we simply "replace" the A component with a fixed
> value in the pixel generator.

That seems correct, so long as the decoder is configured in the
4-component mode.

> 
> [1] https://dl.khadas.com/Hardware/VIM3/Datasheet/S905D3_datasheet_0.2_Wesion.pdf page 772
> [2] https://patchwork.freedesktop.org/patch/335199/?series=67832&rev=1
> 
> >>
> >> Isn't it an userspace choice ? I understand XRGB8888 is a waste
> >> of memory space and compression efficiency, but this is not the
> >> kernel driver's to decide this, right ?
> >>
> > 
> > As long as it's agreed and understood what XRGB8888 means. It must be
> > an AFBC bitstream with 4-components, with B in component 0, G in
> > component 1, R in component 2 and 8 wasted bits in component 3.
> 
> Yes, but this is something userspace must assume, and it's already
> wasted in the linear XRGB8888 format anyway.
> 
> > 
> > I know of HW which treats "XBGR" with AFBC as a 3-component format,
> > which isn't correct but can easily lead to confusion and
> > incompatibility.
> 
> Seems it's not the case here, at least for the G12A SoC family.

That's good :-)

> 
> > 
> >> For interoperability I'll understand recommending a minimal set
> >> of modifiers and formats. But here, each platform is also limited
> >> by its GPU capabilities as well.
> >>
> > 
> > The (Arm) GPUs support ABGR ordering, so if everyone sticks to that we
> > can make sure everything's nice and compatible (until someone turns up
> > with HW which _doesn't_ support that ordering).
> 
> This is not clear enough in the https://www.kernel.org/doc/html/latest/gpu/afbc.html
> document. Since ARM is in control of the renderers, saying AFBC does _not_
> support another components format as ABGR ordering in all the
> OpenGL ES/Vulkan implementations, it would be clear we couldn't render
> anything using AFBC with ARGB.
> But we hit the closed-source/closed-specifications here again.
> 

I didn't really understand the middle sentence.

I know and understand that the "closed-ness" is a problem. The page
you linked was an initial attempt at making a clear, public
specification.

What I need to be clear about, though, is that it describes _only_
cases where DRM fourcc + AFBC modifier are used. I don't think there's
any sane way to apply it to other APIs, because the formats are
described differently, and the "leeway" allowed for doing things
"under-the-hood" is very different.

> > 
> >> Limiting to ABGR8888 would discard like every non-Android renderers,
> >> using AFBC, I'm not sure it's the kernels driver's responsibility.
> >>
> > 
> > It prevents renderers with hard-coded pixel formats, perhaps. But
> > those are already fragile by nature, surely?
> 
> Well, except Android, all the other renderers use ARGB8888/XRGB8888
> as a fixed pixel format, which is quite a large amount of code.
> 

I think whether that matters or not really depends on which graphics
APIs you're referring to. IMO it's inevitable that modifiers don't
simply "drop in" everywhere. The kernel API allows you to query what's
supported and pick that.

Thanks,
-Brian

> 
> Anyway, thanks for these technical clarifications, it makes things
> much clearer.
> 
> Neil
> 
> > 
> > Cheers,
> > -Brian
> > 
> >>>
> >>> Thus, DRM_FORMAT_ABGR, DRM_FORMAT_BGR should only be allowed.
> >>>> +		line_stride = ((priv->viu.osd1_width << 5) + 127) >> 7;
> >>>> +		break;
> >>>> +	}
> >>>> +
> >>>> +	return ((line_stride + 1) >> 1) << 1;
> >>>> +}
> >>>> +
> >>>>  static void meson_plane_atomic_update(struct drm_plane *plane,
> >>>>  				      struct drm_plane_state *old_state)
> >>>>  {
> >>
> >> [...]
> >>
> >>>>  
> >>>> +static bool meson_plane_format_mod_supported(struct drm_plane *plane,
> >>>> +					     u32 format, u64 modifier)
> >>>> +{
> >>>> +	struct meson_plane *meson_plane = to_meson_plane(plane);
> >>>> +	struct meson_drm *priv = meson_plane->priv;
> >>>> +	int i;
> >>>> +
> >>>> +	if (modifier == DRM_FORMAT_MOD_INVALID)
> >>>> +		return false;
> >>>> +
> >>>> +	if (modifier == DRM_FORMAT_MOD_LINEAR)
> >>>> +		return true;
> >>>> +
> >>>> +	if (!meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) &&
> >>>> +	    !meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> >>>> +		return false;
> >>>> +
> >>>> +	if (modifier & ~DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
> >>>> +		return false;
> >>>> +
> >>>> +	for (i = 0 ; i < plane->modifier_count ; ++i)
> >>>> +		if (plane->modifiers[i] == modifier)
> >>>> +			break;
> >>>> +
> >>>> +	if (i == plane->modifier_count) {
> >>>> +		DRM_DEBUG_KMS("Unsupported modifier\n");
> >>>> +		return false;
> >>>> +	}
> >>
> >> I can add a warn_once here, would it be enough ?
> >>
> >>>> +
> >>>> +	if (priv->afbcd.ops && priv->afbcd.ops->supported_fmt)
> >>>> +		return priv->afbcd.ops->supported_fmt(modifier, format);
> >>>> +
> >>>> +	DRM_DEBUG_KMS("AFBC Unsupported\n");
> >>>> +	return false;
> >>>> +}
> >>>> +
> >>>>  static const struct drm_plane_funcs meson_plane_funcs = {
> >>>>  	.update_plane		= drm_atomic_helper_update_plane,
> >>>>  	.disable_plane		= drm_atomic_helper_disable_plane,
> >>>> @@ -353,6 +457,7 @@ static const struct drm_plane_funcs meson_plane_funcs = {
> >>>>  	.reset			= drm_atomic_helper_plane_reset,
> >>>>  	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
> >>>>  	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
> >>>> +	.format_mod_supported   = meson_plane_format_mod_supported,
> >>>>  };
> >>>>  
> >>>>  static const uint32_t supported_drm_formats[] = {
> >>>> @@ -364,10 +469,53 @@ static const uint32_t supported_drm_formats[] = {
> >>>>  	DRM_FORMAT_RGB565,
> >>>>  };
> >>>>  
> >>>> +static const uint64_t format_modifiers_afbc_gxm[] = {
> >>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> >>>> +				AFBC_FORMAT_MOD_SPARSE |
> >>>> +				AFBC_FORMAT_MOD_YTR),
> >>>> +	/* SPLIT mandates SPARSE, RGB modes mandates YTR */
> >>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> >>>> +				AFBC_FORMAT_MOD_YTR |
> >>>> +				AFBC_FORMAT_MOD_SPARSE |
> >>>> +				AFBC_FORMAT_MOD_SPLIT),
> >>>> +	DRM_FORMAT_MOD_LINEAR,
> >>>> +	DRM_FORMAT_MOD_INVALID,
> >>>> +};
> >>>> +
> >>>> +static const uint64_t format_modifiers_afbc_g12a[] = {
> >>>> +	/*
> >>>> +	 * - TOFIX Support AFBC modifiers for YUV formats (16x16 + TILED)
> >>>> +	 * - AFBC_FORMAT_MOD_YTR is mandatory since we only support RGB
> >>>> +	 * - SPLIT is mandatory for performances reasons when in 16x16
> >>>> +	 *   block size
> >>>> +	 * - 32x8 block size + SPLIT is mandatory with 4K frame size
> >>>> +	 *   for performances reasons
> >>>> +	 */
> >>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
> >>>> +				AFBC_FORMAT_MOD_YTR |
> >>>> +				AFBC_FORMAT_MOD_SPARSE |
> >>>> +				AFBC_FORMAT_MOD_SPLIT),
> >>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> >>>> +				AFBC_FORMAT_MOD_YTR |
> >>>> +				AFBC_FORMAT_MOD_SPARSE),
> >>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> >>>> +				AFBC_FORMAT_MOD_YTR |
> >>>> +				AFBC_FORMAT_MOD_SPARSE |
> >>>> +				AFBC_FORMAT_MOD_SPLIT),
> >>>> +	DRM_FORMAT_MOD_LINEAR,
> >>>> +	DRM_FORMAT_MOD_INVALID,
> >>>> +};
> >>>> +
> >>>> +static const uint64_t format_modifiers_default[] = {
> >>>> +	DRM_FORMAT_MOD_LINEAR,
> >>>> +	DRM_FORMAT_MOD_INVALID,
> >>>> +};
> >>>> +
> >>>>  int meson_plane_create(struct meson_drm *priv)
> >>>>  {
> >>>>  	struct meson_plane *meson_plane;
> >>>>  	struct drm_plane *plane;
> >>>> +	const uint64_t *format_modifiers = format_modifiers_default;
> >>>>  
> >>>>  	meson_plane = devm_kzalloc(priv->drm->dev, sizeof(*meson_plane),
> >>>>  				   GFP_KERNEL);
> >>>> @@ -377,11 +525,16 @@ int meson_plane_create(struct meson_drm *priv)
> >>>>  	meson_plane->priv = priv;
> >>>>  	plane = &meson_plane->base;
> >>>>  
> >>>> +	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
> >>>> +		format_modifiers = format_modifiers_afbc_gxm;
> >>>> +	else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
> >>>> +		format_modifiers = format_modifiers_afbc_g12a;
> >>>> +
> >>>>  	drm_universal_plane_init(priv->drm, plane, 0xFF,
> >>>>  				 &meson_plane_funcs,
> >>>>  				 supported_drm_formats,
> >>>>  				 ARRAY_SIZE(supported_drm_formats),
> >>>> -				 NULL,
> >>>> +				 format_modifiers,
> >>>>  				 DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane");
> >>>>  
> >>>>  	drm_plane_helper_add(plane, &meson_plane_helper_funcs);
> >>>> -- 
> >>>> 2.22.0
> >>
> >> _______________________________________________
> >> dri-devel mailing list
> >> dri-devel@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
Neil Armstrong Oct. 11, 2019, 12:07 p.m. UTC | #9
On 11/10/2019 12:56, Brian Starkey wrote:
> Hi,
> 
> On Fri, Oct 11, 2019 at 11:14:43AM +0200, Neil Armstrong wrote:
>> Hi Brian,
>>
>> On 11/10/2019 10:41, Brian Starkey wrote:
>>> Hi Neil,
>>>
>>> On Thu, Oct 10, 2019 at 03:41:15PM +0200, Neil Armstrong wrote:
>>>> Hi Ayan,
>>>>
>>>> On 10/10/2019 15:26, Ayan Halder wrote:
>>>>> On Thu, Oct 10, 2019 at 11:25:23AM +0200, Neil Armstrong wrote:
>>>>>> This adds all the OSD configuration plumbing to support the AFBC decoders
>>>>>> path to display of the OSD1 plane.
>>>>>>
>>>>>> The Amlogic GXM and G12A AFBC decoders are integrated very differently.
>>>>>>
>>>>>> The Amlogic GXM has a direct output path to the OSD1 VIU pixel input,
>>>>>> because the GXM AFBC decoder seem to be a custom IP developed by Amlogic.
>>>>>>
>>>>>> On the other side, the Amlogic G12A AFBC decoder seems to be an external
>>>>>> IP that emit pixels on an AXI master hooked to a "Mali Unpack" block
>>>>>> feeding the OSD1 VIU pixel input.
>>>>>> This uses a weird "0x1000000" internal HW physical address on both
>>>>>> sides to transfer the pixels.
>>>>>>
>>>>>> For Amlogic GXM, the supported pixel formats are the same as the normal
>>>>>> linear OSD1 mode.
>>>>>>
>>>>>> On the other side, Amlogic added support for all AFBC v1.2 formats for
>>>>>> the G12A AFBC integration.
>>>>>>
>>>>>> For simplicity, we stick to the already supported formats for now.
>>>>>>
>>>>>> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
>>>>>> ---
>>>>>>  drivers/gpu/drm/meson/meson_crtc.c  |   2 +
>>>>>>  drivers/gpu/drm/meson/meson_drv.h   |   4 +
>>>>>>  drivers/gpu/drm/meson/meson_plane.c | 215 ++++++++++++++++++++++++----
>>>>>>  3 files changed, 190 insertions(+), 31 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
>>>>>> index 57ae1c13d1e6..d478fa232951 100644
>>>>>> --- a/drivers/gpu/drm/meson/meson_crtc.c
>>>>>> +++ b/drivers/gpu/drm/meson/meson_crtc.c
>>>>>> @@ -281,6 +281,8 @@ void meson_crtc_irq(struct meson_drm *priv)
>>>>>>  	if (priv->viu.osd1_enabled && priv->viu.osd1_commit) {
>>>>>>  		writel_relaxed(priv->viu.osd1_ctrl_stat,
>>>>>>  				priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
>>>>>> +		writel_relaxed(priv->viu.osd1_ctrl_stat2,
>>>>>> +				priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
>>>>>>  		writel_relaxed(priv->viu.osd1_blk0_cfg[0],
>>>>>>  				priv->io_base + _REG(VIU_OSD1_BLK0_CFG_W0));
>>>>>>  		writel_relaxed(priv->viu.osd1_blk0_cfg[1],
>>>>>> diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
>>>>>> index 60f13c6f34e5..de25349be8aa 100644
>>>>>> --- a/drivers/gpu/drm/meson/meson_drv.h
>>>>>> +++ b/drivers/gpu/drm/meson/meson_drv.h
>>>>>> @@ -53,8 +53,12 @@ struct meson_drm {
>>>>>>  		bool osd1_enabled;
>>>>>>  		bool osd1_interlace;
>>>>>>  		bool osd1_commit;
>>>>>> +		bool osd1_afbcd;
>>>>>>  		uint32_t osd1_ctrl_stat;
>>>>>> +		uint32_t osd1_ctrl_stat2;
>>>>>>  		uint32_t osd1_blk0_cfg[5];
>>>>>> +		uint32_t osd1_blk1_cfg4;
>>>>>> +		uint32_t osd1_blk2_cfg4;
>>>>>>  		uint32_t osd1_addr;
>>>>>>  		uint32_t osd1_stride;
>>>>>>  		uint32_t osd1_height;
>>>>>> diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
>>>>>> index 5e798c276037..412941aa8402 100644
>>>>>> --- a/drivers/gpu/drm/meson/meson_plane.c
>>>>>> +++ b/drivers/gpu/drm/meson/meson_plane.c
>>>>>> @@ -23,6 +23,7 @@
>>>>>>  #include "meson_plane.h"
>>>>>>  #include "meson_registers.h"
>>>>>>  #include "meson_viu.h"
>>>>>> +#include "meson_osd_afbcd.h"
>>>>>>  
>>>>>>  /* OSD_SCI_WH_M1 */
>>>>>>  #define SCI_WH_M1_W(w)			FIELD_PREP(GENMASK(28, 16), w)
>>>>>> @@ -92,12 +93,38 @@ static int meson_plane_atomic_check(struct drm_plane *plane,
>>>>>>  						   false, true);
>>>>>>  }
>>>>>>  
>>>>>> +#define MESON_MOD_AFBC_VALID_BITS (AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |	\
>>>>>> +				   AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |	\
>>>>>> +				   AFBC_FORMAT_MOD_YTR |		\
>>>>>> +				   AFBC_FORMAT_MOD_SPARSE |		\
>>>>>> +				   AFBC_FORMAT_MOD_SPLIT)
>>>>>> +
>>>>>>  /* Takes a fixed 16.16 number and converts it to integer. */
>>>>>>  static inline int64_t fixed16_to_int(int64_t value)
>>>>>>  {
>>>>>>  	return value >> 16;
>>>>>>  }
>>>>>>  
>>>>>> +static u32 meson_g12a_afbcd_line_stride(struct meson_drm *priv)
>>>>>> +{
>>>>>> +	u32 line_stride = 0;
>>>>>> +
>>>>>> +	switch (priv->afbcd.format) {
>>>>>> +	case DRM_FORMAT_RGB565:
>>>>>> +		line_stride = ((priv->viu.osd1_width << 4) + 127) >> 7;
>>>>>> +		break;
>>>>>> +	case DRM_FORMAT_RGB888:
>>>>>> +	case DRM_FORMAT_XRGB8888:
>>>>>> +	case DRM_FORMAT_ARGB8888:
>>>>>> +	case DRM_FORMAT_XBGR8888:
>>>>>> +	case DRM_FORMAT_ABGR8888:
>>>>> Please have a look at
>>>>> https://www.kernel.org/doc/html/latest/gpu/afbc.html for our
>>>>> recommendation. We suggest that *X* formats are avoided.
>>>>>
>>>>> Also, for interoperability and maximum compression efficiency (with
>>>>> AFBC_FORMAT_MOD_YTR), we suggest the following order :-
>>>>>
>>>>>         Component 0: R
>>>>>         Component 1: G
>>>>>         Component 2: B
>>>>>         Component 3: A (if available)
>>>>
>>>>
>>>> Sorry I don't understand, you ask me to limit AFBC to ABGR8888 ?
>>>>
>>>> But why if the HW (GPU and DPU) is capable of ?
>>>
>>> AFBC doesn't have an in-memory component order in the traditional
>>> sense (i.e. a bit-position to component mapping), so Arm
>>> have decided to define the convention that DRM_FORMAT_ABGR8888
>>> represents the AFBC layout with R in component 0.
>>
>> In this implementation, we handle the ARGB/ABGR as the same mode
>> since the AFBC can only represent the layout as "ABGR" anyway.
>>
> 
> In this case, with the external AFBC IP, there's a whole extra layer
> of potential confusion :-(
> 
> The decoder only needs to know the number of components - so
> irrespective of what color channel is mapped to what component, it can
> always be configured with the same mode for 4-component 32-bit
> formats.
> 
> For everything to work correctly with YTR, the thing consuming the
> output from the decoder must treat component 0 as 'R', but otherwise
> it doesn't matter.
> 
> Is your HW able to treat the decoder output in different ways? e.g.
> mapping component 0 to 'B'? If that's the case, then exposing the
> different orders is valid - but only ABGR should allow YTR.

Yes, we can remap each component from AFBC in any order.

Ok thanks for clarifying, so:
- I'll allow only ABGR/XBGR with YTR
- I'll allow ABGR/XBGR/ARGB/XRGB only if !YTR and use the AFBC components remapping
for ARGB/XRGB

I'll also need to clean up the RGB888/BGR888 as we support only RGB888 for now.

And I'll reject RGB565 since we don't support it without AFBC.

> 
>>>
>>> Are you sure the GPU supports other orders? I think any Arm driver
>>> will only be producing DRM_FORMATs with "BGR" order e.g. ABGR888>
>>> I'm not convinced the GPU HW actually supports any other order, but
>>> it's all rather confusing with texture swizzling. What I can tell you
>>> for sure is that it _does_ support BGR order (in DRM naming
>>> convention).
>>
>> Well, since the Bifrost Mali blobs are closed-source and delivered
>> by licensees, it's hard to define what is supported from a closed
>> GPU HW, closed SW implementation to a closed pixel format implementation.
>>
> 
> I hear you. IMO the only way to make any of this clear is to publish
> reference data and tests which make sure implementations match each
> other. It's something I'm trying to make happen.

I'll be happy to run them when available and fix the implementation accordingly !

> 
>> You'll have to tell us if the closed libMali handling AFBC would accept
>> ARGB8888 as format to render with AFBC enabled, if not you're right
>> I'll discard XRGB8888/ARGB8888 for AFBC buffers completely.
>>
>> But it the libMali chooses tt generate an ARGB8888 buffer whatever
>> ARGB8888/XRGB8888/ABGR888/XBGR888 is asked, then no I'll keep it that way.
>>
> 
> Yeah, I'll try and get clarity on this. It's not at all clear to me
> either. When you say "accept ARGB8888 as format to render with AFBC
> enabled", which API are you referring to, just so I can be clear? Do
> you have an example of some code you're using to render AFBC with the
> GPU blob?

Let's take kmscube using EGL and GBM.

The buffer is allocated using gbm_surface_create_with_modifiers(),
but the gbm implementation is vendor-specific.

Then the surface is passed to eglCreateWindowSurface().
Then the format is matched using eglGetConfigAttrib() with the
returned configs.

Here, support depends on the gbm and EGL implementations.

> 
> In many APIs, there's no real expectation on in-memory component
> order, so perhaps there treating them as all the same is acceptable.
> 
> However, fourcc + AFBC modifier is explicit in terms of component
> order, and so IMO it's very harmful to "ignore" component order in
> interfaces using fourcc + AFBC modifier.
> 
> There are implementations which support other orders, so ignoring
> order will break those implementations. In some cases (Android, maybe
> GL), this can be hidden behind "driver magic", but if the API is
> fourcc + AFBC modifier, IMO it had better be completely explicit with
> no tricks - irrespective of whatever other less-prescriptive APIs do.

Sure

> 
>> BTW I kept the vendor implementation here, which may be wrong but since
>> they have the AFBC IP license and Mali Bifrost GPU license...
>>
>>>
>>> If you do choose to expose orders other than BGR/ABGR, then you should
>>> certainly not allow YTR to be used with any orders other than
>>> BGR/ABGR. The AFBC spec defines YTR as using R in component 0, which
>>> Arm has defined as DRM_FORMAT_*BGR* (component 0 in LE LSBs).
>>>
>>
>> The MAFBC_FMT_RGBA8888 pixel format is defined in the AFBC decoder,
>> which seems to be an ARM IP, the registers documentation is in the
>> SoC datasheet at [1] and the formats bits are defined in the patch 3 at [2].
>>
>> So it seems the decoder handles only a single type for 32bit RGB buffer
>> format, as Amlogic names it MAFBC_FMT_RGBA8888
>>
> 
> Hopefully my comments at the beginning of this mail helps clear this
> part up a bit.
> 
>> For XRGB8888/XBGR8888 we simply "replace" the A component with a fixed
>> value in the pixel generator.
> 
> That seems correct, so long as the decoder is configured in the
> 4-component mode.
> 
>>
>> [1] https://dl.khadas.com/Hardware/VIM3/Datasheet/S905D3_datasheet_0.2_Wesion.pdf page 772
>> [2] https://patchwork.freedesktop.org/patch/335199/?series=67832&rev=1
>>
>>>>
>>>> Isn't it an userspace choice ? I understand XRGB8888 is a waste
>>>> of memory space and compression efficiency, but this is not the
>>>> kernel driver's to decide this, right ?
>>>>
>>>
>>> As long as it's agreed and understood what XRGB8888 means. It must be
>>> an AFBC bitstream with 4-components, with B in component 0, G in
>>> component 1, R in component 2 and 8 wasted bits in component 3.
>>
>> Yes, but this is something userspace must assume, and it's already
>> wasted in the linear XRGB8888 format anyway.
>>
>>>
>>> I know of HW which treats "XBGR" with AFBC as a 3-component format,
>>> which isn't correct but can easily lead to confusion and
>>> incompatibility.
>>
>> Seems it's not the case here, at least for the G12A SoC family.
> 
> That's good :-)
> 
>>
>>>
>>>> For interoperability I'll understand recommending a minimal set
>>>> of modifiers and formats. But here, each platform is also limited
>>>> by it's GPU capabilites aswell.
>>>>
>>>
>>> The (Arm) GPUs support ABGR ordering, so if everyone sticks to that we
>>> can make sure everything's nice and compatible (until someone turns up
>>> with HW which _doesn't_ support that ordering).
>>
>> This is not clean enough in the https://www.kernel.org/doc/html/latest/gpu/afbc.html
>> document. Since ARM is in control of the renderers, saying AFBC does _not_
>> support another components format as ABGR ordering in all the
>> OpenGL ES/Vulkan implementations, it would be clear we couldn't render
>> anything using AFBC with ARGB.
>> But we hit the closed-source/closed-specifications here again.
>>
> 
> I didn't really understand the middle sentence.
> 
> I know and understand that the "closed-ness" is a problem. The page
> you linked was an initial attempt at making a clear, public
> specification.
> 
> What I need to be clear about, though, is that it describes _only_
> cases where DRM fourcc + AFBC modifier are used. I don't think there's
> any sane way to apply it to other APIs, because the formats are
> described differently, and the "leeway" allowed for doing things
> "under-the-hood" is very different.

Indeed

> 
>>>
>>>> Limiting to ABGR8888 would discard like every non-Android renderers,
>>>> using AFBC, I'm not sure it's the kernels driver's responsibility.
>>>>
>>>
>>> It prevents renderers with hard-coded pixel formats, perhaps. But
>>> those are already fragile by nature, surely?
>>
>> Well, except Android, all the other renderers uses ARGB8888/XRGB8888,
>> as fixed pixel format, which is quite a large amount of code.
>>
> 
> I think whether that matters or not really depends on which graphics
> APIs you're referring to. IMO it's inevitable that modifiers don't
> simply "drop in" everywhere. The kernel API allows you to query what's
> supported and pick that.

Sure, we'll need to add an extra layer to discover the GL API capabilities
vs the DRM Display driver capabilities in terms of fourcc+modifiers at some point.
This may be a goal for the liboutput library !

Thanks,
Neil

> 
> Thanks,
> -Brian
> 
>>
>> Anyway, thanks for these technical clarifications, it makes things
>> much more clearer.
>>
>> Neil
>>
>>>
>>> Cheers,
>>> -Brian
>>>
>>>>>
>>>>> Thus, DRM_FORMAT_ABGR, DRM_FORMAT_BGR should only be allowed.
>>>>>> +		line_stride = ((priv->viu.osd1_width << 5) + 127) >> 7;
>>>>>> +		break;
>>>>>> +	}
>>>>>> +
>>>>>> +	return ((line_stride + 1) >> 1) << 1;
>>>>>> +}
>>>>>> +
>>>>>>  static void meson_plane_atomic_update(struct drm_plane *plane,
>>>>>>  				      struct drm_plane_state *old_state)
>>>>>>  {
>>>>
>>>> [...]
>>>>
>>>>>>  
>>>>>> +static bool meson_plane_format_mod_supported(struct drm_plane *plane,
>>>>>> +					     u32 format, u64 modifier)
>>>>>> +{
>>>>>> +	struct meson_plane *meson_plane = to_meson_plane(plane);
>>>>>> +	struct meson_drm *priv = meson_plane->priv;
>>>>>> +	int i;
>>>>>> +
>>>>>> +	if (modifier == DRM_FORMAT_MOD_INVALID)
>>>>>> +		return false;
>>>>>> +
>>>>>> +	if (modifier == DRM_FORMAT_MOD_LINEAR)
>>>>>> +		return true;
>>>>>> +
>>>>>> +	if (!meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) &&
>>>>>> +	    !meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
>>>>>> +		return false;
>>>>>> +
>>>>>> +	if (modifier & ~DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
>>>>>> +		return false;
>>>>>> +
>>>>>> +	for (i = 0 ; i < plane->modifier_count ; ++i)
>>>>>> +		if (plane->modifiers[i] == modifier)
>>>>>> +			break;
>>>>>> +
>>>>>> +	if (i == plane->modifier_count) {
>>>>>> +		DRM_DEBUG_KMS("Unsupported modifier\n");
>>>>>> +		return false;
>>>>>> +	}
>>>>
>>>> I can add a warn_once here, would it be enough ?
>>>>
>>>>>> +
>>>>>> +	if (priv->afbcd.ops && priv->afbcd.ops->supported_fmt)
>>>>>> +		return priv->afbcd.ops->supported_fmt(modifier, format);
>>>>>> +
>>>>>> +	DRM_DEBUG_KMS("AFBC Unsupported\n");
>>>>>> +	return false;
>>>>>> +}
>>>>>> +
>>>>>>  static const struct drm_plane_funcs meson_plane_funcs = {
>>>>>>  	.update_plane		= drm_atomic_helper_update_plane,
>>>>>>  	.disable_plane		= drm_atomic_helper_disable_plane,
>>>>>> @@ -353,6 +457,7 @@ static const struct drm_plane_funcs meson_plane_funcs = {
>>>>>>  	.reset			= drm_atomic_helper_plane_reset,
>>>>>>  	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
>>>>>>  	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
>>>>>> +	.format_mod_supported   = meson_plane_format_mod_supported,
>>>>>>  };
>>>>>>  
>>>>>>  static const uint32_t supported_drm_formats[] = {
>>>>>> @@ -364,10 +469,53 @@ static const uint32_t supported_drm_formats[] = {
>>>>>>  	DRM_FORMAT_RGB565,
>>>>>>  };
>>>>>>  
>>>>>> +static const uint64_t format_modifiers_afbc_gxm[] = {
>>>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
>>>>>> +				AFBC_FORMAT_MOD_SPARSE |
>>>>>> +				AFBC_FORMAT_MOD_YTR),
>>>>>> +	/* SPLIT mandates SPARSE, RGB modes mandates YTR */
>>>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
>>>>>> +				AFBC_FORMAT_MOD_YTR |
>>>>>> +				AFBC_FORMAT_MOD_SPARSE |
>>>>>> +				AFBC_FORMAT_MOD_SPLIT),
>>>>>> +	DRM_FORMAT_MOD_LINEAR,
>>>>>> +	DRM_FORMAT_MOD_INVALID,
>>>>>> +};
>>>>>> +
>>>>>> +static const uint64_t format_modifiers_afbc_g12a[] = {
>>>>>> +	/*
>>>>>> +	 * - TOFIX Support AFBC modifiers for YUV formats (16x16 + TILED)
>>>>>> +	 * - AFBC_FORMAT_MOD_YTR is mandatory since we only support RGB
>>>>>> +	 * - SPLIT is mandatory for performances reasons when in 16x16
>>>>>> +	 *   block size
>>>>>> +	 * - 32x8 block size + SPLIT is mandatory with 4K frame size
>>>>>> +	 *   for performances reasons
>>>>>> +	 */
>>>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
>>>>>> +				AFBC_FORMAT_MOD_YTR |
>>>>>> +				AFBC_FORMAT_MOD_SPARSE |
>>>>>> +				AFBC_FORMAT_MOD_SPLIT),
>>>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
>>>>>> +				AFBC_FORMAT_MOD_YTR |
>>>>>> +				AFBC_FORMAT_MOD_SPARSE),
>>>>>> +	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
>>>>>> +				AFBC_FORMAT_MOD_YTR |
>>>>>> +				AFBC_FORMAT_MOD_SPARSE |
>>>>>> +				AFBC_FORMAT_MOD_SPLIT),
>>>>>> +	DRM_FORMAT_MOD_LINEAR,
>>>>>> +	DRM_FORMAT_MOD_INVALID,
>>>>>> +};
>>>>>> +
>>>>>> +static const uint64_t format_modifiers_default[] = {
>>>>>> +	DRM_FORMAT_MOD_LINEAR,
>>>>>> +	DRM_FORMAT_MOD_INVALID,
>>>>>> +};
>>>>>> +
>>>>>>  int meson_plane_create(struct meson_drm *priv)
>>>>>>  {
>>>>>>  	struct meson_plane *meson_plane;
>>>>>>  	struct drm_plane *plane;
>>>>>> +	const uint64_t *format_modifiers = format_modifiers_default;
>>>>>>  
>>>>>>  	meson_plane = devm_kzalloc(priv->drm->dev, sizeof(*meson_plane),
>>>>>>  				   GFP_KERNEL);
>>>>>> @@ -377,11 +525,16 @@ int meson_plane_create(struct meson_drm *priv)
>>>>>>  	meson_plane->priv = priv;
>>>>>>  	plane = &meson_plane->base;
>>>>>>  
>>>>>> +	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
>>>>>> +		format_modifiers = format_modifiers_afbc_gxm;
>>>>>> +	else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
>>>>>> +		format_modifiers = format_modifiers_afbc_g12a;
>>>>>> +
>>>>>>  	drm_universal_plane_init(priv->drm, plane, 0xFF,
>>>>>>  				 &meson_plane_funcs,
>>>>>>  				 supported_drm_formats,
>>>>>>  				 ARRAY_SIZE(supported_drm_formats),
>>>>>> -				 NULL,
>>>>>> +				 format_modifiers,
>>>>>>  				 DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane");
>>>>>>  
>>>>>>  	drm_plane_helper_add(plane, &meson_plane_helper_funcs);
>>>>>> -- 
>>>>>> 2.22.0
>>>>
>>>> _______________________________________________
>>>> dri-devel mailing list
>>>> dri-devel@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>>
Daniel Vetter Oct. 11, 2019, 5:25 p.m. UTC | #10
On Fri, Oct 11, 2019 at 12:56 PM Brian Starkey <Brian.Starkey@arm.com> wrote:
>
> Hi,
>
> On Fri, Oct 11, 2019 at 11:14:43AM +0200, Neil Armstrong wrote:
> > Hi Brian,
> >
> > On 11/10/2019 10:41, Brian Starkey wrote:

> > > Are you sure the GPU supports other orders? I think any Arm driver
> > > will only be producing DRM_FORMATs with "BGR" order e.g. ABGR888>
> > > I'm not convinced the GPU HW actually supports any other order, but
> > > it's all rather confusing with texture swizzling. What I can tell you
> > > for sure is that it _does_ support BGR order (in DRM naming
> > > convention).
> >
> > Well, since the Bifrost Mali blobs are closed-source and delivered
> > by licensees, it's hard to define what is supported from a closed
> > GPU HW, closed SW implementation to a closed pixel format implementation.
> >
>
> I hear you. IMO the only way to make any of this clear is to publish
> reference data and tests which make sure implementations match each
> other. It's something I'm trying to make happen.

*cough* igt test with crc/writeback validation *cough*

And you don't even need anything that actually compresses. All you
need is just enough AFBC knowledge to set up the metadata saying that
everything is uncompressed. We're doing that for our Intel compression
formats already, and it works great in making sure that we have
end-to-end agreement on all the bits and ordering and everything. Ofc
it doesn't validate the decoder, but that's not really igt's job.
Should be possible to convince ARM to release that (as open source, in
igt), since it would be fairly valuable for the entire ecosystem here
...
-Daniel
Brian Starkey Oct. 14, 2019, 9:11 a.m. UTC | #11
On Fri, Oct 11, 2019 at 07:25:02PM +0200, Daniel Vetter wrote:
> On Fri, Oct 11, 2019 at 12:56 PM Brian Starkey <Brian.Starkey@arm.com> wrote:
> >
> > Hi,
> >
> > On Fri, Oct 11, 2019 at 11:14:43AM +0200, Neil Armstrong wrote:
> > > Hi Brian,
> > >
> > > On 11/10/2019 10:41, Brian Starkey wrote:
> 
> > > > Are you sure the GPU supports other orders? I think any Arm driver
> > > > will only be producing DRM_FORMATs with "BGR" order e.g. ABGR888>
> > > > I'm not convinced the GPU HW actually supports any other order, but
> > > > it's all rather confusing with texture swizzling. What I can tell you
> > > > for sure is that it _does_ support BGR order (in DRM naming
> > > > convention).
> > >
> > > Well, since the Bifrost Mali blobs are closed-source and delivered
> > > by licensees, it's hard to define what is supported from a closed
> > > GPU HW, closed SW implementation to a closed pixel format implementation.
> > >
> >
> > I hear you. IMO the only way to make any of this clear is to publish
> > reference data and tests which make sure implementations match each
> > other. It's something I'm trying to make happen.
> 
> *cough* igt test with crc/writeback validation *cough*
> 
> And you don't even need anything that actually compresses. All you
> need is the minimal enough AFBC knowledge to set up the metadata that
> everything is uncompressed. We're doing that for our intel compression
> formats already, and it works great in making sure that we have
> end-to-end agreement on all the bits and ordering and everything.

Yeah this was my original thinking too. Sadly, a decent number of the
AFBC parameters can't be tested with uncompressed data, as the codec
kicks into a different mode there.

> Ofc
> it doesn't validate the decoder, but that's not really igts job.
> Should be possible to convince ARM to release that (as open source, in
> igt), since it would be fairly valuable for the entire ecosystem here
> ...

I fully agree about the value proposition.

-Brian

> -Daniel
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
Daniel Vetter Oct. 14, 2019, 9:20 a.m. UTC | #12
On Mon, Oct 14, 2019 at 09:11:17AM +0000, Brian Starkey wrote:
> On Fri, Oct 11, 2019 at 07:25:02PM +0200, Daniel Vetter wrote:
> > On Fri, Oct 11, 2019 at 12:56 PM Brian Starkey <Brian.Starkey@arm.com> wrote:
> > >
> > > Hi,
> > >
> > > On Fri, Oct 11, 2019 at 11:14:43AM +0200, Neil Armstrong wrote:
> > > > Hi Brian,
> > > >
> > > > On 11/10/2019 10:41, Brian Starkey wrote:
> > 
> > > > > Are you sure the GPU supports other orders? I think any Arm driver
> > > > > will only be producing DRM_FORMATs with "BGR" order e.g. ABGR888>
> > > > > I'm not convinced the GPU HW actually supports any other order, but
> > > > > it's all rather confusing with texture swizzling. What I can tell you
> > > > > for sure is that it _does_ support BGR order (in DRM naming
> > > > > convention).
> > > >
> > > > Well, since the Bifrost Mali blobs are closed-source and delivered
> > > > by licensees, it's hard to define what is supported from a closed
> > > > GPU HW, closed SW implementation to a closed pixel format implementation.
> > > >
> > >
> > > I hear you. IMO the only way to make any of this clear is to publish
> > > reference data and tests which make sure implementations match each
> > > other. It's something I'm trying to make happen.
> > 
> > *cough* igt test with crc/writeback validation *cough*
> > 
> > And you don't even need anything that actually compresses. All you
> > need is the minimal enough AFBC knowledge to set up the metadata that
> > everything is uncompressed. We're doing that for our intel compression
> > formats already, and it works great in making sure that we have
> > end-to-end agreement on all the bits and ordering and everything.
> 
> Yeah this was my original thinking too. Sadly, a decent number of the
> AFBC parameters can't be tested with uncompressed data, as the codec
> kicks into a different mode there.

Hm right I guess some of the flags/parameters only matter if you deal with
actual compressed data. Still, better than nothing I guess - this should
at least catch stuff like color channels wired up the wrong way compared
to linear, and fun stuff like that.

> > Ofc
> > it doesn't validate the decoder, but that's not really igts job.
> > Should be possible to convince ARM to release that (as open source, in
> > igt), since it would be fairly valuable for the entire ecosystem here
> > ...
> 
> I fully agree about the value proposition.

I'll be waiting for a patch from Arm then :-)

Cheers, Daniel
Brian Starkey Oct. 15, 2019, 11:18 a.m. UTC | #13
Hi Neil,

On Fri, Oct 11, 2019 at 02:07:01PM +0200, Neil Armstrong wrote:
> On 11/10/2019 12:56, Brian Starkey wrote:
> > Hi,
> > 
> > On Fri, Oct 11, 2019 at 11:14:43AM +0200, Neil Armstrong wrote:
> >> Hi Brian,
> >>
> >> On 11/10/2019 10:41, Brian Starkey wrote:
> >>> Hi Neil,
> >>>
> >>> On Thu, Oct 10, 2019 at 03:41:15PM +0200, Neil Armstrong wrote:

[snip]

> >> You'll have to tell us if the closed libMali handling AFBC would accept
> >> ARGB8888 as format to render with AFBC enabled, if not you're right
> >> I'll discard XRGB8888/ARGB8888 for AFBC buffers completely.
> >>
> >> But it the libMali chooses tt generate an ARGB8888 buffer whatever
> >> ARGB8888/XRGB8888/ABGR888/XBGR888 is asked, then no I'll keep it that way.
> >>
> > 
> > Yeah, I'll try and get clarity on this. It's not at all clear to me
> > either. When you say "accept ARGB8888 as format to render with AFBC
> > enabled", which API are you referring to, just so I can be clear? Do
> > you have an example of some code you're using to render AFBC with the
> > GPU blob?
> 
> Let's take kmscube using EGL and GBM.
> 
> The buffer is allocated using gbm_surface_create_with_modifiers(),
> but the gbm implementation is vendor specified.
> 
> Then the surface is passed to eglCreateWindowSurface().
> Then the format is matched using eglGetConfigAttrib() with the
> returned configs.
> 
> Here support on the gbm and EGL implementation.
> 

Is this a use-case that works on your platform today?

I went and asked around. An Arm gbm implementation supporting AFBC
will reject AFBC allocations for orders other than (DRM-convention)
BGR.

Thanks,
-Brian
Neil Armstrong Oct. 15, 2019, 11:46 a.m. UTC | #14
On 15/10/2019 13:18, Brian Starkey wrote:
> Hi Neil,
> 
> On Fri, Oct 11, 2019 at 02:07:01PM +0200, Neil Armstrong wrote:
>> On 11/10/2019 12:56, Brian Starkey wrote:
>>> Hi,
>>>
>>> On Fri, Oct 11, 2019 at 11:14:43AM +0200, Neil Armstrong wrote:
>>>> Hi Brian,
>>>>
>>>> On 11/10/2019 10:41, Brian Starkey wrote:
>>>>> Hi Neil,
>>>>>
>>>>> On Thu, Oct 10, 2019 at 03:41:15PM +0200, Neil Armstrong wrote:
> 
> [snip]
> 
>>>> You'll have to tell us if the closed libMali handling AFBC would accept
>>>> ARGB8888 as format to render with AFBC enabled, if not you're right
>>>> I'll discard XRGB8888/ARGB8888 for AFBC buffers completely.
>>>>
>>>> But it the libMali chooses tt generate an ARGB8888 buffer whatever
>>>> ARGB8888/XRGB8888/ABGR888/XBGR888 is asked, then no I'll keep it that way.
>>>>
>>>
>>> Yeah, I'll try and get clarity on this. It's not at all clear to me
>>> either. When you say "accept ARGB8888 as format to render with AFBC
>>> enabled", which API are you referring to, just so I can be clear? Do
>>> you have an example of some code you're using to render AFBC with the
>>> GPU blob?
>>
>> Let's take kmscube using EGL and GBM.
>>
>> The buffer is allocated using gbm_surface_create_with_modifiers(),
>> but the gbm implementation is vendor specified.
>>
>> Then the surface is passed to eglCreateWindowSurface().
>> Then the format is matched using eglGetConfigAttrib() with the
>> returned configs.
>>
>> Here support on the gbm and EGL implementation.
>>
> 
> Is this a use-case that works on your platform today?

Amlogic gave me a libMali for miniGBM with AFBC enabled, but I haven't
been able to enable AFBC yet.

> 
> I went and asked around. An Arm gbm implementation supporting AFBC
> will reject AFBC allocations for orders other than (DRM-convention)
> BGR.

I trust you on this point, thanks for asking around.

Neil

> 
> Thanks,
> -Brian
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
index 57ae1c13d1e6..d478fa232951 100644
--- a/drivers/gpu/drm/meson/meson_crtc.c
+++ b/drivers/gpu/drm/meson/meson_crtc.c
@@ -281,6 +281,8 @@  void meson_crtc_irq(struct meson_drm *priv)
 	if (priv->viu.osd1_enabled && priv->viu.osd1_commit) {
 		writel_relaxed(priv->viu.osd1_ctrl_stat,
 				priv->io_base + _REG(VIU_OSD1_CTRL_STAT));
+		writel_relaxed(priv->viu.osd1_ctrl_stat2,
+				priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
 		writel_relaxed(priv->viu.osd1_blk0_cfg[0],
 				priv->io_base + _REG(VIU_OSD1_BLK0_CFG_W0));
 		writel_relaxed(priv->viu.osd1_blk0_cfg[1],
diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
index 60f13c6f34e5..de25349be8aa 100644
--- a/drivers/gpu/drm/meson/meson_drv.h
+++ b/drivers/gpu/drm/meson/meson_drv.h
@@ -53,8 +53,12 @@  struct meson_drm {
 		bool osd1_enabled;
 		bool osd1_interlace;
 		bool osd1_commit;
+		bool osd1_afbcd;
 		uint32_t osd1_ctrl_stat;
+		uint32_t osd1_ctrl_stat2;
 		uint32_t osd1_blk0_cfg[5];
+		uint32_t osd1_blk1_cfg4;
+		uint32_t osd1_blk2_cfg4;
 		uint32_t osd1_addr;
 		uint32_t osd1_stride;
 		uint32_t osd1_height;
diff --git a/drivers/gpu/drm/meson/meson_plane.c b/drivers/gpu/drm/meson/meson_plane.c
index 5e798c276037..412941aa8402 100644
--- a/drivers/gpu/drm/meson/meson_plane.c
+++ b/drivers/gpu/drm/meson/meson_plane.c
@@ -23,6 +23,7 @@ 
 #include "meson_plane.h"
 #include "meson_registers.h"
 #include "meson_viu.h"
+#include "meson_osd_afbcd.h"
 
 /* OSD_SCI_WH_M1 */
 #define SCI_WH_M1_W(w)			FIELD_PREP(GENMASK(28, 16), w)
@@ -92,12 +93,38 @@  static int meson_plane_atomic_check(struct drm_plane *plane,
 						   false, true);
 }
 
+#define MESON_MOD_AFBC_VALID_BITS (AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |	\
+				   AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |	\
+				   AFBC_FORMAT_MOD_YTR |		\
+				   AFBC_FORMAT_MOD_SPARSE |		\
+				   AFBC_FORMAT_MOD_SPLIT)
+
 /* Takes a fixed 16.16 number and converts it to integer. */
 static inline int64_t fixed16_to_int(int64_t value)
 {
 	return value >> 16;
 }
 
+static u32 meson_g12a_afbcd_line_stride(struct meson_drm *priv)
+{
+	u32 line_stride = 0;
+
+	switch (priv->afbcd.format) {
+	case DRM_FORMAT_RGB565:
+		line_stride = ((priv->viu.osd1_width << 4) + 127) >> 7;
+		break;
+	case DRM_FORMAT_RGB888:
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_ARGB8888:
+	case DRM_FORMAT_XBGR8888:
+	case DRM_FORMAT_ABGR8888:
+		line_stride = ((priv->viu.osd1_width << 5) + 127) >> 7;
+		break;
+	}
+
+	return ((line_stride + 1) >> 1) << 1;
+}
+
 static void meson_plane_atomic_update(struct drm_plane *plane,
 				      struct drm_plane_state *old_state)
 {
@@ -126,57 +153,88 @@  static void meson_plane_atomic_update(struct drm_plane *plane,
 	 */
 	spin_lock_irqsave(&priv->drm->event_lock, flags);
 
+	/* Check if AFBC decoder is required for this buffer */
+	if ((meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) ||
+	     meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) &&
+	    fb->modifier & DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
+		priv->viu.osd1_afbcd = true;
+	else
+		priv->viu.osd1_afbcd = false;
+
 	/* Enable OSD and BLK0, set max global alpha */
 	priv->viu.osd1_ctrl_stat = OSD_ENABLE |
 				   (0xFF << OSD_GLOBAL_ALPHA_SHIFT) |
 				   OSD_BLK0_ENABLE;
 
+	priv->viu.osd1_ctrl_stat2 = readl(priv->io_base +
+					  _REG(VIU_OSD1_CTRL_STAT2));
+
 	canvas_id_osd1 = priv->canvas_id_osd1;
 
 	/* Set up BLK0 to point to the right canvas */
-	priv->viu.osd1_blk0_cfg[0] = ((canvas_id_osd1 << OSD_CANVAS_SEL) |
-				      OSD_ENDIANNESS_LE);
+	priv->viu.osd1_blk0_cfg[0] = canvas_id_osd1 << OSD_CANVAS_SEL;
+
+	if (priv->viu.osd1_afbcd) {
+		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) {
+			/* This is the internal decoding memory address */
+			priv->viu.osd1_blk1_cfg4 = MESON_G12A_AFBCD_OUT_ADDR;
+			priv->viu.osd1_blk0_cfg[0] |= OSD_ENDIANNESS_BE;
+			priv->viu.osd1_ctrl_stat2 |= OSD_PENDING_STAT_CLEAN;
+		}
+
+		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM)) {
+			priv->viu.osd1_blk0_cfg[0] |= OSD_ENDIANNESS_LE;
+			priv->viu.osd1_ctrl_stat2 |= OSD_DPATH_MALI_AFBCD;
+		}
+	} else {
+		priv->viu.osd1_blk0_cfg[0] |= OSD_ENDIANNESS_LE;
+
+		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
+			priv->viu.osd1_ctrl_stat2 &= ~OSD_DPATH_MALI_AFBCD;
+	}
 
 	/* On GXBB, Use the old non-HDR RGB2YUV converter */
 	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXBB))
 		priv->viu.osd1_blk0_cfg[0] |= OSD_OUTPUT_COLOR_RGB;
 
+	if (priv->viu.osd1_afbcd &&
+	    meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) {
+		priv->viu.osd1_blk0_cfg[0] |= OSD_MALI_SRC_EN |
+			priv->afbcd.ops->fmt_to_blk_mode(fb->modifier,
+							  fb->format->format);
+	} else {
+		switch (fb->format->format) {
+		case DRM_FORMAT_XRGB8888:
+		case DRM_FORMAT_ARGB8888:
+			priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
+						OSD_COLOR_MATRIX_32_ARGB;
+			break;
+		case DRM_FORMAT_XBGR8888:
+		case DRM_FORMAT_ABGR8888:
+			priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
+						OSD_COLOR_MATRIX_32_ABGR;
+			break;
+		case DRM_FORMAT_RGB888:
+			priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_24 |
+						OSD_COLOR_MATRIX_24_RGB;
+			break;
+		case DRM_FORMAT_RGB565:
+			priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_16 |
+						OSD_COLOR_MATRIX_16_RGB565;
+			break;
+		};
+	}
+
 	switch (fb->format->format) {
 	case DRM_FORMAT_XRGB8888:
-		/* For XRGB, replace the pixel's alpha by 0xFF */
-		writel_bits_relaxed(OSD_REPLACE_EN, OSD_REPLACE_EN,
-				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
-		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
-					      OSD_COLOR_MATRIX_32_ARGB;
-		break;
 	case DRM_FORMAT_XBGR8888:
 		/* For XRGB, replace the pixel's alpha by 0xFF */
-		writel_bits_relaxed(OSD_REPLACE_EN, OSD_REPLACE_EN,
-				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
-		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
-					      OSD_COLOR_MATRIX_32_ABGR;
+		priv->viu.osd1_ctrl_stat2 |= OSD_REPLACE_EN;
 		break;
 	case DRM_FORMAT_ARGB8888:
-		/* For ARGB, use the pixel's alpha */
-		writel_bits_relaxed(OSD_REPLACE_EN, 0,
-				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
-		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
-					      OSD_COLOR_MATRIX_32_ARGB;
-		break;
 	case DRM_FORMAT_ABGR8888:
 		/* For ARGB, use the pixel's alpha */
-		writel_bits_relaxed(OSD_REPLACE_EN, 0,
-				    priv->io_base + _REG(VIU_OSD1_CTRL_STAT2));
-		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_32 |
-					      OSD_COLOR_MATRIX_32_ABGR;
-		break;
-	case DRM_FORMAT_RGB888:
-		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_24 |
-					      OSD_COLOR_MATRIX_24_RGB;
-		break;
-	case DRM_FORMAT_RGB565:
-		priv->viu.osd1_blk0_cfg[0] |= OSD_BLK_MODE_16 |
-					      OSD_COLOR_MATRIX_16_RGB565;
+		priv->viu.osd1_ctrl_stat2 &= ~OSD_REPLACE_EN;
 		break;
 	};
 
@@ -307,6 +365,16 @@  static void meson_plane_atomic_update(struct drm_plane *plane,
 	priv->viu.osd1_height = fb->height;
 	priv->viu.osd1_width = fb->width;
 
+	if (priv->viu.osd1_afbcd) {
+		priv->afbcd.modifier = fb->modifier;
+		priv->afbcd.format = fb->format->format;
+
+		/* Calculate decoder write stride */
+		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
+			priv->viu.osd1_blk2_cfg4 =
+				meson_g12a_afbcd_line_stride(priv);
+	}
+
 	if (!meson_plane->enabled) {
 		/* Reset OSD1 before enabling it on GXL+ SoCs */
 		if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) ||
@@ -346,6 +414,42 @@  static const struct drm_plane_helper_funcs meson_plane_helper_funcs = {
 	.prepare_fb	= drm_gem_fb_prepare_fb,
 };
 
+static bool meson_plane_format_mod_supported(struct drm_plane *plane,
+					     u32 format, u64 modifier)
+{
+	struct meson_plane *meson_plane = to_meson_plane(plane);
+	struct meson_drm *priv = meson_plane->priv;
+	unsigned int i;	/* unsigned, like plane->modifier_count */
+
+	if (modifier == DRM_FORMAT_MOD_INVALID)
+		return false;
+
+	if (modifier == DRM_FORMAT_MOD_LINEAR)
+		return true;	/* linear is accepted before any SoC check */
+
+	if (!meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) &&
+	    !meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
+		return false;	/* only GXM and G12A go past this point */
+
+	if (modifier & ~DRM_FORMAT_MOD_ARM_AFBC(MESON_MOD_AFBC_VALID_BITS))
+		return false;	/* a bit outside the accepted AFBC set is set */
+
+	for (i = 0 ; i < plane->modifier_count ; ++i)
+		if (plane->modifiers[i] == modifier)
+			break;
+
+	if (i == plane->modifier_count) {	/* not in the plane's own list */
+		DRM_DEBUG_KMS("Unsupported modifier\n");
+		return false;
+	}
+
+	if (priv->afbcd.ops && priv->afbcd.ops->supported_fmt)
+		return priv->afbcd.ops->supported_fmt(modifier, format);
+
+	DRM_DEBUG_KMS("AFBC Unsupported\n");	/* no supported_fmt callback set */
+	return false;
+}
+
 static const struct drm_plane_funcs meson_plane_funcs = {
 	.update_plane		= drm_atomic_helper_update_plane,
 	.disable_plane		= drm_atomic_helper_disable_plane,
@@ -353,6 +457,7 @@  static const struct drm_plane_funcs meson_plane_funcs = {
 	.reset			= drm_atomic_helper_plane_reset,
 	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
 	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
+	.format_mod_supported   = meson_plane_format_mod_supported,
 };
 
 static const uint32_t supported_drm_formats[] = {
@@ -364,10 +469,53 @@  static const uint32_t supported_drm_formats[] = {
 	DRM_FORMAT_RGB565,
 };
 
+static const uint64_t format_modifiers_afbc_gxm[] = {
+	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
+				AFBC_FORMAT_MOD_SPARSE |
+				AFBC_FORMAT_MOD_YTR),
+	/* SPLIT mandates SPARSE, RGB modes mandate YTR */
+	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
+				AFBC_FORMAT_MOD_YTR |
+				AFBC_FORMAT_MOD_SPARSE |
+				AFBC_FORMAT_MOD_SPLIT),
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID,
+};
+
+static const uint64_t format_modifiers_afbc_g12a[] = {
+	/*
+	 * - TOFIX Support AFBC modifiers for YUV formats (16x16 + TILED)
+	 * - AFBC_FORMAT_MOD_YTR is mandatory since we only support RGB
+	 * - SPLIT is mandatory for performance reasons when in 16x16
+	 *   block size
+	 * - 32x8 block size + SPLIT is mandatory with 4K frame size
+	 *   for performance reasons
+	 */
+	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
+				AFBC_FORMAT_MOD_YTR |
+				AFBC_FORMAT_MOD_SPARSE |
+				AFBC_FORMAT_MOD_SPLIT),
+	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
+				AFBC_FORMAT_MOD_YTR |
+				AFBC_FORMAT_MOD_SPARSE),
+	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
+				AFBC_FORMAT_MOD_YTR |
+				AFBC_FORMAT_MOD_SPARSE |
+				AFBC_FORMAT_MOD_SPLIT),
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID,
+};
+
+static const uint64_t format_modifiers_default[] = {
+	DRM_FORMAT_MOD_LINEAR,	/* linear only, no AFBC entries */
+	DRM_FORMAT_MOD_INVALID,
+};
+
 int meson_plane_create(struct meson_drm *priv)
 {
 	struct meson_plane *meson_plane;
 	struct drm_plane *plane;
+	const uint64_t *format_modifiers = format_modifiers_default;
 
 	meson_plane = devm_kzalloc(priv->drm->dev, sizeof(*meson_plane),
 				   GFP_KERNEL);
@@ -377,11 +525,16 @@  int meson_plane_create(struct meson_drm *priv)
 	meson_plane->priv = priv;
 	plane = &meson_plane->base;
 
+	if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM))
+		format_modifiers = format_modifiers_afbc_gxm;
+	else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
+		format_modifiers = format_modifiers_afbc_g12a;
+
 	drm_universal_plane_init(priv->drm, plane, 0xFF,
 				 &meson_plane_funcs,
 				 supported_drm_formats,
 				 ARRAY_SIZE(supported_drm_formats),
-				 NULL,
+				 format_modifiers,
 				 DRM_PLANE_TYPE_PRIMARY, "meson_primary_plane");
 
 	drm_plane_helper_add(plane, &meson_plane_helper_funcs);