diff mbox series

[RESEND,v2] drm/mediatek: Add AFBC support to Mediatek DRM driver

Message ID 20221010150157.1864492-1-greenjustin@google.com (mailing list archive)
State New, archived
Headers show
Series [RESEND,v2] drm/mediatek: Add AFBC support to Mediatek DRM driver | expand

Commit Message

Justin Green Oct. 10, 2022, 3:01 p.m. UTC
From: Justin Green <greenjustin@chromium.org>

Add AFBC support to Mediatek DRM driver and enable on MT8195.

Tested on MT8195 and confirmed both correct video output and improved DRAM
bandwidth performance.

v2:
Marked mtk_ovl_set_afbc as static, reflowed some lines to fit column
limit.

Signed-off-by: Justin Green <greenjustin@chromium.org>
---
 drivers/gpu/drm/mediatek/mtk_disp_ovl.c  | 108 ++++++++++++++++++++---
 drivers/gpu/drm/mediatek/mtk_drm_plane.c |  37 +++++++-
 drivers/gpu/drm/mediatek/mtk_drm_plane.h |   8 ++
 3 files changed, 140 insertions(+), 13 deletions(-)

Comments

AngeloGioacchino Del Regno Oct. 11, 2022, 9:09 a.m. UTC | #1
Il 10/10/22 17:01, Justin Green ha scritto:
> From: Justin Green <greenjustin@chromium.org>
> 
> Add AFBC support to Mediatek DRM driver and enable on MT8195.
> 
> Tested on MT8195 and confirmed both correct video output and improved DRAM
> bandwidth performance.
> 
> v2:
> Marked mtk_ovl_set_afbc as static, reflowed some lines to fit column
> limit.
> 
> Signed-off-by: Justin Green <greenjustin@chromium.org>

Hello Justin,
thanks for the patch!

However, there's something to improve...

> ---
>   drivers/gpu/drm/mediatek/mtk_disp_ovl.c  | 108 ++++++++++++++++++++---
>   drivers/gpu/drm/mediatek/mtk_drm_plane.c |  37 +++++++-
>   drivers/gpu/drm/mediatek/mtk_drm_plane.h |   8 ++
>   3 files changed, 140 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> index 002b0f6cae1a..1724ea85a840 100644
> --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c

..snip..

> @@ -208,6 +231,8 @@ int mtk_ovl_layer_check(struct device *dev, unsigned int idx,
>   {
>   	struct drm_plane_state *state = &mtk_state->base;
>   	unsigned int rotation = 0;
> +	unsigned long long modifier;
> +	unsigned int fourcc;
>   
>   	rotation = drm_rotation_simplify(state->rotation,
>   					 DRM_MODE_ROTATE_0 |
> @@ -226,6 +251,30 @@ int mtk_ovl_layer_check(struct device *dev, unsigned int idx,
>   	if (state->fb->format->is_yuv && rotation != 0)
>   		return -EINVAL;
>   
> +	if (state->fb->modifier) {
> +		struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);

Since you're introducing modifier and fourcc for this branch only, you
may as well just declare them here instead, but either way is fine.

> +
> +		if (!ovl->data->supports_afbc)
> +			return -EINVAL;
> +
> +		modifier = state->fb->modifier;
> +
> +		if (modifier != DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> +							AFBC_FORMAT_MOD_SPLIT |
> +							AFBC_FORMAT_MOD_SPARSE))
> +			return -EINVAL;
> +
> +		fourcc = state->fb->format->format;
> +		if (fourcc != DRM_FORMAT_BGRA8888 &&
> +		    fourcc != DRM_FORMAT_ABGR8888 &&
> +		    fourcc != DRM_FORMAT_ARGB8888 &&
> +		    fourcc != DRM_FORMAT_XRGB8888 &&
> +		    fourcc != DRM_FORMAT_XBGR8888 &&
> +		    fourcc != DRM_FORMAT_RGB888 &&
> +		    fourcc != DRM_FORMAT_BGR888)
> +			return -EINVAL;
> +	}
> +
>   	state->rotation = rotation;
>   
>   	return 0;
> @@ -310,11 +359,14 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
>   	struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
>   	struct mtk_plane_pending_state *pending = &state->pending;
>   	unsigned int addr = pending->addr;
> -	unsigned int pitch = pending->pitch & 0xffff;
> +	unsigned int hdr_addr = pending->hdr_addr;
> +	unsigned int pitch = pending->pitch;
> +	unsigned int hdr_pitch = pending->hdr_pitch;
>   	unsigned int fmt = pending->format;
>   	unsigned int offset = (pending->y << 16) | pending->x;
>   	unsigned int src_size = (pending->height << 16) | pending->width;
>   	unsigned int con;
> +	bool is_afbc = pending->modifier;
>   
>   	if (!pending->enable) {
>   		mtk_ovl_layer_off(dev, idx, cmdq_pkt);
> @@ -335,16 +387,39 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
>   		addr += pending->pitch - 1;
>   	}
>   
> -	mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
> -			      DISP_REG_OVL_CON(idx));
> -	mtk_ddp_write_relaxed(cmdq_pkt, pitch, &ovl->cmdq_reg, ovl->regs,
> -			      DISP_REG_OVL_PITCH(idx));
> -	mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
> -			      DISP_REG_OVL_SRC_SIZE(idx));
> -	mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
> -			      DISP_REG_OVL_OFFSET(idx));
> -	mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
> -			      DISP_REG_OVL_ADDR(ovl, idx));
> +	mtk_ovl_set_afbc(dev, cmdq_pkt, idx, is_afbc);
> +	if (!is_afbc) {
> +		mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_CON(idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, pitch & 0xFFFF, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_PITCH(idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_SRC_SIZE(idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_OFFSET(idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_ADDR(ovl, idx));
> +	} else {
> +		mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_ADDR(ovl, idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_HDR_ADDR(ovl, idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_SRC_SIZE(idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt,
> +				      OVL_PITCH_MSB_2ND_SUBBUF | ((pitch >> 16) & 0xFFFF),

I say that this would be more readable if you use bitfield macros instead but,
anyway, that magic "16" number must get a definition.
I have the same comment about the GENMASK(15, 0) (== 0xffff), but I know that
doesn't come from you... would still be nice to also add a definition, which
is anyway "practically mandatory" if you use FIELD_PREP(mask, val).

> +				      &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, pitch & 0xFFFF, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_PITCH(idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_HDR_PITCH(ovl, idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_CON(idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_OFFSET(idx));
> +		mtk_ddp_write_relaxed(cmdq_pkt, 0, &ovl->cmdq_reg, ovl->regs,
> +				      DISP_REG_OVL_CLIP(idx));
> +	}

In any case - are you use that we *must* program the registers in this exact
sequence?

Here, the OVL layer is supposed to be OFF (OVL_SRC_CON == 0, OVL_RDMA_CTRL == 0)
so the contents of the registers that you're modifying should not matter at all
until the layer is turned on.

Note that, in case it doesn't matter, this gets greatly simplified, exactly as:

at the top -- after DISP_REG_OVL_PITCH_MSB(n):
#define OVL_SRC_PITCH_MSB	GENMASK(3, 0)

after DISP_REG_OVL_PITCH(n):
#define OVL_SRC_PITCH_LSB	GENMASK(15, 0)

	mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
			      DISP_REG_OVL_CON(idx));
	mtk_ddp_write_relaxed(cmdq_pkt, FIELD_PREP(OVL_SRC_PITCH_LSB, pitch),
			      &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH(idx));
	mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
			      DISP_REG_OVL_SRC_SIZE(idx));
	mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
			      DISP_REG_OVL_OFFSET(idx));
	mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
			      DISP_REG_OVL_ADDR(ovl, idx));

	if (is_afbc) {
		pitch_msb = FIELD_PREP(OVL_SRC_PITCH_MSB, (pitch >> 16));
		pitch_msb |= FIELD_PREP(OVL_PITCH_MSB_2ND_SUBBUF, 1);
		mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs,
				      DISP_REG_OVL_HDR_ADDR(ovl, idx));
		mtk_ddp_write_relaxed(cmdq_pkt, val, &ovl->cmdq_reg, ovl->regs,
				      DISP_REG_OVL_PITCH_MSB(idx));
		mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs,
				      DISP_REG_OVL_HDR_PITCH(ovl, idx));
		mtk_ddp_write_relaxed(cmdq_pkt, 0, &ovl->cmdq_reg, ovl->regs,
				      DISP_REG_OVL_CLIP(idx));
	}

>   
>   	mtk_ovl_layer_on(dev, idx, cmdq_pkt);
>   }
> @@ -492,6 +567,15 @@ static const struct mtk_disp_ovl_data mt8192_ovl_2l_driver_data = {

..snip..

> diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
> index 5c0d9ce69931..734d2554b2b8 100644
> --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
> +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
> @@ -12,6 +12,7 @@
>   #include <drm/drm_framebuffer.h>
>   #include <drm/drm_gem_atomic_helper.h>
>   #include <drm/drm_plane_helper.h>
> +#include <linux/align.h>
>   
>   #include "mtk_drm_crtc.h"
>   #include "mtk_drm_ddp_comp.h"
> @@ -52,6 +53,7 @@ static void mtk_plane_reset(struct drm_plane *plane)
>   
>   	state->base.plane = plane;
>   	state->pending.format = DRM_FORMAT_RGB565;
> +	state->pending.modifier = 0;
>   }
>   
>   static struct drm_plane_state *mtk_plane_duplicate_state(struct drm_plane *plane)
> @@ -120,21 +122,52 @@ static void mtk_plane_update_new_state(struct drm_plane_state *new_state,
>   	struct drm_gem_object *gem;
>   	struct mtk_drm_gem_obj *mtk_gem;
>   	unsigned int pitch, format;
> +	unsigned long long modifier;
>   	dma_addr_t addr;
> +	dma_addr_t hdr_addr = 0;
> +	unsigned int hdr_pitch = 0;
>   
>   	gem = fb->obj[0];
>   	mtk_gem = to_mtk_gem_obj(gem);
>   	addr = mtk_gem->dma_addr;
>   	pitch = fb->pitches[0];
>   	format = fb->format->format;
> +	modifier = fb->modifier;
>   
> -	addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
> -	addr += (new_state->src.y1 >> 16) * pitch;
> +	if (!modifier) {
> +		addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
> +		addr += (new_state->src.y1 >> 16) * pitch;
> +	} else {
> +		int width_in_blocks = ALIGN(fb->width, AFBC_DATA_BLOCK_WIDTH)
> +				      / AFBC_DATA_BLOCK_WIDTH;
> +		int height_in_blocks = ALIGN(fb->height, AFBC_DATA_BLOCK_HEIGHT)
> +				       / AFBC_DATA_BLOCK_HEIGHT;
> +		int x_offset_in_blocks = (new_state->src.x1 >> 16) / AFBC_DATA_BLOCK_WIDTH;
> +		int y_offset_in_blocks = (new_state->src.y1 >> 16) / AFBC_DATA_BLOCK_HEIGHT;
> +		int hdr_size;
> +
> +		hdr_pitch = width_in_blocks * AFBC_HEADER_BLOCK_SIZE;
> +		pitch = width_in_blocks * AFBC_DATA_BLOCK_WIDTH *
> +			AFBC_DATA_BLOCK_HEIGHT * fb->format->cpp[0];
> +
> +		hdr_size = ALIGN(hdr_pitch * height_in_blocks, AFBC_HEADER_ALIGNMENT);
> +
> +		hdr_addr = addr + hdr_pitch * y_offset_in_blocks +
> +			   AFBC_HEADER_BLOCK_SIZE * x_offset_in_blocks;
> +		// The data plane is offset by 1 additional block.

C-style comments please.

> +		addr = addr + hdr_size +
> +		       pitch * y_offset_in_blocks +
> +		       AFBC_DATA_BLOCK_WIDTH * AFBC_DATA_BLOCK_HEIGHT *
> +		       fb->format->cpp[0] * (x_offset_in_blocks + 1);
> +	}
>   

Regards,
Angelo
Justin Green Oct. 11, 2022, 6:28 p.m. UTC | #2
Hi Angelo,
Thanks for the suggestions! I'll upload another patch with those changes.

Re the pitch register math, would it be acceptable to define separate
macros for the LSB and MSB to abstract away the magic numbers? For
example:
#define OVL_PITCH_MSB(n)                        ((n >> 16) & GENMASK(15, 0))
#define OVL_PITCH_LSB(n)                        (n & GENMASK(15, 0))

Regards,
Justin

On Tue, Oct 11, 2022 at 5:09 AM AngeloGioacchino Del Regno
<angelogioacchino.delregno@collabora.com> wrote:
>
> Il 10/10/22 17:01, Justin Green ha scritto:
> > From: Justin Green <greenjustin@chromium.org>
> >
> > Add AFBC support to Mediatek DRM driver and enable on MT8195.
> >
> > Tested on MT8195 and confirmed both correct video output and improved DRAM
> > bandwidth performance.
> >
> > v2:
> > Marked mtk_ovl_set_afbc as static, reflowed some lines to fit column
> > limit.
> >
> > Signed-off-by: Justin Green <greenjustin@chromium.org>
>
> Hello Justin,
> thanks for the patch!
>
> However, there's something to improve...
>
> > ---
> >   drivers/gpu/drm/mediatek/mtk_disp_ovl.c  | 108 ++++++++++++++++++++---
> >   drivers/gpu/drm/mediatek/mtk_drm_plane.c |  37 +++++++-
> >   drivers/gpu/drm/mediatek/mtk_drm_plane.h |   8 ++
> >   3 files changed, 140 insertions(+), 13 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> > index 002b0f6cae1a..1724ea85a840 100644
> > --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
> > +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
>
> ..snip..
>
> > @@ -208,6 +231,8 @@ int mtk_ovl_layer_check(struct device *dev, unsigned int idx,
> >   {
> >       struct drm_plane_state *state = &mtk_state->base;
> >       unsigned int rotation = 0;
> > +     unsigned long long modifier;
> > +     unsigned int fourcc;
> >
> >       rotation = drm_rotation_simplify(state->rotation,
> >                                        DRM_MODE_ROTATE_0 |
> > @@ -226,6 +251,30 @@ int mtk_ovl_layer_check(struct device *dev, unsigned int idx,
> >       if (state->fb->format->is_yuv && rotation != 0)
> >               return -EINVAL;
> >
> > +     if (state->fb->modifier) {
> > +             struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
>
> Since you're introducing modifier and fourcc for this branch only, you
> may as well just declare them here instead, but either way is fine.
>
> > +
> > +             if (!ovl->data->supports_afbc)
> > +                     return -EINVAL;
> > +
> > +             modifier = state->fb->modifier;
> > +
> > +             if (modifier != DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
> > +                                                     AFBC_FORMAT_MOD_SPLIT |
> > +                                                     AFBC_FORMAT_MOD_SPARSE))
> > +                     return -EINVAL;
> > +
> > +             fourcc = state->fb->format->format;
> > +             if (fourcc != DRM_FORMAT_BGRA8888 &&
> > +                 fourcc != DRM_FORMAT_ABGR8888 &&
> > +                 fourcc != DRM_FORMAT_ARGB8888 &&
> > +                 fourcc != DRM_FORMAT_XRGB8888 &&
> > +                 fourcc != DRM_FORMAT_XBGR8888 &&
> > +                 fourcc != DRM_FORMAT_RGB888 &&
> > +                 fourcc != DRM_FORMAT_BGR888)
> > +                     return -EINVAL;
> > +     }
> > +
> >       state->rotation = rotation;
> >
> >       return 0;
> > @@ -310,11 +359,14 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
> >       struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
> >       struct mtk_plane_pending_state *pending = &state->pending;
> >       unsigned int addr = pending->addr;
> > -     unsigned int pitch = pending->pitch & 0xffff;
> > +     unsigned int hdr_addr = pending->hdr_addr;
> > +     unsigned int pitch = pending->pitch;
> > +     unsigned int hdr_pitch = pending->hdr_pitch;
> >       unsigned int fmt = pending->format;
> >       unsigned int offset = (pending->y << 16) | pending->x;
> >       unsigned int src_size = (pending->height << 16) | pending->width;
> >       unsigned int con;
> > +     bool is_afbc = pending->modifier;
> >
> >       if (!pending->enable) {
> >               mtk_ovl_layer_off(dev, idx, cmdq_pkt);
> > @@ -335,16 +387,39 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
> >               addr += pending->pitch - 1;
> >       }
> >
> > -     mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
> > -                           DISP_REG_OVL_CON(idx));
> > -     mtk_ddp_write_relaxed(cmdq_pkt, pitch, &ovl->cmdq_reg, ovl->regs,
> > -                           DISP_REG_OVL_PITCH(idx));
> > -     mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
> > -                           DISP_REG_OVL_SRC_SIZE(idx));
> > -     mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
> > -                           DISP_REG_OVL_OFFSET(idx));
> > -     mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
> > -                           DISP_REG_OVL_ADDR(ovl, idx));
> > +     mtk_ovl_set_afbc(dev, cmdq_pkt, idx, is_afbc);
> > +     if (!is_afbc) {
> > +             mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_CON(idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, pitch & 0xFFFF, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_PITCH(idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_SRC_SIZE(idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_OFFSET(idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_ADDR(ovl, idx));
> > +     } else {
> > +             mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_ADDR(ovl, idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_HDR_ADDR(ovl, idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_SRC_SIZE(idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt,
> > +                                   OVL_PITCH_MSB_2ND_SUBBUF | ((pitch >> 16) & 0xFFFF),
>
> I say that this would be more readable if you use bitfield macros instead but,
> anyway, that magic "16" number must get a definition.
> I have the same comment about the GENMASK(15, 0) (== 0xffff), but I know that
> doesn't come from you... would still be nice to also add a definition, which
> is anyway "practically mandatory" if you use FIELD_PREP(mask, val).
>
> > +                                   &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, pitch & 0xFFFF, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_PITCH(idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_HDR_PITCH(ovl, idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_CON(idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_OFFSET(idx));
> > +             mtk_ddp_write_relaxed(cmdq_pkt, 0, &ovl->cmdq_reg, ovl->regs,
> > +                                   DISP_REG_OVL_CLIP(idx));
> > +     }
>
> In any case - are you use that we *must* program the registers in this exact
> sequence?
>
> Here, the OVL layer is supposed to be OFF (OVL_SRC_CON == 0, OVL_RDMA_CTRL == 0)
> so the contents of the registers that you're modifying should not matter at all
> until the layer is turned on.
>
> Note that, in case it doesn't matter, this gets greatly simplified, exactly as:
>
> at the top -- after DISP_REG_OVL_PITCH_MSB(n):
> #define OVL_SRC_PITCH_MSB       GENMASK(3, 0)
>
> after DISP_REG_OVL_PITCH(n):
> #define OVL_SRC_PITCH_LSB       GENMASK(15, 0)
>
>         mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
>                               DISP_REG_OVL_CON(idx));
>         mtk_ddp_write_relaxed(cmdq_pkt, FIELD_PREP(OVL_SRC_PITCH_LSB, pitch),
>                               &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH(idx));
>         mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
>                               DISP_REG_OVL_SRC_SIZE(idx));
>         mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
>                               DISP_REG_OVL_OFFSET(idx));
>         mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
>                               DISP_REG_OVL_ADDR(ovl, idx));
>
>         if (is_afbc) {
>                 pitch_msb = FIELD_PREP(OVL_SRC_PITCH_MSB, (pitch >> 16));
>                 pitch_msb |= FIELD_PREP(OVL_PITCH_MSB_2ND_SUBBUF, 1);
>                 mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs,
>                                       DISP_REG_OVL_HDR_ADDR(ovl, idx));
>                 mtk_ddp_write_relaxed(cmdq_pkt, val, &ovl->cmdq_reg, ovl->regs,
>                                       DISP_REG_OVL_PITCH_MSB(idx));
>                 mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs,
>                                       DISP_REG_OVL_HDR_PITCH(ovl, idx));
>                 mtk_ddp_write_relaxed(cmdq_pkt, 0, &ovl->cmdq_reg, ovl->regs,
>                                       DISP_REG_OVL_CLIP(idx));
>         }
>
> >
> >       mtk_ovl_layer_on(dev, idx, cmdq_pkt);
> >   }
> > @@ -492,6 +567,15 @@ static const struct mtk_disp_ovl_data mt8192_ovl_2l_driver_data = {
>
> ..snip..
>
> > diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
> > index 5c0d9ce69931..734d2554b2b8 100644
> > --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
> > +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
> > @@ -12,6 +12,7 @@
> >   #include <drm/drm_framebuffer.h>
> >   #include <drm/drm_gem_atomic_helper.h>
> >   #include <drm/drm_plane_helper.h>
> > +#include <linux/align.h>
> >
> >   #include "mtk_drm_crtc.h"
> >   #include "mtk_drm_ddp_comp.h"
> > @@ -52,6 +53,7 @@ static void mtk_plane_reset(struct drm_plane *plane)
> >
> >       state->base.plane = plane;
> >       state->pending.format = DRM_FORMAT_RGB565;
> > +     state->pending.modifier = 0;
> >   }
> >
> >   static struct drm_plane_state *mtk_plane_duplicate_state(struct drm_plane *plane)
> > @@ -120,21 +122,52 @@ static void mtk_plane_update_new_state(struct drm_plane_state *new_state,
> >       struct drm_gem_object *gem;
> >       struct mtk_drm_gem_obj *mtk_gem;
> >       unsigned int pitch, format;
> > +     unsigned long long modifier;
> >       dma_addr_t addr;
> > +     dma_addr_t hdr_addr = 0;
> > +     unsigned int hdr_pitch = 0;
> >
> >       gem = fb->obj[0];
> >       mtk_gem = to_mtk_gem_obj(gem);
> >       addr = mtk_gem->dma_addr;
> >       pitch = fb->pitches[0];
> >       format = fb->format->format;
> > +     modifier = fb->modifier;
> >
> > -     addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
> > -     addr += (new_state->src.y1 >> 16) * pitch;
> > +     if (!modifier) {
> > +             addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
> > +             addr += (new_state->src.y1 >> 16) * pitch;
> > +     } else {
> > +             int width_in_blocks = ALIGN(fb->width, AFBC_DATA_BLOCK_WIDTH)
> > +                                   / AFBC_DATA_BLOCK_WIDTH;
> > +             int height_in_blocks = ALIGN(fb->height, AFBC_DATA_BLOCK_HEIGHT)
> > +                                    / AFBC_DATA_BLOCK_HEIGHT;
> > +             int x_offset_in_blocks = (new_state->src.x1 >> 16) / AFBC_DATA_BLOCK_WIDTH;
> > +             int y_offset_in_blocks = (new_state->src.y1 >> 16) / AFBC_DATA_BLOCK_HEIGHT;
> > +             int hdr_size;
> > +
> > +             hdr_pitch = width_in_blocks * AFBC_HEADER_BLOCK_SIZE;
> > +             pitch = width_in_blocks * AFBC_DATA_BLOCK_WIDTH *
> > +                     AFBC_DATA_BLOCK_HEIGHT * fb->format->cpp[0];
> > +
> > +             hdr_size = ALIGN(hdr_pitch * height_in_blocks, AFBC_HEADER_ALIGNMENT);
> > +
> > +             hdr_addr = addr + hdr_pitch * y_offset_in_blocks +
> > +                        AFBC_HEADER_BLOCK_SIZE * x_offset_in_blocks;
> > +             // The data plane is offset by 1 additional block.
>
> C-style comments please.
>
> > +             addr = addr + hdr_size +
> > +                    pitch * y_offset_in_blocks +
> > +                    AFBC_DATA_BLOCK_WIDTH * AFBC_DATA_BLOCK_HEIGHT *
> > +                    fb->format->cpp[0] * (x_offset_in_blocks + 1);
> > +     }
> >
>
> Regards,
> Angelo
>
AngeloGioacchino Del Regno Oct. 12, 2022, 8:25 a.m. UTC | #3
Il 11/10/22 20:28, Justin Green ha scritto:
> Hi Angelo,
> Thanks for the suggestions! I'll upload another patch with those changes.
> 
> Re the pitch register math, would it be acceptable to define separate
> macros for the LSB and MSB to abstract away the magic numbers? For
> example:
> #define OVL_PITCH_MSB(n)                        ((n >> 16) & GENMASK(15, 0))
> #define OVL_PITCH_LSB(n)                        (n & GENMASK(15, 0))
> 

These would be different from the macros that are available in bitfield.h, but
not *fundamentally* different, so these would look a little redundant...

I think that you refer to that `pitch` variable that's coming from the DRM(/fb)
API... and bitfield macros are for register access... so I guess that one clean
way of avoiding the magic shifting (that is purely used to split the 32-bits
number in two 16-bits 'chunks') would be to perhaps use a union, so that you
will have something like u.pitch_lsb, u.pitch_msb (with lsb/msb being two u16).

That'd better represent what is going on here, I believe?

Regards,
Angelo

> Regards,
> Justin
> 
> On Tue, Oct 11, 2022 at 5:09 AM AngeloGioacchino Del Regno
> <angelogioacchino.delregno@collabora.com> wrote:
>>
>> Il 10/10/22 17:01, Justin Green ha scritto:
>>> From: Justin Green <greenjustin@chromium.org>
>>>
>>> Add AFBC support to Mediatek DRM driver and enable on MT8195.
>>>
>>> Tested on MT8195 and confirmed both correct video output and improved DRAM
>>> bandwidth performance.
>>>
>>> v2:
>>> Marked mtk_ovl_set_afbc as static, reflowed some lines to fit column
>>> limit.
>>>
>>> Signed-off-by: Justin Green <greenjustin@chromium.org>
>>
>> Hello Justin,
>> thanks for the patch!
>>
>> However, there's something to improve...
>>
>>> ---
>>>    drivers/gpu/drm/mediatek/mtk_disp_ovl.c  | 108 ++++++++++++++++++++---
>>>    drivers/gpu/drm/mediatek/mtk_drm_plane.c |  37 +++++++-
>>>    drivers/gpu/drm/mediatek/mtk_drm_plane.h |   8 ++
>>>    3 files changed, 140 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
>>> index 002b0f6cae1a..1724ea85a840 100644
>>> --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
>>> +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
>>
>> ..snip..
>>
>>> @@ -208,6 +231,8 @@ int mtk_ovl_layer_check(struct device *dev, unsigned int idx,
>>>    {
>>>        struct drm_plane_state *state = &mtk_state->base;
>>>        unsigned int rotation = 0;
>>> +     unsigned long long modifier;
>>> +     unsigned int fourcc;
>>>
>>>        rotation = drm_rotation_simplify(state->rotation,
>>>                                         DRM_MODE_ROTATE_0 |
>>> @@ -226,6 +251,30 @@ int mtk_ovl_layer_check(struct device *dev, unsigned int idx,
>>>        if (state->fb->format->is_yuv && rotation != 0)
>>>                return -EINVAL;
>>>
>>> +     if (state->fb->modifier) {
>>> +             struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
>>
>> Since you're introducing modifier and fourcc for this branch only, you
>> may as well just declare them here instead, but either way is fine.
>>
>>> +
>>> +             if (!ovl->data->supports_afbc)
>>> +                     return -EINVAL;
>>> +
>>> +             modifier = state->fb->modifier;
>>> +
>>> +             if (modifier != DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
>>> +                                                     AFBC_FORMAT_MOD_SPLIT |
>>> +                                                     AFBC_FORMAT_MOD_SPARSE))
>>> +                     return -EINVAL;
>>> +
>>> +             fourcc = state->fb->format->format;
>>> +             if (fourcc != DRM_FORMAT_BGRA8888 &&
>>> +                 fourcc != DRM_FORMAT_ABGR8888 &&
>>> +                 fourcc != DRM_FORMAT_ARGB8888 &&
>>> +                 fourcc != DRM_FORMAT_XRGB8888 &&
>>> +                 fourcc != DRM_FORMAT_XBGR8888 &&
>>> +                 fourcc != DRM_FORMAT_RGB888 &&
>>> +                 fourcc != DRM_FORMAT_BGR888)
>>> +                     return -EINVAL;
>>> +     }
>>> +
>>>        state->rotation = rotation;
>>>
>>>        return 0;
>>> @@ -310,11 +359,14 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
>>>        struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
>>>        struct mtk_plane_pending_state *pending = &state->pending;
>>>        unsigned int addr = pending->addr;
>>> -     unsigned int pitch = pending->pitch & 0xffff;
>>> +     unsigned int hdr_addr = pending->hdr_addr;
>>> +     unsigned int pitch = pending->pitch;
>>> +     unsigned int hdr_pitch = pending->hdr_pitch;
>>>        unsigned int fmt = pending->format;
>>>        unsigned int offset = (pending->y << 16) | pending->x;
>>>        unsigned int src_size = (pending->height << 16) | pending->width;
>>>        unsigned int con;
>>> +     bool is_afbc = pending->modifier;
>>>
>>>        if (!pending->enable) {
>>>                mtk_ovl_layer_off(dev, idx, cmdq_pkt);
>>> @@ -335,16 +387,39 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
>>>                addr += pending->pitch - 1;
>>>        }
>>>
>>> -     mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
>>> -                           DISP_REG_OVL_CON(idx));
>>> -     mtk_ddp_write_relaxed(cmdq_pkt, pitch, &ovl->cmdq_reg, ovl->regs,
>>> -                           DISP_REG_OVL_PITCH(idx));
>>> -     mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
>>> -                           DISP_REG_OVL_SRC_SIZE(idx));
>>> -     mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
>>> -                           DISP_REG_OVL_OFFSET(idx));
>>> -     mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
>>> -                           DISP_REG_OVL_ADDR(ovl, idx));
>>> +     mtk_ovl_set_afbc(dev, cmdq_pkt, idx, is_afbc);
>>> +     if (!is_afbc) {
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_CON(idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, pitch & 0xFFFF, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_PITCH(idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_SRC_SIZE(idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_OFFSET(idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_ADDR(ovl, idx));
>>> +     } else {
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_ADDR(ovl, idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_HDR_ADDR(ovl, idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_SRC_SIZE(idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt,
>>> +                                   OVL_PITCH_MSB_2ND_SUBBUF | ((pitch >> 16) & 0xFFFF),
>>
>> I say that this would be more readable if you use bitfield macros instead but,
>> anyway, that magic "16" number must get a definition.
>> I have the same comment about the GENMASK(15, 0) (== 0xffff), but I know that
>> doesn't come from you... would still be nice to also add a definition, which
>> is anyway "practically mandatory" if you use FIELD_PREP(mask, val).
>>
>>> +                                   &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, pitch & 0xFFFF, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_PITCH(idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_HDR_PITCH(ovl, idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_CON(idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_OFFSET(idx));
>>> +             mtk_ddp_write_relaxed(cmdq_pkt, 0, &ovl->cmdq_reg, ovl->regs,
>>> +                                   DISP_REG_OVL_CLIP(idx));
>>> +     }
>>
>> In any case - are you use that we *must* program the registers in this exact
>> sequence?
>>
>> Here, the OVL layer is supposed to be OFF (OVL_SRC_CON == 0, OVL_RDMA_CTRL == 0)
>> so the contents of the registers that you're modifying should not matter at all
>> until the layer is turned on.
>>
>> Note that, in case it doesn't matter, this gets greatly simplified, exactly as:
>>
>> at the top -- after DISP_REG_OVL_PITCH_MSB(n):
>> #define OVL_SRC_PITCH_MSB       GENMASK(3, 0)
>>
>> after DISP_REG_OVL_PITCH(n):
>> #define OVL_SRC_PITCH_LSB       GENMASK(15, 0)
>>
>>          mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
>>                                DISP_REG_OVL_CON(idx));
>>          mtk_ddp_write_relaxed(cmdq_pkt, FIELD_PREP(OVL_SRC_PITCH_LSB, pitch),
>>                                &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH(idx));
>>          mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
>>                                DISP_REG_OVL_SRC_SIZE(idx));
>>          mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
>>                                DISP_REG_OVL_OFFSET(idx));
>>          mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
>>                                DISP_REG_OVL_ADDR(ovl, idx));
>>
>>          if (is_afbc) {
>>                  pitch_msb = FIELD_PREP(OVL_SRC_PITCH_MSB, (pitch >> 16));
>>                  pitch_msb |= FIELD_PREP(OVL_PITCH_MSB_2ND_SUBBUF, 1);
>>                  mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs,
>>                                        DISP_REG_OVL_HDR_ADDR(ovl, idx));
>>                  mtk_ddp_write_relaxed(cmdq_pkt, val, &ovl->cmdq_reg, ovl->regs,
>>                                        DISP_REG_OVL_PITCH_MSB(idx));
>>                  mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs,
>>                                        DISP_REG_OVL_HDR_PITCH(ovl, idx));
>>                  mtk_ddp_write_relaxed(cmdq_pkt, 0, &ovl->cmdq_reg, ovl->regs,
>>                                        DISP_REG_OVL_CLIP(idx));
>>          }
>>
>>>
>>>        mtk_ovl_layer_on(dev, idx, cmdq_pkt);
>>>    }
>>> @@ -492,6 +567,15 @@ static const struct mtk_disp_ovl_data mt8192_ovl_2l_driver_data = {
>>
>> ..snip..
>>
>>> diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
>>> index 5c0d9ce69931..734d2554b2b8 100644
>>> --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
>>> +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
>>> @@ -12,6 +12,7 @@
>>>    #include <drm/drm_framebuffer.h>
>>>    #include <drm/drm_gem_atomic_helper.h>
>>>    #include <drm/drm_plane_helper.h>
>>> +#include <linux/align.h>
>>>
>>>    #include "mtk_drm_crtc.h"
>>>    #include "mtk_drm_ddp_comp.h"
>>> @@ -52,6 +53,7 @@ static void mtk_plane_reset(struct drm_plane *plane)
>>>
>>>        state->base.plane = plane;
>>>        state->pending.format = DRM_FORMAT_RGB565;
>>> +     state->pending.modifier = 0;
>>>    }
>>>
>>>    static struct drm_plane_state *mtk_plane_duplicate_state(struct drm_plane *plane)
>>> @@ -120,21 +122,52 @@ static void mtk_plane_update_new_state(struct drm_plane_state *new_state,
>>>        struct drm_gem_object *gem;
>>>        struct mtk_drm_gem_obj *mtk_gem;
>>>        unsigned int pitch, format;
>>> +     unsigned long long modifier;
>>>        dma_addr_t addr;
>>> +     dma_addr_t hdr_addr = 0;
>>> +     unsigned int hdr_pitch = 0;
>>>
>>>        gem = fb->obj[0];
>>>        mtk_gem = to_mtk_gem_obj(gem);
>>>        addr = mtk_gem->dma_addr;
>>>        pitch = fb->pitches[0];
>>>        format = fb->format->format;
>>> +     modifier = fb->modifier;
>>>
>>> -     addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
>>> -     addr += (new_state->src.y1 >> 16) * pitch;
>>> +     if (!modifier) {
>>> +             addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
>>> +             addr += (new_state->src.y1 >> 16) * pitch;
>>> +     } else {
>>> +             int width_in_blocks = ALIGN(fb->width, AFBC_DATA_BLOCK_WIDTH)
>>> +                                   / AFBC_DATA_BLOCK_WIDTH;
>>> +             int height_in_blocks = ALIGN(fb->height, AFBC_DATA_BLOCK_HEIGHT)
>>> +                                    / AFBC_DATA_BLOCK_HEIGHT;
>>> +             int x_offset_in_blocks = (new_state->src.x1 >> 16) / AFBC_DATA_BLOCK_WIDTH;
>>> +             int y_offset_in_blocks = (new_state->src.y1 >> 16) / AFBC_DATA_BLOCK_HEIGHT;
>>> +             int hdr_size;
>>> +
>>> +             hdr_pitch = width_in_blocks * AFBC_HEADER_BLOCK_SIZE;
>>> +             pitch = width_in_blocks * AFBC_DATA_BLOCK_WIDTH *
>>> +                     AFBC_DATA_BLOCK_HEIGHT * fb->format->cpp[0];
>>> +
>>> +             hdr_size = ALIGN(hdr_pitch * height_in_blocks, AFBC_HEADER_ALIGNMENT);
>>> +
>>> +             hdr_addr = addr + hdr_pitch * y_offset_in_blocks +
>>> +                        AFBC_HEADER_BLOCK_SIZE * x_offset_in_blocks;
>>> +             // The data plane is offset by 1 additional block.
>>
>> C-style comments please.
>>
>>> +             addr = addr + hdr_size +
>>> +                    pitch * y_offset_in_blocks +
>>> +                    AFBC_DATA_BLOCK_WIDTH * AFBC_DATA_BLOCK_HEIGHT *
>>> +                    fb->format->cpp[0] * (x_offset_in_blocks + 1);
>>> +     }
>>>
>>
>> Regards,
>> Angelo
>>
Justin Green Oct. 12, 2022, 2:12 p.m. UTC | #4
> These would be different from the macros that are available in bitfield.h, but
> not *fundamentally* different, so these would look a little redundant...
>
> I think that you refer to that `pitch` variable that's coming from the DRM(/fb)
> API... and bitfield macros are for register access... so I guess that one clean
> way of avoiding the magic shifting (that is purely used to split the 32-bits
> number in two 16-bits 'chunks') would be to perhaps use a union, so that you
> will have something like u.pitch_lsb, u.pitch_msb (with lsb/msb being two u16).

Do you mean something like this?

union pitch_val {
     struct split_pitch_val {
          uint16_t lsb;
          uint16_t msb;
     } split;
     uint32_t val;
};

I think my concern with that approach would be it assumes the compiler
packs structs tightly and it also assumes the endianness of the
machine, whereas a bitshift is maybe more portable. Is this an issue
worth considering since we know this driver will only run on specific
MTK SoCs?
AngeloGioacchino Del Regno Oct. 12, 2022, 3:16 p.m. UTC | #5
Il 12/10/22 16:12, Justin Green ha scritto:
>> These would be different from the macros that are available in bitfield.h, but
>> not *fundamentally* different, so these would look a little redundant...
>>
>> I think that you refer to that `pitch` variable that's coming from the DRM(/fb)
>> API... and bitfield macros are for register access... so I guess that one clean
>> way of avoiding the magic shifting (that is purely used to split the 32-bits
>> number in two 16-bits 'chunks') would be to perhaps use a union, so that you
>> will have something like u.pitch_lsb, u.pitch_msb (with lsb/msb being two u16).
> 
> Do you mean something like this?
> 
> union pitch_val {
>       struct split_pitch_val {
>            uint16_t lsb;
>            uint16_t msb;
>       } split;
>       uint32_t val;
> };
> 
> I think my concern with that approach would be it assumes the compiler
> packs structs tightly and it also assumes the endianness of the
> machine, whereas a bitshift is maybe more portable. Is this an issue
> worth considering since we know this driver will only run on specific
> MTK SoCs?

Yes I mean something like that (except, use u16, u32 short form)... and that
should be safe really. As for the endianness, a lot more would break already,
so I don't see that as a concern right now.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index 002b0f6cae1a..1724ea85a840 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -29,17 +29,24 @@ 
 #define DISP_REG_OVL_DATAPATH_CON		0x0024
 #define OVL_LAYER_SMI_ID_EN				BIT(0)
 #define OVL_BGCLR_SEL_IN				BIT(2)
+#define OVL_LAYER_AFBC_EN(n)				BIT(4+n)
 #define DISP_REG_OVL_ROI_BGCLR			0x0028
 #define DISP_REG_OVL_SRC_CON			0x002c
 #define DISP_REG_OVL_CON(n)			(0x0030 + 0x20 * (n))
 #define DISP_REG_OVL_SRC_SIZE(n)		(0x0038 + 0x20 * (n))
 #define DISP_REG_OVL_OFFSET(n)			(0x003c + 0x20 * (n))
+#define DISP_REG_OVL_PITCH_MSB(n)		(0x0040 + 0x20 * (n))
+#define OVL_PITCH_MSB_2ND_SUBBUF			BIT(16)
+#define OVL_PITCH_MSB_YUV_TRANS			BIT(20)
 #define DISP_REG_OVL_PITCH(n)			(0x0044 + 0x20 * (n))
+#define DISP_REG_OVL_CLIP(n)			(0x004c + 0x20 * (n))
 #define DISP_REG_OVL_RDMA_CTRL(n)		(0x00c0 + 0x20 * (n))
 #define DISP_REG_OVL_RDMA_GMC(n)		(0x00c8 + 0x20 * (n))
 #define DISP_REG_OVL_ADDR_MT2701		0x0040
 #define DISP_REG_OVL_ADDR_MT8173		0x0f40
 #define DISP_REG_OVL_ADDR(ovl, n)		((ovl)->data->addr + 0x20 * (n))
+#define DISP_REG_OVL_HDR_ADDR(ovl, n)		((ovl)->data->addr + 0x20 * (n) + 0x04)
+#define DISP_REG_OVL_HDR_PITCH(ovl, n)		((ovl)->data->addr + 0x20 * (n) + 0x08)
 
 #define GMC_THRESHOLD_BITS	16
 #define GMC_THRESHOLD_HIGH	((1 << GMC_THRESHOLD_BITS) / 4)
@@ -67,6 +74,7 @@  struct mtk_disp_ovl_data {
 	unsigned int layer_nr;
 	bool fmt_rgb565_is_0;
 	bool smi_id_en;
+	bool supports_afbc;
 };
 
 /*
@@ -172,7 +180,22 @@  void mtk_ovl_stop(struct device *dev)
 		reg = reg & ~OVL_LAYER_SMI_ID_EN;
 		writel_relaxed(reg, ovl->regs + DISP_REG_OVL_DATAPATH_CON);
 	}
+}
+
+static void mtk_ovl_set_afbc(struct device *dev, struct cmdq_pkt *cmdq_pkt,
+			     int idx, bool enabled)
+{
+	struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
+	unsigned int reg;
 
+	reg = readl(ovl->regs + DISP_REG_OVL_DATAPATH_CON);
+	if (enabled)
+		reg = reg | OVL_LAYER_AFBC_EN(idx);
+	else
+		reg = reg & ~OVL_LAYER_AFBC_EN(idx);
+
+	mtk_ddp_write_relaxed(cmdq_pkt, reg, &ovl->cmdq_reg,
+			      ovl->regs, DISP_REG_OVL_DATAPATH_CON);
 }
 
 void mtk_ovl_config(struct device *dev, unsigned int w,
@@ -208,6 +231,8 @@  int mtk_ovl_layer_check(struct device *dev, unsigned int idx,
 {
 	struct drm_plane_state *state = &mtk_state->base;
 	unsigned int rotation = 0;
+	unsigned long long modifier;
+	unsigned int fourcc;
 
 	rotation = drm_rotation_simplify(state->rotation,
 					 DRM_MODE_ROTATE_0 |
@@ -226,6 +251,30 @@  int mtk_ovl_layer_check(struct device *dev, unsigned int idx,
 	if (state->fb->format->is_yuv && rotation != 0)
 		return -EINVAL;
 
+	if (state->fb->modifier) {
+		struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
+
+		if (!ovl->data->supports_afbc)
+			return -EINVAL;
+
+		modifier = state->fb->modifier;
+
+		if (modifier != DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
+							AFBC_FORMAT_MOD_SPLIT |
+							AFBC_FORMAT_MOD_SPARSE))
+			return -EINVAL;
+
+		fourcc = state->fb->format->format;
+		if (fourcc != DRM_FORMAT_BGRA8888 &&
+		    fourcc != DRM_FORMAT_ABGR8888 &&
+		    fourcc != DRM_FORMAT_ARGB8888 &&
+		    fourcc != DRM_FORMAT_XRGB8888 &&
+		    fourcc != DRM_FORMAT_XBGR8888 &&
+		    fourcc != DRM_FORMAT_RGB888 &&
+		    fourcc != DRM_FORMAT_BGR888)
+			return -EINVAL;
+	}
+
 	state->rotation = rotation;
 
 	return 0;
@@ -310,11 +359,14 @@  void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
 	struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
 	struct mtk_plane_pending_state *pending = &state->pending;
 	unsigned int addr = pending->addr;
-	unsigned int pitch = pending->pitch & 0xffff;
+	unsigned int hdr_addr = pending->hdr_addr;
+	unsigned int pitch = pending->pitch;
+	unsigned int hdr_pitch = pending->hdr_pitch;
 	unsigned int fmt = pending->format;
 	unsigned int offset = (pending->y << 16) | pending->x;
 	unsigned int src_size = (pending->height << 16) | pending->width;
 	unsigned int con;
+	bool is_afbc = pending->modifier;
 
 	if (!pending->enable) {
 		mtk_ovl_layer_off(dev, idx, cmdq_pkt);
@@ -335,16 +387,39 @@  void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
 		addr += pending->pitch - 1;
 	}
 
-	mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
-			      DISP_REG_OVL_CON(idx));
-	mtk_ddp_write_relaxed(cmdq_pkt, pitch, &ovl->cmdq_reg, ovl->regs,
-			      DISP_REG_OVL_PITCH(idx));
-	mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
-			      DISP_REG_OVL_SRC_SIZE(idx));
-	mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
-			      DISP_REG_OVL_OFFSET(idx));
-	mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
-			      DISP_REG_OVL_ADDR(ovl, idx));
+	mtk_ovl_set_afbc(dev, cmdq_pkt, idx, is_afbc);
+	if (!is_afbc) {
+		mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_CON(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, pitch & 0xFFFF, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_PITCH(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_SRC_SIZE(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_OFFSET(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_ADDR(ovl, idx));
+	} else {
+		mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_ADDR(ovl, idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_HDR_ADDR(ovl, idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_SRC_SIZE(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt,
+				      OVL_PITCH_MSB_2ND_SUBBUF | ((pitch >> 16) & 0xFFFF),
+				      &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, pitch & 0xFFFF, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_PITCH(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_HDR_PITCH(ovl, idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_CON(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, offset, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_OFFSET(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, 0, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_CLIP(idx));
+	}
 
 	mtk_ovl_layer_on(dev, idx, cmdq_pkt);
 }
@@ -492,6 +567,15 @@  static const struct mtk_disp_ovl_data mt8192_ovl_2l_driver_data = {
 	.smi_id_en = true,
 };
 
+static const struct mtk_disp_ovl_data mt8195_ovl_driver_data = {
+	.addr = DISP_REG_OVL_ADDR_MT8173,
+	.gmc_bits = 10,
+	.layer_nr = 4,
+	.fmt_rgb565_is_0 = true,
+	.smi_id_en = true,
+	.supports_afbc = true,
+};
+
 static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = {
 	{ .compatible = "mediatek,mt2701-disp-ovl",
 	  .data = &mt2701_ovl_driver_data},
@@ -505,6 +589,8 @@  static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = {
 	  .data = &mt8192_ovl_driver_data},
 	{ .compatible = "mediatek,mt8192-disp-ovl-2l",
 	  .data = &mt8192_ovl_2l_driver_data},
+	{ .compatible = "mediatek,mt8195-disp-ovl",
+	  .data = &mt8195_ovl_driver_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, mtk_disp_ovl_driver_dt_match);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
index 5c0d9ce69931..734d2554b2b8 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
@@ -12,6 +12,7 @@ 
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
+#include <linux/align.h>
 
 #include "mtk_drm_crtc.h"
 #include "mtk_drm_ddp_comp.h"
@@ -52,6 +53,7 @@  static void mtk_plane_reset(struct drm_plane *plane)
 
 	state->base.plane = plane;
 	state->pending.format = DRM_FORMAT_RGB565;
+	state->pending.modifier = 0;
 }
 
 static struct drm_plane_state *mtk_plane_duplicate_state(struct drm_plane *plane)
@@ -120,21 +122,52 @@  static void mtk_plane_update_new_state(struct drm_plane_state *new_state,
 	struct drm_gem_object *gem;
 	struct mtk_drm_gem_obj *mtk_gem;
 	unsigned int pitch, format;
+	unsigned long long modifier;
 	dma_addr_t addr;
+	dma_addr_t hdr_addr = 0;
+	unsigned int hdr_pitch = 0;
 
 	gem = fb->obj[0];
 	mtk_gem = to_mtk_gem_obj(gem);
 	addr = mtk_gem->dma_addr;
 	pitch = fb->pitches[0];
 	format = fb->format->format;
+	modifier = fb->modifier;
 
-	addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
-	addr += (new_state->src.y1 >> 16) * pitch;
+	if (!modifier) {
+		addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
+		addr += (new_state->src.y1 >> 16) * pitch;
+	} else {
+		int width_in_blocks = ALIGN(fb->width, AFBC_DATA_BLOCK_WIDTH)
+				      / AFBC_DATA_BLOCK_WIDTH;
+		int height_in_blocks = ALIGN(fb->height, AFBC_DATA_BLOCK_HEIGHT)
+				       / AFBC_DATA_BLOCK_HEIGHT;
+		int x_offset_in_blocks = (new_state->src.x1 >> 16) / AFBC_DATA_BLOCK_WIDTH;
+		int y_offset_in_blocks = (new_state->src.y1 >> 16) / AFBC_DATA_BLOCK_HEIGHT;
+		int hdr_size;
+
+		hdr_pitch = width_in_blocks * AFBC_HEADER_BLOCK_SIZE;
+		pitch = width_in_blocks * AFBC_DATA_BLOCK_WIDTH *
+			AFBC_DATA_BLOCK_HEIGHT * fb->format->cpp[0];
+
+		hdr_size = ALIGN(hdr_pitch * height_in_blocks, AFBC_HEADER_ALIGNMENT);
+
+		hdr_addr = addr + hdr_pitch * y_offset_in_blocks +
+			   AFBC_HEADER_BLOCK_SIZE * x_offset_in_blocks;
+		// The data plane is offset by 1 additional block.
+		addr = addr + hdr_size +
+		       pitch * y_offset_in_blocks +
+		       AFBC_DATA_BLOCK_WIDTH * AFBC_DATA_BLOCK_HEIGHT *
+		       fb->format->cpp[0] * (x_offset_in_blocks + 1);
+	}
 
 	mtk_plane_state->pending.enable = true;
 	mtk_plane_state->pending.pitch = pitch;
+	mtk_plane_state->pending.hdr_pitch = hdr_pitch;
 	mtk_plane_state->pending.format = format;
+	mtk_plane_state->pending.modifier = modifier;
 	mtk_plane_state->pending.addr = addr;
+	mtk_plane_state->pending.hdr_addr = hdr_addr;
 	mtk_plane_state->pending.x = new_state->dst.x1;
 	mtk_plane_state->pending.y = new_state->dst.y1;
 	mtk_plane_state->pending.width = drm_rect_width(&new_state->dst);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h
index 2d5ec66e3df1..8f39011cdbfc 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h
@@ -10,12 +10,20 @@ 
 #include <drm/drm_crtc.h>
 #include <linux/types.h>
 
+#define AFBC_DATA_BLOCK_WIDTH 32
+#define AFBC_DATA_BLOCK_HEIGHT 8
+#define AFBC_HEADER_BLOCK_SIZE 16
+#define AFBC_HEADER_ALIGNMENT 1024
+
 struct mtk_plane_pending_state {
 	bool				config;
 	bool				enable;
 	dma_addr_t			addr;
+	dma_addr_t			hdr_addr;
 	unsigned int			pitch;
+	unsigned int			hdr_pitch;
 	unsigned int			format;
+	unsigned long long		modifier;
 	unsigned int			x;
 	unsigned int			y;
 	unsigned int			width;