diff mbox series

[v3,1/5] media: hantro: Fix H264 motion vector buffer offset

Message ID HE1PR06MB4011A23596DCD988F2CAC603AC790@HE1PR06MB4011.eurprd06.prod.outlook.com (mailing list archive)
State New, archived
Headers show
Series media: hantro: H264 fixes and improvements | expand

Commit Message

Jonas Karlman Nov. 6, 2019, 10:34 p.m. UTC
A decoded 8-bit 4:2:0 frame needs memory for up to 448 bytes per
macroblock and is laid out in memory as follows:

+---------------------------+
| Y-plane   256 bytes x MBs |
+---------------------------+
| UV-plane  128 bytes x MBs |
+---------------------------+
| MV buffer  64 bytes x MBs |
+---------------------------+

The motion vector buffer offset is currently correct for 4:2:0 because the
extra space for motion vectors is overallocated with an extra 64 bytes x MBs.

Wrong offsets for both the destination and motion vector buffers are used
for the bottom field of field-encoded content; a wrong offset is
also used for 4:0:0 (monochrome) content.

Fix this by setting the motion vector address to the expected 384 bytes x MBs
offset for 4:2:0 and 256 bytes x MBs offset for 4:0:0 content.

Also use the correct destination and motion vector buffer offsets
for the bottom field of field-encoded content.

While at it also extend the check for 4:0:0 (monochrome) to include an
additional check for High Profile (100).

Fixes: dea0a82f3d22 ("media: hantro: Add support for H264 decoding on G1")
Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
---
Changes in v3:
  * address remarks from Boris
  - use src_fmt instead of dst_fmt
Changes in v2:
  * address remarks from Philipp and Ezequiel
  - update commit message
  - rename offset to bytes_per_mb
  - remove MV_OFFSET macros
  - move PIC_MB_WIDTH/HEIGHT_P change to separate patch
---
 .../staging/media/hantro/hantro_g1_h264_dec.c | 31 +++++++++++++------
 1 file changed, 22 insertions(+), 9 deletions(-)

Comments

Tomasz Figa Nov. 20, 2019, 12:40 p.m. UTC | #1
Hi Jonas,

On Thu, Nov 7, 2019 at 7:34 AM Jonas Karlman <jonas@kwiboo.se> wrote:
>
> A decoded 8-bit 4:2:0 frame needs memory for up to 448 bytes per
> macroblock and is laid out in memory as follows:
>
> +---------------------------+
> | Y-plane   256 bytes x MBs |
> +---------------------------+
> | UV-plane  128 bytes x MBs |
> +---------------------------+
> | MV buffer  64 bytes x MBs |
> +---------------------------+
>
> The motion vector buffer offset is currently correct for 4:2:0 because the
> extra space for motion vectors is overallocated with an extra 64 bytes x MBs.
>
> Wrong offsets for both the destination and motion vector buffers are used
> for the bottom field of field-encoded content; a wrong offset is
> also used for 4:0:0 (monochrome) content.
>
> Fix this by setting the motion vector address to the expected 384 bytes x MBs
> offset for 4:2:0 and 256 bytes x MBs offset for 4:0:0 content.
>
> Also use the correct destination and motion vector buffer offsets
> for the bottom field of field-encoded content.
>
> While at it also extend the check for 4:0:0 (monochrome) to include an
> additional check for High Profile (100).
>
> Fixes: dea0a82f3d22 ("media: hantro: Add support for H264 decoding on G1")
> Signed-off-by: Jonas Karlman <jonas@kwiboo.se>
> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
> ---
> Changes in v3:
>   * address remarks from Boris
>   - use src_fmt instead of dst_fmt
> Changes in v2:
>   * address remarks from Philipp and Ezequiel
>   - update commit message
>   - rename offset to bytes_per_mb
>   - remove MV_OFFSET macros
>   - move PIC_MB_WIDTH/HEIGHT_P change to separate patch
> ---
>  .../staging/media/hantro/hantro_g1_h264_dec.c | 31 +++++++++++++------
>  1 file changed, 22 insertions(+), 9 deletions(-)
>

First of all, thanks for the patches! Good to see more members of the
community contributing to the driver.

Please find my comments inline.

> diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c
> index 70a6b5b26477..30d977c3d529 100644
> --- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c
> +++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c
> @@ -81,7 +81,7 @@ static void set_params(struct hantro_ctx *ctx)
>                 reg |= G1_REG_DEC_CTRL4_CABAC_E;
>         if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
>                 reg |= G1_REG_DEC_CTRL4_DIR_8X8_INFER_E;
> -       if (sps->chroma_format_idc == 0)
> +       if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0)

I'd rather make this a separate patch with proper explanation in commit message.

>                 reg |= G1_REG_DEC_CTRL4_BLACKWHITE_E;
>         if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
>                 reg |= G1_REG_DEC_CTRL4_WEIGHT_PRED_E;
> @@ -234,6 +234,7 @@ static void set_buffers(struct hantro_ctx *ctx)
>         struct vb2_v4l2_buffer *src_buf, *dst_buf;
>         struct hantro_dev *vpu = ctx->dev;
>         dma_addr_t src_dma, dst_dma;
> +       size_t offset = 0;
>
>         src_buf = hantro_get_src_buf(ctx);
>         dst_buf = hantro_get_dst_buf(ctx);
> @@ -244,18 +245,30 @@ static void set_buffers(struct hantro_ctx *ctx)
>
>         /* Destination (decoded frame) buffer. */
>         dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> -       vdpu_write_relaxed(vpu, dst_dma, G1_REG_ADDR_DST);
> +       /* Adjust dma addr to start at second line for bottom field */
> +       if (ctrls->slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
> +               offset = ALIGN(ctx->src_fmt.width, MB_DIM);

Isn't ctx->src_fmt.width already aligned to MB_DIM?

Also, offset is in bytes, so should we rather use the bytesperline field?

> +       vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DST);
>
>         /* Higher profiles require DMV buffer appended to reference frames. */
>         if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) {
> -               size_t pic_size = ctx->h264_dec.pic_size;
> -               size_t mv_offset = round_up(pic_size, 8);
> -
> +               unsigned int bytes_per_mb = 384;
> +
> +               /* DMV buffer for monochrome start directly after Y-plane */
> +               if (ctrls->sps->profile_idc >= 100 &&
> +                   ctrls->sps->chroma_format_idc == 0)
> +                       bytes_per_mb = 256;

nit: Adding a blank line here would make it much easier to read.

> +               offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) *
> +                        MB_HEIGHT(ctx->src_fmt.height);

It's kind of difficult to follow with this idea of bytes_per_mb IMHO.
Would it perhaps make sense to rewrite the code as below?

luma_size = ctx->src_fmt.planes[0].bytesperline * ctx->src_fmt.height;

if (ctrls->sps->profile_idc >= 100 &&
    ctrls->sps->chroma_format_idc == 0)
        chroma_size = 0;
else
        chroma_size = ctx->src_fmt.planes[0].bytesperline *
                      ctx->src_fmt.height / 4;

offset = luma_size + chroma_size;

Also, the code only handles 4:2:0 and 4:0:0. How about 4:2:2?

Best regards,
Tomasz

> +
> +               /*
> +                * DMV buffer is split in two for field encoded frames,
> +                * adjust offset for bottom field
> +                */
>                 if (ctrls->slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
> -                       mv_offset += 32 * MB_WIDTH(ctx->dst_fmt.width);
> -
> -               vdpu_write_relaxed(vpu, dst_dma + mv_offset,
> -                                  G1_REG_ADDR_DIR_MV);
> +                       offset += 32 * MB_WIDTH(ctx->src_fmt.width) *
> +                                 MB_HEIGHT(ctx->src_fmt.height);
> +               vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DIR_MV);
>         }
>
>         /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */
> --
> 2.17.1
>
diff mbox series

Patch

diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c
index 70a6b5b26477..30d977c3d529 100644
--- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c
@@ -81,7 +81,7 @@  static void set_params(struct hantro_ctx *ctx)
 		reg |= G1_REG_DEC_CTRL4_CABAC_E;
 	if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
 		reg |= G1_REG_DEC_CTRL4_DIR_8X8_INFER_E;
-	if (sps->chroma_format_idc == 0)
+	if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0)
 		reg |= G1_REG_DEC_CTRL4_BLACKWHITE_E;
 	if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
 		reg |= G1_REG_DEC_CTRL4_WEIGHT_PRED_E;
@@ -234,6 +234,7 @@  static void set_buffers(struct hantro_ctx *ctx)
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
 	struct hantro_dev *vpu = ctx->dev;
 	dma_addr_t src_dma, dst_dma;
+	size_t offset = 0;
 
 	src_buf = hantro_get_src_buf(ctx);
 	dst_buf = hantro_get_dst_buf(ctx);
@@ -244,18 +245,30 @@  static void set_buffers(struct hantro_ctx *ctx)
 
 	/* Destination (decoded frame) buffer. */
 	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
-	vdpu_write_relaxed(vpu, dst_dma, G1_REG_ADDR_DST);
+	/* Adjust dma addr to start at second line for bottom field */
+	if (ctrls->slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
+		offset = ALIGN(ctx->src_fmt.width, MB_DIM);
+	vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DST);
 
 	/* Higher profiles require DMV buffer appended to reference frames. */
 	if (ctrls->sps->profile_idc > 66 && ctrls->decode->nal_ref_idc) {
-		size_t pic_size = ctx->h264_dec.pic_size;
-		size_t mv_offset = round_up(pic_size, 8);
-
+		unsigned int bytes_per_mb = 384;
+
+		/* DMV buffer for monochrome start directly after Y-plane */
+		if (ctrls->sps->profile_idc >= 100 &&
+		    ctrls->sps->chroma_format_idc == 0)
+			bytes_per_mb = 256;
+		offset = bytes_per_mb * MB_WIDTH(ctx->src_fmt.width) *
+			 MB_HEIGHT(ctx->src_fmt.height);
+
+		/*
+		 * DMV buffer is split in two for field encoded frames,
+		 * adjust offset for bottom field
+		 */
 		if (ctrls->slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
-			mv_offset += 32 * MB_WIDTH(ctx->dst_fmt.width);
-
-		vdpu_write_relaxed(vpu, dst_dma + mv_offset,
-				   G1_REG_ADDR_DIR_MV);
+			offset += 32 * MB_WIDTH(ctx->src_fmt.width) *
+				  MB_HEIGHT(ctx->src_fmt.height);
+		vdpu_write_relaxed(vpu, dst_dma + offset, G1_REG_ADDR_DIR_MV);
 	}
 
 	/* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */