diff mbox series

[v7,11/11] media: hantro: Support NV12 on the G2 core

Message ID 20210929160439.6601-12-andrzej.p@collabora.com (mailing list archive)
State New, archived
Headers show
Series VP9 codec V4L2 control interface | expand

Commit Message

Andrzej Pietrasiewicz Sept. 29, 2021, 4:04 p.m. UTC
The G2 decoder block produces NV12 4x4 tiled format (NV12_4L4).
Enable the G2 post-processor block, in order to produce regular NV12.

The logic in hantro_postproc.c is leveraged to take care of allocating
the extra buffers and configure the post-processor, which is
significantly simpler than the one on the G1.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
---
 .../staging/media/hantro/hantro_g2_vp9_dec.c  |  6 ++--
 drivers/staging/media/hantro/hantro_hw.h      |  1 +
 .../staging/media/hantro/hantro_postproc.c    | 31 +++++++++++++++++++
 drivers/staging/media/hantro/imx8m_vpu_hw.c   | 11 +++++++
 4 files changed, 46 insertions(+), 3 deletions(-)

Comments

Jernej Škrabec Oct. 14, 2021, 5:42 p.m. UTC | #1
Hi Andrzej!

Dne sreda, 29. september 2021 ob 18:04:39 CEST je Andrzej Pietrasiewicz 
napisal(a):
> The G2 decoder block produces NV12 4x4 tiled format (NV12_4L4).
> Enable the G2 post-processor block, in order to produce regular NV12.
> 
> The logic in hantro_postproc.c is leveraged to take care of allocating
> the extra buffers and configure the post-processor, which is
> significantly simpler than the one on the G1.

Quick summary of discussion on LibreELEC Slack:
When using NV12 format on Allwinner H6 variant of G2 (needs some driver 
changes), I get frames out of order. If I use native NV12 tiled format, frames 
are ordered correctly.

Currently I'm not sure if this is issue with my changes or is this general 
issue.

I would be grateful if anyone can test frame order with and without 
postprocessing enabled on imx8. Take some dynamic video with a lot of short 
scenes. It's pretty obvious when frames are out of order.

However, given that frames themself are correctly decoded and without 
postprocessing in right order, that shouldn't block merging previous patches. 
I tried few different videos and frames were all decoded correctly.

Best regards,
Jernej

> 
> Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
> ---
>  .../staging/media/hantro/hantro_g2_vp9_dec.c  |  6 ++--
>  drivers/staging/media/hantro/hantro_hw.h      |  1 +
>  .../staging/media/hantro/hantro_postproc.c    | 31 +++++++++++++++++++
>  drivers/staging/media/hantro/imx8m_vpu_hw.c   | 11 +++++++
>  4 files changed, 46 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c b/drivers/
staging/media/hantro/hantro_g2_vp9_dec.c
> index 7f827b9f0133..1a26be72c878 100644
> --- a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
> +++ b/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
> @@ -152,7 +152,7 @@ static void config_output(struct hantro_ctx *ctx,
>  	hantro_reg_write(ctx->dev, &g2_out_dis, 0);
>  	hantro_reg_write(ctx->dev, &g2_output_format, 0);
>  
> -	luma_addr = vb2_dma_contig_plane_dma_addr(&dst->base.vb.vb2_buf, 
0);
> +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
>  	hantro_write_addr(ctx->dev, G2_OUT_LUMA_ADDR, luma_addr);
>  
>  	chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
> @@ -191,7 +191,7 @@ static void config_ref(struct hantro_ctx *ctx,
>  	hantro_reg_write(ctx->dev, &ref_reg->hor_scale, (refw << 14) / 
dst->vp9.width);
>  	hantro_reg_write(ctx->dev, &ref_reg->ver_scale, (refh << 14) / 
dst->vp9.height);
>  
> -	luma_addr = vb2_dma_contig_plane_dma_addr(&buf->base.vb.vb2_buf, 
0);
> +	luma_addr = hantro_get_dec_buf_addr(ctx, &buf->base.vb.vb2_buf);
>  	hantro_write_addr(ctx->dev, ref_reg->y_base, luma_addr);
>  
>  	chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
> @@ -236,7 +236,7 @@ static void config_ref_registers(struct hantro_ctx *ctx,
>  	config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params-
>golden_frame_ts);
>  	config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params-
>alt_frame_ts);
>  
> -	mv_addr = vb2_dma_contig_plane_dma_addr(&mv_ref->base.vb.vb2_buf, 
0) +
> +	mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) +
>  		  mv_offset(ctx, dec_params);
>  	hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr);
>  
> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/
media/hantro/hantro_hw.h
> index 2961d399fd60..3d4a5dc1e6d5 100644
> --- a/drivers/staging/media/hantro/hantro_hw.h
> +++ b/drivers/staging/media/hantro/hantro_hw.h
> @@ -274,6 +274,7 @@ extern const struct hantro_variant rk3399_vpu_variant;
>  extern const struct hantro_variant sama5d4_vdec_variant;
>  
>  extern const struct hantro_postproc_ops hantro_g1_postproc_ops;
> +extern const struct hantro_postproc_ops hantro_g2_postproc_ops;
>  
>  extern const u32 hantro_vp8_dec_mc_filter[8][6];
>  
> diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/
staging/media/hantro/hantro_postproc.c
> index 4549aec08feb..79a66d001738 100644
> --- a/drivers/staging/media/hantro/hantro_postproc.c
> +++ b/drivers/staging/media/hantro/hantro_postproc.c
> @@ -11,6 +11,7 @@
>  #include "hantro.h"
>  #include "hantro_hw.h"
>  #include "hantro_g1_regs.h"
> +#include "hantro_g2_regs.h"
>  
>  #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
>  { \
> @@ -99,6 +100,21 @@ static void hantro_postproc_g1_enable(struct hantro_ctx 
*ctx)
>  	HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
>  }
>  
> +static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +	struct vb2_v4l2_buffer *dst_buf;
> +	size_t chroma_offset = ctx->dst_fmt.width * ctx->dst_fmt.height;
> +	dma_addr_t dst_dma;
> +
> +	dst_buf = hantro_get_dst_buf(ctx);
> +	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> +
> +	hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
> +	hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma + 
chroma_offset);
> +	hantro_reg_write(vpu, &g2_out_rs_e, 1);
> +}
> +
>  void hantro_postproc_free(struct hantro_ctx *ctx)
>  {
>  	struct hantro_dev *vpu = ctx->dev;
> @@ -127,6 +143,9 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
>  	if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
>  		buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
>  						ctx-
>dst_fmt.height);
> +	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
> +		buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
> +					       ctx-
>dst_fmt.height);
>  
>  	for (i = 0; i < num_buffers; ++i) {
>  		struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
> @@ -152,6 +171,13 @@ static void hantro_postproc_g1_disable(struct 
hantro_ctx *ctx)
>  	HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
>  }
>  
> +static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
> +{
> +	struct hantro_dev *vpu = ctx->dev;
> +
> +	hantro_reg_write(vpu, &g2_out_rs_e, 0);
> +}
> +
>  void hantro_postproc_disable(struct hantro_ctx *ctx)
>  {
>  	struct hantro_dev *vpu = ctx->dev;
> @@ -172,3 +198,8 @@ const struct hantro_postproc_ops hantro_g1_postproc_ops 
= {
>  	.enable = hantro_postproc_g1_enable,
>  	.disable = hantro_postproc_g1_disable,
>  };
> +
> +const struct hantro_postproc_ops hantro_g2_postproc_ops = {
> +	.enable = hantro_postproc_g2_enable,
> +	.disable = hantro_postproc_g2_disable,
> +};
> diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/
media/hantro/imx8m_vpu_hw.c
> index 455a107ffb02..1a43f6fceef9 100644
> --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
> +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
> @@ -132,6 +132,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
>  	},
>  };
>  
> +static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
> +	{
> +		.fourcc = V4L2_PIX_FMT_NV12,
> +		.codec_mode = HANTRO_MODE_NONE,
> +		.postprocessed = true,
> +	},
> +};
> +
>  static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
>  	{
>  		.fourcc = V4L2_PIX_FMT_NV12_4L4,
> @@ -301,6 +309,9 @@ const struct hantro_variant imx8mq_vpu_g2_variant = {
>  	.dec_offset = 0x0,
>  	.dec_fmts = imx8m_vpu_g2_dec_fmts,
>  	.num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts),
> +	.postproc_fmts = imx8m_vpu_g2_postproc_fmts,
> +	.num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_g2_postproc_fmts),
> +	.postproc_ops = &hantro_g2_postproc_ops,
>  	.codec = HANTRO_HEVC_DECODER | HANTRO_VP9_DECODER,
>  	.codec_ops = imx8mq_vpu_g2_codec_ops,
>  	.init = imx8mq_vpu_hw_init,
> -- 
> 2.17.1
> 
>
Andrzej Pietrasiewicz Oct. 15, 2021, 5:19 p.m. UTC | #2
Hi Jernej,

W dniu 14.10.2021 o 19:42, Jernej Škrabec pisze:
> Hi Andrzej!
> 
> Dne sreda, 29. september 2021 ob 18:04:39 CEST je Andrzej Pietrasiewicz
> napisal(a):
>> The G2 decoder block produces NV12 4x4 tiled format (NV12_4L4).
>> Enable the G2 post-processor block, in order to produce regular NV12.
>>
>> The logic in hantro_postproc.c is leveraged to take care of allocating
>> the extra buffers and configure the post-processor, which is
>> significantly simpler than the one on the G1.
> 
> Quick summary of discussion on LibreELEC Slack:
> When using NV12 format on Allwinner H6 variant of G2 (needs some driver
> changes), I get frames out of order. If I use native NV12 tiled format, frames
> are ordered correctly.
> 
> Currently I'm not sure if this is issue with my changes or is this general
> issue.
> 
> I would be grateful if anyone can test frame order with and without
> postprocessing enabled on imx8. Take some dynamic video with a lot of short
> scenes. It's pretty obvious when frames are out of order.
> 

I checked on imx8 and cannot observe any such artifacts.

Andrzej

> However, given that frames themself are correctly decoded and without
> postprocessing in right order, that shouldn't block merging previous patches.
> I tried few different videos and frames were all decoded correctly.
> 
> Best regards,
> Jernej
> 
>>
>> Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
>> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
>> ---
>>   .../staging/media/hantro/hantro_g2_vp9_dec.c  |  6 ++--
>>   drivers/staging/media/hantro/hantro_hw.h      |  1 +
>>   .../staging/media/hantro/hantro_postproc.c    | 31 +++++++++++++++++++
>>   drivers/staging/media/hantro/imx8m_vpu_hw.c   | 11 +++++++
>>   4 files changed, 46 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c b/drivers/
> staging/media/hantro/hantro_g2_vp9_dec.c
>> index 7f827b9f0133..1a26be72c878 100644
>> --- a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
>> +++ b/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
>> @@ -152,7 +152,7 @@ static void config_output(struct hantro_ctx *ctx,
>>   	hantro_reg_write(ctx->dev, &g2_out_dis, 0);
>>   	hantro_reg_write(ctx->dev, &g2_output_format, 0);
>>   
>> -	luma_addr = vb2_dma_contig_plane_dma_addr(&dst->base.vb.vb2_buf,
> 0);
>> +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
>>   	hantro_write_addr(ctx->dev, G2_OUT_LUMA_ADDR, luma_addr);
>>   
>>   	chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
>> @@ -191,7 +191,7 @@ static void config_ref(struct hantro_ctx *ctx,
>>   	hantro_reg_write(ctx->dev, &ref_reg->hor_scale, (refw << 14) /
> dst->vp9.width);
>>   	hantro_reg_write(ctx->dev, &ref_reg->ver_scale, (refh << 14) /
> dst->vp9.height);
>>   
>> -	luma_addr = vb2_dma_contig_plane_dma_addr(&buf->base.vb.vb2_buf,
> 0);
>> +	luma_addr = hantro_get_dec_buf_addr(ctx, &buf->base.vb.vb2_buf);
>>   	hantro_write_addr(ctx->dev, ref_reg->y_base, luma_addr);
>>   
>>   	chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
>> @@ -236,7 +236,7 @@ static void config_ref_registers(struct hantro_ctx *ctx,
>>   	config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params-
>> golden_frame_ts);
>>   	config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params-
>> alt_frame_ts);
>>   
>> -	mv_addr = vb2_dma_contig_plane_dma_addr(&mv_ref->base.vb.vb2_buf,
> 0) +
>> +	mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) +
>>   		  mv_offset(ctx, dec_params);
>>   	hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr);
>>   
>> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/
> media/hantro/hantro_hw.h
>> index 2961d399fd60..3d4a5dc1e6d5 100644
>> --- a/drivers/staging/media/hantro/hantro_hw.h
>> +++ b/drivers/staging/media/hantro/hantro_hw.h
>> @@ -274,6 +274,7 @@ extern const struct hantro_variant rk3399_vpu_variant;
>>   extern const struct hantro_variant sama5d4_vdec_variant;
>>   
>>   extern const struct hantro_postproc_ops hantro_g1_postproc_ops;
>> +extern const struct hantro_postproc_ops hantro_g2_postproc_ops;
>>   
>>   extern const u32 hantro_vp8_dec_mc_filter[8][6];
>>   
>> diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/
> staging/media/hantro/hantro_postproc.c
>> index 4549aec08feb..79a66d001738 100644
>> --- a/drivers/staging/media/hantro/hantro_postproc.c
>> +++ b/drivers/staging/media/hantro/hantro_postproc.c
>> @@ -11,6 +11,7 @@
>>   #include "hantro.h"
>>   #include "hantro_hw.h"
>>   #include "hantro_g1_regs.h"
>> +#include "hantro_g2_regs.h"
>>   
>>   #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
>>   { \
>> @@ -99,6 +100,21 @@ static void hantro_postproc_g1_enable(struct hantro_ctx
> *ctx)
>>   	HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
>>   }
>>   
>> +static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +	struct vb2_v4l2_buffer *dst_buf;
>> +	size_t chroma_offset = ctx->dst_fmt.width * ctx->dst_fmt.height;
>> +	dma_addr_t dst_dma;
>> +
>> +	dst_buf = hantro_get_dst_buf(ctx);
>> +	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
>> +
>> +	hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
>> +	hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma +
> chroma_offset);
>> +	hantro_reg_write(vpu, &g2_out_rs_e, 1);
>> +}
>> +
>>   void hantro_postproc_free(struct hantro_ctx *ctx)
>>   {
>>   	struct hantro_dev *vpu = ctx->dev;
>> @@ -127,6 +143,9 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
>>   	if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
>>   		buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
>>   						ctx-
>> dst_fmt.height);
>> +	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
>> +		buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
>> +					       ctx-
>> dst_fmt.height);
>>   
>>   	for (i = 0; i < num_buffers; ++i) {
>>   		struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
>> @@ -152,6 +171,13 @@ static void hantro_postproc_g1_disable(struct
> hantro_ctx *ctx)
>>   	HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
>>   }
>>   
>> +static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
>> +{
>> +	struct hantro_dev *vpu = ctx->dev;
>> +
>> +	hantro_reg_write(vpu, &g2_out_rs_e, 0);
>> +}
>> +
>>   void hantro_postproc_disable(struct hantro_ctx *ctx)
>>   {
>>   	struct hantro_dev *vpu = ctx->dev;
>> @@ -172,3 +198,8 @@ const struct hantro_postproc_ops hantro_g1_postproc_ops
> = {
>>   	.enable = hantro_postproc_g1_enable,
>>   	.disable = hantro_postproc_g1_disable,
>>   };
>> +
>> +const struct hantro_postproc_ops hantro_g2_postproc_ops = {
>> +	.enable = hantro_postproc_g2_enable,
>> +	.disable = hantro_postproc_g2_disable,
>> +};
>> diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/
> media/hantro/imx8m_vpu_hw.c
>> index 455a107ffb02..1a43f6fceef9 100644
>> --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
>> +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
>> @@ -132,6 +132,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
>>   	},
>>   };
>>   
>> +static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
>> +	{
>> +		.fourcc = V4L2_PIX_FMT_NV12,
>> +		.codec_mode = HANTRO_MODE_NONE,
>> +		.postprocessed = true,
>> +	},
>> +};
>> +
>>   static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
>>   	{
>>   		.fourcc = V4L2_PIX_FMT_NV12_4L4,
>> @@ -301,6 +309,9 @@ const struct hantro_variant imx8mq_vpu_g2_variant = {
>>   	.dec_offset = 0x0,
>>   	.dec_fmts = imx8m_vpu_g2_dec_fmts,
>>   	.num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts),
>> +	.postproc_fmts = imx8m_vpu_g2_postproc_fmts,
>> +	.num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_g2_postproc_fmts),
>> +	.postproc_ops = &hantro_g2_postproc_ops,
>>   	.codec = HANTRO_HEVC_DECODER | HANTRO_VP9_DECODER,
>>   	.codec_ops = imx8mq_vpu_g2_codec_ops,
>>   	.init = imx8mq_vpu_hw_init,
>> -- 
>> 2.17.1
>>
>>
> 
>
Jernej Škrabec Oct. 19, 2021, 4:38 p.m. UTC | #3
Hi Andrzej!

Dne petek, 15. oktober 2021 ob 19:19:47 CEST je Andrzej Pietrasiewicz 
napisal(a):
> Hi Jernej,
> 
> W dniu 14.10.2021 o 19:42, Jernej Škrabec pisze:
> > Hi Andrzej!
> > 
> > Dne sreda, 29. september 2021 ob 18:04:39 CEST je Andrzej Pietrasiewicz
> > napisal(a):
> >> The G2 decoder block produces NV12 4x4 tiled format (NV12_4L4).
> >> Enable the G2 post-processor block, in order to produce regular NV12.
> >>
> >> The logic in hantro_postproc.c is leveraged to take care of allocating
> >> the extra buffers and configure the post-processor, which is
> >> significantly simpler than the one on the G1.
> > 
> > Quick summary of discussion on LibreELEC Slack:
> > When using NV12 format on Allwinner H6 variant of G2 (needs some driver
> > changes), I get frames out of order. If I use native NV12 tiled format, 
frames
> > are ordered correctly.
> > 
> > Currently I'm not sure if this is issue with my changes or is this general
> > issue.
> > 
> > I would be grateful if anyone can test frame order with and without
> > postprocessing enabled on imx8. Take some dynamic video with a lot of 
short
> > scenes. It's pretty obvious when frames are out of order.
> > 
> 
> I checked on imx8 and cannot observe any such artifacts.

I finally found the issue. As you mentioned on Slack, register write order once 
already affected decoding. Well, it's the case again. I made hacky test and 
moved postproc enable call after output buffers are set and it worked. So, this 
is actually core quirk which is obviously fixed in newer variants.

This makes this series with minor adaptations completely working on H6. I see 
no reason not to merge whole series.

Thanks for testing.

Best regards,
Jernej

> 
> Andrzej
> 
> > However, given that frames themself are correctly decoded and without
> > postprocessing in right order, that shouldn't block merging previous 
patches.
> > I tried few different videos and frames were all decoded correctly.
> > 
> > Best regards,
> > Jernej
> > 
> >>
> >> Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
> >> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
> >> ---
> >>   .../staging/media/hantro/hantro_g2_vp9_dec.c  |  6 ++--
> >>   drivers/staging/media/hantro/hantro_hw.h      |  1 +
> >>   .../staging/media/hantro/hantro_postproc.c    | 31 +++++++++++++++++++
> >>   drivers/staging/media/hantro/imx8m_vpu_hw.c   | 11 +++++++
> >>   4 files changed, 46 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c b/drivers/
> > staging/media/hantro/hantro_g2_vp9_dec.c
> >> index 7f827b9f0133..1a26be72c878 100644
> >> --- a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
> >> +++ b/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
> >> @@ -152,7 +152,7 @@ static void config_output(struct hantro_ctx *ctx,
> >>   	hantro_reg_write(ctx->dev, &g2_out_dis, 0);
> >>   	hantro_reg_write(ctx->dev, &g2_output_format, 0);
> >>   
> >> -	luma_addr = vb2_dma_contig_plane_dma_addr(&dst->base.vb.vb2_buf,
> > 0);
> >> +	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
> >>   	hantro_write_addr(ctx->dev, G2_OUT_LUMA_ADDR, luma_addr);
> >>   
> >>   	chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
> >> @@ -191,7 +191,7 @@ static void config_ref(struct hantro_ctx *ctx,
> >>   	hantro_reg_write(ctx->dev, &ref_reg->hor_scale, (refw << 14) /
> > dst->vp9.width);
> >>   	hantro_reg_write(ctx->dev, &ref_reg->ver_scale, (refh << 14) /
> > dst->vp9.height);
> >>   
> >> -	luma_addr = vb2_dma_contig_plane_dma_addr(&buf->base.vb.vb2_buf,
> > 0);
> >> +	luma_addr = hantro_get_dec_buf_addr(ctx, &buf->base.vb.vb2_buf);
> >>   	hantro_write_addr(ctx->dev, ref_reg->y_base, luma_addr);
> >>   
> >>   	chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
> >> @@ -236,7 +236,7 @@ static void config_ref_registers(struct hantro_ctx 
*ctx,
> >>   	config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params-
> >> golden_frame_ts);
> >>   	config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params-
> >> alt_frame_ts);
> >>   
> >> -	mv_addr = vb2_dma_contig_plane_dma_addr(&mv_ref->base.vb.vb2_buf,
> > 0) +
> >> +	mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) +
> >>   		  mv_offset(ctx, dec_params);
> >>   	hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr);
> >>   
> >> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/
> > media/hantro/hantro_hw.h
> >> index 2961d399fd60..3d4a5dc1e6d5 100644
> >> --- a/drivers/staging/media/hantro/hantro_hw.h
> >> +++ b/drivers/staging/media/hantro/hantro_hw.h
> >> @@ -274,6 +274,7 @@ extern const struct hantro_variant 
rk3399_vpu_variant;
> >>   extern const struct hantro_variant sama5d4_vdec_variant;
> >>   
> >>   extern const struct hantro_postproc_ops hantro_g1_postproc_ops;
> >> +extern const struct hantro_postproc_ops hantro_g2_postproc_ops;
> >>   
> >>   extern const u32 hantro_vp8_dec_mc_filter[8][6];
> >>   
> >> diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/
> > staging/media/hantro/hantro_postproc.c
> >> index 4549aec08feb..79a66d001738 100644
> >> --- a/drivers/staging/media/hantro/hantro_postproc.c
> >> +++ b/drivers/staging/media/hantro/hantro_postproc.c
> >> @@ -11,6 +11,7 @@
> >>   #include "hantro.h"
> >>   #include "hantro_hw.h"
> >>   #include "hantro_g1_regs.h"
> >> +#include "hantro_g2_regs.h"
> >>   
> >>   #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
> >>   { \
> >> @@ -99,6 +100,21 @@ static void hantro_postproc_g1_enable(struct 
hantro_ctx
> > *ctx)
> >>   	HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
> >>   }
> >>   
> >> +static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
> >> +{
> >> +	struct hantro_dev *vpu = ctx->dev;
> >> +	struct vb2_v4l2_buffer *dst_buf;
> >> +	size_t chroma_offset = ctx->dst_fmt.width * ctx->dst_fmt.height;
> >> +	dma_addr_t dst_dma;
> >> +
> >> +	dst_buf = hantro_get_dst_buf(ctx);
> >> +	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> >> +
> >> +	hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
> >> +	hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma +
> > chroma_offset);
> >> +	hantro_reg_write(vpu, &g2_out_rs_e, 1);
> >> +}
> >> +
> >>   void hantro_postproc_free(struct hantro_ctx *ctx)
> >>   {
> >>   	struct hantro_dev *vpu = ctx->dev;
> >> @@ -127,6 +143,9 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
> >>   	if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
> >>   		buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
> >>   						ctx-
> >> dst_fmt.height);
> >> +	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
> >> +		buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
> >> +					       ctx-
> >> dst_fmt.height);
> >>   
> >>   	for (i = 0; i < num_buffers; ++i) {
> >>   		struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
> >> @@ -152,6 +171,13 @@ static void hantro_postproc_g1_disable(struct
> > hantro_ctx *ctx)
> >>   	HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
> >>   }
> >>   
> >> +static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
> >> +{
> >> +	struct hantro_dev *vpu = ctx->dev;
> >> +
> >> +	hantro_reg_write(vpu, &g2_out_rs_e, 0);
> >> +}
> >> +
> >>   void hantro_postproc_disable(struct hantro_ctx *ctx)
> >>   {
> >>   	struct hantro_dev *vpu = ctx->dev;
> >> @@ -172,3 +198,8 @@ const struct hantro_postproc_ops 
hantro_g1_postproc_ops
> > = {
> >>   	.enable = hantro_postproc_g1_enable,
> >>   	.disable = hantro_postproc_g1_disable,
> >>   };
> >> +
> >> +const struct hantro_postproc_ops hantro_g2_postproc_ops = {
> >> +	.enable = hantro_postproc_g2_enable,
> >> +	.disable = hantro_postproc_g2_disable,
> >> +};
> >> diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/
staging/
> > media/hantro/imx8m_vpu_hw.c
> >> index 455a107ffb02..1a43f6fceef9 100644
> >> --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
> >> +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
> >> @@ -132,6 +132,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] 
= {
> >>   	},
> >>   };
> >>   
> >> +static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
> >> +	{
> >> +		.fourcc = V4L2_PIX_FMT_NV12,
> >> +		.codec_mode = HANTRO_MODE_NONE,
> >> +		.postprocessed = true,
> >> +	},
> >> +};
> >> +
> >>   static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
> >>   	{
> >>   		.fourcc = V4L2_PIX_FMT_NV12_4L4,
> >> @@ -301,6 +309,9 @@ const struct hantro_variant imx8mq_vpu_g2_variant = {
> >>   	.dec_offset = 0x0,
> >>   	.dec_fmts = imx8m_vpu_g2_dec_fmts,
> >>   	.num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts),
> >> +	.postproc_fmts = imx8m_vpu_g2_postproc_fmts,
> >> +	.num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_g2_postproc_fmts),
> >> +	.postproc_ops = &hantro_g2_postproc_ops,
> >>   	.codec = HANTRO_HEVC_DECODER | HANTRO_VP9_DECODER,
> >>   	.codec_ops = imx8mq_vpu_g2_codec_ops,
> >>   	.init = imx8mq_vpu_hw_init,
> >> -- 
> >> 2.17.1
> >>
> >>
> > 
> > 
> 
>
Ezequiel Garcia Oct. 20, 2021, 11:06 a.m. UTC | #4
Hi Jernej,

On Tue, 19 Oct 2021 at 13:38, Jernej Škrabec <jernej.skrabec@gmail.com> wrote:
>
> Hi Andrzej!
>
> Dne petek, 15. oktober 2021 ob 19:19:47 CEST je Andrzej Pietrasiewicz
> napisal(a):
> > Hi Jernej,
> >
> > W dniu 14.10.2021 o 19:42, Jernej Škrabec pisze:
> > > Hi Andrzej!
> > >
> > > Dne sreda, 29. september 2021 ob 18:04:39 CEST je Andrzej Pietrasiewicz
> > > napisal(a):
> > >> The G2 decoder block produces NV12 4x4 tiled format (NV12_4L4).
> > >> Enable the G2 post-processor block, in order to produce regular NV12.
> > >>
> > >> The logic in hantro_postproc.c is leveraged to take care of allocating
> > >> the extra buffers and configure the post-processor, which is
> > >> significantly simpler than the one on the G1.
> > >
> > > Quick summary of discussion on LibreELEC Slack:
> > > When using NV12 format on Allwinner H6 variant of G2 (needs some driver
> > > changes), I get frames out of order. If I use native NV12 tiled format,
> frames
> > > are ordered correctly.
> > >
> > > Currently I'm not sure if this is issue with my changes or is this general
> > > issue.
> > >
> > > I would be grateful if anyone can test frame order with and without
> > > postprocessing enabled on imx8. Take some dynamic video with a lot of
> short
> > > scenes. It's pretty obvious when frames are out of order.
> > >
> >
> > I checked on imx8 and cannot observe any such artifacts.
>
> I finally found the issue. As you mentioned on Slack, register write order once
> already affected decoding. Well, it's the case again. I made hacky test and
> moved postproc enable call after output buffers are set and it worked. So, this
> is actually core quirk which is obviously fixed in newer variants.
>

Ugh, good catch.

What happens if you move all the calls to HANTRO_PP_REG_WRITE_S
(HANTRO_PP_REG_WRITE does a relaxed write)?

Or what happens if the HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma)
is moved to be done after all the other registers?

> This makes this series with minor adaptations completely working on H6. I see
> no reason not to merge whole series.
>

Do you have plans to submit your H6 work on top of this?

Thanks,
Ezequiel


> Thanks for testing.
>
> Best regards,
> Jernej
>
> >
> > Andrzej
> >
> > > However, given that frames themself are correctly decoded and without
> > > postprocessing in right order, that shouldn't block merging previous
> patches.
> > > I tried few different videos and frames were all decoded correctly.
> > >
> > > Best regards,
> > > Jernej
> > >
> > >>
> > >> Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
> > >> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
> > >> ---
> > >>   .../staging/media/hantro/hantro_g2_vp9_dec.c  |  6 ++--
> > >>   drivers/staging/media/hantro/hantro_hw.h      |  1 +
> > >>   .../staging/media/hantro/hantro_postproc.c    | 31 +++++++++++++++++++
> > >>   drivers/staging/media/hantro/imx8m_vpu_hw.c   | 11 +++++++
> > >>   4 files changed, 46 insertions(+), 3 deletions(-)
> > >>
> > >> diff --git a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c b/drivers/
> > > staging/media/hantro/hantro_g2_vp9_dec.c
> > >> index 7f827b9f0133..1a26be72c878 100644
> > >> --- a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
> > >> +++ b/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
> > >> @@ -152,7 +152,7 @@ static void config_output(struct hantro_ctx *ctx,
> > >>    hantro_reg_write(ctx->dev, &g2_out_dis, 0);
> > >>    hantro_reg_write(ctx->dev, &g2_output_format, 0);
> > >>
> > >> -  luma_addr = vb2_dma_contig_plane_dma_addr(&dst->base.vb.vb2_buf,
> > > 0);
> > >> +  luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
> > >>    hantro_write_addr(ctx->dev, G2_OUT_LUMA_ADDR, luma_addr);
> > >>
> > >>    chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
> > >> @@ -191,7 +191,7 @@ static void config_ref(struct hantro_ctx *ctx,
> > >>    hantro_reg_write(ctx->dev, &ref_reg->hor_scale, (refw << 14) /
> > > dst->vp9.width);
> > >>    hantro_reg_write(ctx->dev, &ref_reg->ver_scale, (refh << 14) /
> > > dst->vp9.height);
> > >>
> > >> -  luma_addr = vb2_dma_contig_plane_dma_addr(&buf->base.vb.vb2_buf,
> > > 0);
> > >> +  luma_addr = hantro_get_dec_buf_addr(ctx, &buf->base.vb.vb2_buf);
> > >>    hantro_write_addr(ctx->dev, ref_reg->y_base, luma_addr);
> > >>
> > >>    chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
> > >> @@ -236,7 +236,7 @@ static void config_ref_registers(struct hantro_ctx
> *ctx,
> > >>    config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params-
> > >> golden_frame_ts);
> > >>    config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params-
> > >> alt_frame_ts);
> > >>
> > >> -  mv_addr = vb2_dma_contig_plane_dma_addr(&mv_ref->base.vb.vb2_buf,
> > > 0) +
> > >> +  mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) +
> > >>              mv_offset(ctx, dec_params);
> > >>    hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr);
> > >>
> > >> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/
> > > media/hantro/hantro_hw.h
> > >> index 2961d399fd60..3d4a5dc1e6d5 100644
> > >> --- a/drivers/staging/media/hantro/hantro_hw.h
> > >> +++ b/drivers/staging/media/hantro/hantro_hw.h
> > >> @@ -274,6 +274,7 @@ extern const struct hantro_variant
> rk3399_vpu_variant;
> > >>   extern const struct hantro_variant sama5d4_vdec_variant;
> > >>
> > >>   extern const struct hantro_postproc_ops hantro_g1_postproc_ops;
> > >> +extern const struct hantro_postproc_ops hantro_g2_postproc_ops;
> > >>
> > >>   extern const u32 hantro_vp8_dec_mc_filter[8][6];
> > >>
> > >> diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/
> > > staging/media/hantro/hantro_postproc.c
> > >> index 4549aec08feb..79a66d001738 100644
> > >> --- a/drivers/staging/media/hantro/hantro_postproc.c
> > >> +++ b/drivers/staging/media/hantro/hantro_postproc.c
> > >> @@ -11,6 +11,7 @@
> > >>   #include "hantro.h"
> > >>   #include "hantro_hw.h"
> > >>   #include "hantro_g1_regs.h"
> > >> +#include "hantro_g2_regs.h"
> > >>
> > >>   #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
> > >>   { \
> > >> @@ -99,6 +100,21 @@ static void hantro_postproc_g1_enable(struct
> hantro_ctx
> > > *ctx)
> > >>    HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
> > >>   }
> > >>
> > >> +static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
> > >> +{
> > >> +  struct hantro_dev *vpu = ctx->dev;
> > >> +  struct vb2_v4l2_buffer *dst_buf;
> > >> +  size_t chroma_offset = ctx->dst_fmt.width * ctx->dst_fmt.height;
> > >> +  dma_addr_t dst_dma;
> > >> +
> > >> +  dst_buf = hantro_get_dst_buf(ctx);
> > >> +  dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> > >> +
> > >> +  hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
> > >> +  hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma +
> > > chroma_offset);
> > >> +  hantro_reg_write(vpu, &g2_out_rs_e, 1);
> > >> +}
> > >> +
> > >>   void hantro_postproc_free(struct hantro_ctx *ctx)
> > >>   {
> > >>    struct hantro_dev *vpu = ctx->dev;
> > >> @@ -127,6 +143,9 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
> > >>    if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
> > >>            buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
> > >>                                            ctx-
> > >> dst_fmt.height);
> > >> +  else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
> > >> +          buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
> > >> +                                         ctx-
> > >> dst_fmt.height);
> > >>
> > >>    for (i = 0; i < num_buffers; ++i) {
> > >>            struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
> > >> @@ -152,6 +171,13 @@ static void hantro_postproc_g1_disable(struct
> > > hantro_ctx *ctx)
> > >>    HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
> > >>   }
> > >>
> > >> +static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
> > >> +{
> > >> +  struct hantro_dev *vpu = ctx->dev;
> > >> +
> > >> +  hantro_reg_write(vpu, &g2_out_rs_e, 0);
> > >> +}
> > >> +
> > >>   void hantro_postproc_disable(struct hantro_ctx *ctx)
> > >>   {
> > >>    struct hantro_dev *vpu = ctx->dev;
> > >> @@ -172,3 +198,8 @@ const struct hantro_postproc_ops
> hantro_g1_postproc_ops
> > > = {
> > >>    .enable = hantro_postproc_g1_enable,
> > >>    .disable = hantro_postproc_g1_disable,
> > >>   };
> > >> +
> > >> +const struct hantro_postproc_ops hantro_g2_postproc_ops = {
> > >> +  .enable = hantro_postproc_g2_enable,
> > >> +  .disable = hantro_postproc_g2_disable,
> > >> +};
> > >> diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/
> staging/
> > > media/hantro/imx8m_vpu_hw.c
> > >> index 455a107ffb02..1a43f6fceef9 100644
> > >> --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
> > >> +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
> > >> @@ -132,6 +132,14 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[]
> = {
> > >>    },
> > >>   };
> > >>
> > >> +static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
> > >> +  {
> > >> +          .fourcc = V4L2_PIX_FMT_NV12,
> > >> +          .codec_mode = HANTRO_MODE_NONE,
> > >> +          .postprocessed = true,
> > >> +  },
> > >> +};
> > >> +
> > >>   static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
> > >>    {
> > >>            .fourcc = V4L2_PIX_FMT_NV12_4L4,
> > >> @@ -301,6 +309,9 @@ const struct hantro_variant imx8mq_vpu_g2_variant = {
> > >>    .dec_offset = 0x0,
> > >>    .dec_fmts = imx8m_vpu_g2_dec_fmts,
> > >>    .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts),
> > >> +  .postproc_fmts = imx8m_vpu_g2_postproc_fmts,
> > >> +  .num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_g2_postproc_fmts),
> > >> +  .postproc_ops = &hantro_g2_postproc_ops,
> > >>    .codec = HANTRO_HEVC_DECODER | HANTRO_VP9_DECODER,
> > >>    .codec_ops = imx8mq_vpu_g2_codec_ops,
> > >>    .init = imx8mq_vpu_hw_init,
> > >> --
> > >> 2.17.1
> > >>
> > >>
> > >
> > >
> >
> >
>
>
Jernej Škrabec Oct. 20, 2021, 3:04 p.m. UTC | #5
Dne sreda, 20. oktober 2021 ob 13:06:59 CEST je Ezequiel Garcia napisal(a):
> Hi Jernej,
> 
> On Tue, 19 Oct 2021 at 13:38, Jernej Škrabec <jernej.skrabec@gmail.com> 
wrote:
> >
> > Hi Andrzej!
> >
> > Dne petek, 15. oktober 2021 ob 19:19:47 CEST je Andrzej Pietrasiewicz
> > napisal(a):
> > > Hi Jernej,
> > >
> > > W dniu 14.10.2021 o 19:42, Jernej Škrabec pisze:
> > > > Hi Andrzej!
> > > >
> > > > Dne sreda, 29. september 2021 ob 18:04:39 CEST je Andrzej 
Pietrasiewicz
> > > > napisal(a):
> > > >> The G2 decoder block produces NV12 4x4 tiled format (NV12_4L4).
> > > >> Enable the G2 post-processor block, in order to produce regular NV12.
> > > >>
> > > >> The logic in hantro_postproc.c is leveraged to take care of 
allocating
> > > >> the extra buffers and configure the post-processor, which is
> > > >> significantly simpler than the one on the G1.
> > > >
> > > > Quick summary of discussion on LibreELEC Slack:
> > > > When using NV12 format on Allwinner H6 variant of G2 (needs some 
driver
> > > > changes), I get frames out of order. If I use native NV12 tiled 
format,
> > frames
> > > > are ordered correctly.
> > > >
> > > > Currently I'm not sure if this is issue with my changes or is this 
general
> > > > issue.
> > > >
> > > > I would be grateful if anyone can test frame order with and without
> > > > postprocessing enabled on imx8. Take some dynamic video with a lot of
> > short
> > > > scenes. It's pretty obvious when frames are out of order.
> > > >
> > >
> > > I checked on imx8 and cannot observe any such artifacts.
> >
> > I finally found the issue. As you mentioned on Slack, register write order 
once
> > already affected decoding. Well, it's the case again. I made hacky test and
> > moved postproc enable call after output buffers are set and it worked. So, 
this
> > is actually core quirk which is obviously fixed in newer variants.
> >
> 
> Ugh, good catch.
> 
> What happens if you move all the calls to HANTRO_PP_REG_WRITE_S
> (HANTRO_PP_REG_WRITE does a relaxed write)?
> 
> Or what happens if the HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma)
> is moved to be done after all the other registers?

Those two macros aren't used on G2. Andrzej introduced new postproc helpers 
for G2.

This commit solves issue for H6:
https://github.com/jernejsk/linux-1/commit/
a783a977c0843bb4b555dc9d0b5d64915cd219e7

> 
> > This makes this series with minor adaptations completely working on H6. I 
see
> > no reason not to merge whole series.
> >
> 
> Do you have plans to submit your H6 work on top of this?

Of course, why would I work on this otherwise? :) But before I do that, I have 
to clean up and split one commit, which adapts VP9 G2 code for H6 variant.

If you're interested in changes, take a look here:
https://github.com/jernejsk/linux-1/commits/vp9

Best regards,
Jernej

> 
> Thanks,
> Ezequiel
> 
> 
> > Thanks for testing.
> >
> > Best regards,
> > Jernej
> >
> > >
> > > Andrzej
> > >
> > > > However, given that frames themself are correctly decoded and without
> > > > postprocessing in right order, that shouldn't block merging previous
> > patches.
> > > > I tried few different videos and frames were all decoded correctly.
> > > >
> > > > Best regards,
> > > > Jernej
> > > >
> > > >>
> > > >> Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
> > > >> Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
> > > >> ---
> > > >>   .../staging/media/hantro/hantro_g2_vp9_dec.c  |  6 ++--
> > > >>   drivers/staging/media/hantro/hantro_hw.h      |  1 +
> > > >>   .../staging/media/hantro/hantro_postproc.c    | 31 ++++++++++++++++
+++
> > > >>   drivers/staging/media/hantro/imx8m_vpu_hw.c   | 11 +++++++
> > > >>   4 files changed, 46 insertions(+), 3 deletions(-)
> > > >>
> > > >> diff --git a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c b/
drivers/
> > > > staging/media/hantro/hantro_g2_vp9_dec.c
> > > >> index 7f827b9f0133..1a26be72c878 100644
> > > >> --- a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
> > > >> +++ b/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
> > > >> @@ -152,7 +152,7 @@ static void config_output(struct hantro_ctx *ctx,
> > > >>    hantro_reg_write(ctx->dev, &g2_out_dis, 0);
> > > >>    hantro_reg_write(ctx->dev, &g2_output_format, 0);
> > > >>
> > > >> -  luma_addr = vb2_dma_contig_plane_dma_addr(&dst->base.vb.vb2_buf,
> > > > 0);
> > > >> +  luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
> > > >>    hantro_write_addr(ctx->dev, G2_OUT_LUMA_ADDR, luma_addr);
> > > >>
> > > >>    chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
> > > >> @@ -191,7 +191,7 @@ static void config_ref(struct hantro_ctx *ctx,
> > > >>    hantro_reg_write(ctx->dev, &ref_reg->hor_scale, (refw << 14) /
> > > > dst->vp9.width);
> > > >>    hantro_reg_write(ctx->dev, &ref_reg->ver_scale, (refh << 14) /
> > > > dst->vp9.height);
> > > >>
> > > >> -  luma_addr = vb2_dma_contig_plane_dma_addr(&buf->base.vb.vb2_buf,
> > > > 0);
> > > >> +  luma_addr = hantro_get_dec_buf_addr(ctx, &buf->base.vb.vb2_buf);
> > > >>    hantro_write_addr(ctx->dev, ref_reg->y_base, luma_addr);
> > > >>
> > > >>    chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
> > > >> @@ -236,7 +236,7 @@ static void config_ref_registers(struct hantro_ctx
> > *ctx,
> > > >>    config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params-
> > > >> golden_frame_ts);
> > > >>    config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params-
> > > >> alt_frame_ts);
> > > >>
> > > >> -  mv_addr = vb2_dma_contig_plane_dma_addr(&mv_ref->base.vb.vb2_buf,
> > > > 0) +
> > > >> +  mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) +
> > > >>              mv_offset(ctx, dec_params);
> > > >>    hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr);
> > > >>
> > > >> diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/
staging/
> > > > media/hantro/hantro_hw.h
> > > >> index 2961d399fd60..3d4a5dc1e6d5 100644
> > > >> --- a/drivers/staging/media/hantro/hantro_hw.h
> > > >> +++ b/drivers/staging/media/hantro/hantro_hw.h
> > > >> @@ -274,6 +274,7 @@ extern const struct hantro_variant
> > rk3399_vpu_variant;
> > > >>   extern const struct hantro_variant sama5d4_vdec_variant;
> > > >>
> > > >>   extern const struct hantro_postproc_ops hantro_g1_postproc_ops;
> > > >> +extern const struct hantro_postproc_ops hantro_g2_postproc_ops;
> > > >>
> > > >>   extern const u32 hantro_vp8_dec_mc_filter[8][6];
> > > >>
> > > >> diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/
> > > > staging/media/hantro/hantro_postproc.c
> > > >> index 4549aec08feb..79a66d001738 100644
> > > >> --- a/drivers/staging/media/hantro/hantro_postproc.c
> > > >> +++ b/drivers/staging/media/hantro/hantro_postproc.c
> > > >> @@ -11,6 +11,7 @@
> > > >>   #include "hantro.h"
> > > >>   #include "hantro_hw.h"
> > > >>   #include "hantro_g1_regs.h"
> > > >> +#include "hantro_g2_regs.h"
> > > >>
> > > >>   #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
> > > >>   { \
> > > >> @@ -99,6 +100,21 @@ static void hantro_postproc_g1_enable(struct
> > hantro_ctx
> > > > *ctx)
> > > >>    HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
> > > >>   }
> > > >>
> > > >> +static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
> > > >> +{
> > > >> +  struct hantro_dev *vpu = ctx->dev;
> > > >> +  struct vb2_v4l2_buffer *dst_buf;
> > > >> +  size_t chroma_offset = ctx->dst_fmt.width * ctx->dst_fmt.height;
> > > >> +  dma_addr_t dst_dma;
> > > >> +
> > > >> +  dst_buf = hantro_get_dst_buf(ctx);
> > > >> +  dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> > > >> +
> > > >> +  hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
> > > >> +  hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma +
> > > > chroma_offset);
> > > >> +  hantro_reg_write(vpu, &g2_out_rs_e, 1);
> > > >> +}
> > > >> +
> > > >>   void hantro_postproc_free(struct hantro_ctx *ctx)
> > > >>   {
> > > >>    struct hantro_dev *vpu = ctx->dev;
> > > >> @@ -127,6 +143,9 @@ int hantro_postproc_alloc(struct hantro_ctx *ctx)
> > > >>    if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
> > > >>            buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
> > > >>                                            ctx-
> > > >> dst_fmt.height);
> > > >> +  else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
> > > >> +          buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
> > > >> +                                         ctx-
> > > >> dst_fmt.height);
> > > >>
> > > >>    for (i = 0; i < num_buffers; ++i) {
> > > >>            struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
> > > >> @@ -152,6 +171,13 @@ static void hantro_postproc_g1_disable(struct
> > > > hantro_ctx *ctx)
> > > >>    HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
> > > >>   }
> > > >>
> > > >> +static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
> > > >> +{
> > > >> +  struct hantro_dev *vpu = ctx->dev;
> > > >> +
> > > >> +  hantro_reg_write(vpu, &g2_out_rs_e, 0);
> > > >> +}
> > > >> +
> > > >>   void hantro_postproc_disable(struct hantro_ctx *ctx)
> > > >>   {
> > > >>    struct hantro_dev *vpu = ctx->dev;
> > > >> @@ -172,3 +198,8 @@ const struct hantro_postproc_ops
> > hantro_g1_postproc_ops
> > > > = {
> > > >>    .enable = hantro_postproc_g1_enable,
> > > >>    .disable = hantro_postproc_g1_disable,
> > > >>   };
> > > >> +
> > > >> +const struct hantro_postproc_ops hantro_g2_postproc_ops = {
> > > >> +  .enable = hantro_postproc_g2_enable,
> > > >> +  .disable = hantro_postproc_g2_disable,
> > > >> +};
> > > >> diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/
> > staging/
> > > > media/hantro/imx8m_vpu_hw.c
> > > >> index 455a107ffb02..1a43f6fceef9 100644
> > > >> --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
> > > >> +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
> > > >> @@ -132,6 +132,14 @@ static const struct hantro_fmt 
imx8m_vpu_dec_fmts[]
> > = {
> > > >>    },
> > > >>   };
> > > >>
> > > >> +static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
> > > >> +  {
> > > >> +          .fourcc = V4L2_PIX_FMT_NV12,
> > > >> +          .codec_mode = HANTRO_MODE_NONE,
> > > >> +          .postprocessed = true,
> > > >> +  },
> > > >> +};
> > > >> +
> > > >>   static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
> > > >>    {
> > > >>            .fourcc = V4L2_PIX_FMT_NV12_4L4,
> > > >> @@ -301,6 +309,9 @@ const struct hantro_variant imx8mq_vpu_g2_variant 
= {
> > > >>    .dec_offset = 0x0,
> > > >>    .dec_fmts = imx8m_vpu_g2_dec_fmts,
> > > >>    .num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts),
> > > >> +  .postproc_fmts = imx8m_vpu_g2_postproc_fmts,
> > > >> +  .num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_g2_postproc_fmts),
> > > >> +  .postproc_ops = &hantro_g2_postproc_ops,
> > > >>    .codec = HANTRO_HEVC_DECODER | HANTRO_VP9_DECODER,
> > > >>    .codec_ops = imx8mq_vpu_g2_codec_ops,
> > > >>    .init = imx8mq_vpu_hw_init,
> > > >> --
> > > >> 2.17.1
> > > >>
> > > >>
> > > >
> > > >
> > >
> > >
> >
> >
>
Ezequiel Garcia Oct. 20, 2021, 3:25 p.m. UTC | #6
On Wed, 20 Oct 2021 at 12:04, Jernej Škrabec <jernej.skrabec@gmail.com> wrote:
>
> Dne sreda, 20. oktober 2021 ob 13:06:59 CEST je Ezequiel Garcia napisal(a):
> > Hi Jernej,
> >
> > On Tue, 19 Oct 2021 at 13:38, Jernej Škrabec <jernej.skrabec@gmail.com>
> wrote:
> > >
> > > Hi Andrzej!
> > >
> > > Dne petek, 15. oktober 2021 ob 19:19:47 CEST je Andrzej Pietrasiewicz
> > > napisal(a):
> > > > Hi Jernej,
> > > >
> > > > W dniu 14.10.2021 o 19:42, Jernej Škrabec pisze:
> > > > > Hi Andrzej!
> > > > >
> > > > > Dne sreda, 29. september 2021 ob 18:04:39 CEST je Andrzej
> Pietrasiewicz
> > > > > napisal(a):
> > > > >> The G2 decoder block produces NV12 4x4 tiled format (NV12_4L4).
> > > > >> Enable the G2 post-processor block, in order to produce regular NV12.
> > > > >>
> > > > >> The logic in hantro_postproc.c is leveraged to take care of
> allocating
> > > > >> the extra buffers and configure the post-processor, which is
> > > > >> significantly simpler than the one on the G1.
> > > > >
> > > > > Quick summary of discussion on LibreELEC Slack:
> > > > > When using NV12 format on Allwinner H6 variant of G2 (needs some
> driver
> > > > > changes), I get frames out of order. If I use native NV12 tiled
> format,
> > > frames
> > > > > are ordered correctly.
> > > > >
> > > > > Currently I'm not sure if this is issue with my changes or is this
> general
> > > > > issue.
> > > > >
> > > > > I would be grateful if anyone can test frame order with and without
> > > > > postprocessing enabled on imx8. Take some dynamic video with a lot of
> > > short
> > > > > scenes. It's pretty obvious when frames are out of order.
> > > > >
> > > >
> > > > I checked on imx8 and cannot observe any such artifacts.
> > >
> > > I finally found the issue. As you mentioned on Slack, register write order
> once
> > > already affected decoding. Well, it's the case again. I made hacky test and
> > > moved postproc enable call after output buffers are set and it worked. So,
> this
> > > is actually core quirk which is obviously fixed in newer variants.
> > >
> >
> > Ugh, good catch.
> >
> > What happens if you move all the calls to HANTRO_PP_REG_WRITE_S
> > (HANTRO_PP_REG_WRITE does a relaxed write)?
> >
> > Or what happens if the HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma)
> > is moved to be done after all the other registers?
>
> Those two macros aren't used on G2. Andrzej introduced new postproc helpers
> for G2.
>

Ah, so the issue is specific on the G2 post-processor.

> This commit solves issue for H6:
> https://github.com/jernejsk/linux-1/commit/
> a783a977c0843bb4b555dc9d0b5d64915cd219e7
>

Right, but see this comment:

    /* Turn on pipeline mode. Must be done first. */
    HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x1);

I have vague recollection of why we have that comment,
but I'm reluctant to move post-proc enable to the end.
(or at least not do it on G1?).

> >
> > > This makes this series with minor adaptations completely working on H6. I
> see
> > > no reason not to merge whole series.
> > >
> >
> > Do you have plans to submit your H6 work on top of this?
>
> Of course, why would I work on this otherwise? :) But before I do that, I have
> to clean up and split one commit, which adapts VP9 G2 code for H6 variant.
>

OK, sounds good.

> If you're interested in changes, take a look here:
> https://github.com/jernejsk/linux-1/commits/vp9
>

Will take a look.

Thanks,
Ezequiel
Jernej Škrabec Oct. 21, 2021, 3:36 p.m. UTC | #7
Dne sreda, 20. oktober 2021 ob 17:25:40 CEST je Ezequiel Garcia napisal(a):
> On Wed, 20 Oct 2021 at 12:04, Jernej Škrabec <jernej.skrabec@gmail.com> 
wrote:
> >
> > Dne sreda, 20. oktober 2021 ob 13:06:59 CEST je Ezequiel Garcia 
napisal(a):
> > > Hi Jernej,
> > >
> > > On Tue, 19 Oct 2021 at 13:38, Jernej Škrabec <jernej.skrabec@gmail.com>
> > wrote:
> > > >
> > > > Hi Andrzej!
> > > >
> > > > Dne petek, 15. oktober 2021 ob 19:19:47 CEST je Andrzej Pietrasiewicz
> > > > napisal(a):
> > > > > Hi Jernej,
> > > > >
> > > > > W dniu 14.10.2021 o 19:42, Jernej Škrabec pisze:
> > > > > > Hi Andrzej!
> > > > > >
> > > > > > Dne sreda, 29. september 2021 ob 18:04:39 CEST je Andrzej
> > Pietrasiewicz
> > > > > > napisal(a):
> > > > > >> The G2 decoder block produces NV12 4x4 tiled format (NV12_4L4).
> > > > > >> Enable the G2 post-processor block, in order to produce regular 
NV12.
> > > > > >>
> > > > > >> The logic in hantro_postproc.c is leveraged to take care of
> > allocating
> > > > > >> the extra buffers and configure the post-processor, which is
> > > > > >> significantly simpler than the one on the G1.
> > > > > >
> > > > > > Quick summary of discussion on LibreELEC Slack:
> > > > > > When using NV12 format on Allwinner H6 variant of G2 (needs some
> > driver
> > > > > > changes), I get frames out of order. If I use native NV12 tiled
> > format,
> > > > frames
> > > > > > are ordered correctly.
> > > > > >
> > > > > > Currently I'm not sure if this is issue with my changes or is this
> > general
> > > > > > issue.
> > > > > >
> > > > > > I would be grateful if anyone can test frame order with and 
without
> > > > > > postprocessing enabled on imx8. Take some dynamic video with a lot 
of
> > > > short
> > > > > > scenes. It's pretty obvious when frames are out of order.
> > > > > >
> > > > >
> > > > > I checked on imx8 and cannot observe any such artifacts.
> > > >
> > > > I finally found the issue. As you mentioned on Slack, register write 
order
> > once
> > > > already affected decoding. Well, it's the case again. I made hacky test 
and
> > > > moved postproc enable call after output buffers are set and it worked. 
So,
> > this
> > > > is actually core quirk which is obviously fixed in newer variants.
> > > >
> > >
> > > Ugh, good catch.
> > >
> > > What happens if you move all the calls to HANTRO_PP_REG_WRITE_S
> > > (HANTRO_PP_REG_WRITE does a relaxed write)?
> > >
> > > Or what happens if the HANTRO_PP_REG_WRITE(vpu, out_luma_base, dst_dma)
> > > is moved to be done after all the other registers?
> >
> > Those two macros aren't used on G2. Andrzej introduced new postproc 
helpers
> > for G2.
> >
> 
> Ah, so the issue is specific on the G2 post-processor.

To be more precise, issue is specific only to old G2 post-processor, found in 
Allwinner H6. Andrzej tested code with newer G2 core and both locations worked 
fine.

> 
> > This commit solves issue for H6:
> > https://github.com/jernejsk/linux-1/commit/
> > a783a977c0843bb4b555dc9d0b5d64915cd219e7
> >
> 
> Right, but see this comment:
> 
>     /* Turn on pipeline mode. Must be done first. */
>     HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x1);
> 
> I have vague recollection of why we have that comment,
> but I'm reluctant to move post-proc enable to the end.
> (or at least not do it on G1?).

I missed that. Any idea what would be the cleanest way to move code for G2 
only? I can only think of quirk flag in platform specific structure.

Best regards,
Jernej

> 
> > >
> > > > This makes this series with minor adaptations completely working on 
H6. I
> > see
> > > > no reason not to merge whole series.
> > > >
> > >
> > > Do you have plans to submit your H6 work on top of this?
> >
> > Of course, why would I work on this otherwise? :) But before I do that, I 
have
> > to clean up and split one commit, which adapts VP9 G2 code for H6 variant.
> >
> 
> OK, sounds good.
> 
> > If you're interested in changes, take a look here:
> > https://github.com/jernejsk/linux-1/commits/vp9
> >
> 
> Will take a look.
> 
> Thanks,
> Ezequiel
>
diff mbox series

Patch

diff --git a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c b/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
index 7f827b9f0133..1a26be72c878 100644
--- a/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
+++ b/drivers/staging/media/hantro/hantro_g2_vp9_dec.c
@@ -152,7 +152,7 @@  static void config_output(struct hantro_ctx *ctx,
 	hantro_reg_write(ctx->dev, &g2_out_dis, 0);
 	hantro_reg_write(ctx->dev, &g2_output_format, 0);
 
-	luma_addr = vb2_dma_contig_plane_dma_addr(&dst->base.vb.vb2_buf, 0);
+	luma_addr = hantro_get_dec_buf_addr(ctx, &dst->base.vb.vb2_buf);
 	hantro_write_addr(ctx->dev, G2_OUT_LUMA_ADDR, luma_addr);
 
 	chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
@@ -191,7 +191,7 @@  static void config_ref(struct hantro_ctx *ctx,
 	hantro_reg_write(ctx->dev, &ref_reg->hor_scale, (refw << 14) / dst->vp9.width);
 	hantro_reg_write(ctx->dev, &ref_reg->ver_scale, (refh << 14) / dst->vp9.height);
 
-	luma_addr = vb2_dma_contig_plane_dma_addr(&buf->base.vb.vb2_buf, 0);
+	luma_addr = hantro_get_dec_buf_addr(ctx, &buf->base.vb.vb2_buf);
 	hantro_write_addr(ctx->dev, ref_reg->y_base, luma_addr);
 
 	chroma_addr = luma_addr + chroma_offset(ctx, dec_params);
@@ -236,7 +236,7 @@  static void config_ref_registers(struct hantro_ctx *ctx,
 	config_ref(ctx, dst, &ref_regs[1], dec_params, dec_params->golden_frame_ts);
 	config_ref(ctx, dst, &ref_regs[2], dec_params, dec_params->alt_frame_ts);
 
-	mv_addr = vb2_dma_contig_plane_dma_addr(&mv_ref->base.vb.vb2_buf, 0) +
+	mv_addr = hantro_get_dec_buf_addr(ctx, &mv_ref->base.vb.vb2_buf) +
 		  mv_offset(ctx, dec_params);
 	hantro_write_addr(ctx->dev, G2_REF_MV_ADDR(0), mv_addr);
 
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 2961d399fd60..3d4a5dc1e6d5 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -274,6 +274,7 @@  extern const struct hantro_variant rk3399_vpu_variant;
 extern const struct hantro_variant sama5d4_vdec_variant;
 
 extern const struct hantro_postproc_ops hantro_g1_postproc_ops;
+extern const struct hantro_postproc_ops hantro_g2_postproc_ops;
 
 extern const u32 hantro_vp8_dec_mc_filter[8][6];
 
diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/staging/media/hantro/hantro_postproc.c
index 4549aec08feb..79a66d001738 100644
--- a/drivers/staging/media/hantro/hantro_postproc.c
+++ b/drivers/staging/media/hantro/hantro_postproc.c
@@ -11,6 +11,7 @@ 
 #include "hantro.h"
 #include "hantro_hw.h"
 #include "hantro_g1_regs.h"
+#include "hantro_g2_regs.h"
 
 #define HANTRO_PP_REG_WRITE(vpu, reg_name, val) \
 { \
@@ -99,6 +100,21 @@  static void hantro_postproc_g1_enable(struct hantro_ctx *ctx)
 	HANTRO_PP_REG_WRITE(vpu, display_width, ctx->dst_fmt.width);
 }
 
+static void hantro_postproc_g2_enable(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct vb2_v4l2_buffer *dst_buf;
+	size_t chroma_offset = ctx->dst_fmt.width * ctx->dst_fmt.height;
+	dma_addr_t dst_dma;
+
+	dst_buf = hantro_get_dst_buf(ctx);
+	dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+
+	hantro_write_addr(vpu, G2_RS_OUT_LUMA_ADDR, dst_dma);
+	hantro_write_addr(vpu, G2_RS_OUT_CHROMA_ADDR, dst_dma + chroma_offset);
+	hantro_reg_write(vpu, &g2_out_rs_e, 1);
+}
+
 void hantro_postproc_free(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
@@ -127,6 +143,9 @@  int hantro_postproc_alloc(struct hantro_ctx *ctx)
 	if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_H264_SLICE)
 		buf_size += hantro_h264_mv_size(ctx->dst_fmt.width,
 						ctx->dst_fmt.height);
+	else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
+		buf_size += hantro_vp9_mv_size(ctx->dst_fmt.width,
+					       ctx->dst_fmt.height);
 
 	for (i = 0; i < num_buffers; ++i) {
 		struct hantro_aux_buf *priv = &ctx->postproc.dec_q[i];
@@ -152,6 +171,13 @@  static void hantro_postproc_g1_disable(struct hantro_ctx *ctx)
 	HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
 }
 
+static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+
+	hantro_reg_write(vpu, &g2_out_rs_e, 0);
+}
+
 void hantro_postproc_disable(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
@@ -172,3 +198,8 @@  const struct hantro_postproc_ops hantro_g1_postproc_ops = {
 	.enable = hantro_postproc_g1_enable,
 	.disable = hantro_postproc_g1_disable,
 };
+
+const struct hantro_postproc_ops hantro_g2_postproc_ops = {
+	.enable = hantro_postproc_g2_enable,
+	.disable = hantro_postproc_g2_disable,
+};
diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
index 455a107ffb02..1a43f6fceef9 100644
--- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
+++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
@@ -132,6 +132,14 @@  static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
 	},
 };
 
+static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_NV12,
+		.codec_mode = HANTRO_MODE_NONE,
+		.postprocessed = true,
+	},
+};
+
 static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
 	{
 		.fourcc = V4L2_PIX_FMT_NV12_4L4,
@@ -301,6 +309,9 @@  const struct hantro_variant imx8mq_vpu_g2_variant = {
 	.dec_offset = 0x0,
 	.dec_fmts = imx8m_vpu_g2_dec_fmts,
 	.num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts),
+	.postproc_fmts = imx8m_vpu_g2_postproc_fmts,
+	.num_postproc_fmts = ARRAY_SIZE(imx8m_vpu_g2_postproc_fmts),
+	.postproc_ops = &hantro_g2_postproc_ops,
 	.codec = HANTRO_HEVC_DECODER | HANTRO_VP9_DECODER,
 	.codec_ops = imx8mq_vpu_g2_codec_ops,
 	.init = imx8mq_vpu_hw_init,