Message ID | 20191026074959.1073512-3-jernej.skrabec@siol.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | media: cedrus: Add support for 4k videos | expand |
Hi, On Sat 26 Oct 19, 09:49, Jernej Skrabec wrote: > H264 decoder needs additional or bigger buffers in order to decode 4k > videos. Thanks for the fixup, we hadn't looked into those bits at all during initial bringup of H.264! See a few minor comments below. > Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net> > --- > drivers/staging/media/sunxi/cedrus/cedrus.h | 7 ++ > .../staging/media/sunxi/cedrus/cedrus_h264.c | 83 +++++++++++++++++-- > .../staging/media/sunxi/cedrus/cedrus_regs.h | 11 +++ > 3 files changed, 93 insertions(+), 8 deletions(-) > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h > index c45fb9a7ad07..96765555ab8a 100644 > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h > @@ -116,8 +116,15 @@ struct cedrus_ctx { > ssize_t mv_col_buf_size; > void *pic_info_buf; > dma_addr_t pic_info_buf_dma; > + ssize_t pic_info_buf_size; > void *neighbor_info_buf; > dma_addr_t neighbor_info_buf_dma; > + void *deblk_buf; > + dma_addr_t deblk_buf_dma; > + ssize_t deblk_buf_size; > + void *intra_pred_buf; > + dma_addr_t intra_pred_buf_dma; > + ssize_t intra_pred_buf_size; > } h264; > struct { > void *mv_col_buf; > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c > index d2c854ecdf15..19962f4213d4 100644 > --- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c > @@ -39,7 +39,6 @@ struct cedrus_h264_sram_ref_pic { > #define CEDRUS_H264_FRAME_NUM 18 > > #define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K) > -#define CEDRUS_PIC_INFO_BUF_SIZE (128 * SZ_1K) Could we keep a define with the minimum size that you are using later (increased to 130 * SZ_1K)? 
> static void cedrus_h264_write_sram(struct cedrus_dev *dev, > enum cedrus_h264_sram_off off, > @@ -342,6 +341,20 @@ static void cedrus_set_params(struct cedrus_ctx *ctx, > VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | > VE_H264_VLD_ADDR_LAST); > > + if (ctx->src_fmt.width > 2048) { > + cedrus_write(dev, VE_BUF_CTRL, > + VE_BUF_CTRL_INTRAPRED_MIXED_RAM | > + VE_BUF_CTRL_DBLK_MIXED_RAM); > + cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR, > + ctx->codec.h264.deblk_buf_dma); > + cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR, > + ctx->codec.h264.intra_pred_buf_dma); > + } else { > + cedrus_write(dev, VE_BUF_CTRL, > + VE_BUF_CTRL_INTRAPRED_INT_SRAM | > + VE_BUF_CTRL_DBLK_INT_SRAM); > + } > + > /* > * FIXME: Since the bitstream parsing is done in software, and > * in userspace, this shouldn't be needed anymore. But it > @@ -502,18 +515,28 @@ static void cedrus_h264_setup(struct cedrus_ctx *ctx, > static int cedrus_h264_start(struct cedrus_ctx *ctx) > { > struct cedrus_dev *dev = ctx->dev; > + unsigned int pic_info_size; > unsigned int field_size; > unsigned int mv_col_size; > int ret; > Maybe add a comment here noting that this is a half-magic, sub-optimal formula? > + if (ctx->src_fmt.width > 2048) > + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000; > + else > + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000; > + > /* > - * FIXME: It seems that the H6 cedarX code is using a formula > - * here based on the size of the frame, while all the older > - * code is using a fixed size, so that might need to be > - * changed at some point. > + * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set, > + * there is no need to multiply by 2. > */ > + pic_info_size += ctx->src_fmt.height * 2 * 64; > + > + if (pic_info_size < 130 * SZ_1K) > + pic_info_size = 130 * SZ_1K; This is where I think we could have a "minimum pic info size" define. 
> + > + ctx->codec.h264.pic_info_buf_size = pic_info_size; > ctx->codec.h264.pic_info_buf = > - dma_alloc_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, > + dma_alloc_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, > &ctx->codec.h264.pic_info_buf_dma, > GFP_KERNEL); > if (!ctx->codec.h264.pic_info_buf) > @@ -566,15 +589,51 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx) > goto err_neighbor_buf; > } > > + if (ctx->src_fmt.width > 2048) { Feel free to add a comment here to explain where the 12 below comes from if you have some idea, or that it's a somewhat magical value that generally works. > + ctx->codec.h264.deblk_buf_size = > + ALIGN(ctx->src_fmt.width, 32) * 12; > + ctx->codec.h264.deblk_buf = > + dma_alloc_coherent(dev->dev, > + ctx->codec.h264.deblk_buf_size, > + &ctx->codec.h264.deblk_buf_dma, > + GFP_KERNEL); > + if (!ctx->codec.h264.deblk_buf) { > + ret = -ENOMEM; > + goto err_mv_col_buf; > + } > + Same here, a comment would be welcome about the 5 value below. Cheers, Paul > + ctx->codec.h264.intra_pred_buf_size = > + ALIGN(ctx->src_fmt.width, 64) * 5; > + ctx->codec.h264.intra_pred_buf = > + dma_alloc_coherent(dev->dev, > + ctx->codec.h264.intra_pred_buf_size, > + &ctx->codec.h264.intra_pred_buf_dma, > + GFP_KERNEL); > + if (!ctx->codec.h264.intra_pred_buf) { > + ret = -ENOMEM; > + goto err_deblk_buf; > + } > + } > + > return 0; > > +err_deblk_buf: > + dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, > + ctx->codec.h264.deblk_buf, > + ctx->codec.h264.deblk_buf_dma); > + > +err_mv_col_buf: > + dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size, > + ctx->codec.h264.mv_col_buf, > + ctx->codec.h264.mv_col_buf_dma); > + > err_neighbor_buf: > dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, > ctx->codec.h264.neighbor_info_buf, > ctx->codec.h264.neighbor_info_buf_dma); > > err_pic_buf: > - dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, > + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, > 
ctx->codec.h264.pic_info_buf, > ctx->codec.h264.pic_info_buf_dma); > return ret; > @@ -590,9 +649,17 @@ static void cedrus_h264_stop(struct cedrus_ctx *ctx) > dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, > ctx->codec.h264.neighbor_info_buf, > ctx->codec.h264.neighbor_info_buf_dma); > - dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, > + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, > ctx->codec.h264.pic_info_buf, > ctx->codec.h264.pic_info_buf_dma); > + if (ctx->codec.h264.deblk_buf_size) > + dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, > + ctx->codec.h264.deblk_buf, > + ctx->codec.h264.deblk_buf_dma); > + if (ctx->codec.h264.intra_pred_buf_size) > + dma_free_coherent(dev->dev, ctx->codec.h264.intra_pred_buf_size, > + ctx->codec.h264.intra_pred_buf, > + ctx->codec.h264.intra_pred_buf_dma); > } > > static void cedrus_h264_trigger(struct cedrus_ctx *ctx) > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h > index ace3d49fcd82..7beb03d3bb39 100644 > --- a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h > @@ -46,6 +46,17 @@ > #define VE_MODE_DEC_H264 (0x01 << 0) > #define VE_MODE_DEC_MPEG (0x00 << 0) > > +#define VE_BUF_CTRL 0x50 > + > +#define VE_BUF_CTRL_INTRAPRED_EXT_RAM (0x02 << 2) > +#define VE_BUF_CTRL_INTRAPRED_MIXED_RAM (0x01 << 2) > +#define VE_BUF_CTRL_INTRAPRED_INT_SRAM (0x00 << 2) > +#define VE_BUF_CTRL_DBLK_EXT_RAM (0x02 << 0) > +#define VE_BUF_CTRL_DBLK_MIXED_RAM (0x01 << 0) > +#define VE_BUF_CTRL_DBLK_INT_SRAM (0x00 << 0) > + > +#define VE_DBLK_DRAM_BUF_ADDR 0x54 > +#define VE_INTRAPRED_DRAM_BUF_ADDR 0x58 > #define VE_PRIMARY_CHROMA_BUF_LEN 0xc4 > #define VE_PRIMARY_FB_LINE_STRIDE 0xc8 > > -- > 2.23.0 >
Dne ponedeljek, 04. november 2019 ob 11:13:19 CET je Paul Kocialkowski napisal(a): > Hi, > > On Sat 26 Oct 19, 09:49, Jernej Skrabec wrote: > > H264 decoder needs additional or bigger buffers in order to decode 4k > > videos. > > Thanks for the fixup, we hadn't looked into those bits at all during initial > bringup of H.264! > > See a few minor comments below. > > > Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net> > > --- > > > > drivers/staging/media/sunxi/cedrus/cedrus.h | 7 ++ > > .../staging/media/sunxi/cedrus/cedrus_h264.c | 83 +++++++++++++++++-- > > .../staging/media/sunxi/cedrus/cedrus_regs.h | 11 +++ > > 3 files changed, 93 insertions(+), 8 deletions(-) > > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h > > b/drivers/staging/media/sunxi/cedrus/cedrus.h index > > c45fb9a7ad07..96765555ab8a 100644 > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h > > @@ -116,8 +116,15 @@ struct cedrus_ctx { > > > > ssize_t mv_col_buf_size; > > void *pic_info_buf; > > dma_addr_t pic_info_buf_dma; > > > > + ssize_t pic_info_buf_size; > > > > void *neighbor_info_buf; > > dma_addr_t neighbor_info_buf_dma; > > > > + void *deblk_buf; > > + dma_addr_t deblk_buf_dma; > > + ssize_t deblk_buf_size; > > + void *intra_pred_buf; > > + dma_addr_t intra_pred_buf_dma; > > + ssize_t intra_pred_buf_size; > > > > } h264; > > struct { > > > > void *mv_col_buf; > > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c > > b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c index > > d2c854ecdf15..19962f4213d4 100644 > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c > > @@ -39,7 +39,6 @@ struct cedrus_h264_sram_ref_pic { > > > > #define CEDRUS_H264_FRAME_NUM 18 > > > > #define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K) > > > > -#define CEDRUS_PIC_INFO_BUF_SIZE (128 * SZ_1K) > > Could we keep a define with the minimum size that you are using 
later > (increased to 130 * SZ_1K)? Sure. > > > static void cedrus_h264_write_sram(struct cedrus_dev *dev, > > > > enum cedrus_h264_sram_off off, > > > > @@ -342,6 +341,20 @@ static void cedrus_set_params(struct cedrus_ctx *ctx, > > > > VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | > > VE_H264_VLD_ADDR_LAST); > > > > + if (ctx->src_fmt.width > 2048) { > > + cedrus_write(dev, VE_BUF_CTRL, > > + VE_BUF_CTRL_INTRAPRED_MIXED_RAM | > > + VE_BUF_CTRL_DBLK_MIXED_RAM); > > + cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR, > > + ctx->codec.h264.deblk_buf_dma); > > + cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR, > > + ctx->codec.h264.intra_pred_buf_dma); > > + } else { > > + cedrus_write(dev, VE_BUF_CTRL, > > + VE_BUF_CTRL_INTRAPRED_INT_SRAM | > > + VE_BUF_CTRL_DBLK_INT_SRAM); > > + } > > + > > > > /* > > > > * FIXME: Since the bitstream parsing is done in software, and > > * in userspace, this shouldn't be needed anymore. But it > > > > @@ -502,18 +515,28 @@ static void cedrus_h264_setup(struct cedrus_ctx > > *ctx, > > > > static int cedrus_h264_start(struct cedrus_ctx *ctx) > > { > > > > struct cedrus_dev *dev = ctx->dev; > > > > + unsigned int pic_info_size; > > > > unsigned int field_size; > > unsigned int mv_col_size; > > int ret; > > Maybe add a comment here this is a half-magic sub-optimal formula? Well, I'm not sure how suboptimal this formula and those below really are. They are taken from the CedarX source. I would imagine that they didn't waste too much memory. What kind of comment would be ok for you? "Formula taken from CedarX source"? Best regards, Jernej > > > + if (ctx->src_fmt.width > 2048) > > + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000; > > + else > > + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000; > > + > > > > /* > > > > - * FIXME: It seems that the H6 cedarX code is using a formula > > - * here based on the size of the frame, while all the older > > - * code is using a fixed size, so that might need to be > > - * changed at some point. 
> > + * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set, > > + * there is no need to multiply by 2. > > > > */ > > > > + pic_info_size += ctx->src_fmt.height * 2 * 64; > > + > > + if (pic_info_size < 130 * SZ_1K) > > + pic_info_size = 130 * SZ_1K; > > This is where I think we could have a "minimum pic info size" define. > > > + > > + ctx->codec.h264.pic_info_buf_size = pic_info_size; > > > > ctx->codec.h264.pic_info_buf = > > > > - dma_alloc_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, > > + dma_alloc_coherent(dev->dev, ctx- >codec.h264.pic_info_buf_size, > > > > &ctx- >codec.h264.pic_info_buf_dma, > > GFP_KERNEL); > > > > if (!ctx->codec.h264.pic_info_buf) > > > > @@ -566,15 +589,51 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx) > > > > goto err_neighbor_buf; > > > > } > > > > + if (ctx->src_fmt.width > 2048) { > > Feel free to add a comment here to explain where the 12 below comes from if > you have some idea, or that it's a somewhat magical value that generally > works. > > + ctx->codec.h264.deblk_buf_size = > > + ALIGN(ctx->src_fmt.width, 32) * 12; > > + ctx->codec.h264.deblk_buf = > > + dma_alloc_coherent(dev->dev, > > + ctx- >codec.h264.deblk_buf_size, > > + &ctx- >codec.h264.deblk_buf_dma, > > + GFP_KERNEL); > > + if (!ctx->codec.h264.deblk_buf) { > > + ret = -ENOMEM; > > + goto err_mv_col_buf; > > + } > > + > > Same here, a comment would be welcome about the 5 value below. 
> > Cheers, > > Paul > > > + ctx->codec.h264.intra_pred_buf_size = > > + ALIGN(ctx->src_fmt.width, 64) * 5; > > + ctx->codec.h264.intra_pred_buf = > > + dma_alloc_coherent(dev->dev, > > + ctx- >codec.h264.intra_pred_buf_size, > > + &ctx- >codec.h264.intra_pred_buf_dma, > > + GFP_KERNEL); > > + if (!ctx->codec.h264.intra_pred_buf) { > > + ret = -ENOMEM; > > + goto err_deblk_buf; > > + } > > + } > > + > > > > return 0; > > > > +err_deblk_buf: > > + dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, > > + ctx->codec.h264.deblk_buf, > > + ctx->codec.h264.deblk_buf_dma); > > + > > +err_mv_col_buf: > > + dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size, > > + ctx->codec.h264.mv_col_buf, > > + ctx->codec.h264.mv_col_buf_dma); > > + > > > > err_neighbor_buf: > > dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, > > > > ctx->codec.h264.neighbor_info_buf, > > ctx->codec.h264.neighbor_info_buf_dma); > > > > err_pic_buf: > > - dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, > > + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, > > > > ctx->codec.h264.pic_info_buf, > > ctx->codec.h264.pic_info_buf_dma); > > > > return ret; > > > > @@ -590,9 +649,17 @@ static void cedrus_h264_stop(struct cedrus_ctx *ctx) > > > > dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, > > > > ctx->codec.h264.neighbor_info_buf, > > ctx->codec.h264.neighbor_info_buf_dma); > > > > - dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, > > + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, > > > > ctx->codec.h264.pic_info_buf, > > ctx->codec.h264.pic_info_buf_dma); > > > > + if (ctx->codec.h264.deblk_buf_size) > > + dma_free_coherent(dev->dev, ctx- >codec.h264.deblk_buf_size, > > + ctx->codec.h264.deblk_buf, > > + ctx->codec.h264.deblk_buf_dma); > > + if (ctx->codec.h264.intra_pred_buf_size) > > + dma_free_coherent(dev->dev, ctx- >codec.h264.intra_pred_buf_size, > > + ctx->codec.h264.intra_pred_buf, > > + ctx- 
>codec.h264.intra_pred_buf_dma); > > > > } > > > > static void cedrus_h264_trigger(struct cedrus_ctx *ctx) > > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h > > b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h index > > ace3d49fcd82..7beb03d3bb39 100644 > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h > > @@ -46,6 +46,17 @@ > > > > #define VE_MODE_DEC_H264 (0x01 << 0) > > #define VE_MODE_DEC_MPEG (0x00 << 0) > > > > +#define VE_BUF_CTRL 0x50 > > + > > +#define VE_BUF_CTRL_INTRAPRED_EXT_RAM (0x02 << 2) > > +#define VE_BUF_CTRL_INTRAPRED_MIXED_RAM (0x01 << 2) > > +#define VE_BUF_CTRL_INTRAPRED_INT_SRAM (0x00 << 2) > > +#define VE_BUF_CTRL_DBLK_EXT_RAM (0x02 << 0) > > +#define VE_BUF_CTRL_DBLK_MIXED_RAM (0x01 << 0) > > +#define VE_BUF_CTRL_DBLK_INT_SRAM (0x00 << 0) > > + > > +#define VE_DBLK_DRAM_BUF_ADDR 0x54 > > +#define VE_INTRAPRED_DRAM_BUF_ADDR 0x58 > > > > #define VE_PRIMARY_CHROMA_BUF_LEN 0xc4 > > #define VE_PRIMARY_FB_LINE_STRIDE 0xc8
Hi Jernej, On Mon 04 Nov 19, 17:53, Jernej Škrabec wrote: > Dne ponedeljek, 04. november 2019 ob 11:13:19 CET je Paul Kocialkowski > napisal(a): > > Hi, > > > > On Sat 26 Oct 19, 09:49, Jernej Skrabec wrote: > > > H264 decoder needs additional or bigger buffers in order to decode 4k > > > videos. > > > > Thanks for the fixup, we hadn't looked into those bits at all during initial > > bringup of H.264! > > > > See a few minor comments below. > > > > > Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net> > > > --- > > > > > > drivers/staging/media/sunxi/cedrus/cedrus.h | 7 ++ > > > .../staging/media/sunxi/cedrus/cedrus_h264.c | 83 +++++++++++++++++-- > > > .../staging/media/sunxi/cedrus/cedrus_regs.h | 11 +++ > > > 3 files changed, 93 insertions(+), 8 deletions(-) > > > > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h > > > b/drivers/staging/media/sunxi/cedrus/cedrus.h index > > > c45fb9a7ad07..96765555ab8a 100644 > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h > > > @@ -116,8 +116,15 @@ struct cedrus_ctx { > > > > > > ssize_t mv_col_buf_size; > > > void *pic_info_buf; > > > dma_addr_t pic_info_buf_dma; > > > > > > + ssize_t pic_info_buf_size; > > > > > > void *neighbor_info_buf; > > > dma_addr_t neighbor_info_buf_dma; > > > > > > + void *deblk_buf; > > > + dma_addr_t deblk_buf_dma; > > > + ssize_t deblk_buf_size; > > > + void *intra_pred_buf; > > > + dma_addr_t intra_pred_buf_dma; > > > + ssize_t intra_pred_buf_size; > > > > > > } h264; > > > struct { > > > > > > void *mv_col_buf; > > > > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c > > > b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c index > > > d2c854ecdf15..19962f4213d4 100644 > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c > > > @@ -39,7 +39,6 @@ struct cedrus_h264_sram_ref_pic { > > > > > > #define CEDRUS_H264_FRAME_NUM 18 > > > > > > 
#define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K) > > > > > > -#define CEDRUS_PIC_INFO_BUF_SIZE (128 * SZ_1K) > > > > Could we keep a define with the minimum size that you are using later > > (increased to 130 * SZ_1K)? > > Sure. > > > > > > static void cedrus_h264_write_sram(struct cedrus_dev *dev, > > > > > > enum cedrus_h264_sram_off off, > > > > > > @@ -342,6 +341,20 @@ static void cedrus_set_params(struct cedrus_ctx *ctx, > > > > > > VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | > > > VE_H264_VLD_ADDR_LAST); > > > > > > + if (ctx->src_fmt.width > 2048) { > > > + cedrus_write(dev, VE_BUF_CTRL, > > > + VE_BUF_CTRL_INTRAPRED_MIXED_RAM | > > > + VE_BUF_CTRL_DBLK_MIXED_RAM); > > > + cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR, > > > + ctx->codec.h264.deblk_buf_dma); > > > + cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR, > > > + ctx->codec.h264.intra_pred_buf_dma); > > > + } else { > > > + cedrus_write(dev, VE_BUF_CTRL, > > > + VE_BUF_CTRL_INTRAPRED_INT_SRAM | > > > + VE_BUF_CTRL_DBLK_INT_SRAM); > > > + } > > > + > > > > > > /* > > > > > > * FIXME: Since the bitstream parsing is done in software, and > > > * in userspace, this shouldn't be needed anymore. But it > > > > > > @@ -502,18 +515,28 @@ static void cedrus_h264_setup(struct cedrus_ctx > > > *ctx, > > > > > > static int cedrus_h264_start(struct cedrus_ctx *ctx) > > > { > > > > > > struct cedrus_dev *dev = ctx->dev; > > > > > > + unsigned int pic_info_size; > > > > > > unsigned int field_size; > > > unsigned int mv_col_size; > > > int ret; > > > > Maybe add a comment here this is a half-magic sub-optimal formula? > > Well, I'm not sure how much suboptimal formulas this and those below are. They > are taken from CedarX source. I would imagine that they didn't waste too much > memory. What kind of comment would be ok for you? "Formula taken from CedarX > source"? Yes, something like that would work fine. 
The point is to make it clear that it is not an obvious or direct calculation based on something from the spec. Cheers, Paul > Best regards, > Jernej > > > > > > + if (ctx->src_fmt.width > 2048) > > > + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000; > > > + else > > > + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000; > > > + > > > > > > /* > > > > > > - * FIXME: It seems that the H6 cedarX code is using a formula > > > - * here based on the size of the frame, while all the older > > > - * code is using a fixed size, so that might need to be > > > - * changed at some point. > > > + * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set, > > > + * there is no need to multiply by 2. > > > > > > */ > > > > > > + pic_info_size += ctx->src_fmt.height * 2 * 64; > > > + > > > + if (pic_info_size < 130 * SZ_1K) > > > + pic_info_size = 130 * SZ_1K; > > > > This is where I think we could have a "minimum pic info size" define. > > > > > + > > > + ctx->codec.h264.pic_info_buf_size = pic_info_size; > > > > > > ctx->codec.h264.pic_info_buf = > > > > > > - dma_alloc_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, > > > + dma_alloc_coherent(dev->dev, ctx- > >codec.h264.pic_info_buf_size, > > > > > > &ctx- > >codec.h264.pic_info_buf_dma, > > > GFP_KERNEL); > > > > > > if (!ctx->codec.h264.pic_info_buf) > > > > > > @@ -566,15 +589,51 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx) > > > > > > goto err_neighbor_buf; > > > > > > } > > > > > > + if (ctx->src_fmt.width > 2048) { > > > > Feel free to add a comment here to explain where the 12 below comes from if > > you have some idea, or that it's a somewhat magical value that generally > > works. 
> > > + ctx->codec.h264.deblk_buf_size = > > > + ALIGN(ctx->src_fmt.width, 32) * 12; > > > + ctx->codec.h264.deblk_buf = > > > + dma_alloc_coherent(dev->dev, > > > + ctx- > >codec.h264.deblk_buf_size, > > > + &ctx- > >codec.h264.deblk_buf_dma, > > > + GFP_KERNEL); > > > + if (!ctx->codec.h264.deblk_buf) { > > > + ret = -ENOMEM; > > > + goto err_mv_col_buf; > > > + } > > > + > > > > Same here, a comment would be welcome about the 5 value below. > > > > Cheers, > > > > Paul > > > > > + ctx->codec.h264.intra_pred_buf_size = > > > + ALIGN(ctx->src_fmt.width, 64) * 5; > > > + ctx->codec.h264.intra_pred_buf = > > > + dma_alloc_coherent(dev->dev, > > > + ctx- > >codec.h264.intra_pred_buf_size, > > > + &ctx- > >codec.h264.intra_pred_buf_dma, > > > + GFP_KERNEL); > > > + if (!ctx->codec.h264.intra_pred_buf) { > > > + ret = -ENOMEM; > > > + goto err_deblk_buf; > > > + } > > > + } > > > + > > > > > > return 0; > > > > > > +err_deblk_buf: > > > + dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, > > > + ctx->codec.h264.deblk_buf, > > > + ctx->codec.h264.deblk_buf_dma); > > > + > > > +err_mv_col_buf: > > > + dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size, > > > + ctx->codec.h264.mv_col_buf, > > > + ctx->codec.h264.mv_col_buf_dma); > > > + > > > > > > err_neighbor_buf: > > > dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, > > > > > > ctx->codec.h264.neighbor_info_buf, > > > ctx->codec.h264.neighbor_info_buf_dma); > > > > > > err_pic_buf: > > > - dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, > > > + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, > > > > > > ctx->codec.h264.pic_info_buf, > > > ctx->codec.h264.pic_info_buf_dma); > > > > > > return ret; > > > > > > @@ -590,9 +649,17 @@ static void cedrus_h264_stop(struct cedrus_ctx *ctx) > > > > > > dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, > > > > > > ctx->codec.h264.neighbor_info_buf, > > > ctx->codec.h264.neighbor_info_buf_dma); > > > > > > - 
dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, > > > + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, > > > > > > ctx->codec.h264.pic_info_buf, > > > ctx->codec.h264.pic_info_buf_dma); > > > > > > + if (ctx->codec.h264.deblk_buf_size) > > > + dma_free_coherent(dev->dev, ctx- > >codec.h264.deblk_buf_size, > > > + ctx->codec.h264.deblk_buf, > > > + ctx->codec.h264.deblk_buf_dma); > > > + if (ctx->codec.h264.intra_pred_buf_size) > > > + dma_free_coherent(dev->dev, ctx- > >codec.h264.intra_pred_buf_size, > > > + ctx->codec.h264.intra_pred_buf, > > > + ctx- > >codec.h264.intra_pred_buf_dma); > > > > > > } > > > > > > static void cedrus_h264_trigger(struct cedrus_ctx *ctx) > > > > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h > > > b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h index > > > ace3d49fcd82..7beb03d3bb39 100644 > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h > > > @@ -46,6 +46,17 @@ > > > > > > #define VE_MODE_DEC_H264 (0x01 << 0) > > > #define VE_MODE_DEC_MPEG (0x00 << 0) > > > > > > +#define VE_BUF_CTRL 0x50 > > > + > > > +#define VE_BUF_CTRL_INTRAPRED_EXT_RAM (0x02 << 2) > > > +#define VE_BUF_CTRL_INTRAPRED_MIXED_RAM (0x01 << 2) > > > +#define VE_BUF_CTRL_INTRAPRED_INT_SRAM (0x00 << 2) > > > +#define VE_BUF_CTRL_DBLK_EXT_RAM (0x02 << 0) > > > +#define VE_BUF_CTRL_DBLK_MIXED_RAM (0x01 << 0) > > > +#define VE_BUF_CTRL_DBLK_INT_SRAM (0x00 << 0) > > > + > > > +#define VE_DBLK_DRAM_BUF_ADDR 0x54 > > > +#define VE_INTRAPRED_DRAM_BUF_ADDR 0x58 > > > > > > #define VE_PRIMARY_CHROMA_BUF_LEN 0xc4 > > > #define VE_PRIMARY_FB_LINE_STRIDE 0xc8 > > > >
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h index c45fb9a7ad07..96765555ab8a 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus.h +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h @@ -116,8 +116,15 @@ struct cedrus_ctx { ssize_t mv_col_buf_size; void *pic_info_buf; dma_addr_t pic_info_buf_dma; + ssize_t pic_info_buf_size; void *neighbor_info_buf; dma_addr_t neighbor_info_buf_dma; + void *deblk_buf; + dma_addr_t deblk_buf_dma; + ssize_t deblk_buf_size; + void *intra_pred_buf; + dma_addr_t intra_pred_buf_dma; + ssize_t intra_pred_buf_size; } h264; struct { void *mv_col_buf; diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c index d2c854ecdf15..19962f4213d4 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h264.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h264.c @@ -39,7 +39,6 @@ struct cedrus_h264_sram_ref_pic { #define CEDRUS_H264_FRAME_NUM 18 #define CEDRUS_NEIGHBOR_INFO_BUF_SIZE (16 * SZ_1K) -#define CEDRUS_PIC_INFO_BUF_SIZE (128 * SZ_1K) static void cedrus_h264_write_sram(struct cedrus_dev *dev, enum cedrus_h264_sram_off off, @@ -342,6 +341,20 @@ static void cedrus_set_params(struct cedrus_ctx *ctx, VE_H264_VLD_ADDR_FIRST | VE_H264_VLD_ADDR_VALID | VE_H264_VLD_ADDR_LAST); + if (ctx->src_fmt.width > 2048) { + cedrus_write(dev, VE_BUF_CTRL, + VE_BUF_CTRL_INTRAPRED_MIXED_RAM | + VE_BUF_CTRL_DBLK_MIXED_RAM); + cedrus_write(dev, VE_DBLK_DRAM_BUF_ADDR, + ctx->codec.h264.deblk_buf_dma); + cedrus_write(dev, VE_INTRAPRED_DRAM_BUF_ADDR, + ctx->codec.h264.intra_pred_buf_dma); + } else { + cedrus_write(dev, VE_BUF_CTRL, + VE_BUF_CTRL_INTRAPRED_INT_SRAM | + VE_BUF_CTRL_DBLK_INT_SRAM); + } + /* * FIXME: Since the bitstream parsing is done in software, and * in userspace, this shouldn't be needed anymore. 
But it @@ -502,18 +515,28 @@ static void cedrus_h264_setup(struct cedrus_ctx *ctx, static int cedrus_h264_start(struct cedrus_ctx *ctx) { struct cedrus_dev *dev = ctx->dev; + unsigned int pic_info_size; unsigned int field_size; unsigned int mv_col_size; int ret; + if (ctx->src_fmt.width > 2048) + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x4000; + else + pic_info_size = CEDRUS_H264_FRAME_NUM * 0x1000; + /* - * FIXME: It seems that the H6 cedarX code is using a formula - * here based on the size of the frame, while all the older - * code is using a fixed size, so that might need to be - * changed at some point. + * FIXME: If V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY is set, + * there is no need to multiply by 2. */ + pic_info_size += ctx->src_fmt.height * 2 * 64; + + if (pic_info_size < 130 * SZ_1K) + pic_info_size = 130 * SZ_1K; + + ctx->codec.h264.pic_info_buf_size = pic_info_size; ctx->codec.h264.pic_info_buf = - dma_alloc_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, + dma_alloc_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, &ctx->codec.h264.pic_info_buf_dma, GFP_KERNEL); if (!ctx->codec.h264.pic_info_buf) @@ -566,15 +589,51 @@ static int cedrus_h264_start(struct cedrus_ctx *ctx) goto err_neighbor_buf; } + if (ctx->src_fmt.width > 2048) { + ctx->codec.h264.deblk_buf_size = + ALIGN(ctx->src_fmt.width, 32) * 12; + ctx->codec.h264.deblk_buf = + dma_alloc_coherent(dev->dev, + ctx->codec.h264.deblk_buf_size, + &ctx->codec.h264.deblk_buf_dma, + GFP_KERNEL); + if (!ctx->codec.h264.deblk_buf) { + ret = -ENOMEM; + goto err_mv_col_buf; + } + + ctx->codec.h264.intra_pred_buf_size = + ALIGN(ctx->src_fmt.width, 64) * 5; + ctx->codec.h264.intra_pred_buf = + dma_alloc_coherent(dev->dev, + ctx->codec.h264.intra_pred_buf_size, + &ctx->codec.h264.intra_pred_buf_dma, + GFP_KERNEL); + if (!ctx->codec.h264.intra_pred_buf) { + ret = -ENOMEM; + goto err_deblk_buf; + } + } + return 0; +err_deblk_buf: + dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, + ctx->codec.h264.deblk_buf, 
+ ctx->codec.h264.deblk_buf_dma); + +err_mv_col_buf: + dma_free_coherent(dev->dev, ctx->codec.h264.mv_col_buf_size, + ctx->codec.h264.mv_col_buf, + ctx->codec.h264.mv_col_buf_dma); + err_neighbor_buf: dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, ctx->codec.h264.neighbor_info_buf, ctx->codec.h264.neighbor_info_buf_dma); err_pic_buf: - dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, ctx->codec.h264.pic_info_buf, ctx->codec.h264.pic_info_buf_dma); return ret; @@ -590,9 +649,17 @@ static void cedrus_h264_stop(struct cedrus_ctx *ctx) dma_free_coherent(dev->dev, CEDRUS_NEIGHBOR_INFO_BUF_SIZE, ctx->codec.h264.neighbor_info_buf, ctx->codec.h264.neighbor_info_buf_dma); - dma_free_coherent(dev->dev, CEDRUS_PIC_INFO_BUF_SIZE, + dma_free_coherent(dev->dev, ctx->codec.h264.pic_info_buf_size, ctx->codec.h264.pic_info_buf, ctx->codec.h264.pic_info_buf_dma); + if (ctx->codec.h264.deblk_buf_size) + dma_free_coherent(dev->dev, ctx->codec.h264.deblk_buf_size, + ctx->codec.h264.deblk_buf, + ctx->codec.h264.deblk_buf_dma); + if (ctx->codec.h264.intra_pred_buf_size) + dma_free_coherent(dev->dev, ctx->codec.h264.intra_pred_buf_size, + ctx->codec.h264.intra_pred_buf, + ctx->codec.h264.intra_pred_buf_dma); } static void cedrus_h264_trigger(struct cedrus_ctx *ctx) diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h index ace3d49fcd82..7beb03d3bb39 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h +++ b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h @@ -46,6 +46,17 @@ #define VE_MODE_DEC_H264 (0x01 << 0) #define VE_MODE_DEC_MPEG (0x00 << 0) +#define VE_BUF_CTRL 0x50 + +#define VE_BUF_CTRL_INTRAPRED_EXT_RAM (0x02 << 2) +#define VE_BUF_CTRL_INTRAPRED_MIXED_RAM (0x01 << 2) +#define VE_BUF_CTRL_INTRAPRED_INT_SRAM (0x00 << 2) +#define VE_BUF_CTRL_DBLK_EXT_RAM (0x02 << 0) +#define VE_BUF_CTRL_DBLK_MIXED_RAM (0x01 << 0) +#define 
VE_BUF_CTRL_DBLK_INT_SRAM (0x00 << 0) + +#define VE_DBLK_DRAM_BUF_ADDR 0x54 +#define VE_INTRAPRED_DRAM_BUF_ADDR 0x58 #define VE_PRIMARY_CHROMA_BUF_LEN 0xc4 #define VE_PRIMARY_FB_LINE_STRIDE 0xc8
H264 decoder needs additional or bigger buffers in order to decode 4k videos. Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net> --- drivers/staging/media/sunxi/cedrus/cedrus.h | 7 ++ .../staging/media/sunxi/cedrus/cedrus_h264.c | 83 +++++++++++++++++-- .../staging/media/sunxi/cedrus/cedrus_regs.h | 11 +++ 3 files changed, 93 insertions(+), 8 deletions(-)