diff mbox series

[6/7] media: cedrus: Add infra for extra buffers connected to capture buffers

Message ID 20190530211516.1891-7-jernej.skrabec@siol.net (mailing list archive)
State New, archived
Headers show
Series media: cedrus: Improvements/cleanup | expand

Commit Message

Jernej Škrabec May 30, 2019, 9:15 p.m. UTC
H264 and HEVC engines need additional buffers for each capture buffer.
H264 engine has this currently solved by allocating fixed size pool,
which is not ideal. Most of the time pool size is much bigger than it
needs to be.

Ideally, the extra buffer should be allocated at buffer initialization, but
that's not efficient. Its size in H264 depends on flags set in the SPS, but
that information is not available in the buffer init callback.

Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
---
 drivers/staging/media/sunxi/cedrus/cedrus.h   |  4 ++++
 .../staging/media/sunxi/cedrus/cedrus_video.c | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+)

Comments

Maxime Ripard June 3, 2019, 12:18 p.m. UTC | #1
Hi,

On Thu, May 30, 2019 at 11:15:15PM +0200, Jernej Skrabec wrote:
> H264 and HEVC engines need additional buffers for each capture buffer.
> H264 engine has this currently solved by allocating fixed size pool,
> which is not ideal. Most of the time pool size is much bigger than it
> needs to be.
>
> Ideally, extra buffer should be allocated at buffer initialization, but
> that's not efficient. It's size in H264 depends on flags set in SPS, but
> that information is not available in buffer init callback.
>
> Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
> ---
>  drivers/staging/media/sunxi/cedrus/cedrus.h   |  4 ++++
>  .../staging/media/sunxi/cedrus/cedrus_video.c | 19 +++++++++++++++++++
>  2 files changed, 23 insertions(+)
>
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
> index d8e6777e5e27..16c1bdfd243a 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> @@ -81,6 +81,10 @@ struct cedrus_run {
>  struct cedrus_buffer {
>  	struct v4l2_m2m_buffer          m2m_buf;
>
> +	void		*extra_buf;
> +	dma_addr_t	extra_buf_dma;
> +	ssize_t		extra_buf_size;
> +
>  	union {
>  		struct {
>  			unsigned int			position;
> diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> index 681dfe3367a6..d756e0e69634 100644
> --- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> +++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> @@ -411,6 +411,24 @@ static void cedrus_queue_cleanup(struct vb2_queue *vq, u32 state)
>  	}
>  }
>
> +static void cedrus_buf_cleanup(struct vb2_buffer *vb)
> +{
> +	struct vb2_queue *vq = vb->vb2_queue;
> +
> +	if (!V4L2_TYPE_IS_OUTPUT(vq->type)) {
> +		struct cedrus_ctx *ctx = vb2_get_drv_priv(vq);
> +		struct cedrus_buffer *cedrus_buf;
> +
> +		cedrus_buf = vb2_to_cedrus_buffer(vq->bufs[vb->index]);
> +
> +		if (cedrus_buf->extra_buf_size)
> +			dma_free_coherent(ctx->dev->dev,
> +					  cedrus_buf->extra_buf_size,
> +					  cedrus_buf->extra_buf,
> +					  cedrus_buf->extra_buf_dma);
> +	}
> +}
> +

I'm really not a fan of allocating something somewhere and freeing it
somewhere else. Making sure you don't leak something is hard enough
without having a non-trivial allocation scheme.

Maxime

--
Maxime Ripard, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
Jernej Škrabec June 3, 2019, 3:48 p.m. UTC | #2
Dne ponedeljek, 03. junij 2019 ob 14:18:59 CEST je Maxime Ripard napisal(a):
> Hi,
> 
> On Thu, May 30, 2019 at 11:15:15PM +0200, Jernej Skrabec wrote:
> > H264 and HEVC engines need additional buffers for each capture buffer.
> > H264 engine has this currently solved by allocating fixed size pool,
> > which is not ideal. Most of the time pool size is much bigger than it
> > needs to be.
> > 
> > Ideally, extra buffer should be allocated at buffer initialization, but
> > that's not efficient. It's size in H264 depends on flags set in SPS, but
> > that information is not available in buffer init callback.
> > 
> > Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
> > ---
> > 
> >  drivers/staging/media/sunxi/cedrus/cedrus.h   |  4 ++++
> >  .../staging/media/sunxi/cedrus/cedrus_video.c | 19 +++++++++++++++++++
> >  2 files changed, 23 insertions(+)
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > d8e6777e5e27..16c1bdfd243a 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > @@ -81,6 +81,10 @@ struct cedrus_run {
> > 
> >  struct cedrus_buffer {
> >  
> >  	struct v4l2_m2m_buffer          m2m_buf;
> > 
> > +	void		*extra_buf;
> > +	dma_addr_t	extra_buf_dma;
> > +	ssize_t		extra_buf_size;
> > +
> > 
> >  	union {
> >  	
> >  		struct {
> >  		
> >  			unsigned int			position;
> > 
> > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > b/drivers/staging/media/sunxi/cedrus/cedrus_video.c index
> > 681dfe3367a6..d756e0e69634 100644
> > --- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > @@ -411,6 +411,24 @@ static void cedrus_queue_cleanup(struct vb2_queue
> > *vq, u32 state)> 
> >  	}
> >  
> >  }
> > 
> > +static void cedrus_buf_cleanup(struct vb2_buffer *vb)
> > +{
> > +	struct vb2_queue *vq = vb->vb2_queue;
> > +
> > +	if (!V4L2_TYPE_IS_OUTPUT(vq->type)) {
> > +		struct cedrus_ctx *ctx = vb2_get_drv_priv(vq);
> > +		struct cedrus_buffer *cedrus_buf;
> > +
> > +		cedrus_buf = vb2_to_cedrus_buffer(vq->bufs[vb->index]);
> > +
> > +		if (cedrus_buf->extra_buf_size)
> > +			dma_free_coherent(ctx->dev->dev,
> > +					  cedrus_buf->extra_buf_size,
> > +					  cedrus_buf->extra_buf,
> > +					  cedrus_buf->extra_buf_dma);
> > +	}
> > +}
> > +
> 
> I'm really not a fan of allocating something somewhere, and freeing it
> somewhere else. Making sure you don't leak something is hard enough to
> not have some non-trivial allocation scheme.

Ok, what about introducing two new optional methods in the engine callbacks,
buffer_init and buffer_destroy, which would be called from cedrus_buf_init() and
cedrus_buf_cleanup(), respectively? That way all (de)allocation logic stays
within the same engine.

Best regards,
Jernej
Paul Kocialkowski June 5, 2019, 9:10 p.m. UTC | #3
Hi,

Le lundi 03 juin 2019 à 17:48 +0200, Jernej Škrabec a écrit :
> Dne ponedeljek, 03. junij 2019 ob 14:18:59 CEST je Maxime Ripard napisal(a):
> > Hi,
> > 
> > On Thu, May 30, 2019 at 11:15:15PM +0200, Jernej Skrabec wrote:
> > > H264 and HEVC engines need additional buffers for each capture buffer.
> > > H264 engine has this currently solved by allocating fixed size pool,
> > > which is not ideal. Most of the time pool size is much bigger than it
> > > needs to be.
> > > 
> > > Ideally, extra buffer should be allocated at buffer initialization, but
> > > that's not efficient. It's size in H264 depends on flags set in SPS, but
> > > that information is not available in buffer init callback.
> > > 
> > > Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
> > > ---
> > > 
> > >  drivers/staging/media/sunxi/cedrus/cedrus.h   |  4 ++++
> > >  .../staging/media/sunxi/cedrus/cedrus_video.c | 19 +++++++++++++++++++
> > >  2 files changed, 23 insertions(+)
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > > d8e6777e5e27..16c1bdfd243a 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > @@ -81,6 +81,10 @@ struct cedrus_run {
> > > 
> > >  struct cedrus_buffer {
> > >  
> > >  	struct v4l2_m2m_buffer          m2m_buf;
> > > 
> > > +	void		*extra_buf;
> > > +	dma_addr_t	extra_buf_dma;
> > > +	ssize_t		extra_buf_size;
> > > +
> > > 
> > >  	union {
> > >  	
> > >  		struct {
> > >  		
> > >  			unsigned int			position;
> > > 
> > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > > b/drivers/staging/media/sunxi/cedrus/cedrus_video.c index
> > > 681dfe3367a6..d756e0e69634 100644
> > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > > @@ -411,6 +411,24 @@ static void cedrus_queue_cleanup(struct vb2_queue
> > > *vq, u32 state)> 
> > >  	}
> > >  
> > >  }
> > > 
> > > +static void cedrus_buf_cleanup(struct vb2_buffer *vb)
> > > +{
> > > +	struct vb2_queue *vq = vb->vb2_queue;
> > > +
> > > +	if (!V4L2_TYPE_IS_OUTPUT(vq->type)) {
> > > +		struct cedrus_ctx *ctx = vb2_get_drv_priv(vq);
> > > +		struct cedrus_buffer *cedrus_buf;
> > > +
> > > +		cedrus_buf = vb2_to_cedrus_buffer(vq->bufs[vb->index]);
> > > +
> > > +		if (cedrus_buf->extra_buf_size)
> > > +			dma_free_coherent(ctx->dev->dev,
> > > +					  cedrus_buf-
> > extra_buf_size,
> > > +					  cedrus_buf-
> > extra_buf,
> > > +					  cedrus_buf-
> > extra_buf_dma);
> > > +	}
> > > +}
> > > +
> > 
> > I'm really not a fan of allocating something somewhere, and freeing it
> > somewhere else. Making sure you don't leak something is hard enough to
> > not have some non-trivial allocation scheme.
> 
> Ok, what about introducing two new optional methods in engine callbacks, 
> buffer_init and buffer_destroy, which would be called from cedrus_buf_init() and 
> cedrus_buf_cleanup(), respectively. That way all (de)allocation logic stays 
> within the same engine.

I'm thinking that we should have v4l2-framework-level per-codec helpers
to provide ops for these kinds of things, since they tend to be quite
common across decoders.

Cheers,

Paul
Jernej Škrabec June 5, 2019, 9:52 p.m. UTC | #4
Dne sreda, 05. junij 2019 ob 23:10:17 CEST je Paul Kocialkowski napisal(a):
> Hi,
> 
> Le lundi 03 juin 2019 à 17:48 +0200, Jernej Škrabec a écrit :
> > Dne ponedeljek, 03. junij 2019 ob 14:18:59 CEST je Maxime Ripard 
napisal(a):
> > > Hi,
> > > 
> > > On Thu, May 30, 2019 at 11:15:15PM +0200, Jernej Skrabec wrote:
> > > > H264 and HEVC engines need additional buffers for each capture buffer.
> > > > H264 engine has this currently solved by allocating fixed size pool,
> > > > which is not ideal. Most of the time pool size is much bigger than it
> > > > needs to be.
> > > > 
> > > > Ideally, extra buffer should be allocated at buffer initialization,
> > > > but
> > > > that's not efficient. It's size in H264 depends on flags set in SPS,
> > > > but
> > > > that information is not available in buffer init callback.
> > > > 
> > > > Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
> > > > ---
> > > > 
> > > >  drivers/staging/media/sunxi/cedrus/cedrus.h   |  4 ++++
> > > >  .../staging/media/sunxi/cedrus/cedrus_video.c | 19
> > > >  +++++++++++++++++++
> > > >  2 files changed, 23 insertions(+)
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus.h index
> > > > d8e6777e5e27..16c1bdfd243a 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
> > > > @@ -81,6 +81,10 @@ struct cedrus_run {
> > > > 
> > > >  struct cedrus_buffer {
> > > >  
> > > >  	struct v4l2_m2m_buffer          m2m_buf;
> > > > 
> > > > +	void		*extra_buf;
> > > > +	dma_addr_t	extra_buf_dma;
> > > > +	ssize_t		extra_buf_size;
> > > > +
> > > > 
> > > >  	union {
> > > >  	
> > > >  		struct {
> > > >  		
> > > >  			unsigned int			position;
> > > > 
> > > > diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > > > b/drivers/staging/media/sunxi/cedrus/cedrus_video.c index
> > > > 681dfe3367a6..d756e0e69634 100644
> > > > --- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > > > +++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
> > > > @@ -411,6 +411,24 @@ static void cedrus_queue_cleanup(struct vb2_queue
> > > > *vq, u32 state)>
> > > > 
> > > >  	}
> > > >  
> > > >  }
> > > > 
> > > > +static void cedrus_buf_cleanup(struct vb2_buffer *vb)
> > > > +{
> > > > +	struct vb2_queue *vq = vb->vb2_queue;
> > > > +
> > > > +	if (!V4L2_TYPE_IS_OUTPUT(vq->type)) {
> > > > +		struct cedrus_ctx *ctx = vb2_get_drv_priv(vq);
> > > > +		struct cedrus_buffer *cedrus_buf;
> > > > +
> > > > +		cedrus_buf = vb2_to_cedrus_buffer(vq->bufs[vb->index]);
> > > > +
> > > > +		if (cedrus_buf->extra_buf_size)
> > > > +			dma_free_coherent(ctx->dev->dev,
> > > > +					  cedrus_buf-
> > > 
> > > extra_buf_size,
> > > 
> > > > +					  cedrus_buf-
> > > 
> > > extra_buf,
> > > 
> > > > +					  cedrus_buf-
> > > 
> > > extra_buf_dma);
> > > 
> > > > +	}
> > > > +}
> > > > +
> > > 
> > > I'm really not a fan of allocating something somewhere, and freeing it
> > > somewhere else. Making sure you don't leak something is hard enough to
> > > not have some non-trivial allocation scheme.
> > 
> > Ok, what about introducing two new optional methods in engine callbacks,
> > buffer_init and buffer_destroy, which would be called from
> > cedrus_buf_init() and cedrus_buf_cleanup(), respectively. That way all
> > (de)allocation logic stays within the same engine.
> 
> I'm thinking that we should have v4l2-framework-level per-codec helpers
> to provide ops for these kinds of things, since they tend be quite
> common across decoders.

Aren't the .buf_init and .buf_cleanup callbacks provided by struct vb2_ops meant
for exactly that?

Related, but different topic. I managed to fix 10-bit HEVC support on H6, but
when working in 8-bit mode, capture buffers have to be big enough to hold
the normal NV12 decoded image plus an extra buffer for 2 bits of each pixel. The VPU
accepts only an offset from the destination buffer for this extra buffer instead of a full
address. How will we handle that? Override sizeimage when allocating? But
there we don't have information on whether it's 10-bit video or not. As you can see,
I'm not a fan of overallocating.

I suspect we will have even bigger issues when decoding 10-bit HEVC video in
P010 format, which is the only 10-bit YUV format usable by the DRM driver (not
implemented yet). From what I know so far, the VPU needs the aforementioned
8-bit+2-bit buffers (for decoding) and another one in which it rearranges samples in
P010 format. But that has to be confirmed first.

Best regards,
Jernej
Maxime Ripard June 6, 2019, 8:33 a.m. UTC | #5
On Mon, Jun 03, 2019 at 05:48:25PM +0200, Jernej Škrabec wrote:
> Dne ponedeljek, 03. junij 2019 ob 14:18:59 CEST je Maxime Ripard napisal(a):
> > > +static void cedrus_buf_cleanup(struct vb2_buffer *vb)
> > > +{
> > > +	struct vb2_queue *vq = vb->vb2_queue;
> > > +
> > > +	if (!V4L2_TYPE_IS_OUTPUT(vq->type)) {
> > > +		struct cedrus_ctx *ctx = vb2_get_drv_priv(vq);
> > > +		struct cedrus_buffer *cedrus_buf;
> > > +
> > > +		cedrus_buf = vb2_to_cedrus_buffer(vq->bufs[vb->index]);
> > > +
> > > +		if (cedrus_buf->extra_buf_size)
> > > +			dma_free_coherent(ctx->dev->dev,
> > > +					  cedrus_buf-
> >extra_buf_size,
> > > +					  cedrus_buf-
> >extra_buf,
> > > +					  cedrus_buf-
> >extra_buf_dma);
> > > +	}
> > > +}
> > > +
> >
> > I'm really not a fan of allocating something somewhere, and freeing it
> > somewhere else. Making sure you don't leak something is hard enough to
> > not have some non-trivial allocation scheme.
>
> Ok, what about introducing two new optional methods in engine callbacks,
> buffer_init and buffer_destroy, which would be called from cedrus_buf_init() and
> cedrus_buf_cleanup(), respectively. That way all (de)allocation logic stays
> within the same engine.

Yep, that would work for me.

Thanks!
Maxime

--
Maxime Ripard, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com
diff mbox series

Patch

diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
index d8e6777e5e27..16c1bdfd243a 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
@@ -81,6 +81,10 @@  struct cedrus_run {
 struct cedrus_buffer {
 	struct v4l2_m2m_buffer          m2m_buf;
 
+	void		*extra_buf;
+	dma_addr_t	extra_buf_dma;
+	ssize_t		extra_buf_size;
+
 	union {
 		struct {
 			unsigned int			position;
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
index 681dfe3367a6..d756e0e69634 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
@@ -411,6 +411,24 @@  static void cedrus_queue_cleanup(struct vb2_queue *vq, u32 state)
 	}
 }
 
+static void cedrus_buf_cleanup(struct vb2_buffer *vb)
+{
+	struct vb2_queue *vq = vb->vb2_queue;
+
+	if (!V4L2_TYPE_IS_OUTPUT(vq->type)) {
+		struct cedrus_ctx *ctx = vb2_get_drv_priv(vq);
+		struct cedrus_buffer *cedrus_buf;
+
+		cedrus_buf = vb2_to_cedrus_buffer(vq->bufs[vb->index]);
+
+		if (cedrus_buf->extra_buf_size)
+			dma_free_coherent(ctx->dev->dev,
+					  cedrus_buf->extra_buf_size,
+					  cedrus_buf->extra_buf,
+					  cedrus_buf->extra_buf_dma);
+	}
+}
+
 static int cedrus_buf_out_validate(struct vb2_buffer *vb)
 {
 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
@@ -497,6 +515,7 @@  static void cedrus_buf_request_complete(struct vb2_buffer *vb)
 static struct vb2_ops cedrus_qops = {
 	.queue_setup		= cedrus_queue_setup,
 	.buf_prepare		= cedrus_buf_prepare,
+	.buf_cleanup		= cedrus_buf_cleanup,
 	.buf_queue		= cedrus_buf_queue,
 	.buf_out_validate	= cedrus_buf_out_validate,
 	.buf_request_complete	= cedrus_buf_request_complete,