diff mbox series

[4/5] media: meson: vdec: add VP9 input support

Message ID 20191205092639.26330-1-narmstrong@baylibre.com (mailing list archive)
State New, archived
Headers show
Series media: meson: vdec: Add VP9 decoding support | expand

Commit Message

Neil Armstrong Dec. 5, 2019, 9:26 a.m. UTC
From: Maxime Jourdan <mjourdan@baylibre.com>

Amlogic VP9 decoder requires an additional 16-byte payload before every
frame header.

Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
---
 drivers/staging/media/meson/vdec/esparser.c | 142 +++++++++++++++++++-
 1 file changed, 138 insertions(+), 4 deletions(-)

Comments

Nicolas Dufresne Dec. 5, 2019, 6:40 p.m. UTC | #1
Le jeudi 05 décembre 2019 à 10:26 +0100, Neil Armstrong a écrit :
> From: Maxime Jourdan <mjourdan@baylibre.com>
> 
> Amlogic VP9 decoder requires an additional 16-byte payload before every
> frame header.

When I first saw this patch, I assumed data_offset was to be used (like
for venus), but I think what I'm reading is that the bitstream is
bounce into another buffer (ring buffer ?) and for this reason such an
offset is not needed. Maybe worth referring to how the header is being
added (e.g. while copying the data) ? 

> 
> Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com>
> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
> ---
>  drivers/staging/media/meson/vdec/esparser.c | 142 +++++++++++++++++++-
>  1 file changed, 138 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/staging/media/meson/vdec/esparser.c b/drivers/staging/media/meson/vdec/esparser.c
> index adc5c1e81a4c..aeb68f6c732a 100644
> --- a/drivers/staging/media/meson/vdec/esparser.c
> +++ b/drivers/staging/media/meson/vdec/esparser.c
> @@ -52,6 +52,7 @@
>  #define PARSER_VIDEO_HOLE	0x90
>  
>  #define SEARCH_PATTERN_LEN	512
> +#define VP9_HEADER_SIZE		16
>  
>  static DECLARE_WAIT_QUEUE_HEAD(wq);
>  static int search_done;
> @@ -74,14 +75,121 @@ static irqreturn_t esparser_isr(int irq, void *dev)
>  	return IRQ_HANDLED;
>  }
>  
> +/**
> + * VP9 frame headers need to be appended by a 16-byte long

nit: Maybe the use of "appending" is not appropriate as the header is
documented in the commit as being "before every frame header" ?

> + * Amlogic custom header
> + */
> +static int vp9_update_header(struct amvdec_core *core, struct vb2_buffer *buf)
> +{
> +	u8 *dp;
> +	u8 marker;
> +	int dsize;
> +	int num_frames, cur_frame;
> +	int cur_mag, mag, mag_ptr;
> +	int frame_size[8], tot_frame_size[8];
> +	int total_datasize = 0;
> +	int new_frame_size;
> +	unsigned char *old_header = NULL;
> +
> +	dp = (uint8_t *)vb2_plane_vaddr(buf, 0);
> +	dsize = vb2_get_plane_payload(buf, 0);
> +
> +	if (dsize == vb2_plane_size(buf, 0)) {
> +		dev_warn(core->dev, "%s: unable to update header\n", __func__);
> +		return 0;
> +	}
> +
> +	marker = dp[dsize - 1];
> +	if ((marker & 0xe0) == 0xc0) {
> +		num_frames = (marker & 0x7) + 1;
> +		mag = ((marker >> 3) & 0x3) + 1;
> +		mag_ptr = dsize - mag * num_frames - 2;
> +		if (dp[mag_ptr] != marker)
> +			return 0;
> +
> +		mag_ptr++;
> +		for (cur_frame = 0; cur_frame < num_frames; cur_frame++) {
> +			frame_size[cur_frame] = 0;
> +			for (cur_mag = 0; cur_mag < mag; cur_mag++) {
> +				frame_size[cur_frame] |=
> +					(dp[mag_ptr] << (cur_mag * 8));
> +				mag_ptr++;
> +			}
> +			if (cur_frame == 0)
> +				tot_frame_size[cur_frame] =
> +					frame_size[cur_frame];
> +			else
> +				tot_frame_size[cur_frame] =
> +					tot_frame_size[cur_frame - 1] +
> +					frame_size[cur_frame];
> +			total_datasize += frame_size[cur_frame];
> +		}
> +	} else {
> +		num_frames = 1;
> +		frame_size[0] = dsize;
> +		tot_frame_size[0] = dsize;
> +		total_datasize = dsize;
> +	}
> +
> +	new_frame_size = total_datasize + num_frames * VP9_HEADER_SIZE;
> +
> +	if (new_frame_size >= vb2_plane_size(buf, 0)) {
> +		dev_warn(core->dev, "%s: unable to update header\n", __func__);
> +		return 0;
> +	}
> +
> +	for (cur_frame = num_frames - 1; cur_frame >= 0; cur_frame--) {
> +		int framesize = frame_size[cur_frame];
> +		int framesize_header = framesize + 4;
> +		int oldframeoff = tot_frame_size[cur_frame] - framesize;
> +		int outheaderoff =  oldframeoff + cur_frame * VP9_HEADER_SIZE;
> +		u8 *fdata = dp + outheaderoff;
> +		u8 *old_framedata = dp + oldframeoff;
> +
> +		memmove(fdata + VP9_HEADER_SIZE, old_framedata, framesize);
> +
> +		fdata[0] = (framesize_header >> 24) & 0xff;
> +		fdata[1] = (framesize_header >> 16) & 0xff;
> +		fdata[2] = (framesize_header >> 8) & 0xff;
> +		fdata[3] = (framesize_header >> 0) & 0xff;
> +		fdata[4] = ((framesize_header >> 24) & 0xff) ^ 0xff;
> +		fdata[5] = ((framesize_header >> 16) & 0xff) ^ 0xff;
> +		fdata[6] = ((framesize_header >> 8) & 0xff) ^ 0xff;
> +		fdata[7] = ((framesize_header >> 0) & 0xff) ^ 0xff;
> +		fdata[8] = 0;
> +		fdata[9] = 0;
> +		fdata[10] = 0;
> +		fdata[11] = 1;
> +		fdata[12] = 'A';
> +		fdata[13] = 'M';
> +		fdata[14] = 'L';
> +		fdata[15] = 'V';
> +
> +		if (!old_header) {
> +			/* nothing */
> +		} else if (old_header > fdata + 16 + framesize) {
> +			dev_dbg(core->dev, "%s: data has gaps, setting to 0\n",
> +				__func__);
> +			memset(fdata + 16 + framesize, 0,
> +			       (old_header - fdata + 16 + framesize));
> +		} else if (old_header < fdata + 16 + framesize) {
> +			dev_err(core->dev, "%s: data overwritten\n", __func__);
> +		}
> +		old_header = fdata;
> +	}
> +
> +	return new_frame_size;
> +}
> +
>  /* Pad the packet to at least 4KiB bytes otherwise the VDEC unit won't trigger
>   * ISRs.
>   * Also append a start code 000001ff at the end to trigger
>   * the ESPARSER interrupt.
>   */
> -static u32 esparser_pad_start_code(struct amvdec_core *core, struct vb2_buffer *vb)
> +static u32 esparser_pad_start_code(struct amvdec_core *core,
> +				   struct vb2_buffer *vb,
> +				   u32 payload_size)
>  {
> -	u32 payload_size = vb2_get_plane_payload(vb, 0);
>  	u32 pad_size = 0;
>  	u8 *vaddr = vb2_plane_vaddr(vb, 0);
>  
> @@ -186,13 +294,27 @@ esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf)
>  	int ret;
>  	struct vb2_buffer *vb = &vbuf->vb2_buf;
>  	struct amvdec_core *core = sess->core;
> +	struct amvdec_codec_ops *codec_ops = sess->fmt_out->codec_ops;
>  	u32 payload_size = vb2_get_plane_payload(vb, 0);
>  	dma_addr_t phy = vb2_dma_contig_plane_dma_addr(vb, 0);
> +	u32 num_dst_bufs = 0;
>  	u32 offset;
>  	u32 pad_size;
>  
> -	if (esparser_vififo_get_free_space(sess) < payload_size)
> +	if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9) {
> +		if (codec_ops->num_pending_bufs)
> +			num_dst_bufs = codec_ops->num_pending_bufs(sess);
> +
> +		num_dst_bufs += v4l2_m2m_num_dst_bufs_ready(sess->m2m_ctx);
> +		if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9)
> +			num_dst_bufs -= 2;
> +
> +		if (esparser_vififo_get_free_space(sess) < payload_size ||
> +		    atomic_read(&sess->esparser_queued_bufs) >= num_dst_bufs)
> +			return -EAGAIN;
> +	} else if (esparser_vififo_get_free_space(sess) < payload_size) {
>  		return -EAGAIN;
> +	}
>  
>  	v4l2_m2m_src_buf_remove_by_buf(sess->m2m_ctx, vbuf);
>  
> @@ -206,7 +328,19 @@ esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf)
>  	vbuf->field = V4L2_FIELD_NONE;
>  	vbuf->sequence = sess->sequence_out++;
>  
> -	pad_size = esparser_pad_start_code(core, vb);
> +	if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9) {
> +		payload_size = vp9_update_header(core, vb);
> +
> +		/* If unable to alter buffer to add headers */
> +		if (payload_size == 0) {
> +			amvdec_remove_ts(sess, vb->timestamp);
> +			v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
> +
> +			return 0;
> +		}
> +	}
> +
> +	pad_size = esparser_pad_start_code(core, vb, payload_size);
>  	ret = esparser_write_data(core, phy, payload_size + pad_size);
>  
>  	if (ret <= 0) {
Neil Armstrong Dec. 13, 2019, 9:06 a.m. UTC | #2
On 05/12/2019 19:40, Nicolas Dufresne wrote:
> Le jeudi 05 décembre 2019 à 10:26 +0100, Neil Armstrong a écrit :
>> From: Maxime Jourdan <mjourdan@baylibre.com>
>>
>> Amlogic VP9 decoder requires an additional 16-byte payload before every
>> frame header.
> 
> When I first saw this patch, I assumed data_offset was to be used (like
> for venus), but I think what I'm reading is that the bitstream is
> bounce into another buffer (ring buffer ?) and for this reason such an
> offset is not needed. Maybe worth referring to how the header is being
> added (e.g. while copying the data) ? 

The source buffer is updates in-situ, then given to the Parser, copied into
the parser ring buffer, then parses and copies the slice into the decoder "workspace".

I'll specify it the commit log.

Neil

> 
>>
>> Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com>
>> Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
>> ---
>>  drivers/staging/media/meson/vdec/esparser.c | 142 +++++++++++++++++++-
>>  1 file changed, 138 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/staging/media/meson/vdec/esparser.c b/drivers/staging/media/meson/vdec/esparser.c
>> index adc5c1e81a4c..aeb68f6c732a 100644
>> --- a/drivers/staging/media/meson/vdec/esparser.c
>> +++ b/drivers/staging/media/meson/vdec/esparser.c
>> @@ -52,6 +52,7 @@
>>  #define PARSER_VIDEO_HOLE	0x90
>>  
>>  #define SEARCH_PATTERN_LEN	512
>> +#define VP9_HEADER_SIZE		16
>>  
>>  static DECLARE_WAIT_QUEUE_HEAD(wq);
>>  static int search_done;
>> @@ -74,14 +75,121 @@ static irqreturn_t esparser_isr(int irq, void *dev)
>>  	return IRQ_HANDLED;
>>  }
>>  
>> +/**
>> + * VP9 frame headers need to be appended by a 16-byte long
> 
> nit: Maybe the use of "appending" is not appropriate as the header is
> documented in the commit as being "before every frame header" ?
> 
>> + * Amlogic custom header
>> + */
>> +static int vp9_update_header(struct amvdec_core *core, struct vb2_buffer *buf)
>> +{
>> +	u8 *dp;
>> +	u8 marker;
>> +	int dsize;
>> +	int num_frames, cur_frame;
>> +	int cur_mag, mag, mag_ptr;
>> +	int frame_size[8], tot_frame_size[8];
>> +	int total_datasize = 0;
>> +	int new_frame_size;
>> +	unsigned char *old_header = NULL;
>> +
>> +	dp = (uint8_t *)vb2_plane_vaddr(buf, 0);
>> +	dsize = vb2_get_plane_payload(buf, 0);
>> +
>> +	if (dsize == vb2_plane_size(buf, 0)) {
>> +		dev_warn(core->dev, "%s: unable to update header\n", __func__);
>> +		return 0;
>> +	}
>> +
>> +	marker = dp[dsize - 1];
>> +	if ((marker & 0xe0) == 0xc0) {
>> +		num_frames = (marker & 0x7) + 1;
>> +		mag = ((marker >> 3) & 0x3) + 1;
>> +		mag_ptr = dsize - mag * num_frames - 2;
>> +		if (dp[mag_ptr] != marker)
>> +			return 0;
>> +
>> +		mag_ptr++;
>> +		for (cur_frame = 0; cur_frame < num_frames; cur_frame++) {
>> +			frame_size[cur_frame] = 0;
>> +			for (cur_mag = 0; cur_mag < mag; cur_mag++) {
>> +				frame_size[cur_frame] |=
>> +					(dp[mag_ptr] << (cur_mag * 8));
>> +				mag_ptr++;
>> +			}
>> +			if (cur_frame == 0)
>> +				tot_frame_size[cur_frame] =
>> +					frame_size[cur_frame];
>> +			else
>> +				tot_frame_size[cur_frame] =
>> +					tot_frame_size[cur_frame - 1] +
>> +					frame_size[cur_frame];
>> +			total_datasize += frame_size[cur_frame];
>> +		}
>> +	} else {
>> +		num_frames = 1;
>> +		frame_size[0] = dsize;
>> +		tot_frame_size[0] = dsize;
>> +		total_datasize = dsize;
>> +	}
>> +
>> +	new_frame_size = total_datasize + num_frames * VP9_HEADER_SIZE;
>> +
>> +	if (new_frame_size >= vb2_plane_size(buf, 0)) {
>> +		dev_warn(core->dev, "%s: unable to update header\n", __func__);
>> +		return 0;
>> +	}
>> +
>> +	for (cur_frame = num_frames - 1; cur_frame >= 0; cur_frame--) {
>> +		int framesize = frame_size[cur_frame];
>> +		int framesize_header = framesize + 4;
>> +		int oldframeoff = tot_frame_size[cur_frame] - framesize;
>> +		int outheaderoff =  oldframeoff + cur_frame * VP9_HEADER_SIZE;
>> +		u8 *fdata = dp + outheaderoff;
>> +		u8 *old_framedata = dp + oldframeoff;
>> +
>> +		memmove(fdata + VP9_HEADER_SIZE, old_framedata, framesize);
>> +
>> +		fdata[0] = (framesize_header >> 24) & 0xff;
>> +		fdata[1] = (framesize_header >> 16) & 0xff;
>> +		fdata[2] = (framesize_header >> 8) & 0xff;
>> +		fdata[3] = (framesize_header >> 0) & 0xff;
>> +		fdata[4] = ((framesize_header >> 24) & 0xff) ^ 0xff;
>> +		fdata[5] = ((framesize_header >> 16) & 0xff) ^ 0xff;
>> +		fdata[6] = ((framesize_header >> 8) & 0xff) ^ 0xff;
>> +		fdata[7] = ((framesize_header >> 0) & 0xff) ^ 0xff;
>> +		fdata[8] = 0;
>> +		fdata[9] = 0;
>> +		fdata[10] = 0;
>> +		fdata[11] = 1;
>> +		fdata[12] = 'A';
>> +		fdata[13] = 'M';
>> +		fdata[14] = 'L';
>> +		fdata[15] = 'V';
>> +
>> +		if (!old_header) {
>> +			/* nothing */
>> +		} else if (old_header > fdata + 16 + framesize) {
>> +			dev_dbg(core->dev, "%s: data has gaps, setting to 0\n",
>> +				__func__);
>> +			memset(fdata + 16 + framesize, 0,
>> +			       (old_header - fdata + 16 + framesize));
>> +		} else if (old_header < fdata + 16 + framesize) {
>> +			dev_err(core->dev, "%s: data overwritten\n", __func__);
>> +		}
>> +		old_header = fdata;
>> +	}
>> +
>> +	return new_frame_size;
>> +}
>> +
>>  /* Pad the packet to at least 4KiB bytes otherwise the VDEC unit won't trigger
>>   * ISRs.
>>   * Also append a start code 000001ff at the end to trigger
>>   * the ESPARSER interrupt.
>>   */
>> -static u32 esparser_pad_start_code(struct amvdec_core *core, struct vb2_buffer *vb)
>> +static u32 esparser_pad_start_code(struct amvdec_core *core,
>> +				   struct vb2_buffer *vb,
>> +				   u32 payload_size)
>>  {
>> -	u32 payload_size = vb2_get_plane_payload(vb, 0);
>>  	u32 pad_size = 0;
>>  	u8 *vaddr = vb2_plane_vaddr(vb, 0);
>>  
>> @@ -186,13 +294,27 @@ esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf)
>>  	int ret;
>>  	struct vb2_buffer *vb = &vbuf->vb2_buf;
>>  	struct amvdec_core *core = sess->core;
>> +	struct amvdec_codec_ops *codec_ops = sess->fmt_out->codec_ops;
>>  	u32 payload_size = vb2_get_plane_payload(vb, 0);
>>  	dma_addr_t phy = vb2_dma_contig_plane_dma_addr(vb, 0);
>> +	u32 num_dst_bufs = 0;
>>  	u32 offset;
>>  	u32 pad_size;
>>  
>> -	if (esparser_vififo_get_free_space(sess) < payload_size)
>> +	if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9) {
>> +		if (codec_ops->num_pending_bufs)
>> +			num_dst_bufs = codec_ops->num_pending_bufs(sess);
>> +
>> +		num_dst_bufs += v4l2_m2m_num_dst_bufs_ready(sess->m2m_ctx);
>> +		if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9)
>> +			num_dst_bufs -= 2;
>> +
>> +		if (esparser_vififo_get_free_space(sess) < payload_size ||
>> +		    atomic_read(&sess->esparser_queued_bufs) >= num_dst_bufs)
>> +			return -EAGAIN;
>> +	} else if (esparser_vififo_get_free_space(sess) < payload_size) {
>>  		return -EAGAIN;
>> +	}
>>  
>>  	v4l2_m2m_src_buf_remove_by_buf(sess->m2m_ctx, vbuf);
>>  
>> @@ -206,7 +328,19 @@ esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf)
>>  	vbuf->field = V4L2_FIELD_NONE;
>>  	vbuf->sequence = sess->sequence_out++;
>>  
>> -	pad_size = esparser_pad_start_code(core, vb);
>> +	if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9) {
>> +		payload_size = vp9_update_header(core, vb);
>> +
>> +		/* If unable to alter buffer to add headers */
>> +		if (payload_size == 0) {
>> +			amvdec_remove_ts(sess, vb->timestamp);
>> +			v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
>> +
>> +			return 0;
>> +		}
>> +	}
>> +
>> +	pad_size = esparser_pad_start_code(core, vb, payload_size);
>>  	ret = esparser_write_data(core, phy, payload_size + pad_size);
>>  
>>  	if (ret <= 0) {
diff mbox series

Patch

diff --git a/drivers/staging/media/meson/vdec/esparser.c b/drivers/staging/media/meson/vdec/esparser.c
index adc5c1e81a4c..aeb68f6c732a 100644
--- a/drivers/staging/media/meson/vdec/esparser.c
+++ b/drivers/staging/media/meson/vdec/esparser.c
@@ -52,6 +52,7 @@ 
 #define PARSER_VIDEO_HOLE	0x90
 
 #define SEARCH_PATTERN_LEN	512
+#define VP9_HEADER_SIZE		16
 
 static DECLARE_WAIT_QUEUE_HEAD(wq);
 static int search_done;
@@ -74,14 +75,121 @@  static irqreturn_t esparser_isr(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
+/**
+ * VP9 frame headers need to be appended by a 16-byte long
+ * Amlogic custom header
+ */
+static int vp9_update_header(struct amvdec_core *core, struct vb2_buffer *buf)
+{
+	u8 *dp;
+	u8 marker;
+	int dsize;
+	int num_frames, cur_frame;
+	int cur_mag, mag, mag_ptr;
+	int frame_size[8], tot_frame_size[8];
+	int total_datasize = 0;
+	int new_frame_size;
+	unsigned char *old_header = NULL;
+
+	dp = (uint8_t *)vb2_plane_vaddr(buf, 0);
+	dsize = vb2_get_plane_payload(buf, 0);
+
+	if (dsize == vb2_plane_size(buf, 0)) {
+		dev_warn(core->dev, "%s: unable to update header\n", __func__);
+		return 0;
+	}
+
+	marker = dp[dsize - 1];
+	if ((marker & 0xe0) == 0xc0) {
+		num_frames = (marker & 0x7) + 1;
+		mag = ((marker >> 3) & 0x3) + 1;
+		mag_ptr = dsize - mag * num_frames - 2;
+		if (dp[mag_ptr] != marker)
+			return 0;
+
+		mag_ptr++;
+		for (cur_frame = 0; cur_frame < num_frames; cur_frame++) {
+			frame_size[cur_frame] = 0;
+			for (cur_mag = 0; cur_mag < mag; cur_mag++) {
+				frame_size[cur_frame] |=
+					(dp[mag_ptr] << (cur_mag * 8));
+				mag_ptr++;
+			}
+			if (cur_frame == 0)
+				tot_frame_size[cur_frame] =
+					frame_size[cur_frame];
+			else
+				tot_frame_size[cur_frame] =
+					tot_frame_size[cur_frame - 1] +
+					frame_size[cur_frame];
+			total_datasize += frame_size[cur_frame];
+		}
+	} else {
+		num_frames = 1;
+		frame_size[0] = dsize;
+		tot_frame_size[0] = dsize;
+		total_datasize = dsize;
+	}
+
+	new_frame_size = total_datasize + num_frames * VP9_HEADER_SIZE;
+
+	if (new_frame_size >= vb2_plane_size(buf, 0)) {
+		dev_warn(core->dev, "%s: unable to update header\n", __func__);
+		return 0;
+	}
+
+	for (cur_frame = num_frames - 1; cur_frame >= 0; cur_frame--) {
+		int framesize = frame_size[cur_frame];
+		int framesize_header = framesize + 4;
+		int oldframeoff = tot_frame_size[cur_frame] - framesize;
+		int outheaderoff =  oldframeoff + cur_frame * VP9_HEADER_SIZE;
+		u8 *fdata = dp + outheaderoff;
+		u8 *old_framedata = dp + oldframeoff;
+
+		memmove(fdata + VP9_HEADER_SIZE, old_framedata, framesize);
+
+		fdata[0] = (framesize_header >> 24) & 0xff;
+		fdata[1] = (framesize_header >> 16) & 0xff;
+		fdata[2] = (framesize_header >> 8) & 0xff;
+		fdata[3] = (framesize_header >> 0) & 0xff;
+		fdata[4] = ((framesize_header >> 24) & 0xff) ^ 0xff;
+		fdata[5] = ((framesize_header >> 16) & 0xff) ^ 0xff;
+		fdata[6] = ((framesize_header >> 8) & 0xff) ^ 0xff;
+		fdata[7] = ((framesize_header >> 0) & 0xff) ^ 0xff;
+		fdata[8] = 0;
+		fdata[9] = 0;
+		fdata[10] = 0;
+		fdata[11] = 1;
+		fdata[12] = 'A';
+		fdata[13] = 'M';
+		fdata[14] = 'L';
+		fdata[15] = 'V';
+
+		if (!old_header) {
+			/* nothing */
+		} else if (old_header > fdata + 16 + framesize) {
+			dev_dbg(core->dev, "%s: data has gaps, setting to 0\n",
+				__func__);
+			memset(fdata + 16 + framesize, 0,
+			       (old_header - fdata + 16 + framesize));
+		} else if (old_header < fdata + 16 + framesize) {
+			dev_err(core->dev, "%s: data overwritten\n", __func__);
+		}
+		old_header = fdata;
+	}
+
+	return new_frame_size;
+}
+
 /* Pad the packet to at least 4KiB bytes otherwise the VDEC unit won't trigger
  * ISRs.
  * Also append a start code 000001ff at the end to trigger
  * the ESPARSER interrupt.
  */
-static u32 esparser_pad_start_code(struct amvdec_core *core, struct vb2_buffer *vb)
+static u32 esparser_pad_start_code(struct amvdec_core *core,
+				   struct vb2_buffer *vb,
+				   u32 payload_size)
 {
-	u32 payload_size = vb2_get_plane_payload(vb, 0);
 	u32 pad_size = 0;
 	u8 *vaddr = vb2_plane_vaddr(vb, 0);
 
@@ -186,13 +294,27 @@  esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf)
 	int ret;
 	struct vb2_buffer *vb = &vbuf->vb2_buf;
 	struct amvdec_core *core = sess->core;
+	struct amvdec_codec_ops *codec_ops = sess->fmt_out->codec_ops;
 	u32 payload_size = vb2_get_plane_payload(vb, 0);
 	dma_addr_t phy = vb2_dma_contig_plane_dma_addr(vb, 0);
+	u32 num_dst_bufs = 0;
 	u32 offset;
 	u32 pad_size;
 
-	if (esparser_vififo_get_free_space(sess) < payload_size)
+	if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9) {
+		if (codec_ops->num_pending_bufs)
+			num_dst_bufs = codec_ops->num_pending_bufs(sess);
+
+		num_dst_bufs += v4l2_m2m_num_dst_bufs_ready(sess->m2m_ctx);
+		if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9)
+			num_dst_bufs -= 2;
+
+		if (esparser_vififo_get_free_space(sess) < payload_size ||
+		    atomic_read(&sess->esparser_queued_bufs) >= num_dst_bufs)
+			return -EAGAIN;
+	} else if (esparser_vififo_get_free_space(sess) < payload_size) {
 		return -EAGAIN;
+	}
 
 	v4l2_m2m_src_buf_remove_by_buf(sess->m2m_ctx, vbuf);
 
@@ -206,7 +328,19 @@  esparser_queue(struct amvdec_session *sess, struct vb2_v4l2_buffer *vbuf)
 	vbuf->field = V4L2_FIELD_NONE;
 	vbuf->sequence = sess->sequence_out++;
 
-	pad_size = esparser_pad_start_code(core, vb);
+	if (sess->fmt_out->pixfmt == V4L2_PIX_FMT_VP9) {
+		payload_size = vp9_update_header(core, vb);
+
+		/* If unable to alter buffer to add headers */
+		if (payload_size == 0) {
+			amvdec_remove_ts(sess, vb->timestamp);
+			v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
+
+			return 0;
+		}
+	}
+
+	pad_size = esparser_pad_start_code(core, vb, payload_size);
 	ret = esparser_write_data(core, phy, payload_size + pad_size);
 
 	if (ret <= 0) {