diff mbox series

[v2,04/10] media: vicodec: keep the ref frame according to the format in decoder

Message ID 20190215130509.86290-5-dafna3@gmail.com (mailing list archive)
State New, archived
Headers show
Series add support to stateless decoder | expand

Commit Message

Dafna Hirschfeld Feb. 15, 2019, 1:05 p.m. UTC
In the decoder, save the inner reference frame in the same
format as the capture buffer.
The decoder writes directly to the capture buffer and then
the capture buffer is copied to the reference buffer.
This will simplify the stateless decoder.

Signed-off-by: Dafna Hirschfeld <dafna3@gmail.com>
---
 drivers/media/platform/vicodec/codec-fwht.c   |  68 +++--
 drivers/media/platform/vicodec/codec-fwht.h   |  10 +-
 .../media/platform/vicodec/codec-v4l2-fwht.c  | 280 +++---------------
 .../media/platform/vicodec/codec-v4l2-fwht.h  |   3 +
 drivers/media/platform/vicodec/vicodec-core.c |   2 +
 5 files changed, 103 insertions(+), 260 deletions(-)

Comments

Hans Verkuil Feb. 19, 2019, 10:30 a.m. UTC | #1
On 2/15/19 2:05 PM, Dafna Hirschfeld wrote:
> In the decoder, save the inner reference frame in the same
> format as the capture buffer.
> The decoder writes directly to the capture buffer and then
> the capture buffer is copied to the reference buffer.
> This will simplify the stateless decoder.
> 
> Signed-off-by: Dafna Hirschfeld <dafna3@gmail.com>
> ---
>  drivers/media/platform/vicodec/codec-fwht.c   |  68 +++--
>  drivers/media/platform/vicodec/codec-fwht.h   |  10 +-
>  .../media/platform/vicodec/codec-v4l2-fwht.c  | 280 +++---------------
>  .../media/platform/vicodec/codec-v4l2-fwht.h  |   3 +
>  drivers/media/platform/vicodec/vicodec-core.c |   2 +
>  5 files changed, 103 insertions(+), 260 deletions(-)
> 
> diff --git a/drivers/media/platform/vicodec/codec-fwht.c b/drivers/media/platform/vicodec/codec-fwht.c
> index d1d6085da9f1..42849476069b 100644
> --- a/drivers/media/platform/vicodec/codec-fwht.c
> +++ b/drivers/media/platform/vicodec/codec-fwht.c
> @@ -632,12 +632,13 @@ static int decide_blocktype(const u8 *cur, const u8 *reference,
>  	return vari <= vard ? IBLOCK : PBLOCK;
>  }
>  
> -static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
> +static void fill_decoder_block(u8 *dst, const s16 *input, int stride,
> +			       unsigned int dst_step)
>  {
>  	int i, j;
>  
>  	for (i = 0; i < 8; i++) {
> -		for (j = 0; j < 8; j++, input++, dst++) {
> +		for (j = 0; j < 8; j++, input++, dst += dst_step) {
>  			if (*input < 0)
>  				*dst = 0;
>  			else if (*input > 255)
> @@ -645,17 +646,19 @@ static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
>  			else
>  				*dst = *input;
>  		}
> -		dst += stride - 8;
> +		dst += stride - (8 * dst_step);
>  	}
>  }
>  
> -static void add_deltas(s16 *deltas, const u8 *ref, int stride)
> +static void add_deltas(s16 *deltas, const u8 *ref, int stride,
> +		       unsigned int ref_step)
>  {
>  	int k, l;
>  
>  	for (k = 0; k < 8; k++) {
>  		for (l = 0; l < 8; l++) {
> -			*deltas += *ref++;
> +			*deltas += *ref;
> +			ref += ref_step;
>  			/*
>  			 * Due to quantizing, it might possible that the
>  			 * decoded coefficients are slightly out of range
> @@ -666,7 +669,7 @@ static void add_deltas(s16 *deltas, const u8 *ref, int stride)
>  				*deltas = 255;
>  			deltas++;
>  		}
> -		ref += stride - 8;
> +		ref += stride - (8 * ref_step);
>  	}
>  }
>  
> @@ -711,8 +714,8 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
>  				ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
>  
>  				if (blocktype == PBLOCK)
> -					add_deltas(cf->de_fwht, refp, 8);
> -				fill_decoder_block(refp, cf->de_fwht, 8);
> +					add_deltas(cf->de_fwht, refp, 8, 1);
> +				fill_decoder_block(refp, cf->de_fwht, 8, 1);
>  			}
>  
>  			input += 8 * input_step;
> @@ -821,8 +824,10 @@ u32 fwht_encode_frame(struct fwht_raw_frame *frm,
>  	return encoding;
>  }
>  
> -static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
> -			 u32 height, u32 width, u32 coded_width,
> +static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco,
> +			 const u8 *ref, u32 height, u32 width, u32 coded_width,
> +			 u8 *dst, unsigned int dst_stride,
> +			 unsigned int dst_step, unsigned int ref_step,
>  			 bool uncompressed, const __be16 *end_of_rlco_buf)
>  {
>  	unsigned int copies = 0;
> @@ -834,10 +839,15 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
>  	height = round_up(height, 8);
>  
>  	if (uncompressed) {
> +		int i;
> +
>  		if (end_of_rlco_buf + 1 < *rlco + width * height / 2)
>  			return false;
> -		memcpy(ref, *rlco, width * height);
> -		*rlco += width * height / 2;
> +		for (i = 0; i < height; i++) {
> +			memcpy(dst, *rlco, width);
> +			dst += dst_stride;
> +			*rlco += width / 2;
> +		}
>  		return true;
>  	}
>  
> @@ -849,15 +859,18 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
>  	 */
>  	for (j = 0; j < height / 8; j++) {
>  		for (i = 0; i < width / 8; i++) {
> -			u8 *refp = ref + j * 8 * coded_width + i * 8;
> +			const u8 *refp = ref + j * 8 * ref_step * coded_width +
> +				i * 8 * ref_step;
> +			u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step;
>  
>  			if (copies) {
>  				memcpy(cf->de_fwht, copy, sizeof(copy));
>  				if (stat & PFRAME_BIT)
>  					add_deltas(cf->de_fwht, refp,
> -						   coded_width);
> -				fill_decoder_block(refp, cf->de_fwht,
> -						   coded_width);
> +						   coded_width * ref_step,
> +						   ref_step);
> +				fill_decoder_block(dstp, cf->de_fwht,
> +						   dst_stride, dst_step);
>  				copies--;
>  				continue;
>  			}
> @@ -877,23 +890,28 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
>  			if (copies)
>  				memcpy(copy, cf->de_fwht, sizeof(copy));
>  			if (stat & PFRAME_BIT)
> -				add_deltas(cf->de_fwht, refp, coded_width);
> -			fill_decoder_block(refp, cf->de_fwht, coded_width);
> +				add_deltas(cf->de_fwht, refp,
> +					   coded_width * ref_step, ref_step);
> +			fill_decoder_block(dstp, cf->de_fwht, dst_stride,
> +					   dst_step);
>  		}
>  	}
>  	return true;
>  }
>  
> -bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
> +bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref,
>  		       u32 hdr_flags, unsigned int components_num,
>  		       unsigned int width, unsigned int height,
> -		       unsigned int coded_width)
> +		       unsigned int coded_width, struct fwht_raw_frame *dst,
> +		       unsigned int dst_stride, unsigned int dst_chroma_stride)
>  {
>  	const __be16 *rlco = cf->rlc_data;
>  	const __be16 *end_of_rlco_buf = cf->rlc_data +
>  			(cf->size / sizeof(*rlco)) - 1;
>  
>  	if (!decode_plane(cf, &rlco, ref->luma, height, width, coded_width,
> +			  dst->luma, dst_stride, dst->luma_alpha_step,
> +			  ref->luma_alpha_step,
>  			  hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED,
>  			  end_of_rlco_buf))
>  		return false;
> @@ -909,11 +927,15 @@ bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
>  			w /= 2;
>  			c /= 2;
>  		}
> -		if (!decode_plane(cf, &rlco, ref->cb, h, w, c,
> +		if (!decode_plane(cf, &rlco, ref->cb, h, w, c, dst->cb,
> +				  dst_chroma_stride, dst->chroma_step,
> +				  ref->chroma_step,
>  				  hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED,
>  				  end_of_rlco_buf))
>  			return false;
> -		if (!decode_plane(cf, &rlco, ref->cr, h, w, c,
> +		if (!decode_plane(cf, &rlco, ref->cr, h, w, c, dst->cr,
> +				  dst_chroma_stride, dst->chroma_step,
> +				  ref->chroma_step,
>  				  hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED,
>  				  end_of_rlco_buf))
>  			return false;
> @@ -922,6 +944,8 @@ bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
>  	if (components_num == 4)
>  		if (!decode_plane(cf, &rlco, ref->alpha, height, width,
>  				  coded_width,
> +				  dst->alpha, dst_stride, dst->luma_alpha_step,
> +				  ref->luma_alpha_step,
>  				  hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED,
>  				  end_of_rlco_buf))
>  			return false;
> diff --git a/drivers/media/platform/vicodec/codec-fwht.h b/drivers/media/platform/vicodec/codec-fwht.h
> index 8a4f07d466cb..eab4a97aa132 100644
> --- a/drivers/media/platform/vicodec/codec-fwht.h
> +++ b/drivers/media/platform/vicodec/codec-fwht.h
> @@ -140,9 +140,9 @@ u32 fwht_encode_frame(struct fwht_raw_frame *frm,
>  		      bool is_intra, bool next_is_intra,
>  		      unsigned int width, unsigned int height,
>  		      unsigned int stride, unsigned int chroma_stride);
> -bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
> -		       u32 hdr_flags, unsigned int components_num,
> -		       unsigned int width, unsigned int height,
> -		       unsigned int coded_width);
> -
> +bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref,
> +		u32 hdr_flags, unsigned int components_num,
> +		unsigned int width, unsigned int height,
> +		unsigned int coded_width, struct fwht_raw_frame *dst,
> +		unsigned int dst_stride, unsigned int dst_chroma_stride);
>  #endif
> diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.c b/drivers/media/platform/vicodec/codec-v4l2-fwht.c
> index 728ed5012aed..40b1f4901fd3 100644
> --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.c
> +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.c
> @@ -75,6 +75,35 @@ const struct v4l2_fwht_pixfmt_info *v4l2_fwht_get_pixfmt(u32 idx)
>  	return v4l2_fwht_pixfmts + idx;
>  }
>  
> +void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info,
> +		     struct v4l2_fwht_state *state)

Should be a static function and cap should be const u8 *.

> +{
> +	int plane_idx;
> +	u8 *p_ref = state->ref_frame.buf;
> +
> +	for (plane_idx = 0; plane_idx < info->planes_num; plane_idx++) {
> +		int i;
> +		bool is_chroma_plane = plane_idx == 1 || plane_idx == 2;
> +		unsigned int h_div = is_chroma_plane ? info->height_div : 1;
> +		unsigned int w_div = is_chroma_plane ? info->width_div : 1;
> +		unsigned int step = is_chroma_plane ? info->chroma_step :
> +			info->luma_alpha_step;
> +		unsigned int stride_div =
> +			(info->planes_num == 3 && plane_idx > 0) ? 2 : 1;
> +
> +		u8 *row_dst = cap;

I'd call this row_cap. 'dst' is confusing since you expect a memcpy to write
to the destination address, but this isn't the 'destination', it is just a
row in the capture buffer.

It really confused me for a bit :-)

Regards,

	Hans

> +		u8 *row_ref = p_ref;
> +
> +		for (i = 0; i < state->visible_height / h_div; i++) {
> +			memcpy(row_ref, row_dst, step * state->visible_width / w_div);
> +			row_ref += step * state->coded_width / w_div;
> +			row_dst += state->stride / stride_div;
> +		}
> +		cap += (state->stride / stride_div) * (state->coded_height / h_div);
> +		p_ref += (step * state->coded_width / w_div) * (state->coded_height / h_div);
> +	}
> +}
> +
>  static int prepare_raw_frame(struct fwht_raw_frame *rf,
>  			 const struct v4l2_fwht_pixfmt_info *info, u8 *buf,
>  			 unsigned int size)
> @@ -243,14 +272,16 @@ int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out)
>  
>  int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out)
>  {
> -	unsigned int i, j, k;
>  	u32 flags;
>  	struct fwht_cframe cf;
> -	u8 *p, *ref_p;
>  	unsigned int components_num = 3;
>  	unsigned int version;
>  	const struct v4l2_fwht_pixfmt_info *info;
>  	unsigned int hdr_width_div, hdr_height_div;
> +	struct fwht_raw_frame dst_rf;
> +	unsigned int dst_chroma_stride = state->stride;
> +	unsigned int dst_size = state->stride * state->coded_height;
> +	unsigned int ref_size;
>  
>  	if (!state->info)
>  		return -EINVAL;
> @@ -298,241 +329,24 @@ int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out)
>  	    hdr_height_div != info->height_div)
>  		return -EINVAL;
>  
> -	if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num,
> -			       state->visible_width, state->visible_height,
> -			       state->coded_width))
> +	if (prepare_raw_frame(&dst_rf, info, p_out, dst_size))
>  		return -EINVAL;
> +	if (info->id == V4L2_PIX_FMT_YUV420 ||
> +	    info->id == V4L2_PIX_FMT_YVU420 ||
> +	    info->id == V4L2_PIX_FMT_YUV422P)
> +		dst_chroma_stride /= 2;
>  
> -	/*
> -	 * TODO - handle the case where the compressed stream encodes a
> -	 * different format than the requested decoded format.
> -	 */
> -	switch (state->info->id) {
> -	case V4L2_PIX_FMT_GREY:
> -		ref_p = state->ref_frame.luma;
> -		for (i = 0; i < state->coded_height; i++)  {
> -			memcpy(p_out, ref_p, state->visible_width);
> -			p_out += state->stride;
> -			ref_p += state->coded_width;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_YUV420:
> -	case V4L2_PIX_FMT_YUV422P:
> -		ref_p = state->ref_frame.luma;
> -		for (i = 0; i < state->coded_height; i++)  {
> -			memcpy(p_out, ref_p, state->visible_width);
> -			p_out += state->stride;
> -			ref_p += state->coded_width;
> -		}
> -
> -		ref_p = state->ref_frame.cb;
> -		for (i = 0; i < state->coded_height / 2; i++)  {
> -			memcpy(p_out, ref_p, state->visible_width / 2);
> -			p_out += state->stride / 2;
> -			ref_p += state->coded_width / 2;
> -		}
> -		ref_p = state->ref_frame.cr;
> -		for (i = 0; i < state->coded_height / 2; i++)  {
> -			memcpy(p_out, ref_p, state->visible_width / 2);
> -			p_out += state->stride / 2;
> -			ref_p += state->coded_width / 2;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_YVU420:
> -		ref_p = state->ref_frame.luma;
> -		for (i = 0; i < state->coded_height; i++)  {
> -			memcpy(p_out, ref_p, state->visible_width);
> -			p_out += state->stride;
> -			ref_p += state->coded_width;
> -		}
> -
> -		ref_p = state->ref_frame.cr;
> -		for (i = 0; i < state->coded_height / 2; i++)  {
> -			memcpy(p_out, ref_p, state->visible_width / 2);
> -			p_out += state->stride / 2;
> -			ref_p += state->coded_width / 2;
> -		}
> -		ref_p = state->ref_frame.cb;
> -		for (i = 0; i < state->coded_height / 2; i++)  {
> -			memcpy(p_out, ref_p, state->visible_width / 2);
> -			p_out += state->stride / 2;
> -			ref_p += state->coded_width / 2;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_NV12:
> -	case V4L2_PIX_FMT_NV16:
> -	case V4L2_PIX_FMT_NV24:
> -		ref_p = state->ref_frame.luma;
> -		for (i = 0; i < state->coded_height; i++)  {
> -			memcpy(p_out, ref_p, state->visible_width);
> -			p_out += state->stride;
> -			ref_p += state->coded_width;
> -		}
> +	ref_size = state->coded_width * state->coded_height *
> +		info->luma_alpha_step;
>  
> -		k = 0;
> -		for (i = 0; i < state->coded_height / 2; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
> -				*p++ = state->ref_frame.cb[k];
> -				*p++ = state->ref_frame.cr[k];
> -				k++;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_NV21:
> -	case V4L2_PIX_FMT_NV61:
> -	case V4L2_PIX_FMT_NV42:
> -		ref_p = state->ref_frame.luma;
> -		for (i = 0; i < state->coded_height; i++)  {
> -			memcpy(p_out, ref_p, state->visible_width);
> -			p_out += state->stride;
> -			ref_p += state->coded_width;
> -		}
> +	if (prepare_raw_frame(&state->ref_frame, info, state->ref_frame.buf,
> +			      ref_size))
> +		return -EINVAL;
>  
> -		k = 0;
> -		for (i = 0; i < state->coded_height / 2; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
> -				*p++ = state->ref_frame.cr[k];
> -				*p++ = state->ref_frame.cb[k];
> -				k++;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_YUYV:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cb[k / 2];
> -				*p++ = state->ref_frame.luma[k + 1];
> -				*p++ = state->ref_frame.cr[k / 2];
> -				k += 2;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_YVYU:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cr[k / 2];
> -				*p++ = state->ref_frame.luma[k + 1];
> -				*p++ = state->ref_frame.cb[k / 2];
> -				k += 2;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_UYVY:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
> -				*p++ = state->ref_frame.cb[k / 2];
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cr[k / 2];
> -				*p++ = state->ref_frame.luma[k + 1];
> -				k += 2;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_VYUY:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
> -				*p++ = state->ref_frame.cr[k / 2];
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cb[k / 2];
> -				*p++ = state->ref_frame.luma[k + 1];
> -				k += 2;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_RGB24:
> -	case V4L2_PIX_FMT_HSV24:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
> -				*p++ = state->ref_frame.cr[k];
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cb[k];
> -				k++;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_BGR24:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
> -				*p++ = state->ref_frame.cb[k];
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cr[k];
> -				k++;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_RGB32:
> -	case V4L2_PIX_FMT_XRGB32:
> -	case V4L2_PIX_FMT_HSV32:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
> -				*p++ = 0;
> -				*p++ = state->ref_frame.cr[k];
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cb[k];
> -				k++;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_BGR32:
> -	case V4L2_PIX_FMT_XBGR32:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
> -				*p++ = state->ref_frame.cb[k];
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cr[k];
> -				*p++ = 0;
> -				k++;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_ARGB32:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
> -				*p++ = state->ref_frame.alpha[k];
> -				*p++ = state->ref_frame.cr[k];
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cb[k];
> -				k++;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	case V4L2_PIX_FMT_ABGR32:
> -		k = 0;
> -		for (i = 0; i < state->coded_height; i++) {
> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
> -				*p++ = state->ref_frame.cb[k];
> -				*p++ = state->ref_frame.luma[k];
> -				*p++ = state->ref_frame.cr[k];
> -				*p++ = state->ref_frame.alpha[k];
> -				k++;
> -			}
> -			p_out += state->stride;
> -		}
> -		break;
> -	default:
> +	if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num,
> +			       state->visible_width, state->visible_height,
> +			       state->coded_width, &dst_rf, state->stride,
> +			       dst_chroma_stride))
>  		return -EINVAL;
> -	}
>  	return 0;
>  }
> diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.h b/drivers/media/platform/vicodec/codec-v4l2-fwht.h
> index aa6fa90a48be..75343cdf45e2 100644
> --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.h
> +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.h
> @@ -53,6 +53,9 @@ const struct v4l2_fwht_pixfmt_info *v4l2_fwht_default_fmt(u32 width_div,
>  							  u32 pixenc,
>  							  unsigned int start_idx);
>  
> +void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info,
> +		struct v4l2_fwht_state *state);
> +
>  int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out);
>  int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out);
>  
> diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c
> index 8d38bc1ef079..335a931fdf02 100644
> --- a/drivers/media/platform/vicodec/vicodec-core.c
> +++ b/drivers/media/platform/vicodec/vicodec-core.c
> @@ -194,6 +194,8 @@ static int device_process(struct vicodec_ctx *ctx,
>  		ret = v4l2_fwht_decode(state, p_src, p_dst);
>  		if (ret < 0)
>  			return ret;
> +		copy_cap_to_ref(p_dst, ctx->state.info, &ctx->state);
> +
>  		vb2_set_plane_payload(&dst_vb->vb2_buf, 0, q_dst->sizeimage);
>  	}
>  
>
Hans Verkuil Feb. 19, 2019, 10:33 a.m. UTC | #2
On 2/19/19 11:30 AM, Hans Verkuil wrote:
> On 2/15/19 2:05 PM, Dafna Hirschfeld wrote:
>> In the decoder, save the inner reference frame in the same
>> format as the capture buffer.
>> The decoder writes directly to the capture buffer and then
>> the capture buffer is copied to the reference buffer.
>> This will simplify the stateless decoder.
>>
>> Signed-off-by: Dafna Hirschfeld <dafna3@gmail.com>
>> ---
>>  drivers/media/platform/vicodec/codec-fwht.c   |  68 +++--
>>  drivers/media/platform/vicodec/codec-fwht.h   |  10 +-
>>  .../media/platform/vicodec/codec-v4l2-fwht.c  | 280 +++---------------
>>  .../media/platform/vicodec/codec-v4l2-fwht.h  |   3 +
>>  drivers/media/platform/vicodec/vicodec-core.c |   2 +
>>  5 files changed, 103 insertions(+), 260 deletions(-)
>>
>> diff --git a/drivers/media/platform/vicodec/codec-fwht.c b/drivers/media/platform/vicodec/codec-fwht.c
>> index d1d6085da9f1..42849476069b 100644
>> --- a/drivers/media/platform/vicodec/codec-fwht.c
>> +++ b/drivers/media/platform/vicodec/codec-fwht.c
>> @@ -632,12 +632,13 @@ static int decide_blocktype(const u8 *cur, const u8 *reference,
>>  	return vari <= vard ? IBLOCK : PBLOCK;
>>  }
>>  
>> -static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
>> +static void fill_decoder_block(u8 *dst, const s16 *input, int stride,
>> +			       unsigned int dst_step)
>>  {
>>  	int i, j;
>>  
>>  	for (i = 0; i < 8; i++) {
>> -		for (j = 0; j < 8; j++, input++, dst++) {
>> +		for (j = 0; j < 8; j++, input++, dst += dst_step) {
>>  			if (*input < 0)
>>  				*dst = 0;
>>  			else if (*input > 255)
>> @@ -645,17 +646,19 @@ static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
>>  			else
>>  				*dst = *input;
>>  		}
>> -		dst += stride - 8;
>> +		dst += stride - (8 * dst_step);
>>  	}
>>  }
>>  
>> -static void add_deltas(s16 *deltas, const u8 *ref, int stride)
>> +static void add_deltas(s16 *deltas, const u8 *ref, int stride,
>> +		       unsigned int ref_step)
>>  {
>>  	int k, l;
>>  
>>  	for (k = 0; k < 8; k++) {
>>  		for (l = 0; l < 8; l++) {
>> -			*deltas += *ref++;
>> +			*deltas += *ref;
>> +			ref += ref_step;
>>  			/*
>>  			 * Due to quantizing, it might possible that the
>>  			 * decoded coefficients are slightly out of range
>> @@ -666,7 +669,7 @@ static void add_deltas(s16 *deltas, const u8 *ref, int stride)
>>  				*deltas = 255;
>>  			deltas++;
>>  		}
>> -		ref += stride - 8;
>> +		ref += stride - (8 * ref_step);
>>  	}
>>  }
>>  
>> @@ -711,8 +714,8 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
>>  				ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
>>  
>>  				if (blocktype == PBLOCK)
>> -					add_deltas(cf->de_fwht, refp, 8);
>> -				fill_decoder_block(refp, cf->de_fwht, 8);
>> +					add_deltas(cf->de_fwht, refp, 8, 1);
>> +				fill_decoder_block(refp, cf->de_fwht, 8, 1);
>>  			}
>>  
>>  			input += 8 * input_step;
>> @@ -821,8 +824,10 @@ u32 fwht_encode_frame(struct fwht_raw_frame *frm,
>>  	return encoding;
>>  }
>>  
>> -static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
>> -			 u32 height, u32 width, u32 coded_width,
>> +static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco,
>> +			 const u8 *ref, u32 height, u32 width, u32 coded_width,
>> +			 u8 *dst, unsigned int dst_stride,
>> +			 unsigned int dst_step, unsigned int ref_step,
>>  			 bool uncompressed, const __be16 *end_of_rlco_buf)
>>  {
>>  	unsigned int copies = 0;
>> @@ -834,10 +839,15 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
>>  	height = round_up(height, 8);
>>  
>>  	if (uncompressed) {
>> +		int i;
>> +
>>  		if (end_of_rlco_buf + 1 < *rlco + width * height / 2)
>>  			return false;
>> -		memcpy(ref, *rlco, width * height);
>> -		*rlco += width * height / 2;
>> +		for (i = 0; i < height; i++) {
>> +			memcpy(dst, *rlco, width);
>> +			dst += dst_stride;
>> +			*rlco += width / 2;
>> +		}
>>  		return true;
>>  	}
>>  
>> @@ -849,15 +859,18 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
>>  	 */
>>  	for (j = 0; j < height / 8; j++) {
>>  		for (i = 0; i < width / 8; i++) {
>> -			u8 *refp = ref + j * 8 * coded_width + i * 8;
>> +			const u8 *refp = ref + j * 8 * ref_step * coded_width +
>> +				i * 8 * ref_step;
>> +			u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step;
>>  
>>  			if (copies) {
>>  				memcpy(cf->de_fwht, copy, sizeof(copy));
>>  				if (stat & PFRAME_BIT)
>>  					add_deltas(cf->de_fwht, refp,
>> -						   coded_width);
>> -				fill_decoder_block(refp, cf->de_fwht,
>> -						   coded_width);
>> +						   coded_width * ref_step,
>> +						   ref_step);
>> +				fill_decoder_block(dstp, cf->de_fwht,
>> +						   dst_stride, dst_step);
>>  				copies--;
>>  				continue;
>>  			}
>> @@ -877,23 +890,28 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
>>  			if (copies)
>>  				memcpy(copy, cf->de_fwht, sizeof(copy));
>>  			if (stat & PFRAME_BIT)
>> -				add_deltas(cf->de_fwht, refp, coded_width);
>> -			fill_decoder_block(refp, cf->de_fwht, coded_width);
>> +				add_deltas(cf->de_fwht, refp,
>> +					   coded_width * ref_step, ref_step);
>> +			fill_decoder_block(dstp, cf->de_fwht, dst_stride,
>> +					   dst_step);
>>  		}
>>  	}
>>  	return true;
>>  }
>>  
>> -bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
>> +bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref,
>>  		       u32 hdr_flags, unsigned int components_num,
>>  		       unsigned int width, unsigned int height,
>> -		       unsigned int coded_width)
>> +		       unsigned int coded_width, struct fwht_raw_frame *dst,
>> +		       unsigned int dst_stride, unsigned int dst_chroma_stride)
>>  {
>>  	const __be16 *rlco = cf->rlc_data;
>>  	const __be16 *end_of_rlco_buf = cf->rlc_data +
>>  			(cf->size / sizeof(*rlco)) - 1;
>>  
>>  	if (!decode_plane(cf, &rlco, ref->luma, height, width, coded_width,
>> +			  dst->luma, dst_stride, dst->luma_alpha_step,
>> +			  ref->luma_alpha_step,
>>  			  hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED,
>>  			  end_of_rlco_buf))
>>  		return false;
>> @@ -909,11 +927,15 @@ bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
>>  			w /= 2;
>>  			c /= 2;
>>  		}
>> -		if (!decode_plane(cf, &rlco, ref->cb, h, w, c,
>> +		if (!decode_plane(cf, &rlco, ref->cb, h, w, c, dst->cb,
>> +				  dst_chroma_stride, dst->chroma_step,
>> +				  ref->chroma_step,
>>  				  hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED,
>>  				  end_of_rlco_buf))
>>  			return false;
>> -		if (!decode_plane(cf, &rlco, ref->cr, h, w, c,
>> +		if (!decode_plane(cf, &rlco, ref->cr, h, w, c, dst->cr,
>> +				  dst_chroma_stride, dst->chroma_step,
>> +				  ref->chroma_step,
>>  				  hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED,
>>  				  end_of_rlco_buf))
>>  			return false;
>> @@ -922,6 +944,8 @@ bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
>>  	if (components_num == 4)
>>  		if (!decode_plane(cf, &rlco, ref->alpha, height, width,
>>  				  coded_width,
>> +				  dst->alpha, dst_stride, dst->luma_alpha_step,
>> +				  ref->luma_alpha_step,
>>  				  hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED,
>>  				  end_of_rlco_buf))
>>  			return false;
>> diff --git a/drivers/media/platform/vicodec/codec-fwht.h b/drivers/media/platform/vicodec/codec-fwht.h
>> index 8a4f07d466cb..eab4a97aa132 100644
>> --- a/drivers/media/platform/vicodec/codec-fwht.h
>> +++ b/drivers/media/platform/vicodec/codec-fwht.h
>> @@ -140,9 +140,9 @@ u32 fwht_encode_frame(struct fwht_raw_frame *frm,
>>  		      bool is_intra, bool next_is_intra,
>>  		      unsigned int width, unsigned int height,
>>  		      unsigned int stride, unsigned int chroma_stride);
>> -bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
>> -		       u32 hdr_flags, unsigned int components_num,
>> -		       unsigned int width, unsigned int height,
>> -		       unsigned int coded_width);
>> -
>> +bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref,
>> +		u32 hdr_flags, unsigned int components_num,
>> +		unsigned int width, unsigned int height,
>> +		unsigned int coded_width, struct fwht_raw_frame *dst,
>> +		unsigned int dst_stride, unsigned int dst_chroma_stride);
>>  #endif
>> diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.c b/drivers/media/platform/vicodec/codec-v4l2-fwht.c
>> index 728ed5012aed..40b1f4901fd3 100644
>> --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.c
>> +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.c
>> @@ -75,6 +75,35 @@ const struct v4l2_fwht_pixfmt_info *v4l2_fwht_get_pixfmt(u32 idx)
>>  	return v4l2_fwht_pixfmts + idx;
>>  }
>>  
>> +void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info,
>> +		     struct v4l2_fwht_state *state)
> 
> Should be a static function and cap should be const u8 *.

Sorry, I misread. This function is used in vicodec-core, so it can't be static.

But wouldn't it make more sense to move this function to vicodec-core.c
as well?

Regards,

	Hans

> 
>> +{
>> +	int plane_idx;
>> +	u8 *p_ref = state->ref_frame.buf;
>> +
>> +	for (plane_idx = 0; plane_idx < info->planes_num; plane_idx++) {
>> +		int i;
>> +		bool is_chroma_plane = plane_idx == 1 || plane_idx == 2;
>> +		unsigned int h_div = is_chroma_plane ? info->height_div : 1;
>> +		unsigned int w_div = is_chroma_plane ? info->width_div : 1;
>> +		unsigned int step = is_chroma_plane ? info->chroma_step :
>> +			info->luma_alpha_step;
>> +		unsigned int stride_div =
>> +			(info->planes_num == 3 && plane_idx > 0) ? 2 : 1;
>> +
>> +		u8 *row_dst = cap;
> 
> I'd call this row_cap. 'dst' is confusing since you expect a memcpy to write
> to the destination address, but this isn't the 'destination', it is just a
> row in the capture buffer.
> 
> It really confused me for a bit :-)
> 
> Regards,
> 
> 	Hans
> 
>> +		u8 *row_ref = p_ref;
>> +
>> +		for (i = 0; i < state->visible_height / h_div; i++) {
>> +			memcpy(row_ref, row_dst, step * state->visible_width / w_div);
>> +			row_ref += step * state->coded_width / w_div;
>> +			row_dst += state->stride / stride_div;
>> +		}
>> +		cap += (state->stride / stride_div) * (state->coded_height / h_div);
>> +		p_ref += (step * state->coded_width / w_div) * (state->coded_height / h_div);
>> +	}
>> +}
>> +
>>  static int prepare_raw_frame(struct fwht_raw_frame *rf,
>>  			 const struct v4l2_fwht_pixfmt_info *info, u8 *buf,
>>  			 unsigned int size)
>> @@ -243,14 +272,16 @@ int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out)
>>  
>>  int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out)
>>  {
>> -	unsigned int i, j, k;
>>  	u32 flags;
>>  	struct fwht_cframe cf;
>> -	u8 *p, *ref_p;
>>  	unsigned int components_num = 3;
>>  	unsigned int version;
>>  	const struct v4l2_fwht_pixfmt_info *info;
>>  	unsigned int hdr_width_div, hdr_height_div;
>> +	struct fwht_raw_frame dst_rf;
>> +	unsigned int dst_chroma_stride = state->stride;
>> +	unsigned int dst_size = state->stride * state->coded_height;
>> +	unsigned int ref_size;
>>  
>>  	if (!state->info)
>>  		return -EINVAL;
>> @@ -298,241 +329,24 @@ int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out)
>>  	    hdr_height_div != info->height_div)
>>  		return -EINVAL;
>>  
>> -	if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num,
>> -			       state->visible_width, state->visible_height,
>> -			       state->coded_width))
>> +	if (prepare_raw_frame(&dst_rf, info, p_out, dst_size))
>>  		return -EINVAL;
>> +	if (info->id == V4L2_PIX_FMT_YUV420 ||
>> +	    info->id == V4L2_PIX_FMT_YVU420 ||
>> +	    info->id == V4L2_PIX_FMT_YUV422P)
>> +		dst_chroma_stride /= 2;
>>  
>> -	/*
>> -	 * TODO - handle the case where the compressed stream encodes a
>> -	 * different format than the requested decoded format.
>> -	 */
>> -	switch (state->info->id) {
>> -	case V4L2_PIX_FMT_GREY:
>> -		ref_p = state->ref_frame.luma;
>> -		for (i = 0; i < state->coded_height; i++)  {
>> -			memcpy(p_out, ref_p, state->visible_width);
>> -			p_out += state->stride;
>> -			ref_p += state->coded_width;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_YUV420:
>> -	case V4L2_PIX_FMT_YUV422P:
>> -		ref_p = state->ref_frame.luma;
>> -		for (i = 0; i < state->coded_height; i++)  {
>> -			memcpy(p_out, ref_p, state->visible_width);
>> -			p_out += state->stride;
>> -			ref_p += state->coded_width;
>> -		}
>> -
>> -		ref_p = state->ref_frame.cb;
>> -		for (i = 0; i < state->coded_height / 2; i++)  {
>> -			memcpy(p_out, ref_p, state->visible_width / 2);
>> -			p_out += state->stride / 2;
>> -			ref_p += state->coded_width / 2;
>> -		}
>> -		ref_p = state->ref_frame.cr;
>> -		for (i = 0; i < state->coded_height / 2; i++)  {
>> -			memcpy(p_out, ref_p, state->visible_width / 2);
>> -			p_out += state->stride / 2;
>> -			ref_p += state->coded_width / 2;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_YVU420:
>> -		ref_p = state->ref_frame.luma;
>> -		for (i = 0; i < state->coded_height; i++)  {
>> -			memcpy(p_out, ref_p, state->visible_width);
>> -			p_out += state->stride;
>> -			ref_p += state->coded_width;
>> -		}
>> -
>> -		ref_p = state->ref_frame.cr;
>> -		for (i = 0; i < state->coded_height / 2; i++)  {
>> -			memcpy(p_out, ref_p, state->visible_width / 2);
>> -			p_out += state->stride / 2;
>> -			ref_p += state->coded_width / 2;
>> -		}
>> -		ref_p = state->ref_frame.cb;
>> -		for (i = 0; i < state->coded_height / 2; i++)  {
>> -			memcpy(p_out, ref_p, state->visible_width / 2);
>> -			p_out += state->stride / 2;
>> -			ref_p += state->coded_width / 2;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_NV12:
>> -	case V4L2_PIX_FMT_NV16:
>> -	case V4L2_PIX_FMT_NV24:
>> -		ref_p = state->ref_frame.luma;
>> -		for (i = 0; i < state->coded_height; i++)  {
>> -			memcpy(p_out, ref_p, state->visible_width);
>> -			p_out += state->stride;
>> -			ref_p += state->coded_width;
>> -		}
>> +	ref_size = state->coded_width * state->coded_height *
>> +		info->luma_alpha_step;
>>  
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height / 2; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
>> -				*p++ = state->ref_frame.cb[k];
>> -				*p++ = state->ref_frame.cr[k];
>> -				k++;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_NV21:
>> -	case V4L2_PIX_FMT_NV61:
>> -	case V4L2_PIX_FMT_NV42:
>> -		ref_p = state->ref_frame.luma;
>> -		for (i = 0; i < state->coded_height; i++)  {
>> -			memcpy(p_out, ref_p, state->visible_width);
>> -			p_out += state->stride;
>> -			ref_p += state->coded_width;
>> -		}
>> +	if (prepare_raw_frame(&state->ref_frame, info, state->ref_frame.buf,
>> +			      ref_size))
>> +		return -EINVAL;
>>  
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height / 2; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
>> -				*p++ = state->ref_frame.cr[k];
>> -				*p++ = state->ref_frame.cb[k];
>> -				k++;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_YUYV:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cb[k / 2];
>> -				*p++ = state->ref_frame.luma[k + 1];
>> -				*p++ = state->ref_frame.cr[k / 2];
>> -				k += 2;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_YVYU:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cr[k / 2];
>> -				*p++ = state->ref_frame.luma[k + 1];
>> -				*p++ = state->ref_frame.cb[k / 2];
>> -				k += 2;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_UYVY:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
>> -				*p++ = state->ref_frame.cb[k / 2];
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cr[k / 2];
>> -				*p++ = state->ref_frame.luma[k + 1];
>> -				k += 2;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_VYUY:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
>> -				*p++ = state->ref_frame.cr[k / 2];
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cb[k / 2];
>> -				*p++ = state->ref_frame.luma[k + 1];
>> -				k += 2;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_RGB24:
>> -	case V4L2_PIX_FMT_HSV24:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
>> -				*p++ = state->ref_frame.cr[k];
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cb[k];
>> -				k++;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_BGR24:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
>> -				*p++ = state->ref_frame.cb[k];
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cr[k];
>> -				k++;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_RGB32:
>> -	case V4L2_PIX_FMT_XRGB32:
>> -	case V4L2_PIX_FMT_HSV32:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
>> -				*p++ = 0;
>> -				*p++ = state->ref_frame.cr[k];
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cb[k];
>> -				k++;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_BGR32:
>> -	case V4L2_PIX_FMT_XBGR32:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
>> -				*p++ = state->ref_frame.cb[k];
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cr[k];
>> -				*p++ = 0;
>> -				k++;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_ARGB32:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
>> -				*p++ = state->ref_frame.alpha[k];
>> -				*p++ = state->ref_frame.cr[k];
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cb[k];
>> -				k++;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	case V4L2_PIX_FMT_ABGR32:
>> -		k = 0;
>> -		for (i = 0; i < state->coded_height; i++) {
>> -			for (j = 0, p = p_out; j < state->coded_width; j++) {
>> -				*p++ = state->ref_frame.cb[k];
>> -				*p++ = state->ref_frame.luma[k];
>> -				*p++ = state->ref_frame.cr[k];
>> -				*p++ = state->ref_frame.alpha[k];
>> -				k++;
>> -			}
>> -			p_out += state->stride;
>> -		}
>> -		break;
>> -	default:
>> +	if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num,
>> +			       state->visible_width, state->visible_height,
>> +			       state->coded_width, &dst_rf, state->stride,
>> +			       dst_chroma_stride))
>>  		return -EINVAL;
>> -	}
>>  	return 0;
>>  }
>> diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.h b/drivers/media/platform/vicodec/codec-v4l2-fwht.h
>> index aa6fa90a48be..75343cdf45e2 100644
>> --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.h
>> +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.h
>> @@ -53,6 +53,9 @@ const struct v4l2_fwht_pixfmt_info *v4l2_fwht_default_fmt(u32 width_div,
>>  							  u32 pixenc,
>>  							  unsigned int start_idx);
>>  
>> +void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info,
>> +		struct v4l2_fwht_state *state);
>> +
>>  int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out);
>>  int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out);
>>  
>> diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c
>> index 8d38bc1ef079..335a931fdf02 100644
>> --- a/drivers/media/platform/vicodec/vicodec-core.c
>> +++ b/drivers/media/platform/vicodec/vicodec-core.c
>> @@ -194,6 +194,8 @@ static int device_process(struct vicodec_ctx *ctx,
>>  		ret = v4l2_fwht_decode(state, p_src, p_dst);
>>  		if (ret < 0)
>>  			return ret;
>> +		copy_cap_to_ref(p_dst, ctx->state.info, &ctx->state);
>> +
>>  		vb2_set_plane_payload(&dst_vb->vb2_buf, 0, q_dst->sizeimage);
>>  	}
>>  
>>
>
diff mbox series

Patch

diff --git a/drivers/media/platform/vicodec/codec-fwht.c b/drivers/media/platform/vicodec/codec-fwht.c
index d1d6085da9f1..42849476069b 100644
--- a/drivers/media/platform/vicodec/codec-fwht.c
+++ b/drivers/media/platform/vicodec/codec-fwht.c
@@ -632,12 +632,13 @@  static int decide_blocktype(const u8 *cur, const u8 *reference,
 	return vari <= vard ? IBLOCK : PBLOCK;
 }
 
-static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
+static void fill_decoder_block(u8 *dst, const s16 *input, int stride,
+			       unsigned int dst_step)
 {
 	int i, j;
 
 	for (i = 0; i < 8; i++) {
-		for (j = 0; j < 8; j++, input++, dst++) {
+		for (j = 0; j < 8; j++, input++, dst += dst_step) {
 			if (*input < 0)
 				*dst = 0;
 			else if (*input > 255)
@@ -645,17 +646,19 @@  static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
 			else
 				*dst = *input;
 		}
-		dst += stride - 8;
+		dst += stride - (8 * dst_step);
 	}
 }
 
-static void add_deltas(s16 *deltas, const u8 *ref, int stride)
+static void add_deltas(s16 *deltas, const u8 *ref, int stride,
+		       unsigned int ref_step)
 {
 	int k, l;
 
 	for (k = 0; k < 8; k++) {
 		for (l = 0; l < 8; l++) {
-			*deltas += *ref++;
+			*deltas += *ref;
+			ref += ref_step;
 			/*
 			 * Due to quantizing, it might possible that the
 			 * decoded coefficients are slightly out of range
@@ -666,7 +669,7 @@  static void add_deltas(s16 *deltas, const u8 *ref, int stride)
 				*deltas = 255;
 			deltas++;
 		}
-		ref += stride - 8;
+		ref += stride - (8 * ref_step);
 	}
 }
 
@@ -711,8 +714,8 @@  static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
 				ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
 
 				if (blocktype == PBLOCK)
-					add_deltas(cf->de_fwht, refp, 8);
-				fill_decoder_block(refp, cf->de_fwht, 8);
+					add_deltas(cf->de_fwht, refp, 8, 1);
+				fill_decoder_block(refp, cf->de_fwht, 8, 1);
 			}
 
 			input += 8 * input_step;
@@ -821,8 +824,10 @@  u32 fwht_encode_frame(struct fwht_raw_frame *frm,
 	return encoding;
 }
 
-static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
-			 u32 height, u32 width, u32 coded_width,
+static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco,
+			 const u8 *ref, u32 height, u32 width, u32 coded_width,
+			 u8 *dst, unsigned int dst_stride,
+			 unsigned int dst_step, unsigned int ref_step,
 			 bool uncompressed, const __be16 *end_of_rlco_buf)
 {
 	unsigned int copies = 0;
@@ -834,10 +839,15 @@  static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
 	height = round_up(height, 8);
 
 	if (uncompressed) {
+		int i;
+
 		if (end_of_rlco_buf + 1 < *rlco + width * height / 2)
 			return false;
-		memcpy(ref, *rlco, width * height);
-		*rlco += width * height / 2;
+		for (i = 0; i < height; i++) {
+			memcpy(dst, *rlco, width);
+			dst += dst_stride;
+			*rlco += width / 2;
+		}
 		return true;
 	}
 
@@ -849,15 +859,18 @@  static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
 	 */
 	for (j = 0; j < height / 8; j++) {
 		for (i = 0; i < width / 8; i++) {
-			u8 *refp = ref + j * 8 * coded_width + i * 8;
+			const u8 *refp = ref + j * 8 * ref_step * coded_width +
+				i * 8 * ref_step;
+			u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step;
 
 			if (copies) {
 				memcpy(cf->de_fwht, copy, sizeof(copy));
 				if (stat & PFRAME_BIT)
 					add_deltas(cf->de_fwht, refp,
-						   coded_width);
-				fill_decoder_block(refp, cf->de_fwht,
-						   coded_width);
+						   coded_width * ref_step,
+						   ref_step);
+				fill_decoder_block(dstp, cf->de_fwht,
+						   dst_stride, dst_step);
 				copies--;
 				continue;
 			}
@@ -877,23 +890,28 @@  static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
 			if (copies)
 				memcpy(copy, cf->de_fwht, sizeof(copy));
 			if (stat & PFRAME_BIT)
-				add_deltas(cf->de_fwht, refp, coded_width);
-			fill_decoder_block(refp, cf->de_fwht, coded_width);
+				add_deltas(cf->de_fwht, refp,
+					   coded_width * ref_step, ref_step);
+			fill_decoder_block(dstp, cf->de_fwht, dst_stride,
+					   dst_step);
 		}
 	}
 	return true;
 }
 
-bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
+bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref,
 		       u32 hdr_flags, unsigned int components_num,
 		       unsigned int width, unsigned int height,
-		       unsigned int coded_width)
+		       unsigned int coded_width, struct fwht_raw_frame *dst,
+		       unsigned int dst_stride, unsigned int dst_chroma_stride)
 {
 	const __be16 *rlco = cf->rlc_data;
 	const __be16 *end_of_rlco_buf = cf->rlc_data +
 			(cf->size / sizeof(*rlco)) - 1;
 
 	if (!decode_plane(cf, &rlco, ref->luma, height, width, coded_width,
+			  dst->luma, dst_stride, dst->luma_alpha_step,
+			  ref->luma_alpha_step,
 			  hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED,
 			  end_of_rlco_buf))
 		return false;
@@ -909,11 +927,15 @@  bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
 			w /= 2;
 			c /= 2;
 		}
-		if (!decode_plane(cf, &rlco, ref->cb, h, w, c,
+		if (!decode_plane(cf, &rlco, ref->cb, h, w, c, dst->cb,
+				  dst_chroma_stride, dst->chroma_step,
+				  ref->chroma_step,
 				  hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED,
 				  end_of_rlco_buf))
 			return false;
-		if (!decode_plane(cf, &rlco, ref->cr, h, w, c,
+		if (!decode_plane(cf, &rlco, ref->cr, h, w, c, dst->cr,
+				  dst_chroma_stride, dst->chroma_step,
+				  ref->chroma_step,
 				  hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED,
 				  end_of_rlco_buf))
 			return false;
@@ -922,6 +944,8 @@  bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
 	if (components_num == 4)
 		if (!decode_plane(cf, &rlco, ref->alpha, height, width,
 				  coded_width,
+				  dst->alpha, dst_stride, dst->luma_alpha_step,
+				  ref->luma_alpha_step,
 				  hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED,
 				  end_of_rlco_buf))
 			return false;
diff --git a/drivers/media/platform/vicodec/codec-fwht.h b/drivers/media/platform/vicodec/codec-fwht.h
index 8a4f07d466cb..eab4a97aa132 100644
--- a/drivers/media/platform/vicodec/codec-fwht.h
+++ b/drivers/media/platform/vicodec/codec-fwht.h
@@ -140,9 +140,9 @@  u32 fwht_encode_frame(struct fwht_raw_frame *frm,
 		      bool is_intra, bool next_is_intra,
 		      unsigned int width, unsigned int height,
 		      unsigned int stride, unsigned int chroma_stride);
-bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
-		       u32 hdr_flags, unsigned int components_num,
-		       unsigned int width, unsigned int height,
-		       unsigned int coded_width);
-
+bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref,
+		u32 hdr_flags, unsigned int components_num,
+		unsigned int width, unsigned int height,
+		unsigned int coded_width, struct fwht_raw_frame *dst,
+		unsigned int dst_stride, unsigned int dst_chroma_stride);
 #endif
diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.c b/drivers/media/platform/vicodec/codec-v4l2-fwht.c
index 728ed5012aed..40b1f4901fd3 100644
--- a/drivers/media/platform/vicodec/codec-v4l2-fwht.c
+++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.c
@@ -75,6 +75,35 @@  const struct v4l2_fwht_pixfmt_info *v4l2_fwht_get_pixfmt(u32 idx)
 	return v4l2_fwht_pixfmts + idx;
 }
 
+void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info,
+		     struct v4l2_fwht_state *state)
+{
+	int plane_idx;
+	u8 *p_ref = state->ref_frame.buf;
+
+	for (plane_idx = 0; plane_idx < info->planes_num; plane_idx++) {
+		int i;
+		bool is_chroma_plane = plane_idx == 1 || plane_idx == 2;
+		unsigned int h_div = is_chroma_plane ? info->height_div : 1;
+		unsigned int w_div = is_chroma_plane ? info->width_div : 1;
+		unsigned int step = is_chroma_plane ? info->chroma_step :
+			info->luma_alpha_step;
+		unsigned int stride_div =
+			(info->planes_num == 3 && plane_idx > 0) ? 2 : 1;
+
+		u8 *row_dst = cap;
+		u8 *row_ref = p_ref;
+
+		for (i = 0; i < state->visible_height / h_div; i++) {
+			memcpy(row_ref, row_dst, step * state->visible_width / w_div);
+			row_ref += step * state->coded_width / w_div;
+			row_dst += state->stride / stride_div;
+		}
+		cap += (state->stride / stride_div) * (state->coded_height / h_div);
+		p_ref += (step * state->coded_width / w_div) * (state->coded_height / h_div);
+	}
+}
+
 static int prepare_raw_frame(struct fwht_raw_frame *rf,
 			 const struct v4l2_fwht_pixfmt_info *info, u8 *buf,
 			 unsigned int size)
@@ -243,14 +272,16 @@  int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out)
 
 int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out)
 {
-	unsigned int i, j, k;
 	u32 flags;
 	struct fwht_cframe cf;
-	u8 *p, *ref_p;
 	unsigned int components_num = 3;
 	unsigned int version;
 	const struct v4l2_fwht_pixfmt_info *info;
 	unsigned int hdr_width_div, hdr_height_div;
+	struct fwht_raw_frame dst_rf;
+	unsigned int dst_chroma_stride = state->stride;
+	unsigned int dst_size = state->stride * state->coded_height;
+	unsigned int ref_size;
 
 	if (!state->info)
 		return -EINVAL;
@@ -298,241 +329,24 @@  int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out)
 	    hdr_height_div != info->height_div)
 		return -EINVAL;
 
-	if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num,
-			       state->visible_width, state->visible_height,
-			       state->coded_width))
+	if (prepare_raw_frame(&dst_rf, info, p_out, dst_size))
 		return -EINVAL;
+	if (info->id == V4L2_PIX_FMT_YUV420 ||
+	    info->id == V4L2_PIX_FMT_YVU420 ||
+	    info->id == V4L2_PIX_FMT_YUV422P)
+		dst_chroma_stride /= 2;
 
-	/*
-	 * TODO - handle the case where the compressed stream encodes a
-	 * different format than the requested decoded format.
-	 */
-	switch (state->info->id) {
-	case V4L2_PIX_FMT_GREY:
-		ref_p = state->ref_frame.luma;
-		for (i = 0; i < state->coded_height; i++)  {
-			memcpy(p_out, ref_p, state->visible_width);
-			p_out += state->stride;
-			ref_p += state->coded_width;
-		}
-		break;
-	case V4L2_PIX_FMT_YUV420:
-	case V4L2_PIX_FMT_YUV422P:
-		ref_p = state->ref_frame.luma;
-		for (i = 0; i < state->coded_height; i++)  {
-			memcpy(p_out, ref_p, state->visible_width);
-			p_out += state->stride;
-			ref_p += state->coded_width;
-		}
-
-		ref_p = state->ref_frame.cb;
-		for (i = 0; i < state->coded_height / 2; i++)  {
-			memcpy(p_out, ref_p, state->visible_width / 2);
-			p_out += state->stride / 2;
-			ref_p += state->coded_width / 2;
-		}
-		ref_p = state->ref_frame.cr;
-		for (i = 0; i < state->coded_height / 2; i++)  {
-			memcpy(p_out, ref_p, state->visible_width / 2);
-			p_out += state->stride / 2;
-			ref_p += state->coded_width / 2;
-		}
-		break;
-	case V4L2_PIX_FMT_YVU420:
-		ref_p = state->ref_frame.luma;
-		for (i = 0; i < state->coded_height; i++)  {
-			memcpy(p_out, ref_p, state->visible_width);
-			p_out += state->stride;
-			ref_p += state->coded_width;
-		}
-
-		ref_p = state->ref_frame.cr;
-		for (i = 0; i < state->coded_height / 2; i++)  {
-			memcpy(p_out, ref_p, state->visible_width / 2);
-			p_out += state->stride / 2;
-			ref_p += state->coded_width / 2;
-		}
-		ref_p = state->ref_frame.cb;
-		for (i = 0; i < state->coded_height / 2; i++)  {
-			memcpy(p_out, ref_p, state->visible_width / 2);
-			p_out += state->stride / 2;
-			ref_p += state->coded_width / 2;
-		}
-		break;
-	case V4L2_PIX_FMT_NV12:
-	case V4L2_PIX_FMT_NV16:
-	case V4L2_PIX_FMT_NV24:
-		ref_p = state->ref_frame.luma;
-		for (i = 0; i < state->coded_height; i++)  {
-			memcpy(p_out, ref_p, state->visible_width);
-			p_out += state->stride;
-			ref_p += state->coded_width;
-		}
+	ref_size = state->coded_width * state->coded_height *
+		info->luma_alpha_step;
 
-		k = 0;
-		for (i = 0; i < state->coded_height / 2; i++) {
-			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
-				*p++ = state->ref_frame.cb[k];
-				*p++ = state->ref_frame.cr[k];
-				k++;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_NV21:
-	case V4L2_PIX_FMT_NV61:
-	case V4L2_PIX_FMT_NV42:
-		ref_p = state->ref_frame.luma;
-		for (i = 0; i < state->coded_height; i++)  {
-			memcpy(p_out, ref_p, state->visible_width);
-			p_out += state->stride;
-			ref_p += state->coded_width;
-		}
+	if (prepare_raw_frame(&state->ref_frame, info, state->ref_frame.buf,
+			      ref_size))
+		return -EINVAL;
 
-		k = 0;
-		for (i = 0; i < state->coded_height / 2; i++) {
-			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
-				*p++ = state->ref_frame.cr[k];
-				*p++ = state->ref_frame.cb[k];
-				k++;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_YUYV:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cb[k / 2];
-				*p++ = state->ref_frame.luma[k + 1];
-				*p++ = state->ref_frame.cr[k / 2];
-				k += 2;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_YVYU:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cr[k / 2];
-				*p++ = state->ref_frame.luma[k + 1];
-				*p++ = state->ref_frame.cb[k / 2];
-				k += 2;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_UYVY:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
-				*p++ = state->ref_frame.cb[k / 2];
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cr[k / 2];
-				*p++ = state->ref_frame.luma[k + 1];
-				k += 2;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_VYUY:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width / 2; j++) {
-				*p++ = state->ref_frame.cr[k / 2];
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cb[k / 2];
-				*p++ = state->ref_frame.luma[k + 1];
-				k += 2;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_RGB24:
-	case V4L2_PIX_FMT_HSV24:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width; j++) {
-				*p++ = state->ref_frame.cr[k];
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cb[k];
-				k++;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_BGR24:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width; j++) {
-				*p++ = state->ref_frame.cb[k];
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cr[k];
-				k++;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_RGB32:
-	case V4L2_PIX_FMT_XRGB32:
-	case V4L2_PIX_FMT_HSV32:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width; j++) {
-				*p++ = 0;
-				*p++ = state->ref_frame.cr[k];
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cb[k];
-				k++;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_BGR32:
-	case V4L2_PIX_FMT_XBGR32:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width; j++) {
-				*p++ = state->ref_frame.cb[k];
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cr[k];
-				*p++ = 0;
-				k++;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_ARGB32:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width; j++) {
-				*p++ = state->ref_frame.alpha[k];
-				*p++ = state->ref_frame.cr[k];
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cb[k];
-				k++;
-			}
-			p_out += state->stride;
-		}
-		break;
-	case V4L2_PIX_FMT_ABGR32:
-		k = 0;
-		for (i = 0; i < state->coded_height; i++) {
-			for (j = 0, p = p_out; j < state->coded_width; j++) {
-				*p++ = state->ref_frame.cb[k];
-				*p++ = state->ref_frame.luma[k];
-				*p++ = state->ref_frame.cr[k];
-				*p++ = state->ref_frame.alpha[k];
-				k++;
-			}
-			p_out += state->stride;
-		}
-		break;
-	default:
+	if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num,
+			       state->visible_width, state->visible_height,
+			       state->coded_width, &dst_rf, state->stride,
+			       dst_chroma_stride))
 		return -EINVAL;
-	}
 	return 0;
 }
diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.h b/drivers/media/platform/vicodec/codec-v4l2-fwht.h
index aa6fa90a48be..75343cdf45e2 100644
--- a/drivers/media/platform/vicodec/codec-v4l2-fwht.h
+++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.h
@@ -53,6 +53,9 @@  const struct v4l2_fwht_pixfmt_info *v4l2_fwht_default_fmt(u32 width_div,
 							  u32 pixenc,
 							  unsigned int start_idx);
 
+void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info,
+		struct v4l2_fwht_state *state);
+
 int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out);
 int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out);
 
diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c
index 8d38bc1ef079..335a931fdf02 100644
--- a/drivers/media/platform/vicodec/vicodec-core.c
+++ b/drivers/media/platform/vicodec/vicodec-core.c
@@ -194,6 +194,8 @@  static int device_process(struct vicodec_ctx *ctx,
 		ret = v4l2_fwht_decode(state, p_src, p_dst);
 		if (ret < 0)
 			return ret;
+		copy_cap_to_ref(p_dst, ctx->state.info, &ctx->state);
+
 		vb2_set_plane_payload(&dst_vb->vb2_buf, 0, q_dst->sizeimage);
 	}