diff mbox series

[v8] media: imx: add mem2mem device

Message ID 20190418164414.29373-1-p.zabel@pengutronix.de (mailing list archive)
State New, archived
Headers show
Series [v8] media: imx: add mem2mem device | expand

Commit Message

Philipp Zabel April 18, 2019, 4:44 p.m. UTC
Add a single imx-media mem2mem video device that uses the IPU IC PP
(image converter post processing) task for scaling and colorspace
conversion.
On i.MX6Q/DL SoCs with two IPUs currently only the first IPU is used.

The hardware only supports writing to destination buffers up to
1024x1024 pixels in a single pass, arbitrary sizes can be achieved
by rendering multiple tiles per frame.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
[slongerbeam@gmail.com: use ipu_image_convert_adjust(), fix
 device_run() error handling, add missing media-device header,
 unregister and remove the mem2mem device in error paths in
 imx_media_probe_complete() and in imx_media_remove()]
Signed-off-by: Steve Longerbeam <slongerbeam@gmail.com>
---
Changes since v7 [1]:
 - Change capture format to keep aspect ratio when changing rotation.
 - Adjust output and update formats to alignment requirements, if
   necessary when changing rotation or flip modes.
 - Disallow changing rotation or flip modes if a format change is
   required on a busy queue.
 - Add sequence counting.
 - Use v4l2_m2m_buf_copy_metadata.
 - Disallow interlaced field modes.
 - Rename video device to "ipu_ic_pp csc/scaler".

[1] https://patchwork.linuxtv.org/patch/53968/
---
 drivers/staging/media/imx/Kconfig             |   1 +
 drivers/staging/media/imx/Makefile            |   1 +
 .../staging/media/imx/imx-media-csc-scaler.c  | 917 ++++++++++++++++++
 drivers/staging/media/imx/imx-media-dev.c     |  34 +-
 drivers/staging/media/imx/imx-media.h         |  10 +
 5 files changed, 959 insertions(+), 4 deletions(-)
 create mode 100644 drivers/staging/media/imx/imx-media-csc-scaler.c

Comments

Hans Verkuil May 10, 2019, 1:03 p.m. UTC | #1
Can you change the subject text? E.g.: 'add csc/scaler mem2mem device'.

That's a lot more descriptive.

On 4/18/19 6:44 PM, Philipp Zabel wrote:
> Add a single imx-media mem2mem video device that uses the IPU IC PP
> (image converter post processing) task for scaling and colorspace
> conversion.
> On i.MX6Q/DL SoCs with two IPUs currently only the first IPU is used.
> 
> The hardware only supports writing to destination buffers up to
> 1024x1024 pixels in a single pass, arbitrary sizes can be achieved
> by rendering multiple tiles per frame.
> 
> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
> [slongerbeam@gmail.com: use ipu_image_convert_adjust(), fix
>  device_run() error handling, add missing media-device header,
>  unregister and remove the mem2mem device in error paths in
>  imx_media_probe_complete() and in imx_media_remove()]
> Signed-off-by: Steve Longerbeam <slongerbeam@gmail.com>
> ---
> Changes since v7 [1]:
>  - Change capture format to keep aspect ratio when changing rotation.
>  - Adjust output and update formats to alignment requirements, if
>    necessary when changing rotation or flip modes.
>  - Disallow changing rotation or flip modes if a format change is
>    required on a busy queue.
>  - Add sequence counting.
>  - Use v4l2_m2m_buf_copy_metadata.
>  - Disallow interlaced field modes.
>  - Rename video device to "ipu_ic_pp csc/scaler".
> 
> [1] https://patchwork.linuxtv.org/patch/53968/
> ---
>  drivers/staging/media/imx/Kconfig             |   1 +
>  drivers/staging/media/imx/Makefile            |   1 +
>  .../staging/media/imx/imx-media-csc-scaler.c  | 917 ++++++++++++++++++
>  drivers/staging/media/imx/imx-media-dev.c     |  34 +-
>  drivers/staging/media/imx/imx-media.h         |  10 +
>  5 files changed, 959 insertions(+), 4 deletions(-)
>  create mode 100644 drivers/staging/media/imx/imx-media-csc-scaler.c
> 
> diff --git a/drivers/staging/media/imx/Kconfig b/drivers/staging/media/imx/Kconfig
> index f6d220b649fb..9bf14eb2154b 100644
> --- a/drivers/staging/media/imx/Kconfig
> +++ b/drivers/staging/media/imx/Kconfig
> @@ -6,6 +6,7 @@ config VIDEO_IMX_MEDIA
>  	depends on HAS_DMA
>  	select VIDEOBUF2_DMA_CONTIG
>  	select V4L2_FWNODE
> +	select V4L2_MEM2MEM_DEV
>  	help
>  	  Say yes here to enable support for video4linux media controller
>  	  driver for the i.MX5/6 SOC.
> diff --git a/drivers/staging/media/imx/Makefile b/drivers/staging/media/imx/Makefile
> index d2d909a36239..0c86723f1763 100644
> --- a/drivers/staging/media/imx/Makefile
> +++ b/drivers/staging/media/imx/Makefile
> @@ -7,6 +7,7 @@ imx-media-ic-objs := imx-ic-common.o imx-ic-prp.o imx-ic-prpencvf.o
>  obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media.o
>  obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-common.o
>  obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-capture.o
> +obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-csc-scaler.o
>  obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-vdic.o
>  obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-ic.o
>  
> diff --git a/drivers/staging/media/imx/imx-media-csc-scaler.c b/drivers/staging/media/imx/imx-media-csc-scaler.c
> new file mode 100644
> index 000000000000..4a0ecdfe38e6
> --- /dev/null
> +++ b/drivers/staging/media/imx/imx-media-csc-scaler.c
> @@ -0,0 +1,917 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * i.MX IPUv3 IC PP mem2mem CSC/Scaler driver
> + *
> + * Copyright (C) 2011 Pengutronix, Sascha Hauer
> + * Copyright (C) 2018 Pengutronix, Philipp Zabel
> + */
> +#include <linux/module.h>
> +#include <linux/delay.h>
> +#include <linux/fs.h>
> +#include <linux/version.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <video/imx-ipu-v3.h>
> +#include <video/imx-ipu-image-convert.h>
> +
> +#include <media/media-device.h>
> +#include <media/v4l2-ctrls.h>
> +#include <media/v4l2-event.h>
> +#include <media/v4l2-mem2mem.h>
> +#include <media/v4l2-device.h>
> +#include <media/v4l2-ioctl.h>
> +#include <media/videobuf2-dma-contig.h>
> +
> +#include "imx-media.h"
> +
> +#define fh_to_ctx(__fh)	container_of(__fh, struct ipu_csc_scaler_ctx, fh)
> +
> +enum {
> +	V4L2_M2M_SRC = 0,
> +	V4L2_M2M_DST = 1,
> +};
> +
> +struct ipu_csc_scaler_priv {
> +	struct imx_media_video_dev	vdev;
> +
> +	struct v4l2_m2m_dev		*m2m_dev;
> +	struct device			*dev;
> +
> +	struct imx_media_dev		*md;
> +
> +	struct mutex			mutex;	/* mem2mem device mutex */
> +};
> +
> +#define vdev_to_priv(v) container_of(v, struct ipu_csc_scaler_priv, vdev)
> +
> +/* Per-queue, driver-specific private data */
> +struct ipu_csc_scaler_q_data {
> +	struct v4l2_pix_format		cur_fmt;
> +	struct v4l2_rect		rect;
> +};
> +
> +struct ipu_csc_scaler_ctx {
> +	struct ipu_csc_scaler_priv	*priv;
> +
> +	struct v4l2_fh			fh;
> +	struct ipu_csc_scaler_q_data	q_data[2];
> +	struct ipu_image_convert_ctx	*icc;
> +
> +	struct v4l2_ctrl_handler	ctrl_hdlr;
> +	int				rotate;
> +	bool				hflip;
> +	bool				vflip;
> +	enum ipu_rotate_mode		rot_mode;
> +	unsigned int			sequence;
> +};
> +
> +static struct ipu_csc_scaler_q_data *get_q_data(struct ipu_csc_scaler_ctx *ctx,
> +						enum v4l2_buf_type type)
> +{
> +	if (V4L2_TYPE_IS_OUTPUT(type))
> +		return &ctx->q_data[V4L2_M2M_SRC];
> +	else
> +		return &ctx->q_data[V4L2_M2M_DST];
> +}
> +
> +/*
> + * mem2mem callbacks
> + */
> +
> +static void job_abort(void *_ctx)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = _ctx;
> +
> +	if (ctx->icc)
> +		ipu_image_convert_abort(ctx->icc);
> +}
> +
> +static void ipu_ic_pp_complete(struct ipu_image_convert_run *run, void *_ctx)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = _ctx;
> +	struct ipu_csc_scaler_priv *priv = ctx->priv;
> +	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> +
> +	src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +	dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +
> +	v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true);
> +
> +	src_buf->sequence = dst_buf->sequence = ctx->sequence++;
> +
> +	v4l2_m2m_buf_done(src_buf, run->status ? VB2_BUF_STATE_ERROR :
> +						 VB2_BUF_STATE_DONE);
> +	v4l2_m2m_buf_done(dst_buf, run->status ? VB2_BUF_STATE_ERROR :
> +						 VB2_BUF_STATE_DONE);
> +
> +	v4l2_m2m_job_finish(priv->m2m_dev, ctx->fh.m2m_ctx);
> +	kfree(run);
> +}
> +
> +static void device_run(void *_ctx)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = _ctx;
> +	struct ipu_csc_scaler_priv *priv = ctx->priv;
> +	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> +	struct ipu_image_convert_run *run;
> +	int ret;
> +
> +	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> +
> +	run = kzalloc(sizeof(*run), GFP_KERNEL);
> +	if (!run)
> +		goto err;
> +
> +	run->ctx = ctx->icc;
> +	run->in_phys = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
> +	run->out_phys = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> +
> +	ret = ipu_image_convert_queue(run);
> +	if (ret < 0) {
> +		v4l2_err(ctx->priv->vdev.vfd->v4l2_dev,
> +			 "%s: failed to queue: %d\n", __func__, ret);
> +		goto err;
> +	}
> +
> +	return;
> +
> +err:
> +	v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +	v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +	v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
> +	v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR);
> +	v4l2_m2m_job_finish(priv->m2m_dev, ctx->fh.m2m_ctx);
> +}
> +
> +/*
> + * Video ioctls
> + */
> +static int ipu_csc_scaler_querycap(struct file *file, void *priv,
> +				   struct v4l2_capability *cap)
> +{
> +	strscpy(cap->driver, "imx-media-csc-scaler", sizeof(cap->driver));
> +	strscpy(cap->card, "imx-media-csc-scaler", sizeof(cap->card));
> +	strscpy(cap->bus_info, "platform:imx-media-csc-scaler",
> +		sizeof(cap->bus_info));
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_enum_fmt(struct file *file, void *fh,
> +				   struct v4l2_fmtdesc *f)
> +{
> +	u32 fourcc;
> +	int ret;
> +
> +	ret = imx_media_enum_format(&fourcc, f->index, CS_SEL_ANY);
> +	if (ret)
> +		return ret;
> +
> +	f->pixelformat = fourcc;
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_g_fmt(struct file *file, void *priv,
> +				struct v4l2_format *f)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct ipu_csc_scaler_q_data *q_data;
> +
> +	q_data = get_q_data(ctx, f->type);
> +
> +	f->fmt.pix = q_data->cur_fmt;
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_try_fmt(struct file *file, void *priv,
> +				  struct v4l2_format *f)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct ipu_csc_scaler_q_data *q_data = get_q_data(ctx, f->type);
> +	struct ipu_image test_in, test_out;
> +	enum v4l2_field field;
> +
> +	field = f->fmt.pix.field;
> +	if (field == V4L2_FIELD_ANY)
> +		field = V4L2_FIELD_NONE;
> +	else if (field != V4L2_FIELD_NONE)
> +		return -EINVAL;
> +
> +	if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
> +		struct ipu_csc_scaler_q_data *q_data_in =
> +			get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +
> +		test_out.pix = f->fmt.pix;
> +		test_in.pix = q_data_in->cur_fmt;
> +	} else {
> +		struct ipu_csc_scaler_q_data *q_data_out =
> +			get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +
> +		test_in.pix = f->fmt.pix;
> +		test_out.pix = q_data_out->cur_fmt;
> +	}
> +
> +	ipu_image_convert_adjust(&test_in, &test_out, ctx->rot_mode);
> +
> +	f->fmt.pix = (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) ?
> +		test_out.pix : test_in.pix;
> +
> +	if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
> +		f->fmt.pix.colorspace = q_data->cur_fmt.colorspace;
> +		f->fmt.pix.ycbcr_enc = q_data->cur_fmt.ycbcr_enc;
> +		f->fmt.pix.xfer_func = q_data->cur_fmt.xfer_func;
> +		f->fmt.pix.quantization = q_data->cur_fmt.quantization;
> +	} else if (f->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) {
> +		f->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB;
> +		f->fmt.pix.ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> +		f->fmt.pix.xfer_func = V4L2_XFER_FUNC_DEFAULT;
> +		f->fmt.pix.quantization = V4L2_QUANTIZATION_DEFAULT;
> +	}
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_s_fmt(struct file *file, void *priv,
> +				struct v4l2_format *f)
> +{
> +	struct ipu_csc_scaler_q_data *q_data;
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct vb2_queue *vq;
> +	int ret;
> +
> +	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
> +	if (vb2_is_busy(vq)) {
> +		v4l2_err(ctx->priv->vdev.vfd->v4l2_dev, "%s: queue busy\n",
> +			 __func__);
> +		return -EBUSY;
> +	}
> +
> +	q_data = get_q_data(ctx, f->type);
> +
> +	ret = ipu_csc_scaler_try_fmt(file, priv, f);
> +	if (ret < 0)
> +		return ret;
> +
> +	q_data->cur_fmt.width = f->fmt.pix.width;
> +	q_data->cur_fmt.height = f->fmt.pix.height;
> +	q_data->cur_fmt.pixelformat = f->fmt.pix.pixelformat;
> +	q_data->cur_fmt.field = f->fmt.pix.field;
> +	q_data->cur_fmt.bytesperline = f->fmt.pix.bytesperline;
> +	q_data->cur_fmt.sizeimage = f->fmt.pix.sizeimage;
> +
> +	/* Reset cropping/composing rectangle */
> +	q_data->rect.left = 0;
> +	q_data->rect.top = 0;
> +	q_data->rect.width = q_data->cur_fmt.width;
> +	q_data->rect.height = q_data->cur_fmt.height;
> +
> +	if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> +		/* Set colorimetry on the output queue */
> +		q_data->cur_fmt.colorspace = f->fmt.pix.colorspace;
> +		q_data->cur_fmt.ycbcr_enc = f->fmt.pix.ycbcr_enc;
> +		q_data->cur_fmt.xfer_func = f->fmt.pix.xfer_func;
> +		q_data->cur_fmt.quantization = f->fmt.pix.quantization;
> +		/* Propagate colorimetry to the capture queue */
> +		q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +		q_data->cur_fmt.colorspace = f->fmt.pix.colorspace;
> +		q_data->cur_fmt.ycbcr_enc = f->fmt.pix.ycbcr_enc;
> +		q_data->cur_fmt.xfer_func = f->fmt.pix.xfer_func;
> +		q_data->cur_fmt.quantization = f->fmt.pix.quantization;
> +	}
> +
> +	/*
> +	 * TODO: Setting colorimetry on the capture queue is currently not
> +	 * supported by the V4L2 API
> +	 */
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_g_selection(struct file *file, void *priv,
> +				      struct v4l2_selection *s)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct ipu_csc_scaler_q_data *q_data;
> +
> +	switch (s->target) {
> +	case V4L2_SEL_TGT_CROP:
> +	case V4L2_SEL_TGT_CROP_DEFAULT:
> +	case V4L2_SEL_TGT_CROP_BOUNDS:
> +		if (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
> +			return -EINVAL;
> +		q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +		break;
> +	case V4L2_SEL_TGT_COMPOSE:
> +	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
> +	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
> +		if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
> +			return -EINVAL;
> +		q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (s->target == V4L2_SEL_TGT_CROP ||
> +	    s->target == V4L2_SEL_TGT_COMPOSE) {
> +		s->r = q_data->rect;
> +	} else {
> +		s->r.left = 0;
> +		s->r.top = 0;
> +		s->r.width = q_data->cur_fmt.width;
> +		s->r.height = q_data->cur_fmt.height;
> +	}
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_s_selection(struct file *file, void *priv,
> +				      struct v4l2_selection *s)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct ipu_csc_scaler_q_data *q_data;
> +
> +	switch (s->target) {
> +	case V4L2_SEL_TGT_CROP:
> +		if (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
> +			return -EINVAL;
> +		break;
> +	case V4L2_SEL_TGT_COMPOSE:
> +		if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
> +			return -EINVAL;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE &&
> +	    s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
> +		return -EINVAL;
> +
> +	q_data = get_q_data(ctx, s->type);
> +
> +	/* The input's frame width to the IC must be a multiple of 8 pixels.
> +	 * When performing resizing the frame width must be a multiple of the
> +	 * burst size - 8 or 16 pixels as defined by CB#_BURST_16 parameter.
> +	 */
> +	if (s->flags & V4L2_SEL_FLAG_GE)
> +		s->r.width = round_up(s->r.width, 8);
> +	if (s->flags & V4L2_SEL_FLAG_LE)
> +		s->r.width = round_down(s->r.width, 8);
> +	s->r.width = clamp_t(unsigned int, s->r.width, 8,
> +			     round_down(q_data->cur_fmt.width, 8));
> +	s->r.height = clamp_t(unsigned int, s->r.height, 1,
> +			      q_data->cur_fmt.height);
> +	s->r.left = clamp_t(unsigned int, s->r.left, 0,
> +			    q_data->cur_fmt.width - s->r.width);
> +	s->r.top = clamp_t(unsigned int, s->r.top, 0,
> +			   q_data->cur_fmt.height - s->r.height);
> +
> +	/* V4L2_SEL_FLAG_KEEP_CONFIG is only valid for subdevices */
> +	q_data->rect = s->r;
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_ioctl_ops ipu_csc_scaler_ioctl_ops = {
> +	.vidioc_querycap		= ipu_csc_scaler_querycap,
> +
> +	.vidioc_enum_fmt_vid_cap	= ipu_csc_scaler_enum_fmt,
> +	.vidioc_g_fmt_vid_cap		= ipu_csc_scaler_g_fmt,
> +	.vidioc_try_fmt_vid_cap		= ipu_csc_scaler_try_fmt,
> +	.vidioc_s_fmt_vid_cap		= ipu_csc_scaler_s_fmt,
> +
> +	.vidioc_enum_fmt_vid_out	= ipu_csc_scaler_enum_fmt,
> +	.vidioc_g_fmt_vid_out		= ipu_csc_scaler_g_fmt,
> +	.vidioc_try_fmt_vid_out		= ipu_csc_scaler_try_fmt,
> +	.vidioc_s_fmt_vid_out		= ipu_csc_scaler_s_fmt,
> +
> +	.vidioc_g_selection		= ipu_csc_scaler_g_selection,
> +	.vidioc_s_selection		= ipu_csc_scaler_s_selection,
> +
> +	.vidioc_reqbufs			= v4l2_m2m_ioctl_reqbufs,
> +	.vidioc_querybuf		= v4l2_m2m_ioctl_querybuf,
> +
> +	.vidioc_qbuf			= v4l2_m2m_ioctl_qbuf,
> +	.vidioc_expbuf			= v4l2_m2m_ioctl_expbuf,
> +	.vidioc_dqbuf			= v4l2_m2m_ioctl_dqbuf,
> +	.vidioc_create_bufs		= v4l2_m2m_ioctl_create_bufs,
> +	.vidioc_prepare_buf		= v4l2_m2m_ioctl_prepare_buf,
> +
> +	.vidioc_streamon		= v4l2_m2m_ioctl_streamon,
> +	.vidioc_streamoff		= v4l2_m2m_ioctl_streamoff,
> +
> +	.vidioc_subscribe_event		= v4l2_ctrl_subscribe_event,
> +	.vidioc_unsubscribe_event	= v4l2_event_unsubscribe,
> +};
> +
> +/*
> + * Queue operations
> + */
> +
> +static int ipu_csc_scaler_queue_setup(struct vb2_queue *vq,
> +				      unsigned int *nbuffers,
> +				      unsigned int *nplanes,
> +				      unsigned int sizes[],
> +				      struct device *alloc_devs[])
> +{
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct ipu_csc_scaler_q_data *q_data;
> +	unsigned int size, count = *nbuffers;
> +
> +	q_data = get_q_data(ctx, vq->type);
> +
> +	size = q_data->cur_fmt.sizeimage;
> +
> +	*nbuffers = count;
> +
> +	if (*nplanes)
> +		return sizes[0] < size ? -EINVAL : 0;
> +
> +	*nplanes = 1;
> +	sizes[0] = size;
> +
> +	dev_dbg(ctx->priv->dev, "get %d buffer(s) of size %d each.\n",
> +		count, size);
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_buf_prepare(struct vb2_buffer *vb)
> +{
> +	struct vb2_queue *vq = vb->vb2_queue;
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct ipu_csc_scaler_q_data *q_data;
> +	unsigned long size;
> +
> +	dev_dbg(ctx->priv->dev, "type: %d\n", vq->type);
> +
> +	if (V4L2_TYPE_IS_OUTPUT(vq->type)) {
> +		if (vbuf->field == V4L2_FIELD_ANY)
> +			vbuf->field = V4L2_FIELD_NONE;
> +		if (vbuf->field != V4L2_FIELD_NONE) {
> +			dev_dbg(ctx->priv->dev, "%s: field isn't supported\n",
> +				__func__);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	q_data = get_q_data(ctx, vq->type);
> +	size = q_data->cur_fmt.sizeimage;
> +
> +	if (vb2_plane_size(vb, 0) < size) {
> +		dev_dbg(ctx->priv->dev,
> +			"%s: data will not fit into plane (%lu < %lu)\n",
> +			__func__, vb2_plane_size(vb, 0), size);
> +		return -EINVAL;
> +	}
> +
> +	vb2_set_plane_payload(vb, 0, size);
> +
> +	return 0;
> +}
> +
> +static void ipu_csc_scaler_buf_queue(struct vb2_buffer *vb)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> +
> +	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, to_vb2_v4l2_buffer(vb));
> +}
> +
> +static void ipu_image_from_q_data(struct ipu_image *im,
> +				  struct ipu_csc_scaler_q_data *q_data)
> +{
> +	im->pix.width = q_data->cur_fmt.width;
> +	im->pix.height = q_data->cur_fmt.height;
> +	im->pix.bytesperline = q_data->cur_fmt.bytesperline;
> +	im->pix.pixelformat = q_data->cur_fmt.pixelformat;
> +	im->rect = q_data->rect;
> +}
> +
> +static int ipu_csc_scaler_start_streaming(struct vb2_queue *q,
> +					  unsigned int count)
> +{
> +	const enum ipu_ic_task ic_task = IC_TASK_POST_PROCESSOR;
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(q);
> +	struct ipu_csc_scaler_priv *priv = ctx->priv;
> +	struct ipu_soc *ipu = priv->md->ipu[0];
> +	struct ipu_csc_scaler_q_data *q_data;
> +	struct vb2_queue *other_q;
> +	struct ipu_image in, out;
> +
> +	other_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
> +				  (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) ?
> +				  V4L2_BUF_TYPE_VIDEO_OUTPUT :
> +				  V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +	if (!vb2_is_streaming(other_q))
> +		return 0;
> +
> +	if (ctx->icc) {
> +		v4l2_warn(ctx->priv->vdev.vfd->v4l2_dev, "removing old ICC\n");
> +		ipu_image_convert_unprepare(ctx->icc);
> +	}
> +
> +	q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	ipu_image_from_q_data(&in, q_data);
> +
> +	q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +	ipu_image_from_q_data(&out, q_data);
> +
> +	ctx->icc = ipu_image_convert_prepare(ipu, ic_task, &in, &out,
> +					     ctx->rot_mode,
> +					     ipu_ic_pp_complete, ctx);
> +	if (IS_ERR(ctx->icc)) {
> +		struct vb2_v4l2_buffer *buf;
> +		int ret = PTR_ERR(ctx->icc);
> +
> +		ctx->icc = NULL;
> +		v4l2_err(ctx->priv->vdev.vfd->v4l2_dev, "%s: error %d\n",
> +			 __func__, ret);
> +		while ((buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx)))
> +			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_QUEUED);
> +		while ((buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx)))
> +			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_QUEUED);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static void ipu_csc_scaler_stop_streaming(struct vb2_queue *q)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(q);
> +	struct vb2_v4l2_buffer *buf;
> +
> +	if (ctx->icc) {
> +		ipu_image_convert_unprepare(ctx->icc);
> +		ctx->icc = NULL;
> +	}
> +
> +	ctx->sequence = 0;
> +
> +	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> +		while ((buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx)))
> +			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
> +	} else {
> +		while ((buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx)))
> +			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
> +	}
> +}
> +
> +static const struct vb2_ops ipu_csc_scaler_qops = {
> +	.queue_setup		= ipu_csc_scaler_queue_setup,
> +	.buf_prepare		= ipu_csc_scaler_buf_prepare,
> +	.buf_queue		= ipu_csc_scaler_buf_queue,
> +	.wait_prepare		= vb2_ops_wait_prepare,
> +	.wait_finish		= vb2_ops_wait_finish,
> +	.start_streaming	= ipu_csc_scaler_start_streaming,
> +	.stop_streaming		= ipu_csc_scaler_stop_streaming,
> +};
> +
> +static int ipu_csc_scaler_queue_init(void *priv, struct vb2_queue *src_vq,
> +				     struct vb2_queue *dst_vq)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = priv;
> +	int ret;
> +
> +	memset(src_vq, 0, sizeof(*src_vq));
> +	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
> +	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> +	src_vq->drv_priv = ctx;
> +	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	src_vq->ops = &ipu_csc_scaler_qops;
> +	src_vq->mem_ops = &vb2_dma_contig_memops;
> +	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> +	src_vq->lock = &ctx->priv->mutex;
> +	src_vq->dev = ctx->priv->dev;
> +
> +	ret = vb2_queue_init(src_vq);
> +	if (ret)
> +		return ret;
> +
> +	memset(dst_vq, 0, sizeof(*dst_vq));
> +	dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
> +	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> +	dst_vq->drv_priv = ctx;
> +	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	dst_vq->ops = &ipu_csc_scaler_qops;
> +	dst_vq->mem_ops = &vb2_dma_contig_memops;
> +	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> +	dst_vq->lock = &ctx->priv->mutex;
> +	dst_vq->dev = ctx->priv->dev;
> +
> +	return vb2_queue_init(dst_vq);
> +}
> +
> +static int ipu_csc_scaler_s_ctrl(struct v4l2_ctrl *ctrl)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = container_of(ctrl->handler,
> +						      struct ipu_csc_scaler_ctx,
> +						      ctrl_hdlr);
> +	enum ipu_rotate_mode rot_mode;
> +	int rotate;
> +	bool hflip, vflip;
> +	int ret = 0;
> +
> +	rotate = ctx->rotate;
> +	hflip = ctx->hflip;
> +	vflip = ctx->vflip;
> +
> +	switch (ctrl->id) {
> +	case V4L2_CID_HFLIP:
> +		hflip = ctrl->val;
> +		break;
> +	case V4L2_CID_VFLIP:
> +		vflip = ctrl->val;
> +		break;
> +	case V4L2_CID_ROTATE:
> +		rotate = ctrl->val;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	ret = ipu_degrees_to_rot_mode(&rot_mode, rotate, hflip, vflip);
> +	if (ret)
> +		return ret;
> +
> +	if (rot_mode != ctx->rot_mode) {
> +		struct v4l2_pix_format *in_fmt, *out_fmt;
> +		struct ipu_image test_in, test_out;
> +
> +		in_fmt = &ctx->q_data[V4L2_M2M_SRC].cur_fmt;
> +		out_fmt = &ctx->q_data[V4L2_M2M_DST].cur_fmt;
> +
> +		test_in.pix = *in_fmt;
> +		test_out.pix = *out_fmt;
> +
> +		if (ipu_rot_mode_is_irt(rot_mode) !=
> +		    ipu_rot_mode_is_irt(ctx->rot_mode)) {
> +			/* Switch width and height to keep aspect ratio intact */
> +			test_out.pix.width = out_fmt->height;
> +			test_out.pix.height = out_fmt->width;
> +		}
> +
> +		ipu_image_convert_adjust(&test_in, &test_out, ctx->rot_mode);
> +
> +		/* Check if output format needs to be changed */
> +		if (test_in.pix.width != in_fmt->width ||
> +		    test_in.pix.height != in_fmt->height ||
> +		    test_in.pix.bytesperline != in_fmt->bytesperline ||
> +		    test_in.pix.sizeimage != in_fmt->sizeimage) {
> +			struct vb2_queue *out_q;
> +
> +			out_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
> +						V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +			if (vb2_is_busy(out_q))
> +				return -EBUSY;
> +		}
> +
> +		/* Check if capture format needs to be changed */
> +		if (test_out.pix.width != out_fmt->width ||
> +		    test_out.pix.height != out_fmt->height ||
> +		    test_out.pix.bytesperline != out_fmt->bytesperline ||
> +		    test_out.pix.sizeimage != out_fmt->sizeimage) {
> +			struct vb2_queue *cap_q;
> +
> +			cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
> +						V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +			if (vb2_is_busy(cap_q))
> +				return -EBUSY;
> +		}
> +
> +		*in_fmt = test_in.pix;
> +		*out_fmt = test_out.pix;
> +
> +		ctx->rot_mode = rot_mode;
> +		ctx->rotate = rotate;
> +		ctx->hflip = hflip;
> +		ctx->vflip = vflip;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_ctrl_ops ipu_csc_scaler_ctrl_ops = {
> +	.s_ctrl = ipu_csc_scaler_s_ctrl,
> +};
> +
> +static int ipu_csc_scaler_init_controls(struct ipu_csc_scaler_ctx *ctx)
> +{
> +	struct v4l2_ctrl_handler *hdlr = &ctx->ctrl_hdlr;
> +
> +	v4l2_ctrl_handler_init(hdlr, 3);
> +
> +	v4l2_ctrl_new_std(hdlr, &ipu_csc_scaler_ctrl_ops, V4L2_CID_HFLIP,
> +			  0, 1, 1, 0);
> +	v4l2_ctrl_new_std(hdlr, &ipu_csc_scaler_ctrl_ops, V4L2_CID_VFLIP,
> +			  0, 1, 1, 0);
> +	v4l2_ctrl_new_std(hdlr, &ipu_csc_scaler_ctrl_ops, V4L2_CID_ROTATE,
> +			  0, 270, 90, 0);
> +
> +	if (hdlr->error) {
> +		v4l2_ctrl_handler_free(hdlr);
> +		return hdlr->error;
> +	}
> +
> +	v4l2_ctrl_handler_setup(hdlr);
> +	return 0;
> +}
> +
> +#define DEFAULT_WIDTH	720
> +#define DEFAULT_HEIGHT	576
> +static const struct ipu_csc_scaler_q_data ipu_csc_scaler_q_data_default = {
> +	.cur_fmt = {
> +		.width = DEFAULT_WIDTH,
> +		.height = DEFAULT_HEIGHT,
> +		.pixelformat = V4L2_PIX_FMT_YUV420,
> +		.field = V4L2_FIELD_NONE,
> +		.bytesperline = DEFAULT_WIDTH,
> +		.sizeimage = DEFAULT_WIDTH * DEFAULT_HEIGHT * 3 / 2,
> +		.colorspace = V4L2_COLORSPACE_SRGB,
> +	},
> +	.rect = {
> +		.width = DEFAULT_WIDTH,
> +		.height = DEFAULT_HEIGHT,
> +	},
> +};
> +
> +/*
> + * File operations
> + */
> +static int ipu_csc_scaler_open(struct file *file)
> +{
> +	struct ipu_csc_scaler_priv *priv = video_drvdata(file);
> +	struct ipu_csc_scaler_ctx *ctx = NULL;
> +	int ret;
> +
> +	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ctx->rot_mode = IPU_ROTATE_NONE;
> +
> +	v4l2_fh_init(&ctx->fh, video_devdata(file));
> +	file->private_data = &ctx->fh;
> +	v4l2_fh_add(&ctx->fh);
> +	ctx->priv = priv;
> +
> +	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(priv->m2m_dev, ctx,
> +					    &ipu_csc_scaler_queue_init);
> +	if (IS_ERR(ctx->fh.m2m_ctx)) {
> +		ret = PTR_ERR(ctx->fh.m2m_ctx);
> +		goto err_ctx;
> +	}
> +
> +	ret = ipu_csc_scaler_init_controls(ctx);
> +	if (ret)
> +		goto err_ctrls;
> +
> +	ctx->fh.ctrl_handler = &ctx->ctrl_hdlr;
> +
> +	ctx->q_data[V4L2_M2M_SRC] = ipu_csc_scaler_q_data_default;
> +	ctx->q_data[V4L2_M2M_DST] = ipu_csc_scaler_q_data_default;
> +
> +	dev_dbg(priv->dev, "Created instance %p, m2m_ctx: %p\n", ctx,
> +		ctx->fh.m2m_ctx);
> +
> +	return 0;
> +
> +err_ctrls:
> +	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
> +err_ctx:
> +	v4l2_fh_del(&ctx->fh);
> +	v4l2_fh_exit(&ctx->fh);
> +	kfree(ctx);
> +	return ret;
> +}
> +
> +static int ipu_csc_scaler_release(struct file *file)
> +{
> +	struct ipu_csc_scaler_priv *priv = video_drvdata(file);
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(file->private_data);
> +
> +	dev_dbg(priv->dev, "Releasing instance %p\n", ctx);
> +
> +	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
> +	v4l2_fh_del(&ctx->fh);
> +	v4l2_fh_exit(&ctx->fh);
> +	kfree(ctx);
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_file_operations ipu_csc_scaler_fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= ipu_csc_scaler_open,
> +	.release	= ipu_csc_scaler_release,
> +	.poll		= v4l2_m2m_fop_poll,
> +	.unlocked_ioctl	= video_ioctl2,
> +	.mmap		= v4l2_m2m_fop_mmap,
> +};
> +
> +static struct v4l2_m2m_ops m2m_ops = {
> +	.device_run	= device_run,
> +	.job_abort	= job_abort,
> +};
> +
> +static const struct video_device ipu_csc_scaler_videodev_template = {
> +	.name		= "ipu_ic_pp csc/scaler",
> +	.fops		= &ipu_csc_scaler_fops,
> +	.ioctl_ops	= &ipu_csc_scaler_ioctl_ops,
> +	.minor		= -1,
> +	.release	= video_device_release,
> +	.vfl_dir	= VFL_DIR_M2M,
> +	.device_caps	= V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING,
> +};
> +
> +int imx_media_csc_scaler_device_register(struct imx_media_video_dev *vdev)
> +{
> +	struct ipu_csc_scaler_priv *priv = vdev_to_priv(vdev);
> +	struct video_device *vfd = vdev->vfd;
> +	int ret;
> +
> +	vfd->v4l2_dev = &priv->md->v4l2_dev;
> +
> +	ret = video_register_device(vfd, VFL_TYPE_GRABBER, -1);
> +	if (ret) {
> +		v4l2_err(vfd->v4l2_dev, "Failed to register video device\n");
> +		return ret;
> +	}
> +
> +	v4l2_info(vfd->v4l2_dev, "Registered %s as /dev/%s\n", vfd->name,
> +		  video_device_node_name(vfd));
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_register);
> +
> +void imx_media_csc_scaler_device_unregister(struct imx_media_video_dev *vdev)
> +{
> +	struct ipu_csc_scaler_priv *priv = vdev_to_priv(vdev);
> +	struct video_device *vfd = priv->vdev.vfd;
> +
> +	mutex_lock(&priv->mutex);
> +
> +	if (video_is_registered(vfd))

No need for the 'if', video_unregister_device already checks if the
vfd is registered.

> +		video_unregister_device(vfd);
> +
> +	mutex_unlock(&priv->mutex);
> +}
> +EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_unregister);
> +
> +struct imx_media_video_dev *
> +imx_media_csc_scaler_device_init(struct imx_media_dev *md)
> +{
> +	struct ipu_csc_scaler_priv *priv;
> +	struct video_device *vfd;
> +	int ret;
> +
> +	priv = devm_kzalloc(md->md.dev, sizeof(*priv), GFP_KERNEL);

Are you sure you can use devm_ here? It might be a good idea to test
what happens when you unbind the device while streaming.

At unbind time any devm_ memory is immediately freed, which can cause
problems if it is still used somewhere.

It might be better to release this in the video_device release function.

> +	if (!priv)
> +		return ERR_PTR(-ENOMEM);
> +
> +	priv->md = md;
> +	priv->dev = md->md.dev;
> +
> +	mutex_init(&priv->mutex);
> +
> +	vfd = video_device_alloc();
> +	if (!vfd)
> +		return ERR_PTR(-ENOMEM);
> +
> +	*vfd = ipu_csc_scaler_videodev_template;
> +	vfd->lock = &priv->mutex;
> +	priv->vdev.vfd = vfd;
> +
> +	INIT_LIST_HEAD(&priv->vdev.list);
> +
> +	video_set_drvdata(vfd, priv);
> +
> +	priv->m2m_dev = v4l2_m2m_init(&m2m_ops);
> +	if (IS_ERR(priv->m2m_dev)) {
> +		ret = PTR_ERR(priv->m2m_dev);
> +		v4l2_err(&md->v4l2_dev, "Failed to init mem2mem device: %d\n",
> +			 ret);
> +		return ERR_PTR(ret);
> +	}
> +
> +	return &priv->vdev;
> +}
> +EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_init);
> +
> +void imx_media_csc_scaler_device_remove(struct imx_media_video_dev *vdev)
> +{
> +	struct ipu_csc_scaler_priv *priv = vdev_to_priv(vdev);
> +
> +	v4l2_m2m_release(priv->m2m_dev);
> +}
> +EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_remove);
> +
> +MODULE_DESCRIPTION("i.MX IPUv3 mem2mem scaler/CSC driver");
> +MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
> +MODULE_LICENSE("GPL");
> diff --git a/drivers/staging/media/imx/imx-media-dev.c b/drivers/staging/media/imx/imx-media-dev.c
> index 0a7d1d183141..4d2078d18a48 100644
> --- a/drivers/staging/media/imx/imx-media-dev.c
> +++ b/drivers/staging/media/imx/imx-media-dev.c
> @@ -323,12 +323,36 @@ int imx_media_probe_complete(struct v4l2_async_notifier *notifier)
>  		goto unlock;
>  
>  	ret = v4l2_device_register_subdev_nodes(&imxmd->v4l2_dev);
> -unlock:
> -	mutex_unlock(&imxmd->mutex);
>  	if (ret)
> -		return ret;
> +		goto unlock;
> +
> +	imxmd->m2m_vdev = imx_media_csc_scaler_device_init(imxmd);
> +	if (IS_ERR(imxmd->m2m_vdev)) {
> +		ret = PTR_ERR(imxmd->m2m_vdev);
> +		goto unlock;
> +	}
>  
> -	return media_device_register(&imxmd->md);
> +	ret = imx_media_csc_scaler_device_register(imxmd->m2m_vdev);
> +	if (ret)
> +		goto m2m_remove;
> +
> +	mutex_unlock(&imxmd->mutex);
> +
> +	ret = media_device_register(&imxmd->md);
> +	if (ret) {
> +		mutex_lock(&imxmd->mutex);
> +		goto m2m_unreg;
> +	}
> +
> +	return 0;
> +
> +m2m_unreg:
> +	imx_media_csc_scaler_device_unregister(imxmd->m2m_vdev);
> +m2m_remove:
> +	imx_media_csc_scaler_device_remove(imxmd->m2m_vdev);
> +unlock:
> +	mutex_unlock(&imxmd->mutex);
> +	return ret;
>  }
>  
>  /*
> @@ -504,6 +528,8 @@ static int imx_media_remove(struct platform_device *pdev)
>  	v4l2_async_notifier_unregister(&imxmd->notifier);
>  	imx_media_remove_ipu_internal_subdevs(imxmd);
>  	v4l2_async_notifier_cleanup(&imxmd->notifier);
> +	imx_media_csc_scaler_device_unregister(imxmd->m2m_vdev);
> +	imx_media_csc_scaler_device_remove(imxmd->m2m_vdev);
>  	media_device_unregister(&imxmd->md);
>  	v4l2_device_unregister(&imxmd->v4l2_dev);
>  	media_device_cleanup(&imxmd->md);
> diff --git a/drivers/staging/media/imx/imx-media.h b/drivers/staging/media/imx/imx-media.h
> index dd603a6b3a70..1894553b4497 100644
> --- a/drivers/staging/media/imx/imx-media.h
> +++ b/drivers/staging/media/imx/imx-media.h
> @@ -151,6 +151,9 @@ struct imx_media_dev {
>  
>  	/* for async subdev registration */
>  	struct v4l2_async_notifier notifier;
> +
> +	/* IC scaler/CSC mem2mem video device */
> +	struct imx_media_video_dev *m2m_vdev;
>  };
>  
>  enum codespace_sel {
> @@ -281,6 +284,13 @@ void imx_media_capture_device_set_format(struct imx_media_video_dev *vdev,
>  					 const struct v4l2_rect *compose);
>  void imx_media_capture_device_error(struct imx_media_video_dev *vdev);
>  
> +/* imx-media-csc-scaler.c */
> +struct imx_media_video_dev *
> +imx_media_csc_scaler_device_init(struct imx_media_dev *dev);
> +void imx_media_csc_scaler_device_remove(struct imx_media_video_dev *vdev);
> +int imx_media_csc_scaler_device_register(struct imx_media_video_dev *vdev);
> +void imx_media_csc_scaler_device_unregister(struct imx_media_video_dev *vdev);
> +
>  /* subdev group ids */
>  #define IMX_MEDIA_GRP_ID_CSI2          BIT(8)
>  #define IMX_MEDIA_GRP_ID_CSI           BIT(9)
> 

Otherwise this looks very good and I plan to merge v9 unless something unexpected
crops up.

Regards,

	Hans
Sven Van Asbroeck May 29, 2019, 3:44 p.m. UTC | #2
Thank you all (and especially Philipp) for this amazing work !

One of the main uses for the VPU scaler is to convert from video file
resolution to display resolution. E.g. the source video is 1080p, but the
display videomode is only 720p.

Unfortunately when I connect CODA/h264 decode to the VPU scaler, performance
drops to about half. But on the non-mainline Freescale kernel, even with
the rescale, I still get 30fps.

Mainline kernel + v8 imx rescaler patch:
A) 1080p30 source video -> CODA h264 decode -> drm 1080p: near full speed (28fps)
B) 1080p30 source video -> CODA h264 decode -> VPU scaler 1080p->720p ->
	drm 720p: only half speed (15fps)

Freescale non-mainline kernel:
C) 1080p30 source video -> CODA h264 decode -> VPU scaler 1080p->720p ->
	v4l2out 720p: full speed (30fps)

Question:
- is this expected behaviour ? A regression wrt. the Freescale kernel?
- perhaps I am missing something in the gstreamer pipeline ?

Gstreamer pipelines:
--------------------
A) gst-launch-1.0 filesrc location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! 
matroskademux ! h264parse ! v4l2h264dec ! kmssink can-scale=0
B) gst-launch-1.0 filesrc location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! 
matroskademux ! h264parse ! v4l2h264dec ! v4l2video8convert ! video/x-raw,width=
1280,height=720 ! kmssink can-scale=0
C) gst-launch-0.10 filesrc location=/home/default/jellyfish-10-mbps-hd-h264.
mkv ! decodebin ! mfw_v4lsink

I have tried various values for capture-io-mode/output-io-mode.

imx6q
v8 imx scaler patch backported to 4.19 mainline kernel.
GStreamer 1.14.4
Fabio Estevam May 29, 2019, 4:53 p.m. UTC | #3
Hi Sven,

On Wed, May 29, 2019 at 12:45 PM Sven Van Asbroeck <thesven73@gmail.com> wrote:
>
> Thank you all (and especially Philipp) for this amazing work !
>
> One of the main uses for the VPU scaler is to convert from video file
> resolution to display resolution. E.g. the source video is 1080p, but the
> display videomode is only 720p.
>
> Unfortunately when I connect CODA/h264 decode to the VPU scaler, performance
> drops to about half. But on the non-mainline Freescale kernel, even with
> the rescale, I still get 30fps.
>
> Mainline kernel + v8 imx rescaler patch:
> A) 1080p30 source video -> CODA h264 decode -> drm 1080p: near full speed (28fps)
> B) 1080p30 source video -> CODA h264 decode -> VPU scaler 1080p->720p ->
>         drm 720p: only half speed (15fps)

Does this patch from Philipp fix the problem?
https://git.pengutronix.de/cgit/pza/linux/commit/?h=imx-drm/fixes&id=137caa702f2308f7ef03876e164b0d0f3300712a
Sven Van Asbroeck May 29, 2019, 5:20 p.m. UTC | #4
Hi Fabio,

On Wed, May 29, 2019 at 12:53 PM Fabio Estevam <festevam@gmail.com> wrote:
>
> Does this patch from Philipp fix the problem?
> https://git.pengutronix.de/cgit/pza/linux/commit/?h=imx-drm/fixes&id=137caa702f2308f7ef03876e164b0d0f3300712a

Thank you so much for the suggestion ! It does look like a really good candidate.

Unfortunately that patch doesn't apply to my 4.19, I am missing too many of
the latest patches. I will have to port our system to latest mainline first.
Sven Van Asbroeck May 29, 2019, 8:55 p.m. UTC | #5
Philipp and Fabio,

On Wed, May 29, 2019 at 12:53 PM Fabio Estevam <festevam@gmail.com> wrote:
>
> Does this patch from Philipp fix the problem?
> https://git.pengutronix.de/cgit/pza/linux/commit/?h=imx-drm/fixes&id=137caa702f2308f7ef03876e164b0d0f3300712a

I am now running 5.2-rc2 with Philipp's non-plus imx6q patch.

Performance is still much worse than the Freescale baseline.

I am not at all worried about vpu scaler performance, after all v8 is an
in-progress patch.

I am much more concerned about the CODA h264 slowdown. My 1080p30 test
video runs at half the speed compared to the Freescale kernel. The best it
can do is 28fps, which results in visible 'jerks' in the video. Note that
this is without using the scaler.

Questions:
- is the performance slowdown a known issue?
- is there anything I've missed in the gstreamer pipelines below?
- is there anything I can do to help?

A) mainline 5.2-rc2 with Philipp's latest non-plus patch:
$ time gst-launch-1.0 filesrc
location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! matroskademux !
h264parse ! v4l2h264dec ! kmssink can-scale=0 sync=0
real 0m 32.01s

B) Freescale kernel:
$ time gst-launch-0.10 filesrc
location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! decodebin !
mfw_v4lsink sync=0
Running time 0:00:14.781129554 render fps 59.941
Fabio Estevam May 29, 2019, 10:19 p.m. UTC | #6
Hi Sven,

On Wed, May 29, 2019 at 5:55 PM Sven Van Asbroeck <thesven73@gmail.com> wrote:

> I am now running 5.2-rc2 with Philipp's non-plus imx6q patch.
>
> Performance is still much worse than the Freescale baseline.
>
> I am not at all worried about vpu scaler performance, after all v8 is an
> in-progress patch.
>
> I am much more concerned about the CODA h264 slowdown. My 1080p30 test
> video runs at half the speed compared to the Freescale kernel. The best it
> can do is 28fps, which results in visible 'jerks' in the video. Note that
> this is without using the scaler.
>
> Questions:
> - is the performance slowdown a known issue?
> - is there anything I've missed in the gstreamer pipelines below?
> - is there anything I can do to help?
>
> A) mainline 5.2-rc2 with Philipp's latest non-plus patch:
> $ time gst-launch-1.0 filesrc
> location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! matroskademux !
> h264parse ! v4l2h264dec ! kmssink can-scale=0 sync=0
> real 0m 32.01s

In my tests I had to pass 'capture-io-mode=dmabuf' to get a smooth
video playback.

Please check the explanation from Philipp at:
http://gstreamer-devel.966125.n4.nabble.com/IMX-Scaler-CSC-m2m-driver-td4671175.html
Nicolas Dufresne May 29, 2019, 11:48 p.m. UTC | #7
Le mercredi 29 mai 2019 à 19:19 -0300, Fabio Estevam a écrit :
> Hi Sven,
> 
> On Wed, May 29, 2019 at 5:55 PM Sven Van Asbroeck <thesven73@gmail.com> wrote:
> 
> > I am now running 5.2-rc2 with Philipp's non-plus imx6q patch.
> > 
> > Performance is still much worse than the Freescale baseline.
> > 
> > I am not at all worried about vpu scaler performance, after all v8 is an
> > in-progress patch.
> > 
> > I am much more concerned about the CODA h264 slowdown. My 1080p30 test
> > video runs at half the speed compared to the Freescale kernel. The best it
> > can do is 28fps, which results in visible 'jerks' in the video. Note that
> > this is without using the scaler.
> > 
> > Questions:
> > - is the performance slowdown a known issue?
> > - is there anything I've missed in the gstreamer pipelines below?
> > - is there anything I can do to help?
> > 
> > A) mainline 5.2-rc2 with Philipp's latest non-plus patch:
> > $ time gst-launch-1.0 filesrc
> > location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! matroskademux !
> > h264parse ! v4l2h264dec ! kmssink can-scale=0 sync=0
> > real 0m 32.01s
> 
> In my tests I had to pass 'capture-io-mode=dmabuf' to get a smooth
> video playback.

This is the default since 1.14.0.

> 
> Please check the explanation from Philipp at:
> http://gstreamer-devel.966125.n4.nabble.com/IMX-Scaler-CSC-m2m-driver-td4671175.html
Nicolas Dufresne May 29, 2019, 11:52 p.m. UTC | #8
Le mercredi 29 mai 2019 à 16:55 -0400, Sven Van Asbroeck a écrit :
> Philipp and Fabio,
> 
> On Wed, May 29, 2019 at 12:53 PM Fabio Estevam <festevam@gmail.com> wrote:
> > Does this patch from Philipp fix the problem?
> > https://git.pengutronix.de/cgit/pza/linux/commit/?h=imx-drm/fixes&id=137caa702f2308f7ef03876e164b0d0f3300712a
> 
> I am now running 5.2-rc2 with Philipp's non-plus imx6q patch.
> 
> Performance is still much worse than the Freescale baseline.
> 
> I am not at all worried about vpu scaler performance, after all v8 is an
> in-progress patch.
> 
> I am much more concerned about the CODA h264 slowdown. My 1080p30 test
> video runs at half the speed compared to the Freescale kernel. The best it
> can do is 28fps, which results in visible 'jerks' in the video. Note that
> this is without using the scaler.
> 
> Questions:
> - is the performance slowdown a known issue?
> - is there anything I've missed in the gstreamer pipelines below?
> - is there anything I can do to help?
> 
> A) mainline 5.2-rc2 with Philipp's latest non-plus patch:
> $ time gst-launch-1.0 filesrc
> location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! matroskademux !
> h264parse ! v4l2h264dec ! kmssink can-scale=0 sync=0
> real 0m 32.01s

The kmssink element still has some issues as it renders using the legacy
KMS API and makes a synchronous vblank wait before returning. In many
cases, the proper workaround is to do:

  ... ! v4l2h264dec ! queue ! kmssink can-scale=0 sync=0

In order to measure the decoding performance without having the display
being involved you can also do:

  ... ! v4l2h264dec ! fpsdisplaysink text-overlay=0 video-sink=fakevideosink sync=0 -v

In order to benefit from the best of this driver, you should also use
the latest GStreamer 1.16.0. It contains the latest fixes from Philipp and
me, including some performance improvements.

> 
> B) Freescale kernel:
> $ time gst-launch-0.10 filesrc
> location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! decodebin !
> mfw_v4lsink sync=0
> Running time 0:00:14.781129554 render fps 59.941
Nicolas Dufresne May 30, 2019, 9:09 p.m. UTC | #9
Le mercredi 29 mai 2019 à 16:55 -0400, Sven Van Asbroeck a écrit :
> Philipp and Fabio,
> 
> On Wed, May 29, 2019 at 12:53 PM Fabio Estevam <festevam@gmail.com> wrote:
> > Does this patch from Philipp fix the problem?
> > https://git.pengutronix.de/cgit/pza/linux/commit/?h=imx-drm/fixes&id=137caa702f2308f7ef03876e164b0d0f3300712a
> 
> I am now running 5.2-rc2 with Philipp's non-plus imx6q patch.
> 
> Performance is still much worse than the Freescale baseline.
> 
> I am not at all worried about vpu scaler performance, after all v8 is an
> in-progress patch.
> 
> I am much more concerned about the CODA h264 slowdown. My 1080p30 test
> video runs at half the speed compared to the Freescale kernel. The best it
> can do is 28fps, which results in visible 'jerks' in the video. Note that
> this is without using the scaler.
> 
> Questions:
> - is the performance slowdown a known issue?
> - is there anything I've missed in the gstreamer pipelines below?
> - is there anything I can do to help?
> 
> A) mainline 5.2-rc2 with Philipp's latest non-plus patch:
> $ time gst-launch-1.0 filesrc
> location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! matroskademux !
> h264parse ! v4l2h264dec ! kmssink can-scale=0 sync=0
> real 0m 32.01s

Just for your information, I tested with this pipeline, with kernel
"5.2.0-rc2-00024-gbec7550cca10":

time gst-launch-1.0 filesrc location=~/Videos/jellyfish-10-mbps-hd-
h264.mkv ! matroskademux ! h264parse ! v4l2h264dec ! fakevideosink
sync=0 
Définition du pipeline à PAUSED...
Le pipeline est en phase de PREROLL…
Le pipeline a terminé la phase PREROLL…
Passage du pipeline à la phase PLAYING…
New clock: GstSystemClock
EOS reçu de l’élément « pipeline0 ».
Execution ended after 0:00:12.860395248
Définition du pipeline à PAUSED...
Définition du pipeline à READY (prêt)…
Définition du pipeline à NULL…
Libération du pipeline…

real	0m13,411s
user	0m1,398s
sys	0m1,006s

That's about 70fps.

> 
> B) Freescale kernel:
> $ time gst-launch-0.10 filesrc
> location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! decodebin !
> mfw_v4lsink sync=0
> Running time 0:00:14.781129554 render fps 59.941
Sven Van Asbroeck May 30, 2019, 9:34 p.m. UTC | #10
Nicholas and Fabio, thank you so much for your help !

On Thu, May 30, 2019 at 5:09 PM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>
> Just for your information, I tested with this pipeline, with kernel
> "5.2.0-rc2-00024-gbec7550cca10":
>
> Execution ended after 0:00:12.860395248

I noticed that my mainline kernel was using imx-sdma firmware from ROM,
but the Freescale kernel was downloading version 1.1 from the filesystem.

So after d/l-ing the latest imx-sdma firmware and adding that to my
4.19 mainline kernel, my performance is now identical to yours !
Fantastic !

# dmesg | grep imx-sdma
[    1.202715] imx-sdma 20ec000.sdma: loaded firmware 3.3
# time gst-launch-1.0 filesrc
location=/home/default/jellyfish-10-mbps-hd-h264.mkv !
matroskademux ! h264parse ! v4l2h264dec ! fakevideosink sync=0
Execution ended after 0:00:12.851651008

Unfortunately I cannot load any imx-sdma firmware on the latest mainline
kernel. Right after the firmware is loaded, reads seem to get corrupted
and the whole kernel crashes / hangs.

I am currently bisecting to find the offending commit:
v.4.20 good
v5.0 bad
Fabio Estevam May 30, 2019, 10:18 p.m. UTC | #11
Hi Sven,

[Adding Robin]

On Thu, May 30, 2019 at 6:34 PM Sven Van Asbroeck <thesven73@gmail.com> wrote:

> Unfortunately I cannot load any imx-sdma firmware on the latest mainline
> kernel. Right after the firmware is loaded, reads seem to get corrupted
> and the whole kernel crashes / hangs.
>
> I am currently bisecting to find the offending commit:
> v.4.20 good
> v5.0 bad

I am not sure I understood the sdma firmware issue correctly.

Please start a new thread in linux-arm-kernel on this topic and also
copy Robin and the folks from
./scripts/get_maintainer.pl -f drivers/dma/imx-sdma.c

Thanks
Sven Van Asbroeck May 31, 2019, 12:26 a.m. UTC | #12
On Thu, May 30, 2019 at 6:18 PM Fabio Estevam <festevam@gmail.com> wrote:
>
> Please start a new thread in linux-arm-kernel on this topic and also
> copy Robin and the folks from
> ./scripts/get_maintainer.pl -f drivers/dma/imx-sdma.c

Thank you, I will post a bug report once I locate the commit which
introduces the issue. 4 more bisects to go.
Robin Gong May 31, 2019, 12:26 a.m. UTC | #13
Hi Sven,
	Which SoC and board are you using? Could you post the log?

> -----Original Message-----
> From: Fabio Estevam <festevam@gmail.com>
> Sent: 2019年5月31日 6:18
> To: Sven Van Asbroeck <thesven73@gmail.com>; Robin Gong
> <yibin.gong@nxp.com>
> Cc: Nicolas Dufresne <nicolas@ndufresne.ca>; Philipp Zabel
> <p.zabel@pengutronix.de>; linux-media <linux-media@vger.kernel.org>; Hans
> Verkuil <hans.verkuil@cisco.com>; tharvey@gateworks.com
> Subject: Re: [v8] media: imx: add mem2mem device
> 
> Hi Sven,
> 
> [Addin Robin]
> 
> On Thu, May 30, 2019 at 6:34 PM Sven Van Asbroeck <thesven73@gmail.com>
> wrote:
> 
> > Unfortunately I cannot load any imx-sdma firmware on the latest
> > mainline kernel. Right after the firmware is loaded, reads seem to get
> > corrupted and the whole kernel crashes / hangs.
> >
> > I am currently bisecting to find the offending commit:
> > v.4.20 good
> > v5.0 bad
> 
> I am not sure I understood the sdma firmware issue correctly.
> 
> Please start a new thread in linux-arm-kernel on this topic and also copy Robin
> and the folks from ./scripts/get_maintainer.pl -f drivers/dma/imx-sdma.c
> 
> Thanks
Nicolas Dufresne May 31, 2019, 3:34 p.m. UTC | #14
Le mercredi 29 mai 2019 à 19:52 -0400, Nicolas Dufresne a écrit :
> Le mercredi 29 mai 2019 à 16:55 -0400, Sven Van Asbroeck a écrit :
> > Philipp and Fabio,
> > 
> > On Wed, May 29, 2019 at 12:53 PM Fabio Estevam <festevam@gmail.com> wrote:
> > > Does this patch from Philipp fix the problem?
> > > https://git.pengutronix.de/cgit/pza/linux/commit/?h=imx-drm/fixes&id=137caa702f2308f7ef03876e164b0d0f3300712a
> > 
> > I am now running 5.2-rc2 with Philipp's non-plus imx6q patch.
> > 
> > Performance is still much worse than the Freescale baseline.
> > 
> > I am not at all worried about vpu scaler performance, after all v8 is an
> > in-progress patch.
> > 
> > I am much more concerned about the CODA h264 slowdown. My 1080p30 test
> > video runs at half the speed compared to the Freescale kernel. The best it
> > can do is 28fps, which results in visible 'jerks' in the video. Note that
> > this is without using the scaler.
> > 
> > Questions:
> > - is the performance slowdown a known issue?
> > - is there anything I've missed in the gstreamer pipelines below?
> > - is there anything I can do to help?
> > 
> > A) mainline 5.2-rc2 with Philipp's latest non-plus patch:
> > $ time gst-launch-1.0 filesrc
> > location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! matroskademux !
> > h264parse ! v4l2h264dec ! kmssink can-scale=0 sync=0
> > real 0m 32.01s
> 
> kmssink element still have some issues as it renders using the legacy
> KMS API and makes an synchronous vblank wait before returning. In many
> cases, the proper workaround is to do:
> 
>   ... ! v4l2h264dec ! queue ! kmssink can-scale=0 sync=0
> 
> In order to measure the decoding performance without having the display
> being involved you can also do:
> 
>   ... ! v4l2h264dec ! fpsdisplaysink text-overlay=0 video-sink=fakevideosink sync=0 -v
> 
> In order to benefit from the best of this driver, you should also use
> the latest GStreamer 1.16.0. It contains latest fixes from Philipp and
> I, including some performance improvement.

So I have done more tests with kmssink, and noticed a large drop in
performance too. As I suspected, it's related to unreported latency (as
this is not live, it's in fact processing-deadline). What I did to test
this (this is stock 1.16.0, with the plugins-base-tools installed).

GST_TRACERS="latency(flags=element+reported)" GST_DEBUG="GST_TRACER:7" gst-launch-1.0 filesrc location=Videos/jellyfish-10-mbps-hd-h264.mkv ! parsebin ! v4l2h264dec ! queue max-size-bytes=0 ! fpsdisplaysink text-overlay=0 video-sink="kmssink can-scale=0" 2> gst.log
gst-stats-1.0 gst.log

Which yields this (output filtered):
Element Latency Statistics:
	0xf281c8.v4l2h264dec0.src: mean=338373215 min=83527400 max=424823314

Element Reported Latency:
	0xf281c8.v4l2h264dec0: min=0 max=0 ts=0:00:01.786671738

So the decoder reports no latency, which is not a problem for offline
playback, but has a 338 ms average latency (with variation from 83 to
424 ms). I think when that latency (this is the processing latency)
goes up, QoS events are sent by the sink to the decoder which will do
early skips in order to ensure a swift catch up, but also to skip any
other processing in the case there would be some converter between the
decoder and the sink. But it seems the skip algo is too aggressive and
makes things worse. I'll work on that. Meanwhile, you can work around it
with:

  ... ! kmssink can-scale=0 qos=0

Now, if that pipeline was live, this would be a problem. For other
decoders, we use V4L2_CID_MIN_BUFFERS_FOR_CAPTURE as the decoder
latency in frames. That usually represents the DPB depth, but the CODA
capture queue is not actually the frames we are decoding to, but a pool
of frames we convert into. I think to make CODA really usable for live
playback with a high-depth DPB, we'll need a new control that exposes this
latency. Exposing it through this one would make userspace allocate too
many buffers for no reason.

> 
> > B) Freescale kernel:
> > $ time gst-launch-0.10 filesrc
> > location=/home/default/jellyfish-10-mbps-hd-h264.mkv ! decodebin !
> > mfw_v4lsink sync=0
> > Running time 0:00:14.781129554 render fps 59.941
Sven Van Asbroeck May 31, 2019, 4:07 p.m. UTC | #15
Hello Nicholas, thank you so much for investigating.

On Fri, May 31, 2019 at 11:34 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
>
> Now, if that pipeline was live, this would be a problem.

This is where my gstreamer knowledge gets really hazy.
What does it mean for a pipeline to be 'live' ?
Would this be a problem when playing a 1080p30 h264
video from a file?
Nicolas Dufresne May 31, 2019, 5:34 p.m. UTC | #16
Le vendredi 31 mai 2019 à 12:07 -0400, Sven Van Asbroeck a écrit :
> Hello Nicholas, thank you so much for investigating.
> 
> On Fri, May 31, 2019 at 11:34 AM Nicolas Dufresne <nicolas@ndufresne.ca> wrote:
> > Now, if that pipeline was live, this would be a problem.
> 
> This is where my gstreamer knowledge gets really hazy.
> What does it mean for a pipeline to be 'live' ?
> Would this be a problem when playing a 1080p30 h264
> video from a file?

Playback from file is not live. That basically means that the input of
your pipeline is not paced. You can read it as fast as you can, and you
don't have a limited amount of time to deal with it. An example live
pipeline would be:

  v4l2src ! v4l2h264enc ! rtph264pay ! udpsink

In this case, if v4l2h264enc has too much latency, or is too slow, the
capture driver will start skipping captures, losing information. The
latency is mostly for the case you have multiple streams though (e.g.
audio and video).

Nicolas
Sven Van Asbroeck May 31, 2019, 9:16 p.m. UTC | #17
Hello Nicholas and Fabio,

The sdma firmware turned out to be a non-issue: adding "qos=0" to kmssink
(as suggested by Nicholas) is what fixed kmssink performance, unrelated to sdma.

(Although I still see an sdma issue, unrelated to this thread - I will talk
to Robin about it outside of the thread)

So now I am getting identical h264 decode/noscale performance across kernel
versions. Performance is identical to the Freescale kernel. And kmssink
works great, as long as "qos=0" is added - as Nicholas suggested.

========> That's fantastic !!! <========

However I see performance and corruption issues with the v8 scaler patch.
Probably to be expected, after all it's a patch 'in progress'.

This is on a 5.2-rc2 kernel, with Philipp's non-plus imx6 patch applied.
(Running on a non-plus imx6q)

Upscaling 720p -> 1080p works ok, however the performance is lower than
the Freescale kernel. This is with a 720p24 Toy Story 4 demo grabbed from
YouTube:

# gst-launch-1.0 -vvv filesrc location=/home/default/toy720p.mp4  !
 qtdemux ! h264parse ! v4l2h264dec ! v4l2video8convert  !
 video/x-raw,width=1920,height=1080 !
 fpsdisplaysink video-sink="fakevideosink" text-overlay=0 sync=0
[...]current: 42.19, average: 41.95

Still much better than real time, but lower than the Freescale kernel, which
did at least 60fps (using mfw_v4lsink sync=0).

However, downscaling 1080p -> 720p is problematic:
# gst-launch-1.0 -vvv filesrc
location=/home/default/jellyfish-10-mbps-hd-h264.mkv !
 matroskademux ! h264parse ! v4l2h264dec ! v4l2video8convert  !
 video/x-raw,width=1280,height=720 !  fpsdisplaysink video-sink="fakevideosink"
 text-overlay=0 sync=0
[...]current: 24.62, average: 24.08

This is not real time, as jellyfish is a 30fps video. On the Freescale kernel,
this would work at 60fps (again using mfw_v4lsink sync=0).

In addition, the image produced is corrupted:
(displayed with "kmssink can-scale=0 qos=0" w/ 720p monitor mode)
https://imagebin.ca/v/4j1q0qRpzgqV

Hope this is useful. Thank you again for making this available to mainline!
Sven
Sven Van Asbroeck May 31, 2019, 9:58 p.m. UTC | #18
Hi Robin,

On Thu, May 30, 2019 at 8:26 PM Robin Gong <yibin.gong@nxp.com> wrote:
>         What's soc chip and board you used? Could you post log?

The mainline kernel we've been using does not have any sdma firmware, and
as a result we get this in the log:

# uname -a
Linux Chimera 5.2.0-rc2-00041-g22a1787e8f40 #66 SMP Fri May 31
16:08:15 EDT 2019 armv7l GNU/Linux
# dmesg | grep sdma
[    1.358472] imx-sdma 20ec000.sdma: Direct firmware load for
imx/sdma/sdma-imx6q.bin failed with error -2
[    1.358570] imx-sdma 20ec000.sdma: Falling back to sysfs fallback
for: imx/sdma/sdma-imx6q.bin
[   64.473773] imx-sdma 20ec000.sdma: external firmware not found,
using ROM firmware

I downloaded the latest Freescale sdma firmware (3.3) from
http://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware
(file: imx/sdma/sdma-imx6q.bin) and added this to the kernel. This now gets
uploaded to the imx6q, we see this in the log:

[    1.338741] imx-sdma 20ec000.sdma: loaded firmware 3.3

But... the kernel now locks up and crashes. See the dmesg log at the end of
this e-mail.

I have not been able to bisect the exact commit where this crash was introduced.
All I know is:
v4.20 good
v5.0 bad

Also: if I remove CONFIG_NFS_V4 from the defconfig, the problem disappears.

This is on an imx6q:
# cat /proc/cpuinfo
processor : 0
model name : ARMv7 Processor rev 10 (v7l)
BogoMIPS : 7.54
Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpd32
CPU implementer : 0x41
CPU architecture: 7
CPU variant : 0x2
CPU part : 0xc09
CPU revision : 10
<...>
Hardware : Freescale i.MX6 Quad/DualLite (Device Tree)
Revision : 0000
Serial : 0000000000000000

The board is an in-house design, loosely based on the sabre-sd. We have our
own small patch set which adds our devicetree, plus a few very specific
patches we need. We apply this on top of mainline.

We also use imx_v6_v7_defconfig.

Here is the crash log:

[    0.000000] Booting Linux on physical CPU 0x0
[    0.000000] Linux version 5.2.0-rc2-00045-gc63ee3cb7134
(sva@svens-asus) (gcc version 7.3.1 20180425 [linaro-7.3-2018.05
revision d29120a424ecfbc167ef90065c0eeb7f91977701] (Linaro GCC
7.3-2018.05)) #67 SMP Fri May 31 17:26:19 EDT 2019
[    0.000000] CPU: ARMv7 Processor [412fc09a] revision 10 (ARMv7), cr=10c5387d
[    0.000000] CPU: PIPT / VIPT nonaliasing data cache, VIPT aliasing
instruction cache
[    0.000000] OF: fdt: Machine model: ARCX Medusa
[    0.000000] Memory policy: Data cache writealloc
[    0.000000] cma: Reserved 256 MiB at 0x30000000
[    0.000000] percpu: Embedded 21 pages/cpu s54824 r8192 d23000 u86016
[    0.000000] Built 1 zonelists, mobility grouping on.  Total pages: 522560
[    0.000000] Kernel command line: console=ttymxc0,115200
video=mxcfb0:dev=pegasus,640x480M@60,if=RGB24,bpp=32
video=mxcfb1:dev=hdmi,640x480M@60,if=RGB24,bpp=32 ip=none
root=/dev/mmcblk0p3 rootwait ro rootfstype=ext2
[    0.000000] Dentry cache hash table entries: 131072 (order: 7, 524288 bytes)
[    0.000000] Inode-cache hash table entries: 65536 (order: 6, 262144 bytes)
[    0.000000] Memory: 1788368K/2097152K available (12288K kernel
code, 984K rwdata, 4300K rodata, 1024K init, 6927K bss, 46640K
reserved, 262144K cma-reserved, 1310720K highmem)
[    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
[    0.000000] Running RCU self tests
[    0.000000] rcu: Hierarchical RCU implementation.
[    0.000000] rcu: RCU event tracing is enabled.
[    0.000000] rcu: RCU lockdep checking is enabled.
[    0.000000] rcu: RCU calculated value of scheduler-enlistment delay
is 10 jiffies.
[    0.000000] NR_IRQS: 16, nr_irqs: 16, preallocated irqs: 16
[    0.000000] L2C: DT/platform modifies aux control register:
0x32070000 -> 0x32470000
[    0.000000] L2C-310 errata 752271 769419 enabled
[    0.000000] L2C-310 enabling early BRESP for Cortex-A9
[    0.000000] L2C-310 full line of zeros enabled for Cortex-A9
[    0.000000] L2C-310 ID prefetch enabled, offset 16 lines
[    0.000000] L2C-310 dynamic clock gating enabled, standby mode enabled
[    0.000000] L2C-310 cache controller enabled, 16 ways, 1024 kB
[    0.000000] L2C-310: CACHE_ID 0x410000c7, AUX_CTRL 0x76470001
[    0.000000] random: get_random_bytes called from
start_kernel+0x2ac/0x4c0 with crng_init=0
[    0.000000] Switching to timer-based delay loop, resolution 333ns
[    0.000008] sched_clock: 32 bits at 3000kHz, resolution 333ns,
wraps every 715827882841ns
[    0.000034] clocksource: mxc_timer1: mask: 0xffffffff max_cycles:
0xffffffff, max_idle_ns: 637086815595 ns
[    0.002040] Console: colour dummy device 80x30
[    0.002082] Lock dependency validator: Copyright (c) 2006 Red Hat,
Inc., Ingo Molnar
[    0.002099] ... MAX_LOCKDEP_SUBCLASSES:  8
[    0.002115] ... MAX_LOCK_DEPTH:          48
[    0.002130] ... MAX_LOCKDEP_KEYS:        8191
[    0.002145] ... CLASSHASH_SIZE:          4096
[    0.002160] ... MAX_LOCKDEP_ENTRIES:     32768
[    0.002176] ... MAX_LOCKDEP_CHAINS:      65536
[    0.002191] ... CHAINHASH_SIZE:          32768
[    0.002206]  memory used by lock dependency info: 4411 kB
[    0.002221]  per task-struct memory footprint: 1536 bytes
[    0.002319] Calibrating delay loop (skipped), value calculated
using timer frequency.. 6.00 BogoMIPS (lpj=30000)
[    0.002346] pid_max: default: 32768 minimum: 301
[    0.002804] Mount-cache hash table entries: 2048 (order: 1, 8192 bytes)
[    0.002834] Mountpoint-cache hash table entries: 2048 (order: 1, 8192 bytes)
[    0.004993] *** VALIDATE proc ***
[    0.006217] *** VALIDATE cgroup1 ***
[    0.006244] *** VALIDATE cgroup2 ***
[    0.006274] CPU: Testing write buffer coherency: ok
[    0.006361] CPU0: Spectre v2: using BPIALL workaround
[    0.007728] CPU0: thread -1, cpu 0, socket 0, mpidr 80000000
[    0.010229] Setting up static identity map for 0x10100000 - 0x10100078
[    0.010805] rcu: Hierarchical SRCU implementation.
[    0.012321] smp: Bringing up secondary CPUs ...
[    0.014731] CPU1: thread -1, cpu 1, socket 0, mpidr 80000001
[    0.014742] CPU1: Spectre v2: using BPIALL workaround
[    0.017456] CPU2: thread -1, cpu 2, socket 0, mpidr 80000002
[    0.017467] CPU2: Spectre v2: using BPIALL workaround
[    0.019703] CPU3: thread -1, cpu 3, socket 0, mpidr 80000003
[    0.019714] CPU3: Spectre v2: using BPIALL workaround
[    0.020158] smp: Brought up 1 node, 4 CPUs
[    0.020182] SMP: Total of 4 processors activated (24.00 BogoMIPS).
[    0.020200] CPU: All CPU(s) started in SVC mode.
[    0.023381] devtmpfs: initialized
[    0.055718] VFP support v0.3: implementor 41 architecture 3 part 30
variant 9 rev 4
[    0.057990] clocksource: jiffies: mask: 0xffffffff max_cycles:
0xffffffff, max_idle_ns: 19112604462750000 ns
[    0.058053] futex hash table entries: 1024 (order: 4, 65536 bytes)
[    0.071931] pinctrl core: initialized pinctrl subsystem
[    0.076633] NET: Registered protocol family 16
[    0.099278] DMA: preallocated 256 KiB pool for atomic coherent allocations
[    0.102498] cpuidle: using governor menu
[    0.102817] CPU identified as i.MX6Q, silicon rev 1.5
[    0.121955] vdd1p1: supplied by regulator-dummy
[    0.123569] vdd3p0: supplied by regulator-dummy
[    0.124764] vdd2p5: supplied by regulator-dummy
[    0.125968] vddarm: supplied by regulator-dummy
[    0.127280] vddpu: supplied by regulator-dummy
[    0.128480] vddsoc: supplied by regulator-dummy
[    0.156882] No ATAGs?
[    0.157339] hw-breakpoint: found 5 (+1 reserved) breakpoint and 1
watchpoint registers.
[    0.157442] hw-breakpoint: maximum watchpoint size is 4 bytes.
[    0.161886] imx6q-pinctrl 20e0000.iomuxc: initialized IMX pinctrl driver
[    0.251889] mxs-dma 110000.dma-apbh: initialized
[    0.575723] vgaarb: loaded
[    0.576724] SCSI subsystem initialized
[    0.577976] usbcore: registered new interface driver usbfs
[    0.578177] usbcore: registered new interface driver hub
[    0.578429] usbcore: registered new device driver usb
[    0.578736] usb_phy_generic usbphynop1: usbphynop1 supply vcc not
found, using dummy regulator
[    0.579422] usb_phy_generic usbphynop2: usbphynop2 supply vcc not
found, using dummy regulator
[    0.583328] i2c i2c-0: IMX I2C adapter registered
[    0.585141] i2c i2c-1: IMX I2C adapter registered
[    0.586492] i2c i2c-2: IMX I2C adapter registered
[    0.586857] media: Linux media interface: v0.10
[    0.586961] videodev: Linux video capture interface: v2.00
[    0.587400] pps_core: LinuxPPS API ver. 1 registered
[    0.587421] pps_core: Software ver. 5.3.6 - Copyright 2005-2007
Rodolfo Giometti <giometti@linux.it>
[    0.587476] PTP clock support registered
[    0.588382] Advanced Linux Sound Architecture Driver Initialized.
[    0.592138] Bluetooth: Core ver 2.22
[    0.592243] NET: Registered protocol family 31
[    0.592263] Bluetooth: HCI device and connection manager initialized
[    0.592369] Bluetooth: HCI socket layer initialized
[    0.592402] Bluetooth: L2CAP socket layer initialized
[    0.592586] Bluetooth: SCO socket layer initialized
[    0.594195] Registering the MIOB1 driver
[    0.594874] clocksource: Switched to clocksource mxc_timer1
[    1.258934] VFS: Disk quotas dquot_6.6.0
[    1.259111] VFS: Dquot-cache hash table entries: 1024 (order 0, 4096 bytes)
[    1.285747] NET: Registered protocol family 2
[    1.288032] tcp_listen_portaddr_hash hash table entries: 512
(order: 2, 20480 bytes)
[    1.288161] TCP established hash table entries: 8192 (order: 3, 32768 bytes)
[    1.288326] TCP bind hash table entries: 8192 (order: 6, 294912 bytes)
[    1.289306] TCP: Hash tables configured (established 8192 bind 8192)
[    1.289803] UDP hash table entries: 512 (order: 3, 40960 bytes)
[    1.289987] UDP-Lite hash table entries: 512 (order: 3, 40960 bytes)
[    1.290545] NET: Registered protocol family 1
[    1.292659] RPC: Registered named UNIX socket transport module.
[    1.292742] RPC: Registered udp transport module.
[    1.292761] RPC: Registered tcp transport module.
[    1.292779] RPC: Registered tcp NFSv4.1 backchannel transport module.
[    1.293779] PCI: CLS 0 bytes, default 64
[    1.295797] hw perfevents: no interrupt-affinity property for /pmu, guessing.
[    1.296451] hw perfevents: enabled with armv7_cortex_a9 PMU driver,
7 counters available
[    1.301092] Initialise system trusted keyrings
[    1.301815] workingset: timestamp_bits=14 max_order=19 bucket_order=5
[    1.320996] NFS: Registering the id_resolver key type
[    1.321160] Key type id_resolver registered
[    1.321244] Key type id_legacy registered
[    1.321420] jffs2: version 2.2. (NAND) © 2001-2006 Red Hat, Inc.
[    1.322546] romfs: ROMFS MTD (C) 2007 Red Hat, Inc.
[    1.323116] fuse: init (API version 7.30)
[    1.324872] Key type asymmetric registered
[    1.324989] Asymmetric key parser 'x509' registered
[    1.325235] bounce: pool size: 64 pages
[    1.325850] io scheduler mq-deadline registered
[    1.325874] io scheduler kyber registered
[    1.329712] imx-weim 21b8000.weim: Driver registered.
[    1.334167] imx6q-pcie 1ffc000.pcie: host bridge /soc/pcie@1ffc000 ranges:
[    1.334363] imx6q-pcie 1ffc000.pcie:    IO 0x01f80000..0x01f8ffff
-> 0x00000000
[    1.334561] imx6q-pcie 1ffc000.pcie:   MEM 0x01000000..0x01efffff
-> 0x01000000
[    1.338741] imx-sdma 20ec000.sdma: loaded firmware 3.3
[    1.340843] random: fast init done
[    1.347479] imx-pgc-pd imx-pgc-power-domain.0: DMA mask not set
[    1.347981] imx-pgc-pd imx-pgc-power-domain.1: DMA mask not set
[    1.439680] random: crng init done
[    1.454933] pfuze100-regulator 1-0008: unrecognized pfuze chip ID!
[    1.455678] pfuze100-regulator: probe of 1-0008 failed with error -110
[    1.458693] 2020000.serial: ttymxc0 at MMIO 0x2020000 (irq = 27,
base_baud = 5000000) is a IMX
[    2.332077] printk: console [ttymxc0] enabled
[    2.339264] imx-uart 21e8000.serial: has an ltc2870 combined
rs232/rs485 transceiver
[    2.347647] 21e8000.serial: ttymxc1 at MMIO 0x21e8000 (irq = 72,
base_baud = 5000000) is a IMX
[    2.357699] 21ec000.serial: ttymxc2 at MMIO 0x21ec000 (irq = 73,
base_baud = 5000000) is a IMX
[    2.367844] 21f0000.serial: ttymxc3 at MMIO 0x21f0000 (irq = 74,
base_baud = 5000000) is a IMX
[    2.400377] etnaviv etnaviv: bound 130000.gpu (ops gpu_ops)
[    2.406647] etnaviv etnaviv: bound 134000.gpu (ops gpu_ops)
[    2.412736] etnaviv etnaviv: bound 2204000.gpu (ops gpu_ops)
[    2.418491] etnaviv-gpu 130000.gpu: model: GC2000, revision: 5108
[    2.436892] etnaviv-gpu 130000.gpu: command buffer outside valid
memory window
[    2.445272] etnaviv-gpu 134000.gpu: model: GC320, revision: 5007
[    2.462852] etnaviv-gpu 134000.gpu: command buffer outside valid
memory window
[    2.471107] etnaviv-gpu 2204000.gpu: model: GC355, revision: 1215
[    2.477299] etnaviv-gpu 2204000.gpu: Ignoring GPU with VG and FE2.0
[    2.486259] [drm] Initialized etnaviv 1.2.0 20151214 for etnaviv on minor 0
[    2.497848] imx-ipuv3 2400000.ipu: IPUv3H probed
[    2.505207] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
[    2.511894] [drm] No driver support for vblank timestamp query.
[    2.519385] imx-drm display-subsystem: bound imx-ipuv3-crtc.2 (ops
ipu_crtc_ops)
[    2.527161] imx-drm display-subsystem: bound imx-ipuv3-crtc.3 (ops
ipu_crtc_ops)
[    2.534943] imx-drm display-subsystem: bound imx-ipuv3-crtc.6 (ops
ipu_crtc_ops)
[    2.542666] imx-drm display-subsystem: bound imx-ipuv3-crtc.7 (ops
ipu_crtc_ops)
[    2.550431] imx-drm display-subsystem: failed to bind 120000.hdmi
(ops dw_hdmi_imx_ops): -517
[    2.560401] imx-drm display-subsystem: master bind failed: -517
[    2.566519] imx-ipuv3 2800000.ipu: IPUv3H probed
[   27.494777] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
[   27.500797] rcu: 0-....: (1 GPs behind) idle=b62/0/0x3 softirq=39/39 fqs=1301
[   27.508222] (detected by 1, t=2603 jiffies, g=-1143, q=21)
[   27.513816] Sending NMI from CPU 1 to CPUs 0:
[   27.518568] NMI backtrace for cpu 0
[   27.518576] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
5.2.0-rc2-00045-gc63ee3cb7134 #67
[   27.518582] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[   27.518587] PC is at __do_softirq+0xbc/0x528
[   27.518590] LR is at lockdep_hardirqs_on+0xac/0x1e8
[   27.518595] pc : [<c01022dc>]    lr : [<c01899f8>]    psr: 60000113
[   27.518598] sp : c1301e80  ip : 00000000  fp : c13f0a44
[   27.518602] r10: 00000282  r9 : dc018400  r8 : 00000001
[   27.518605] r7 : 00000000  r6 : c1308ce0  r5 : 00000000  r4 : ffffe000
[   27.518609] r3 : c130c600  r2 : 00000000  r1 : 00000002  r0 : 00000001
[   27.518613] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[   27.518617] Control: 10c5387d  Table: 1000404a  DAC: 00000051
[   27.518622] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
5.2.0-rc2-00045-gc63ee3cb7134 #67
[   27.518626] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[   27.518630] [<c01127e0>] (unwind_backtrace) from [<c010cfd4>]
(show_stack+0x10/0x14)
[   27.518634] [<c010cfd4>] (show_stack) from [<c0c70bac>]
(dump_stack+0xd8/0x110)
[   27.518638] [<c0c70bac>] (dump_stack) from [<c0c77640>]
(nmi_cpu_backtrace+0x6c/0xbc)
[   27.518642] [<c0c77640>] (nmi_cpu_backtrace) from [<c0110b20>]
(handle_IPI+0xe4/0x3ac)
[   27.518646] [<c0110b20>] (handle_IPI) from [<c052e7f0>]
(gic_handle_irq+0x94/0xa8)
[   27.518650] [<c052e7f0>] (gic_handle_irq) from [<c0101a70>]
(__irq_svc+0x70/0x98)
[   27.518654] Exception stack(0xc1301e30 to 0xc1301e78)
[   27.518659] 1e20:                                     00000001
00000002 00000000 c130c600
[   27.518663] 1e40: ffffe000 00000000 c1308ce0 00000000 00000001
dc018400 00000282 c13f0a44
[   27.518667] 1e60: 00000000 c1301e80 c01899f8 c01022dc 60000113 ffffffff
[   27.518671] [<c0101a70>] (__irq_svc) from [<c01022dc>]
(__do_softirq+0xbc/0x528)
[   27.518676] [<c01022dc>] (__do_softirq) from [<c012eff0>]
(irq_exit+0x12c/0x180)
[   27.518680] [<c012eff0>] (irq_exit) from [<c0195eb0>]
(__handle_domain_irq+0x6c/0xe0)
[   27.518684] [<c0195eb0>] (__handle_domain_irq) from [<c052e7a8>]
(gic_handle_irq+0x4c/0xa8)
[   27.518688] [<c052e7a8>] (gic_handle_irq) from [<c0101a70>]
(__irq_svc+0x70/0x98)
[   27.518691] Exception stack(0xc1301f10 to 0xc1301f58)
[   27.518696] 1f00:                                     00000001
00000006 00000000 c130c600
[   27.518700] 1f20: ffffe000 c1308928 00000001 c1308964 00000000
00000000 c1308908 c1308978
[   27.518703] 1f40: 00000000 c1301f60 c0189a48 c0109490 20000013 ffffffff
[   27.518707] [<c0101a70>] (__irq_svc) from [<c0109490>]
(arch_cpu_idle+0x20/0x3c)
[   27.518711] [<c0109490>] (arch_cpu_idle) from [<c0160748>]
(do_idle+0x1b8/0x2c0)
[   27.518715] [<c0160748>] (do_idle) from [<c0160be0>]
(cpu_startup_entry+0x18/0x20)
[   27.518720] [<c0160be0>] (cpu_startup_entry) from [<c1200e24>]
(start_kernel+0x410/0x4c0)
[   27.518725] [<c1200e24>] (start_kernel) from [<00000000>] (0x0)
Robin Gong June 4, 2019, 8:51 a.m. UTC | #19
Hi Sven,
	I saw a similar kernel crash on the latest linux-next during kernel boot-up, but not on
the 'Linux 5.2-rc1' tag. I will bisect later. But v5.0 should be okay; could you help double-check?

> -----Original Message-----
> From: Sven Van Asbroeck <thesven73@gmail.com>
> Sent: 2019年6月1日 5:59
> Hi Robin,
> 
> On Thu, May 30, 2019 at 8:26 PM Robin Gong <yibin.gong@nxp.com> wrote:
> >         What's soc chip and board you used? Could you post log?
> 
> The mainline kernel we've been using does not have any sdma firmware, and
> as a result we get this in the log:
> 
> # uname -a
> Linux Chimera 5.2.0-rc2-00041-g22a1787e8f40 #66 SMP Fri May 31
> 16:08:15 EDT 2019 armv7l GNU/Linux
> # dmesg | grep sdma
> [    1.358472] imx-sdma 20ec000.sdma: Direct firmware load for
> imx/sdma/sdma-imx6q.bin failed with error -2
> [    1.358570] imx-sdma 20ec000.sdma: Falling back to sysfs fallback
> for: imx/sdma/sdma-imx6q.bin
> [   64.473773] imx-sdma 20ec000.sdma: external firmware not found,
> using ROM firmware
> 
> I downloaded the latest Freescale sdma firmware (3.3) from
> http://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware
> (file: imx/sdma/sdma-imx6q.bin) and added this to the kernel. This now gets
> uploaded to the imx6q, we see this in the log:
> 
> [    1.338741] imx-sdma 20ec000.sdma: loaded firmware 3.3
> 
> But... the kernel now locks up and crashes. See the dmesg log at the end of this
> e-mail.
> 
> I have not been able to bisect the exact commit where this crash was
> introduced.
> All I know is:
> v4.20 good
> v5.0 bad
> 
> Also: if I remove CONFIG_NFS_V4 from the defconfig, the problem disappears.
> 
> This is on an imx6q:
> # cat /proc/cpuinfo
> processor : 0
> model name : ARMv7 Processor rev 10 (v7l)
> BogoMIPS : 7.54
> Features : half thumb fastmult vfp edsp neon vfpv3 tls vfpd32
> CPU implementer : 0x41
> CPU architecture: 7
> CPU variant : 0x2
> CPU part : 0xc09
> CPU revision : 10
> <...>
> Hardware : Freescale i.MX6 Quad/DualLite (Device Tree)
> Revision : 0000
> Serial : 0000000000000000
> 
> The board is an in-house design, loosely based on the sabre-sd. We have our
> own small patch set which adds our devicetree, plus a few very specific
> patches we need. We apply this on top of mainline.
> 
> We also use imx_v6_v7_defconfig.
> 
> Here is the crash log:
> 
> [    0.000000] Booting Linux on physical CPU 0x0
> [    0.000000] Linux version 5.2.0-rc2-00045-gc63ee3cb7134
> (sva@svens-asus) (gcc version 7.3.1 20180425 [linaro-7.3-2018.05 revision
> d29120a424ecfbc167ef90065c0eeb7f91977701] (Linaro GCC
> 7.3-2018.05)) #67 SMP Fri May 31 17:26:19 EDT 2019
> [    0.000000] CPU: ARMv7 Processor [412fc09a] revision 10 (ARMv7),
> cr=10c5387d
> [    0.000000] CPU: PIPT / VIPT nonaliasing data cache, VIPT aliasing
> instruction cache
> [    0.000000] OF: fdt: Machine model: ARCX Medusa
> [    0.000000] Memory policy: Data cache writealloc
> [    0.000000] cma: Reserved 256 MiB at 0x30000000
> [    0.000000] percpu: Embedded 21 pages/cpu s54824 r8192 d23000
> u86016
> [    0.000000] Built 1 zonelists, mobility grouping on.  Total pages: 522560
> [    0.000000] Kernel command line: console=ttymxc0,115200
> video=mxcfb0:dev=pegasus,640x480M@60,if=RGB24,bpp=32
> video=mxcfb1:dev=hdmi,640x480M@60,if=RGB24,bpp=32 ip=none
> root=/dev/mmcblk0p3 rootwait ro rootfstype=ext2
> [    0.000000] Dentry cache hash table entries: 131072 (order: 7, 524288
> bytes)
> [    0.000000] Inode-cache hash table entries: 65536 (order: 6, 262144
> bytes)
> [    0.000000] Memory: 1788368K/2097152K available (12288K kernel
> code, 984K rwdata, 4300K rodata, 1024K init, 6927K bss, 46640K reserved,
> 262144K cma-reserved, 1310720K highmem)
> [    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4,
> Nodes=1
> [    0.000000] Running RCU self tests
> [    0.000000] rcu: Hierarchical RCU implementation.
> [    0.000000] rcu: RCU event tracing is enabled.
> [    0.000000] rcu: RCU lockdep checking is enabled.
> [    0.000000] rcu: RCU calculated value of scheduler-enlistment delay
> is 10 jiffies.
> [    0.000000] NR_IRQS: 16, nr_irqs: 16, preallocated irqs: 16
> [    0.000000] L2C: DT/platform modifies aux control register:
> 0x32070000 -> 0x32470000
> [    0.000000] L2C-310 errata 752271 769419 enabled
> [    0.000000] L2C-310 enabling early BRESP for Cortex-A9
> [    0.000000] L2C-310 full line of zeros enabled for Cortex-A9
> [    0.000000] L2C-310 ID prefetch enabled, offset 16 lines
> [    0.000000] L2C-310 dynamic clock gating enabled, standby mode enabled
> [    0.000000] L2C-310 cache controller enabled, 16 ways, 1024 kB
> [    0.000000] L2C-310: CACHE_ID 0x410000c7, AUX_CTRL 0x76470001
> [    0.000000] random: get_random_bytes called from
> start_kernel+0x2ac/0x4c0 with crng_init=0
> [    0.000000] Switching to timer-based delay loop, resolution 333ns
> [    0.000008] sched_clock: 32 bits at 3000kHz, resolution 333ns,
> wraps every 715827882841ns
> [    0.000034] clocksource: mxc_timer1: mask: 0xffffffff max_cycles:
> 0xffffffff, max_idle_ns: 637086815595 ns
> [    0.002040] Console: colour dummy device 80x30
> [    0.002082] Lock dependency validator: Copyright (c) 2006 Red Hat,
> Inc., Ingo Molnar
> [    0.002099] ... MAX_LOCKDEP_SUBCLASSES:  8
> [    0.002115] ... MAX_LOCK_DEPTH:          48
> [    0.002130] ... MAX_LOCKDEP_KEYS:        8191
> [    0.002145] ... CLASSHASH_SIZE:          4096
> [    0.002160] ... MAX_LOCKDEP_ENTRIES:     32768
> [    0.002176] ... MAX_LOCKDEP_CHAINS:      65536
> [    0.002191] ... CHAINHASH_SIZE:          32768
> [    0.002206]  memory used by lock dependency info: 4411 kB
> [    0.002221]  per task-struct memory footprint: 1536 bytes
> [    0.002319] Calibrating delay loop (skipped), value calculated
> using timer frequency.. 6.00 BogoMIPS (lpj=30000)
> [    0.002346] pid_max: default: 32768 minimum: 301
> [    0.002804] Mount-cache hash table entries: 2048 (order: 1, 8192 bytes)
> [    0.002834] Mountpoint-cache hash table entries: 2048 (order: 1, 8192
> bytes)
> [    0.004993] *** VALIDATE proc ***
> [    0.006217] *** VALIDATE cgroup1 ***
> [    0.006244] *** VALIDATE cgroup2 ***
> [    0.006274] CPU: Testing write buffer coherency: ok
> [    0.006361] CPU0: Spectre v2: using BPIALL workaround
> [    0.007728] CPU0: thread -1, cpu 0, socket 0, mpidr 80000000
> [    0.010229] Setting up static identity map for 0x10100000 - 0x10100078
> [    0.010805] rcu: Hierarchical SRCU implementation.
> [    0.012321] smp: Bringing up secondary CPUs ...
> [    0.014731] CPU1: thread -1, cpu 1, socket 0, mpidr 80000001
> [    0.014742] CPU1: Spectre v2: using BPIALL workaround
> [    0.017456] CPU2: thread -1, cpu 2, socket 0, mpidr 80000002
> [    0.017467] CPU2: Spectre v2: using BPIALL workaround
> [    0.019703] CPU3: thread -1, cpu 3, socket 0, mpidr 80000003
> [    0.019714] CPU3: Spectre v2: using BPIALL workaround
> [    0.020158] smp: Brought up 1 node, 4 CPUs
> [    0.020182] SMP: Total of 4 processors activated (24.00 BogoMIPS).
> [    0.020200] CPU: All CPU(s) started in SVC mode.
> [    0.023381] devtmpfs: initialized
> [    0.055718] VFP support v0.3: implementor 41 architecture 3 part 30
> variant 9 rev 4
> [    0.057990] clocksource: jiffies: mask: 0xffffffff max_cycles:
> 0xffffffff, max_idle_ns: 19112604462750000 ns
> [    0.058053] futex hash table entries: 1024 (order: 4, 65536 bytes)
> [    0.071931] pinctrl core: initialized pinctrl subsystem
> [    0.076633] NET: Registered protocol family 16
> [    0.099278] DMA: preallocated 256 KiB pool for atomic coherent
> allocations
> [    0.102498] cpuidle: using governor menu
> [    0.102817] CPU identified as i.MX6Q, silicon rev 1.5
> [    0.121955] vdd1p1: supplied by regulator-dummy
> [    0.123569] vdd3p0: supplied by regulator-dummy
> [    0.124764] vdd2p5: supplied by regulator-dummy
> [    0.125968] vddarm: supplied by regulator-dummy
> [    0.127280] vddpu: supplied by regulator-dummy
> [    0.128480] vddsoc: supplied by regulator-dummy
> [    0.156882] No ATAGs?
> [    0.157339] hw-breakpoint: found 5 (+1 reserved) breakpoint and 1
> watchpoint registers.
> [    0.157442] hw-breakpoint: maximum watchpoint size is 4 bytes.
> [    0.161886] imx6q-pinctrl 20e0000.iomuxc: initialized IMX pinctrl driver
> [    0.251889] mxs-dma 110000.dma-apbh: initialized
> [    0.575723] vgaarb: loaded
> [    0.576724] SCSI subsystem initialized
> [    0.577976] usbcore: registered new interface driver usbfs
> [    0.578177] usbcore: registered new interface driver hub
> [    0.578429] usbcore: registered new device driver usb
> [    0.578736] usb_phy_generic usbphynop1: usbphynop1 supply vcc not
> found, using dummy regulator
> [    0.579422] usb_phy_generic usbphynop2: usbphynop2 supply vcc not
> found, using dummy regulator
> [    0.583328] i2c i2c-0: IMX I2C adapter registered
> [    0.585141] i2c i2c-1: IMX I2C adapter registered
> [    0.586492] i2c i2c-2: IMX I2C adapter registered
> [    0.586857] media: Linux media interface: v0.10
> [    0.586961] videodev: Linux video capture interface: v2.00
> [    0.587400] pps_core: LinuxPPS API ver. 1 registered
> [    0.587421] pps_core: Software ver. 5.3.6 - Copyright 2005-2007
> Rodolfo Giometti <giometti@linux.it>
> [    0.587476] PTP clock support registered
> [    0.588382] Advanced Linux Sound Architecture Driver Initialized.
> [    0.592138] Bluetooth: Core ver 2.22
> [    0.592243] NET: Registered protocol family 31
> [    0.592263] Bluetooth: HCI device and connection manager initialized
> [    0.592369] Bluetooth: HCI socket layer initialized
> [    0.592402] Bluetooth: L2CAP socket layer initialized
> [    0.592586] Bluetooth: SCO socket layer initialized
> [    0.594195] Registering the MIOB1 driver
> [    0.594874] clocksource: Switched to clocksource mxc_timer1
> [    1.258934] VFS: Disk quotas dquot_6.6.0
> [    1.259111] VFS: Dquot-cache hash table entries: 1024 (order 0, 4096
> bytes)
> [    1.285747] NET: Registered protocol family 2
> [    1.288032] tcp_listen_portaddr_hash hash table entries: 512
> (order: 2, 20480 bytes)
> [    1.288161] TCP established hash table entries: 8192 (order: 3, 32768
> bytes)
> [    1.288326] TCP bind hash table entries: 8192 (order: 6, 294912 bytes)
> [    1.289306] TCP: Hash tables configured (established 8192 bind 8192)
> [    1.289803] UDP hash table entries: 512 (order: 3, 40960 bytes)
> [    1.289987] UDP-Lite hash table entries: 512 (order: 3, 40960 bytes)
> [    1.290545] NET: Registered protocol family 1
> [    1.292659] RPC: Registered named UNIX socket transport module.
> [    1.292742] RPC: Registered udp transport module.
> [    1.292761] RPC: Registered tcp transport module.
> [    1.292779] RPC: Registered tcp NFSv4.1 backchannel transport module.
> [    1.293779] PCI: CLS 0 bytes, default 64
> [    1.295797] hw perfevents: no interrupt-affinity property for /pmu,
> guessing.
> [    1.296451] hw perfevents: enabled with armv7_cortex_a9 PMU driver,
> 7 counters available
> [    1.301092] Initialise system trusted keyrings
> [    1.301815] workingset: timestamp_bits=14 max_order=19
> bucket_order=5
> [    1.320996] NFS: Registering the id_resolver key type
> [    1.321160] Key type id_resolver registered
> [    1.321244] Key type id_legacy registered
> [    1.321420] jffs2: version 2.2. (NAND) © 2001-2006 Red Hat, Inc.
> [    1.322546] romfs: ROMFS MTD (C) 2007 Red Hat, Inc.
> [    1.323116] fuse: init (API version 7.30)
> [    1.324872] Key type asymmetric registered
> [    1.324989] Asymmetric key parser 'x509' registered
> [    1.325235] bounce: pool size: 64 pages
> [    1.325850] io scheduler mq-deadline registered
> [    1.325874] io scheduler kyber registered
> [    1.329712] imx-weim 21b8000.weim: Driver registered.
> [    1.334167] imx6q-pcie 1ffc000.pcie: host bridge /soc/pcie@1ffc000
> ranges:
> [    1.334363] imx6q-pcie 1ffc000.pcie:    IO 0x01f80000..0x01f8ffff
> -> 0x00000000
> [    1.334561] imx6q-pcie 1ffc000.pcie:   MEM 0x01000000..0x01efffff
> -> 0x01000000
> [    1.338741] imx-sdma 20ec000.sdma: loaded firmware 3.3
> [    1.340843] random: fast init done
> [    1.347479] imx-pgc-pd imx-pgc-power-domain.0: DMA mask not set
> [    1.347981] imx-pgc-pd imx-pgc-power-domain.1: DMA mask not set
> [    1.439680] random: crng init done
> [    1.454933] pfuze100-regulator 1-0008: unrecognized pfuze chip ID!
> [    1.455678] pfuze100-regulator: probe of 1-0008 failed with error -110
> [    1.458693] 2020000.serial: ttymxc0 at MMIO 0x2020000 (irq = 27,
> base_baud = 5000000) is a IMX
> [    2.332077] printk: console [ttymxc0] enabled
> [    2.339264] imx-uart 21e8000.serial: has an ltc2870 combined
> rs232/rs485 transceiver
> [    2.347647] 21e8000.serial: ttymxc1 at MMIO 0x21e8000 (irq = 72,
> base_baud = 5000000) is a IMX
> [    2.357699] 21ec000.serial: ttymxc2 at MMIO 0x21ec000 (irq = 73,
> base_baud = 5000000) is a IMX
> [    2.367844] 21f0000.serial: ttymxc3 at MMIO 0x21f0000 (irq = 74,
> base_baud = 5000000) is a IMX
> [    2.400377] etnaviv etnaviv: bound 130000.gpu (ops gpu_ops)
> [    2.406647] etnaviv etnaviv: bound 134000.gpu (ops gpu_ops)
> [    2.412736] etnaviv etnaviv: bound 2204000.gpu (ops gpu_ops)
> [    2.418491] etnaviv-gpu 130000.gpu: model: GC2000, revision: 5108
> [    2.436892] etnaviv-gpu 130000.gpu: command buffer outside valid
> memory window
> [    2.445272] etnaviv-gpu 134000.gpu: model: GC320, revision: 5007
> [    2.462852] etnaviv-gpu 134000.gpu: command buffer outside valid
> memory window
> [    2.471107] etnaviv-gpu 2204000.gpu: model: GC355, revision: 1215
> [    2.477299] etnaviv-gpu 2204000.gpu: Ignoring GPU with VG and FE2.0
> [    2.486259] [drm] Initialized etnaviv 1.2.0 20151214 for etnaviv on minor
> 0
> [    2.497848] imx-ipuv3 2400000.ipu: IPUv3H probed
> [    2.505207] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
> [    2.511894] [drm] No driver support for vblank timestamp query.
> [    2.519385] imx-drm display-subsystem: bound imx-ipuv3-crtc.2 (ops
> ipu_crtc_ops)
> [    2.527161] imx-drm display-subsystem: bound imx-ipuv3-crtc.3 (ops
> ipu_crtc_ops)
> [    2.534943] imx-drm display-subsystem: bound imx-ipuv3-crtc.6 (ops
> ipu_crtc_ops)
> [    2.542666] imx-drm display-subsystem: bound imx-ipuv3-crtc.7 (ops
> ipu_crtc_ops)
> [    2.550431] imx-drm display-subsystem: failed to bind 120000.hdmi
> (ops dw_hdmi_imx_ops): -517
> [    2.560401] imx-drm display-subsystem: master bind failed: -517
> [    2.566519] imx-ipuv3 2800000.ipu: IPUv3H probed
> [   27.494777] rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
> [   27.500797] rcu: 0-....: (1 GPs behind) idle=b62/0/0x3 softirq=39/39
> fqs=1301
> [   27.508222] (detected by 1, t=2603 jiffies, g=-1143, q=21)
> [   27.513816] Sending NMI from CPU 1 to CPUs 0:
> [   27.518568] NMI backtrace for cpu 0
> [   27.518576] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
> 5.2.0-rc2-00045-gc63ee3cb7134 #67
> [   27.518582] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [   27.518587] PC is at __do_softirq+0xbc/0x528
> [   27.518590] LR is at lockdep_hardirqs_on+0xac/0x1e8
> [   27.518595] pc : [<c01022dc>]    lr : [<c01899f8>]    psr: 60000113
> [   27.518598] sp : c1301e80  ip : 00000000  fp : c13f0a44
> [   27.518602] r10: 00000282  r9 : dc018400  r8 : 00000001
> [   27.518605] r7 : 00000000  r6 : c1308ce0  r5 : 00000000  r4 : ffffe000
> [   27.518609] r3 : c130c600  r2 : 00000000  r1 : 00000002  r0 :
> 00000001
> [   27.518613] Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM
> Segment none
> [   27.518617] Control: 10c5387d  Table: 1000404a  DAC: 00000051
> [   27.518622] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
> 5.2.0-rc2-00045-gc63ee3cb7134 #67
> [   27.518626] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [   27.518630] [<c01127e0>] (unwind_backtrace) from [<c010cfd4>]
> (show_stack+0x10/0x14)
> [   27.518634] [<c010cfd4>] (show_stack) from [<c0c70bac>]
> (dump_stack+0xd8/0x110)
> [   27.518638] [<c0c70bac>] (dump_stack) from [<c0c77640>]
> (nmi_cpu_backtrace+0x6c/0xbc)
> [   27.518642] [<c0c77640>] (nmi_cpu_backtrace) from [<c0110b20>]
> (handle_IPI+0xe4/0x3ac)
> [   27.518646] [<c0110b20>] (handle_IPI) from [<c052e7f0>]
> (gic_handle_irq+0x94/0xa8)
> [   27.518650] [<c052e7f0>] (gic_handle_irq) from [<c0101a70>]
> (__irq_svc+0x70/0x98)
> [   27.518654] Exception stack(0xc1301e30 to 0xc1301e78)
> [   27.518659] 1e20:                                     00000001
> 00000002 00000000 c130c600
> [   27.518663] 1e40: ffffe000 00000000 c1308ce0 00000000 00000001
> dc018400 00000282 c13f0a44
> [   27.518667] 1e60: 00000000 c1301e80 c01899f8 c01022dc 60000113
> ffffffff
> [   27.518671] [<c0101a70>] (__irq_svc) from [<c01022dc>]
> (__do_softirq+0xbc/0x528)
> [   27.518676] [<c01022dc>] (__do_softirq) from [<c012eff0>]
> (irq_exit+0x12c/0x180)
> [   27.518680] [<c012eff0>] (irq_exit) from [<c0195eb0>]
> (__handle_domain_irq+0x6c/0xe0)
> [   27.518684] [<c0195eb0>] (__handle_domain_irq) from [<c052e7a8>]
> (gic_handle_irq+0x4c/0xa8)
> [   27.518688] [<c052e7a8>] (gic_handle_irq) from [<c0101a70>]
> (__irq_svc+0x70/0x98)
> [   27.518691] Exception stack(0xc1301f10 to 0xc1301f58)
> [   27.518696] 1f00:                                     00000001
> 00000006 00000000 c130c600
> [   27.518700] 1f20: ffffe000 c1308928 00000001 c1308964 00000000
> 00000000 c1308908 c1308978
> [   27.518703] 1f40: 00000000 c1301f60 c0189a48 c0109490 20000013
> ffffffff
> [   27.518707] [<c0101a70>] (__irq_svc) from [<c0109490>]
> (arch_cpu_idle+0x20/0x3c)
> [   27.518711] [<c0109490>] (arch_cpu_idle) from [<c0160748>]
> (do_idle+0x1b8/0x2c0)
> [   27.518715] [<c0160748>] (do_idle) from [<c0160be0>]
> (cpu_startup_entry+0x18/0x20)
> [   27.518720] [<c0160be0>] (cpu_startup_entry) from [<c1200e24>]
> (start_kernel+0x410/0x4c0)
> [   27.518725] [<c1200e24>] (start_kernel) from [<00000000>] (0x0)
Sven Van Asbroeck June 4, 2019, 5:57 p.m. UTC | #20
Hi Robin,

On Tue, Jun 4, 2019 at 4:51 AM Robin Gong <yibin.gong@nxp.com> wrote:
>
> Hi Sven,
>         I saw a similar kernel crash on the latest linux-next during kernel boot-up, but not on
> the 'Linux 5.2-rc1' tag. I will bisect later. But v5.0 should be okay; could you help double-check?

Of course. My tests show:

v5.0 bad
v5.2-rc1 bad
v5.2-rc2 bad
v5.2-rc3 bad
v4.20 good

where "bad" means: kernel crashes on boot if sdma firmware is loaded.
kernel works ok if we use sdma firmware already in rom.

Are you able to reproduce this issue? I started bisecting last week, but
ran out of time. I could try again here, if you cannot reliably reproduce it.
Robin Gong June 5, 2019, 10:08 a.m. UTC | #21
Hi Sven,
	I hit the crash below on v5.2-rc1 and found it is related to commit 728e0fbf263e. After some searching, the latest
linux-next should fix the issue; please check https://lkml.org/lkml/2019/6/3/1405.
After I rebased onto the latest linux-next, that issue was gone, but there was still another crash after kernel boot-up. I believe that is
caused by NFS, because the issue disappears if I mount an MMC rootfs instead of NFS. I don't think it is related to the
SDMA firmware download, but it seems your issue is different from mine. Anyway, could you try the latest linux-next?

 [   17.794449] Internal error: Oops - undefined instruction: 0 [#1] SMP ARM
[   17.794459] Modules linked in:
[   17.794473] CPU: 0 PID: 29 Comm: kworker/0:1 Not tainted 5.2.0-rc2-00431-gcaaadc9 #285
[   17.794478] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[   17.794499] Workqueue: events bpf_prog_free_deferred
[   17.794515] PC is at __free_vmap_area+0x7c/0x390
[   17.794523] LR is at 0xe83a2be0
[   17.794528] pc : [<c02626bc>]    lr : [<e83a2be0>]    psr: 20000013
[   17.794533] sp : e81f5e60  ip : f0da1000  fp : eafac378
[   17.794537] r10: c12089ec  r9 : c12300e4  r8 : c198b0cc
[   17.794542] r7 : c198b0dc  r6 : e87f5ee0  r5 : e87f5ed0  r4 : e87f5ee0
[   17.794547] r3 : e83a2bb0  r2 : e83a2c60  r1 : e83a2bb0  r0 : f0d9d000
[   17.794555] Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[   17.794561] Control: 10c5387d  Table: 38b9c04a  DAC: 00000051
[   17.794569] Process kworker/0:1 (pid: 29, stack limit = 0x042ac40f)
[   17.794574] Stack: (0xe81f5e60 to 0xe81f6000)
[   17.794584] 5e60: c198b0cc c198b0dc e87f5ea0 00000004 c198b0cc c198b0dc 0000c000 c12300e4
[   17.794593] 5e80: c12089ec c0262a9c c12e3205 ffffffff 00000000 c12e3205 00000001 00000004
[   17.794602] 5ea0: c12089ec c0263644 00000000 00000000 c02634f0 c0bf8e70 00000000 c11b638c
[   17.794610] 5ec0: 00000075 e97164c0 00000001 f0b5d000 00000000 ffffffff c1208928 c12e43f0
[   17.794618] 5ee0: c12e30a9 c0265efc e9142ea4 e81e9080 eaf6f4c0 eaf72700 e81f5f1c c01468e4
[   17.794627] 5f00: 00000001 00000000 c014682c eaf6f4d0 00000000 00000000 c0147b54 c1986060
[   17.794635] 5f20: c15949d0 00000000 c0ec3a08 a010c325 c1205900 e81e9080 e81e9094 eaf6f4c0
[   17.794644] 5f40: 00000008 eaf6f4f4 e81f4000 eaf6f4c0 c1205900 c0147ae0 e813a10c c0bf8e70
[   17.794652] 5f60: e813a100 e813a100 00000000 e81e4dc0 e81f4000 e81e9080 c0147aac e813a138
[   17.794660] 5f80: e80d9e90 c014d564 e8130c80 e81e4dc0 c014d458 00000000 00000000 00000000
[   17.794668] 5fa0: 00000000 00000000 00000000 c01010b4 00000000 00000000 00000000 00000000
[   17.794675] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   17.794683] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000
[   17.794698] [<c02626bc>] (__free_vmap_area) from [<c0262a9c>] (__purge_vmap_area_lazy+0xcc/0x15c)
[   17.794710] [<c0262a9c>] (__purge_vmap_area_lazy) from [<c0263644>] (_vm_unmap_aliases+0x1a8/0x220)
[   17.794720] [<c0263644>] (_vm_unmap_aliases) from [<c0265efc>] (__vunmap+0x174/0x218)
[   17.794736] [<c0265efc>] (__vunmap) from [<c01468e4>] (process_one_work+0x2d0/0x704)
[   17.794747] [<c01468e4>] (process_one_work) from [<c0147ae0>] (worker_thread+0x34/0x560)
[   17.794759] [<c0147ae0>] (worker_thread) from [<c014d564>] (kthread+0x10c/0x148)
[   17.794770] [<c014d564>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
[   17.794775] Exception stack(0xe81f5fb0 to 0xe81f5ff8)
[   17.794782] 5fa0:                                     00000000 00000000 00000000 00000000
[   17.794790] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[   17.794797] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000
[   17.794806] Code: e5132010 9a000002 e15c0002 9a000008 (e7f001f2)
[   17.794816] ---[ end trace 478473ef71849173 ]---
[   44.153270] rcu: INFO: rcu_sched self-detected stall on CPU
[   44.158868] rcu:     3-....: (2599 ticks this GP) idle=f42/1/0x40000002 softirq=2523/2523 fqs=1300
[   44.167659]  (t=2600 jiffies g=585 q=509)
[   44.171676] NMI backtrace for cpu 3
[   44.175176] CPU: 3 PID: 1 Comm: systemd Tainted: G      D           5.2.0-rc2-00431-gcaaadc9 #285
[   44.184051] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[   44.190609] [<c0112680>] (unwind_backtrace) from [<c010ceb4>] (show_stack+0x10/0x14)
[   44.198369] [<c010ceb4>] (show_stack) from [<c0bd623c>] (dump_stack+0xd4/0x108)
[   44.205690] [<c0bd623c>] (dump_stack) from [<c0bdd608>] (nmi_cpu_backtrace+0xac/0xbc)
[   44.213534] [<c0bdd608>] (nmi_cpu_backtrace) from [<c0bdd6f8>] (nmi_trigger_cpumask_backtrace+0xe0/0x134)
[   44.223114] [<c0bdd6f8>] (nmi_trigger_cpumask_backtrace) from [<c01a8390>] (rcu_dump_cpu_stacks+0xac/0xf0)
[   44.232780] [<c01a8390>] (rcu_dump_cpu_stacks) from [<c01a77b0>] (rcu_sched_clock_irq+0x7d0/0xa30)
[   44.241752] [<c01a77b0>] (rcu_sched_clock_irq) from [<c01af444>] (update_process_times+0x30/0x5c)
[   44.250638] [<c01af444>] (update_process_times) from [<c01c3e50>] (tick_sched_timer+0x5c/0xc0)
[   44.259263] [<c01c3e50>] (tick_sched_timer) from [<c01b00f8>] (__hrtimer_run_queues+0x170/0x500)
[   44.268058] [<c01b00f8>] (__hrtimer_run_queues) from [<c01b1444>] (hrtimer_interrupt+0x154/0x2d0)
[   44.276941] [<c01b1444>] (hrtimer_interrupt) from [<c01119a4>] (twd_handler+0x2c/0x38)
[   44.284869] [<c01119a4>] (twd_handler) from [<c0192554>] (handle_percpu_devid_irq+0xd4/0x384)
[   44.293405] [<c0192554>] (handle_percpu_devid_irq) from [<c018c28c>] (generic_handle_irq+0x20/0x34)
[   44.302462] [<c018c28c>] (generic_handle_irq) from [<c018c88c>] (__handle_domain_irq+0x64/0xe0)
[   44.311177] [<c018c88c>] (__handle_domain_irq) from [<c04ebb40>] (gic_handle_irq+0x58/0xb8)
[   44.319542] [<c04ebb40>] (gic_handle_irq) from [<c0101a70>] (__irq_svc+0x70/0x98)
[   44.327030] Exception stack(0xe80b3bb0 to 0xe80b3bf8)
[   44.332089] 3ba0:                                     c12300e4 00000000 000003c4 000003c3
[   44.340275] 3bc0: c12300e4 ffffe000 f0800000 00004000 00000000 f0800000 ffffffff 00000001
[   44.348459] 3be0: e80b2000 e80b3c00 c0264c6c c01834b0 800d0013 ffffffff










> -----Original Message-----
> From: Sven Van Asbroeck <thesven73@gmail.com>
> Sent: 2019年6月5日 1:57
>
> Hi Robin,
> 
> On Tue, Jun 4, 2019 at 4:51 AM Robin Gong <yibin.gong@nxp.com> wrote:
> >
> > Hi Sven,
> >         I saw similar kernel crash issue on the latest linux-next
> > during kernel boot up, but not on the 'Linux 5.2-rc1 ' tag. Will do bisect later.
> But v5.0 should be okay, could you help double check?
> 
> Of course. My tests show:
> 
> v5.0 bad
> v5.2-rc1 bad
> v5.2-rc2 bad
> v5.2-rc3 bad
> v4.20 good
> 
> where "bad" means: kernel crashes on boot if sdma firmware is loaded.
> kernel works ok if we use sdma firmware already in rom.
> 
> Are you able to reproduce this issue? I started bisecting last week, but ran out
> of time. I could try again here, if you cannot reliably reproduce it.
Sven Van Asbroeck June 5, 2019, 1:51 p.m. UTC | #22
On Wed, Jun 5, 2019 at 6:08 AM Robin Gong <yibin.gong@nxp.com> wrote:
>
> Hi Sven,
>         I met below crash on v5.2-rc1 and found it's related with commit 728e0fbf263e, after googled,  the latest
> Linux-next should fix the issue, please check https://lkml.org/lkml/2019/6/3/1405.
> After I rebased to the latest linux-next, no such issue but still another crash issue after kernel bootup, I believe that's
> Caused by NFS, because the issue is gone if I mount to mmc rootfs instead of NFS. I don't think it's related with
> Sdma firmware download, but seems your issue is different with mine. Anyway, could you try the latest linux-next?

I tried the latest linux-next
(b2924447b98afa42f13f16b1a4786f0872a2fc37) but the same issue remains:
kernel crashes on boot if sdma firmware is loaded.
kernel boots normally if using sdma firmware in rom.

Crash log:

[    0.000000] Booting Linux on physical CPU 0x0
[    0.000000] Linux version
5.2.0-rc3-next-20190605-00042-g1a0686c7a5d2 (sva@svens-asus) (gcc
version 7.3.1 20180425 [linaro-7.3-2018.05 revision
d29120a424ecfbc167ef90065c0eeb7f91977701] (Linaro GCC 7.3-2018.05)) #1
SMP Wed Jun 5 09:33:56 EDT 2019
<snip>
[    1.337525] imx-sdma 20ec000.sdma: loaded firmware 3.3
[    1.339537] random: fast init done
[    1.346061] imx-pgc-pd imx-pgc-power-domain.0: DMA mask not set
[    1.346557] imx-pgc-pd imx-pgc-power-domain.1: DMA mask not set
[    1.411436] random: crng init done
[    1.455144] pfuze100-regulator 1-0008: unrecognized pfuze chip ID!
[    1.455874] pfuze100-regulator: probe of 1-0008 failed with error -110
[    1.458875] 2020000.serial: ttymxc0 at MMIO 0x2020000 (irq = 27,
base_baud = 5000000) is a IMX
[    2.332541] printk: console [ttymxc0] enabled
[    2.339484] imx-uart 21e8000.serial: has an ltc2870 combined
rs232/rs485 transceiver
[    2.347850] 21e8000.serial: ttymxc1 at MMIO 0x21e8000 (irq = 72,
base_baud = 5000000) is a IMX
[    2.357867] 21ec000.serial: ttymxc2 at MMIO 0x21ec000 (irq = 73,
base_baud = 5000000) is a IMX
[    2.368019] 21f0000.serial: ttymxc3 at MMIO 0x21f0000 (irq = 74,
base_baud = 5000000) is a IMX
[    2.400772] etnaviv etnaviv: bound 130000.gpu (ops gpu_ops)
[    2.407076] etnaviv etnaviv: bound 134000.gpu (ops gpu_ops)
[    2.413221] etnaviv etnaviv: bound 2204000.gpu (ops gpu_ops)
[    2.418977] etnaviv-gpu 130000.gpu: model: GC2000, revision: 5108
[    2.437166] etnaviv-gpu 130000.gpu: command buffer outside valid
memory window
[    2.445459] etnaviv-gpu 134000.gpu: model: GC320, revision: 5007
[    2.463082] etnaviv-gpu 134000.gpu: command buffer outside valid
memory window
[    2.471240] etnaviv-gpu 2204000.gpu: model: GC355, revision: 1215
[    2.477413] etnaviv-gpu 2204000.gpu: Ignoring GPU with VG and FE2.0
[    2.486360] [drm] Initialized etnaviv 1.2.0 20151214 for etnaviv on minor 0
[    2.497949] imx-ipuv3 2400000.ipu: IPUv3H probed
[    2.505316] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
[    2.512006] [drm] No driver support for vblank timestamp query.
[    2.519532] imx-drm display-subsystem: bound imx-ipuv3-crtc.2 (ops
ipu_crtc_ops)
[    2.527288] imx-drm display-subsystem: bound imx-ipuv3-crtc.3 (ops
ipu_crtc_ops)
[    2.535064] imx-drm display-subsystem: bound imx-ipuv3-crtc.6 (ops
ipu_crtc_ops)
[    2.542762] imx-drm display-subsystem: bound imx-ipuv3-crtc.7 (ops
ipu_crtc_ops)
[    2.550523] imx-drm display-subsystem: failed to bind 120000.hdmi
(ops dw_hdmi_imx_ops): -517
[    2.560505] imx-drm display-subsystem: master bind failed: -517
[    2.566609] imx-ipuv3 2800000.ipu: IPUv3H probed
[   27.324992] rcu: INFO: rcu_sched self-detected stall on CPU
[   27.330691] rcu: 0-....: (2599 ticks this GP) idle=532/0/0x3
softirq=36/36 fqs=1300
[   27.338654] (t=2601 jiffies g=-1147 q=29)
[   27.342771] NMI backtrace for cpu 0
[   27.346283] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
5.2.0-rc3-next-20190605-00042-g1a0686c7a5d2 #1
[   27.355432] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[   27.361996] [<c01127a0>] (unwind_backtrace) from [<c010cfb4>]
(show_stack+0x10/0x14)
[   27.369769] [<c010cfb4>] (show_stack) from [<c0c75010>]
(dump_stack+0xd8/0x110)
[   27.377106] [<c0c75010>] (dump_stack) from [<c0c7bb68>]
(nmi_cpu_backtrace+0xac/0xbc)
[   27.384959] [<c0c7bb68>] (nmi_cpu_backtrace) from [<c0c7bc58>]
(nmi_trigger_cpumask_backtrace+0xe0/0x134)
[   27.394556] [<c0c7bc58>] (nmi_trigger_cpumask_backtrace) from
[<c01b1850>] (rcu_dump_cpu_stacks+0xa0/0xd8)
[   27.404234] [<c01b1850>] (rcu_dump_cpu_stacks) from [<c01b0aa8>]
(rcu_sched_clock_irq+0x7ec/0xa44)
[   27.413220] [<c01b0aa8>] (rcu_sched_clock_irq) from [<c01b8a64>]
(update_process_times+0x30/0x5c)
[   27.422117] [<c01b8a64>] (update_process_times) from [<c01cd8f4>]
(tick_sched_timer+0x5c/0xc0)
[   27.430751] [<c01cd8f4>] (tick_sched_timer) from [<c01b978c>]
(__hrtimer_run_queues+0x198/0x590)
[   27.439560] [<c01b978c>] (__hrtimer_run_queues) from [<c01bab78>]
(hrtimer_interrupt+0x118/0x2e0)
[   27.448455] [<c01bab78>] (hrtimer_interrupt) from [<c01117c0>]
(twd_handler+0x2c/0x40)
[   27.456399] [<c01117c0>] (twd_handler) from [<c019bbd0>]
(handle_percpu_devid_irq+0xdc/0x36c)
[   27.464953] [<c019bbd0>] (handle_percpu_devid_irq) from
[<c01959ac>] (generic_handle_irq+0x20/0x34)
[   27.474022] [<c01959ac>] (generic_handle_irq) from [<c0195fac>]
(__handle_domain_irq+0x64/0xe0)
[   27.482750] [<c0195fac>] (__handle_domain_irq) from [<c052eb50>]
(gic_handle_irq+0x4c/0xa8)
[   27.491125] [<c052eb50>] (gic_handle_irq) from [<c0101a70>]
(__irq_svc+0x70/0x98)
[   27.498624] Exception stack(0xc1301e30 to 0xc1301e78)
[   27.503696] 1e20:                                     00000001
00000002 00000000 c130c600
[   27.511893] 1e40: ffffe000 00000000 c1308ce0 00000000 00000001
dc018400 00000282 c13f18e4
[   27.520089] 1e60: 00000000 c1301e80 c0189b08 c01022dc 60000113 ffffffff
[   27.526726] [<c0101a70>] (__irq_svc) from [<c01022dc>]
(__do_softirq+0xbc/0x528)
[   27.534144] [<c01022dc>] (__do_softirq) from [<c012f0ac>]
(irq_exit+0x12c/0x180)
[   27.541561] [<c012f0ac>] (irq_exit) from [<c0195fb4>]
(__handle_domain_irq+0x6c/0xe0)
[   27.549413] [<c0195fb4>] (__handle_domain_irq) from [<c052eb50>]
(gic_handle_irq+0x4c/0xa8)
[   27.557785] [<c052eb50>] (gic_handle_irq) from [<c0101a70>]
(__irq_svc+0x70/0x98)
[   27.565282] Exception stack(0xc1301f10 to 0xc1301f58)
[   27.570351] 1f00:                                     00000001
00000006 00000000 c130c600
[   27.578548] 1f20: ffffe000 c1308928 00000001 c1308964 00000000
00000000 c1308908 c1308978
[   27.586743] 1f40: 00000000 c1301f60 c0189b58 c0109490 20000013 ffffffff
[   27.593384] [<c0101a70>] (__irq_svc) from [<c0109490>]
(arch_cpu_idle+0x20/0x3c)
[   27.600806] [<c0109490>] (arch_cpu_idle) from [<c0160ae0>]
(do_idle+0x1b8/0x2c0)
[   27.608224] [<c0160ae0>] (do_idle) from [<c0160f7c>]
(cpu_startup_entry+0x18/0x1c)
[   27.615818] [<c0160f7c>] (cpu_startup_entry) from [<c1200e24>]
(start_kernel+0x410/0x4c0)
[   27.624017] [<c1200e24>] (start_kernel) from [<00000000>] (0x0)
Robin Gong June 6, 2019, 2:32 a.m. UTC | #23
> -----Original Message-----
> From: Sven Van Asbroeck <thesven73@gmail.com>
> Sent: 2019年6月5日 21:52
> On Wed, Jun 5, 2019 at 6:08 AM Robin Gong <yibin.gong@nxp.com> wrote:
> >
> > Hi Sven,
> >         I met below crash on v5.2-rc1 and found it's related with
> > commit 728e0fbf263e, after googled,  the latest Linux-next should fix the
> issue, please check
> https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Flkml.or
> g%2Flkml%2F2019%2F6%2F3%2F1405&amp;data=02%7C01%7Cyibin.gong%
> 40nxp.com%7C87e4a5e00f764c03805308d6e9bcf753%7C686ea1d3bc2b4c6f
> a92cd99c5c301635%7C0%7C0%7C636953395138168452&amp;sdata=QZuoi
> %2BICYNF1bV15UZdpzDi0lj9V9uLNUD6FtMK5CXU%3D&amp;reserved=0.
> > After I rebased to the latest linux-next, no such issue but still
> > another crash issue after kernel bootup, I believe that's Caused by
> > NFS, because the issue is gone if I mount to mmc rootfs instead of NFS. I
> don't think it's related with Sdma firmware download, but seems your issue is
> different with mine. Anyway, could you try the latest linux-next?
> 
> I tried the latest linux-next
> (b2924447b98afa42f13f16b1a4786f0872a2fc37) but the same issue remains:
> kernel crashes on boot if sdma firmware is loaded.
> kernel boots normally if using sdma firmware in rom.
So that's a different issue, and one that I can't reproduce. How do you switch between loading the sdma firmware from ROM and from RAM? Do you just keep or remove
sdma-imx6q.bin in your ramfs (which you seem to be using)?
> Crash log:
> 
> [    0.000000] Booting Linux on physical CPU 0x0
> [    0.000000] Linux version
> 5.2.0-rc3-next-20190605-00042-g1a0686c7a5d2 (sva@svens-asus) (gcc
> version 7.3.1 20180425 [linaro-7.3-2018.05 revision
> d29120a424ecfbc167ef90065c0eeb7f91977701] (Linaro GCC 7.3-2018.05)) #1
> SMP Wed Jun 5 09:33:56 EDT 2019 <snip>
> [    1.337525] imx-sdma 20ec000.sdma: loaded firmware 3.3
> [    1.339537] random: fast init done
> [    1.346061] imx-pgc-pd imx-pgc-power-domain.0: DMA mask not set
> [    1.346557] imx-pgc-pd imx-pgc-power-domain.1: DMA mask not set
> [    1.411436] random: crng init done
> [    1.455144] pfuze100-regulator 1-0008: unrecognized pfuze chip ID!
> [    1.455874] pfuze100-regulator: probe of 1-0008 failed with error -110
> [    1.458875] 2020000.serial: ttymxc0 at MMIO 0x2020000 (irq = 27,
> base_baud = 5000000) is a IMX
> [    2.332541] printk: console [ttymxc0] enabled
> [    2.339484] imx-uart 21e8000.serial: has an ltc2870 combined
> rs232/rs485 transceiver
> [    2.347850] 21e8000.serial: ttymxc1 at MMIO 0x21e8000 (irq = 72,
> base_baud = 5000000) is a IMX
> [    2.357867] 21ec000.serial: ttymxc2 at MMIO 0x21ec000 (irq = 73,
> base_baud = 5000000) is a IMX
> [    2.368019] 21f0000.serial: ttymxc3 at MMIO 0x21f0000 (irq = 74,
> base_baud = 5000000) is a IMX
> [    2.400772] etnaviv etnaviv: bound 130000.gpu (ops gpu_ops)
> [    2.407076] etnaviv etnaviv: bound 134000.gpu (ops gpu_ops)
> [    2.413221] etnaviv etnaviv: bound 2204000.gpu (ops gpu_ops)
> [    2.418977] etnaviv-gpu 130000.gpu: model: GC2000, revision: 5108
> [    2.437166] etnaviv-gpu 130000.gpu: command buffer outside valid
> memory window
> [    2.445459] etnaviv-gpu 134000.gpu: model: GC320, revision: 5007
> [    2.463082] etnaviv-gpu 134000.gpu: command buffer outside valid
> memory window
> [    2.471240] etnaviv-gpu 2204000.gpu: model: GC355, revision: 1215
> [    2.477413] etnaviv-gpu 2204000.gpu: Ignoring GPU with VG and FE2.0
> [    2.486360] [drm] Initialized etnaviv 1.2.0 20151214 for etnaviv on minor
> 0
> [    2.497949] imx-ipuv3 2400000.ipu: IPUv3H probed
> [    2.505316] [drm] Supports vblank timestamp caching Rev 2 (21.10.2013).
> [    2.512006] [drm] No driver support for vblank timestamp query.
> [    2.519532] imx-drm display-subsystem: bound imx-ipuv3-crtc.2 (ops
> ipu_crtc_ops)
> [    2.527288] imx-drm display-subsystem: bound imx-ipuv3-crtc.3 (ops
> ipu_crtc_ops)
> [    2.535064] imx-drm display-subsystem: bound imx-ipuv3-crtc.6 (ops
> ipu_crtc_ops)
> [    2.542762] imx-drm display-subsystem: bound imx-ipuv3-crtc.7 (ops
> ipu_crtc_ops)
> [    2.550523] imx-drm display-subsystem: failed to bind 120000.hdmi
> (ops dw_hdmi_imx_ops): -517
> [    2.560505] imx-drm display-subsystem: master bind failed: -517
> [    2.566609] imx-ipuv3 2800000.ipu: IPUv3H probed
> [   27.324992] rcu: INFO: rcu_sched self-detected stall on CPU
> [   27.330691] rcu: 0-....: (2599 ticks this GP) idle=532/0/0x3
> softirq=36/36 fqs=1300
> [   27.338654] (t=2601 jiffies g=-1147 q=29)
> [   27.342771] NMI backtrace for cpu 0
> [   27.346283] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
> 5.2.0-rc3-next-20190605-00042-g1a0686c7a5d2 #1
> [   27.355432] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> [   27.361996] [<c01127a0>] (unwind_backtrace) from [<c010cfb4>]
> (show_stack+0x10/0x14)
> [   27.369769] [<c010cfb4>] (show_stack) from [<c0c75010>]
> (dump_stack+0xd8/0x110)
> [   27.377106] [<c0c75010>] (dump_stack) from [<c0c7bb68>]
> (nmi_cpu_backtrace+0xac/0xbc)
> [   27.384959] [<c0c7bb68>] (nmi_cpu_backtrace) from [<c0c7bc58>]
> (nmi_trigger_cpumask_backtrace+0xe0/0x134)
> [   27.394556] [<c0c7bc58>] (nmi_trigger_cpumask_backtrace) from
> [<c01b1850>] (rcu_dump_cpu_stacks+0xa0/0xd8)
> [   27.404234] [<c01b1850>] (rcu_dump_cpu_stacks) from [<c01b0aa8>]
> (rcu_sched_clock_irq+0x7ec/0xa44)
> [   27.413220] [<c01b0aa8>] (rcu_sched_clock_irq) from [<c01b8a64>]
> (update_process_times+0x30/0x5c)
> [   27.422117] [<c01b8a64>] (update_process_times) from [<c01cd8f4>]
> (tick_sched_timer+0x5c/0xc0)
> [   27.430751] [<c01cd8f4>] (tick_sched_timer) from [<c01b978c>]
> (__hrtimer_run_queues+0x198/0x590)
> [   27.439560] [<c01b978c>] (__hrtimer_run_queues) from [<c01bab78>]
> (hrtimer_interrupt+0x118/0x2e0)
> [   27.448455] [<c01bab78>] (hrtimer_interrupt) from [<c01117c0>]
> (twd_handler+0x2c/0x40)
> [   27.456399] [<c01117c0>] (twd_handler) from [<c019bbd0>]
> (handle_percpu_devid_irq+0xdc/0x36c)
> [   27.464953] [<c019bbd0>] (handle_percpu_devid_irq) from
> [<c01959ac>] (generic_handle_irq+0x20/0x34)
> [   27.474022] [<c01959ac>] (generic_handle_irq) from [<c0195fac>]
> (__handle_domain_irq+0x64/0xe0)
> [   27.482750] [<c0195fac>] (__handle_domain_irq) from [<c052eb50>]
> (gic_handle_irq+0x4c/0xa8)
> [   27.491125] [<c052eb50>] (gic_handle_irq) from [<c0101a70>]
> (__irq_svc+0x70/0x98)
> [   27.498624] Exception stack(0xc1301e30 to 0xc1301e78)
> [   27.503696] 1e20:                                     00000001
> 00000002 00000000 c130c600
> [   27.511893] 1e40: ffffe000 00000000 c1308ce0 00000000 00000001
> dc018400 00000282 c13f18e4
> [   27.520089] 1e60: 00000000 c1301e80 c0189b08 c01022dc 60000113
> ffffffff
> [   27.526726] [<c0101a70>] (__irq_svc) from [<c01022dc>]
> (__do_softirq+0xbc/0x528)
> [   27.534144] [<c01022dc>] (__do_softirq) from [<c012f0ac>]
> (irq_exit+0x12c/0x180)
> [   27.541561] [<c012f0ac>] (irq_exit) from [<c0195fb4>]
> (__handle_domain_irq+0x6c/0xe0)
> [   27.549413] [<c0195fb4>] (__handle_domain_irq) from [<c052eb50>]
> (gic_handle_irq+0x4c/0xa8)
> [   27.557785] [<c052eb50>] (gic_handle_irq) from [<c0101a70>]
> (__irq_svc+0x70/0x98)
> [   27.565282] Exception stack(0xc1301f10 to 0xc1301f58)
> [   27.570351] 1f00:                                     00000001
> 00000006 00000000 c130c600
> [   27.578548] 1f20: ffffe000 c1308928 00000001 c1308964 00000000
> 00000000 c1308908 c1308978
> [   27.586743] 1f40: 00000000 c1301f60 c0189b58 c0109490 20000013
> ffffffff
> [   27.593384] [<c0101a70>] (__irq_svc) from [<c0109490>]
> (arch_cpu_idle+0x20/0x3c)
> [   27.600806] [<c0109490>] (arch_cpu_idle) from [<c0160ae0>]
> (do_idle+0x1b8/0x2c0)
> [   27.608224] [<c0160ae0>] (do_idle) from [<c0160f7c>]
> (cpu_startup_entry+0x18/0x1c)
> [   27.615818] [<c0160f7c>] (cpu_startup_entry) from [<c1200e24>]
> (start_kernel+0x410/0x4c0)
> [   27.624017] [<c1200e24>] (start_kernel) from [<00000000>] (0x0)
Sven Van Asbroeck June 6, 2019, 1:34 p.m. UTC | #24
On Wed, Jun 5, 2019 at 10:32 PM Robin Gong <yibin.gong@nxp.com> wrote:
>
> So that's another issue that I can't reproduce. How do you switch between sdma ROM/RAM firmware load? Just keep or remove
> sdma-imx6q.bin in your ramfs(seems you used)?

I add the sdma firmware to the kernel image, by adding the following to the
defconfig:

+CONFIG_EXTRA_FIRMWARE="imx/sdma/sdma-imx6q.bin"
+CONFIG_EXTRA_FIRMWARE_DIR="firmware/"

If you are unable to reproduce this, I will have to do the bisect myself
to find the bad commit. Please bear with me, this is very time consuming.
Robin Gong June 10, 2019, 9:09 a.m. UTC | #25
> -----Original Message-----
> From: Sven Van Asbroeck <thesven73@gmail.com>
> Sent: 2019年6月6日 21:35
> On Wed, Jun 5, 2019 at 10:32 PM Robin Gong <yibin.gong@nxp.com> wrote:
> >
> > So that's another issue that I can't reproduce. How do you switch
> > between sdma ROM/RAM firmware load? Just keep or remove
> sdma-imx6q.bin in your ramfs(seems you used)?
> 
> I add the sdma firmware to the kernel image, by adding the following to the
> defconfig:
> 
> +CONFIG_EXTRA_FIRMWARE="imx/sdma/sdma-imx6q.bin"
> +CONFIG_EXTRA_FIRMWARE_DIR="firmware/"
> 
> If you are unable to reproduce this, I will have to do the bisect myself to find
> the bad commit. Please bear with me, this is very time consuming.
I can reproduce it once I enable your config to build the firmware into the kernel, but there is no such issue
if the sdma firmware is loaded from the rootfs, as with imx_v6_v7_defconfig. Maybe the built-in firmware function
was broken by some patches. Could you try the default way of loading the firmware, which is from the
rootfs (/lib/firmware/imx/sdma/sdma-imx6q.bin)?

[   22.873543] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [CRTC:34:crtc-0] flip_done timed out
[   26.983447] rcu: INFO: rcu_sched self-detected stall on CPU
[   26.983505] rcu:     0-....: (1 GPs behind) idle=75e/0/0x3 softirq=38/39 fqs=1220
[   26.983579]  (t=2600 jiffies g=-1143 q=27)
[   26.983590] NMI backtrace for cpu 0
[   26.983605] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.2.0-rc3-next-20190607-00015-g6d2f337-dirty #316
[   26.983612] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[   26.983640] [<c0112640>] (unwind_backtrace) from [<c010ce94>] (show_stack+0x10/0x14)
[   26.983665] [<c010ce94>] (show_stack) from [<c0bdf780>] (dump_stack+0xd4/0x108)
[   26.983686] [<c0bdf780>] (dump_stack) from [<c0be6bd8>] (nmi_cpu_backtrace+0xac/0xbc)
[   26.983703] [<c0be6bd8>] (nmi_cpu_backtrace) from [<c0be6cc8>] (nmi_trigger_cpumask_backtrace+0xe0/0x134)
[   26.983723] [<c0be6cc8>] (nmi_trigger_cpumask_backtrace) from [<c01a865c>] (rcu_dump_cpu_stacks+0xac/0xf0)
[   26.983740] [<c01a865c>] (rcu_dump_cpu_stacks) from [<c01a7a7c>] (rcu_sched_clock_irq+0x7d0/0xa30)
[   26.983757] [<c01a7a7c>] (rcu_sched_clock_irq) from [<c01af6f4>] (update_process_times+0x30/0x5c)
[   26.983777] [<c01af6f4>] (update_process_times) from [<c01c4048>] (tick_sched_timer+0x5c/0xc0)
[   26.983792] [<c01c4048>] (tick_sched_timer) from [<c01b03b0>] (__hrtimer_run_queues+0x170/0x500)
[   26.983807] [<c01b03b0>] (__hrtimer_run_queues) from [<c01b16fc>] (hrtimer_interrupt+0x154/0x2d0)
[   26.983824] [<c01b16fc>] (hrtimer_interrupt) from [<c0111964>] (twd_handler+0x2c/0x38)
[   26.983840] [<c0111964>] (twd_handler) from [<c0192834>] (handle_percpu_devid_irq+0xd4/0x384)
[   26.983857] [<c0192834>] (handle_percpu_devid_irq) from [<c018c570>] (generic_handle_irq+0x20/0x34)
[   26.983873] [<c018c570>] (generic_handle_irq) from [<c018cb70>] (__handle_domain_irq+0x64/0xe0)
[   26.983894] [<c018cb70>] (__handle_domain_irq) from [<c04eb510>] (gic_handle_irq+0x58/0xb8)
[   26.983911] [<c04eb510>] (gic_handle_irq) from [<c0101a70>] (__irq_svc+0x70/0x98)
[   26.983921] Exception stack(0xc1301e10 to 0xc1301e58)
[   26.983932] 1e00:                                     00000001 00000002 00000000 c130c340
[   26.983945] 1e20: c12b7480 ffffe000 c1308cd0 00000282 c13e4664 e8018400 f4000100 c1308f2c
[   26.983956] 1e40: 00000000 c1301e60 c0180540 c01022f8 60000113 ffffffff
[   26.983972] [<c0101a70>] (__irq_svc) from [<c01022f8>] (__do_softirq+0xd8/0x4f8)
[   26.983987] [<c01022f8>] (__do_softirq) from [<c012ea00>] (irq_exit+0x138/0x18c)
[   26.984001] [<c012ea00>] (irq_exit) from [<c018cb78>] (__handle_domain_irq+0x6c/0xe0)
[   26.984016] [<c018cb78>] (__handle_domain_irq) from [<c04eb510>] (gic_handle_irq+0x58/0xb8)
[   26.984030] [<c04eb510>] (gic_handle_irq) from [<c0101a70>] (__irq_svc+0x70/0x98)
[   26.984037] Exception stack(0xc1301f10 to 0xc1301f58)
[   26.984047] 1f00:                                     00000001 00000006 00000000 c130c340
[   26.984060] 1f20: c1300000 c1308928 00000001 c1308960 00000000 c12b6cf0 c1308908 00000000
[   26.984071] 1f40: 00000000 c1301f60 c0180590 c0109368 20000013 ffffffff
[   26.984089] [<c0101a70>] (__irq_svc) from [<c0109368>] (arch_cpu_idle+0x20/0x3c)
[   26.984107] [<c0109368>] (arch_cpu_idle) from [<c015e294>] (do_idle+0x1b4/0x2b8)
[   26.984121] [<c015e294>] (do_idle) from [<c015e72c>] (cpu_startup_entry+0x18/0x1c)
[   26.984139] [<c015e72c>] (cpu_startup_entry) from [<c1200e28>] (start_kernel+0x3fc/0x4a8)
[   26.984155] [<c1200e28>] (start_kernel) from [<00000000>] (0x0)
[   33.113520] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [CONNECTOR:56:LVDS-1] flip_done timed out
[   43.353519] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [PLANE:31:plane-0] flip_done timed out
[   53.603519] [drm:drm_atomic_helper_wait_for_flip_done] *ERROR* [CRTC:34:crtc-0] flip_done timed out
[   53.608869] Console: switching to colour frame buffer device 128x48
[   63.833523] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [CRTC:34:crtc-0] flip_done timed out
[   74.073519] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [CONNECTOR:56:LVDS-1] flip_done timed out
[   84.313521] [drm:drm_atomic_helper_wait_for_dependencies] *ERROR* [PLANE:31:plane-0] flip_done timed out
[   94.553520] [drm:drm_atomic_helper_wait_for_flip_done] *ERROR* [CRTC:34:crtc-0] flip_done timed out
[   94.954763] imx-drm display-subsystem: fb0: imx-drmdrmfb frame buffer device
[  105.013446] rcu: INFO: rcu_sched self-detected stall on CPU
[  105.019120] rcu:     0-....: (1 GPs behind) idle=75e/0/0x3 softirq=38/39 fqs=4886
[  105.026515]  (t=10403 jiffies g=-1143 q=27)
Sven Van Asbroeck June 10, 2019, 3:46 p.m. UTC | #26
On Mon, Jun 10, 2019 at 5:09 AM Robin Gong <yibin.gong@nxp.com> wrote:
>
> I can reproduce once enable your config to build firmware in kernel, but no such issue
> if load sdma firmware from rootfs as imx_v6_v7_defconfig. Maybe firmware built in function
> broken by some patches. Could you try with the default firmware loading way which is from
> rootfs(/lib/firmware/imx/sdma/sdma-imx6q.bin)?

This is it ! If I add the firmware to the kernel directly, I see the crash.
But if I use the firmware fall-back mechanism, there is no crash.
And if I build imx-sdma as a module, and insmod it later, there is
also no crash.

I patched imx-sdma so it logs the adler32 checksum of the firmware it's
loading (I tried using the kernel crypto API, but it doesn't work this early
in the boot). I notice that the firmware is always the same, crash or no
crash:

firmware in-kernel (crash):
[    1.370424] imx-sdma 20ec000.sdma: firmware hash: 69BC0F09
firmware fallback (no crash):
[    6.466394] imx-sdma 20ec000.sdma: firmware hash: 69BC0F09

My guess: this could be a timing issue. If the sdma driver loads 'too early',
the boot crash will happen. Maybe the driver needs to check for a
missing dependency on boot, and -EPROBE_DEFER ?

Robin, should I make a bug report? If so, who do I send this to?

Code used to print the firmware hash on imx-sdma:

#define MOD_ADLER  65521

/*
 * Compute an Adler-32 checksum over the first @len bytes of @data.
 *
 * Two running sums are kept, both reduced modulo MOD_ADLER after every
 * byte: the low sum starts at 1 and accumulates the bytes, the high sum
 * starts at 0 and accumulates the successive values of the low sum.
 *
 * Returns the high sum in the upper 16 bits and the low sum in the lower
 * 16 bits. For len == 0 no byte is read and the result is 1.
 */
static u32 adler32(const unsigned char *data, size_t len)
{
    const unsigned char *cursor = data;
    size_t remaining = len;
    u32 low_sum = 1;
    u32 high_sum = 0;

    while (remaining != 0) {
        low_sum = (low_sum + *cursor) % MOD_ADLER;
        high_sum = (high_sum + low_sum) % MOD_ADLER;
        cursor++;
        remaining--;
    }

    return (high_sum << 16) | low_sum;
}

/*
 * Debug helper: log the adler32() checksum of a firmware image so that the
 * images seen on different boot paths can be compared in the kernel log.
 *
 * @dev: device the message is logged against
 * @fw:  firmware image; fw->data and fw->size are read, so it must not be NULL
 */
static void log_fw_hash(struct device *dev, const struct firmware *fw)
{
        /*
         * Terminate the message with a newline, as the other dev_info()
         * call in this code does, so the record is not left open.
         */
        dev_info(dev, "firmware hash: %08X\n", adler32(fw->data, fw->size));
}

static void sdma_load_firmware(const struct firmware *fw, void *context)
{
        struct sdma_engine *sdma = context;
        const struct sdma_firmware_header *header;
        const struct sdma_script_start_addrs *addr;
        unsigned short *ram_code;

        if (!fw) {
                dev_info(sdma->dev, "external firmware not found,
using ROM firmware\n");
                /* In this case we just use the ROM firmware. */
                return;
        }
        log_fw_hash(sdma->dev, fw);
Robin Gong June 11, 2019, 9:09 a.m. UTC | #27
> -----Original Message-----
> From: Sven Van Asbroeck <thesven73@gmail.com>
> Sent: 2019年6月10日 23:47
> To: Robin Gong <yibin.gong@nxp.com>
> Subject: Re: [v8] media: imx: add mem2mem device
> On Mon, Jun 10, 2019 at 5:09 AM Robin Gong <yibin.gong@nxp.com> wrote:
> >
> > I can reproduce once enable your config to build firmware in kernel,
> > but no such issue if load sdma firmware from rootfs as
> > imx_v6_v7_defconfig. Maybe firmware built in function broken by some
> > patches. Could you try with the default firmware loading way which is from
> rootfs(/lib/firmware/imx/sdma/sdma-imx6q.bin)?
> 
> This is it ! If I add the firmware to the kernel directly, I see the crash.
> But if I use the firmware fall-back mechanism, there is no crash.
> And if I build imx-sdma as a module, and insmod it later, there is also no crash.
> 
> I patched imx-sdma so it logs the adler32 checksum of the firmware it's
> loading (I tried using the kernel crypto API, but it doesn't work this early in the
> boot). I notice that the firmware is always the same, crash or no
> crash:
> 
> firmware in-kernel (crash):
> [    1.370424] imx-sdma 20ec000.sdma: firmware hash: 69BC0F09
> firmware fallback (no crash):
> [    6.466394] imx-sdma 20ec000.sdma: firmware hash: 69BC0F09
> 
> My guess: this could be a timing issue. If the sdma driver loads 'too early', the
> boot crash will happen. Maybe the driver needs to check for a missing
> dependency on boot, and -EPROBE_DEFER ?
> 
> Robin, should I make a bug report? If so, who do I send this to?
> 
Sven, there is no dependency from the sdma driver's point of view. The only difference between loading the firmware directly
from the kernel image and loading it from the rootfs is that the former spends more time during kernel boot, and that timing may cause
the crash. The issue is not 100% reproducible on my side, only about 20% of the time, which looks like a timing issue. Another
interesting thing is that every time, the crash stops somewhere in drm, and after I disable the ipu and display nodes
(which use drm) in imx6q.dtsi, the issue is gone on my i.mx6q-sabreauto board.
Could you try the patch below, as I did? If the issue is gone on your side as well, we could involve the drm folks to
look into it.

diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
index 7175898..5b21b3f 100644
--- a/arch/arm/boot/dts/imx6q.dtsi
+++ b/arch/arm/boot/dts/imx6q.dtsi
@@ -217,6 +217,7 @@
                                 <&clks IMX6QDL_CLK_IPU2_DI1>;
                        clock-names = "bus", "di0", "di1";
                        resets = <&src 4>;
+                       status = "disabled";

                        ipu2_csi0: port@0 {
                                reg = <0>;
@@ -300,6 +301,7 @@
        display-subsystem {
                compatible = "fsl,imx-display-subsystem";
                ports = <&ipu1_di0>, <&ipu1_di1>, <&ipu2_di0>, <&ipu2_di1>;
+               status = "disabled";
        };
 };
Robin Gong June 11, 2019, 9:17 a.m. UTC | #28
Sorry, I forgot to disable ipu1:

diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index b352ea2..b884490 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -1291,7 +1291,7 @@
                                 <&clks IMX6QDL_CLK_IPU1_DI1>;
                        clock-names = "bus", "di0", "di1";
                        resets = <&src 2>;
-
+                       status = "disabled";
                        ipu1_csi0: port@0 {
                                reg = <0>;


> -----Original Message-----
> From: Robin Gong
> Sent: 2019年6月11日 17:10
> To: 'Sven Van Asbroeck' <thesven73@gmail.com>
> Cc: Fabio Estevam <festevam@gmail.com>; linux-media
> <linux-media@vger.kernel.org>
> Subject: RE: [v8] media: imx: add mem2mem device
> 
> > -----Original Message-----
> > From: Sven Van Asbroeck <thesven73@gmail.com>
> > Sent: 2019年6月10日 23:47
> > To: Robin Gong <yibin.gong@nxp.com>
> > Subject: Re: [v8] media: imx: add mem2mem device On Mon, Jun 10, 2019
> > at 5:09 AM Robin Gong <yibin.gong@nxp.com> wrote:
> > >
> > > I can reproduce once enable your config to build firmware in kernel,
> > > but no such issue if load sdma firmware from rootfs as
> > > imx_v6_v7_defconfig. Maybe firmware built in function broken by some
> > > patches. Could you try with the default firmware loading way which
> > > is from
> > rootfs(/lib/firmware/imx/sdma/sdma-imx6q.bin)?
> >
> > This is it ! If I add the firmware to the kernel directly, I see the crash.
> > But if I use the firmware fall-back mechanism, there is no crash.
> > And if I build imx-sdma as a module, and insmod it later, there is also no
> crash.
> >
> > I patched imx-sdma so it logs the adler32 checksum of the firmware
> > it's loading (I tried using the kernel crypto API, but it doesn't work
> > this early in the boot). I notice that the firmware is always the
> > same, crash or no
> > crash:
> >
> > firmware in-kernel (crash):
> > [    1.370424] imx-sdma 20ec000.sdma: firmware hash: 69BC0F09
> > firmware fallback (no crash):
> > [    6.466394] imx-sdma 20ec000.sdma: firmware hash: 69BC0F09
> >
> > My guess: this could be a timing issue. If the sdma driver loads 'too
> > early', the boot crash will happen. Maybe the driver needs to check
> > for a missing dependency on boot, and -EPROBE_DEFER ?
> >
> > Robin, should I make a bug report? If so, who do I send this to?
> >
> Sven, no any dependency from sdma driver view. The only difference between
> directly loading firmware from kernel and rootfs is the former spend more time
> during kernel boot and such timing may cause the crash. The issue is not 100%
> in my side, about 20% possibility, which looks like 'timing issue' . Another
> interesting thing is that every time the crash stop at somewhere drm, and After
> I disable ipu and display which use drm in i.mx6q.dtsi, the issue is gone on my
> i.mx6q-sabreauto board.
> Could you have a try with below patch as mine? If the issue is gone on your
> side, we could involve drm guys to look into it.
> 
> diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
> index 7175898..5b21b3f 100644
> --- a/arch/arm/boot/dts/imx6q.dtsi
> +++ b/arch/arm/boot/dts/imx6q.dtsi
> @@ -217,6 +217,7 @@
>                                  <&clks IMX6QDL_CLK_IPU2_DI1>;
>                         clock-names = "bus", "di0", "di1";
>                         resets = <&src 4>;
> +                       status = "disabled";
> 
>                         ipu2_csi0: port@0 {
>                                 reg = <0>; @@ -300,6 +301,7 @@
>         display-subsystem {
>                 compatible = "fsl,imx-display-subsystem";
>                 ports = <&ipu1_di0>, <&ipu1_di1>, <&ipu2_di0>,
> <&ipu2_di1>;
> +               status = "disabled";
>         };
>  };
Sven Van Asbroeck June 11, 2019, 5:30 p.m. UTC | #29
On Tue, Jun 11, 2019 at 5:09 AM Robin Gong <yibin.gong@nxp.com> wrote:
>
> Sven, no any dependency from sdma driver view. The only difference between directly loading firmware
> from kernel and rootfs is the former spend more time during kernel boot and such timing may cause
> the crash. The issue is not 100% in my side, about 20% possibility, which looks like 'timing issue' . Another
> interesting thing is that every time the crash stop at somewhere drm, and After I disable ipu and display
> which use drm in i.mx6q.dtsi, the issue is gone on my i.mx6q-sabreauto board.
> Could you have a try with below patch as mine? If the issue is gone on your side, we could involve drm guys to
> look into it.

When I apply your patch to ipu and display, the crash still happens on
my device.
But when I disable NFSv4 network filesystem in defconfig, the crash disappears.
Yet on linux-next, the crash is there again, even if I disable the IPU or NFSv4.

My guess: we are chasing ghosts, the crashes are purely timing related. Things
like disabling the IPU or NFSv4 change boot timing, and this changes the crash.

Experiment: If I put msleep(1000) right before the sdma_load_script() call, then
the crash never happens. And if I comment out the call to sdma_run_channel0()
in sdma_load_script(), then the crash also does not happen.

This suggests that the crash is related to the exact timing when
sdma_run_channel0() is called. If it is called too early, this results
in an 'interrupt storm' on the sdma interrupt handler: it gets called
millions of times in a very short amount of time.

By adding debug prints, I noticed that the sdma core calls back
sdma_alloc_chan_resources(), later during the boot, when a spi
bus is created.

Experiment: I paused firmware upload until the first time
sdma_alloc_chan_resources() is called by the core.
I used a struct completion to accomplish this.

Result: the crash never happens again.

All this suggests very strongly that sdma_run_channel0() is called
"too early" by the driver. I don't know enough about imx-sdma to
know what is missing during the early call.

Here is the patch to delay firmware load until the first
sdma_alloc_chan_resources() has completed:

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 99d9f431ae2c..ddeded5c3337 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -33,6 +33,7 @@
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
 #include <linux/workqueue.h>
+#include <linux/completion.h>

 #include <asm/irq.h>
 #include <linux/platform_data/dma-imx-sdma.h>
@@ -444,6 +445,7 @@ struct sdma_engine {
        struct sdma_buffer_descriptor   *bd0;
        /* clock ratio for AHB:SDMA core. 1:1 is 1, 2:1 is 0*/
        bool                            clk_ratio;
+       struct completion               chan_resources_alloced;
 };

 static int sdma_config_write(struct dma_chan *chan,
@@ -1258,6 +1260,7 @@ static void sdma_desc_free(struct virt_dma_desc *vd)
 static int sdma_alloc_chan_resources(struct dma_chan *chan)
 {
        struct sdma_channel *sdmac = to_sdma_chan(chan);
+       struct sdma_engine *sdma = sdmac->sdma;
        struct imx_dma_data *data = chan->private;
        struct imx_dma_data mem_data;
        int prio, ret;
@@ -1310,6 +1313,7 @@ static int sdma_alloc_chan_resources(struct
dma_chan *chan)
        if (ret)
                goto disable_clk_ahb;

+       complete(&sdma->chan_resources_alloced);
        return 0;

 disable_clk_ahb:
@@ -1724,6 +1728,7 @@ static void sdma_load_firmware(const struct
firmware *fw, void *context)
                /* In this case we just use the ROM firmware. */
                return;
        }
+       wait_for_completion(&sdma->chan_resources_alloced);

        if (fw->size < sizeof(*header))
                goto err_firmware;
@@ -2012,6 +2017,7 @@ static int sdma_probe(struct platform_device *pdev)
                return -ENOMEM;

        spin_lock_init(&sdma->channel_0_lock);
+       init_completion(&sdma->chan_resources_alloced);

        sdma->dev = &pdev->dev;
        sdma->drvdata = drvdata;
Steve Longerbeam June 12, 2019, 1:08 a.m. UTC | #30
Hi Philipp,

Version 9 will also need to fix merge conflicts due to the recent module 
re-org and the switch to sync registration for the IPU internal subdevs.

I've done that work already, feel free to cherry-pick it from my github 
fork if you agree with the merge fixes:

git@github.com:slongerbeam/mediatree.git, branch imx/mem2mem.v8.

Btw, some bugs have been found and fixed in ipu-image-convert.c. I will 
be posting a patch-set shortly. You can review branch imx/bgthree-2136 
in my fork for the changes.

Steve


On 4/18/19 9:44 AM, Philipp Zabel wrote:
> Add a single imx-media mem2mem video device that uses the IPU IC PP
> (image converter post processing) task for scaling and colorspace
> conversion.
> On i.MX6Q/DL SoCs with two IPUs currently only the first IPU is used.
>
> The hardware only supports writing to destination buffers up to
> 1024x1024 pixels in a single pass, arbitrary sizes can be achieved
> by rendering multiple tiles per frame.
>
> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
> [slongerbeam@gmail.com: use ipu_image_convert_adjust(), fix
>   device_run() error handling, add missing media-device header,
>   unregister and remove the mem2mem device in error paths in
>   imx_media_probe_complete() and in imx_media_remove()]
> Signed-off-by: Steve Longerbeam <slongerbeam@gmail.com>
> ---
> Changes since v7 [1]:
>   - Change capture format to keep aspect ratio when changing rotation.
>   - Adjust output and update formats to alignment requirements, if
>     necessary when changing rotation or flip modes.
>   - Disallow changing rotation or flip modes if a format change is
>     required on a busy queue.
>   - Add sequence counting.
>   - Use v4l2_m2m_buf_copy_metadata.
>   - Disallow interlaced field modes.
>   - Rename video device to "ipu_ic_pp csc/scaler".
>
> [1] https://patchwork.linuxtv.org/patch/53968/
> ---
>   drivers/staging/media/imx/Kconfig             |   1 +
>   drivers/staging/media/imx/Makefile            |   1 +
>   .../staging/media/imx/imx-media-csc-scaler.c  | 917 ++++++++++++++++++
>   drivers/staging/media/imx/imx-media-dev.c     |  34 +-
>   drivers/staging/media/imx/imx-media.h         |  10 +
>   5 files changed, 959 insertions(+), 4 deletions(-)
>   create mode 100644 drivers/staging/media/imx/imx-media-csc-scaler.c
>
> diff --git a/drivers/staging/media/imx/Kconfig b/drivers/staging/media/imx/Kconfig
> index f6d220b649fb..9bf14eb2154b 100644
> --- a/drivers/staging/media/imx/Kconfig
> +++ b/drivers/staging/media/imx/Kconfig
> @@ -6,6 +6,7 @@ config VIDEO_IMX_MEDIA
>   	depends on HAS_DMA
>   	select VIDEOBUF2_DMA_CONTIG
>   	select V4L2_FWNODE
> +	select V4L2_MEM2MEM_DEV
>   	help
>   	  Say yes here to enable support for video4linux media controller
>   	  driver for the i.MX5/6 SOC.
> diff --git a/drivers/staging/media/imx/Makefile b/drivers/staging/media/imx/Makefile
> index d2d909a36239..0c86723f1763 100644
> --- a/drivers/staging/media/imx/Makefile
> +++ b/drivers/staging/media/imx/Makefile
> @@ -7,6 +7,7 @@ imx-media-ic-objs := imx-ic-common.o imx-ic-prp.o imx-ic-prpencvf.o
>   obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media.o
>   obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-common.o
>   obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-capture.o
> +obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-csc-scaler.o
>   obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-vdic.o
>   obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-ic.o
>   
> diff --git a/drivers/staging/media/imx/imx-media-csc-scaler.c b/drivers/staging/media/imx/imx-media-csc-scaler.c
> new file mode 100644
> index 000000000000..4a0ecdfe38e6
> --- /dev/null
> +++ b/drivers/staging/media/imx/imx-media-csc-scaler.c
> @@ -0,0 +1,917 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * i.MX IPUv3 IC PP mem2mem CSC/Scaler driver
> + *
> + * Copyright (C) 2011 Pengutronix, Sascha Hauer
> + * Copyright (C) 2018 Pengutronix, Philipp Zabel
> + */
> +#include <linux/module.h>
> +#include <linux/delay.h>
> +#include <linux/fs.h>
> +#include <linux/version.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <video/imx-ipu-v3.h>
> +#include <video/imx-ipu-image-convert.h>
> +
> +#include <media/media-device.h>
> +#include <media/v4l2-ctrls.h>
> +#include <media/v4l2-event.h>
> +#include <media/v4l2-mem2mem.h>
> +#include <media/v4l2-device.h>
> +#include <media/v4l2-ioctl.h>
> +#include <media/videobuf2-dma-contig.h>
> +
> +#include "imx-media.h"
> +
> +#define fh_to_ctx(__fh)	container_of(__fh, struct ipu_csc_scaler_ctx, fh)
> +
> +enum {
> +	V4L2_M2M_SRC = 0,
> +	V4L2_M2M_DST = 1,
> +};
> +
> +struct ipu_csc_scaler_priv {
> +	struct imx_media_video_dev	vdev;
> +
> +	struct v4l2_m2m_dev		*m2m_dev;
> +	struct device			*dev;
> +
> +	struct imx_media_dev		*md;
> +
> +	struct mutex			mutex;	/* mem2mem device mutex */
> +};
> +
> +#define vdev_to_priv(v) container_of(v, struct ipu_csc_scaler_priv, vdev)
> +
> +/* Per-queue, driver-specific private data */
> +struct ipu_csc_scaler_q_data {
> +	struct v4l2_pix_format		cur_fmt;
> +	struct v4l2_rect		rect;
> +};
> +
> +struct ipu_csc_scaler_ctx {
> +	struct ipu_csc_scaler_priv	*priv;
> +
> +	struct v4l2_fh			fh;
> +	struct ipu_csc_scaler_q_data	q_data[2];
> +	struct ipu_image_convert_ctx	*icc;
> +
> +	struct v4l2_ctrl_handler	ctrl_hdlr;
> +	int				rotate;
> +	bool				hflip;
> +	bool				vflip;
> +	enum ipu_rotate_mode		rot_mode;
> +	unsigned int			sequence;
> +};
> +
> +static struct ipu_csc_scaler_q_data *get_q_data(struct ipu_csc_scaler_ctx *ctx,
> +						enum v4l2_buf_type type)
> +{
> +	if (V4L2_TYPE_IS_OUTPUT(type))
> +		return &ctx->q_data[V4L2_M2M_SRC];
> +	else
> +		return &ctx->q_data[V4L2_M2M_DST];
> +}
> +
> +/*
> + * mem2mem callbacks
> + */
> +
> +static void job_abort(void *_ctx)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = _ctx;
> +
> +	if (ctx->icc)
> +		ipu_image_convert_abort(ctx->icc);
> +}
> +
> +static void ipu_ic_pp_complete(struct ipu_image_convert_run *run, void *_ctx)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = _ctx;
> +	struct ipu_csc_scaler_priv *priv = ctx->priv;
> +	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> +
> +	src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +	dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +
> +	v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true);
> +
> +	src_buf->sequence = dst_buf->sequence = ctx->sequence++;
> +
> +	v4l2_m2m_buf_done(src_buf, run->status ? VB2_BUF_STATE_ERROR :
> +						 VB2_BUF_STATE_DONE);
> +	v4l2_m2m_buf_done(dst_buf, run->status ? VB2_BUF_STATE_ERROR :
> +						 VB2_BUF_STATE_DONE);
> +
> +	v4l2_m2m_job_finish(priv->m2m_dev, ctx->fh.m2m_ctx);
> +	kfree(run);
> +}
> +
> +static void device_run(void *_ctx)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = _ctx;
> +	struct ipu_csc_scaler_priv *priv = ctx->priv;
> +	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> +	struct ipu_image_convert_run *run;
> +	int ret;
> +
> +	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> +
> +	run = kzalloc(sizeof(*run), GFP_KERNEL);
> +	if (!run)
> +		goto err;
> +
> +	run->ctx = ctx->icc;
> +	run->in_phys = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
> +	run->out_phys = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> +
> +	ret = ipu_image_convert_queue(run);
> +	if (ret < 0) {
> +		v4l2_err(ctx->priv->vdev.vfd->v4l2_dev,
> +			 "%s: failed to queue: %d\n", __func__, ret);
> +		goto err;
> +	}
> +
> +	return;
> +
> +err:
> +	v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +	v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +	v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
> +	v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR);
> +	v4l2_m2m_job_finish(priv->m2m_dev, ctx->fh.m2m_ctx);
> +}
> +
> +/*
> + * Video ioctls
> + */
> +static int ipu_csc_scaler_querycap(struct file *file, void *priv,
> +				   struct v4l2_capability *cap)
> +{
> +	strscpy(cap->driver, "imx-media-csc-scaler", sizeof(cap->driver));
> +	strscpy(cap->card, "imx-media-csc-scaler", sizeof(cap->card));
> +	strscpy(cap->bus_info, "platform:imx-media-csc-scaler",
> +		sizeof(cap->bus_info));
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_enum_fmt(struct file *file, void *fh,
> +				   struct v4l2_fmtdesc *f)
> +{
> +	u32 fourcc;
> +	int ret;
> +
> +	ret = imx_media_enum_format(&fourcc, f->index, CS_SEL_ANY);
> +	if (ret)
> +		return ret;
> +
> +	f->pixelformat = fourcc;
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_g_fmt(struct file *file, void *priv,
> +				struct v4l2_format *f)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct ipu_csc_scaler_q_data *q_data;
> +
> +	q_data = get_q_data(ctx, f->type);
> +
> +	f->fmt.pix = q_data->cur_fmt;
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_try_fmt(struct file *file, void *priv,
> +				  struct v4l2_format *f)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct ipu_csc_scaler_q_data *q_data = get_q_data(ctx, f->type);
> +	struct ipu_image test_in, test_out;
> +	enum v4l2_field field;
> +
> +	field = f->fmt.pix.field;
> +	if (field == V4L2_FIELD_ANY)
> +		field = V4L2_FIELD_NONE;
> +	else if (field != V4L2_FIELD_NONE)
> +		return -EINVAL;
> +
> +	if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
> +		struct ipu_csc_scaler_q_data *q_data_in =
> +			get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +
> +		test_out.pix = f->fmt.pix;
> +		test_in.pix = q_data_in->cur_fmt;
> +	} else {
> +		struct ipu_csc_scaler_q_data *q_data_out =
> +			get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +
> +		test_in.pix = f->fmt.pix;
> +		test_out.pix = q_data_out->cur_fmt;
> +	}
> +
> +	ipu_image_convert_adjust(&test_in, &test_out, ctx->rot_mode);
> +
> +	f->fmt.pix = (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) ?
> +		test_out.pix : test_in.pix;
> +
> +	if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
> +		f->fmt.pix.colorspace = q_data->cur_fmt.colorspace;
> +		f->fmt.pix.ycbcr_enc = q_data->cur_fmt.ycbcr_enc;
> +		f->fmt.pix.xfer_func = q_data->cur_fmt.xfer_func;
> +		f->fmt.pix.quantization = q_data->cur_fmt.quantization;
> +	} else if (f->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) {
> +		f->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB;
> +		f->fmt.pix.ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> +		f->fmt.pix.xfer_func = V4L2_XFER_FUNC_DEFAULT;
> +		f->fmt.pix.quantization = V4L2_QUANTIZATION_DEFAULT;
> +	}
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_s_fmt(struct file *file, void *priv,
> +				struct v4l2_format *f)
> +{
> +	struct ipu_csc_scaler_q_data *q_data;
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct vb2_queue *vq;
> +	int ret;
> +
> +	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
> +	if (vb2_is_busy(vq)) {
> +		v4l2_err(ctx->priv->vdev.vfd->v4l2_dev, "%s: queue busy\n",
> +			 __func__);
> +		return -EBUSY;
> +	}
> +
> +	q_data = get_q_data(ctx, f->type);
> +
> +	ret = ipu_csc_scaler_try_fmt(file, priv, f);
> +	if (ret < 0)
> +		return ret;
> +
> +	q_data->cur_fmt.width = f->fmt.pix.width;
> +	q_data->cur_fmt.height = f->fmt.pix.height;
> +	q_data->cur_fmt.pixelformat = f->fmt.pix.pixelformat;
> +	q_data->cur_fmt.field = f->fmt.pix.field;
> +	q_data->cur_fmt.bytesperline = f->fmt.pix.bytesperline;
> +	q_data->cur_fmt.sizeimage = f->fmt.pix.sizeimage;
> +
> +	/* Reset cropping/composing rectangle */
> +	q_data->rect.left = 0;
> +	q_data->rect.top = 0;
> +	q_data->rect.width = q_data->cur_fmt.width;
> +	q_data->rect.height = q_data->cur_fmt.height;
> +
> +	if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> +		/* Set colorimetry on the output queue */
> +		q_data->cur_fmt.colorspace = f->fmt.pix.colorspace;
> +		q_data->cur_fmt.ycbcr_enc = f->fmt.pix.ycbcr_enc;
> +		q_data->cur_fmt.xfer_func = f->fmt.pix.xfer_func;
> +		q_data->cur_fmt.quantization = f->fmt.pix.quantization;
> +		/* Propagate colorimetry to the capture queue */
> +		q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +		q_data->cur_fmt.colorspace = f->fmt.pix.colorspace;
> +		q_data->cur_fmt.ycbcr_enc = f->fmt.pix.ycbcr_enc;
> +		q_data->cur_fmt.xfer_func = f->fmt.pix.xfer_func;
> +		q_data->cur_fmt.quantization = f->fmt.pix.quantization;
> +	}
> +
> +	/*
> +	 * TODO: Setting colorimetry on the capture queue is currently not
> +	 * supported by the V4L2 API
> +	 */
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_g_selection(struct file *file, void *priv,
> +				      struct v4l2_selection *s)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct ipu_csc_scaler_q_data *q_data;
> +
> +	switch (s->target) {
> +	case V4L2_SEL_TGT_CROP:
> +	case V4L2_SEL_TGT_CROP_DEFAULT:
> +	case V4L2_SEL_TGT_CROP_BOUNDS:
> +		if (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
> +			return -EINVAL;
> +		q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +		break;
> +	case V4L2_SEL_TGT_COMPOSE:
> +	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
> +	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
> +		if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
> +			return -EINVAL;
> +		q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (s->target == V4L2_SEL_TGT_CROP ||
> +	    s->target == V4L2_SEL_TGT_COMPOSE) {
> +		s->r = q_data->rect;
> +	} else {
> +		s->r.left = 0;
> +		s->r.top = 0;
> +		s->r.width = q_data->cur_fmt.width;
> +		s->r.height = q_data->cur_fmt.height;
> +	}
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_s_selection(struct file *file, void *priv,
> +				      struct v4l2_selection *s)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
> +	struct ipu_csc_scaler_q_data *q_data;
> +
> +	switch (s->target) {
> +	case V4L2_SEL_TGT_CROP:
> +		if (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
> +			return -EINVAL;
> +		break;
> +	case V4L2_SEL_TGT_COMPOSE:
> +		if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
> +			return -EINVAL;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE &&
> +	    s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
> +		return -EINVAL;
> +
> +	q_data = get_q_data(ctx, s->type);
> +
> +	/* The input's frame width to the IC must be a multiple of 8 pixels
> +	 * When performing resizing the frame width must be multiple of burst
> +	 * size - 8 or 16 pixels as defined by CB#_BURST_16 parameter.
> +	 */
> +	if (s->flags & V4L2_SEL_FLAG_GE)
> +		s->r.width = round_up(s->r.width, 8);
> +	if (s->flags & V4L2_SEL_FLAG_LE)
> +		s->r.width = round_down(s->r.width, 8);
> +	s->r.width = clamp_t(unsigned int, s->r.width, 8,
> +			     round_down(q_data->cur_fmt.width, 8));
> +	s->r.height = clamp_t(unsigned int, s->r.height, 1,
> +			      q_data->cur_fmt.height);
> +	s->r.left = clamp_t(unsigned int, s->r.left, 0,
> +			    q_data->cur_fmt.width - s->r.width);
> +	s->r.top = clamp_t(unsigned int, s->r.top, 0,
> +			   q_data->cur_fmt.height - s->r.height);
> +
> +	/* V4L2_SEL_FLAG_KEEP_CONFIG is only valid for subdevices */
> +	q_data->rect = s->r;
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_ioctl_ops ipu_csc_scaler_ioctl_ops = {
> +	.vidioc_querycap		= ipu_csc_scaler_querycap,
> +
> +	.vidioc_enum_fmt_vid_cap	= ipu_csc_scaler_enum_fmt,
> +	.vidioc_g_fmt_vid_cap		= ipu_csc_scaler_g_fmt,
> +	.vidioc_try_fmt_vid_cap		= ipu_csc_scaler_try_fmt,
> +	.vidioc_s_fmt_vid_cap		= ipu_csc_scaler_s_fmt,
> +
> +	.vidioc_enum_fmt_vid_out	= ipu_csc_scaler_enum_fmt,
> +	.vidioc_g_fmt_vid_out		= ipu_csc_scaler_g_fmt,
> +	.vidioc_try_fmt_vid_out		= ipu_csc_scaler_try_fmt,
> +	.vidioc_s_fmt_vid_out		= ipu_csc_scaler_s_fmt,
> +
> +	.vidioc_g_selection		= ipu_csc_scaler_g_selection,
> +	.vidioc_s_selection		= ipu_csc_scaler_s_selection,
> +
> +	.vidioc_reqbufs			= v4l2_m2m_ioctl_reqbufs,
> +	.vidioc_querybuf		= v4l2_m2m_ioctl_querybuf,
> +
> +	.vidioc_qbuf			= v4l2_m2m_ioctl_qbuf,
> +	.vidioc_expbuf			= v4l2_m2m_ioctl_expbuf,
> +	.vidioc_dqbuf			= v4l2_m2m_ioctl_dqbuf,
> +	.vidioc_create_bufs		= v4l2_m2m_ioctl_create_bufs,
> +	.vidioc_prepare_buf		= v4l2_m2m_ioctl_prepare_buf,
> +
> +	.vidioc_streamon		= v4l2_m2m_ioctl_streamon,
> +	.vidioc_streamoff		= v4l2_m2m_ioctl_streamoff,
> +
> +	.vidioc_subscribe_event		= v4l2_ctrl_subscribe_event,
> +	.vidioc_unsubscribe_event	= v4l2_event_unsubscribe,
> +};
> +
> +/*
> + * Queue operations
> + */
> +
> +static int ipu_csc_scaler_queue_setup(struct vb2_queue *vq,
> +				      unsigned int *nbuffers,
> +				      unsigned int *nplanes,
> +				      unsigned int sizes[],
> +				      struct device *alloc_devs[])
> +{
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct ipu_csc_scaler_q_data *q_data;
> +	unsigned int size, count = *nbuffers;
> +
> +	q_data = get_q_data(ctx, vq->type);
> +
> +	size = q_data->cur_fmt.sizeimage;
> +
> +	*nbuffers = count;
> +
> +	if (*nplanes)
> +		return sizes[0] < size ? -EINVAL : 0;
> +
> +	*nplanes = 1;
> +	sizes[0] = size;
> +
> +	dev_dbg(ctx->priv->dev, "get %d buffer(s) of size %d each.\n",
> +		count, size);
> +
> +	return 0;
> +}
> +
> +static int ipu_csc_scaler_buf_prepare(struct vb2_buffer *vb)
> +{
> +	struct vb2_queue *vq = vb->vb2_queue;
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct ipu_csc_scaler_q_data *q_data;
> +	unsigned long size;
> +
> +	dev_dbg(ctx->priv->dev, "type: %d\n", vq->type);
> +
> +	if (V4L2_TYPE_IS_OUTPUT(vq->type)) {
> +		if (vbuf->field == V4L2_FIELD_ANY)
> +			vbuf->field = V4L2_FIELD_NONE;
> +		if (vbuf->field != V4L2_FIELD_NONE) {
> +			dev_dbg(ctx->priv->dev, "%s: field isn't supported\n",
> +				__func__);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	q_data = get_q_data(ctx, vq->type);
> +	size = q_data->cur_fmt.sizeimage;
> +
> +	if (vb2_plane_size(vb, 0) < size) {
> +		dev_dbg(ctx->priv->dev,
> +			"%s: data will not fit into plane (%lu < %lu)\n",
> +			__func__, vb2_plane_size(vb, 0), size);
> +		return -EINVAL;
> +	}
> +
> +	vb2_set_plane_payload(vb, 0, size);
> +
> +	return 0;
> +}
> +
> +static void ipu_csc_scaler_buf_queue(struct vb2_buffer *vb)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> +
> +	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, to_vb2_v4l2_buffer(vb));
> +}
> +
> +static void ipu_image_from_q_data(struct ipu_image *im,
> +				  struct ipu_csc_scaler_q_data *q_data)
> +{
> +	im->pix.width = q_data->cur_fmt.width;
> +	im->pix.height = q_data->cur_fmt.height;
> +	im->pix.bytesperline = q_data->cur_fmt.bytesperline;
> +	im->pix.pixelformat = q_data->cur_fmt.pixelformat;
> +	im->rect = q_data->rect;
> +}
> +
> +static int ipu_csc_scaler_start_streaming(struct vb2_queue *q,
> +					  unsigned int count)
> +{
> +	const enum ipu_ic_task ic_task = IC_TASK_POST_PROCESSOR;
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(q);
> +	struct ipu_csc_scaler_priv *priv = ctx->priv;
> +	struct ipu_soc *ipu = priv->md->ipu[0];
> +	struct ipu_csc_scaler_q_data *q_data;
> +	struct vb2_queue *other_q;
> +	struct ipu_image in, out;
> +
> +	other_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
> +				  (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) ?
> +				  V4L2_BUF_TYPE_VIDEO_OUTPUT :
> +				  V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +	if (!vb2_is_streaming(other_q))
> +		return 0;
> +
> +	if (ctx->icc) {
> +		v4l2_warn(ctx->priv->vdev.vfd->v4l2_dev, "removing old ICC\n");
> +		ipu_image_convert_unprepare(ctx->icc);
> +	}
> +
> +	q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +	ipu_image_from_q_data(&in, q_data);
> +
> +	q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +	ipu_image_from_q_data(&out, q_data);
> +
> +	ctx->icc = ipu_image_convert_prepare(ipu, ic_task, &in, &out,
> +					     ctx->rot_mode,
> +					     ipu_ic_pp_complete, ctx);
> +	if (IS_ERR(ctx->icc)) {
> +		struct vb2_v4l2_buffer *buf;
> +		int ret = PTR_ERR(ctx->icc);
> +
> +		ctx->icc = NULL;
> +		v4l2_err(ctx->priv->vdev.vfd->v4l2_dev, "%s: error %d\n",
> +			 __func__, ret);
> +		while ((buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx)))
> +			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_QUEUED);
> +		while ((buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx)))
> +			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_QUEUED);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static void ipu_csc_scaler_stop_streaming(struct vb2_queue *q)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(q);
> +	struct vb2_v4l2_buffer *buf;
> +
> +	if (ctx->icc) {
> +		ipu_image_convert_unprepare(ctx->icc);
> +		ctx->icc = NULL;
> +	}
> +
> +	ctx->sequence = 0;
> +
> +	if (q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
> +		while ((buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx)))
> +			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
> +	} else {
> +		while ((buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx)))
> +			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_ERROR);
> +	}
> +}
> +
> +static const struct vb2_ops ipu_csc_scaler_qops = {
> +	.queue_setup		= ipu_csc_scaler_queue_setup,
> +	.buf_prepare		= ipu_csc_scaler_buf_prepare,
> +	.buf_queue		= ipu_csc_scaler_buf_queue,
> +	.wait_prepare		= vb2_ops_wait_prepare,
> +	.wait_finish		= vb2_ops_wait_finish,
> +	.start_streaming	= ipu_csc_scaler_start_streaming,
> +	.stop_streaming		= ipu_csc_scaler_stop_streaming,
> +};
> +
> +static int ipu_csc_scaler_queue_init(void *priv, struct vb2_queue *src_vq,
> +				     struct vb2_queue *dst_vq)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = priv;
> +	int ret;
> +
> +	memset(src_vq, 0, sizeof(*src_vq));
> +	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
> +	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> +	src_vq->drv_priv = ctx;
> +	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	src_vq->ops = &ipu_csc_scaler_qops;
> +	src_vq->mem_ops = &vb2_dma_contig_memops;
> +	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> +	src_vq->lock = &ctx->priv->mutex;
> +	src_vq->dev = ctx->priv->dev;
> +
> +	ret = vb2_queue_init(src_vq);
> +	if (ret)
> +		return ret;
> +
> +	memset(dst_vq, 0, sizeof(*dst_vq));
> +	dst_vq->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
> +	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> +	dst_vq->drv_priv = ctx;
> +	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	dst_vq->ops = &ipu_csc_scaler_qops;
> +	dst_vq->mem_ops = &vb2_dma_contig_memops;
> +	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> +	dst_vq->lock = &ctx->priv->mutex;
> +	dst_vq->dev = ctx->priv->dev;
> +
> +	return vb2_queue_init(dst_vq);
> +}
> +
> +static int ipu_csc_scaler_s_ctrl(struct v4l2_ctrl *ctrl)
> +{
> +	struct ipu_csc_scaler_ctx *ctx = container_of(ctrl->handler,
> +						      struct ipu_csc_scaler_ctx,
> +						      ctrl_hdlr);
> +	enum ipu_rotate_mode rot_mode;
> +	int rotate;
> +	bool hflip, vflip;
> +	int ret = 0;
> +
> +	rotate = ctx->rotate;
> +	hflip = ctx->hflip;
> +	vflip = ctx->vflip;
> +
> +	switch (ctrl->id) {
> +	case V4L2_CID_HFLIP:
> +		hflip = ctrl->val;
> +		break;
> +	case V4L2_CID_VFLIP:
> +		vflip = ctrl->val;
> +		break;
> +	case V4L2_CID_ROTATE:
> +		rotate = ctrl->val;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	ret = ipu_degrees_to_rot_mode(&rot_mode, rotate, hflip, vflip);
> +	if (ret)
> +		return ret;
> +
> +	if (rot_mode != ctx->rot_mode) {
> +		struct v4l2_pix_format *in_fmt, *out_fmt;
> +		struct ipu_image test_in, test_out;
> +
> +		in_fmt = &ctx->q_data[V4L2_M2M_SRC].cur_fmt;
> +		out_fmt = &ctx->q_data[V4L2_M2M_DST].cur_fmt;
> +
> +		test_in.pix = *in_fmt;
> +		test_out.pix = *out_fmt;
> +
> +		if (ipu_rot_mode_is_irt(rot_mode) !=
> +		    ipu_rot_mode_is_irt(ctx->rot_mode)) {
> +			/* Switch width and height to keep aspect ratio intact */
> +			test_out.pix.width = out_fmt->height;
> +			test_out.pix.height = out_fmt->width;
> +		}
> +
> +		ipu_image_convert_adjust(&test_in, &test_out, ctx->rot_mode);
> +
> +		/* Check if output format needs to be changed */
> +		if (test_in.pix.width != in_fmt->width ||
> +		    test_in.pix.height != in_fmt->height ||
> +		    test_in.pix.bytesperline != in_fmt->bytesperline ||
> +		    test_in.pix.sizeimage != in_fmt->sizeimage) {
> +			struct vb2_queue *out_q;
> +
> +			out_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
> +						V4L2_BUF_TYPE_VIDEO_OUTPUT);
> +			if (vb2_is_busy(out_q))
> +				return -EBUSY;
> +		}
> +
> +		/* Check if capture format needs to be changed */
> +		if (test_out.pix.width != out_fmt->width ||
> +		    test_out.pix.height != out_fmt->height ||
> +		    test_out.pix.bytesperline != out_fmt->bytesperline ||
> +		    test_out.pix.sizeimage != out_fmt->sizeimage) {
> +			struct vb2_queue *cap_q;
> +
> +			cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
> +						V4L2_BUF_TYPE_VIDEO_CAPTURE);
> +			if (vb2_is_busy(cap_q))
> +				return -EBUSY;
> +		}
> +
> +		*in_fmt = test_in.pix;
> +		*out_fmt = test_out.pix;
> +
> +		ctx->rot_mode = rot_mode;
> +		ctx->rotate = rotate;
> +		ctx->hflip = hflip;
> +		ctx->vflip = vflip;
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_ctrl_ops ipu_csc_scaler_ctrl_ops = {
> +	.s_ctrl = ipu_csc_scaler_s_ctrl,
> +};
> +
> +static int ipu_csc_scaler_init_controls(struct ipu_csc_scaler_ctx *ctx)
> +{
> +	struct v4l2_ctrl_handler *hdlr = &ctx->ctrl_hdlr;
> +
> +	v4l2_ctrl_handler_init(hdlr, 3);
> +
> +	v4l2_ctrl_new_std(hdlr, &ipu_csc_scaler_ctrl_ops, V4L2_CID_HFLIP,
> +			  0, 1, 1, 0);
> +	v4l2_ctrl_new_std(hdlr, &ipu_csc_scaler_ctrl_ops, V4L2_CID_VFLIP,
> +			  0, 1, 1, 0);
> +	v4l2_ctrl_new_std(hdlr, &ipu_csc_scaler_ctrl_ops, V4L2_CID_ROTATE,
> +			  0, 270, 90, 0);
> +
> +	if (hdlr->error) {
> +		v4l2_ctrl_handler_free(hdlr);
> +		return hdlr->error;
> +	}
> +
> +	v4l2_ctrl_handler_setup(hdlr);
> +	return 0;
> +}
> +
> +#define DEFAULT_WIDTH	720
> +#define DEFAULT_HEIGHT	576
> +static const struct ipu_csc_scaler_q_data ipu_csc_scaler_q_data_default = {
> +	.cur_fmt = {
> +		.width = DEFAULT_WIDTH,
> +		.height = DEFAULT_HEIGHT,
> +		.pixelformat = V4L2_PIX_FMT_YUV420,
> +		.field = V4L2_FIELD_NONE,
> +		.bytesperline = DEFAULT_WIDTH,
> +		.sizeimage = DEFAULT_WIDTH * DEFAULT_HEIGHT * 3 / 2,
> +		.colorspace = V4L2_COLORSPACE_SRGB,
> +	},
> +	.rect = {
> +		.width = DEFAULT_WIDTH,
> +		.height = DEFAULT_HEIGHT,
> +	},
> +};
> +
> +/*
> + * File operations
> + */
> +static int ipu_csc_scaler_open(struct file *file)
> +{
> +	struct ipu_csc_scaler_priv *priv = video_drvdata(file);
> +	struct ipu_csc_scaler_ctx *ctx = NULL;
> +	int ret;
> +
> +	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ctx->rot_mode = IPU_ROTATE_NONE;
> +
> +	v4l2_fh_init(&ctx->fh, video_devdata(file));
> +	file->private_data = &ctx->fh;
> +	v4l2_fh_add(&ctx->fh);
> +	ctx->priv = priv;
> +
> +	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(priv->m2m_dev, ctx,
> +					    &ipu_csc_scaler_queue_init);
> +	if (IS_ERR(ctx->fh.m2m_ctx)) {
> +		ret = PTR_ERR(ctx->fh.m2m_ctx);
> +		goto err_ctx;
> +	}
> +
> +	ret = ipu_csc_scaler_init_controls(ctx);
> +	if (ret)
> +		goto err_ctrls;
> +
> +	ctx->fh.ctrl_handler = &ctx->ctrl_hdlr;
> +
> +	ctx->q_data[V4L2_M2M_SRC] = ipu_csc_scaler_q_data_default;
> +	ctx->q_data[V4L2_M2M_DST] = ipu_csc_scaler_q_data_default;
> +
> +	dev_dbg(priv->dev, "Created instance %p, m2m_ctx: %p\n", ctx,
> +		ctx->fh.m2m_ctx);
> +
> +	return 0;
> +
> +err_ctrls:
> +	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
> +err_ctx:
> +	v4l2_fh_del(&ctx->fh);
> +	v4l2_fh_exit(&ctx->fh);
> +	kfree(ctx);
> +	return ret;
> +}
> +
> +static int ipu_csc_scaler_release(struct file *file)
> +{
> +	struct ipu_csc_scaler_priv *priv = video_drvdata(file);
> +	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(file->private_data);
> +
> +	dev_dbg(priv->dev, "Releasing instance %p\n", ctx);
> +
> +	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
> +	v4l2_fh_del(&ctx->fh);
> +	v4l2_fh_exit(&ctx->fh);
> +	kfree(ctx);
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_file_operations ipu_csc_scaler_fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= ipu_csc_scaler_open,
> +	.release	= ipu_csc_scaler_release,
> +	.poll		= v4l2_m2m_fop_poll,
> +	.unlocked_ioctl	= video_ioctl2,
> +	.mmap		= v4l2_m2m_fop_mmap,
> +};
> +
> +static struct v4l2_m2m_ops m2m_ops = {
> +	.device_run	= device_run,
> +	.job_abort	= job_abort,
> +};
> +
> +static const struct video_device ipu_csc_scaler_videodev_template = {
> +	.name		= "ipu_ic_pp csc/scaler",
> +	.fops		= &ipu_csc_scaler_fops,
> +	.ioctl_ops	= &ipu_csc_scaler_ioctl_ops,
> +	.minor		= -1,
> +	.release	= video_device_release,
> +	.vfl_dir	= VFL_DIR_M2M,
> +	.device_caps	= V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING,
> +};
> +
> +int imx_media_csc_scaler_device_register(struct imx_media_video_dev *vdev)
> +{
> +	struct ipu_csc_scaler_priv *priv = vdev_to_priv(vdev);
> +	struct video_device *vfd = vdev->vfd;
> +	int ret;
> +
> +	vfd->v4l2_dev = &priv->md->v4l2_dev;
> +
> +	ret = video_register_device(vfd, VFL_TYPE_GRABBER, -1);
> +	if (ret) {
> +		v4l2_err(vfd->v4l2_dev, "Failed to register video device\n");
> +		return ret;
> +	}
> +
> +	v4l2_info(vfd->v4l2_dev, "Registered %s as /dev/%s\n", vfd->name,
> +		  video_device_node_name(vfd));
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_register);
> +
> +void imx_media_csc_scaler_device_unregister(struct imx_media_video_dev *vdev)
> +{
> +	struct ipu_csc_scaler_priv *priv = vdev_to_priv(vdev);
> +	struct video_device *vfd = priv->vdev.vfd;
> +
> +	mutex_lock(&priv->mutex);
> +
> +	if (video_is_registered(vfd))
> +		video_unregister_device(vfd);
> +
> +	mutex_unlock(&priv->mutex);
> +}
> +EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_unregister);
> +
> +struct imx_media_video_dev *
> +imx_media_csc_scaler_device_init(struct imx_media_dev *md)
> +{
> +	struct ipu_csc_scaler_priv *priv;
> +	struct video_device *vfd;
> +	int ret;
> +
> +	priv = devm_kzalloc(md->md.dev, sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		return ERR_PTR(-ENOMEM);
> +
> +	priv->md = md;
> +	priv->dev = md->md.dev;
> +
> +	mutex_init(&priv->mutex);
> +
> +	vfd = video_device_alloc();
> +	if (!vfd)
> +		return ERR_PTR(-ENOMEM);
> +
> +	*vfd = ipu_csc_scaler_videodev_template;
> +	vfd->lock = &priv->mutex;
> +	priv->vdev.vfd = vfd;
> +
> +	INIT_LIST_HEAD(&priv->vdev.list);
> +
> +	video_set_drvdata(vfd, priv);
> +
> +	priv->m2m_dev = v4l2_m2m_init(&m2m_ops);
> +	if (IS_ERR(priv->m2m_dev)) {
> +		ret = PTR_ERR(priv->m2m_dev);
> +		v4l2_err(&md->v4l2_dev, "Failed to init mem2mem device: %d\n",
> +			 ret);
> +		return ERR_PTR(ret);
> +	}
> +
> +	return &priv->vdev;
> +}
> +EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_init);
> +
> +void imx_media_csc_scaler_device_remove(struct imx_media_video_dev *vdev)
> +{
> +	struct ipu_csc_scaler_priv *priv = vdev_to_priv(vdev);
> +
> +	v4l2_m2m_release(priv->m2m_dev);
> +}
> +EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_remove);
> +
> +MODULE_DESCRIPTION("i.MX IPUv3 mem2mem scaler/CSC driver");
> +MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
> +MODULE_LICENSE("GPL");
> diff --git a/drivers/staging/media/imx/imx-media-dev.c b/drivers/staging/media/imx/imx-media-dev.c
> index 0a7d1d183141..4d2078d18a48 100644
> --- a/drivers/staging/media/imx/imx-media-dev.c
> +++ b/drivers/staging/media/imx/imx-media-dev.c
> @@ -323,12 +323,36 @@ int imx_media_probe_complete(struct v4l2_async_notifier *notifier)
>   		goto unlock;
>   
>   	ret = v4l2_device_register_subdev_nodes(&imxmd->v4l2_dev);
> -unlock:
> -	mutex_unlock(&imxmd->mutex);
>   	if (ret)
> -		return ret;
> +		goto unlock;
> +
> +	imxmd->m2m_vdev = imx_media_csc_scaler_device_init(imxmd);
> +	if (IS_ERR(imxmd->m2m_vdev)) {
> +		ret = PTR_ERR(imxmd->m2m_vdev);
> +		goto unlock;
> +	}
>   
> -	return media_device_register(&imxmd->md);
> +	ret = imx_media_csc_scaler_device_register(imxmd->m2m_vdev);
> +	if (ret)
> +		goto m2m_remove;
> +
> +	mutex_unlock(&imxmd->mutex);
> +
> +	ret = media_device_register(&imxmd->md);
> +	if (ret) {
> +		mutex_lock(&imxmd->mutex);
> +		goto m2m_unreg;
> +	}
> +
> +	return 0;
> +
> +m2m_unreg:
> +	imx_media_csc_scaler_device_unregister(imxmd->m2m_vdev);
> +m2m_remove:
> +	imx_media_csc_scaler_device_remove(imxmd->m2m_vdev);
> +unlock:
> +	mutex_unlock(&imxmd->mutex);
> +	return ret;
>   }
>   
>   /*
> @@ -504,6 +528,8 @@ static int imx_media_remove(struct platform_device *pdev)
>   	v4l2_async_notifier_unregister(&imxmd->notifier);
>   	imx_media_remove_ipu_internal_subdevs(imxmd);
>   	v4l2_async_notifier_cleanup(&imxmd->notifier);
> +	imx_media_csc_scaler_device_unregister(imxmd->m2m_vdev);
> +	imx_media_csc_scaler_device_remove(imxmd->m2m_vdev);
>   	media_device_unregister(&imxmd->md);
>   	v4l2_device_unregister(&imxmd->v4l2_dev);
>   	media_device_cleanup(&imxmd->md);
> diff --git a/drivers/staging/media/imx/imx-media.h b/drivers/staging/media/imx/imx-media.h
> index dd603a6b3a70..1894553b4497 100644
> --- a/drivers/staging/media/imx/imx-media.h
> +++ b/drivers/staging/media/imx/imx-media.h
> @@ -151,6 +151,9 @@ struct imx_media_dev {
>   
>   	/* for async subdev registration */
>   	struct v4l2_async_notifier notifier;
> +
> +	/* IC scaler/CSC mem2mem video device */
> +	struct imx_media_video_dev *m2m_vdev;
>   };
>   
>   enum codespace_sel {
> @@ -281,6 +284,13 @@ void imx_media_capture_device_set_format(struct imx_media_video_dev *vdev,
>   					 const struct v4l2_rect *compose);
>   void imx_media_capture_device_error(struct imx_media_video_dev *vdev);
>   
> +/* imx-media-mem2mem.c */
> +struct imx_media_video_dev *
> +imx_media_csc_scaler_device_init(struct imx_media_dev *dev);
> +void imx_media_csc_scaler_device_remove(struct imx_media_video_dev *vdev);
> +int imx_media_csc_scaler_device_register(struct imx_media_video_dev *vdev);
> +void imx_media_csc_scaler_device_unregister(struct imx_media_video_dev *vdev);
> +
>   /* subdev group ids */
>   #define IMX_MEDIA_GRP_ID_CSI2          BIT(8)
>   #define IMX_MEDIA_GRP_ID_CSI           BIT(9)
Robin Gong June 13, 2019, 8:52 a.m. UTC | #31
Thanks Sven, please have a try with the patch below. I'll send it out
for review later. The root cause is that the channel0 done interrupt may
arrive after the sdma clock has been disabled (sdma_load_firmware()),
which means clearing the channel0 interrupt status in the ISR never
works, and thus an endless stream of interrupts results. But if the
firmware load is delayed until after any other driver using sdma has
probed, the issue is gone because the sdma clock is enabled again in
sdma_alloc_chan_resources(), e.g. by the SPI driver.
Actually, there is no need to trigger an interrupt for channel0, since
the SDMA_H_STATSTOP register is already checked instead.



diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index deea9aa..b5a1ee2 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -742,7 +742,7 @@ static int sdma_load_script(struct sdma_engine
*sdma, void *buf, int size,
        spin_lock_irqsave(&sdma->channel_0_lock, flags);
 
        bd0->mode.command = C0_SETPM;
-       bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+       bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
        bd0->mode.count = size / 2;
        bd0->buffer_addr = buf_phys;
        bd0->ext_buffer_addr = address;
@@ -1064,7 +1064,7 @@ static int sdma_load_context(struct sdma_channel
*sdmac)
        context->gReg[7] = sdmac->watermark_level;
 
        bd0->mode.command = C0_SETDM;
-       bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+       bd0->mode.status = BD_DONE | BD_WRAP | BD_EXTD;
        bd0->mode.count = sizeof(*context) / 4;
        bd0->buffer_addr = sdma->context_phys;
        bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel;

On 2019-06-11 at 17:30 +0000, Sven Van Asbroeck wrote:
> On Tue, Jun 11, 2019 at 5:09 AM Robin Gong <yibin.gong@nxp.com>
> wrote:
> > 
> > 
> > Sven, no any dependency from sdma driver view. The only difference
> > between directly loading firmware
> > from kernel and rootfs is the former spend more time during kernel
> > boot and such timing may cause
> > the crash. The issue is not 100% in my side, about 20% possibility,
> > which looks like 'timing issue' . Another
> > interesting thing is that every time the crash stop at somewhere
> > drm, and After I disable ipu and display
> > which use drm in i.mx6q.dtsi, the issue is gone on my i.mx6q-
> > sabreauto board.
> > Could you have a try with below patch as mine? If the issue is gone
> > on your side, we could involve drm guys to
> > look into it.
> When I apply your patch to ipu and display, the crash still happens
> on
> my device.
> But when I disable NFSv4 network filesystem in defconfig, the crash
> disappears.
> Yet on linux-next, the crash is there again, even if I disable the
> IPU or NFSv4.
> 
> My guess: we are chasing ghosts, the crashes are purely timing
> related. Things
> like disabling the IPU or NFSv4 change boot timing, and this changes
> the crash.
> 
> Experiment: If I put msleep(1000) right before the sdma_load_script()
> call, then
> the crash never happens. And if I comment out the call to
> sdma_run_channel0()
> in sdma_load_script(), then the crash also does not happen.
> 
> This suggests that the crash is related to the exact timing when
> sdma_run_channel0() is called. If it is called too early, this
> results
> in an 'interrupt storm' on the sdma interrupt handler: it gets called
> millions of times in a very short amount of time.
> 
> By adding debug prints, I noticed that the sdma core calls back
> sdma_alloc_chan_resources(), later during the boot, when a spi
> bus is created.
> 
> Experiment: I paused firmware upload until the first time
> sdma_alloc_chan_resources() is called by the core.
> I used a struct completion to accomplish this.
> 
> Result: the crash never happens again.
> 
> All this suggests very strongly that sdma_run_channel0() is called
> "too early" by the driver. I don't know enough about imx-sdma to
> know what is missing during the early call.
> 
> Here is the patch to delay firmware load until the first
> sdma_alloc_chan_resources() has completed:
> 
> diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
> index 99d9f431ae2c..ddeded5c3337 100644
> --- a/drivers/dma/imx-sdma.c
> +++ b/drivers/dma/imx-sdma.c
> @@ -33,6 +33,7 @@
>  #include <linux/of_device.h>
>  #include <linux/of_dma.h>
>  #include <linux/workqueue.h>
> +#include <linux/completion.h>
> 
>  #include <asm/irq.h>
>  #include <linux/platform_data/dma-imx-sdma.h>
> @@ -444,6 +445,7 @@ struct sdma_engine {
>         struct sdma_buffer_descriptor   *bd0;
>         /* clock ratio for AHB:SDMA core. 1:1 is 1, 2:1 is 0*/
>         bool                            clk_ratio;
> +       struct completion               chan_resources_alloced;
>  };
> 
>  static int sdma_config_write(struct dma_chan *chan,
> @@ -1258,6 +1260,7 @@ static void sdma_desc_free(struct virt_dma_desc
> *vd)
>  static int sdma_alloc_chan_resources(struct dma_chan *chan)
>  {
>         struct sdma_channel *sdmac = to_sdma_chan(chan);
> +       struct sdma_engine *sdma = sdmac->sdma;
>         struct imx_dma_data *data = chan->private;
>         struct imx_dma_data mem_data;
>         int prio, ret;
> @@ -1310,6 +1313,7 @@ static int sdma_alloc_chan_resources(struct
> dma_chan *chan)
>         if (ret)
>                 goto disable_clk_ahb;
> 
> +       complete(&sdma->chan_resources_alloced);
>         return 0;
> 
>  disable_clk_ahb:
> @@ -1724,6 +1728,7 @@ static void sdma_load_firmware(const struct
> firmware *fw, void *context)
>                 /* In this case we just use the ROM firmware. */
>                 return;
>         }
> +       wait_for_completion(&sdma->chan_resources_alloced);
> 
>         if (fw->size < sizeof(*header))
>                 goto err_firmware;
> @@ -2012,6 +2017,7 @@ static int sdma_probe(struct platform_device
> *pdev)
>                 return -ENOMEM;
> 
>         spin_lock_init(&sdma->channel_0_lock);
> +       init_completion(&sdma->chan_resources_alloced);
> 
>         sdma->dev = &pdev->dev;
>         sdma->drvdata = drvdata;
Sven Van Asbroeck June 13, 2019, 2 p.m. UTC | #32
On Thu, Jun 13, 2019 at 4:52 AM Robin Gong <yibin.gong@nxp.com> wrote:
>
> Thank Sven, please have a try with the below patch. I'll send it to
> review later.

That's awesome, we are cooking with gas :)

Actually I patched only the sdma_load_script() function (not _context())
and the crash no longer happens.

Cc me on the patch that you'll send out, so I can test it.
Philipp Zabel June 14, 2019, 2:13 p.m. UTC | #33
Hi Steve,

On Tue, 2019-06-11 at 18:08 -0700, Steve Longerbeam wrote:
> Hi Philipp,
> 
> Version 9 will also need to fix merge conflicts due to the recent module 
> re-org and the switch to sync registration for the IPU internal subdevs.
> 
> I've done that work already, feel free to cherry-pick it from my github 
> fork if you agree with the merge fixes:
> 
> git@github.com:slongerbeam/mediatree.git, branch imx/mem2mem.v8.
> 
> Btw, some bugs have been found and fixed in ipu-image-convert.c. I will 
> be posting a patch-set shortly. You can review branch imx/bgthree-2136 
> in my fork for the changes.

Thank you, I'll resend the mem2mem patch next week. I had rebased already,
but I haven't tested unbinding/rebinding yet, as imx-media exploded on me
even without the mem2mem patch when I tried.

regards
Philipp
diff mbox series

Patch

diff --git a/drivers/staging/media/imx/Kconfig b/drivers/staging/media/imx/Kconfig
index f6d220b649fb..9bf14eb2154b 100644
--- a/drivers/staging/media/imx/Kconfig
+++ b/drivers/staging/media/imx/Kconfig
@@ -6,6 +6,7 @@  config VIDEO_IMX_MEDIA
 	depends on HAS_DMA
 	select VIDEOBUF2_DMA_CONTIG
 	select V4L2_FWNODE
+	select V4L2_MEM2MEM_DEV
 	help
 	  Say yes here to enable support for video4linux media controller
 	  driver for the i.MX5/6 SOC.
diff --git a/drivers/staging/media/imx/Makefile b/drivers/staging/media/imx/Makefile
index d2d909a36239..0c86723f1763 100644
--- a/drivers/staging/media/imx/Makefile
+++ b/drivers/staging/media/imx/Makefile
@@ -7,6 +7,7 @@  imx-media-ic-objs := imx-ic-common.o imx-ic-prp.o imx-ic-prpencvf.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-common.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-capture.o
+obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-csc-scaler.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-vdic.o
 obj-$(CONFIG_VIDEO_IMX_MEDIA) += imx-media-ic.o
 
diff --git a/drivers/staging/media/imx/imx-media-csc-scaler.c b/drivers/staging/media/imx/imx-media-csc-scaler.c
new file mode 100644
index 000000000000..4a0ecdfe38e6
--- /dev/null
+++ b/drivers/staging/media/imx/imx-media-csc-scaler.c
@@ -0,0 +1,917 @@ 
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * i.MX IPUv3 IC PP mem2mem CSC/Scaler driver
+ *
+ * Copyright (C) 2011 Pengutronix, Sascha Hauer
+ * Copyright (C) 2018 Pengutronix, Philipp Zabel
+ */
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <video/imx-ipu-v3.h>
+#include <video/imx-ipu-image-convert.h>
+
+#include <media/media-device.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-ioctl.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "imx-media.h"
+
+#define fh_to_ctx(__fh)	container_of(__fh, struct ipu_csc_scaler_ctx, fh)
+
+enum {
+	V4L2_M2M_SRC = 0,
+	V4L2_M2M_DST = 1,
+};
+
+struct ipu_csc_scaler_priv {
+	struct imx_media_video_dev	vdev;
+
+	struct v4l2_m2m_dev		*m2m_dev;
+	struct device			*dev;
+
+	struct imx_media_dev		*md;
+
+	struct mutex			mutex;	/* mem2mem device mutex */
+};
+
+#define vdev_to_priv(v) container_of(v, struct ipu_csc_scaler_priv, vdev)
+
+/* Per-queue, driver-specific private data */
+struct ipu_csc_scaler_q_data {
+	struct v4l2_pix_format		cur_fmt;
+	struct v4l2_rect		rect;
+};
+
+struct ipu_csc_scaler_ctx {
+	struct ipu_csc_scaler_priv	*priv;
+
+	struct v4l2_fh			fh;
+	struct ipu_csc_scaler_q_data	q_data[2];
+	struct ipu_image_convert_ctx	*icc;
+
+	struct v4l2_ctrl_handler	ctrl_hdlr;
+	int				rotate;
+	bool				hflip;
+	bool				vflip;
+	enum ipu_rotate_mode		rot_mode;
+	unsigned int			sequence;
+};
+
+static struct ipu_csc_scaler_q_data *get_q_data(struct ipu_csc_scaler_ctx *ctx,
+						enum v4l2_buf_type type)
+{
+	if (V4L2_TYPE_IS_OUTPUT(type))
+		return &ctx->q_data[V4L2_M2M_SRC];
+	else
+		return &ctx->q_data[V4L2_M2M_DST];
+}
+
+/*
+ * mem2mem callbacks
+ */
+
+static void job_abort(void *_ctx)
+{
+	struct ipu_csc_scaler_ctx *ctx = _ctx;
+
+	if (ctx->icc)
+		ipu_image_convert_abort(ctx->icc);
+}
+
+static void ipu_ic_pp_complete(struct ipu_image_convert_run *run, void *_ctx)
+{
+	struct ipu_csc_scaler_ctx *ctx = _ctx;
+	struct ipu_csc_scaler_priv *priv = ctx->priv;
+	struct vb2_v4l2_buffer *src_buf, *dst_buf;
+
+	src_buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+	dst_buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+
+	v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, true);
+
+	src_buf->sequence = dst_buf->sequence = ctx->sequence++;
+
+	v4l2_m2m_buf_done(src_buf, run->status ? VB2_BUF_STATE_ERROR :
+						 VB2_BUF_STATE_DONE);
+	v4l2_m2m_buf_done(dst_buf, run->status ? VB2_BUF_STATE_ERROR :
+						 VB2_BUF_STATE_DONE);
+
+	v4l2_m2m_job_finish(priv->m2m_dev, ctx->fh.m2m_ctx);
+	kfree(run);
+}
+
+static void device_run(void *_ctx)
+{
+	struct ipu_csc_scaler_ctx *ctx = _ctx;
+	struct ipu_csc_scaler_priv *priv = ctx->priv;
+	struct vb2_v4l2_buffer *src_buf, *dst_buf;
+	struct ipu_image_convert_run *run;
+	int ret;
+
+	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
+	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
+
+	run = kzalloc(sizeof(*run), GFP_KERNEL);
+	if (!run)
+		goto err;
+
+	run->ctx = ctx->icc;
+	run->in_phys = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
+	run->out_phys = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+
+	ret = ipu_image_convert_queue(run);
+	if (ret < 0) {
+		v4l2_err(ctx->priv->vdev.vfd->v4l2_dev,
+			 "%s: failed to queue: %d\n", __func__, ret);
+		goto err;
+	}
+
+	return;
+
+err:
+	v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+	v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+	v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR);
+	v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_ERROR);
+	v4l2_m2m_job_finish(priv->m2m_dev, ctx->fh.m2m_ctx);
+}
+
+/*
+ * Video ioctls
+ */
+static int ipu_csc_scaler_querycap(struct file *file, void *priv,
+				   struct v4l2_capability *cap)
+{
+	strscpy(cap->driver, "imx-media-csc-scaler", sizeof(cap->driver));
+	strscpy(cap->card, "imx-media-csc-scaler", sizeof(cap->card));
+	strscpy(cap->bus_info, "platform:imx-media-csc-scaler",
+		sizeof(cap->bus_info));
+
+	return 0;
+}
+
+static int ipu_csc_scaler_enum_fmt(struct file *file, void *fh,
+				   struct v4l2_fmtdesc *f)
+{
+	u32 fourcc;
+	int ret;
+
+	ret = imx_media_enum_format(&fourcc, f->index, CS_SEL_ANY);
+	if (ret)
+		return ret;
+
+	f->pixelformat = fourcc;
+
+	return 0;
+}
+
+static int ipu_csc_scaler_g_fmt(struct file *file, void *priv,
+				struct v4l2_format *f)
+{
+	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
+	struct ipu_csc_scaler_q_data *q_data;
+
+	q_data = get_q_data(ctx, f->type);
+
+	f->fmt.pix = q_data->cur_fmt;
+
+	return 0;
+}
+
+static int ipu_csc_scaler_try_fmt(struct file *file, void *priv,
+				  struct v4l2_format *f)
+{
+	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
+	struct ipu_csc_scaler_q_data *q_data = get_q_data(ctx, f->type);
+	struct ipu_image test_in, test_out;
+	enum v4l2_field field;
+
+	field = f->fmt.pix.field;
+	if (field == V4L2_FIELD_ANY)
+		field = V4L2_FIELD_NONE;
+	else if (field != V4L2_FIELD_NONE)
+		return -EINVAL;
+
+	if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
+		struct ipu_csc_scaler_q_data *q_data_in =
+			get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+
+		test_out.pix = f->fmt.pix;
+		test_in.pix = q_data_in->cur_fmt;
+	} else {
+		struct ipu_csc_scaler_q_data *q_data_out =
+			get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+
+		test_in.pix = f->fmt.pix;
+		test_out.pix = q_data_out->cur_fmt;
+	}
+
+	ipu_image_convert_adjust(&test_in, &test_out, ctx->rot_mode);
+
+	f->fmt.pix = (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) ?
+		test_out.pix : test_in.pix;
+
+	if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) {
+		f->fmt.pix.colorspace = q_data->cur_fmt.colorspace;
+		f->fmt.pix.ycbcr_enc = q_data->cur_fmt.ycbcr_enc;
+		f->fmt.pix.xfer_func = q_data->cur_fmt.xfer_func;
+		f->fmt.pix.quantization = q_data->cur_fmt.quantization;
+	} else if (f->fmt.pix.colorspace == V4L2_COLORSPACE_DEFAULT) {
+		f->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB;
+		f->fmt.pix.ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
+		f->fmt.pix.xfer_func = V4L2_XFER_FUNC_DEFAULT;
+		f->fmt.pix.quantization = V4L2_QUANTIZATION_DEFAULT;
+	}
+
+	return 0;
+}
+
+static int ipu_csc_scaler_s_fmt(struct file *file, void *priv,
+				struct v4l2_format *f)
+{
+	struct ipu_csc_scaler_q_data *q_data;
+	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
+	struct vb2_queue *vq;
+	int ret;
+
+	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
+	if (vb2_is_busy(vq)) {
+		v4l2_err(ctx->priv->vdev.vfd->v4l2_dev, "%s: queue busy\n",
+			 __func__);
+		return -EBUSY;
+	}
+
+	q_data = get_q_data(ctx, f->type);
+
+	ret = ipu_csc_scaler_try_fmt(file, priv, f);
+	if (ret < 0)
+		return ret;
+
+	q_data->cur_fmt.width = f->fmt.pix.width;
+	q_data->cur_fmt.height = f->fmt.pix.height;
+	q_data->cur_fmt.pixelformat = f->fmt.pix.pixelformat;
+	q_data->cur_fmt.field = f->fmt.pix.field;
+	q_data->cur_fmt.bytesperline = f->fmt.pix.bytesperline;
+	q_data->cur_fmt.sizeimage = f->fmt.pix.sizeimage;
+
+	/* Reset cropping/composing rectangle */
+	q_data->rect.left = 0;
+	q_data->rect.top = 0;
+	q_data->rect.width = q_data->cur_fmt.width;
+	q_data->rect.height = q_data->cur_fmt.height;
+
+	if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
+		/* Set colorimetry on the output queue */
+		q_data->cur_fmt.colorspace = f->fmt.pix.colorspace;
+		q_data->cur_fmt.ycbcr_enc = f->fmt.pix.ycbcr_enc;
+		q_data->cur_fmt.xfer_func = f->fmt.pix.xfer_func;
+		q_data->cur_fmt.quantization = f->fmt.pix.quantization;
+		/* Propagate colorimetry to the capture queue */
+		q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+		q_data->cur_fmt.colorspace = f->fmt.pix.colorspace;
+		q_data->cur_fmt.ycbcr_enc = f->fmt.pix.ycbcr_enc;
+		q_data->cur_fmt.xfer_func = f->fmt.pix.xfer_func;
+		q_data->cur_fmt.quantization = f->fmt.pix.quantization;
+	}
+
+	/*
+	 * TODO: Setting colorimetry on the capture queue is currently not
+	 * supported by the V4L2 API
+	 */
+
+	return 0;
+}
+
+static int ipu_csc_scaler_g_selection(struct file *file, void *priv,
+				      struct v4l2_selection *s)
+{
+	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
+	struct ipu_csc_scaler_q_data *q_data;
+
+	switch (s->target) {
+	case V4L2_SEL_TGT_CROP:
+	case V4L2_SEL_TGT_CROP_DEFAULT:
+	case V4L2_SEL_TGT_CROP_BOUNDS:
+		if (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
+			return -EINVAL;
+		q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+		break;
+	case V4L2_SEL_TGT_COMPOSE:
+	case V4L2_SEL_TGT_COMPOSE_DEFAULT:
+	case V4L2_SEL_TGT_COMPOSE_BOUNDS:
+		if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+			return -EINVAL;
+		q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (s->target == V4L2_SEL_TGT_CROP ||
+	    s->target == V4L2_SEL_TGT_COMPOSE) {
+		s->r = q_data->rect;
+	} else {
+		s->r.left = 0;
+		s->r.top = 0;
+		s->r.width = q_data->cur_fmt.width;
+		s->r.height = q_data->cur_fmt.height;
+	}
+
+	return 0;
+}
+
+static int ipu_csc_scaler_s_selection(struct file *file, void *priv,
+				      struct v4l2_selection *s)
+{
+	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(priv);
+	struct ipu_csc_scaler_q_data *q_data;
+
+	switch (s->target) {
+	case V4L2_SEL_TGT_CROP:
+		if (s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
+			return -EINVAL;
+		break;
+	case V4L2_SEL_TGT_COMPOSE:
+		if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (s->type != V4L2_BUF_TYPE_VIDEO_CAPTURE &&
+	    s->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)
+		return -EINVAL;
+
+	q_data = get_q_data(ctx, s->type);
+
+	/* The input's frame width to the IC must be a multiple of 8 pixels
+	 * When performing resizing the frame width must be multiple of burst
+	 * size - 8 or 16 pixels as defined by CB#_BURST_16 parameter.
+	 */
+	if (s->flags & V4L2_SEL_FLAG_GE)
+		s->r.width = round_up(s->r.width, 8);
+	if (s->flags & V4L2_SEL_FLAG_LE)
+		s->r.width = round_down(s->r.width, 8);
+	s->r.width = clamp_t(unsigned int, s->r.width, 8,
+			     round_down(q_data->cur_fmt.width, 8));
+	s->r.height = clamp_t(unsigned int, s->r.height, 1,
+			      q_data->cur_fmt.height);
+	s->r.left = clamp_t(unsigned int, s->r.left, 0,
+			    q_data->cur_fmt.width - s->r.width);
+	s->r.top = clamp_t(unsigned int, s->r.top, 0,
+			   q_data->cur_fmt.height - s->r.height);
+
+	/* V4L2_SEL_FLAG_KEEP_CONFIG is only valid for subdevices */
+	q_data->rect = s->r;
+
+	return 0;
+}
+
+static const struct v4l2_ioctl_ops ipu_csc_scaler_ioctl_ops = {
+	.vidioc_querycap		= ipu_csc_scaler_querycap,
+
+	.vidioc_enum_fmt_vid_cap	= ipu_csc_scaler_enum_fmt,
+	.vidioc_g_fmt_vid_cap		= ipu_csc_scaler_g_fmt,
+	.vidioc_try_fmt_vid_cap		= ipu_csc_scaler_try_fmt,
+	.vidioc_s_fmt_vid_cap		= ipu_csc_scaler_s_fmt,
+
+	.vidioc_enum_fmt_vid_out	= ipu_csc_scaler_enum_fmt,
+	.vidioc_g_fmt_vid_out		= ipu_csc_scaler_g_fmt,
+	.vidioc_try_fmt_vid_out		= ipu_csc_scaler_try_fmt,
+	.vidioc_s_fmt_vid_out		= ipu_csc_scaler_s_fmt,
+
+	.vidioc_g_selection		= ipu_csc_scaler_g_selection,
+	.vidioc_s_selection		= ipu_csc_scaler_s_selection,
+
+	.vidioc_reqbufs			= v4l2_m2m_ioctl_reqbufs,
+	.vidioc_querybuf		= v4l2_m2m_ioctl_querybuf,
+
+	.vidioc_qbuf			= v4l2_m2m_ioctl_qbuf,
+	.vidioc_expbuf			= v4l2_m2m_ioctl_expbuf,
+	.vidioc_dqbuf			= v4l2_m2m_ioctl_dqbuf,
+	.vidioc_create_bufs		= v4l2_m2m_ioctl_create_bufs,
+	.vidioc_prepare_buf		= v4l2_m2m_ioctl_prepare_buf,
+
+	.vidioc_streamon		= v4l2_m2m_ioctl_streamon,
+	.vidioc_streamoff		= v4l2_m2m_ioctl_streamoff,
+
+	.vidioc_subscribe_event		= v4l2_ctrl_subscribe_event,
+	.vidioc_unsubscribe_event	= v4l2_event_unsubscribe,
+};
+
+/*
+ * Queue operations
+ */
+
+static int ipu_csc_scaler_queue_setup(struct vb2_queue *vq,
+				      unsigned int *nbuffers,
+				      unsigned int *nplanes,
+				      unsigned int sizes[],
+				      struct device *alloc_devs[])
+{
+	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(vq);
+	struct ipu_csc_scaler_q_data *q_data;
+	unsigned int size, count = *nbuffers;
+
+	q_data = get_q_data(ctx, vq->type);
+
+	size = q_data->cur_fmt.sizeimage;
+
+	*nbuffers = count;
+
+	if (*nplanes)
+		return sizes[0] < size ? -EINVAL : 0;
+
+	*nplanes = 1;
+	sizes[0] = size;
+
+	dev_dbg(ctx->priv->dev, "get %d buffer(s) of size %d each.\n",
+		count, size);
+
+	return 0;
+}
+
+static int ipu_csc_scaler_buf_prepare(struct vb2_buffer *vb)
+{
+	struct vb2_queue *vq = vb->vb2_queue;
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(vq);
+	struct ipu_csc_scaler_q_data *q_data;
+	unsigned long size;
+
+	dev_dbg(ctx->priv->dev, "type: %d\n", vq->type);
+
+	if (V4L2_TYPE_IS_OUTPUT(vq->type)) {
+		if (vbuf->field == V4L2_FIELD_ANY)
+			vbuf->field = V4L2_FIELD_NONE;
+		if (vbuf->field != V4L2_FIELD_NONE) {
+			dev_dbg(ctx->priv->dev, "%s: field isn't supported\n",
+				__func__);
+			return -EINVAL;
+		}
+	}
+
+	q_data = get_q_data(ctx, vq->type);
+	size = q_data->cur_fmt.sizeimage;
+
+	if (vb2_plane_size(vb, 0) < size) {
+		dev_dbg(ctx->priv->dev,
+			"%s: data will not fit into plane (%lu < %lu)\n",
+			__func__, vb2_plane_size(vb, 0), size);
+		return -EINVAL;
+	}
+
+	vb2_set_plane_payload(vb, 0, size);
+
+	return 0;
+}
+
+static void ipu_csc_scaler_buf_queue(struct vb2_buffer *vb)
+{
+	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+
+	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, to_vb2_v4l2_buffer(vb));
+}
+
+/*
+ * Copy width, height, stride, pixel format and rectangle of a queue's
+ * current format into an ipu_image. All other members of *im are left
+ * untouched.
+ */
+static void ipu_image_from_q_data(struct ipu_image *im,
+				  struct ipu_csc_scaler_q_data *q_data)
+{
+	const struct v4l2_pix_format *fmt = &q_data->cur_fmt;
+
+	im->rect = q_data->rect;
+	im->pix.pixelformat = fmt->pixelformat;
+	im->pix.bytesperline = fmt->bytesperline;
+	im->pix.height = fmt->height;
+	im->pix.width = fmt->width;
+}
+
+/*
+ * vb2 start_streaming callback.
+ *
+ * The image conversion context is only prepared once both the output and
+ * the capture queue are streaming, so the first queue to start returns 0
+ * without doing anything. A conversion context left over from an earlier
+ * run is unprepared (with a warning) before a new one is created.
+ *
+ * Returns 0 on success or the error from ipu_image_convert_prepare(); in
+ * the error case all source and destination buffers held by the mem2mem
+ * context are handed back in the QUEUED state.
+ */
+static int ipu_csc_scaler_start_streaming(struct vb2_queue *q,
+					  unsigned int count)
+{
+	const enum ipu_ic_task ic_task = IC_TASK_POST_PROCESSOR;
+	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(q);
+	struct ipu_csc_scaler_priv *priv = ctx->priv;
+	struct ipu_soc *ipu = priv->md->ipu[0];
+	struct ipu_csc_scaler_q_data *q_data;
+	struct vb2_queue *other_q;
+	/*
+	 * Zero-initialized because ipu_image_from_q_data() fills in only
+	 * part of the structure; the remaining members must not be handed
+	 * to the image converter as uninitialized stack contents.
+	 */
+	struct ipu_image in = {}, out = {};
+
+	other_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
+				  (q->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) ?
+				  V4L2_BUF_TYPE_VIDEO_OUTPUT :
+				  V4L2_BUF_TYPE_VIDEO_CAPTURE);
+	if (!vb2_is_streaming(other_q))
+		return 0;
+
+	if (ctx->icc) {
+		v4l2_warn(ctx->priv->vdev.vfd->v4l2_dev, "removing old ICC\n");
+		ipu_image_convert_unprepare(ctx->icc);
+	}
+
+	q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
+	ipu_image_from_q_data(&in, q_data);
+
+	q_data = get_q_data(ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
+	ipu_image_from_q_data(&out, q_data);
+
+	ctx->icc = ipu_image_convert_prepare(ipu, ic_task, &in, &out,
+					     ctx->rot_mode,
+					     ipu_ic_pp_complete, ctx);
+	if (IS_ERR(ctx->icc)) {
+		struct vb2_v4l2_buffer *buf;
+		int ret = PTR_ERR(ctx->icc);
+
+		/* Never leave an error pointer behind in the context. */
+		ctx->icc = NULL;
+		v4l2_err(ctx->priv->vdev.vfd->v4l2_dev, "%s: error %d\n",
+			 __func__, ret);
+		while ((buf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx)))
+			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_QUEUED);
+		while ((buf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx)))
+			v4l2_m2m_buf_done(buf, VB2_BUF_STATE_QUEUED);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * vb2 stop_streaming callback: tear down the conversion context if there
+ * is one, reset the sequence counter and return every buffer still queued
+ * on the stopped side in the ERROR state.
+ */
+static void ipu_csc_scaler_stop_streaming(struct vb2_queue *q)
+{
+	struct ipu_csc_scaler_ctx *ctx = vb2_get_drv_priv(q);
+	bool is_output = q->type == V4L2_BUF_TYPE_VIDEO_OUTPUT;
+	struct vb2_v4l2_buffer *vbuf;
+
+	if (ctx->icc) {
+		ipu_image_convert_unprepare(ctx->icc);
+		ctx->icc = NULL;
+	}
+
+	ctx->sequence = 0;
+
+	for (;;) {
+		if (is_output)
+			vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+		else
+			vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+		if (!vbuf)
+			break;
+
+		v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
+	}
+}
+
+/* videobuf2 operations shared by the output and the capture queue. */
+static const struct vb2_ops ipu_csc_scaler_qops = {
+	/* buffer handling */
+	.queue_setup		= ipu_csc_scaler_queue_setup,
+	.buf_prepare		= ipu_csc_scaler_buf_prepare,
+	.buf_queue		= ipu_csc_scaler_buf_queue,
+	/* streaming control */
+	.start_streaming	= ipu_csc_scaler_start_streaming,
+	.stop_streaming		= ipu_csc_scaler_stop_streaming,
+	/* generic vb2 wait helpers */
+	.wait_prepare		= vb2_ops_wait_prepare,
+	.wait_finish		= vb2_ops_wait_finish,
+};
+
+/*
+ * Reset and initialize one vb2 queue of the given buffer type for a
+ * context. Both queues of a context use the same settings apart from the
+ * buffer type.
+ */
+static int ipu_csc_scaler_init_vq(struct vb2_queue *vq, unsigned int type,
+				  struct ipu_csc_scaler_ctx *ctx)
+{
+	memset(vq, 0, sizeof(*vq));
+
+	vq->type = type;
+	vq->io_modes = VB2_MMAP | VB2_DMABUF;
+	vq->drv_priv = ctx;
+	vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+	vq->ops = &ipu_csc_scaler_qops;
+	vq->mem_ops = &vb2_dma_contig_memops;
+	vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+	vq->lock = &ctx->priv->mutex;
+	vq->dev = ctx->priv->dev;
+
+	return vb2_queue_init(vq);
+}
+
+/*
+ * mem2mem queue_init callback: set up the source (output) queue first and
+ * the destination (capture) queue only if that succeeded.
+ */
+static int ipu_csc_scaler_queue_init(void *priv, struct vb2_queue *src_vq,
+				     struct vb2_queue *dst_vq)
+{
+	struct ipu_csc_scaler_ctx *ctx = priv;
+	int ret;
+
+	ret = ipu_csc_scaler_init_vq(src_vq, V4L2_BUF_TYPE_VIDEO_OUTPUT, ctx);
+	if (ret)
+		return ret;
+
+	return ipu_csc_scaler_init_vq(dst_vq, V4L2_BUF_TYPE_VIDEO_CAPTURE,
+				      ctx);
+}
+
+/*
+ * Control handler for HFLIP, VFLIP and ROTATE.
+ *
+ * The new control value is combined with the other two cached settings
+ * into an IPU rotation mode. If that mode differs from the current one,
+ * the output and capture formats are re-adjusted for it. When the
+ * adjustment would alter the format of a busy queue, nothing is changed
+ * and -EBUSY is returned.
+ *
+ * Returns 0 on success, -EINVAL for an unknown control, -EBUSY as
+ * described above, or the error from ipu_degrees_to_rot_mode().
+ */
+static int ipu_csc_scaler_s_ctrl(struct v4l2_ctrl *ctrl)
+{
+	struct ipu_csc_scaler_ctx *ctx = container_of(ctrl->handler,
+						      struct ipu_csc_scaler_ctx,
+						      ctrl_hdlr);
+	enum ipu_rotate_mode rot_mode;
+	int rotate;
+	bool hflip, vflip;
+	int ret = 0;
+
+	/* Start from the cached state and override the control being set */
+	rotate = ctx->rotate;
+	hflip = ctx->hflip;
+	vflip = ctx->vflip;
+
+	switch (ctrl->id) {
+	case V4L2_CID_HFLIP:
+		hflip = ctrl->val;
+		break;
+	case V4L2_CID_VFLIP:
+		vflip = ctrl->val;
+		break;
+	case V4L2_CID_ROTATE:
+		rotate = ctrl->val;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = ipu_degrees_to_rot_mode(&rot_mode, rotate, hflip, vflip);
+	if (ret)
+		return ret;
+
+	if (rot_mode != ctx->rot_mode) {
+		struct v4l2_pix_format *in_fmt, *out_fmt;
+		struct ipu_image test_in, test_out;
+
+		in_fmt = &ctx->q_data[V4L2_M2M_SRC].cur_fmt;
+		out_fmt = &ctx->q_data[V4L2_M2M_DST].cur_fmt;
+
+		test_in.pix = *in_fmt;
+		test_out.pix = *out_fmt;
+
+		if (ipu_rot_mode_is_irt(rot_mode) !=
+		    ipu_rot_mode_is_irt(ctx->rot_mode)) {
+			/* Switch width and height to keep aspect ratio intact */
+			test_out.pix.width = out_fmt->height;
+			test_out.pix.height = out_fmt->width;
+		}
+
+		/*
+		 * Adjust for the mode being switched to, not the one still
+		 * stored in the context: the formats computed here are the
+		 * ones that will be used together with the new rot_mode.
+		 */
+		ipu_image_convert_adjust(&test_in, &test_out, rot_mode);
+
+		/* Check if output format needs to be changed */
+		if (test_in.pix.width != in_fmt->width ||
+		    test_in.pix.height != in_fmt->height ||
+		    test_in.pix.bytesperline != in_fmt->bytesperline ||
+		    test_in.pix.sizeimage != in_fmt->sizeimage) {
+			struct vb2_queue *out_q;
+
+			out_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
+						V4L2_BUF_TYPE_VIDEO_OUTPUT);
+			if (vb2_is_busy(out_q))
+				return -EBUSY;
+		}
+
+		/* Check if capture format needs to be changed */
+		if (test_out.pix.width != out_fmt->width ||
+		    test_out.pix.height != out_fmt->height ||
+		    test_out.pix.bytesperline != out_fmt->bytesperline ||
+		    test_out.pix.sizeimage != out_fmt->sizeimage) {
+			struct vb2_queue *cap_q;
+
+			cap_q = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
+						V4L2_BUF_TYPE_VIDEO_CAPTURE);
+			if (vb2_is_busy(cap_q))
+				return -EBUSY;
+		}
+
+		/* Commit formats and control state only after all checks */
+		*in_fmt = test_in.pix;
+		*out_fmt = test_out.pix;
+
+		ctx->rot_mode = rot_mode;
+		ctx->rotate = rotate;
+		ctx->hflip = hflip;
+		ctx->vflip = vflip;
+	}
+
+	return 0;
+}
+
+/* Control operations: only s_ctrl is implemented. */
+static const struct v4l2_ctrl_ops ipu_csc_scaler_ctrl_ops = {
+	.s_ctrl = ipu_csc_scaler_s_ctrl,
+};
+
+/*
+ * Create the HFLIP, VFLIP and ROTATE (0..270 in steps of 90) controls of
+ * a context and apply their default values.
+ *
+ * Returns 0 on success or the control handler's error code; in the error
+ * case the handler has already been freed.
+ */
+static int ipu_csc_scaler_init_controls(struct ipu_csc_scaler_ctx *ctx)
+{
+	struct v4l2_ctrl_handler *hdlr = &ctx->ctrl_hdlr;
+
+	v4l2_ctrl_handler_init(hdlr, 3);
+
+	v4l2_ctrl_new_std(hdlr, &ipu_csc_scaler_ctrl_ops, V4L2_CID_HFLIP,
+			  0, 1, 1, 0);
+	v4l2_ctrl_new_std(hdlr, &ipu_csc_scaler_ctrl_ops, V4L2_CID_VFLIP,
+			  0, 1, 1, 0);
+	v4l2_ctrl_new_std(hdlr, &ipu_csc_scaler_ctrl_ops, V4L2_CID_ROTATE,
+			  0, 270, 90, 0);
+
+	if (hdlr->error) {
+		/*
+		 * Save the error first: v4l2_ctrl_handler_free() resets
+		 * hdlr->error, so reading it afterwards would turn the
+		 * failure into a bogus success.
+		 */
+		int err = hdlr->error;
+
+		v4l2_ctrl_handler_free(hdlr);
+		return err;
+	}
+
+	v4l2_ctrl_handler_setup(hdlr);
+	return 0;
+}
+
+#define DEFAULT_WIDTH	720
+#define DEFAULT_HEIGHT	576
+
+/*
+ * Format that both queues of a freshly opened context start out with:
+ * progressive 720x576 YUV420 with a rectangle covering the full frame.
+ */
+static const struct ipu_csc_scaler_q_data ipu_csc_scaler_q_data_default = {
+	.rect = {
+		.width = DEFAULT_WIDTH,
+		.height = DEFAULT_HEIGHT,
+	},
+	.cur_fmt = {
+		.pixelformat = V4L2_PIX_FMT_YUV420,
+		.colorspace = V4L2_COLORSPACE_SRGB,
+		.field = V4L2_FIELD_NONE,
+		.width = DEFAULT_WIDTH,
+		.height = DEFAULT_HEIGHT,
+		.bytesperline = DEFAULT_WIDTH,
+		.sizeimage = DEFAULT_WIDTH * DEFAULT_HEIGHT * 3 / 2,
+	},
+};
+
+/*
+ * File operations
+ */
+/*
+ * Open a new conversion context: allocate it, register its file handle,
+ * create the mem2mem context and the controls, and start both queues out
+ * with the default format.
+ */
+static int ipu_csc_scaler_open(struct file *file)
+{
+	struct ipu_csc_scaler_priv *priv = video_drvdata(file);
+	struct ipu_csc_scaler_ctx *ctx;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->priv = priv;
+	ctx->rot_mode = IPU_ROTATE_NONE;
+
+	v4l2_fh_init(&ctx->fh, video_devdata(file));
+	file->private_data = &ctx->fh;
+	v4l2_fh_add(&ctx->fh);
+
+	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(priv->m2m_dev, ctx,
+					    ipu_csc_scaler_queue_init);
+	if (IS_ERR(ctx->fh.m2m_ctx)) {
+		ret = PTR_ERR(ctx->fh.m2m_ctx);
+		goto err_del_fh;
+	}
+
+	ret = ipu_csc_scaler_init_controls(ctx);
+	if (ret)
+		goto err_release_m2m;
+
+	ctx->fh.ctrl_handler = &ctx->ctrl_hdlr;
+
+	ctx->q_data[V4L2_M2M_SRC] = ipu_csc_scaler_q_data_default;
+	ctx->q_data[V4L2_M2M_DST] = ipu_csc_scaler_q_data_default;
+
+	dev_dbg(priv->dev, "Created instance %p, m2m_ctx: %p\n", ctx,
+		ctx->fh.m2m_ctx);
+
+	return 0;
+
+err_release_m2m:
+	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
+err_del_fh:
+	v4l2_fh_del(&ctx->fh);
+	v4l2_fh_exit(&ctx->fh);
+	kfree(ctx);
+	return ret;
+}
+
+/*
+ * Release a conversion context: free its controls, release the mem2mem
+ * context, unregister the file handle and free the context itself.
+ * Always returns 0.
+ */
+static int ipu_csc_scaler_release(struct file *file)
+{
+	struct ipu_csc_scaler_priv *priv = video_drvdata(file);
+	struct ipu_csc_scaler_ctx *ctx = fh_to_ctx(file->private_data);
+
+	dev_dbg(priv->dev, "Releasing instance %p\n", ctx);
+
+	/*
+	 * The control handler was set up in open(); without freeing it
+	 * here its controls would be leaked on every close.
+	 */
+	v4l2_ctrl_handler_free(&ctx->ctrl_hdlr);
+	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
+	v4l2_fh_del(&ctx->fh);
+	v4l2_fh_exit(&ctx->fh);
+	kfree(ctx);
+
+	return 0;
+}
+
+/*
+ * File operations: open/release are driver specific, poll and mmap use
+ * the mem2mem helpers and ioctls go through video_ioctl2.
+ */
+static const struct v4l2_file_operations ipu_csc_scaler_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ipu_csc_scaler_open,
+	.release	= ipu_csc_scaler_release,
+	.unlocked_ioctl	= video_ioctl2,
+	.mmap		= v4l2_m2m_fop_mmap,
+	.poll		= v4l2_m2m_fop_poll,
+};
+
+/*
+ * mem2mem framework callbacks. The table is never modified, so keep it
+ * const like the other operation tables in this file.
+ */
+static const struct v4l2_m2m_ops m2m_ops = {
+	.device_run	= device_run,
+	.job_abort	= job_abort,
+};
+
+/* Template copied into the video_device allocated at init time. */
+static const struct video_device ipu_csc_scaler_videodev_template = {
+	.name		= "ipu_ic_pp csc/scaler",
+	.vfl_dir	= VFL_DIR_M2M,
+	.device_caps	= V4L2_CAP_VIDEO_M2M | V4L2_CAP_STREAMING,
+	.minor		= -1,
+	.fops		= &ipu_csc_scaler_fops,
+	.ioctl_ops	= &ipu_csc_scaler_ioctl_ops,
+	.release	= video_device_release,
+};
+
+/*
+ * Attach the video device to the media device's v4l2_device and register
+ * it. Returns 0 on success or the error from video_register_device().
+ */
+int imx_media_csc_scaler_device_register(struct imx_media_video_dev *vdev)
+{
+	struct video_device *vfd = vdev->vfd;
+	int ret;
+
+	vfd->v4l2_dev = &vdev_to_priv(vdev)->md->v4l2_dev;
+
+	ret = video_register_device(vfd, VFL_TYPE_GRABBER, -1);
+	if (!ret) {
+		v4l2_info(vfd->v4l2_dev, "Registered %s as /dev/%s\n",
+			  vfd->name, video_device_node_name(vfd));
+		return 0;
+	}
+
+	v4l2_err(vfd->v4l2_dev, "Failed to register video device\n");
+	return ret;
+}
+EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_register);
+
+/*
+ * Unregister the video device if it is currently registered; the check
+ * and the unregistration are done under the device mutex.
+ */
+void imx_media_csc_scaler_device_unregister(struct imx_media_video_dev *vdev)
+{
+	struct ipu_csc_scaler_priv *priv = vdev_to_priv(vdev);
+	struct video_device *vfd = priv->vdev.vfd;
+	struct mutex *lock = &priv->mutex;
+
+	mutex_lock(lock);
+	if (video_is_registered(vfd))
+		video_unregister_device(vfd);
+	mutex_unlock(lock);
+}
+EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_unregister);
+
+/*
+ * Allocate and set up the csc/scaler mem2mem device for a media device:
+ * driver private data (device managed), the video_device (filled in from
+ * the template) and the mem2mem device.
+ *
+ * Returns the embedded imx_media_video_dev on success or an ERR_PTR() on
+ * failure. The video device is not registered here.
+ */
+struct imx_media_video_dev *
+imx_media_csc_scaler_device_init(struct imx_media_dev *md)
+{
+	struct ipu_csc_scaler_priv *priv;
+	struct video_device *vfd;
+	int ret;
+
+	priv = devm_kzalloc(md->md.dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	priv->md = md;
+	priv->dev = md->md.dev;
+
+	mutex_init(&priv->mutex);
+
+	vfd = video_device_alloc();
+	if (!vfd)
+		return ERR_PTR(-ENOMEM);
+
+	*vfd = ipu_csc_scaler_videodev_template;
+	vfd->lock = &priv->mutex;
+	priv->vdev.vfd = vfd;
+
+	INIT_LIST_HEAD(&priv->vdev.list);
+
+	video_set_drvdata(vfd, priv);
+
+	priv->m2m_dev = v4l2_m2m_init(&m2m_ops);
+	if (IS_ERR(priv->m2m_dev)) {
+		ret = PTR_ERR(priv->m2m_dev);
+		v4l2_err(&md->v4l2_dev, "Failed to init mem2mem device: %d\n",
+			 ret);
+		goto err_release_vfd;
+	}
+
+	return &priv->vdev;
+
+err_release_vfd:
+	/*
+	 * The video device was never registered, so nothing else will
+	 * release it; free it here to avoid leaking it, and do not leave
+	 * a dangling pointer behind in the private data.
+	 */
+	priv->vdev.vfd = NULL;
+	video_device_release(vfd);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_init);
+
+/* Release the mem2mem device created by the init function. */
+void imx_media_csc_scaler_device_remove(struct imx_media_video_dev *vdev)
+{
+	v4l2_m2m_release(vdev_to_priv(vdev)->m2m_dev);
+}
+EXPORT_SYMBOL_GPL(imx_media_csc_scaler_device_remove);
+
+MODULE_DESCRIPTION("i.MX IPUv3 mem2mem scaler/CSC driver");
+MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/media/imx/imx-media-dev.c b/drivers/staging/media/imx/imx-media-dev.c
index 0a7d1d183141..4d2078d18a48 100644
--- a/drivers/staging/media/imx/imx-media-dev.c
+++ b/drivers/staging/media/imx/imx-media-dev.c
@@ -323,12 +323,36 @@  int imx_media_probe_complete(struct v4l2_async_notifier *notifier)
 		goto unlock;
 
 	ret = v4l2_device_register_subdev_nodes(&imxmd->v4l2_dev);
-unlock:
-	mutex_unlock(&imxmd->mutex);
 	if (ret)
-		return ret;
+		goto unlock;
+
+	imxmd->m2m_vdev = imx_media_csc_scaler_device_init(imxmd);
+	if (IS_ERR(imxmd->m2m_vdev)) {
+		ret = PTR_ERR(imxmd->m2m_vdev);
+		goto unlock;
+	}
 
-	return media_device_register(&imxmd->md);
+	ret = imx_media_csc_scaler_device_register(imxmd->m2m_vdev);
+	if (ret)
+		goto m2m_remove;
+
+	mutex_unlock(&imxmd->mutex);
+
+	ret = media_device_register(&imxmd->md);
+	if (ret) {
+		mutex_lock(&imxmd->mutex);
+		goto m2m_unreg;
+	}
+
+	return 0;
+
+m2m_unreg:
+	imx_media_csc_scaler_device_unregister(imxmd->m2m_vdev);
+m2m_remove:
+	imx_media_csc_scaler_device_remove(imxmd->m2m_vdev);
+unlock:
+	mutex_unlock(&imxmd->mutex);
+	return ret;
 }
 
 /*
@@ -504,6 +528,8 @@  static int imx_media_remove(struct platform_device *pdev)
 	v4l2_async_notifier_unregister(&imxmd->notifier);
 	imx_media_remove_ipu_internal_subdevs(imxmd);
 	v4l2_async_notifier_cleanup(&imxmd->notifier);
+	imx_media_csc_scaler_device_unregister(imxmd->m2m_vdev);
+	imx_media_csc_scaler_device_remove(imxmd->m2m_vdev);
 	media_device_unregister(&imxmd->md);
 	v4l2_device_unregister(&imxmd->v4l2_dev);
 	media_device_cleanup(&imxmd->md);
diff --git a/drivers/staging/media/imx/imx-media.h b/drivers/staging/media/imx/imx-media.h
index dd603a6b3a70..1894553b4497 100644
--- a/drivers/staging/media/imx/imx-media.h
+++ b/drivers/staging/media/imx/imx-media.h
@@ -151,6 +151,9 @@  struct imx_media_dev {
 
 	/* for async subdev registration */
 	struct v4l2_async_notifier notifier;
+
+	/* IC scaler/CSC mem2mem video device */
+	struct imx_media_video_dev *m2m_vdev;
 };
 
 enum codespace_sel {
@@ -281,6 +284,13 @@  void imx_media_capture_device_set_format(struct imx_media_video_dev *vdev,
 					 const struct v4l2_rect *compose);
 void imx_media_capture_device_error(struct imx_media_video_dev *vdev);
 
+/* imx-media-csc-scaler.c */
+struct imx_media_video_dev *
+imx_media_csc_scaler_device_init(struct imx_media_dev *dev);
+void imx_media_csc_scaler_device_remove(struct imx_media_video_dev *vdev);
+int imx_media_csc_scaler_device_register(struct imx_media_video_dev *vdev);
+void imx_media_csc_scaler_device_unregister(struct imx_media_video_dev *vdev);
+
 /* subdev group ids */
 #define IMX_MEDIA_GRP_ID_CSI2          BIT(8)
 #define IMX_MEDIA_GRP_ID_CSI           BIT(9)