diff mbox series

[RFC,V4,4/4] platform: mtk-isp: Add Mediatek FD driver

Message ID 20191204124732.10932-5-Jerry-Ch.chen@mediatek.com (mailing list archive)
State New, archived
Headers show
Series media: platform: Add support for Face Detection (FD) on mt8183 SoC | expand

Commit Message

Jerry-ch Chen Dec. 4, 2019, 12:47 p.m. UTC
From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>

This patch adds the driver of Face Detection (FD) unit in
Mediatek camera system, providing face detection function.

The mtk-isp directory will contain drivers for multiple IP
blocks found in Mediatek ISP system. It will include ISP Pass 1
driver (CAM), sensor interface driver, DIP driver and face
detection driver.

Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
---
 drivers/media/platform/Kconfig                |    2 +
 drivers/media/platform/Makefile               |    2 +
 drivers/media/platform/mtk-isp/fd/Kconfig     |   19 +
 drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
 drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  149 ++
 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1279 +++++++++++++++++
 include/uapi/linux/v4l2-controls.h            |    4 +
 include/uapi/linux/videodev2.h                |    3 +
 8 files changed, 1463 insertions(+)
 create mode 100644 drivers/media/platform/mtk-isp/fd/Kconfig
 create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
 create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
 create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c

Comments

Tomasz Figa May 21, 2020, 6:28 p.m. UTC | #1
Hi Jerry,

On Wed, Dec 04, 2019 at 08:47:32PM +0800, Jerry-ch Chen wrote:
> From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> 
> This patch adds the driver of Face Detection (FD) unit in
> Mediatek camera system, providing face detection function.
> 
> The mtk-isp directory will contain drivers for multiple IP
> blocks found in Mediatek ISP system. It will include ISP Pass 1
> driver (CAM), sensor interface driver, DIP driver and face
> detection driver.
> 
> Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> ---
>  drivers/media/platform/Kconfig                |    2 +
>  drivers/media/platform/Makefile               |    2 +
>  drivers/media/platform/mtk-isp/fd/Kconfig     |   19 +
>  drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
>  drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  149 ++
>  drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1279 +++++++++++++++++
>  include/uapi/linux/v4l2-controls.h            |    4 +
>  include/uapi/linux/videodev2.h                |    3 +
>  8 files changed, 1463 insertions(+)
>  create mode 100644 drivers/media/platform/mtk-isp/fd/Kconfig
>  create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
>  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
>  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> 

Thank you for the patch. Please see my comments inline. Really sorry for
the much delayed review.

[snip]
> +config VIDEO_MEDIATEK_FD
> +	tristate "Mediatek face detection processing function"
> +	depends on VIDEO_V4L2
> +	depends on ARCH_MEDIATEK
> +	select VIDEOBUF2_DMA_CONTIG
> +	select VIDEOBUF2_CORE
> +	select VIDEOBUF2_V4L2
> +	select VIDEOBUF2_MEMOPS
> +	select MEDIA_CONTROLLER
> +	select MTK_SCP
> +
> +	default n
> +	help
> +		Support the Face Detection (FD) feature in the Mediatek
> +		mt8183 Soc.

MT8183?

> +
> +		FD driver is a V4L2 memory-to-memory device driver which
> +		provides hardware accelerated face detection function,
> +		it can detect different sizes of faces in a raw image.

A YUV image I guess?

[snip]
> diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> new file mode 100644
> index 000000000000..d85bdcb70d6d
> --- /dev/null
> +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> @@ -0,0 +1,149 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +//
> +// Copyright (c) 2018 MediaTek Inc.
> +
> +#ifndef __MTK_FD_HW_H__
> +#define __MTK_FD_HW_H__
> +
> +#include <linux/completion.h>
> +#include <linux/io.h>
> +#include <linux/types.h>
> +#include <linux/platform_device.h>
> +#include <media/v4l2-ctrls.h>
> +#include <media/v4l2-device.h>
> +#include <media/videobuf2-v4l2.h>
> +
> +#define MTK_FD_OUTPUT_MIN_WIDTH			26U
> +#define MTK_FD_OUTPUT_MIN_HEIGHT		26U
> +#define MTK_FD_OUTPUT_MAX_WIDTH			640U
> +#define MTK_FD_OUTPUT_MAX_HEIGHT		480U
> +
> +#define MTK_FD_HW_FMT_VYUY			2
> +#define MTK_FD_HW_FMT_UYVY			3
> +#define MTK_FD_HW_FMT_YVYU			4
> +#define MTK_FD_HW_FMT_YUYV			5
> +#define MTK_FD_HW_FMT_YVU_2P			6
> +#define MTK_FD_HW_FMT_YUV_2P			7
> +#define MTK_FD_HW_FMT_UNKNOWN			8

What is an unknown format?

> +
> +#define MTK_FD_IPI_CMD_INIT			0
> +#define MTK_FD_IPI_CMD_INIT_ACK			1
> +#define MTK_FD_IPI_CMD_ENQUEUE			2
> +#define MTK_FD_IPI_CMD_ENQ_ACK			3
> +#define MTK_FD_IPI_CMD_EXIT			4
> +#define MTK_FD_IPI_CMD_EXIT_ACK			5
> +#define MTK_FD_IPI_CMD_RESET			6
> +#define MTK_FD_IPI_CMD_RESET_ACK		7
> +
> +#define MTK_FD_REG_OFFSET_HW_ENABLE		0x4
> +#define MTK_FD_REG_OFFSET_INT_EN		0x15c
> +#define MTK_FD_REG_OFFSET_INT_VAL		0x168
> +#define MTK_FD_REG_OFFSET_RESULT		0x178
> +
> +#define MTK_FD_SET_HW_ENABLE			0x111
> +#define MTK_FD_RS_BUF_SIZE			2289664
> +#define MTK_FD_HW_WORK_BUF_SIZE			0x100000

How about using the SZ_1M?

> +#define MTK_FD_MAX_SPEEDUP			7
> +#define MTK_FD_MAX_RESULT_NUM			1026

Perhaps NUM_RESULTS?

> +
> +/* Max scale size counts */
> +#define MTK_FD_SCALE_ARR_NUM			15

Perhaps NUM_SCALE_SIZES?

> +
> +#define MTK_FD_HW_TIMEOUT			1000

What's the unit?

> +
> +enum face_angle {
> +	MTK_FD_FACE_FRONT,
> +	MTK_FD_FACE_RIGHT_50,
> +	MTK_FD_FACE_LEFT_50,
> +	MTK_FD_FACE_RIGHT_90,
> +	MTK_FD_FACE_LEFT_90,
> +	MTK_FD_FACE_ANGLE_NUM,
> +};

This enum seems to define values for the V4L2_CID_MTK_FD_DETECT_POSE
control. Considering that this is an enumeration and the values are
actually integers (-90, -50, 0, 50, 90), perhaps this should be an
INTEGER_MENU control instead?

> +
> +struct fd_buffer {
> +	__u32 scp_addr;	/* used by SCP */
> +	__u32 dma_addr;	/* used by DMA HW */
> +} __packed;
> +
> +struct fd_face_result {
> +	char data[16];
> +};
> +
> +struct fd_user_output {
> +	struct fd_face_result results[MTK_FD_MAX_RESULT_NUM];
> +	__u16 number;

Is this perhaps the number of results? If so, would num_results be a better
name?

> +};

Since this struct is the meta buffer format, it is a part of the userspace
interface and should be defined in a header under include/uapi/linux/.

> +
> +struct user_param {
> +	u8 fd_speedup;
> +	u8 fd_extra_model;
> +	u8 scale_img_num;
> +	u8 src_img_fmt;
> +	__u16 scale_img_width[MTK_FD_SCALE_ARR_NUM];
> +	__u16 scale_img_height[MTK_FD_SCALE_ARR_NUM];
> +	__u16 face_directions[MTK_FD_FACE_ANGLE_NUM];

Is this a user-facing definition or an interface between the kernel driver
and firmware? If the latter, the __ types shouldn't be used.

> +} __packed;
> +
> +struct fd_init_param {
> +	struct fd_buffer fd_manager;
> +	__u32 rs_dma_addr;

Ditto.

> +} __packed;
> +
> +struct fd_enq_param {
> +	__u64 output_vaddr;

Ditto.

> +	struct fd_buffer src_img[2];
> +	struct fd_buffer user_result;
> +	struct user_param user_param;
> +} __packed;
> +
> +struct fd_ack_param {
> +	__u32 ret_code;
> +	__u32 ret_msg;

Ditto.

> +} __packed;
[snip]
> +/*  */

Was there supposed to be a comment here? :)

> +static int mtk_fd_hw_alloc_rs_dma_addr(struct mtk_fd_dev *fd)
> +{
> +	struct device *dev = fd->dev;
> +	void *va;
> +	dma_addr_t dma_handle;
> +
> +	va = dma_alloc_coherent(dev, MTK_FD_RS_BUF_SIZE, &dma_handle,
> +				GFP_KERNEL);
> +	if (!va) {
> +		dev_err(dev, "dma_alloc null va\n");

No need to print errors for memory allocation failures, because one will be
printed automatically.

[snip]
> +static int mtk_fd_hw_connect(struct mtk_fd_dev *fd)
> +{
> +	int ret;
> +
> +	ret = rproc_boot(fd->rproc_handle);
> +

nit: Unnecessary blank line.

[snip]
> +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> +				  unsigned int *num_buffers,
> +				  unsigned int *num_planes,
> +				  unsigned int sizes[],
> +				  struct device *alloc_devs[])
> +{
> +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> +	unsigned int size[2];
> +	unsigned int plane;
> +
> +	switch (vq->type) {
> +	case V4L2_BUF_TYPE_META_CAPTURE:
> +		size[0] = ctx->dst_fmt.buffersize;
> +		break;
> +	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> +		size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> +		if (*num_planes == 2)
> +			size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> +		break;
> +	}

Is this code above needed? The code below sets sizes[] and it uses a for loop,
without opencoded assignment for the second plane.

> +
> +	if (*num_planes > 2)
> +		return -EINVAL;
> +	if (*num_planes == 0) {
> +		if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
> +			sizes[0] = ctx->dst_fmt.buffersize;
> +			*num_planes = 1;
> +			return 0;
> +		}
> +
> +		*num_planes = ctx->src_fmt.num_planes;
> +		for (plane = 0; plane < *num_planes; plane++)
> +			sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
> +		return 0;
> +	}
> +
> +	for (plane = 0; plane < *num_planes; plane++) {
> +		if (sizes[plane] < size[plane])
> +			return -EINVAL;
> +	}
> +	return 0;
> +}
[snip]
> +static int mtk_fd_enum_fmt_out_mp(struct file *file, void *fh,
> +				  struct v4l2_fmtdesc *f)
> +{
> +	if (f->index >= NUM_FORMATS)

Please use ARRAY_SIZE().

> +		return -EINVAL;
> +
> +	f->pixelformat = mtk_fd_img_fmts[f->index].pixelformat;
> +	return 0;
> +}
> +
> +static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> +				  const struct v4l2_pix_format_mplane *sfmt)
> +{
> +	dfmt->field = V4L2_FIELD_NONE;
> +	dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> +	dfmt->num_planes = sfmt->num_planes;
> +	dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> +	dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> +	dfmt->xfer_func =
> +		V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> +
> +	/* Keep user setting as possible */
> +	dfmt->width = clamp(dfmt->width,
> +			    MTK_FD_OUTPUT_MIN_WIDTH,
> +			    MTK_FD_OUTPUT_MAX_WIDTH);
> +	dfmt->height = clamp(dfmt->height,
> +			     MTK_FD_OUTPUT_MIN_HEIGHT,
> +			     MTK_FD_OUTPUT_MAX_HEIGHT);
> +
> +	if (sfmt->num_planes == 2) {
> +		/* NV16M and NV61M has 1 byte per pixel */
> +		dfmt->plane_fmt[0].bytesperline = dfmt->width;
> +		dfmt->plane_fmt[1].bytesperline = dfmt->width;
> +	} else {
> +		/* 2 bytes per pixel */
> +		dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> +	}
> +
> +	dfmt->plane_fmt[0].sizeimage =
> +		dfmt->height * dfmt->plane_fmt[0].bytesperline;

Could some of the code above be replaced with v4l2_fill_pixfmt_mp()?

> +}
> +
> +static const struct v4l2_pix_format_mplane *mtk_fd_find_fmt(u32 format)
> +{
> +	unsigned int i;
> +	const struct v4l2_pix_format_mplane *dev_fmt;
> +
> +	for (i = 0; i < NUM_FORMATS; i++) {

Please use ARRAY_SIZE rather than a custom macro.

> +		dev_fmt = &mtk_fd_img_fmts[i];
> +		if (dev_fmt->pixelformat == format)
> +			return dev_fmt;
> +	}
> +
> +	return NULL;
> +}
> +
> +static int mtk_fd_try_fmt_out_mp(struct file *file,
> +				 void *fh,
> +				 struct v4l2_format *f)
> +{
> +	struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
> +	const struct v4l2_pix_format_mplane *fmt;
> +
> +	fmt = mtk_fd_find_fmt(pix_mp->pixelformat);
> +	if (!fmt)
> +		fmt = &mtk_fd_img_fmts[0];	/* Get default img fmt */

nit: Please move the comment to a separate line and add braces.

> +
> +	mtk_fd_fill_pixfmt_mp(pix_mp, fmt);
> +	return 0;
> +}
[snip]
> +static unsigned int get_fd_img_fmt(unsigned int fourcc)
> +{
> +	switch (fourcc) {
> +	case V4L2_PIX_FMT_VYUY:
> +		return MTK_FD_HW_FMT_VYUY;
> +	case V4L2_PIX_FMT_YUYV:
> +		return MTK_FD_HW_FMT_YUYV;
> +	case V4L2_PIX_FMT_YVYU:
> +		return MTK_FD_HW_FMT_YVYU;
> +	case V4L2_PIX_FMT_UYVY:
> +		return MTK_FD_HW_FMT_UYVY;
> +	case V4L2_PIX_FMT_NV16M:
> +		return MTK_FD_HW_FMT_YUV_2P;
> +	case V4L2_PIX_FMT_NV61M:
> +		return MTK_FD_HW_FMT_YVU_2P;
> +	default:
> +		return MTK_FD_HW_FMT_UNKNOWN;

If we want to be paranoid, we can just add a WARN here and return 0.
Shouldn't be a need to define an unknown format.

[snip]
> +static void mtk_fd_fill_user_param(struct user_param *user_param,
> +				   struct v4l2_ctrl_handler *hdl)
> +{
> +	struct v4l2_ctrl *ctrl;
> +	int i;
> +
> +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_DOWN_IMG_WIDTH);
> +	if (ctrl)
> +		for (i = 0; i < ctrl->elems; i++)
> +			user_param->scale_img_width[i] = ctrl->p_new.p_u16[i];
> +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_DOWN_IMG_HEIGHT);
> +	if (ctrl)
> +		for (i = 0; i < ctrl->elems; i++)
> +			user_param->scale_img_height[i] = ctrl->p_new.p_u16[i];
> +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_IMG_NUM);
> +	if (ctrl)
> +		user_param->scale_img_num = ctrl->val;
> +

nit: Either separate the code dealing with all controls from each other, or
none.

> +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECT_POSE);
> +	if (ctrl)
> +		for (i = 0; i < ctrl->elems; i++)
> +			user_param->face_directions[i] = ctrl->p_new.p_u16[i];
> +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECT_SPEED);
> +	if (ctrl)
> +		user_param->fd_speedup = ctrl->val;
> +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECTION_MODEL);
> +	if (ctrl)
> +		user_param->fd_extra_model = ctrl->val;
> +}
> +
> +static void mtk_fd_device_run(void *priv)
> +{
> +	struct mtk_fd_ctx *ctx = priv;
> +	struct mtk_fd_dev *fd = ctx->fd_dev;
> +	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> +	struct fd_enq_param fd_param;
> +	void *plane_vaddr;
> +
> +	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> +
> +	fd_param.src_img[0].dma_addr =
> +		vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
> +	fd_param.user_result.dma_addr =
> +		vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> +	plane_vaddr = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
> +	fd_param.output_vaddr = (u64)(unsigned long)plane_vaddr;

Why is the vaddr needed here? Specfically, it could pose a security problem
if CPU virtual addresses are exchanged with firmware.

> +	fd_param.user_param.src_img_fmt =
> +		get_fd_img_fmt(ctx->src_fmt.pixelformat);
> +	if (ctx->src_fmt.num_planes == 2)
> +		fd_param.src_img[1].dma_addr =
> +			vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 1);

nit: Could this be moved above, to be just below src_img[0] initialization,
for readability reasons?

> +	mtk_fd_fill_user_param(&fd_param.user_param, &ctx->hdl);
> +
> +	/* Complete request controls if any */
> +	v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
> +
> +	fd->output = plane_vaddr;
> +	mtk_fd_hw_job_exec(fd, &fd_param);
> +}
> +
> +static struct v4l2_m2m_ops fd_m2m_ops = {
> +	.device_run = mtk_fd_device_run,
> +};
> +
> +static const struct media_device_ops fd_m2m_media_ops = {
> +	.req_validate	= vb2_request_validate,
> +	.req_queue	= v4l2_m2m_request_queue,
> +};
> +
> +static int mtk_fd_video_device_register(struct mtk_fd_dev *fd)
> +{
> +	struct video_device *vfd = &fd->vfd;
> +	struct v4l2_m2m_dev *m2m_dev = fd->m2m_dev;
> +	struct device *dev = fd->dev;
> +	int ret;
> +
> +	vfd->fops = &fd_video_fops;
> +	vfd->release = video_device_release;
> +	vfd->lock = &fd->vfd_lock;
> +	vfd->v4l2_dev = &fd->v4l2_dev;
> +	vfd->vfl_dir = VFL_DIR_M2M;
> +	vfd->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_OUTPUT_MPLANE |
> +		V4L2_CAP_META_CAPTURE;
> +	vfd->ioctl_ops = &mtk_fd_v4l2_video_out_ioctl_ops;
> +
> +	strscpy(vfd->name, dev_driver_string(dev), sizeof(vfd->name));
> +
> +	video_set_drvdata(vfd, fd);
> +
> +	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
> +	if (ret) {
> +		dev_err(dev, "Failed to register video device\n");
> +		goto err_free_dev;
> +	}
> +
> +	ret = v4l2_m2m_register_media_controller(m2m_dev, vfd,
> +					     MEDIA_ENT_F_PROC_VIDEO_STATISTICS);
> +	if (ret) {
> +		dev_err(dev, "Failed to init mem2mem media controller\n");
> +		goto err_unreg_video;
> +	}
> +	return 0;
> +
> +err_unreg_video:
> +	video_unregister_device(vfd);
> +err_free_dev:
> +	video_device_release(vfd);
> +	return ret;
> +}
> +
> +static int mtk_fd_dev_v4l2_init(struct mtk_fd_dev *fd)
> +{
> +	struct media_device *mdev = &fd->mdev;
> +	struct device *dev = fd->dev;
> +	int ret;
> +
> +	ret = v4l2_device_register(dev, &fd->v4l2_dev);
> +	if (ret) {
> +		dev_err(dev, "Failed to register v4l2 device\n");
> +		return ret;
> +	}
> +
> +	fd->m2m_dev = v4l2_m2m_init(&fd_m2m_ops);
> +	if (IS_ERR(fd->m2m_dev)) {
> +		dev_err(dev, "Failed to init mem2mem device\n");
> +		ret = PTR_ERR(fd->m2m_dev);
> +		goto err_unreg_v4l2_dev;
> +	}
> +
> +	mdev->dev = dev;
> +	strscpy(mdev->model, dev_driver_string(dev), sizeof(mdev->model));
> +	snprintf(mdev->bus_info, sizeof(mdev->bus_info),
> +		 "platform:%s", dev_name(dev));
> +	media_device_init(mdev);
> +	mdev->ops = &fd_m2m_media_ops;
> +	fd->v4l2_dev.mdev = mdev;
> +
> +	ret = mtk_fd_video_device_register(fd);
> +	if (ret) {
> +		dev_err(dev, "Failed to register video device\n");
> +		goto err_cleanup_mdev;
> +	}
> +
> +	ret = media_device_register(mdev);
> +	if (ret) {
> +		dev_err(dev, "Failed to register mem2mem media device\n");
> +		goto err_unreg_vdev;
> +	}
> +
> +	return 0;
> +
> +err_unreg_vdev:
> +	v4l2_m2m_unregister_media_controller(fd->m2m_dev);
> +	video_unregister_device(&fd->vfd);
> +	video_device_release(&fd->vfd);

This is inconsistent with the registration, which registers all of these in
a function. Perhaps it would be worth to move this cleanup to a
mtk_fd_video_device_unregister() function?

> +err_cleanup_mdev:
> +	media_device_cleanup(mdev);
> +	v4l2_m2m_release(fd->m2m_dev);
> +err_unreg_v4l2_dev:
> +	v4l2_device_unregister(&fd->v4l2_dev);
> +	return ret;
> +}
> +
> +static void mtk_fd_dev_v4l2_release(struct mtk_fd_dev *fd)
> +{
> +	v4l2_m2m_unregister_media_controller(fd->m2m_dev);
> +	video_unregister_device(&fd->vfd);
> +	video_device_release(&fd->vfd);
> +	media_device_cleanup(&fd->mdev);
> +	v4l2_m2m_release(fd->m2m_dev);
> +	v4l2_device_unregister(&fd->v4l2_dev);
> +}
> +
> +static irqreturn_t mtk_fd_irq(int irq, void *data)
> +{
> +	struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> +
> +	/* must read this register otherwise HW will keep sending irq */
> +	readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> +	fd->output->number = readl(fd->fd_base + MTK_FD_REG_OFFSET_RESULT);
> +	dev_dbg(fd->dev, "mtk_fd_face_num:%d\n", fd->output->number);
> +
> +	mtk_fd_hw_done(fd, VB2_BUF_STATE_DONE);
> +	return IRQ_HANDLED;
> +}
> +
> +static int mtk_fd_hw_get_scp_mem(struct mtk_fd_dev *fd)
> +{
> +	struct device *dev = fd->dev;
> +	dma_addr_t addr;
> +	void *ptr;
> +	u32 ret;
> +
> +	/*
> +	 * Allocate coherent reserved memory for SCP firmware usage.
> +	 * The size of SCP composer's memory is fixed to 0x100000
> +	 * for the requirement of firmware.
> +	 */
> +	ptr = dma_alloc_coherent(&fd->scp_pdev->dev,
> +				 MTK_FD_HW_WORK_BUF_SIZE, &addr, GFP_KERNEL);
> +	if (!ptr)
> +		return -ENOMEM;
> +
> +	fd->scp_mem.scp_addr = addr;
> +	fd->scp_mem_virt_addr = ptr;
> +	dev_info(dev, "scp addr:%pad va:%pK\n", &addr, ptr);

These addresses are nothing a user should be concerned about, so please
don't use the _info level here. If you think this is an important debugging
information, please make it dev_dbg(). Otherwise, please just remove.

> +
> +	/*
> +	 * This reserved memory is also be used by FD HW.
> +	 * Need to get iova address for FD DMA.
> +	 */
> +	addr = dma_map_resource(dev, addr, MTK_FD_HW_WORK_BUF_SIZE,
> +				DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);

Note that the second argument to dma_map_resource() is phys_addr_t, but the
code above passes a DMA address. This works only by luck, because both
physical and SCP DMA address space have the same addresses. To be fully
correct, dma_map_single() needs to be used, with the kernel virtual address
passed to it, but currently it doesn't handle the memory from a reserved
pool. Thus, we can only keep the hack as is, but please add a comment
explaining it, e.g.

/*
 * FIXME: Assume SCP DMA and physical addresses are the same until
 * dma_map_single() is fixed to handle reserved memory allocations.
 */

> +	if (dma_mapping_error(dev, addr)) {
> +		dev_err(dev, "Failed to map scp iova\n");
> +		ret = -ENOMEM;
> +		goto fail_free_mem;
> +	}
> +	fd->scp_mem.dma_addr = addr;
> +	dev_info(dev, "scp iova addr:%pad\n", &addr);

Ditto.

Best regards,
Tomasz
Jerry-ch Chen May 22, 2020, 2:10 p.m. UTC | #2
Hi Tomasz,

On Thu, 2020-05-21 at 18:28 +0000, Tomasz Figa wrote:
> Hi Jerry,
> 
> On Wed, Dec 04, 2019 at 08:47:32PM +0800, Jerry-ch Chen wrote:
> > From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > 
> > This patch adds the driver of Face Detection (FD) unit in
> > Mediatek camera system, providing face detection function.
> > 
> > The mtk-isp directory will contain drivers for multiple IP
> > blocks found in Mediatek ISP system. It will include ISP Pass 1
> > driver (CAM), sensor interface driver, DIP driver and face
> > detection driver.
> > 
> > Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > ---
> >  drivers/media/platform/Kconfig                |    2 +
> >  drivers/media/platform/Makefile               |    2 +
> >  drivers/media/platform/mtk-isp/fd/Kconfig     |   19 +
> >  drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
> >  drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  149 ++
> >  drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1279 +++++++++++++++++
> >  include/uapi/linux/v4l2-controls.h            |    4 +
> >  include/uapi/linux/videodev2.h                |    3 +
> >  8 files changed, 1463 insertions(+)
> >  create mode 100644 drivers/media/platform/mtk-isp/fd/Kconfig
> >  create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
> >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
> >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> > 
> 
> Thank you for the patch. Please see my comments inline. Really sorry for
> the much delayed review.
> 

I appreciate your comments, here's the reply

> [snip]
> > +config VIDEO_MEDIATEK_FD
> > +	tristate "Mediatek face detection processing function"
> > +	depends on VIDEO_V4L2
> > +	depends on ARCH_MEDIATEK
> > +	select VIDEOBUF2_DMA_CONTIG
> > +	select VIDEOBUF2_CORE
> > +	select VIDEOBUF2_V4L2
> > +	select VIDEOBUF2_MEMOPS
> > +	select MEDIA_CONTROLLER
> > +	select MTK_SCP
> > +
> > +	default n
> > +	help
> > +		Support the Face Detection (FD) feature in the Mediatek
> > +		mt8183 Soc.
> 
> MT8183?
Fixed.

> 
> > +
> > +		FD driver is a V4L2 memory-to-memory device driver which
> > +		provides hardware accelerated face detection function,
> > +		it can detect different sizes of faces in a raw image.
> 
> A YUV image I guess?
Fixed

> 
> [snip]
> > diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > new file mode 100644
> > index 000000000000..d85bdcb70d6d
> > --- /dev/null
> > +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > @@ -0,0 +1,149 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +//
> > +// Copyright (c) 2018 MediaTek Inc.
> > +
> > +#ifndef __MTK_FD_HW_H__
> > +#define __MTK_FD_HW_H__
> > +
> > +#include <linux/completion.h>
> > +#include <linux/io.h>
> > +#include <linux/types.h>
> > +#include <linux/platform_device.h>
> > +#include <media/v4l2-ctrls.h>
> > +#include <media/v4l2-device.h>
> > +#include <media/videobuf2-v4l2.h>
> > +
> > +#define MTK_FD_OUTPUT_MIN_WIDTH			26U
> > +#define MTK_FD_OUTPUT_MIN_HEIGHT		26U
> > +#define MTK_FD_OUTPUT_MAX_WIDTH			640U
> > +#define MTK_FD_OUTPUT_MAX_HEIGHT		480U
> > +
> > +#define MTK_FD_HW_FMT_VYUY			2
> > +#define MTK_FD_HW_FMT_UYVY			3
> > +#define MTK_FD_HW_FMT_YVYU			4
> > +#define MTK_FD_HW_FMT_YUYV			5
> > +#define MTK_FD_HW_FMT_YVU_2P			6
> > +#define MTK_FD_HW_FMT_YUV_2P			7
> > +#define MTK_FD_HW_FMT_UNKNOWN			8
> 
> What is an unknown format?
It's used in translate v4l2 pixel format to the value that FD HW
recognized. But I think the case of UNKNWOWN shouldn't happened.
I will remove it

> 
> > +
> > +#define MTK_FD_IPI_CMD_INIT			0
> > +#define MTK_FD_IPI_CMD_INIT_ACK			1
> > +#define MTK_FD_IPI_CMD_ENQUEUE			2
> > +#define MTK_FD_IPI_CMD_ENQ_ACK			3
> > +#define MTK_FD_IPI_CMD_EXIT			4
> > +#define MTK_FD_IPI_CMD_EXIT_ACK			5
> > +#define MTK_FD_IPI_CMD_RESET			6
> > +#define MTK_FD_IPI_CMD_RESET_ACK		7
> > +
> > +#define MTK_FD_REG_OFFSET_HW_ENABLE		0x4
> > +#define MTK_FD_REG_OFFSET_INT_EN		0x15c
> > +#define MTK_FD_REG_OFFSET_INT_VAL		0x168
> > +#define MTK_FD_REG_OFFSET_RESULT		0x178
> > +
> > +#define MTK_FD_SET_HW_ENABLE			0x111
> > +#define MTK_FD_RS_BUF_SIZE			2289664
> > +#define MTK_FD_HW_WORK_BUF_SIZE			0x100000
> 
> How about using the SZ_1M?
> 
yes, fixed.
> > +#define MTK_FD_MAX_SPEEDUP			7
> > +#define MTK_FD_MAX_RESULT_NUM			1026
> 
> Perhaps NUM_RESULTS?
> 
fixed as MTK_FD_MAX_NUM_RESULT.

> > +
> > +/* Max scale size counts */
> > +#define MTK_FD_SCALE_ARR_NUM			15
> 
> Perhaps NUM_SCALE_SIZES?
> 
fixed as MTK_FD_NUM_SCALE_SIZE

> > +
> > +#define MTK_FD_HW_TIMEOUT			1000
> 
> What's the unit?
it's in milliseconds, I would like to rename it to
MTK_FD_HW_TIMEOUT_MSEC

> 
> > +
> > +enum face_angle {
> > +	MTK_FD_FACE_FRONT,
> > +	MTK_FD_FACE_RIGHT_50,
> > +	MTK_FD_FACE_LEFT_50,
> > +	MTK_FD_FACE_RIGHT_90,
> > +	MTK_FD_FACE_LEFT_90,
> > +	MTK_FD_FACE_ANGLE_NUM,
> > +};
> 
> This enum seems to define values for the V4L2_CID_MTK_FD_DETECT_POSE
> control. Considering that this is an enumeration and the values are
> actually integers (-90, -50, 0, 50, 90), perhaps this should be an
> INTEGER_MENU control instead?
> 

this ioctl let user select multiple face positions(combination of angles
and directions) to be detected. so I thought I am not able to use the
INTEGER_MENU for this purpose.

A bit-field as following should be used by user.
I consider adding it to uapi.

struct face_direction_def {
__u16 MTK_FD_FACE_DIR_0 : 1,
	MTK_FD_FACE_DIR_30 : 1,
	MTK_FD_FACE_DIR_60 : 1,
	MTK_FD_FACE_DIR_90 : 1,
	MTK_FD_FACE_DIR_120 : 1,
	MTK_FD_FACE_DIR_150 : 1,
	MTK_FD_FACE_DIR_180 : 1,
	MTK_FD_FACE_DIR_210 : 1,
	MTK_FD_FACE_DIR_240 : 1,
	MTK_FD_FACE_DIR_270 : 1,
	MTK_FD_FACE_DIR_300 : 1,
	MTK_FD_FACE_DIR_330 : 1,
	: 4;
};

User can also select some face directions of each face angle in one
ioctl, for example:

/* 
 * u16 face_directions[MTK_FD_FACE_ANGLE_NUM] = {0};
 *
 *	face_directions[MTK_FD_FACE_FRONT] = 0x7; //angle:0, dir:0,30,60
 *	face_directions[MTK_FACE_RIGHT_50] = 0x2; //angle:50, dir:30 
 * 
 */

> > +
> > +struct fd_buffer {
> > +	__u32 scp_addr;	/* used by SCP */
> > +	__u32 dma_addr;	/* used by DMA HW */
> > +} __packed;
fd buffer is used for scp ipi

> > +
> > +struct fd_face_result {
> > +	char data[16];
> > +};
fd_face_result is used for user, so it should be moved to
include/uapi/linux.
In fact, it has bit-field definition for user, so I would like to define
it in include/uapi/linux as following:

struct fd_face_result {
  __u64 face_idx : 12,
	type : 1,
	x0 : 10,
	y0 : 10,
	x1 : 10,
	y1 : 10,
	fcv1 : 11;
  __u64 fcv2 : 7,
	rip_dir : 4,
	rop_dir : 3,
	det_size : 5;
};


> > +
> > +struct fd_user_output {
> > +	struct fd_face_result results[MTK_FD_MAX_RESULT_NUM];
> > +	__u16 number;
> 
> Is this perhaps the number of results? If so, would num_results be a better
> name?
> 
yes, fixed.
> > +};
> 
> Since this struct is the meta buffer format, it is a part of the userspace
> interface and should be defined in a header under include/uapi/linux/.
> 
Ok, I will create include/uapi/linux/mtk_fd_40.h
which suppose to include structures that userspace will use.
should the private IOCTLs be placed in it together?


> > +
> > +struct user_param {
> > +	u8 fd_speedup;
> > +	u8 fd_extra_model;
> > +	u8 scale_img_num;
> > +	u8 src_img_fmt;
> > +	__u16 scale_img_width[MTK_FD_SCALE_ARR_NUM];
> > +	__u16 scale_img_height[MTK_FD_SCALE_ARR_NUM];
> > +	__u16 face_directions[MTK_FD_FACE_ANGLE_NUM];
> 
> Is this a user-facing definition or an interface between the kernel driver
> and firmware? If the latter, the __ types shouldn't be used.
> 
It's the later, I'll fix it.


> > +} __packed;
> > +
> > +struct fd_init_param {
> > +	struct fd_buffer fd_manager;
> > +	__u32 rs_dma_addr;
> 
> Ditto.
> 
done.

> > +} __packed;
> > +
> > +struct fd_enq_param {
> > +	__u64 output_vaddr;
> 
> Ditto.
done.

> 
> > +	struct fd_buffer src_img[2];
> > +	struct fd_buffer user_result;
> > +	struct user_param user_param;
> > +} __packed;
> > +
> > +struct fd_ack_param {
> > +	__u32 ret_code;
> > +	__u32 ret_msg;
> 
> Ditto.
done.

> 
> > +} __packed;
> [snip]
> > +/*  */
> 
> Was there supposed to be a comment here? :)
Ooops, no, it should be removed.
> 
> > +static int mtk_fd_hw_alloc_rs_dma_addr(struct mtk_fd_dev *fd)
> > +{
> > +	struct device *dev = fd->dev;
> > +	void *va;
> > +	dma_addr_t dma_handle;
> > +
> > +	va = dma_alloc_coherent(dev, MTK_FD_RS_BUF_SIZE, &dma_handle,
> > +				GFP_KERNEL);
> > +	if (!va) {
> > +		dev_err(dev, "dma_alloc null va\n");
> 
> No need to print errors for memory allocation failures, because one will be
> printed automatically.
Ok, log removed.
> 
> [snip]
> > +static int mtk_fd_hw_connect(struct mtk_fd_dev *fd)
> > +{
> > +	int ret;
> > +
> > +	ret = rproc_boot(fd->rproc_handle);
> > +
> 
> nit: Unnecessary blank line.
> 
fixed.

> [snip]
> > +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > +				  unsigned int *num_buffers,
> > +				  unsigned int *num_planes,
> > +				  unsigned int sizes[],
> > +				  struct device *alloc_devs[])
> > +{
> > +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > +	unsigned int size[2];
> > +	unsigned int plane;
> > +
> > +	switch (vq->type) {
> > +	case V4L2_BUF_TYPE_META_CAPTURE:
> > +		size[0] = ctx->dst_fmt.buffersize;
> > +		break;
> > +	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > +		size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > +		if (*num_planes == 2)
> > +			size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > +		break;
> > +	}
> 
> Is this code above needed? The code below sets sizes[] and it uses a for loop,
> without opencoded assignment for the second plane.
> 

Looks like not really useful here,
it should check sizes and num_planes if num_plane not zero,
and for V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, it will at most have 2
planes, maybe no need for loop as well.
I will refine this function as following:
mtk_fd_vb2_queue_setup(...)
{
	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);

	if (*num_planes == 0) {
		if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
			sizes[0] = ctx->dst_fmt.buffersize;
			*num_planes = 1;
			return 0;
		} else if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
			*num_planes = ctx->src_fmt.num_planes;
			sizes[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
			if (*num_planes == 2)
				sizes[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
			return 0;
		}
		return -EINVAL;
	}

	/* If num_plane not zero, check the num_plane and sizes*/
	if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
		if ((*num_planes == 1) &&
		    (sizes[0] <= ctx->dst_fmt.buffersize))
			return 0;
		else
			return -EINVAL;
	}
	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
		if ((*num_planes == 1) &&
		    (sizes[0] <= ctx->src_fmt.plane_fmt[0].sizeimage))
			return 0;
		else if ((*num_planes == 2) &&
			 (sizes[0] <= ctx->src_fmt.plane_fmt[0].sizeimage) &&
			 (sizes[1] <= ctx->src_fmt.plane_fmt[1].sizeimage))
			return 0;
		else
			return -EINVAL;

	}
	return 0;
}

> > +
> > +	if (*num_planes > 2)
> > +		return -EINVAL;
> > +	if (*num_planes == 0) {
> > +		if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
> > +			sizes[0] = ctx->dst_fmt.buffersize;
> > +			*num_planes = 1;
> > +			return 0;
> > +		 }
> > +
> > +		*num_planes = ctx->src_fmt.num_planes;
> > +		for (plane = 0; plane < *num_planes; plane++)
> > +			sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
> > +		return 0;
> > +	}
> > +
> > +	for (plane = 0; plane < *num_planes; plane++) {
> > +		if (sizes[plane] < size[plane])
> > +			return -EINVAL;
> > +	}
> > +	return 0;
> > +}
> [snip]
> > +static int mtk_fd_enum_fmt_out_mp(struct file *file, void *fh,
> > +				  struct v4l2_fmtdesc *f)
> > +{
> > +	if (f->index >= NUM_FORMATS)
> 
> Please use ARRAY_SIZE().
fixed.

> 
> > +		return -EINVAL;
> > +
> > +	f->pixelformat = mtk_fd_img_fmts[f->index].pixelformat;
> > +	return 0;
> > +}
> > +
> > +static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> > +				  const struct v4l2_pix_format_mplane *sfmt)
> > +{
> > +	dfmt->field = V4L2_FIELD_NONE;
> > +	dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> > +	dfmt->num_planes = sfmt->num_planes;
> > +	dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > +	dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> > +	dfmt->xfer_func =
> > +		V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> > +
> > +	/* Keep user setting as possible */
> > +	dfmt->width = clamp(dfmt->width,
> > +			    MTK_FD_OUTPUT_MIN_WIDTH,
> > +			    MTK_FD_OUTPUT_MAX_WIDTH);
> > +	dfmt->height = clamp(dfmt->height,
> > +			     MTK_FD_OUTPUT_MIN_HEIGHT,
> > +			     MTK_FD_OUTPUT_MAX_HEIGHT);
> > +
> > +	if (sfmt->num_planes == 2) {
> > +		/* NV16M and NV61M has 1 byte per pixel */
> > +		dfmt->plane_fmt[0].bytesperline = dfmt->width;
> > +		dfmt->plane_fmt[1].bytesperline = dfmt->width;
> > +	} else {
> > +		/* 2 bytes per pixel */
> > +		dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> > +	}
> > +
> > +	dfmt->plane_fmt[0].sizeimage =
> > +		dfmt->height * dfmt->plane_fmt[0].bytesperline;
> 
> Could some of the code above be replaced with v4l2_fill_pixfmt_mp()?
> 
I would like to refine as following

mtk_fd_fill_pixfmt_mp(...){
	v4l2_fill_pixfmt_mp(dfmt, sfmt->pixelformat, dfmt->width,
dfmt->height);

	dfmt->field = V4L2_FIELD_NONE;
	dfmt->colorspace = V4L2_COLORSPACE_BT2020;
	dfmt->num_planes = sfmt->num_planes;
	dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
	dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
	dfmt->xfer_func =
		V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
}


> > +}
> > +
> > +static const struct v4l2_pix_format_mplane *mtk_fd_find_fmt(u32 format)
> > +{
> > +	unsigned int i;
> > +	const struct v4l2_pix_format_mplane *dev_fmt;
> > +
> > +	for (i = 0; i < NUM_FORMATS; i++) {
> 
> Please use ARRAY_SIZE rather than a custom macro.
fixed.

> 
> > +		dev_fmt = &mtk_fd_img_fmts[i];
> > +		if (dev_fmt->pixelformat == format)
> > +			return dev_fmt;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +static int mtk_fd_try_fmt_out_mp(struct file *file,
> > +				 void *fh,
> > +				 struct v4l2_format *f)
> > +{
> > +	struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
> > +	const struct v4l2_pix_format_mplane *fmt;
> > +
> > +	fmt = mtk_fd_find_fmt(pix_mp->pixelformat);
> > +	if (!fmt)
> > +		fmt = &mtk_fd_img_fmts[0];	/* Get default img fmt */
> 
> nit: Please move the comment to a separate line and add braces.
> 
fixed.

> > +
> > +	mtk_fd_fill_pixfmt_mp(pix_mp, fmt);
> > +	return 0;
> > +}
> [snip]
> > +static unsigned int get_fd_img_fmt(unsigned int fourcc)
> > +{
> > +	switch (fourcc) {
> > +	case V4L2_PIX_FMT_VYUY:
> > +		return MTK_FD_HW_FMT_VYUY;
> > +	case V4L2_PIX_FMT_YUYV:
> > +		return MTK_FD_HW_FMT_YUYV;
> > +	case V4L2_PIX_FMT_YVYU:
> > +		return MTK_FD_HW_FMT_YVYU;
> > +	case V4L2_PIX_FMT_UYVY:
> > +		return MTK_FD_HW_FMT_UYVY;
> > +	case V4L2_PIX_FMT_NV16M:
> > +		return MTK_FD_HW_FMT_YUV_2P;
> > +	case V4L2_PIX_FMT_NV61M:
> > +		return MTK_FD_HW_FMT_YVU_2P;
> > +	default:
> > +		return MTK_FD_HW_FMT_UNKNOWN;
> 
> If we want to be paranoid, we can just add a WARN here and return 0.
> Shouldn't be a need to define an unknown format.
Ok, I will refine as following:
	
default:
	dev_warn(dev, "%s: Unsupported V4L2_PIX_FMT\n", __func__);
	return 0;
> 
> [snip]
> > +static void mtk_fd_fill_user_param(struct user_param *user_param,
> > +				   struct v4l2_ctrl_handler *hdl)
> > +{
> > +	struct v4l2_ctrl *ctrl;
> > +	int i;
> > +
> > +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_DOWN_IMG_WIDTH);
> > +	if (ctrl)
> > +		for (i = 0; i < ctrl->elems; i++)
> > +			user_param->scale_img_width[i] = ctrl->p_new.p_u16[i];
> > +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_DOWN_IMG_HEIGHT);
> > +	if (ctrl)
> > +		for (i = 0; i < ctrl->elems; i++)
> > +			user_param->scale_img_height[i] = ctrl->p_new.p_u16[i];
> > +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_IMG_NUM);
> > +	if (ctrl)
> > +		user_param->scale_img_num = ctrl->val;
> > +
> 
> nit: Either separate the code dealing with all controls from each other, or
> none.
> 
Fixed. 

> > +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECT_POSE);
> > +	if (ctrl)
> > +		for (i = 0; i < ctrl->elems; i++)
> > +			user_param->face_directions[i] = ctrl->p_new.p_u16[i];
> > +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECT_SPEED);
> > +	if (ctrl)
> > +		user_param->fd_speedup = ctrl->val;
> > +	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECTION_MODEL);
> > +	if (ctrl)
> > +		user_param->fd_extra_model = ctrl->val;
> > +}
> > +
> > +static void mtk_fd_device_run(void *priv)
> > +{
> > +	struct mtk_fd_ctx *ctx = priv;
> > +	struct mtk_fd_dev *fd = ctx->fd_dev;
> > +	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> > +	struct fd_enq_param fd_param;
> > +	void *plane_vaddr;
> > +
> > +	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> > +	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> > +
> > +	fd_param.src_img[0].dma_addr =
> > +		vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
> > +	fd_param.user_result.dma_addr =
> > +		vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> > +	plane_vaddr = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
> > +	fd_param.output_vaddr = (u64)(unsigned long)plane_vaddr;
> 
> Why is the vaddr needed here? Specfically, it could pose a security problem
> if CPU virtual addresses are exchanged with firmware.
> 
This should be removed... SCP side as well. 

> > +	fd_param.user_param.src_img_fmt =
> > +		get_fd_img_fmt(ctx->src_fmt.pixelformat);
> > +	if (ctx->src_fmt.num_planes == 2)
> > +		fd_param.src_img[1].dma_addr =
> > +			vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 1);
> 
> nit: Could this be moved above, to be just below src_img[0] initialization,
> for readability reasons?
> 
Ok, this function will be refined as 

static void mtk_fd_device_run(void *priv)
{
	struct mtk_fd_ctx *ctx = priv;
	struct mtk_fd_dev *fd = ctx->fd_dev;
	struct vb2_v4l2_buffer *src_buf, *dst_buf;
	struct fd_enq_param fd_param;

	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);

	fd_param.src_img[0].dma_addr =
		vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
	if (ctx->src_fmt.num_planes == 2)
		fd_param.src_img[1].dma_addr =
			vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 1);
	fd_param.user_result.dma_addr =
		vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
	fd_param.user_param.src_img_fmt =
		get_fd_img_fmt(fd->dev, ctx->src_fmt.pixelformat);

	mtk_fd_fill_user_param(&fd_param.user_param, &ctx->hdl);

	/* Complete request controls if any */
	v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);

	fd->output = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
	mtk_fd_hw_job_exec(fd, &fd_param);
}
> > +	mtk_fd_fill_user_param(&fd_param.user_param, &ctx->hdl);
> > +
> > +	/* Complete request controls if any */
> > +	v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
> > +
> > +	fd->output = plane_vaddr;
> > +	mtk_fd_hw_job_exec(fd, &fd_param);
> > +}
> > +
> > +static struct v4l2_m2m_ops fd_m2m_ops = {
> > +	.device_run = mtk_fd_device_run,
> > +};
> > +
> > +static const struct media_device_ops fd_m2m_media_ops = {
> > +	.req_validate	= vb2_request_validate,
> > +	.req_queue	= v4l2_m2m_request_queue,
> > +};
> > +
> > +static int mtk_fd_video_device_register(struct mtk_fd_dev *fd)
> > +{
> > +	struct video_device *vfd = &fd->vfd;
> > +	struct v4l2_m2m_dev *m2m_dev = fd->m2m_dev;
> > +	struct device *dev = fd->dev;
> > +	int ret;
> > +
> > +	vfd->fops = &fd_video_fops;
> > +	vfd->release = video_device_release;
> > +	vfd->lock = &fd->vfd_lock;
> > +	vfd->v4l2_dev = &fd->v4l2_dev;
> > +	vfd->vfl_dir = VFL_DIR_M2M;
> > +	vfd->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_OUTPUT_MPLANE |
> > +		V4L2_CAP_META_CAPTURE;
> > +	vfd->ioctl_ops = &mtk_fd_v4l2_video_out_ioctl_ops;
> > +
> > +	strscpy(vfd->name, dev_driver_string(dev), sizeof(vfd->name));
> > +
> > +	video_set_drvdata(vfd, fd);
> > +
> > +	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to register video device\n");
> > +		goto err_free_dev;
> > +	}
> > +
> > +	ret = v4l2_m2m_register_media_controller(m2m_dev, vfd,
> > +					     MEDIA_ENT_F_PROC_VIDEO_STATISTICS);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to init mem2mem media controller\n");
> > +		goto err_unreg_video;
> > +	}
> > +	return 0;
> > +
> > +err_unreg_video:
> > +	video_unregister_device(vfd);
> > +err_free_dev:
> > +	video_device_release(vfd);
> > +	return ret;
> > +}
> > +
> > +static int mtk_fd_dev_v4l2_init(struct mtk_fd_dev *fd)
> > +{
> > +	struct media_device *mdev = &fd->mdev;
> > +	struct device *dev = fd->dev;
> > +	int ret;
> > +
> > +	ret = v4l2_device_register(dev, &fd->v4l2_dev);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to register v4l2 device\n");
> > +		return ret;
> > +	}
> > +
> > +	fd->m2m_dev = v4l2_m2m_init(&fd_m2m_ops);
> > +	if (IS_ERR(fd->m2m_dev)) {
> > +		dev_err(dev, "Failed to init mem2mem device\n");
> > +		ret = PTR_ERR(fd->m2m_dev);
> > +		goto err_unreg_v4l2_dev;
> > +	}
> > +
> > +	mdev->dev = dev;
> > +	strscpy(mdev->model, dev_driver_string(dev), sizeof(mdev->model));
> > +	snprintf(mdev->bus_info, sizeof(mdev->bus_info),
> > +		 "platform:%s", dev_name(dev));
> > +	media_device_init(mdev);
> > +	mdev->ops = &fd_m2m_media_ops;
> > +	fd->v4l2_dev.mdev = mdev;
> > +
> > +	ret = mtk_fd_video_device_register(fd);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to register video device\n");
> > +		goto err_cleanup_mdev;
> > +	}
> > +
> > +	ret = media_device_register(mdev);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to register mem2mem media device\n");
> > +		goto err_unreg_vdev;
> > +	}
> > +
> > +	return 0;
> > +
> > +err_unreg_vdev:
> > +	v4l2_m2m_unregister_media_controller(fd->m2m_dev);
> > +	video_unregister_device(&fd->vfd);
> > +	video_device_release(&fd->vfd);
> 
> This is inconsistent with the registration, which registers all of these in
> a function. Perhaps it would be worth to move this cleanup to a
> mtk_fd_video_device_unregister() function?
> 
Fixed.

> > +err_cleanup_mdev:
> > +	media_device_cleanup(mdev);
> > +	v4l2_m2m_release(fd->m2m_dev);
> > +err_unreg_v4l2_dev:
> > +	v4l2_device_unregister(&fd->v4l2_dev);
> > +	return ret;
> > +}
> > +
> > +static void mtk_fd_dev_v4l2_release(struct mtk_fd_dev *fd)
> > +{
> > +	v4l2_m2m_unregister_media_controller(fd->m2m_dev);
> > +	video_unregister_device(&fd->vfd);
> > +	video_device_release(&fd->vfd);
> > +	media_device_cleanup(&fd->mdev);
> > +	v4l2_m2m_release(fd->m2m_dev);
> > +	v4l2_device_unregister(&fd->v4l2_dev);
> > +}
> > +
> > +static irqreturn_t mtk_fd_irq(int irq, void *data)
> > +{
> > +	struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> > +
> > +	/* must read this register otherwise HW will keep sending irq */
> > +	readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> > +	fd->output->number = readl(fd->fd_base + MTK_FD_REG_OFFSET_RESULT);
> > +	dev_dbg(fd->dev, "mtk_fd_face_num:%d\n", fd->output->number);
> > +
> > +	mtk_fd_hw_done(fd, VB2_BUF_STATE_DONE);
> > +	return IRQ_HANDLED;
> > +}
> > +
> > +static int mtk_fd_hw_get_scp_mem(struct mtk_fd_dev *fd)
> > +{
> > +	struct device *dev = fd->dev;
> > +	dma_addr_t addr;
> > +	void *ptr;
> > +	u32 ret;
> > +
> > +	/*
> > +	 * Allocate coherent reserved memory for SCP firmware usage.
> > +	 * The size of SCP composer's memory is fixed to 0x100000
> > +	 * for the requirement of firmware.
> > +	 */
> > +	ptr = dma_alloc_coherent(&fd->scp_pdev->dev,
> > +				 MTK_FD_HW_WORK_BUF_SIZE, &addr, GFP_KERNEL);
> > +	if (!ptr)
> > +		return -ENOMEM;
> > +
> > +	fd->scp_mem.scp_addr = addr;
> > +	fd->scp_mem_virt_addr = ptr;
> > +	dev_info(dev, "scp addr:%pad va:%pK\n", &addr, ptr);
> 
> These addresses are nothing a user should be concerned about, so please
> don't use the _info level here. If you think this is an important debugging
> information, please make it dev_dbg(). Otherwise, please just remove.
> 
Removed.

> > +
> > +	/*
> > +	 * This reserved memory is also be used by FD HW.
> > +	 * Need to get iova address for FD DMA.
> > +	 */
> > +	addr = dma_map_resource(dev, addr, MTK_FD_HW_WORK_BUF_SIZE,
> > +				DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
> 
> Note that the second argument to dma_map_resource() is phys_addr_t, but the
> code above passes a DMA address. This works only by luck, because both
> physical and SCP DMA address space have the same addresses. To be fully
> correct, dma_map_single() needs to be used, with the kernel virtual address
> passed to it, but currently it doesn't handle the memory from a reserved
> pool. Thus, we can only keep the hack as is, but please add a comment
> explaining it, e.g.
> 
> /*
>  * FIXME: Assume SCP DMA and physical addresses are the same until
>  * dma_map_single() is fixed to handle reserved memory allocations.
>  */
> 
Ok, I've insert it above the comments there.
as following:

/*
 * FIXME: Assume SCP DMA and physical addresses are the same until
 * dma_map_single() is fixed to handle reserved memory allocations.
 *
 * This reserved memory will also be used by FD HW.
 * Need to get iova address for FD DMA.
 */

> > +	if (dma_mapping_error(dev, addr)) {
> > +		dev_err(dev, "Failed to map scp iova\n");
> > +		ret = -ENOMEM;
> > +		goto fail_free_mem;
> > +	}
> > +	fd->scp_mem.dma_addr = addr;
> > +	dev_info(dev, "scp iova addr:%pad\n", &addr);
> 
> Ditto.
fixed.

> 
> Best regards,
> Tomasz

Thanks and best regards,
Jerry
Tomasz Figa May 25, 2020, 12:24 p.m. UTC | #3
r

On Fri, May 22, 2020 at 4:11 PM Jerry-ch Chen
<Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Thu, 2020-05-21 at 18:28 +0000, Tomasz Figa wrote:
> > Hi Jerry,
> >
> > On Wed, Dec 04, 2019 at 08:47:32PM +0800, Jerry-ch Chen wrote:
[snip]
> > > +
> > > +enum face_angle {
> > > +   MTK_FD_FACE_FRONT,
> > > +   MTK_FD_FACE_RIGHT_50,
> > > +   MTK_FD_FACE_LEFT_50,
> > > +   MTK_FD_FACE_RIGHT_90,
> > > +   MTK_FD_FACE_LEFT_90,
> > > +   MTK_FD_FACE_ANGLE_NUM,
> > > +};
> >
> > This enum seems to define values for the V4L2_CID_MTK_FD_DETECT_POSE
> > control. Considering that this is an enumeration and the values are
> > actually integers (-90, -50, 0, 50, 90), perhaps this should be an
> > INTEGER_MENU control instead?
> >
>
> this ioctl let user select multiple face positions(combination of angles
> and directions) to be detected. so I thought I am not able to use the
> INTEGER_MENU for this purpose.

Ah, okay, I thought there is only one selection possible.

>
> A bit-field as following should be used by user.
> I consider adding it to uapi.
>
> struct face_direction_def {
> __u16 MTK_FD_FACE_DIR_0 : 1,
>         MTK_FD_FACE_DIR_30 : 1,
>         MTK_FD_FACE_DIR_60 : 1,
>         MTK_FD_FACE_DIR_90 : 1,
>         MTK_FD_FACE_DIR_120 : 1,
>         MTK_FD_FACE_DIR_150 : 1,
>         MTK_FD_FACE_DIR_180 : 1,
>         MTK_FD_FACE_DIR_210 : 1,
>         MTK_FD_FACE_DIR_240 : 1,
>         MTK_FD_FACE_DIR_270 : 1,
>         MTK_FD_FACE_DIR_300 : 1,
>         MTK_FD_FACE_DIR_330 : 1,
>         : 4;
> };

Note that bit fields are not recommended in UAPI because of not being
well specified by the language. Instead bits should be defined using
macros with explicit masks or sometimes enums.

>
> User can also select some face directions of each face angle in one
> ioctl, for example:
>
> /*
>  * u16 face_directions[MTK_FD_FACE_ANGLE_NUM] = {0};
>  *
>  *      face_directions[MTK_FD_FACE_FRONT] = 0x7; //angle:0, dir:0,30,60
>  *      face_directions[MTK_FACE_RIGHT_50] = 0x2; //angle:50, dir:30
>  *
>  */

Makes sense, thanks.

>
> > > +
> > > +struct fd_buffer {
> > > +   __u32 scp_addr; /* used by SCP */
> > > +   __u32 dma_addr; /* used by DMA HW */
> > > +} __packed;
> fd buffer is used for scp ipi
>
> > > +
> > > +struct fd_face_result {
> > > +   char data[16];
> > > +};
> fd_face_result is used for user, so it should be moved to
> include/uapi/linux.
> In fact, it has bit-field definition for user, so I would like to define
> it in include/uapi/linux as following:
>
> struct fd_face_result {
>   __u64 face_idx : 12,
>         type : 1,
>         x0 : 10,
>         y0 : 10,
>         x1 : 10,
>         y1 : 10,
>         fcv1 : 11;
>   __u64 fcv2 : 7,
>         rip_dir : 4,
>         rop_dir : 3,
>         det_size : 5;
> };
>

Indeed this should be defined, but as per my comment above, please
avoid using the bitfield construct and define shifts and masks
instead.

>
> > > +
> > > +struct fd_user_output {
> > > +   struct fd_face_result results[MTK_FD_MAX_RESULT_NUM];
> > > +   __u16 number;
> >
> > Is this perhaps the number of results? If so, would num_results be a better
> > name?
> >
> yes, fixed.
> > > +};
> >
> > Since this struct is the meta buffer format, it is a part of the userspace
> > interface and should be defined in a header under include/uapi/linux/.
> >
> Ok, I will create include/uapi/linux/mtk_fd_40.h
> which suppose to include structures that userspace will use.
> should the private IOCTLs be placed in it together?
>

Sorry, what private IOCTLs are you referring to? If you mean private
control IDs, then yes, they should go to that header.

[snip]
> > > +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > +                             unsigned int *num_buffers,
> > > +                             unsigned int *num_planes,
> > > +                             unsigned int sizes[],
> > > +                             struct device *alloc_devs[])
> > > +{
> > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > +   unsigned int size[2];
> > > +   unsigned int plane;
> > > +
> > > +   switch (vq->type) {
> > > +   case V4L2_BUF_TYPE_META_CAPTURE:
> > > +           size[0] = ctx->dst_fmt.buffersize;
> > > +           break;
> > > +   case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > +           size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > +           if (*num_planes == 2)
> > > +                   size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > > +           break;
> > > +   }
> >
> > Is this code above needed? The code below sets sizes[] and it uses a for loop,
> > without opencoded assignment for the second plane.
> >
>
> Looks like not really useful here,
> it should check sizes and num_planes if num_plane not zero,
> and for V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, it will at most have 2
> planes, maybe no need for loop as well.

Loops generally make the code cleaner and there might be some desire
to add support for more formats in the future, e.g. in case a next
generation of the hardware shows up.

> I will refine this function as following:
> mtk_fd_vb2_queue_setup(...)
> {
>         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
>
>         if (*num_planes == 0) {
>                 if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
>                         sizes[0] = ctx->dst_fmt.buffersize;
>                         *num_planes = 1;
>                         return 0;
>                 } else if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
>                         *num_planes = ctx->src_fmt.num_planes;
>                         sizes[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
>                         if (*num_planes == 2)
>                                 sizes[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
>                         return 0;
>                 }
>                 return -EINVAL;
>         }
>
>         /* If num_plane not zero, check the num_plane and sizes*/
>         if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
>                 if ((*num_planes == 1) &&
>                     (sizes[0] <= ctx->dst_fmt.buffersize))
>                         return 0;

nit: The typical convention is to check for problems and return the
error code earlier, with the success handled at the end of the block.

>                 else
>                         return -EINVAL;
>         }
>         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
>                 if ((*num_planes == 1) &&
>                     (sizes[0] <= ctx->src_fmt.plane_fmt[0].sizeimage))
>                         return 0;
>                 else if ((*num_planes == 2) &&
>                          (sizes[0] <= ctx->src_fmt.plane_fmt[0].sizeimage) &&
>                          (sizes[1] <= ctx->src_fmt.plane_fmt[1].sizeimage))
>                         return 0;

Wouldn't a loop eliminate the need to if/else if through the various
supported cases and duplicate the size checks?

>                 else
>                         return -EINVAL;
>
>         }
>         return 0;
> }

How about the following?

mtk_fd_vb2_queue_setup(...)
{
        struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);

        if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
                if (*num_planes == 0) {
                        *num_planes = 1;
                        sizes[0] = ctx->dst_fmt.buffersize;
                        return 0;
                }

                if (*num_planes != 1 || sizes[0] < ctx->dst_fmt.buffersize)
                                return -EINVAL;

                return 0;
        }

        /* V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE */
        if (*num_planes == 0) {
                        *num_planes = ctx->src_fmt.num_planes;
                        for (i = 0; i < ctx->src_fmt.num_planes; ++i)
                                sizes[i] = ctx->src_fmt.plane_fmt[i].sizeimage;
                        return 0;
        }

        if (*num_planes < ctx->src_fmt.num_planes)
                return -EINVAL;

        for (i = 0; i < ctx->src_fmt.num_planes; ++i)
                if (sizes[i] < ctx->src_fmt.plane_fmt[i].sizeimage)
                        return -EINVAL;

        return 0;
}

Note that it fully separates the code dealing with each queue and thus
improves the readability.

In this case, it could actually be beneficial to split the vb2_ops
implementation into one that deals only with video_output_mplane and
one only with meta_capture. This would allow eliminating the special
casing based on vq->type and thus further simplify the code. Not sure
if it applies to the other vb2 callbacks, though.

[snip]
> > > +static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> > > +                             const struct v4l2_pix_format_mplane *sfmt)
> > > +{
> > > +   dfmt->field = V4L2_FIELD_NONE;
> > > +   dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> > > +   dfmt->num_planes = sfmt->num_planes;
> > > +   dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > > +   dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> > > +   dfmt->xfer_func =
> > > +           V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> > > +
> > > +   /* Keep user setting as possible */
> > > +   dfmt->width = clamp(dfmt->width,
> > > +                       MTK_FD_OUTPUT_MIN_WIDTH,
> > > +                       MTK_FD_OUTPUT_MAX_WIDTH);
> > > +   dfmt->height = clamp(dfmt->height,
> > > +                        MTK_FD_OUTPUT_MIN_HEIGHT,
> > > +                        MTK_FD_OUTPUT_MAX_HEIGHT);
> > > +
> > > +   if (sfmt->num_planes == 2) {
> > > +           /* NV16M and NV61M has 1 byte per pixel */
> > > +           dfmt->plane_fmt[0].bytesperline = dfmt->width;
> > > +           dfmt->plane_fmt[1].bytesperline = dfmt->width;
> > > +   } else {
> > > +           /* 2 bytes per pixel */
> > > +           dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> > > +   }
> > > +
> > > +   dfmt->plane_fmt[0].sizeimage =
> > > +           dfmt->height * dfmt->plane_fmt[0].bytesperline;
> >
> > Could some of the code above be replaced with v4l2_fill_pixfmt_mp()?
> >
> I would like to refine as following
>
> mtk_fd_fill_pixfmt_mp(...){
>         v4l2_fill_pixfmt_mp(dfmt, sfmt->pixelformat, dfmt->width,
> dfmt->height);
>
>         dfmt->field = V4L2_FIELD_NONE;
>         dfmt->colorspace = V4L2_COLORSPACE_BT2020;
>         dfmt->num_planes = sfmt->num_planes;

num_planes should be already filled in by the helper. That actually
raises a question on whether there is any need to have sfmt passed to
this function at all. Perhaps all we need is the value of pixelformat?

>         dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
>         dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
>         dfmt->xfer_func =
>                 V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> }
>

Isn't still a need to clamp() width and height to min/max, though?

[snip]
> > > +   fd_param.user_param.src_img_fmt =
> > > +           get_fd_img_fmt(ctx->src_fmt.pixelformat);
> > > +   if (ctx->src_fmt.num_planes == 2)
> > > +           fd_param.src_img[1].dma_addr =
> > > +                   vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 1);
> >
> > nit: Could this be moved above, to be just below src_img[0] initialization,
> > for readability reasons?
> >
> Ok, this function will be refined as
>
> static void mtk_fd_device_run(void *priv)
> {
>         struct mtk_fd_ctx *ctx = priv;
>         struct mtk_fd_dev *fd = ctx->fd_dev;
>         struct vb2_v4l2_buffer *src_buf, *dst_buf;
>         struct fd_enq_param fd_param;
>
>         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
>         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
>
>         fd_param.src_img[0].dma_addr =
>                 vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
>         if (ctx->src_fmt.num_planes == 2)
>                 fd_param.src_img[1].dma_addr =
>                         vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 1);

How about making this a loop in terms of ctx->src_fmt.num_planes?

>         fd_param.user_result.dma_addr =
>                 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
>         fd_param.user_param.src_img_fmt =
>                 get_fd_img_fmt(fd->dev, ctx->src_fmt.pixelformat);
>
>         mtk_fd_fill_user_param(&fd_param.user_param, &ctx->hdl);
>
>         /* Complete request controls if any */
>         v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
>
>         fd->output = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
>         mtk_fd_hw_job_exec(fd, &fd_param);
> }

Best regards,
Tomasz
Jerry-ch Chen May 29, 2020, 12:26 p.m. UTC | #4
Hi Tomasz,

I Appreciate your review comments, here's the reply.

On Mon, 2020-05-25 at 14:24 +0200, Tomasz Figa wrote:
> r
> 
> On Fri, May 22, 2020 at 4:11 PM Jerry-ch Chen
> <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Thu, 2020-05-21 at 18:28 +0000, Tomasz Figa wrote:
> > > Hi Jerry,
> > >
> > > On Wed, Dec 04, 2019 at 08:47:32PM +0800, Jerry-ch Chen wrote:
> [snip]
> > > > +
> > > > +enum face_angle {
> > > > +   MTK_FD_FACE_FRONT,
> > > > +   MTK_FD_FACE_RIGHT_50,
> > > > +   MTK_FD_FACE_LEFT_50,
> > > > +   MTK_FD_FACE_RIGHT_90,
> > > > +   MTK_FD_FACE_LEFT_90,
> > > > +   MTK_FD_FACE_ANGLE_NUM,
> > > > +};
> > >
> > > This enum seems to define values for the V4L2_CID_MTK_FD_DETECT_POSE
> > > control. Considering that this is an enumeration and the values are
> > > actually integers (-90, -50, 0, 50, 90), perhaps this should be an
> > > INTEGER_MENU control instead?
> > >
> >
> > this ioctl let user select multiple face positions(combination of angles
> > and directions) to be detected. so I thought I am not able to use the
> > INTEGER_MENU for this purpose.
> 
> Ah, okay, I thought there is only one selection possible.
> 
> >
> > A bit-field as following should be used by user.
> > I consider adding it to uapi.
> >
> > struct face_direction_def {
> > __u16 MTK_FD_FACE_DIR_0 : 1,
> >         MTK_FD_FACE_DIR_30 : 1,
> >         MTK_FD_FACE_DIR_60 : 1,
> >         MTK_FD_FACE_DIR_90 : 1,
> >         MTK_FD_FACE_DIR_120 : 1,
> >         MTK_FD_FACE_DIR_150 : 1,
> >         MTK_FD_FACE_DIR_180 : 1,
> >         MTK_FD_FACE_DIR_210 : 1,
> >         MTK_FD_FACE_DIR_240 : 1,
> >         MTK_FD_FACE_DIR_270 : 1,
> >         MTK_FD_FACE_DIR_300 : 1,
> >         MTK_FD_FACE_DIR_330 : 1,
> >         : 4;
> > };
> 
> Note that bit fields are not recommended in UAPI because of not being
> well specified by the language. Instead bits should be defined using
> macros with explicit masks or sometimes enums.
> 
Ok, I'll define them in macro with masks.

> >
> > User can also select some face directions of each face angle in one
> > ioctl, for example:
> >
> > /*
> >  * u16 face_directions[MTK_FD_FACE_ANGLE_NUM] = {0};
> >  *
> >  *      face_directions[MTK_FD_FACE_FRONT] = 0x7; //angle:0, dir:0,30,60
> >  *      face_directions[MTK_FACE_RIGHT_50] = 0x2; //angle:50, dir:30
> >  *
> >  */
> 
> Makes sense, thanks.
> 
> >
> > > > +
> > > > +struct fd_buffer {
> > > > +   __u32 scp_addr; /* used by SCP */
> > > > +   __u32 dma_addr; /* used by DMA HW */
> > > > +} __packed;
> > fd buffer is used for scp ipi
> >
> > > > +
> > > > +struct fd_face_result {
> > > > +   char data[16];
> > > > +};
> > fd_face_result is used for user, so it should be moved to
> > include/uapi/linux.
> > In fact, it has bit-field definition for user, so I would like to define
> > it in include/uapi/linux as following:
> >
> > struct fd_face_result {
> >   __u64 face_idx : 12,
> >         type : 1,
> >         x0 : 10,
> >         y0 : 10,
> >         x1 : 10,
> >         y1 : 10,
> >         fcv1 : 11;
> >   __u64 fcv2 : 7,
> >         rip_dir : 4,
> >         rop_dir : 3,
> >         det_size : 5;
> > };
> >
> 
> Indeed this should be defined, but as per my comment above, please
> avoid using the bitfield construct and define shifts and masks
> instead.
> 
Ok, I'll fix it.

> >
> > > > +
> > > > +struct fd_user_output {
> > > > +   struct fd_face_result results[MTK_FD_MAX_RESULT_NUM];
> > > > +   __u16 number;
> > >
> > > Is this perhaps the number of results? If so, would num_results be a better
> > > name?
> > >
> > yes, fixed.
> > > > +};
> > >
> > > Since this struct is the meta buffer format, it is a part of the userspace
> > > interface and should be defined in a header under include/uapi/linux/.
> > >
> > Ok, I will create include/uapi/linux/mtk_fd_40.h
> > which suppose to include structures that userspace will use.
> > should the private IOCTLs be placed in it together?
> >
> 
> Sorry, what private IOCTLs are you referring to? If you mean private
> control IDs, then yes, they should go to that header.
yes, the IDs, sorry for the wrong expression.

> 
> [snip]
> > > > +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > > +                             unsigned int *num_buffers,
> > > > +                             unsigned int *num_planes,
> > > > +                             unsigned int sizes[],
> > > > +                             struct device *alloc_devs[])
> > > > +{
> > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > +   unsigned int size[2];
> > > > +   unsigned int plane;
> > > > +
> > > > +   switch (vq->type) {
> > > > +   case V4L2_BUF_TYPE_META_CAPTURE:
> > > > +           size[0] = ctx->dst_fmt.buffersize;
> > > > +           break;
> > > > +   case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > > +           size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > > +           if (*num_planes == 2)
> > > > +                   size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > > > +           break;
> > > > +   }
> > >
> > > Is this code above needed? The code below sets sizes[] and it uses a for loop,
> > > without opencoded assignment for the second plane.
> > >
> >
> > Looks like not really useful here,
> > it should check sizes and num_planes if num_plane not zero,
> > and for V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, it will at most have 2
> > planes, maybe no need for loop as well.
> 
> Loops generally make the code cleaner and there might be some desire
> to add support for more formats in the future, e.g. in case a next
> generation of the hardware shows up.
> 
Ok, got it.

> > I will refine this function as following:
> > mtk_fd_vb2_queue_setup(...)
> > {
> >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> >
> >         if (*num_planes == 0) {
> >                 if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
> >                         sizes[0] = ctx->dst_fmt.buffersize;
> >                         *num_planes = 1;
> >                         return 0;
> >                 } else if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
> >                         *num_planes = ctx->src_fmt.num_planes;
> >                         sizes[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> >                         if (*num_planes == 2)
> >                                 sizes[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> >                         return 0;
> >                 }
> >                 return -EINVAL;
> >         }
> >
> >         /* If num_plane not zero, check the num_plane and sizes*/
> >         if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
> >                 if ((*num_planes == 1) &&
> >                     (sizes[0] <= ctx->dst_fmt.buffersize))
> >                         return 0;
> 
> nit: The typical convention is to check for problems and return the
> error code earlier, with the success handled at the end of the block.
> 
Got it.

> >                 else
> >                         return -EINVAL;
> >         }
> >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
> >                 if ((*num_planes == 1) &&
> >                     (sizes[0] <= ctx->src_fmt.plane_fmt[0].sizeimage))
> >                         return 0;
> >                 else if ((*num_planes == 2) &&
> >                          (sizes[0] <= ctx->src_fmt.plane_fmt[0].sizeimage) &&
> >                          (sizes[1] <= ctx->src_fmt.plane_fmt[1].sizeimage))
> >                         return 0;
> 
> Wouldn't a loop eliminate the need to if/else if through the various
> supported cases and duplicate the size checks?
> 
> >                 else
> >                         return -EINVAL;
> >
> >         }
> >         return 0;
> > }
> 
> How about the following?
> 
> mtk_fd_vb2_queue_setup(...)
> {
>         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> 
>         if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
>                 if (*num_planes == 0) {
>                         *num_planes = 1;
>                         sizes[0] = ctx->dst_fmt.buffersize;
>                         return 0;
>                 }
> 
>                 if (*num_planes != 1 || sizes[0] < ctx->dst_fmt.buffersize)
>                                 return -EINVAL;
> 
>                 return 0;
>         }
> 
>         /* V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE */
>         if (*num_planes == 0) {
>                         *num_planes = ctx->src_fmt.num_planes;
>                         for (i = 0; i < ctx->src_fmt.num_planes; ++i)
>                                 sizes[i] = ctx->src_fmt.plane_fmt[i].sizeimage;
>                         return 0;
>         }
> 
>         if (*num_planes < ctx->src_fmt.num_planes)
>                 return -EINVAL;
> 
>         for (i = 0; i < ctx->src_fmt.num_planes; ++i)
>                 if (sizes[i] < ctx->src_fmt.plane_fmt[i].sizeimage)
>                         return -EINVAL;
> 
>         return 0;
> }
> 
> Note that it fully separates the code dealing with each queue and thus
> improves the readability.
> 
> In this case, it could actually be beneficial to split the vb2_ops
> implementation into one that deals only with video_output_mplane and
> one only with meta_capture. This would allow eliminating the special
> casing based on vq->type and thus further simplify the code. Not sure
> if it applies to the other vb2 callbacks, though.
> 
Got it, thanks for the comments.

> [snip]
> > > > +static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> > > > +                             const struct v4l2_pix_format_mplane *sfmt)
> > > > +{
> > > > +   dfmt->field = V4L2_FIELD_NONE;
> > > > +   dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> > > > +   dfmt->num_planes = sfmt->num_planes;
> > > > +   dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > > > +   dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> > > > +   dfmt->xfer_func =
> > > > +           V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> > > > +
> > > > +   /* Keep user setting as possible */
> > > > +   dfmt->width = clamp(dfmt->width,
> > > > +                       MTK_FD_OUTPUT_MIN_WIDTH,
> > > > +                       MTK_FD_OUTPUT_MAX_WIDTH);
> > > > +   dfmt->height = clamp(dfmt->height,
> > > > +                        MTK_FD_OUTPUT_MIN_HEIGHT,
> > > > +                        MTK_FD_OUTPUT_MAX_HEIGHT);
> > > > +
> > > > +   if (sfmt->num_planes == 2) {
> > > > +           /* NV16M and NV61M has 1 byte per pixel */
> > > > +           dfmt->plane_fmt[0].bytesperline = dfmt->width;
> > > > +           dfmt->plane_fmt[1].bytesperline = dfmt->width;
> > > > +   } else {
> > > > +           /* 2 bytes per pixel */
> > > > +           dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> > > > +   }
> > > > +
> > > > +   dfmt->plane_fmt[0].sizeimage =
> > > > +           dfmt->height * dfmt->plane_fmt[0].bytesperline;
> > >
> > > Could some of the code above be replaced with v4l2_fill_pixfmt_mp()?
> > >
> > I would like to refine as following
> >
> > mtk_fd_fill_pixfmt_mp(...){
> >         v4l2_fill_pixfmt_mp(dfmt, sfmt->pixelformat, dfmt->width,
> > dfmt->height);
> >
> >         dfmt->field = V4L2_FIELD_NONE;
> >         dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> >         dfmt->num_planes = sfmt->num_planes;
> 
> num_planes should be already filled in by the helper. That actually
> raises a question on whether there is any need to have sfmt passed to
> this function at all. Perhaps all we need is the value of pixelformat?
> 
Yes, I'll replace sfmt with u32 pixfmt.

> >         dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> >         dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> >         dfmt->xfer_func =
> >                 V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> > }
> >
> 
> Isn't still a need to clamp() width and height to min/max, though?
Yes, I'll add them back.

This function will be refined as :

static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
                                  u32 pixfmt)
{
        v4l2_fill_pixfmt_mp(dfmt, pixfmt, dfmt->width, dfmt->height);

        dfmt->field = V4L2_FIELD_NONE;
        dfmt->colorspace = V4L2_COLORSPACE_BT2020;
        dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
        dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
        dfmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);

        /* Keep user setting as possible */
        dfmt->width = clamp(dfmt->width,
                            MTK_FD_OUTPUT_MIN_WIDTH,
                            MTK_FD_OUTPUT_MAX_WIDTH);
        dfmt->height = clamp(dfmt->height,
                             MTK_FD_OUTPUT_MIN_HEIGHT,
                             MTK_FD_OUTPUT_MAX_HEIGHT);
}


> 
> [snip]
> > > > +   fd_param.user_param.src_img_fmt =
> > > > +           get_fd_img_fmt(ctx->src_fmt.pixelformat);
> > > > +   if (ctx->src_fmt.num_planes == 2)
> > > > +           fd_param.src_img[1].dma_addr =
> > > > +                   vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 1);
> > >
> > > nit: Could this be moved above, to be just below src_img[0] initialization,
> > > for readability reasons?
> > >
> > Ok, this function will be refined as
> >
> > static void mtk_fd_device_run(void *priv)
> > {
> >         struct mtk_fd_ctx *ctx = priv;
> >         struct mtk_fd_dev *fd = ctx->fd_dev;
> >         struct vb2_v4l2_buffer *src_buf, *dst_buf;
> >         struct fd_enq_param fd_param;
> >
> >         src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> >         dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> >
> >         fd_param.src_img[0].dma_addr =
> >                 vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
> >         if (ctx->src_fmt.num_planes == 2)
> >                 fd_param.src_img[1].dma_addr =
> >                         vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 1);
> 
> How about making this a loop in terms of ctx->src_fmt.num_planes?
> 
yes, it will be refined as following, I use the src_vb2_buf to reduce
the length for fitting 80 columns

src_vb2_buf = &src_buf->vb2_buf;
dst_vb2_buf = &dst_buf->vb2_buf;

for (i = 0; i < ctx->src_fmt.num_planes; i++)
	fd_param.src_img[i].dma_addr =
		vb2_dma_contig_plane_dma_addr(src_vb2_buf,i);

fd_param.user_result.dma_addr =
	vb2_dma_contig_plane_dma_addr(dst_vb2_buf, 0);

> >         fd_param.user_result.dma_addr =
> >                 vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> >         fd_param.user_param.src_img_fmt =
> >                 get_fd_img_fmt(fd->dev, ctx->src_fmt.pixelformat);
> >
> >         mtk_fd_fill_user_param(&fd_param.user_param, &ctx->hdl);
> >
> >         /* Complete request controls if any */
> >         v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
> >
> >         fd->output = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
> >         mtk_fd_hw_job_exec(fd, &fd_param);
> > }
> 
> Best regards,
> Tomasz

Thanks and Best regards,
Jerry
Tomasz Figa May 29, 2020, 12:59 p.m. UTC | #5
On Fri, May 29, 2020 at 2:26 PM Jerry-ch Chen
<Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> I Appreciate your review comments, here's the reply.
>
> On Mon, 2020-05-25 at 14:24 +0200, Tomasz Figa wrote:
> > r
> >
> > On Fri, May 22, 2020 at 4:11 PM Jerry-ch Chen
> > <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Thu, 2020-05-21 at 18:28 +0000, Tomasz Figa wrote:
> > > > Hi Jerry,
> > > >
> > > > On Wed, Dec 04, 2019 at 08:47:32PM +0800, Jerry-ch Chen wrote:
[snip]
> > Isn't still a need to clamp() width and height to min/max, though?
> Yes, I'll add them back.
>
> This function will be refined as :
>
> static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
>                                   u32 pixfmt)
> {
>         v4l2_fill_pixfmt_mp(dfmt, pixfmt, dfmt->width, dfmt->height);
>
>         dfmt->field = V4L2_FIELD_NONE;
>         dfmt->colorspace = V4L2_COLORSPACE_BT2020;
>         dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
>         dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
>         dfmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
>
>         /* Keep user setting as possible */
>         dfmt->width = clamp(dfmt->width,
>                             MTK_FD_OUTPUT_MIN_WIDTH,
>                             MTK_FD_OUTPUT_MAX_WIDTH);
>         dfmt->height = clamp(dfmt->height,
>                              MTK_FD_OUTPUT_MIN_HEIGHT,
>                              MTK_FD_OUTPUT_MAX_HEIGHT);

Note that this would cause the other fields of dfmt to be inconsistent
with width and height. The correct way to do this would be to first
clamp and then call v4l2_fill_pixfmt_mp().

Best regards,
Tomasz
Jerry-ch Chen June 1, 2020, 10:37 a.m. UTC | #6
On Fri, 2020-05-29 at 14:59 +0200, Tomasz Figa wrote:
> On Fri, May 29, 2020 at 2:26 PM Jerry-ch Chen
> <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > I Appreciate your review comments, here's the reply.
> >
> > On Mon, 2020-05-25 at 14:24 +0200, Tomasz Figa wrote:
> > > r
> > >
> > > On Fri, May 22, 2020 at 4:11 PM Jerry-ch Chen
> > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Thu, 2020-05-21 at 18:28 +0000, Tomasz Figa wrote:
> > > > > Hi Jerry,
> > > > >
> > > > > On Wed, Dec 04, 2019 at 08:47:32PM +0800, Jerry-ch Chen wrote:
> [snip]
> > > Isn't still a need to clamp() width and height to min/max, though?
> > Yes, I'll add them back.
> >
> > This function will be refined as :
> >
> > static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> >                                   u32 pixfmt)
> > {
> >         v4l2_fill_pixfmt_mp(dfmt, pixfmt, dfmt->width, dfmt->height);
> >
> >         dfmt->field = V4L2_FIELD_NONE;
> >         dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> >         dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> >         dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> >         dfmt->xfer_func = V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> >
> >         /* Keep user setting as possible */
> >         dfmt->width = clamp(dfmt->width,
> >                             MTK_FD_OUTPUT_MIN_WIDTH,
> >                             MTK_FD_OUTPUT_MAX_WIDTH);
> >         dfmt->height = clamp(dfmt->height,
> >                              MTK_FD_OUTPUT_MIN_HEIGHT,
> >                              MTK_FD_OUTPUT_MAX_HEIGHT);
> 
> Note that this would cause the other fields of dfmt to be inconsistent
> with width and height. The correct way to do this would be to first
> clamp and then call v4l2_fill_pixfmt_mp().
> 
Ok, I will fix it.

Thanks and Best regards,
Jerry
> Best regards,
> Tomasz
diff mbox series

Patch

diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index a505e9f5a1e2..ae99258e26a5 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -32,6 +32,8 @@  source "drivers/media/platform/davinci/Kconfig"
 
 source "drivers/media/platform/omap/Kconfig"
 
+source "drivers/media/platform/mtk-isp/fd/Kconfig"
+
 config VIDEO_ASPEED
 	tristate "Aspeed AST2400 and AST2500 Video Engine driver"
 	depends on VIDEO_V4L2
diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
index e6deb2597738..8b817cc80f43 100644
--- a/drivers/media/platform/Makefile
+++ b/drivers/media/platform/Makefile
@@ -94,6 +94,8 @@  obj-$(CONFIG_VIDEO_MEDIATEK_MDP)	+= mtk-mdp/
 
 obj-$(CONFIG_VIDEO_MEDIATEK_JPEG)	+= mtk-jpeg/
 
+obj-$(CONFIG_VIDEO_MEDIATEK_FD)		+= mtk-isp/fd/
+
 obj-$(CONFIG_VIDEO_QCOM_CAMSS)		+= qcom/camss/
 
 obj-$(CONFIG_VIDEO_QCOM_VENUS)		+= qcom/venus/
diff --git a/drivers/media/platform/mtk-isp/fd/Kconfig b/drivers/media/platform/mtk-isp/fd/Kconfig
new file mode 100644
index 000000000000..7a747d947e88
--- /dev/null
+++ b/drivers/media/platform/mtk-isp/fd/Kconfig
@@ -0,0 +1,19 @@ 
+config VIDEO_MEDIATEK_FD
+	tristate "Mediatek face detection processing function"
+	depends on VIDEO_V4L2
+	depends on ARCH_MEDIATEK
+	select VIDEOBUF2_DMA_CONTIG
+	select VIDEOBUF2_CORE
+	select VIDEOBUF2_V4L2
+	select VIDEOBUF2_MEMOPS
+	select MEDIA_CONTROLLER
+	select MTK_SCP
+
+	default n
+	help
+		Support the Face Detection (FD) feature in the Mediatek
+		mt8183 Soc.
+
+		FD driver is a V4L2 memory-to-memory device driver which
+		provides hardware accelerated face detection function,
+		it can detect different sizes of faces in a raw image.
diff --git a/drivers/media/platform/mtk-isp/fd/Makefile b/drivers/media/platform/mtk-isp/fd/Makefile
new file mode 100644
index 000000000000..9b1c501d73e1
--- /dev/null
+++ b/drivers/media/platform/mtk-isp/fd/Makefile
@@ -0,0 +1,5 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+mtk-fd-objs += mtk_fd_40.o
+
+obj-$(CONFIG_VIDEO_MEDIATEK_FD) += mtk-fd.o
\ No newline at end of file
diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
new file mode 100644
index 000000000000..d85bdcb70d6d
--- /dev/null
+++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
@@ -0,0 +1,149 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+//
+// Copyright (c) 2018 MediaTek Inc.
+
+#ifndef __MTK_FD_HW_H__
+#define __MTK_FD_HW_H__
+
+#include <linux/completion.h>
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-device.h>
+#include <media/videobuf2-v4l2.h>
+
+#define MTK_FD_OUTPUT_MIN_WIDTH			26U
+#define MTK_FD_OUTPUT_MIN_HEIGHT		26U
+#define MTK_FD_OUTPUT_MAX_WIDTH			640U
+#define MTK_FD_OUTPUT_MAX_HEIGHT		480U
+
+#define MTK_FD_HW_FMT_VYUY			2
+#define MTK_FD_HW_FMT_UYVY			3
+#define MTK_FD_HW_FMT_YVYU			4
+#define MTK_FD_HW_FMT_YUYV			5
+#define MTK_FD_HW_FMT_YVU_2P			6
+#define MTK_FD_HW_FMT_YUV_2P			7
+#define MTK_FD_HW_FMT_UNKNOWN			8
+
+#define MTK_FD_IPI_CMD_INIT			0
+#define MTK_FD_IPI_CMD_INIT_ACK			1
+#define MTK_FD_IPI_CMD_ENQUEUE			2
+#define MTK_FD_IPI_CMD_ENQ_ACK			3
+#define MTK_FD_IPI_CMD_EXIT			4
+#define MTK_FD_IPI_CMD_EXIT_ACK			5
+#define MTK_FD_IPI_CMD_RESET			6
+#define MTK_FD_IPI_CMD_RESET_ACK		7
+
+#define MTK_FD_REG_OFFSET_HW_ENABLE		0x4
+#define MTK_FD_REG_OFFSET_INT_EN		0x15c
+#define MTK_FD_REG_OFFSET_INT_VAL		0x168
+#define MTK_FD_REG_OFFSET_RESULT		0x178
+
+#define MTK_FD_SET_HW_ENABLE			0x111
+#define MTK_FD_RS_BUF_SIZE			2289664
+#define MTK_FD_HW_WORK_BUF_SIZE			0x100000
+#define MTK_FD_MAX_SPEEDUP			7
+#define MTK_FD_MAX_RESULT_NUM			1026
+
+/* Max scale size counts */
+#define MTK_FD_SCALE_ARR_NUM			15
+
+#define MTK_FD_HW_TIMEOUT			1000
+
+enum face_angle {
+	MTK_FD_FACE_FRONT,
+	MTK_FD_FACE_RIGHT_50,
+	MTK_FD_FACE_LEFT_50,
+	MTK_FD_FACE_RIGHT_90,
+	MTK_FD_FACE_LEFT_90,
+	MTK_FD_FACE_ANGLE_NUM,
+};
+
+struct fd_buffer {
+	__u32 scp_addr;	/* used by SCP */
+	__u32 dma_addr;	/* used by DMA HW */
+} __packed;
+
+struct fd_face_result {
+	char data[16];
+};
+
+struct fd_user_output {
+	struct fd_face_result results[MTK_FD_MAX_RESULT_NUM];
+	__u16 number;
+};
+
+struct user_param {
+	u8 fd_speedup;
+	u8 fd_extra_model;
+	u8 scale_img_num;
+	u8 src_img_fmt;
+	__u16 scale_img_width[MTK_FD_SCALE_ARR_NUM];
+	__u16 scale_img_height[MTK_FD_SCALE_ARR_NUM];
+	__u16 face_directions[MTK_FD_FACE_ANGLE_NUM];
+} __packed;
+
+struct fd_init_param {
+	struct fd_buffer fd_manager;
+	__u32 rs_dma_addr;
+} __packed;
+
+struct fd_enq_param {
+	__u64 output_vaddr;
+	struct fd_buffer src_img[2];
+	struct fd_buffer user_result;
+	struct user_param user_param;
+} __packed;
+
+struct fd_ack_param {
+	__u32 ret_code;
+	__u32 ret_msg;
+} __packed;
+
+struct ipi_message {
+	u8 cmd_id;
+	union {
+		struct fd_init_param fd_init_param;
+		struct fd_enq_param fd_enq_param;
+		struct fd_ack_param fd_ack_param;
+	};
+} __packed;
+
+struct mtk_fd_dev {
+	struct device *dev;
+	struct mtk_fd_ctx *ctx;
+	struct v4l2_device v4l2_dev;
+	struct v4l2_m2m_dev *m2m_dev;
+	struct media_device mdev;
+	struct video_device vfd;
+	struct platform_device *scp_pdev;
+	struct clk *fd_clk;
+	struct rproc *rproc_handle;
+
+	/* Lock for V4L2 operations */
+	struct mutex vfd_lock;
+
+	struct fd_user_output *output;
+	struct fd_buffer scp_mem;
+	void __iomem *fd_base;
+	void *rs_dma_buf;
+	dma_addr_t rs_dma_handle;
+	void *scp_mem_virt_addr;
+
+	u32 fd_stream_count;
+	struct completion fd_job_finished;
+	struct delayed_work job_timeout_work;
+};
+
+struct mtk_fd_ctx {
+	struct mtk_fd_dev *fd_dev;
+	struct device *dev;
+	struct v4l2_fh fh;
+	struct v4l2_ctrl_handler hdl;
+	struct v4l2_pix_format_mplane src_fmt;
+	struct v4l2_meta_format dst_fmt;
+	struct user_param user_param;
+};
+
+#endif/*__MTK_FD_HW_H__*/
diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c b/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
new file mode 100644
index 000000000000..4364232d7013
--- /dev/null
+++ b/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
@@ -0,0 +1,1279 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 MediaTek Inc.
+ *
+ * Author: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/remoteproc/mtk_scp.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/remoteproc.h>
+#include <media/videobuf2-dma-contig.h>
+#include <media/videobuf2-v4l2.h>
+#include <media/v4l2-ioctl.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/videobuf2-core.h>
+
+#include "mtk_fd.h"
+
+/* Set the face angle and directions to be detected */
+#define V4L2_CID_MTK_FD_DETECT_POSE		(V4L2_CID_USER_MTK_FD_BASE + 1)
+
+/* Set image widths for an input image to be scaled down for face detection */
+#define V4L2_CID_MTK_FD_SCALE_DOWN_IMG_WIDTH	(V4L2_CID_USER_MTK_FD_BASE + 2)
+
+/* Set image heights for an input image to be scaled down for face detection */
+#define V4L2_CID_MTK_FD_SCALE_DOWN_IMG_HEIGHT	(V4L2_CID_USER_MTK_FD_BASE + 3)
+
+/* Set the length of scale down size array */
+#define V4L2_CID_MTK_FD_SCALE_IMG_NUM		(V4L2_CID_USER_MTK_FD_BASE + 4)
+
+/* Set the detection speed, usually reducing accuracy. */
+#define V4L2_CID_MTK_FD_DETECT_SPEED		(V4L2_CID_USER_MTK_FD_BASE + 5)
+
+/* Select the detection model or algorithm to be used. */
+#define V4L2_CID_MTK_FD_DETECTION_MODEL		(V4L2_CID_USER_MTK_FD_BASE + 6)
+
+/* We reserve 16 controls for this driver. */
+#define V4L2_CID_MTK_FD_MAX			16
+
+static const struct v4l2_pix_format_mplane mtk_fd_img_fmts[] = {
+	{
+		.pixelformat = V4L2_PIX_FMT_VYUY,
+		.num_planes = 1,
+	},
+	{
+		.pixelformat = V4L2_PIX_FMT_YUYV,
+		.num_planes = 1,
+	},
+	{
+		.pixelformat = V4L2_PIX_FMT_YVYU,
+		.num_planes = 1,
+	},
+	{
+		.pixelformat = V4L2_PIX_FMT_UYVY,
+		.num_planes = 1,
+	},
+	{
+		.pixelformat = V4L2_PIX_FMT_NV16M,
+		.num_planes = 2,
+	},
+	{
+		.pixelformat = V4L2_PIX_FMT_NV61M,
+		.num_planes = 2,
+	},
+};
+
+#define NUM_FORMATS ARRAY_SIZE(mtk_fd_img_fmts)
+
+static inline struct mtk_fd_ctx *fh_to_ctx(struct v4l2_fh *fh)
+{
+	return container_of(fh, struct mtk_fd_ctx, fh);
+}
+
+/*  */
+static int mtk_fd_hw_alloc_rs_dma_addr(struct mtk_fd_dev *fd)
+{
+	struct device *dev = fd->dev;
+	void *va;
+	dma_addr_t dma_handle;
+
+	va = dma_alloc_coherent(dev, MTK_FD_RS_BUF_SIZE, &dma_handle,
+				GFP_KERNEL);
+	if (!va) {
+		dev_err(dev, "dma_alloc null va\n");
+		return -ENOMEM;
+	}
+	memset(va, 0, MTK_FD_RS_BUF_SIZE);
+	fd->rs_dma_buf = va;
+	fd->rs_dma_handle = dma_handle;
+
+	return 0;
+}
+
+static int mtk_fd_send_ipi_init(struct mtk_fd_dev *fd)
+{
+	struct ipi_message fd_init_msg;
+
+	fd_init_msg.cmd_id = MTK_FD_IPI_CMD_INIT;
+
+	fd_init_msg.fd_init_param.fd_manager.scp_addr = fd->scp_mem.scp_addr;
+	fd_init_msg.fd_init_param.fd_manager.dma_addr = fd->scp_mem.dma_addr;
+
+	memset(fd->rs_dma_buf, 0, MTK_FD_RS_BUF_SIZE);
+
+	fd_init_msg.fd_init_param.rs_dma_addr = fd->rs_dma_handle;
+
+	return scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_init_msg,
+			    sizeof(fd_init_msg), 300);
+}
+
+static void mtk_fd_free_dma_handle(struct mtk_fd_dev *fd)
+{
+	dma_free_coherent(fd->dev, MTK_FD_RS_BUF_SIZE,
+			  fd->rs_dma_buf,
+			  fd->rs_dma_handle);
+}
+
+static int mtk_fd_hw_enable(struct mtk_fd_dev *fd)
+{
+	int ret;
+
+	pm_runtime_get_sync(fd->dev);
+	ret = mtk_fd_send_ipi_init(fd);
+	pm_runtime_put(fd->dev);
+	if (ret) {
+		dev_err(fd->dev, "Failed to send fd ipi init\n");
+		return ret;
+	}
+	return 0;
+}
+
+static void mtk_fd_hw_job_finish(struct mtk_fd_dev *fd,
+				 enum vb2_buffer_state vb_state)
+{
+	struct mtk_fd_ctx *ctx;
+	struct vb2_v4l2_buffer *src_vbuf = NULL, *dst_vbuf = NULL;
+
+	pm_runtime_put(fd->dev);
+
+	ctx = v4l2_m2m_get_curr_priv(fd->m2m_dev);
+	src_vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+	dst_vbuf = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+
+	v4l2_m2m_buf_copy_metadata(src_vbuf, dst_vbuf,
+				   V4L2_BUF_FLAG_TSTAMP_SRC_MASK);
+	v4l2_m2m_buf_done(src_vbuf, vb_state);
+	v4l2_m2m_buf_done(dst_vbuf, vb_state);
+	v4l2_m2m_job_finish(fd->m2m_dev, ctx->fh.m2m_ctx);
+	complete_all(&fd->fd_job_finished);
+}
+
+static void mtk_fd_hw_done(struct mtk_fd_dev *fd,
+			   enum vb2_buffer_state vb_state)
+{
+	if (!cancel_delayed_work(&fd->job_timeout_work))
+		return;
+
+	mtk_fd_hw_job_finish(fd, vb_state);
+}
+
+static void mtk_fd_ipi_handler(void *data, unsigned int len, void *priv)
+{
+	struct mtk_fd_dev *fd = (struct mtk_fd_dev *)priv;
+	struct device *dev = fd->dev;
+	struct ipi_message *fd_ack_msg = (struct ipi_message *)data;
+	struct fd_ack_param *fd_ack = &fd_ack_msg->fd_ack_param;
+
+	dev_dbg(fd->dev, "fd_ipi_ack_id: %d\n", fd_ack_msg->cmd_id);
+	switch (fd_ack_msg->cmd_id) {
+	case MTK_FD_IPI_CMD_INIT_ACK:
+		return;
+	case MTK_FD_IPI_CMD_ENQ_ACK:
+		if (fd_ack->ret_code) {
+			dev_err(dev, "ipi ret: %d, message: %d\n",
+				fd_ack->ret_code, fd_ack->ret_msg);
+			mtk_fd_hw_done(fd, VB2_BUF_STATE_ERROR);
+		}
+		return;
+	case MTK_FD_IPI_CMD_EXIT_ACK:
+		if (fd_ack->ret_code)
+			dev_err(dev, "Failed to exit FD HW\n");
+		return;
+	case MTK_FD_IPI_CMD_RESET_ACK:
+		if (fd_ack->ret_code)
+			dev_err(dev, "Failed to reset FD HW\n");
+		return;
+	}
+}
+
+static int mtk_fd_hw_connect(struct mtk_fd_dev *fd)
+{
+	int ret;
+
+	ret = rproc_boot(fd->rproc_handle);
+
+	if (ret < 0) {
+		/**
+		 * Return 0 if downloading firmware successfully,
+		 * otherwise it is failed
+		 */
+		dev_err(fd->dev, "Failed to boot rproc\n");
+		return ret;
+	}
+
+	ret = scp_ipi_register(fd->scp_pdev, SCP_IPI_FD_CMD,
+			       mtk_fd_ipi_handler, fd);
+	if (ret) {
+		dev_err(fd->dev, "Failed to register IPI cmd handler\n");
+		goto err_rproc_shutdown;
+	}
+
+	fd->fd_stream_count++;
+	if (fd->fd_stream_count == 1) {
+		if (mtk_fd_hw_enable(fd)) {
+			ret = -EINVAL;
+			goto err_rproc_shutdown;
+		}
+	}
+	return 0;
+
+err_rproc_shutdown:
+	rproc_shutdown(fd->rproc_handle);
+	return ret;
+}
+
+static void mtk_fd_exit_hw(struct mtk_fd_dev *fd)
+{
+	struct ipi_message fd_ipi_msg;
+
+	fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_EXIT;
+	if (scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
+			 sizeof(fd_ipi_msg), 300))
+		dev_err(fd->dev, "FD EXIT HW error\n");
+}
+
+static void mtk_fd_hw_disconnect(struct mtk_fd_dev *fd)
+{
+	fd->fd_stream_count--;
+	mtk_fd_exit_hw(fd);
+	rproc_shutdown(fd->rproc_handle);
+}
+
+static int mtk_fd_hw_job_exec(struct mtk_fd_dev *fd,
+			      struct fd_enq_param *fd_param)
+{
+	struct ipi_message fd_ipi_msg;
+	int ret;
+
+	pm_runtime_get_sync((fd->dev));
+
+	reinit_completion(&fd->fd_job_finished);
+	schedule_delayed_work(&fd->job_timeout_work,
+			      msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
+
+	fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_ENQUEUE;
+	memcpy(&fd_ipi_msg.fd_enq_param, fd_param, sizeof(struct fd_enq_param));
+	ret = scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
+			   sizeof(fd_ipi_msg), 300);
+	if (ret) {
+		mtk_fd_hw_done(fd, VB2_BUF_STATE_ERROR);
+		return ret;
+	}
+	return 0;
+}
+
+static int mtk_fd_vb2_buf_out_validate(struct vb2_buffer *vb)
+{
+	struct vb2_v4l2_buffer *v4l2_buf = to_vb2_v4l2_buffer(vb);
+
+	if (v4l2_buf->field == V4L2_FIELD_ANY)
+		v4l2_buf->field = V4L2_FIELD_NONE;
+	if (v4l2_buf->field != V4L2_FIELD_NONE)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int mtk_fd_vb2_buf_prepare(struct vb2_buffer *vb)
+{
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct vb2_queue *vq = vb->vb2_queue;
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
+	struct device *dev = ctx->dev;
+	struct v4l2_pix_format_mplane *pixfmt;
+
+	switch (vq->type) {
+	case V4L2_BUF_TYPE_META_CAPTURE:
+		if (vb2_plane_size(vb, 0) < ctx->dst_fmt.buffersize) {
+			dev_dbg(dev, "meta size %lu is too small\n",
+				vb2_plane_size(vb, 0));
+			return -EINVAL;
+		}
+		break;
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
+		pixfmt = &ctx->src_fmt;
+
+		if (vbuf->field == V4L2_FIELD_ANY)
+			vbuf->field = V4L2_FIELD_NONE;
+
+		if (vb->num_planes > 2 || vbuf->field != V4L2_FIELD_NONE) {
+			dev_dbg(dev, "plane %d or field %d not supported\n",
+				vb->num_planes, vbuf->field);
+			return -EINVAL;
+		}
+		if (vb2_plane_size(vb, 0) < pixfmt->plane_fmt[0].sizeimage) {
+			dev_dbg(dev, "plane %lu is too small\n",
+				vb2_plane_size(vb, 0));
+			return -EINVAL;
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static void mtk_fd_vb2_buf_queue(struct vb2_buffer *vb)
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+
+	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
+}
+
+static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
+				  unsigned int *num_buffers,
+				  unsigned int *num_planes,
+				  unsigned int sizes[],
+				  struct device *alloc_devs[])
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
+	unsigned int size[2];
+	unsigned int plane;
+
+	switch (vq->type) {
+	case V4L2_BUF_TYPE_META_CAPTURE:
+		size[0] = ctx->dst_fmt.buffersize;
+		break;
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
+		size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
+		if (*num_planes == 2)
+			size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
+		break;
+	}
+
+	if (*num_planes > 2)
+		return -EINVAL;
+	if (*num_planes == 0) {
+		if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
+			sizes[0] = ctx->dst_fmt.buffersize;
+			*num_planes = 1;
+			return 0;
+		}
+
+		*num_planes = ctx->src_fmt.num_planes;
+		for (plane = 0; plane < *num_planes; plane++)
+			sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
+		return 0;
+	}
+
+	for (plane = 0; plane < *num_planes; plane++) {
+		if (sizes[plane] < size[plane])
+			return -EINVAL;
+	}
+	return 0;
+}
+
+static int mtk_fd_vb2_start_streaming(struct vb2_queue *vq, unsigned int count)
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
+
+	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
+		return mtk_fd_hw_connect(ctx->fd_dev);
+	else
+		return 0;
+}
+
+static void mtk_fd_reset_hw(struct mtk_fd_dev *fd)
+{
+	struct ipi_message fd_ipi_msg;
+
+	fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_RESET;
+	if (scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
+			 sizeof(fd_ipi_msg), 300))
+		dev_err(fd->dev, "FD Reset HW error\n");
+}
+
+static void mtk_fd_job_timeout_work(struct work_struct *work)
+{
+	struct mtk_fd_dev *fd =
+		container_of(work, struct mtk_fd_dev, job_timeout_work.work);
+
+	dev_err(fd->dev, "FD Job timeout!");
+	mtk_fd_reset_hw(fd);
+	mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
+}
+
+static void mtk_fd_job_wait_finish(struct mtk_fd_dev *fd)
+{
+	wait_for_completion(&fd->fd_job_finished);
+}
+
+static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
+	struct mtk_fd_dev *fd = ctx->fd_dev;
+	struct vb2_v4l2_buffer *vb;
+	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
+	struct v4l2_m2m_queue_ctx *queue_ctx;
+
+	mtk_fd_job_wait_finish(fd);
+	queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
+					&m2m_ctx->out_q_ctx :
+					&m2m_ctx->cap_q_ctx;
+	while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
+		v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
+
+	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
+		mtk_fd_hw_disconnect(fd);
+}
+
+static void mtk_fd_vb2_request_complete(struct vb2_buffer *vb)
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+
+	v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
+}
+
+static int mtk_fd_querycap(struct file *file, void *fh,
+			   struct v4l2_capability *cap)
+{
+	struct mtk_fd_dev *fd = video_drvdata(file);
+	struct device *dev = fd->dev;
+
+	strscpy(cap->driver, dev_driver_string(dev), sizeof(cap->driver));
+	strscpy(cap->card, dev_driver_string(dev), sizeof(cap->card));
+	snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s",
+		 dev_name(fd->dev));
+
+	return 0;
+}
+
+static int mtk_fd_enum_fmt_out_mp(struct file *file, void *fh,
+				  struct v4l2_fmtdesc *f)
+{
+	if (f->index >= NUM_FORMATS)
+		return -EINVAL;
+
+	f->pixelformat = mtk_fd_img_fmts[f->index].pixelformat;
+	return 0;
+}
+
+static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
+				  const struct v4l2_pix_format_mplane *sfmt)
+{
+	dfmt->field = V4L2_FIELD_NONE;
+	dfmt->colorspace = V4L2_COLORSPACE_BT2020;
+	dfmt->num_planes = sfmt->num_planes;
+	dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
+	dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
+	dfmt->xfer_func =
+		V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
+
+	/* Keep user setting as possible */
+	dfmt->width = clamp(dfmt->width,
+			    MTK_FD_OUTPUT_MIN_WIDTH,
+			    MTK_FD_OUTPUT_MAX_WIDTH);
+	dfmt->height = clamp(dfmt->height,
+			     MTK_FD_OUTPUT_MIN_HEIGHT,
+			     MTK_FD_OUTPUT_MAX_HEIGHT);
+
+	if (sfmt->num_planes == 2) {
+		/* NV16M and NV61M has 1 byte per pixel */
+		dfmt->plane_fmt[0].bytesperline = dfmt->width;
+		dfmt->plane_fmt[1].bytesperline = dfmt->width;
+	} else {
+		/* 2 bytes per pixel */
+		dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
+	}
+
+	dfmt->plane_fmt[0].sizeimage =
+		dfmt->height * dfmt->plane_fmt[0].bytesperline;
+}
+
+static const struct v4l2_pix_format_mplane *mtk_fd_find_fmt(u32 format)
+{
+	unsigned int i;
+	const struct v4l2_pix_format_mplane *dev_fmt;
+
+	for (i = 0; i < NUM_FORMATS; i++) {
+		dev_fmt = &mtk_fd_img_fmts[i];
+		if (dev_fmt->pixelformat == format)
+			return dev_fmt;
+	}
+
+	return NULL;
+}
+
+static int mtk_fd_try_fmt_out_mp(struct file *file,
+				 void *fh,
+				 struct v4l2_format *f)
+{
+	struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
+	const struct v4l2_pix_format_mplane *fmt;
+
+	fmt = mtk_fd_find_fmt(pix_mp->pixelformat);
+	if (!fmt)
+		fmt = &mtk_fd_img_fmts[0];	/* Get default img fmt */
+
+	mtk_fd_fill_pixfmt_mp(pix_mp, fmt);
+	return 0;
+}
+
+static int mtk_fd_g_fmt_out_mp(struct file *file, void *fh,
+			       struct v4l2_format *f)
+{
+	struct mtk_fd_ctx *ctx = fh_to_ctx(fh);
+
+	f->fmt.pix_mp = ctx->src_fmt;
+
+	return 0;
+}
+
+static int mtk_fd_s_fmt_out_mp(struct file *file, void *fh,
+			       struct v4l2_format *f)
+{
+	struct mtk_fd_ctx *ctx = fh_to_ctx(fh);
+	struct vb2_queue *vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
+
+	/* Change not allowed if queue is streaming. */
+	if (vb2_is_streaming(vq)) {
+		dev_err(ctx->dev, "Failed to set format, vb2 is busy\n");
+		return -EBUSY;
+	}
+
+	mtk_fd_try_fmt_out_mp(file, fh, f);
+	ctx->src_fmt = f->fmt.pix_mp;
+
+	return 0;
+}
+
+static int mtk_fd_enum_fmt_meta_cap(struct file *file, void *fh,
+				    struct v4l2_fmtdesc *f)
+{
+	if (f->index)
+		return -EINVAL;
+
+	strscpy(f->description, "Face detection result",
+		sizeof(f->description));
+	f->pixelformat = V4L2_META_FMT_MTFD_RESULT;
+	f->flags = 0;
+
+	return 0;
+}
+
+static int mtk_fd_g_fmt_meta_cap(struct file *file, void *fh,
+				 struct v4l2_format *f)
+{
+	f->fmt.meta.dataformat = V4L2_META_FMT_MTFD_RESULT;
+	f->fmt.meta.buffersize = sizeof(struct fd_user_output);
+
+	return 0;
+}
+
+static const struct vb2_ops mtk_fd_vb2_ops = {
+	.queue_setup = mtk_fd_vb2_queue_setup,
+	.buf_out_validate = mtk_fd_vb2_buf_out_validate,
+	.buf_prepare  = mtk_fd_vb2_buf_prepare,
+	.buf_queue = mtk_fd_vb2_buf_queue,
+	.start_streaming = mtk_fd_vb2_start_streaming,
+	.stop_streaming = mtk_fd_vb2_stop_streaming,
+	.wait_prepare = vb2_ops_wait_prepare,
+	.wait_finish = vb2_ops_wait_finish,
+	.buf_request_complete = mtk_fd_vb2_request_complete,
+};
+
+static const struct v4l2_ioctl_ops mtk_fd_v4l2_video_out_ioctl_ops = {
+	.vidioc_querycap = mtk_fd_querycap,
+	.vidioc_enum_fmt_vid_out = mtk_fd_enum_fmt_out_mp,
+	.vidioc_g_fmt_vid_out_mplane = mtk_fd_g_fmt_out_mp,
+	.vidioc_s_fmt_vid_out_mplane = mtk_fd_s_fmt_out_mp,
+	.vidioc_try_fmt_vid_out_mplane = mtk_fd_try_fmt_out_mp,
+	.vidioc_enum_fmt_meta_cap = mtk_fd_enum_fmt_meta_cap,
+	.vidioc_g_fmt_meta_cap = mtk_fd_g_fmt_meta_cap,
+	.vidioc_s_fmt_meta_cap = mtk_fd_g_fmt_meta_cap,
+	.vidioc_try_fmt_meta_cap = mtk_fd_g_fmt_meta_cap,
+	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
+	.vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs,
+	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
+	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
+	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
+	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
+	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
+	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
+	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
+	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
+	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
+};
+
+static int
+mtk_fd_queue_init(void *priv, struct vb2_queue *src_vq,
+		  struct vb2_queue *dst_vq)
+{
+	struct mtk_fd_ctx *ctx = priv;
+	int ret;
+
+	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
+	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+	src_vq->supports_requests = true;
+	src_vq->drv_priv = ctx;
+	src_vq->ops = &mtk_fd_vb2_ops;
+	src_vq->mem_ops = &vb2_dma_contig_memops;
+	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+	src_vq->lock = &ctx->fd_dev->vfd_lock;
+	src_vq->dev = ctx->fd_dev->v4l2_dev.dev;
+
+	ret = vb2_queue_init(src_vq);
+	if (ret)
+		return ret;
+
+	dst_vq->type = V4L2_BUF_TYPE_META_CAPTURE;
+	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+	dst_vq->drv_priv = ctx;
+	dst_vq->ops = &mtk_fd_vb2_ops;
+	dst_vq->mem_ops = &vb2_dma_contig_memops;
+	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+	dst_vq->lock = &ctx->fd_dev->vfd_lock;
+	dst_vq->dev = ctx->fd_dev->v4l2_dev.dev;
+
+	return vb2_queue_init(dst_vq);
+}
+
+static struct v4l2_ctrl_config mtk_fd_controls[] = {
+	{
+		.id = V4L2_CID_MTK_FD_SCALE_DOWN_IMG_WIDTH,
+		.name = "FD scale image widths",
+		.type = V4L2_CTRL_TYPE_U16,
+		.min = MTK_FD_OUTPUT_MIN_WIDTH,
+		.max = MTK_FD_OUTPUT_MAX_WIDTH,
+		.step = 1,
+		.def = MTK_FD_OUTPUT_MAX_WIDTH,
+		.dims = { MTK_FD_SCALE_ARR_NUM },
+	},
+	{
+		.id = V4L2_CID_MTK_FD_SCALE_DOWN_IMG_HEIGHT,
+		.name = "FD scale image heights",
+		.type = V4L2_CTRL_TYPE_U16,
+		.min = MTK_FD_OUTPUT_MIN_HEIGHT,
+		.max = MTK_FD_OUTPUT_MAX_HEIGHT,
+		.step = 1,
+		.def = MTK_FD_OUTPUT_MAX_HEIGHT,
+		.dims = { MTK_FD_SCALE_ARR_NUM },
+	},
+	{
+		.id = V4L2_CID_MTK_FD_SCALE_IMG_NUM,
+		.name = "FD scale size counts",
+		.type = V4L2_CTRL_TYPE_INTEGER,
+		.min = 0,
+		.max = MTK_FD_SCALE_ARR_NUM,
+		.step = 1,
+		.def = 0,
+	},
+	{
+		.id = V4L2_CID_MTK_FD_DETECT_POSE,
+		.name = "FD detect face angle and directions",
+		.type = V4L2_CTRL_TYPE_U16,
+		.min = 0,
+		.max = 0xffff,
+		.step = 1,
+		.def = 0x3ff,
+		.dims = { MTK_FD_FACE_ANGLE_NUM},
+	},
+	{
+		.id = V4L2_CID_MTK_FD_DETECT_SPEED,
+		.name = "FD detection speedup",
+		.type = V4L2_CTRL_TYPE_INTEGER,
+		.min = 0,
+		.max = MTK_FD_MAX_SPEEDUP,
+		.step = 1,
+		.def = 0,
+	},
+	{
+		.id = V4L2_CID_MTK_FD_DETECTION_MODEL,
+		.name = "FD use extra model",
+		.type = V4L2_CTRL_TYPE_BOOLEAN,
+		.min = 0,
+		.max = 1,
+		.step = 1,
+		.def = 0,
+	},
+};
+
+static int mtk_fd_ctrls_setup(struct mtk_fd_ctx *ctx)
+{
+	struct v4l2_ctrl_handler *hdl = &ctx->hdl;
+	int i;
+
+	v4l2_ctrl_handler_init(hdl, V4L2_CID_MTK_FD_MAX);
+	if (hdl->error)
+		return hdl->error;
+
+	for (i = 0; i < ARRAY_SIZE(mtk_fd_controls); i++) {
+		v4l2_ctrl_new_custom(hdl, &mtk_fd_controls[i], ctx);
+		if (hdl->error) {
+			v4l2_ctrl_handler_free(hdl);
+			dev_err(ctx->dev, "Failed to register controls:%d", i);
+			return hdl->error;
+		}
+	}
+
+	ctx->fh.ctrl_handler = &ctx->hdl;
+	v4l2_ctrl_handler_setup(hdl);
+
+	return 0;
+}
+
+static unsigned int get_fd_img_fmt(unsigned int fourcc)
+{
+	switch (fourcc) {
+	case V4L2_PIX_FMT_VYUY:
+		return MTK_FD_HW_FMT_VYUY;
+	case V4L2_PIX_FMT_YUYV:
+		return MTK_FD_HW_FMT_YUYV;
+	case V4L2_PIX_FMT_YVYU:
+		return MTK_FD_HW_FMT_YVYU;
+	case V4L2_PIX_FMT_UYVY:
+		return MTK_FD_HW_FMT_UYVY;
+	case V4L2_PIX_FMT_NV16M:
+		return MTK_FD_HW_FMT_YUV_2P;
+	case V4L2_PIX_FMT_NV61M:
+		return MTK_FD_HW_FMT_YVU_2P;
+	default:
+		return MTK_FD_HW_FMT_UNKNOWN;
+	}
+}
+
+static void init_ctx_fmt(struct mtk_fd_ctx *ctx)
+{
+	struct v4l2_pix_format_mplane *src_fmt = &ctx->src_fmt;
+	struct v4l2_meta_format *dst_fmt = &ctx->dst_fmt;
+
+	/* Initialize M2M source fmt */
+	src_fmt->width = MTK_FD_OUTPUT_MAX_WIDTH;
+	src_fmt->height = MTK_FD_OUTPUT_MAX_HEIGHT;
+	mtk_fd_fill_pixfmt_mp(src_fmt, &mtk_fd_img_fmts[0]);
+
+	/* Initialize M2M destination fmt */
+	dst_fmt->buffersize = sizeof(struct fd_user_output);
+	dst_fmt->dataformat = V4L2_META_FMT_MTFD_RESULT;
+}
+
+/*
+ * V4L2 file operations.
+ */
+static int mtk_vfd_open(struct file *filp)
+{
+	struct mtk_fd_dev *fd = video_drvdata(filp);
+	struct video_device *vdev = video_devdata(filp);
+	struct mtk_fd_ctx *ctx;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->fd_dev = fd;
+	ctx->dev = fd->dev;
+	fd->ctx = ctx;
+
+	v4l2_fh_init(&ctx->fh, vdev);
+	filp->private_data = &ctx->fh;
+
+	init_ctx_fmt(ctx);
+
+	ret = mtk_fd_ctrls_setup(ctx);
+	if (ret) {
+		dev_err(ctx->dev, "Failed to set up controls:%d\n", ret);
+		goto err_fh_exit;
+	}
+
+	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(fd->m2m_dev, ctx,
+					    &mtk_fd_queue_init);
+	if (IS_ERR(ctx->fh.m2m_ctx)) {
+		ret = PTR_ERR(ctx->fh.m2m_ctx);
+		goto err_free_ctrl_handler;
+	}
+
+	v4l2_fh_add(&ctx->fh);
+
+	return 0;
+
+err_free_ctrl_handler:
+	v4l2_ctrl_handler_free(&ctx->hdl);
+err_fh_exit:
+	v4l2_fh_exit(&ctx->fh);
+	kfree(ctx);
+
+	return ret;
+}
+
+static int mtk_vfd_release(struct file *filp)
+{
+	struct mtk_fd_ctx *ctx = container_of(filp->private_data,
+					      struct mtk_fd_ctx, fh);
+
+	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
+
+	v4l2_ctrl_handler_free(&ctx->hdl);
+	v4l2_fh_del(&ctx->fh);
+	v4l2_fh_exit(&ctx->fh);
+
+	kfree(ctx);
+
+	return 0;
+}
+
+static const struct v4l2_file_operations fd_video_fops = {
+	.owner = THIS_MODULE,
+	.open = mtk_vfd_open,
+	.release = mtk_vfd_release,
+	.poll = v4l2_m2m_fop_poll,
+	.unlocked_ioctl = video_ioctl2,
+	.mmap = v4l2_m2m_fop_mmap,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl32 = v4l2_compat_ioctl32,
+#endif
+
+};
+
+static void mtk_fd_fill_user_param(struct user_param *user_param,
+				   struct v4l2_ctrl_handler *hdl)
+{
+	struct v4l2_ctrl *ctrl;
+	int i;
+
+	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_DOWN_IMG_WIDTH);
+	if (ctrl)
+		for (i = 0; i < ctrl->elems; i++)
+			user_param->scale_img_width[i] = ctrl->p_new.p_u16[i];
+	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_DOWN_IMG_HEIGHT);
+	if (ctrl)
+		for (i = 0; i < ctrl->elems; i++)
+			user_param->scale_img_height[i] = ctrl->p_new.p_u16[i];
+	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_IMG_NUM);
+	if (ctrl)
+		user_param->scale_img_num = ctrl->val;
+
+	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECT_POSE);
+	if (ctrl)
+		for (i = 0; i < ctrl->elems; i++)
+			user_param->face_directions[i] = ctrl->p_new.p_u16[i];
+	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECT_SPEED);
+	if (ctrl)
+		user_param->fd_speedup = ctrl->val;
+	ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECTION_MODEL);
+	if (ctrl)
+		user_param->fd_extra_model = ctrl->val;
+}
+
+static void mtk_fd_device_run(void *priv)
+{
+	struct mtk_fd_ctx *ctx = priv;
+	struct mtk_fd_dev *fd = ctx->fd_dev;
+	struct vb2_v4l2_buffer *src_buf, *dst_buf;
+	struct fd_enq_param fd_param;
+	void *plane_vaddr;
+
+	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
+	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
+
+	fd_param.src_img[0].dma_addr =
+		vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
+	fd_param.user_result.dma_addr =
+		vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+	plane_vaddr = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
+	fd_param.output_vaddr = (u64)(unsigned long)plane_vaddr;
+	fd_param.user_param.src_img_fmt =
+		get_fd_img_fmt(ctx->src_fmt.pixelformat);
+	if (ctx->src_fmt.num_planes == 2)
+		fd_param.src_img[1].dma_addr =
+			vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 1);
+	mtk_fd_fill_user_param(&fd_param.user_param, &ctx->hdl);
+
+	/* Complete request controls if any */
+	v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
+
+	fd->output = plane_vaddr;
+	mtk_fd_hw_job_exec(fd, &fd_param);
+}
+
+static struct v4l2_m2m_ops fd_m2m_ops = {
+	.device_run = mtk_fd_device_run,
+};
+
+static const struct media_device_ops fd_m2m_media_ops = {
+	.req_validate	= vb2_request_validate,
+	.req_queue	= v4l2_m2m_request_queue,
+};
+
+static int mtk_fd_video_device_register(struct mtk_fd_dev *fd)
+{
+	struct video_device *vfd = &fd->vfd;
+	struct v4l2_m2m_dev *m2m_dev = fd->m2m_dev;
+	struct device *dev = fd->dev;
+	int ret;
+
+	vfd->fops = &fd_video_fops;
+	vfd->release = video_device_release;
+	vfd->lock = &fd->vfd_lock;
+	vfd->v4l2_dev = &fd->v4l2_dev;
+	vfd->vfl_dir = VFL_DIR_M2M;
+	vfd->device_caps = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_OUTPUT_MPLANE |
+		V4L2_CAP_META_CAPTURE;
+	vfd->ioctl_ops = &mtk_fd_v4l2_video_out_ioctl_ops;
+
+	strscpy(vfd->name, dev_driver_string(dev), sizeof(vfd->name));
+
+	video_set_drvdata(vfd, fd);
+
+	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
+	if (ret) {
+		dev_err(dev, "Failed to register video device\n");
+		goto err_free_dev;
+	}
+
+	ret = v4l2_m2m_register_media_controller(m2m_dev, vfd,
+					     MEDIA_ENT_F_PROC_VIDEO_STATISTICS);
+	if (ret) {
+		dev_err(dev, "Failed to init mem2mem media controller\n");
+		goto err_unreg_video;
+	}
+	return 0;
+
+err_unreg_video:
+	video_unregister_device(vfd);
+err_free_dev:
+	video_device_release(vfd);
+	return ret;
+}
+
+static int mtk_fd_dev_v4l2_init(struct mtk_fd_dev *fd)
+{
+	struct media_device *mdev = &fd->mdev;
+	struct device *dev = fd->dev;
+	int ret;
+
+	ret = v4l2_device_register(dev, &fd->v4l2_dev);
+	if (ret) {
+		dev_err(dev, "Failed to register v4l2 device\n");
+		return ret;
+	}
+
+	fd->m2m_dev = v4l2_m2m_init(&fd_m2m_ops);
+	if (IS_ERR(fd->m2m_dev)) {
+		dev_err(dev, "Failed to init mem2mem device\n");
+		ret = PTR_ERR(fd->m2m_dev);
+		goto err_unreg_v4l2_dev;
+	}
+
+	mdev->dev = dev;
+	strscpy(mdev->model, dev_driver_string(dev), sizeof(mdev->model));
+	snprintf(mdev->bus_info, sizeof(mdev->bus_info),
+		 "platform:%s", dev_name(dev));
+	media_device_init(mdev);
+	mdev->ops = &fd_m2m_media_ops;
+	fd->v4l2_dev.mdev = mdev;
+
+	ret = mtk_fd_video_device_register(fd);
+	if (ret) {
+		dev_err(dev, "Failed to register video device\n");
+		goto err_cleanup_mdev;
+	}
+
+	ret = media_device_register(mdev);
+	if (ret) {
+		dev_err(dev, "Failed to register mem2mem media device\n");
+		goto err_unreg_vdev;
+	}
+
+	return 0;
+
+err_unreg_vdev:
+	v4l2_m2m_unregister_media_controller(fd->m2m_dev);
+	video_unregister_device(&fd->vfd);
+	video_device_release(&fd->vfd);
+err_cleanup_mdev:
+	media_device_cleanup(mdev);
+	v4l2_m2m_release(fd->m2m_dev);
+err_unreg_v4l2_dev:
+	v4l2_device_unregister(&fd->v4l2_dev);
+	return ret;
+}
+
+static void mtk_fd_dev_v4l2_release(struct mtk_fd_dev *fd)
+{
+	v4l2_m2m_unregister_media_controller(fd->m2m_dev);
+	video_unregister_device(&fd->vfd);
+	video_device_release(&fd->vfd);
+	media_device_cleanup(&fd->mdev);
+	v4l2_m2m_release(fd->m2m_dev);
+	v4l2_device_unregister(&fd->v4l2_dev);
+}
+
+static irqreturn_t mtk_fd_irq(int irq, void *data)
+{
+	struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
+
+	/* must read this register otherwise HW will keep sending irq */
+	readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
+	fd->output->number = readl(fd->fd_base + MTK_FD_REG_OFFSET_RESULT);
+	dev_dbg(fd->dev, "mtk_fd_face_num:%d\n", fd->output->number);
+
+	mtk_fd_hw_done(fd, VB2_BUF_STATE_DONE);
+	return IRQ_HANDLED;
+}
+
+static int mtk_fd_hw_get_scp_mem(struct mtk_fd_dev *fd)
+{
+	struct device *dev = fd->dev;
+	dma_addr_t addr;
+	void *ptr;
+	u32 ret;
+
+	/*
+	 * Allocate coherent reserved memory for SCP firmware usage.
+	 * The size of SCP composer's memory is fixed to 0x100000
+	 * for the requirement of firmware.
+	 */
+	ptr = dma_alloc_coherent(&fd->scp_pdev->dev,
+				 MTK_FD_HW_WORK_BUF_SIZE, &addr, GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	fd->scp_mem.scp_addr = addr;
+	fd->scp_mem_virt_addr = ptr;
+	dev_info(dev, "scp addr:%pad va:%pK\n", &addr, ptr);
+
+	/*
+	 * This reserved memory is also be used by FD HW.
+	 * Need to get iova address for FD DMA.
+	 */
+	addr = dma_map_resource(dev, addr, MTK_FD_HW_WORK_BUF_SIZE,
+				DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+	if (dma_mapping_error(dev, addr)) {
+		dev_err(dev, "Failed to map scp iova\n");
+		ret = -ENOMEM;
+		goto fail_free_mem;
+	}
+	fd->scp_mem.dma_addr = addr;
+	dev_info(dev, "scp iova addr:%pad\n", &addr);
+
+	return 0;
+
+fail_free_mem:
+	dma_free_coherent(&fd->scp_pdev->dev, MTK_FD_HW_WORK_BUF_SIZE,
+			  ptr, fd->scp_mem.scp_addr);
+	fd->scp_mem.scp_addr = 0;
+
+	return ret;
+}
+
+static int mtk_fd_probe(struct platform_device *pdev)
+{
+	struct mtk_fd_dev *fd;
+	struct device *dev = &pdev->dev;
+
+	struct resource *res;
+	phandle rproc_phandle;
+	int irq;
+	int ret;
+
+	fd = devm_kzalloc(&pdev->dev, sizeof(*fd), GFP_KERNEL);
+	if (!fd)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, fd);
+	fd->dev = dev;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "Failed to get irq by platform: %d\n", irq);
+		return irq;
+	}
+	ret = devm_request_irq(dev, irq, mtk_fd_irq, IRQF_SHARED,
+			       dev_driver_string(dev),
+			       fd);
+	if (ret) {
+		dev_err(dev, "Failed to request irq\n");
+		return ret;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fd->fd_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(fd->fd_base)) {
+		dev_err(dev, "Failed to get fd reg base\n");
+		return PTR_ERR(fd->fd_base);
+	}
+
+	fd->fd_clk = devm_clk_get(dev, "fd");
+	if (IS_ERR(fd->fd_clk)) {
+		dev_err(dev, "Failed to get fd_clk_img_fd clock\n");
+		return PTR_ERR(fd->fd_clk);
+	}
+
+	/* init scp */
+	fd->scp_pdev = scp_get_pdev(pdev);
+	if (!fd->scp_pdev) {
+		dev_err(dev, "Failed to get scp device\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_u32(fd->dev->of_node, "mediatek,scp",
+				 &rproc_phandle)) {
+		dev_err(dev, "Failed to get scp device\n");
+		return -EINVAL;
+	}
+
+	fd->rproc_handle = rproc_get_by_phandle(rproc_phandle);
+	if (!fd->rproc_handle) {
+		dev_err(dev, "Failed to get FD's rproc_handle\n");
+		return -EINVAL;
+	}
+
+	ret = mtk_fd_hw_get_scp_mem(fd);
+	if (ret) {
+		dev_err(dev, "Failed to init scp memory: %d\n", ret);
+		return ret;
+	}
+
+	ret = mtk_fd_hw_alloc_rs_dma_addr(fd);
+	if (ret) {
+		dev_err(dev, "Failed to allocate dma buffer: %d\n", ret);
+		return ret;
+	}
+
+	mutex_init(&fd->vfd_lock);
+	init_completion(&fd->fd_job_finished);
+	INIT_DELAYED_WORK(&fd->job_timeout_work, mtk_fd_job_timeout_work);
+	pm_runtime_enable(dev);
+
+	ret = mtk_fd_dev_v4l2_init(fd);
+	if (ret) {
+		dev_err(dev, "Failed to init v4l2 device: %d\n", ret);
+		goto err_destroy_mutex;
+	}
+
+	return 0;
+
+err_destroy_mutex:
+	mutex_destroy(&fd->vfd_lock);
+	pm_runtime_disable(fd->dev);
+	mtk_fd_free_dma_handle(fd);
+	return ret;
+}
+
+static int mtk_fd_remove(struct platform_device *pdev)
+{
+	struct mtk_fd_dev *fd = dev_get_drvdata(&pdev->dev);
+
+	mtk_fd_dev_v4l2_release(fd);
+	pm_runtime_disable(&pdev->dev);
+	dma_unmap_page_attrs(fd->dev,
+			     fd->scp_mem.dma_addr,
+			     MTK_FD_HW_WORK_BUF_SIZE,
+			     DMA_TO_DEVICE,
+			     DMA_ATTR_SKIP_CPU_SYNC);
+	dma_free_coherent(&fd->scp_pdev->dev,
+			  MTK_FD_HW_WORK_BUF_SIZE,
+			  fd->scp_mem_virt_addr,
+			  fd->scp_mem.scp_addr);
+	mutex_destroy(&fd->vfd_lock);
+	mtk_fd_free_dma_handle(fd);
+	rproc_put(fd->rproc_handle);
+
+	return 0;
+}
+
+static int mtk_fd_suspend(struct device *dev)
+{
+	struct mtk_fd_dev *fd = dev_get_drvdata(dev);
+
+	if (pm_runtime_suspended(dev))
+		return 0;
+
+	v4l2_m2m_suspend(fd->m2m_dev);
+
+	/* suspend FD HW */
+	writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
+	writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
+	clk_disable_unprepare(fd->fd_clk);
+	dev_dbg(dev, "%s:disable clock\n", __func__);
+
+	return 0;
+}
+
+static int mtk_fd_resume(struct device *dev)
+{
+	struct mtk_fd_dev *fd = dev_get_drvdata(dev);
+	int ret;
+
+	if (pm_runtime_suspended(dev))
+		return 0;
+
+	ret = clk_prepare_enable(fd->fd_clk);
+	if (ret < 0) {
+		dev_dbg(dev, "Failed to open fd clk:%d\n", ret);
+		return ret;
+	}
+
+	/* resume FD HW */
+	writel(MTK_FD_SET_HW_ENABLE, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
+	writel(0x1, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
+	dev_dbg(dev, "%s:enable clock\n", __func__);
+
+	v4l2_m2m_resume(fd->m2m_dev);
+
+	return 0;
+}
+
+static int mtk_fd_runtime_suspend(struct device *dev)
+{
+	struct mtk_fd_dev *fd = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(fd->fd_clk);
+	return 0;
+}
+
+static int mtk_fd_runtime_resume(struct device *dev)
+{
+	struct mtk_fd_dev *fd = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(fd->fd_clk);
+	if (ret < 0) {
+		dev_err(dev, "Failed to open fd clk:%d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops mtk_fd_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mtk_fd_suspend, mtk_fd_resume)
+	SET_RUNTIME_PM_OPS(mtk_fd_runtime_suspend, mtk_fd_runtime_resume, NULL)
+};
+
+static const struct of_device_id mtk_fd_of_ids[] = {
+	{ .compatible = "mediatek,mt8183-fd", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mtk_fd_of_ids);
+
+static struct platform_driver mtk_fd_driver = {
+	.probe   = mtk_fd_probe,
+	.remove  = mtk_fd_remove,
+	.driver  = {
+		.name  = "mtk-fd-4.0",
+		.of_match_table = of_match_ptr(mtk_fd_of_ids),
+		.pm = &mtk_fd_pm_ops,
+	}
+};
+module_platform_driver(mtk_fd_driver);
+MODULE_AUTHOR("Jerry-ch Chen <jerry-ch.chen@mediatek.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Mediatek FD driver");
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 3dcfc6148f99..eae876ea6d0a 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -192,6 +192,10 @@  enum v4l2_colorfx {
  * We reserve 16 controls for this driver. */
 #define V4L2_CID_USER_IMX_BASE			(V4L2_CID_USER_BASE + 0x10b0)
 
+/* The base for the mediatek FD driver controls */
+/* We reserve 16 controls for this driver. */
+#define V4L2_CID_USER_MTK_FD_BASE		(V4L2_CID_USER_BASE + 0x10d0)
+
 /* MPEG-class control IDs */
 /* The MPEG controls are applicable to all codec controls
  * and the 'MPEG' part of the define is historical */
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index b5671ce2724f..203e49ebc9e8 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -721,6 +721,9 @@  struct v4l2_pix_format {
 #define V4L2_META_FMT_UVC         v4l2_fourcc('U', 'V', 'C', 'H') /* UVC Payload Header metadata */
 #define V4L2_META_FMT_D4XX        v4l2_fourcc('D', '4', 'X', 'X') /* D4XX Payload Header metadata */
 
+/* Vendor specific - Mediatek Face Detection meta buffer format for firmware */
+#define V4L2_META_FMT_MTFD_RESULT  v4l2_fourcc('M', 'T', 'f', 'd') /* FD meta capture buffer */
+
 /* priv field value to indicates that subsequent fields are valid. */
 #define V4L2_PIX_FMT_PRIV_MAGIC		0xfeedcafe