diff mbox series

[RFC,V2,4/4] platform: mtk-isp: Add Mediatek FD driver

Message ID 1562661672-22439-5-git-send-email-Jerry-Ch.chen@mediatek.com (mailing list archive)
State New, archived
Headers show
Series media: platform: Add support for Face Detection (FD) on mt8183 SoC | expand

Commit Message

Jerry-ch Chen July 9, 2019, 8:41 a.m. UTC
From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>

This patch adds the driver of Face Detection (FD) unit in
Mediatek camera system, providing face detection function.

The mtk-isp directory will contain drivers for multiple IP
blocks found in Mediatek ISP system. It will include ISP Pass 1
driver (CAM), sensor interface driver, DIP driver and face
detection driver.

Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
---
 drivers/media/platform/Makefile               |    2 +
 drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
 drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  157 +++
 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1259 +++++++++++++++++++++++++
 include/uapi/linux/v4l2-controls.h            |    4 +
 5 files changed, 1427 insertions(+)
 create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
 create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
 create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c

Comments

Enrico Weigelt, metux IT consult July 9, 2019, 10:56 a.m. UTC | #1
On 09.07.19 10:41, Jerry-ch Chen wrote:

Hi,


> diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> new file mode 100644
> index 0000000..289999b
> --- /dev/null
> +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> @@ -0,0 +1,157 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +//
> +// Copyright (c) 2018 MediaTek Inc.
> +
> +#ifndef __MTK_FD_HW_H__
> +#define __MTK_FD_HW_H__
> +
> +#include <linux/io.h>
> +#include <linux/types.h>
> +#include <linux/platform_device.h>
> +#include <media/v4l2-ctrls.h>
> +#include <media/v4l2-device.h>
> +#include <media/videobuf2-v4l2.h>
> +
> +#define MTK_FD_OUTPUT_MIN_WIDTH			26U
> +#define MTK_FD_OUTPUT_MIN_HEIGHT		26U
> +#define MTK_FD_OUTPUT_MAX_WIDTH			640U
> +#define MTK_FD_OUTPUT_MAX_HEIGHT		480U
> +
> +/* Control the user defined image widths and heights
> + * to be scaled and performed face detection in FD HW.
> + * MTK FD support up to 14 user defined image sizes to perform face detection.
> + */
> +#define V4L2_CID_MTK_FD_SCALE_IMG_WIDTH		(V4L2_CID_USER_MTK_FD_BASE + 1)
> +#define V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT	(V4L2_CID_USER_MTK_FD_BASE + 2)

I've got a *really* bad feeling about introducing chip specific
uapi stuff. (by the way: uapi stuff belongs into include/uapi/...)

Maybe you could tell us what that's *really* about, so we can find some
standard / chip-independent api for these things. That's one of the
major point of the kernel: hardware abstraction.

> +#define ENABLE_FD				0x111
> +#define FD_HW_ENABLE				0x4
> +#define FD_INT_EN				0x15c
> +#define FD_INT					0x168
> +#define FD_RESULT				0x178
> +#define FD_IRQ_MASK				0x001
> +
> +#define RS_MAX_BUF_SIZE				2288788
> +#define FD_MAX_SPEEDUP				7
> +#define FD_MAX_POSE_VAL				0xfffffffffffffff
> +#define FD_DEF_POSE_VAL				0x3ff
> +#define MAX_FD_SEL_NUM				1026

If that file is supposed to be included by anything beyond the driver
itself, we need proper prefixing. (same for anything else in here)

> diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
> index 3dcfc61..eae876e 100644
> --- a/include/uapi/linux/v4l2-controls.h
> +++ b/include/uapi/linux/v4l2-controls.h
> @@ -192,6 +192,10 @@ enum v4l2_colorfx {
>   * We reserve 16 controls for this driver. */
>  #define V4L2_CID_USER_IMX_BASE			(V4L2_CID_USER_BASE + 0x10b0)
>  
> +/* The base for the mediatek FD driver controls */
> +/* We reserve 16 controls for this driver. */
> +#define V4L2_CID_USER_MTK_FD_BASE		(V4L2_CID_USER_BASE + 0x10d0)

Why only the base, but not the actual IDs in uapi ?


--mtx
Jerry-ch Chen July 29, 2019, 6:01 a.m. UTC | #2
Hi Enrico, 

On Tue, 2019-07-09 at 18:56 +0800, Enrico Weigelt, metux IT consult
wrote:
> On 09.07.19 10:41, Jerry-ch Chen wrote:
> 
> Hi,
> 
> 
> > diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > new file mode 100644
> > index 0000000..289999b
> > --- /dev/null
> > +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > @@ -0,0 +1,157 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +//
> > +// Copyright (c) 2018 MediaTek Inc.
> > +
> > +#ifndef __MTK_FD_HW_H__
> > +#define __MTK_FD_HW_H__
> > +
> > +#include <linux/io.h>
> > +#include <linux/types.h>
> > +#include <linux/platform_device.h>
> > +#include <media/v4l2-ctrls.h>
> > +#include <media/v4l2-device.h>
> > +#include <media/videobuf2-v4l2.h>
> > +
> > +#define MTK_FD_OUTPUT_MIN_WIDTH			26U
> > +#define MTK_FD_OUTPUT_MIN_HEIGHT		26U
> > +#define MTK_FD_OUTPUT_MAX_WIDTH			640U
> > +#define MTK_FD_OUTPUT_MAX_HEIGHT		480U
> > +
> > +/* Control the user defined image widths and heights
> > + * to be scaled and performed face detection in FD HW.
> > + * MTK FD support up to 14 user defined image sizes to perform face detection.
> > + */
> > +#define V4L2_CID_MTK_FD_SCALE_IMG_WIDTH		(V4L2_CID_USER_MTK_FD_BASE + 1)
> > +#define V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT	(V4L2_CID_USER_MTK_FD_BASE + 2)
> 
> I've got a *really* bad feeling about introducing chip specific
> uapi stuff. (by the way: uapi stuff belongs into include/uapi/...)
> 
Thanks for your comments,

If we remain chip-specific control IDs, I will move the uapi stuff into
inlcude/uapi/mtk_fd.h (filename TBD)

> Maybe you could tell us what that's *really* about, so we can find some
> standard / chip-independent api for these things. That's one of the
> major point of the kernel: hardware abstraction.
> 
I am not sure if it is possible for us to add some standard
v4l2-controls for face detection, a further explanations of controls are
listed below.

In v4l2-controls, there exists V4L2_CID_DETECT_CLASS, but I haven't
found the standards or api that can be used for face detection yet.
https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/v4l2-controls.h#L1092

For detecting certain face angle and head direction, we would need
V4L2_CID_DETECT_ANGLE, V4L2_CID_DETECT_DIRECTION controls for user to
specify the angle and direction to be detected.
In MTK FD driver, we support the following angles and directions to be
selected by user, and they are both multiple selected .
FD_angle_table[] = {-90, -45, 0 , 45, 90}
FD_direction_table[] = {0, 30, 60, 90, 120, 150, ..., 330}

Assuming these v4l2-controls are array of V4L2_CTRL_TYPE_U16 with
dimension 5 and 12.
User can select the desired angle and directions to be detected into
arrays and bring it to driver by these controls, however, the more they
select, the longer execution time needed by HW.

For detecting different sizes of faces and increase the detection speed,
FD driver might need to scales down the input image into different
smaller sizes, besides driver default values, user or proprietary
algorithm library can manually set the desired image sizes, therefore,
we would need the following controls:
V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH and
V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT.
In MTK FD driver, we implement these controls as array of
V4L2_CTRL_TYPE_U16 with the dimension 15.

For controlling detection speed, we would need the
V4L2_CID_DETECT_SPEED, the faster speedup implies the lower accuracy of
detection, In MTK FD driver, the max level of speedup is 7, and default
value is 0.

For MTK FD algorithm user library, they would need select extra
detection features(models) used in HW, we need
V4L2_CID_MTK_FD_EXTRA_MODEL, this will be set to 1 for using extra
model. However, we are considering make this control more
chip-independent and can be added into standard.
for example, V4L2_CID_DETECTION_FD_MODEL or ...FD_ALGO,
drivers can define the detection algorithm or detection model to be used
for users to select. How do you think? 

In short, I summery the control IDs as following:
V4L2_CID_DETECT_ANGLE: set the angle of face in degrees. 90 ~ -90
degrees.
V4L2_CID_DETECT_DIRECTION: set the rotation of the head in degrees.
0~330 degrees.
V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH: set the image widths for an input
image to be scaled down for face detection
V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT: set the image heights for an
input image to be scaled down for face detection
V4L2_CID_DETECT_SPEED: set the detection speed, usually reducing
accuracy.
V4L2_CID_DETECTION_FD_MODEL: select the detection model or algorithm to
be used by face detection driver.

> > +#define ENABLE_FD				0x111
> > +#define FD_HW_ENABLE				0x4
> > +#define FD_INT_EN				0x15c
> > +#define FD_INT					0x168
> > +#define FD_RESULT				0x178
> > +#define FD_IRQ_MASK				0x001
> > +
> > +#define RS_MAX_BUF_SIZE				2288788
> > +#define FD_MAX_SPEEDUP				7
> > +#define FD_MAX_POSE_VAL				0xfffffffffffffff
> > +#define FD_DEF_POSE_VAL				0x3ff
> > +#define MAX_FD_SEL_NUM				1026
> 
> If that file is supposed to be included by anything beyond the driver
> itself, we need proper prefixing. (same for anything else in here)
> 
I will fix it as following:

#define FD_ENABLE    0x111

#define FD_REG_OFFSET_HW_ENABLE  0x4
#define FD_REG_OFFSET_INT_EN     0x15c
#define FD_REG_OFFSET_INT_VAL    0x168
#define FD_REG_OFFSET_RESULT     0x178

#define FD_IRQ_MASK         1
#define FD_MAX_RS_BUF_SIZE  2288788
#define FD_MAX_SPEEDUP      7
#define FD_MAX_RESULT_NUM   1026

> > diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
> > index 3dcfc61..eae876e 100644
> > --- a/include/uapi/linux/v4l2-controls.h
> > +++ b/include/uapi/linux/v4l2-controls.h
> > @@ -192,6 +192,10 @@ enum v4l2_colorfx {
> >   * We reserve 16 controls for this driver. */
> >  #define V4L2_CID_USER_IMX_BASE			(V4L2_CID_USER_BASE + 0x10b0)
> >  
> > +/* The base for the mediatek FD driver controls */
> > +/* We reserve 16 controls for this driver. */
> > +#define V4L2_CID_USER_MTK_FD_BASE		(V4L2_CID_USER_BASE + 0x10d0)
> 
> Why only the base, but not the actual IDs in uapi ?
> 
I will put actual IDs in uapi/ for user to reference.

> 
> --mtx
> 

Best regards,
Jerry
Tomasz Figa July 29, 2019, 9:57 a.m. UTC | #3
On Mon, Jul 29, 2019 at 3:01 PM Jerry-ch Chen
<Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Enrico,
>
> On Tue, 2019-07-09 at 18:56 +0800, Enrico Weigelt, metux IT consult
> wrote:
> > On 09.07.19 10:41, Jerry-ch Chen wrote:
> >
> > Hi,
> >
> >
> > > diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > > new file mode 100644
> > > index 0000000..289999b
> > > --- /dev/null
> > > +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > > @@ -0,0 +1,157 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +//
> > > +// Copyright (c) 2018 MediaTek Inc.
> > > +
> > > +#ifndef __MTK_FD_HW_H__
> > > +#define __MTK_FD_HW_H__
> > > +
> > > +#include <linux/io.h>
> > > +#include <linux/types.h>
> > > +#include <linux/platform_device.h>
> > > +#include <media/v4l2-ctrls.h>
> > > +#include <media/v4l2-device.h>
> > > +#include <media/videobuf2-v4l2.h>
> > > +
> > > +#define MTK_FD_OUTPUT_MIN_WIDTH                    26U
> > > +#define MTK_FD_OUTPUT_MIN_HEIGHT           26U
> > > +#define MTK_FD_OUTPUT_MAX_WIDTH                    640U
> > > +#define MTK_FD_OUTPUT_MAX_HEIGHT           480U
> > > +
> > > +/* Control the user defined image widths and heights
> > > + * to be scaled and performed face detection in FD HW.
> > > + * MTK FD support up to 14 user defined image sizes to perform face detection.
> > > + */
> > > +#define V4L2_CID_MTK_FD_SCALE_IMG_WIDTH            (V4L2_CID_USER_MTK_FD_BASE + 1)
> > > +#define V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT   (V4L2_CID_USER_MTK_FD_BASE + 2)
> >
> > I've got a *really* bad feeling about introducing chip specific
> > uapi stuff. (by the way: uapi stuff belongs into include/uapi/...)
> >
> Thanks for your comments,
>
> If we remain chip-specific control IDs, I will move the uapi stuff into
> inlcude/uapi/mtk_fd.h (filename TBD)
>
> > Maybe you could tell us what that's *really* about, so we can find some
> > standard / chip-independent api for these things. That's one of the
> > major point of the kernel: hardware abstraction.
> >
> I am not sure if it is possible for us to add some standard
> v4l2-controls for face detection, a further explanations of controls are
> listed below.
>
> In v4l2-controls, there exists V4L2_CID_DETECT_CLASS, but I haven't
> found the standards or api that can be used for face detection yet.
> https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/v4l2-controls.h#L1092
>
> For detecting certain face angle and head direction, we would need
> V4L2_CID_DETECT_ANGLE, V4L2_CID_DETECT_DIRECTION controls for user to
> specify the angle and direction to be detected.
> In MTK FD driver, we support the following angles and directions to be
> selected by user, and they are both multiple selected .
> FD_angle_table[] = {-90, -45, 0 , 45, 90}
> FD_direction_table[] = {0, 30, 60, 90, 120, 150, ..., 330}
>
> Assuming these v4l2-controls are array of V4L2_CTRL_TYPE_U16 with
> dimension 5 and 12.
> User can select the desired angle and directions to be detected into
> arrays and bring it to driver by these controls, however, the more they
> select, the longer execution time needed by HW.
>

Sounds like we need some kind of a menu bitmask control here, but I
don't see V4L2 having anything like that.

Hans, Sakari, any ideas?

> For detecting different sizes of faces and increase the detection speed,
> FD driver might need to scales down the input image into different
> smaller sizes

Do you mean the FD hardware would do the scaling or the driver code
itself? It would be undesirable to do such scaling in a kernel driver,
so if that's not something handled by the hardware, the downscaled
image might need to be provided from the userspace.

>, besides driver default values, user or proprietary
> algorithm library can manually set the desired image sizes, therefore,
> we would need the following controls:
> V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH and
> V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT.
> In MTK FD driver, we implement these controls as array of
> V4L2_CTRL_TYPE_U16 with the dimension 15.

Why 15?

>
> For controlling detection speed, we would need the
> V4L2_CID_DETECT_SPEED, the faster speedup implies the lower accuracy of
> detection, In MTK FD driver, the max level of speedup is 7, and default
> value is 0.
>
> For MTK FD algorithm user library, they would need select extra
> detection features(models) used in HW, we need
> V4L2_CID_MTK_FD_EXTRA_MODEL, this will be set to 1 for using extra
> model. However, we are considering make this control more
> chip-independent and can be added into standard.
> for example, V4L2_CID_DETECTION_FD_MODEL or ...FD_ALGO,
> drivers can define the detection algorithm or detection model to be used
> for users to select. How do you think?

Sounds like something that could be a menu control, so it could vary
between drivers.

>
> In short, I summery the control IDs as following:
> V4L2_CID_DETECT_ANGLE: set the angle of face in degrees. 90 ~ -90
> degrees.
> V4L2_CID_DETECT_DIRECTION: set the rotation of the head in degrees.
> 0~330 degrees.
> V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH: set the image widths for an input
> image to be scaled down for face detection
> V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT: set the image heights for an
> input image to be scaled down for face detection
> V4L2_CID_DETECT_SPEED: set the detection speed, usually reducing
> accuracy.
> V4L2_CID_DETECTION_FD_MODEL: select the detection model or algorithm to
> be used by face detection driver.
>
> > > +#define ENABLE_FD                          0x111
> > > +#define FD_HW_ENABLE                               0x4
> > > +#define FD_INT_EN                          0x15c
> > > +#define FD_INT                                     0x168
> > > +#define FD_RESULT                          0x178
> > > +#define FD_IRQ_MASK                                0x001
> > > +
> > > +#define RS_MAX_BUF_SIZE                            2288788
> > > +#define FD_MAX_SPEEDUP                             7
> > > +#define FD_MAX_POSE_VAL                            0xfffffffffffffff
> > > +#define FD_DEF_POSE_VAL                            0x3ff
> > > +#define MAX_FD_SEL_NUM                             1026
> >
> > If that file is supposed to be included by anything beyond the driver
> > itself, we need proper prefixing. (same for anything else in here)
> >
> I will fix it as following:
>
> #define FD_ENABLE    0x111
>
> #define FD_REG_OFFSET_HW_ENABLE  0x4
> #define FD_REG_OFFSET_INT_EN     0x15c
> #define FD_REG_OFFSET_INT_VAL    0x168
> #define FD_REG_OFFSET_RESULT     0x178
>
> #define FD_IRQ_MASK         1
> #define FD_MAX_RS_BUF_SIZE  2288788
> #define FD_MAX_SPEEDUP      7
> #define FD_MAX_RESULT_NUM   1026
>

I'd suggest the MTK_FD_ prefix.

> > > diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
> > > index 3dcfc61..eae876e 100644
> > > --- a/include/uapi/linux/v4l2-controls.h
> > > +++ b/include/uapi/linux/v4l2-controls.h
> > > @@ -192,6 +192,10 @@ enum v4l2_colorfx {
> > >   * We reserve 16 controls for this driver. */
> > >  #define V4L2_CID_USER_IMX_BASE                     (V4L2_CID_USER_BASE + 0x10b0)
> > >
> > > +/* The base for the mediatek FD driver controls */
> > > +/* We reserve 16 controls for this driver. */
> > > +#define V4L2_CID_USER_MTK_FD_BASE          (V4L2_CID_USER_BASE + 0x10d0)
> >
> > Why only the base, but not the actual IDs in uapi ?
> >
> I will put actual IDs in uapi/ for user to reference.
>
> >
> > --mtx
> >
>

Best regards,
Tomasz
Jerry-ch Chen July 29, 2019, 11:58 a.m. UTC | #4
Hi Tomasz,

On Mon, 2019-07-29 at 17:57 +0800, Tomasz Figa wrote:
> On Mon, Jul 29, 2019 at 3:01 PM Jerry-ch Chen
> <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Enrico,
> >
> > On Tue, 2019-07-09 at 18:56 +0800, Enrico Weigelt, metux IT consult
> > wrote:
> > > On 09.07.19 10:41, Jerry-ch Chen wrote:
> > >
> > > Hi,
> > >
> > >
> > > > diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > > > new file mode 100644
> > > > index 0000000..289999b
> > > > --- /dev/null
> > > > +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > > > @@ -0,0 +1,157 @@
> > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > +//
> > > > +// Copyright (c) 2018 MediaTek Inc.
> > > > +
> > > > +#ifndef __MTK_FD_HW_H__
> > > > +#define __MTK_FD_HW_H__
> > > > +
> > > > +#include <linux/io.h>
> > > > +#include <linux/types.h>
> > > > +#include <linux/platform_device.h>
> > > > +#include <media/v4l2-ctrls.h>
> > > > +#include <media/v4l2-device.h>
> > > > +#include <media/videobuf2-v4l2.h>
> > > > +
> > > > +#define MTK_FD_OUTPUT_MIN_WIDTH                    26U
> > > > +#define MTK_FD_OUTPUT_MIN_HEIGHT           26U
> > > > +#define MTK_FD_OUTPUT_MAX_WIDTH                    640U
> > > > +#define MTK_FD_OUTPUT_MAX_HEIGHT           480U
> > > > +
> > > > +/* Control the user defined image widths and heights
> > > > + * to be scaled and performed face detection in FD HW.
> > > > + * MTK FD support up to 14 user defined image sizes to perform face detection.
> > > > + */
> > > > +#define V4L2_CID_MTK_FD_SCALE_IMG_WIDTH            (V4L2_CID_USER_MTK_FD_BASE + 1)
> > > > +#define V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT   (V4L2_CID_USER_MTK_FD_BASE + 2)
> > >
> > > I've got a *really* bad feeling about introducing chip specific
> > > uapi stuff. (by the way: uapi stuff belongs into include/uapi/...)
> > >
> > Thanks for your comments,
> >
> > If we remain chip-specific control IDs, I will move the uapi stuff into
> > inlcude/uapi/mtk_fd.h (filename TBD)
> >
> > > Maybe you could tell us what that's *really* about, so we can find some
> > > standard / chip-independent api for these things. That's one of the
> > > major point of the kernel: hardware abstraction.
> > >
> > I am not sure if it is possible for us to add some standard
> > v4l2-controls for face detection, a further explanations of controls are
> > listed below.
> >
> > In v4l2-controls, there exists V4L2_CID_DETECT_CLASS, but I haven't
> > found the standards or api that can be used for face detection yet.
> > https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/v4l2-controls.h#L1092
> >
> > For detecting certain face angle and head direction, we would need
> > V4L2_CID_DETECT_ANGLE, V4L2_CID_DETECT_DIRECTION controls for user to
> > specify the angle and direction to be detected.
> > In MTK FD driver, we support the following angles and directions to be
> > selected by user, and they are both multiple selected .
> > FD_angle_table[] = {-90, -45, 0 , 45, 90}
> > FD_direction_table[] = {0, 30, 60, 90, 120, 150, ..., 330}
> >
> > Assuming these v4l2-controls are array of V4L2_CTRL_TYPE_U16 with
> > dimension 5 and 12.
> > User can select the desired angle and directions to be detected into
> > arrays and bring it to driver by these controls, however, the more they
> > select, the longer execution time needed by HW.
> >
> 
> Sounds like we need some kind of a menu bitmask control here, but I
> don't see V4L2 having anything like that.
> 
> Hans, Sakari, any ideas?
> 
> > For detecting different sizes of faces and increase the detection speed,
> > FD driver might need to scales down the input image into different
> > smaller sizes
> 
> Do you mean the FD hardware would do the scaling or the driver code
> itself? It would be undesirable to do such scaling in a kernel driver,
> so if that's not something handled by the hardware, the downscaled
> image might need to be provided from the userspace.
> 
Thanks for your comments.

Yes, FD hardware will do the scaling itself, so driver could set the
sizes.

> >, besides driver default values, user or proprietary
> > algorithm library can manually set the desired image sizes, therefore,
> > we would need the following controls:
> > V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH and
> > V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT.
> > In MTK FD driver, we implement these controls as array of
> > V4L2_CTRL_TYPE_U16 with the dimension 15.
> 
> Why 15?
> 
It consists of one input image size and 14 down-scaled image sizes,
the amount 15 (or say 14) is defined by the MTK FD algo library,
therefore I remain the number of 15 here for communicate with the
library.
Maybe it should be defined as following?
MTK_FD_MAX_SCALE_SIZE_NUM               14
and 
MTK_FD_SCALE_ARR_NUM			15

> >
> > For controlling detection speed, we would need the
> > V4L2_CID_DETECT_SPEED, the faster speedup implies the lower accuracy of
> > detection, In MTK FD driver, the max level of speedup is 7, and default
> > value is 0.
> >
> > For MTK FD algorithm user library, they would need select extra
> > detection features(models) used in HW, we need
> > V4L2_CID_MTK_FD_EXTRA_MODEL, this will be set to 1 for using extra
> > model. However, we are considering make this control more
> > chip-independent and can be added into standard.
> > for example, V4L2_CID_DETECTION_FD_MODEL or ...FD_ALGO,
> > drivers can define the detection algorithm or detection model to be used
> > for users to select. How do you think?
> 
> Sounds like something that could be a menu control, so it could vary
> between drivers.
> 
Ok, and maybe it should be created by v4l2_ctrl_new_int_menu(...)?

> >
> > In short, I summery the control IDs as following:
> > V4L2_CID_DETECT_ANGLE: set the angle of face in degrees. 90 ~ -90
> > degrees.
> > V4L2_CID_DETECT_DIRECTION: set the rotation of the head in degrees.
> > 0~330 degrees.
> > V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH: set the image widths for an input
> > image to be scaled down for face detection
> > V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT: set the image heights for an
> > input image to be scaled down for face detection
> > V4L2_CID_DETECT_SPEED: set the detection speed, usually reducing
> > accuracy.
> > V4L2_CID_DETECTION_FD_MODEL: select the detection model or algorithm to
> > be used by face detection driver.
> >
> > > > +#define ENABLE_FD                          0x111
> > > > +#define FD_HW_ENABLE                               0x4
> > > > +#define FD_INT_EN                          0x15c
> > > > +#define FD_INT                                     0x168
> > > > +#define FD_RESULT                          0x178
> > > > +#define FD_IRQ_MASK                                0x001
> > > > +
> > > > +#define RS_MAX_BUF_SIZE                            2288788
> > > > +#define FD_MAX_SPEEDUP                             7
> > > > +#define FD_MAX_POSE_VAL                            0xfffffffffffffff
> > > > +#define FD_DEF_POSE_VAL                            0x3ff
> > > > +#define MAX_FD_SEL_NUM                             1026
> > >
> > > If that file is supposed to be included by anything beyond the driver
> > > itself, we need proper prefixing. (same for anything else in here)
> > >
> > I will fix it as following:
> >
> > #define FD_ENABLE    0x111
> >
> > #define FD_REG_OFFSET_HW_ENABLE  0x4
> > #define FD_REG_OFFSET_INT_EN     0x15c
> > #define FD_REG_OFFSET_INT_VAL    0x168
> > #define FD_REG_OFFSET_RESULT     0x178
> >
> > #define FD_IRQ_MASK         1
> > #define FD_MAX_RS_BUF_SIZE  2288788
> > #define FD_MAX_SPEEDUP      7
> > #define FD_MAX_RESULT_NUM   1026
> >
> 
> I'd suggest the MTK_FD_ prefix.
> 
Ok, I will use MTK_FD_ prefix.

> > > > diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
> > > > index 3dcfc61..eae876e 100644
> > > > --- a/include/uapi/linux/v4l2-controls.h
> > > > +++ b/include/uapi/linux/v4l2-controls.h
> > > > @@ -192,6 +192,10 @@ enum v4l2_colorfx {
> > > >   * We reserve 16 controls for this driver. */
> > > >  #define V4L2_CID_USER_IMX_BASE                     (V4L2_CID_USER_BASE + 0x10b0)
> > > >
> > > > +/* The base for the mediatek FD driver controls */
> > > > +/* We reserve 16 controls for this driver. */
> > > > +#define V4L2_CID_USER_MTK_FD_BASE          (V4L2_CID_USER_BASE + 0x10d0)
> > >
> > > Why only the base, but not the actual IDs in uapi ?
> > >
> > I will put actual IDs in uapi/ for user to reference.
> >
> > >
> > > --mtx
> > >
> >
> 
> Best regards,
> Tomasz

Best regards,
Jerry
Tomasz Figa Aug. 2, 2019, 8:28 a.m. UTC | #5
Hi Jerry,

On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> 
> This patch adds the driver of Face Detection (FD) unit in
> Mediatek camera system, providing face detection function.
> 
> The mtk-isp directory will contain drivers for multiple IP
> blocks found in Mediatek ISP system. It will include ISP Pass 1
> driver (CAM), sensor interface driver, DIP driver and face
> detection driver.
> 
> Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> ---
>  drivers/media/platform/Makefile               |    2 +
>  drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
>  drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  157 +++
>  drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1259 +++++++++++++++++++++++++
>  include/uapi/linux/v4l2-controls.h            |    4 +
>  5 files changed, 1427 insertions(+)
>  create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
>  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
>  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
>

Thanks for the patch! I finally got a chance to fully review the code. Sorry
for the delay. Please check my comments inline.

> diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
> index e6deb25..8b817cc 100644
> --- a/drivers/media/platform/Makefile
> +++ b/drivers/media/platform/Makefile
> @@ -94,6 +94,8 @@ obj-$(CONFIG_VIDEO_MEDIATEK_MDP)	+= mtk-mdp/
>  
>  obj-$(CONFIG_VIDEO_MEDIATEK_JPEG)	+= mtk-jpeg/
>  
> +obj-$(CONFIG_VIDEO_MEDIATEK_FD)		+= mtk-isp/fd/
> +
>  obj-$(CONFIG_VIDEO_QCOM_CAMSS)		+= qcom/camss/
>  
>  obj-$(CONFIG_VIDEO_QCOM_VENUS)		+= qcom/venus/
> diff --git a/drivers/media/platform/mtk-isp/fd/Makefile b/drivers/media/platform/mtk-isp/fd/Makefile
> new file mode 100644
> index 0000000..9b1c501
> --- /dev/null
> +++ b/drivers/media/platform/mtk-isp/fd/Makefile
> @@ -0,0 +1,5 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +mtk-fd-objs += mtk_fd_40.o
> +
> +obj-$(CONFIG_VIDEO_MEDIATEK_FD) += mtk-fd.o
> \ No newline at end of file
> diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> new file mode 100644
> index 0000000..289999b
> --- /dev/null
> +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> @@ -0,0 +1,157 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +//
> +// Copyright (c) 2018 MediaTek Inc.
> +
> +#ifndef __MTK_FD_HW_H__
> +#define __MTK_FD_HW_H__
> +
> +#include <linux/io.h>
> +#include <linux/types.h>
> +#include <linux/platform_device.h>
> +#include <media/v4l2-ctrls.h>
> +#include <media/v4l2-device.h>
> +#include <media/videobuf2-v4l2.h>
> +
> +#define MTK_FD_OUTPUT_MIN_WIDTH			26U
> +#define MTK_FD_OUTPUT_MIN_HEIGHT		26U
> +#define MTK_FD_OUTPUT_MAX_WIDTH			640U
> +#define MTK_FD_OUTPUT_MAX_HEIGHT		480U
> +
> +/* Control the user defined image widths and heights
> + * to be scaled and performed face detection in FD HW.
> + * MTK FD support up to 14 user defined image sizes to perform face detection.
> + */
> +#define V4L2_CID_MTK_FD_SCALE_IMG_WIDTH		(V4L2_CID_USER_MTK_FD_BASE + 1)
> +#define V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT	(V4L2_CID_USER_MTK_FD_BASE + 2)
> +
> +/* Control the numbers of user defined image sizes.
> + * The default value is 0 which means user is not going
> + * to define the specific image sizes.
> + */
> +#define V4L2_CID_MTK_FD_SCALE_IMG_NUM		(V4L2_CID_USER_MTK_FD_BASE + 3)
> +
> +/* Control the Face Pose to be detected.
> + * Here describe the value as following:
> + * {0, detect the front face with rotation from 0 to 270 degrees},
> + * {1, detect the front face with rotation from 0 to 240 and 300 degrees},
> + * {2, detect the front face with rotation from 0 to 240 and 330 degrees},
> + * {3, detect the front face with rotation from 0 to 240 and left side face}.
> + */
> +#define V4L2_CID_MTK_FD_DETECT_POSE		(V4L2_CID_USER_MTK_FD_BASE + 4)
> +#define V4L2_CID_MTK_FD_DETECT_SPEEDUP		(V4L2_CID_USER_MTK_FD_BASE + 5)
> +#define V4L2_CID_MTK_FD_EXTRA_MODEL		(V4L2_CID_USER_MTK_FD_BASE + 6)
> +
> +/* We reserve 16 controls for this driver. */
> +#define V4L2_CID_MTK_FD_MAX			16
> +
> +#define ENABLE_FD				0x111
> +#define FD_HW_ENABLE				0x4
> +#define FD_INT_EN				0x15c
> +#define FD_INT					0x168
> +#define FD_RESULT				0x178
> +#define FD_IRQ_MASK				0x001
> +
> +#define RS_MAX_BUF_SIZE				2288788
> +#define FD_MAX_SPEEDUP				7
> +#define FD_MAX_POSE_VAL				0xfffffffffffffff
> +#define FD_DEF_POSE_VAL				0x3ff
> +#define MAX_FD_SEL_NUM				1026
> +
> +/* The max. number of face sizes could be detected, for feature scaling */
> +#define FACE_SIZE_NUM_MAX			14
> +/* FACE_SIZE_NUM_MAX + 1, first scale for input image W/H */
> +#define FD_SCALE_NUM				15
> +
> +enum fd_state {
> +	FD_ENQ,
> +	FD_CBD,
> +};
> +
> +enum fd_img_format {
> +	FMT_VYUY = 2,
> +	FMT_UYVY,
> +	FMT_YVYU,
> +	FMT_YUYV,
> +	FMT_UNKNOWN
> +};

Please use #define macros for hardware/firmware interface definitions.

> +
> +struct fd_buffer {
> +	__u32 scp_addr;	/* used by SCP */
> +	__u32 dma_addr;	/* used by DMA HW */
> +} __packed;
> +
> +enum fd_scp_cmd {
> +	FD_CMD_INIT,
> +	FD_CMD_ENQUEUE,
> +};

Ditto.

> +
> +struct fd_user_output {
> +	__u64 results[MAX_FD_SEL_NUM];
> +	__u16 number;
> +};
> +
> +struct user_param {
> +	u8 fd_pose;
> +	u8 fd_speedup;
> +	u8 fd_extra_model;
> +	u8 scale_img_num;
> +	u8 src_img_fmt;
> +	__u16 scale_img_width[FD_SCALE_NUM];
> +	__u16 scale_img_height[FD_SCALE_NUM];
> +} __packed;
> +
> +struct fd_hw_param {
> +	struct fd_buffer src_img;
> +	struct fd_buffer user_result;
> +	struct user_param user_param;
> +} __packed;
> +
> +struct cmd_init_info {
> +	struct fd_buffer fd_manager;
> +	__u32 rs_dma_addr;
> +} __packed;
> +
> +struct ipi_message {
> +	u8 cmd_id;
> +	union {
> +		struct cmd_init_info init_param;
> +		struct fd_hw_param hw_param;
> +	};
> +} __packed;
> +
> +struct mtk_fd_hw {
> +	struct clk *fd_clk;
> +	struct rproc *rproc_handle;
> +	struct platform_device *scp_pdev;
> +	struct fd_buffer scp_mem;
> +	wait_queue_head_t wq;
> +	void __iomem *fd_base;
> +	atomic_t fd_user_cnt;
> +	enum fd_state state;
> +	u32 fd_irq_result;
> +	/* Ensure only one job in hw */
> +	struct mutex fd_hw_lock;
> +};
> +
> +struct mtk_fd_dev {
> +	struct platform_device *pdev;
> +	struct v4l2_device v4l2_dev;
> +	struct v4l2_m2m_dev *m2m_dev;
> +	struct media_device mdev;
> +	struct video_device vfd;
> +	struct mtk_fd_hw fd_hw;

Could we just put the contents of that struct directly here? That should
simplify dereference chains in the code.

> +	/* Lock for V4L2 operations */
> +	struct mutex vfd_lock;
> +};
> +
> +struct mtk_fd_ctx {
> +	struct mtk_fd_dev *fd_dev;
> +	struct device *dev;
> +	struct v4l2_fh fh;
> +	struct v4l2_ctrl_handler hdl;
> +	struct v4l2_pix_format_mplane src_fmt;
> +	struct v4l2_meta_format dst_fmt;
> +	struct user_param user_param;
> +};
> +
> +#endif/*__MTK_FD_HW_H__*/
> diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c b/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> new file mode 100644
> index 0000000..246d3aa
> --- /dev/null
> +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> @@ -0,0 +1,1259 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//
> +// Copyright (c) 2018 MediaTek Inc.
> +
> +#include <linux/clk.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/interrupt.h>
> +#include <linux/jiffies.h>
> +#include <linux/module.h>
> +#include <linux/of_address.h>
> +#include <linux/of_irq.h>
> +#include <linux/of_platform.h>
> +#include <linux/platform_data/mtk_scp.h>
> +#include <linux/platform_device.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/remoteproc.h>
> +#include <media/videobuf2-dma-contig.h>
> +#include <media/videobuf2-v4l2.h>
> +#include <linux/wait.h>
> +#include <media/v4l2-ioctl.h>
> +#include <media/v4l2-event.h>
> +#include <media/v4l2-ctrls.h>
> +#include <media/v4l2-mem2mem.h>
> +#include <media/videobuf2-core.h>
> +
> +#include "mtk_fd.h"
> +
> +static struct v4l2_meta_format fw_param_fmts[] = {

const?

Also, isn't this the buffer with the detected faces rather than params?

> +	{
> +		.dataformat = V4L2_META_FMT_MTISP_PARAMS,

This should use its own format.

> +		.buffersize = 1024 * 30,

Please define a C struct for this buffer and use sizeof() here.

> +	},
> +};

Actually, is there a reason to have this array at all if there is only 1
format supported? I think we could just put the right constants directly in
the code.

> +
> +static const struct v4l2_pix_format_mplane in_img_fmts[] = {
> +	{
> +		.width = MTK_FD_OUTPUT_MAX_WIDTH,
> +		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
> +		.pixelformat = V4L2_PIX_FMT_VYUY,
> +		.colorspace = V4L2_COLORSPACE_BT2020,
> +		.field = V4L2_FIELD_NONE,
> +		.num_planes = 1,
> +	},
> +	{
> +		.width = MTK_FD_OUTPUT_MAX_WIDTH,
> +		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
> +		.pixelformat = V4L2_PIX_FMT_YUYV,
> +		.colorspace = V4L2_COLORSPACE_BT2020,
> +		.field = V4L2_FIELD_NONE,
> +		.num_planes = 1,
> +	},
> +	{
> +		.width = MTK_FD_OUTPUT_MAX_WIDTH,
> +		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
> +		.pixelformat = V4L2_PIX_FMT_YVYU,
> +		.colorspace = V4L2_COLORSPACE_BT2020,
> +		.field = V4L2_FIELD_NONE,
> +		.num_planes = 1,
> +	},
> +	{
> +		.width = MTK_FD_OUTPUT_MAX_WIDTH,
> +		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
> +		.pixelformat = V4L2_PIX_FMT_UYVY,
> +		.colorspace = V4L2_COLORSPACE_BT2020,
> +		.field = V4L2_FIELD_NONE,
> +		.num_planes = 1,

If all the formats have the same values for a field, there is no reason to
have the field initialized here. Just make initialize them to the constant
values inside TRY_FMT.

Hmm, are the interleaved YUV formats the only formats supported by this
hardware? That would be quite unfortunate given the formats supported by the
other IP blocks on this SoC use the more typical planar formats.

> +	},
> +};
> +
> +#define NUM_FORMATS ARRAY_SIZE(in_img_fmts)
> +
> +static inline struct mtk_fd_dev *mtk_fd_hw_to_dev(struct mtk_fd_hw *fd_hw)
> +{
> +	return container_of(fd_hw, struct mtk_fd_dev, fd_hw);
> +}
> +
> +static inline struct mtk_fd_ctx *fh_to_ctx(struct v4l2_fh *fh)
> +{
> +	return container_of(fh, struct mtk_fd_ctx, fh);
> +}
> +
> +static int mtk_fd_load_scp(struct mtk_fd_hw *fd_hw)
> +{
> +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> +	struct device *dev = &fd_dev->pdev->dev;
> +	phandle rproc_phandle;
> +	int ret;
> +
> +	/* init scp */
> +	fd_hw->scp_pdev = scp_get_pdev(fd_dev->pdev);
> +	if (!fd_hw->scp_pdev) {
> +		dev_err(dev, "Failed to get scp device\n");
> +		return -ENODEV;
> +	}
> +
> +	if (of_property_read_u32(fd_dev->pdev->dev.of_node, "mediatek,scp",
> +				 &rproc_phandle)) {
> +		dev_err(dev, "Could not get scp device\n");
> +		return -EINVAL;
> +	}
> +
> +	fd_hw->rproc_handle = rproc_get_by_phandle(rproc_phandle);
> +	if (!fd_hw->rproc_handle) {
> +		dev_err(dev, "Could not get FD's rproc_handle\n");
> +		return -EINVAL;
> +	}
> +
> +	ret = rproc_boot(fd_hw->rproc_handle);
> +	if (ret < 0) {
> +		/**
> +		 * Return 0 if downloading firmware successfully,
> +		 * otherwise it is failed
> +		 */
> +		dev_err(dev, "rproc_boot failed\n");
> +		return -ENODEV;
> +	}
> +
> +	return ret;
> +}
> +
> +static dma_addr_t mtk_fd_hw_alloc_rs_dma_addr(struct mtk_fd_hw *fd_hw)
> +{
> +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> +	struct device *dev = &fd_dev->pdev->dev;
> +	void *va;
> +	dma_addr_t dma_handle;
> +
> +	va = dma_alloc_coherent(dev, RS_MAX_BUF_SIZE, &dma_handle, GFP_KERNEL);

I see this allocated here, but I don't see this freed anywhere.

> +	if (!va) {
> +		dev_err(dev, "dma_alloc null va\n");
> +		return -ENOMEM;
> +	}
> +	memset(va, 0, RS_MAX_BUF_SIZE);
> +
> +	return dma_handle;
> +}
> +
> +static int mtk_fd_send_ipi_init(struct mtk_fd_hw *fd_hw)
> +{
> +	struct ipi_message fd_init_msg;
> +	dma_addr_t rs_dma_addr;
> +
> +	fd_init_msg.cmd_id = FD_CMD_INIT;
> +
> +	fd_init_msg.init_param.fd_manager.scp_addr = fd_hw->scp_mem.scp_addr;
> +	fd_init_msg.init_param.fd_manager.dma_addr = fd_hw->scp_mem.dma_addr;
> +
> +	rs_dma_addr = mtk_fd_hw_alloc_rs_dma_addr(fd_hw);
> +	if (!rs_dma_addr)
> +		return -ENOMEM;
> +
> +	fd_init_msg.init_param.rs_dma_addr = rs_dma_addr;
> +
> +	return scp_ipi_send(fd_hw->scp_pdev, SCP_IPI_FD_CMD, &fd_init_msg,
> +			    sizeof(fd_init_msg), 0);
> +}
> +
> +static int mtk_fd_hw_enable(struct mtk_fd_hw *fd_hw)
> +{
> +	int ret;
> +
> +	ret = mtk_fd_load_scp(fd_hw);
> +	if (ret)
> +		return ret;
> +
> +	ret = mtk_fd_send_ipi_init(fd_hw);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int mtk_fd_hw_connect(struct mtk_fd_hw *fd_hw)
> +{
> +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> +	s32 usercount;
> +
> +	mutex_lock(&fd_hw->fd_hw_lock);

First of all, we don't need a mutex here, because all the V4L2 ioctls are
already running with the video device mutex.

> +	usercount = atomic_inc_return(&fd_hw->fd_user_cnt);

If we already use a mutex, there is no point in using atomic types.

> +	if (usercount == 1) {
> +		pm_runtime_get_sync(&fd_dev->pdev->dev);
> +		if (mtk_fd_hw_enable(fd_hw)) {
> +			pm_runtime_put_sync(&fd_dev->pdev->dev);
> +			atomic_dec_return(&fd_hw->fd_user_cnt);
> +			mutex_unlock(&fd_hw->fd_hw_lock);
> +			return -EINVAL;
> +		}
> +	}

This is a simple mem-to-mem device, so there is no reason to keep it active
all the time it's streaming. Please just get the runtime PM counter when
queuing a job to the hardware and release it when the job finishes.

I guess we might still want to do the costly operations like rproc_boot()
when we start streaming, though.

> +	mutex_unlock(&fd_hw->fd_hw_lock);
> +
> +	return 0;
> +}
> +
> +static int mtk_fd_wait_irq(struct mtk_fd_hw *fd_hw)
> +{
> +	int timeout;
> +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> +	struct device *dev = &fd_dev->pdev->dev;
> +
> +	timeout = wait_event_interruptible_timeout
> +		(fd_hw->wq, (fd_hw->fd_irq_result & FD_IRQ_MASK),
> +		 usecs_to_jiffies(1000000));
> +	if (!timeout) {
> +		dev_err(dev, "%s timeout, %d\n", __func__,
> +			fd_hw->fd_irq_result);
> +		return -EAGAIN;
> +	} else if (!(fd_hw->fd_irq_result & FD_IRQ_MASK)) {
> +		dev_err(dev, "%s No IRQ mask:0x%8x\n",
> +			__func__, fd_hw->fd_irq_result);
> +		return -EINVAL;
> +	}
> +	fd_hw->fd_irq_result = 0;
> +
> +	return 0;
> +}
> +
> +static void mtk_fd_hw_disconnect(struct mtk_fd_hw *fd_hw)
> +{
> +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> +	s32 usercount;
> +
> +	mutex_lock(&fd_hw->fd_hw_lock);
> +	atomic_dec_return(&fd_hw->fd_user_cnt);
> +	usercount = atomic_read(&fd_hw->fd_user_cnt);
> +	if (usercount == 0) {
> +		if (fd_hw->state == FD_ENQ)
> +			mtk_fd_wait_irq(fd_hw);

This shouldn't be possible to happen as the queues should be already stopped
at this point.

> +
> +		pm_runtime_put_sync(&fd_dev->pdev->dev);

Any reason to use pm_runtime_put_sync() over pm_runtime_put()?

> +		rproc_shutdown(fd_hw->rproc_handle);
> +		rproc_put(fd_hw->rproc_handle);
> +	}
> +	mutex_unlock(&fd_hw->fd_hw_lock);
> +}
> +
> +static void mtk_fd_hw_job_finish(struct mtk_fd_hw *fd_hw,
> +				 struct fd_hw_param *fd_param,
> +				 enum vb2_buffer_state vb_state)
> +{
> +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> +	struct mtk_fd_ctx *ctx;
> +	struct device *dev = &fd_dev->pdev->dev;
> +	struct vb2_buffer *src_vb, *dst_vb;
> +	struct vb2_v4l2_buffer *src_vbuf = NULL, *dst_vbuf = NULL;
> +
> +	ctx = v4l2_m2m_get_curr_priv(fd_dev->m2m_dev);
> +	if (!ctx) {
> +		dev_err(dev, "Instance released before end of transaction\n");
> +		return;
> +	}

This shouldn't be possible. I suspect you're seeing this because
.stop_streaming is not implemented correctly. See my comment there.

> +
> +	src_vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +	if (WARN_ON(!src_vb))
> +		return;

This shouldn't be possible.

> +	src_vbuf = to_vb2_v4l2_buffer(src_vb);
> +
> +	dst_vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +	if (WARN_ON(!dst_vb))
> +		return;

Ditto.

> +	dst_vbuf = to_vb2_v4l2_buffer(dst_vb);
> +
> +	dst_vbuf->vb2_buf.timestamp = src_vbuf->vb2_buf.timestamp;
> +	dst_vbuf->timecode = src_vbuf->timecode;
> +	dst_vbuf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
> +	dst_vbuf->flags |= src_vbuf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;

We should be able to just use v4l2_m2m_buf_copy_metadata().

> +
> +	v4l2_m2m_buf_done(src_vbuf, vb_state);
> +	v4l2_m2m_buf_done(dst_vbuf, vb_state);
> +	v4l2_m2m_job_finish(fd_dev->m2m_dev, ctx->fh.m2m_ctx);
> +}
> +
> +static int mtk_fd_hw_job_exec(struct mtk_fd_hw *fd_hw,
> +			      struct fd_hw_param *fd_param,
> +			      void *output_vaddr)
> +{
> +	struct fd_user_output *fd_output;
> +	struct ipi_message fd_ipi_msg;
> +	int ret;
> +	u32 num;
> +
> +	if (fd_param->user_param.src_img_fmt == FMT_UNKNOWN)
> +		goto param_err;

Is this possible?

> +
> +	mutex_lock(&fd_hw->fd_hw_lock);
> +	fd_hw->state = FD_ENQ;

What is this state for?

> +	fd_ipi_msg.cmd_id = FD_CMD_ENQUEUE;
> +	memcpy(&fd_ipi_msg.hw_param, fd_param, sizeof(fd_ipi_msg.hw_param));
> +	ret = scp_ipi_send(fd_hw->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
> +			   sizeof(fd_ipi_msg), 0);
> +	if (ret)
> +		goto buf_err;
> +
> +	ret = mtk_fd_wait_irq(fd_hw);
> +	if (ret)
> +		goto buf_err;

This function is called from device_run and it shouldn't be synchronous. It
should only queue the job to the hardware to be handled asynchronously when
it finishes.

> +
> +	num = readl(fd_hw->fd_base + FD_RESULT);
> +	/* Disable FD ISR */
> +	writel(0x0, fd_hw->fd_base + FD_INT_EN);
> +
> +	fd_output = (struct fd_user_output *)output_vaddr;
> +	fd_output->number = num;
> +	fd_hw->state = FD_CBD;
> +	mutex_unlock(&fd_hw->fd_hw_lock);
> +
> +	mtk_fd_hw_job_finish(fd_hw, fd_param, VB2_BUF_STATE_DONE);
> +	return 0;
> +
> +buf_err:
> +	mutex_unlock(&fd_hw->fd_hw_lock);
> +param_err:
> +	mtk_fd_hw_job_finish(fd_hw, fd_param, VB2_BUF_STATE_ERROR);
> +	return ret;
> +}
> +
> +static int mtk_fd_vb2_buf_out_validate(struct vb2_buffer *vb)
> +{
> +	struct vb2_v4l2_buffer *v4l2_buf = to_vb2_v4l2_buffer(vb);
> +
> +	v4l2_buf->field = V4L2_FIELD_NONE;

We need to fail with -EINVAL if v4l2_buf->field was different than
V4L2_FIELD_ANY or _NONE.

> +
> +	return 0;
> +}
> +
> +static int mtk_fd_vb2_buf_prepare(struct vb2_buffer *vb)
> +{
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +	struct vb2_queue *vq = vb->vb2_queue;
> +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct device *dev = ctx->dev;
> +	struct v4l2_pix_format_mplane *pixfmt;
> +
> +	switch (vq->type) {
> +	case V4L2_BUF_TYPE_META_CAPTURE:
> +		if (vb2_plane_size(vb, 0) < ctx->dst_fmt.buffersize) {
> +			dev_err(dev, "meta size %d is too small\n");

Please make this dev_dbg(), because userspace misbehavior must not trigger
kernel error logs.

> +			return -EINVAL;
> +		}
> +		break;
> +	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> +		pixfmt = &ctx->src_fmt;
> +
> +		if (vbuf->field == V4L2_FIELD_ANY)
> +			vbuf->field = V4L2_FIELD_NONE;
> +
> +		if (vb->num_planes != 1 || vbuf->field != V4L2_FIELD_NONE) {
> +			dev_err(dev, "plane or field %d not supported\n",
> +				vb->num_planes, vbuf->field);

Ditto.

> +			return -EINVAL;
> +		}
> +		if (vb2_plane_size(vb, 0) < pixfmt->plane_fmt[0].sizeimage) {
> +			dev_err(dev, "plane %d is too small\n");

Ditto.

> +			return -EINVAL;
> +		}
> +		break;
> +	default:
> +		dev_err(dev, "invalid queue type: %d\n", vq->type);
> +		return -EINVAL;

We shouldn't need to handle this.

> +	}
> +
> +	return 0;
> +}
> +
> +static void mtk_fd_vb2_buf_queue(struct vb2_buffer *vb)
> +{
> +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> +
> +	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
> +}
> +
> +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> +				  unsigned int *num_buffers,
> +				  unsigned int *num_planes,
> +				  unsigned int sizes[],
> +				  struct device *alloc_devs[])
> +{
> +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct device *dev = ctx->dev;
> +	unsigned int size;
> +
> +	switch (vq->type) {
> +	case V4L2_BUF_TYPE_META_CAPTURE:
> +		size = ctx->dst_fmt.buffersize;
> +		break;
> +	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> +		size = ctx->src_fmt.plane_fmt[0].sizeimage;
> +		break;
> +	default:
> +		dev_err(dev, "invalid queue type: %d\n", vq->type);

We should need to handle this.

> +		return -EINVAL;
> +	}
> +
> +	if (!*num_planes) {
> +		*num_planes = 1;
> +		sizes[0] = size;
> +	}

We need to handle the case when *num_planes is non-zero. We then need to
check if it's 1 and *sizes >= size and fail with -EINVAL if not.

> +
> +	return 0;
> +}
> +
> +static int mtk_fd_vb2_start_streaming(struct vb2_queue *vq, unsigned int count)
> +{
> +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> +
> +	return mtk_fd_hw_connect(&ctx->fd_dev->fd_hw);

This would be called twice for every context, once for the VIDEO_OUTPUT
queue and once for the META_CAPTURE queue. Perhaps it would make sense to
just do this for the VIDEO_OUTPUT queue?

> +}
> +
> +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> +{
> +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> +	struct vb2_buffer *vb;

How do we guarantee here that the hardware isn't still accessing the buffers
removed below?

> +
> +	if (V4L2_TYPE_IS_OUTPUT(vq->type))
> +		vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +	else
> +		vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +
> +	while (vb) {
> +		v4l2_m2m_buf_done(to_vb2_v4l2_buffer(vb), VB2_BUF_STATE_ERROR);
> +		if (V4L2_TYPE_IS_OUTPUT(vq->type))
> +			vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> +		else
> +			vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> +	}

We can use v4l2_m2m_buf_remove(). Also we can move the call into the loop
condition:

while ((vb == v4l2_m2m_buf_remove(...)))
	v4l2_m2m_buf_done(...);

> +
> +	mtk_fd_hw_disconnect(&ctx->fd_dev->fd_hw);

This should also probably be done only for 1 queue. Since the VIDEO_OUTPUT
queue supports requestes, I'd consider it the master one.

> +}
> +
> +static void mtk_fd_vb2_request_complete(struct vb2_buffer *vb)
> +{
> +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> +
> +	v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
> +}
> +
> +static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> +				  const struct v4l2_pix_format_mplane *sfmt)
> +{
> +	dfmt->width = sfmt->width;
> +	dfmt->height = sfmt->height;
> +	dfmt->pixelformat = sfmt->pixelformat;
> +	dfmt->field = sfmt->field;
> +	dfmt->colorspace = sfmt->colorspace;
> +	dfmt->num_planes = sfmt->num_planes;
> +
> +	/* Use default */
> +	dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> +	dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> +	dfmt->xfer_func =
> +		V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> +	dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> +	dfmt->plane_fmt[0].sizeimage =
> +		dfmt->height * dfmt->plane_fmt[0].bytesperline;
> +	memset(dfmt->reserved, 0, sizeof(dfmt->reserved));
> +}

Could we unify this function with mtk_fd_m2m_try_fmt_out_mp()? That function
should be almost directly reusable for the default format initialization +/-
the arguments passed to it.

> +
> +static const struct v4l2_pix_format_mplane *mtk_fd_find_fmt(u32 format)
> +{
> +	unsigned int i;
> +	const struct v4l2_pix_format_mplane *dev_fmt;
> +
> +	for (i = 0; i < NUM_FORMATS; i++) {
> +		dev_fmt = &in_img_fmts[i];
> +		if (dev_fmt->pixelformat == format)
> +			return dev_fmt;
> +	}
> +
> +	return NULL;
> +}
> +
> +static int mtk_fd_m2m_querycap(struct file *file, void *fh,
> +			       struct v4l2_capability *cap)
> +{
> +	struct mtk_fd_dev *fd_dev = video_drvdata(file);
> +	struct device *dev = &fd_dev->pdev->dev;
> +
> +	strscpy(cap->driver, dev->driver->name, sizeof(cap->driver));

Please use dev_driver_string().

> +	strscpy(cap->card, fd_dev->vfd.name, sizeof(cap->card));

I think we can also make this dev_driver_string().

> +	snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s",
> +		 dev_name(&fd_dev->pdev->dev));
> +
> +	return 0;
> +}
> +
> +static int mtk_fd_m2m_enum_fmt_out_mp(struct file *file, void *fh,
> +				      struct v4l2_fmtdesc *f)
> +{
> +	int i;
> +
> +	for (i = 0; i < NUM_FORMATS; ++i) {
> +		if (i == f->index) {
> +			f->pixelformat = in_img_fmts[i].pixelformat;
> +			return 0;
> +		}
> +	}

Why don't we just check if f->index is within the [0, ARRAY_SIZE()-1] bounds
and then just use it to index the array directly?

> +
> +	return -EINVAL;
> +}
> +
> +static int mtk_fd_m2m_try_fmt_out_mp(struct file *file,
> +				     void *fh,
> +				     struct v4l2_format *f)

I think we could just shorten the function prefixes to "mtk_fd_".

> +{
> +	struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
> +	const struct v4l2_pix_format_mplane *fmt;
> +
> +	fmt = mtk_fd_find_fmt(pix_mp->pixelformat);
> +	if (!fmt) {
> +		/* Get default img fmt */
> +		fmt = &in_img_fmts[0];
> +		f->fmt.pix.pixelformat = fmt->pixelformat;
> +	}
> +
> +	/* Use default */
> +	pix_mp->field = fmt->field;
> +	pix_mp->colorspace = fmt->colorspace;
> +	pix_mp->num_planes = fmt->num_planes;
> +	pix_mp->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> +	pix_mp->quantization = V4L2_QUANTIZATION_DEFAULT;
> +	pix_mp->xfer_func =
> +		V4L2_MAP_XFER_FUNC_DEFAULT(pix_mp->colorspace);
> +
> +	/* Keep user setting as possible */
> +	pix_mp->width = clamp(pix_mp->width,
> +			      MTK_FD_OUTPUT_MIN_WIDTH,
> +			      MTK_FD_OUTPUT_MAX_WIDTH);
> +	pix_mp->height = clamp(pix_mp->height,
> +			       MTK_FD_OUTPUT_MIN_HEIGHT,
> +			       MTK_FD_OUTPUT_MAX_HEIGHT);
> +
> +	pix_mp->plane_fmt[0].bytesperline = pix_mp->width * 2;

Please add a comment explaining why this is always 2.

> +	pix_mp->plane_fmt[0].sizeimage =
> +		pix_mp->plane_fmt[0].bytesperline * pix_mp->height;
> +	memset(pix_mp->plane_fmt[0].reserved, 0,
> +	       sizeof(pix_mp->plane_fmt[0].reserved));

No need to zero this here. The core will handle it.

> +
> +	return 0;
> +}
> +
> +static int mtk_fd_m2m_g_fmt_out_mp(struct file *file, void *fh,
> +				   struct v4l2_format *f)
> +{
> +	struct mtk_fd_ctx *ctx = fh_to_ctx(fh);
> +
> +	f->fmt.pix_mp = ctx->src_fmt;
> +
> +	return 0;
> +}
> +
> +static int mtk_fd_m2m_s_fmt_out_mp(struct file *file, void *fh,
> +				   struct v4l2_format *f)
> +{
> +	struct mtk_fd_ctx *ctx = fh_to_ctx(fh);
> +	struct vb2_queue *vq;
> +
> +	/* Change not allowed if queue is streaming. */
> +	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
> +	if (vb2_is_streaming(vq) || vb2_is_busy(vq)) {

vb2_is_busy() is a superset of vb2_is_streaming(), so it's enough to just
check the former.

> +		dev_dbg(ctx->dev, "vb2_is_streaming or vb2_is_busy");
> +		return -EBUSY;
> +	}
> +
> +	mtk_fd_m2m_try_fmt_out_mp(file, fh, f);
> +	ctx->src_fmt = f->fmt.pix_mp;
> +
> +	return 0;
> +}
> +
> +static int mtk_fd_m2m_enum_fmt_meta_cap(struct file *file, void *fh,
> +					struct v4l2_fmtdesc *f)
> +{
> +	if (f->index)
> +		return -EINVAL;
> +
> +	strscpy(f->description, "Face detection result",
> +		sizeof(f->description));
> +	f->pixelformat = fw_param_fmts[0].dataformat;
> +	f->flags = 0;
> +
> +	return 0;
> +}
> +
> +static int mtk_fd_m2m_g_fmt_meta_cap(struct file *file, void *fh,
> +				     struct v4l2_format *f)
> +{
> +	f->fmt.meta.dataformat = fw_param_fmts[0].dataformat;
> +	f->fmt.meta.buffersize = fw_param_fmts[0].buffersize;
> +
> +	return 0;
> +}
> +
> +static const struct vb2_ops mtk_fd_vb2_ops = {
> +	.queue_setup = mtk_fd_vb2_queue_setup,
> +	.buf_out_validate = mtk_fd_vb2_buf_out_validate,
> +	.buf_prepare  = mtk_fd_vb2_buf_prepare,
> +	.buf_queue = mtk_fd_vb2_buf_queue,
> +	.start_streaming = mtk_fd_vb2_start_streaming,
> +	.stop_streaming = mtk_fd_vb2_stop_streaming,
> +	.wait_prepare = vb2_ops_wait_prepare,
> +	.wait_finish = vb2_ops_wait_finish,
> +	.buf_request_complete = mtk_fd_vb2_request_complete,
> +};
> +
> +static const struct v4l2_ioctl_ops mtk_fd_v4l2_video_out_ioctl_ops = {
> +	.vidioc_querycap = mtk_fd_m2m_querycap,
> +	.vidioc_enum_fmt_vid_out_mplane = mtk_fd_m2m_enum_fmt_out_mp,
> +	.vidioc_g_fmt_vid_out_mplane = mtk_fd_m2m_g_fmt_out_mp,
> +	.vidioc_s_fmt_vid_out_mplane = mtk_fd_m2m_s_fmt_out_mp,
> +	.vidioc_try_fmt_vid_out_mplane = mtk_fd_m2m_try_fmt_out_mp,
> +	.vidioc_enum_fmt_meta_cap = mtk_fd_m2m_enum_fmt_meta_cap,
> +	.vidioc_g_fmt_meta_cap = mtk_fd_m2m_g_fmt_meta_cap,
> +	.vidioc_s_fmt_meta_cap = mtk_fd_m2m_g_fmt_meta_cap,
> +	.vidioc_try_fmt_meta_cap = mtk_fd_m2m_g_fmt_meta_cap,
> +	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
> +	.vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs,
> +	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
> +	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
> +	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
> +	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
> +	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
> +	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
> +	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
> +	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
> +	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
> +};
> +
> +static int
> +mtk_fd_queue_init(void *priv, struct vb2_queue *src_vq,
> +		  struct vb2_queue *dst_vq)
> +{
> +	struct mtk_fd_ctx *ctx = priv;
> +	int ret;
> +
> +	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
> +	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> +	src_vq->supports_requests = true;
> +	src_vq->drv_priv = ctx;
> +	src_vq->ops = &mtk_fd_vb2_ops;
> +	src_vq->mem_ops = &vb2_dma_contig_memops;
> +	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> +	src_vq->lock = &ctx->fd_dev->vfd_lock;
> +	src_vq->dev = ctx->fd_dev->v4l2_dev.dev;
> +
> +	ret = vb2_queue_init(src_vq);
> +	if (ret)
> +		return ret;
> +
> +	dst_vq->type = V4L2_BUF_TYPE_META_CAPTURE;
> +	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> +	dst_vq->drv_priv = ctx;
> +	dst_vq->ops = &mtk_fd_vb2_ops;
> +	dst_vq->mem_ops = &vb2_dma_contig_memops;
> +	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> +	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> +	dst_vq->lock = &ctx->fd_dev->vfd_lock;
> +	dst_vq->dev = ctx->fd_dev->v4l2_dev.dev;
> +
> +	return vb2_queue_init(dst_vq);
> +}
> +
> +static int mtk_fd_dev_g_ctrl(struct v4l2_ctrl *ctrl)
> +{
> +	struct mtk_fd_ctx *ctx = ctrl->priv;
> +	int i;
> +
> +	switch (ctrl->id) {
> +	case V4L2_CID_MTK_FD_SCALE_IMG_WIDTH:
> +		for (i = 0; i < ctrl->elems; i++)
> +			ctrl->p_new.p_u16[i] =
> +				ctx->user_param.scale_img_width[i];
> +		break;
> +	case V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT:
> +		for (i = 0; i < ctrl->elems; i++)
> +			ctrl->p_new.p_u16[i] =
> +				ctx->user_param.scale_img_height[i];
> +		break;
> +	case V4L2_CID_MTK_FD_DETECT_POSE:
> +		ctrl->val = ctx->user_param.fd_pose;
> +		break;
> +	case V4L2_CID_MTK_FD_DETECT_SPEEDUP:
> +		ctrl->val = ctx->user_param.fd_speedup;
> +		break;
> +	case V4L2_CID_MTK_FD_SCALE_IMG_NUM:
> +		ctrl->val = ctx->user_param.scale_img_num;
> +		break;
> +	case V4L2_CID_MTK_FD_EXTRA_MODEL:
> +		ctrl->val = ctx->user_param.fd_extra_model;
> +		break;
> +	default:
> +		dev_dbg(ctx->dev, "%s: unexpected control: 0x%x:%d",
> +			__func__, ctrl->id, ctrl->val);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static int mtk_fd_dev_s_ctrl(struct v4l2_ctrl *ctrl)
> +{
> +	struct mtk_fd_ctx *ctx = ctrl->priv;
> +	int i;
> +
> +	switch (ctrl->id) {
> +	case V4L2_CID_MTK_FD_SCALE_IMG_WIDTH:
> +		for (i = 0; i < ctrl->elems; i++)
> +			ctx->user_param.scale_img_width[i] =
> +				ctrl->p_new.p_u16[i];
> +		break;
> +	case V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT:
> +		for (i = 0; i < ctrl->elems; i++)
> +			ctx->user_param.scale_img_height[i] =
> +				ctrl->p_new.p_u16[i];
> +		break;
> +	case V4L2_CID_MTK_FD_DETECT_POSE:
> +		ctx->user_param.fd_pose = ctrl->val;
> +		break;
> +	case V4L2_CID_MTK_FD_DETECT_SPEEDUP:
> +		ctx->user_param.fd_speedup = ctrl->val;
> +		break;
> +	case V4L2_CID_MTK_FD_SCALE_IMG_NUM:
> +		ctx->user_param.scale_img_num = ctrl->val;
> +		break;
> +	case V4L2_CID_MTK_FD_EXTRA_MODEL:
> +		ctx->user_param.fd_extra_model = ctrl->val;
> +		break;
> +	default:
> +		dev_dbg(ctx->dev, "%s: unexpected control: 0x%x:%d",
> +			__func__, ctrl->id, ctrl->val);
> +		return -EINVAL;

No need to handle this, as the framework will only pass the controls we
registered earlier to this function.

> +	}
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_ctrl_ops mtk_fd_dev_ctrl_ops = {
> +	.g_volatile_ctrl = mtk_fd_dev_g_ctrl,

I don't see any volatile controls below. No need to implement this callback.

> +	.s_ctrl = mtk_fd_dev_s_ctrl,

I think you might not even need to implement this function (or just provide
a dummy one that returns 0 if its required) if you just use the controls
directly when preparing the job for the hardware. Check v4l2_ctrl_find().

> +};
> +
> +struct v4l2_ctrl_config mtk_fd_controls[] = {
> +	{
> +	.ops = &mtk_fd_dev_ctrl_ops,
> +	.id = V4L2_CID_MTK_FD_SCALE_IMG_WIDTH,
> +	.name = "FD scale image widths",
> +	.type = V4L2_CTRL_TYPE_U16,
> +	.min = MTK_FD_OUTPUT_MIN_WIDTH,
> +	.max = MTK_FD_OUTPUT_MAX_WIDTH,
> +	.step = 1,
> +	.def = MTK_FD_OUTPUT_MAX_WIDTH,
> +	.dims = { FD_SCALE_NUM },

Something wrong with indentation here.

> +	},
> +	{
> +	.ops = &mtk_fd_dev_ctrl_ops,
> +	.id = V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT,
> +	.name = "FD scale image heights",
> +	.type = V4L2_CTRL_TYPE_U16,
> +	.min = MTK_FD_OUTPUT_MIN_HEIGHT,
> +	.max = MTK_FD_OUTPUT_MAX_HEIGHT,
> +	.step = 1,
> +	.def = MTK_FD_OUTPUT_MAX_HEIGHT,
> +	.dims = { FD_SCALE_NUM },
> +	},
> +	{
> +	.ops = &mtk_fd_dev_ctrl_ops,
> +	.id = V4L2_CID_MTK_FD_SCALE_IMG_NUM,
> +	.name = "FD scale size counts",
> +	.type = V4L2_CTRL_TYPE_INTEGER,
> +	.min = 0,
> +	.max = FACE_SIZE_NUM_MAX,
> +	.step = 1,
> +	.def = 0,
> +	},
> +	{
> +	.ops = &mtk_fd_dev_ctrl_ops,
> +	.id = V4L2_CID_MTK_FD_DETECT_POSE,
> +	.name = "FD detect face pose",
> +	.type = V4L2_CTRL_TYPE_INTEGER,
> +	.min = 0,
> +	.max = 3,
> +	.step = 1,
> +	.def = 0,
> +	},
> +	{
> +	.ops = &mtk_fd_dev_ctrl_ops,
> +	.id = V4L2_CID_MTK_FD_DETECT_SPEEDUP,
> +	.name = "FD detection speedup",
> +	.type = V4L2_CTRL_TYPE_INTEGER,
> +	.min = 0,
> +	.max = FD_MAX_SPEEDUP,
> +	.step = 1,
> +	.def = 0,
> +	},
> +	{
> +	.ops = &mtk_fd_dev_ctrl_ops,
> +	.id = V4L2_CID_MTK_FD_EXTRA_MODEL,
> +	.name = "FD use extra model",
> +	.type = V4L2_CTRL_TYPE_BOOLEAN,
> +	.min = 0,
> +	.max = 1,
> +	.step = 1,
> +	.def = 0,
> +	},
> +};
> +
> +static int mtk_fd_ctrls_setup(struct mtk_fd_ctx *ctx)
> +{
> +	struct v4l2_ctrl_handler *hdl = &ctx->hdl;
> +	struct v4l2_ctrl *ctl;
> +	int i;
> +
> +	v4l2_ctrl_handler_init(hdl, V4L2_CID_MTK_FD_MAX);
> +	if (hdl->error)
> +		return hdl->error;
> +
> +	for (i = 0; i < ARRAY_SIZE(mtk_fd_controls); i++) {
> +		ctl = v4l2_ctrl_new_custom(hdl, &mtk_fd_controls[i], ctx);
> +		if (hdl->error) {
> +			v4l2_ctrl_handler_free(hdl);
> +			dev_err(ctx->dev, "Failed to register controls:%d", i);
> +			return hdl->error;
> +		}
> +	}
> +
> +	ctx->fh.ctrl_handler = &ctx->hdl;
> +	v4l2_ctrl_handler_setup(hdl);
> +
> +	return 0;
> +}
> +
> +static unsigned int get_fd_img_fmt(unsigned int fourcc)
> +{
> +	switch (fourcc) {
> +	case V4L2_PIX_FMT_VYUY:
> +		return FMT_VYUY;
> +	case V4L2_PIX_FMT_YUYV:
> +		return FMT_YUYV;
> +	case V4L2_PIX_FMT_YVYU:
> +		return FMT_YVYU;
> +	case V4L2_PIX_FMT_UYVY:
> +		return FMT_UYVY;
> +	default:
> +		return FMT_UNKNOWN;
> +	}
> +}
> +
> +static void init_ctx_fmt(struct mtk_fd_ctx *ctx)
> +{
> +	const struct v4l2_pix_format_mplane *fmt;
> +
> +	/* Initialize M2M source fmt */
> +	fmt = &in_img_fmts[0];
> +	mtk_fd_fill_pixfmt_mp(&ctx->src_fmt, fmt);
> +
> +	/* Initialize M2M destination fmt */
> +	ctx->dst_fmt.buffersize = fw_param_fmts[0].buffersize;
> +	ctx->dst_fmt.dataformat = fw_param_fmts[0].dataformat;

Why not just make dst_fmt a pointer and assign &fw_params_fmt[0] there?

> +}
> +
> +/*
> + * V4L2 file operations.
> + */
> +static int mtk_vfd_open(struct file *filp)
> +{
> +	struct mtk_fd_dev *fd_dev = video_drvdata(filp);
> +	struct video_device *vdev = video_devdata(filp);
> +	struct mtk_fd_ctx *ctx;
> +	int ret;
> +
> +	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ctx->fd_dev = fd_dev;
> +	ctx->dev = &fd_dev->pdev->dev;
> +
> +	v4l2_fh_init(&ctx->fh, vdev);
> +	filp->private_data = &ctx->fh;
> +
> +	init_ctx_fmt(ctx);
> +
> +	ret = mtk_fd_ctrls_setup(ctx);
> +	if (ret) {
> +		dev_err(ctx->dev, "Failed to set up controls:%d\n", ret);
> +		goto err_fh_ctrl;
> +	}
> +
> +	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(fd_dev->m2m_dev, ctx,
> +					    &mtk_fd_queue_init);
> +	if (IS_ERR(ctx->fh.m2m_ctx)) {
> +		ret = PTR_ERR(ctx->fh.m2m_ctx);
> +		goto err_init_ctx;
> +	}
> +
> +	v4l2_fh_add(&ctx->fh);
> +
> +	return 0;
> +
> +err_init_ctx:
> +	v4l2_ctrl_handler_free(&ctx->hdl);
> +err_fh_ctrl:
> +	v4l2_fh_exit(&ctx->fh);
> +	kfree(ctx);
> +
> +	return ret;
> +}
> +
> +static int mtk_vfd_release(struct file *filp)
> +{
> +	struct mtk_fd_ctx *ctx = container_of(filp->private_data,
> +					      struct mtk_fd_ctx, fh);
> +
> +	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
> +
> +	v4l2_ctrl_handler_free(&ctx->hdl);
> +	v4l2_fh_del(&ctx->fh);
> +	v4l2_fh_exit(&ctx->fh);
> +
> +	kfree(ctx);
> +
> +	return 0;
> +}
> +
> +static const struct v4l2_file_operations fd_video_fops = {
> +	.owner = THIS_MODULE,
> +	.open = mtk_vfd_open,
> +	.release = mtk_vfd_release,
> +	.poll = v4l2_m2m_fop_poll,
> +	.unlocked_ioctl = video_ioctl2,
> +	.mmap = v4l2_m2m_fop_mmap,

We also need the compat_ioctl here for 32-bit userspace on 64-bit kernels.

> +};
> +
> +static void mtk_fd_device_run(void *priv)
> +{
> +	struct mtk_fd_ctx *ctx = priv;
> +	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> +	struct fd_hw_param fd_param;
> +	void *fd_result_vaddr;
> +
> +	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> +	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> +
> +	memset(&fd_param, 0, sizeof(fd_param));
> +
> +	fd_param.src_img.dma_addr =
> +		vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
> +	fd_param.user_result.dma_addr =
> +		vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> +	fd_result_vaddr = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
> +
> +	ctx->user_param.src_img_fmt = get_fd_img_fmt(ctx->src_fmt.pixelformat);
> +	memcpy(&fd_param.user_param, &ctx->user_param, sizeof(ctx->user_param));
> +
> +	/* Complete request controls if any */
> +	v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
> +
> +	mtk_fd_hw_job_exec(&ctx->fd_dev->fd_hw, &fd_param, fd_result_vaddr);
> +}
> +
> +static struct v4l2_m2m_ops fd_m2m_ops = {
> +	.device_run = mtk_fd_device_run,
> +};
> +
> +static int mtk_fd_request_validate(struct media_request *req)
> +{
> +	unsigned int count;
> +
> +	count = vb2_request_buffer_cnt(req);
> +	if (!count)
> +		return -ENOENT;

Why -ENOENT?

> +	else if (count > 1)
> +		return -EINVAL;
> +
> +	return vb2_request_validate(req);
> +}
> +
> +static const struct media_device_ops fd_m2m_media_ops = {
> +	.req_validate	= mtk_fd_request_validate,
> +	.req_queue	= v4l2_m2m_request_queue,
> +};
> +
> +static int mtk_fd_video_device_register(struct mtk_fd_dev *fd_dev)
> +{
> +	struct video_device *vfd = &fd_dev->vfd;
> +	struct v4l2_m2m_dev *m2m_dev = fd_dev->m2m_dev;
> +	struct device *dev = &fd_dev->pdev->dev;
> +	int function, ret;
> +
> +	vfd->fops = &fd_video_fops;
> +	vfd->release = video_device_release;
> +	vfd->lock = &fd_dev->vfd_lock;
> +	vfd->v4l2_dev = &fd_dev->v4l2_dev;
> +	vfd->vfl_dir = VFL_DIR_M2M;
> +	vfd->device_caps = V4L2_CAP_STREAMING  | V4L2_CAP_VIDEO_OUTPUT_MPLANE |
> +		V4L2_CAP_META_CAPTURE;
> +	vfd->ioctl_ops = &mtk_fd_v4l2_video_out_ioctl_ops;
> +
> +	strscpy(vfd->name, "MTK-FD-V4L2", sizeof(vfd->name));

Please use dev_driver_string().

> +	video_set_drvdata(vfd, fd_dev);
> +
> +	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
> +	if (ret) {
> +		dev_err(dev, "Failed to register video device\n");
> +		goto err_free_dev;
> +	}
> +
> +	function = MEDIA_ENT_F_PROC_VIDEO_DECODER;

MEDIA_ENT_F_PROC_VIDEO_STATISTICS fits here much more.

Also nit: Any reason to have this assigned to this intermediate variable
rather than just directly passed to the function?

> +	ret = v4l2_m2m_register_media_controller(m2m_dev, vfd, function);
> +	if (ret) {
> +		dev_err(dev, "Failed to init mem2mem media controller\n");
> +		goto err_unreg_video;
> +	}
> +	return 0;
> +
> +err_unreg_video:
> +	video_unregister_device(vfd);
> +err_free_dev:
> +	video_device_release(vfd);
> +	return ret;
> +}
> +
> +static int mtk_fd_dev_v4l2_init(struct mtk_fd_dev *fd_dev)
> +{
> +	struct media_device *mdev = &fd_dev->mdev;
> +	struct device *dev = &fd_dev->pdev->dev;
> +	int ret;
> +
> +	ret = v4l2_device_register(dev, &fd_dev->v4l2_dev);
> +	if (ret) {
> +		dev_err(dev, "Failed to register v4l2 device\n");
> +		return ret;
> +	}
> +
> +	fd_dev->m2m_dev = v4l2_m2m_init(&fd_m2m_ops);
> +	if (IS_ERR(fd_dev->m2m_dev)) {
> +		dev_err(dev, "Failed to init mem2mem device\n");
> +		ret = PTR_ERR(fd_dev->m2m_dev);
> +		goto fail_m2m_dev;

Please name the labels after the next clean-up step to be done, not the
failure.

> +	}
> +
> +	mdev->dev = dev;
> +	strscpy(mdev->model, "MTK-FD-V4L2", sizeof(mdev->model));

Could we just use dev_driver_string() here instead?

> +	snprintf(mdev->bus_info, sizeof(mdev->bus_info),
> +		 "platform:%s", dev_name(dev));
> +	media_device_init(mdev);
> +	mdev->ops = &fd_m2m_media_ops;
> +	fd_dev->v4l2_dev.mdev = mdev;
> +
> +	ret = mtk_fd_video_device_register(fd_dev);
> +	if (ret) {
> +		dev_err(dev, "Failed to register video device\n");
> +		goto err_vdev;
> +	}
> +
> +	ret = media_device_register(mdev);
> +	if (ret) {
> +		dev_err(dev, "Failed to register mem2mem media device\n");
> +		goto fail_mdev;
> +	}
> +
> +	return 0;
> +
> +fail_mdev:
> +	v4l2_m2m_unregister_media_controller(fd_dev->m2m_dev);
> +	video_unregister_device(&fd_dev->vfd);
> +	video_device_release(&fd_dev->vfd);
> +err_vdev:
> +	media_device_cleanup(mdev);
> +	v4l2_m2m_release(fd_dev->m2m_dev);
> +fail_m2m_dev:
> +	v4l2_device_unregister(&fd_dev->v4l2_dev);
> +	return ret;
> +}
> +
> +static void mtk_fd_dev_v4l2_release(struct mtk_fd_dev *fd_dev)
> +{
> +	v4l2_m2m_unregister_media_controller(fd_dev->m2m_dev);
> +	video_unregister_device(&fd_dev->vfd);
> +	video_device_release(&fd_dev->vfd);
> +	media_device_cleanup(&fd_dev->mdev);
> +	v4l2_m2m_release(fd_dev->m2m_dev);
> +	v4l2_device_unregister(&fd_dev->v4l2_dev);
> +}
> +
> +static irqreturn_t mtk_fd_irq(int irq, void *data)
> +{
> +	struct mtk_fd_hw *fd_hw = (struct mtk_fd_hw *)data;
> +
> +	fd_hw->fd_irq_result = readl(fd_hw->fd_base + FD_INT);

We should be able to just handle the job completion directly from here.

> +	wake_up_interruptible(&fd_hw->wq);
> +	return IRQ_HANDLED;
> +}
> +
> +static int mtk_fd_hw_get_scp_mem(struct mtk_fd_hw *fd_hw,
> +				 struct fd_buffer *scp_mem)
> +{
> +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> +	struct device *dev = &fd_dev->pdev->dev;
> +	dma_addr_t addr;
> +	u32 size;
> +
> +	scp_mem->scp_addr = scp_get_reserve_mem_phys(SCP_FD_MEM_ID);
> +	size = scp_get_reserve_mem_size(SCP_FD_MEM_ID);
> +	if (!scp_mem->scp_addr || !size)
> +		return -EPROBE_DEFER;

Why -EPROBE_DEFER? Should we have some way to distinguish between the SCP
driver not being initialized yet and some other error?

> +
> +	/* get dma addr address */
> +	addr = dma_map_page_attrs(dev, phys_to_page(scp_mem->scp_addr), 0,
> +				  size, DMA_BIDIRECTIONAL,

Should this be really bidirectional?

> +				  DMA_ATTR_SKIP_CPU_SYNC);
> +	if (dma_mapping_error(dev, addr)) {
> +		scp_mem->scp_addr = 0;
> +		dev_err(dev, "Failed to map scp addr\n");
> +		return -ENOMEM;
> +	}
> +	scp_mem->dma_addr = addr;
> +
> +	return 0;
> +}
> +
> +static int mtk_fd_probe(struct platform_device *pdev)
> +{
> +	struct mtk_fd_dev *fd_dev;

nit: Perhaps just fd, to have shorter code?

> +	struct device *dev = &pdev->dev;
> +	struct mtk_fd_hw *fd_hw;
> +	struct resource *res;
> +	int irq;
> +	int ret;
> +
> +	fd_dev = devm_kzalloc(&pdev->dev, sizeof(*fd_dev), GFP_KERNEL);
> +	if (!fd_dev)
> +		return -ENOMEM;
> +
> +	dev_set_drvdata(dev, fd_dev);
> +	fd_hw = &fd_dev->fd_hw;
> +	fd_dev->pdev = pdev;

Is pdev useful for anything? Perhaps you want to store &pdev->dev instead?

> +
> +	irq = platform_get_irq(pdev, 0);
> +	if (irq < 0) {
> +		dev_err(dev, "no IRQ:%d resource info\n", irq);
> +		return irq;
> +	}
> +	ret = devm_request_irq(dev, irq, mtk_fd_irq, IRQF_SHARED,
> +			       dev_driver_string(dev),
> +			       fd_hw);

Why not just fd_dev?

> +	if (ret) {
> +		dev_err(dev, "req_irq fail:%d\n", irq);
> +		return ret;
> +	}
> +
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	fd_hw->fd_base = devm_ioremap_resource(dev, res);
> +	if (!fd_hw->fd_base) {
> +		dev_err(dev, "unable to get fd reg base\n");
> +		return PTR_ERR(fd_hw->fd_base);
> +	}
> +
> +	fd_hw->fd_clk = devm_clk_get(dev, "fd");
> +	if (IS_ERR(fd_hw->fd_clk)) {
> +		dev_err(dev, "cannot get fd_clk_img_fd clock\n");
> +		return PTR_ERR(fd_hw->fd_clk);
> +	}
> +
> +	ret = mtk_fd_hw_get_scp_mem(fd_hw, &fd_hw->scp_mem);
> +	if (ret) {
> +		dev_err(dev, "scp memory init failed: %d\n", ret);

nit: Could we make the error messages a bit more consistent? For example
"Failed to get IRQ resource (%d)", "Failed to request IRQ (%d)", "Failed to
... (%d)", etc.

> +		return ret;
> +	}
> +
> +	atomic_set(&fd_hw->fd_user_cnt, 0);
> +	init_waitqueue_head(&fd_hw->wq);
> +	mutex_init(&fd_dev->vfd_lock);
> +	mutex_init(&fd_hw->fd_hw_lock);
> +	pm_runtime_enable(dev);
> +
> +	ret = mtk_fd_dev_v4l2_init(fd_dev);
> +	if (ret) {
> +		mutex_destroy(&fd_dev->fd_hw.fd_hw_lock);
> +		mutex_destroy(&fd_dev->vfd_lock);
> +		pm_runtime_disable(&pdev->dev);

Please move the clean-up to an error path on the bottom of the function.

> +		dev_err(dev, "v4l2 init failed: %d\n", ret);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int mtk_fd_remove(struct platform_device *pdev)
> +{
> +	struct mtk_fd_dev *fd_dev = dev_get_drvdata(&pdev->dev);
> +
> +	mtk_fd_dev_v4l2_release(fd_dev);
> +	mutex_destroy(&fd_dev->fd_hw.fd_hw_lock);
> +	mutex_destroy(&fd_dev->vfd_lock);
> +	pm_runtime_disable(&pdev->dev);

The order of calls in remove should normally be the opposite to probe.

> +
> +	return 0;
> +}
> +
> +static int mtk_fd_suspend(struct device *dev)
> +{
> +	struct mtk_fd_dev *fd_dev = dev_get_drvdata(dev);
> +
> +	if (pm_runtime_suspended(dev))
> +		return 0;
> +
> +	/* suspend FD HW */
> +	writel(0x0, fd_dev->fd_hw.fd_base + FD_HW_ENABLE);
> +	writel(0x0, fd_dev->fd_hw.fd_base + FD_INT_EN);
> +	clk_disable_unprepare(fd_dev->fd_hw.fd_clk);
> +	return 0;
> +}
> +
> +static int mtk_fd_resume(struct device *dev)
> +{
> +	struct mtk_fd_dev *fd_dev = dev_get_drvdata(dev);
> +	int ret;
> +
> +	if (pm_runtime_suspended(dev))
> +		return 0;
> +
> +	ret = clk_prepare_enable(fd_dev->fd_hw.fd_clk);
> +	if (ret < 0) {
> +		dev_dbg(dev, "open fd clk failed\n");
> +		clk_disable_unprepare(fd_dev->fd_hw.fd_clk);
> +	}
> +
> +	/* resume FD HW */
> +	writel(ENABLE_FD, fd_dev->fd_hw.fd_base + FD_HW_ENABLE);
> +	if (fd_dev->fd_hw.state == FD_ENQ)
> +		writel(0x1, fd_dev->fd_hw.fd_base + FD_INT_EN);
> +	return 0;
> +}
> +
> +static const struct dev_pm_ops mtk_fd_pm_ops = {
> +	SET_SYSTEM_SLEEP_PM_OPS(mtk_fd_suspend, mtk_fd_resume)
> +	SET_RUNTIME_PM_OPS(mtk_fd_suspend, mtk_fd_resume, NULL)

We need separate runtime and system PM ops. Their behavior is expected to be
different.

For runtime PM ops, you the functions should just unconditionally power on
or power off the device.

For system PM ops, suspend needs to make sure that no further job is queued
to the hardware and that any job being already processed by the hardware is
completed. Resume needs to resume the processing if there are any further
jobs to be queued to the hardware.

> +};
> +
> +static const struct of_device_id mtk_fd_of_ids[] = {
> +	{ .compatible = "mediatek,mt8183-fd", },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(of, mtk_fd_of_ids);
> +
> +static struct platform_driver mtk_fd_driver = {
> +	.probe   = mtk_fd_probe,
> +	.remove  = mtk_fd_remove,
> +	.driver  = {
> +		.name  = "mtk-fd-4.0",
> +		.of_match_table = of_match_ptr(mtk_fd_of_ids),
> +		.pm = &mtk_fd_pm_ops,
> +	}
> +};
> +module_platform_driver(mtk_fd_driver);
> +
> +MODULE_DESCRIPTION("Mediatek FD driver");
> +MODULE_LICENSE("GPL");

GPL v2

Best regards,
Tomasz
Tomasz Figa Aug. 9, 2019, 8:07 a.m. UTC | #6
On Mon, Jul 29, 2019 at 8:58 PM Jerry-ch Chen
<Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Mon, 2019-07-29 at 17:57 +0800, Tomasz Figa wrote:
> > On Mon, Jul 29, 2019 at 3:01 PM Jerry-ch Chen
> > <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Enrico,
> > >
> > > On Tue, 2019-07-09 at 18:56 +0800, Enrico Weigelt, metux IT consult
> > > wrote:
> > > > On 09.07.19 10:41, Jerry-ch Chen wrote:
> > > >
> > > > Hi,
> > > >
> > > >
> > > > > diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > > > > new file mode 100644
> > > > > index 0000000..289999b
> > > > > --- /dev/null
> > > > > +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > > > > @@ -0,0 +1,157 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > +//
> > > > > +// Copyright (c) 2018 MediaTek Inc.
> > > > > +
> > > > > +#ifndef __MTK_FD_HW_H__
> > > > > +#define __MTK_FD_HW_H__
> > > > > +
> > > > > +#include <linux/io.h>
> > > > > +#include <linux/types.h>
> > > > > +#include <linux/platform_device.h>
> > > > > +#include <media/v4l2-ctrls.h>
> > > > > +#include <media/v4l2-device.h>
> > > > > +#include <media/videobuf2-v4l2.h>
> > > > > +
> > > > > +#define MTK_FD_OUTPUT_MIN_WIDTH                    26U
> > > > > +#define MTK_FD_OUTPUT_MIN_HEIGHT           26U
> > > > > +#define MTK_FD_OUTPUT_MAX_WIDTH                    640U
> > > > > +#define MTK_FD_OUTPUT_MAX_HEIGHT           480U
> > > > > +
> > > > > +/* Control the user defined image widths and heights
> > > > > + * to be scaled and performed face detection in FD HW.
> > > > > + * MTK FD support up to 14 user defined image sizes to perform face detection.
> > > > > + */
> > > > > +#define V4L2_CID_MTK_FD_SCALE_IMG_WIDTH            (V4L2_CID_USER_MTK_FD_BASE + 1)
> > > > > +#define V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT   (V4L2_CID_USER_MTK_FD_BASE + 2)
> > > >
> > > > I've got a *really* bad feeling about introducing chip specific
> > > > uapi stuff. (by the way: uapi stuff belongs into include/uapi/...)
> > > >
> > > Thanks for your comments,
> > >
> > > If we remain chip-specific control IDs, I will move the uapi stuff into
> > > inlcude/uapi/mtk_fd.h (filename TBD)
> > >
> > > > Maybe you could tell us what that's *really* about, so we can find some
> > > > standard / chip-independent api for these things. That's one of the
> > > > major point of the kernel: hardware abstraction.
> > > >
> > > I am not sure if it is possible for us to add some standard
> > > v4l2-controls for face detection, a further explanations of controls are
> > > listed below.
> > >
> > > In v4l2-controls, there exists V4L2_CID_DETECT_CLASS, but I haven't
> > > found the standards or api that can be used for face detection yet.
> > > https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/v4l2-controls.h#L1092
> > >
> > > For detecting certain face angle and head direction, we would need
> > > V4L2_CID_DETECT_ANGLE, V4L2_CID_DETECT_DIRECTION controls for user to
> > > specify the angle and direction to be detected.
> > > In MTK FD driver, we support the following angles and directions to be
> > > selected by user, and they are both multiple selected .
> > > FD_angle_table[] = {-90, -45, 0 , 45, 90}
> > > FD_direction_table[] = {0, 30, 60, 90, 120, 150, ..., 330}
> > >
> > > Assuming these v4l2-controls are array of V4L2_CTRL_TYPE_U16 with
> > > dimension 5 and 12.
> > > User can select the desired angle and directions to be detected into
> > > arrays and bring it to driver by these controls, however, the more they
> > > select, the longer execution time needed by HW.
> > >
> >
> > Sounds like we need some kind of a menu bitmask control here, but I
> > don't see V4L2 having anything like that.
> >
> > Hans, Sakari, any ideas?
> >
> > > For detecting different sizes of faces and increase the detection speed,
> > > FD driver might need to scales down the input image into different
> > > smaller sizes
> >
> > Do you mean the FD hardware would do the scaling or the driver code
> > itself? It would be undesirable to do such scaling in a kernel driver,
> > so if that's not something handled by the hardware, the downscaled
> > image might need to be provided from the userspace.
> >
> Thanks for your comments.
>
> Yes, FD hardware will do the scaling itself, so driver could set the
> sizes.
>
> > >, besides driver default values, user or proprietary
> > > algorithm library can manually set the desired image sizes, therefore,
> > > we would need the following controls:
> > > V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH and
> > > V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT.
> > > In MTK FD driver, we implement these controls as array of
> > > V4L2_CTRL_TYPE_U16 with the dimension 15.
> >
> > Why 15?
> >
> It consists of one input image size and 14 down-scaled image sizes,
> the amount 15 (or say 14) is defined by the MTK FD algo library,
> therefore I remain the number of 15 here for communicate with the
> library.
> Maybe it should be defined as following?
> MTK_FD_MAX_SCALE_SIZE_NUM               14
> and
> MTK_FD_SCALE_ARR_NUM                    15
>
> > >
> > > For controlling detection speed, we would need the
> > > V4L2_CID_DETECT_SPEED, the faster speedup implies the lower accuracy of
> > > detection, In MTK FD driver, the max level of speedup is 7, and default
> > > value is 0.
> > >
> > > For MTK FD algorithm user library, they would need select extra
> > > detection features(models) used in HW, we need
> > > V4L2_CID_MTK_FD_EXTRA_MODEL, this will be set to 1 for using extra
> > > model. However, we are considering make this control more
> > > chip-independent and can be added into standard.
> > > for example, V4L2_CID_DETECTION_FD_MODEL or ...FD_ALGO,
> > > drivers can define the detection algorithm or detection model to be used
> > > for users to select. How do you think?
> >
> > Sounds like something that could be a menu control, so it could vary
> > between drivers.
> >
> Ok, and maybe it should be created by v4l2_ctrl_new_int_menu(...)?
>
> > >
> > > In short, I summery the control IDs as following:
> > > V4L2_CID_DETECT_ANGLE: set the angle of face in degrees. 90 ~ -90
> > > degrees.
> > > V4L2_CID_DETECT_DIRECTION: set the rotation of the head in degrees.
> > > 0~330 degrees.
> > > V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH: set the image widths for an input
> > > image to be scaled down for face detection
> > > V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT: set the image heights for an
> > > input image to be scaled down for face detection
> > > V4L2_CID_DETECT_SPEED: set the detection speed, usually reducing
> > > accuracy.
> > > V4L2_CID_DETECTION_FD_MODEL: select the detection model or algorithm to
> > > be used by face detection driver.
> > >
> > > > > +#define ENABLE_FD                          0x111
> > > > > +#define FD_HW_ENABLE                               0x4
> > > > > +#define FD_INT_EN                          0x15c
> > > > > +#define FD_INT                                     0x168
> > > > > +#define FD_RESULT                          0x178
> > > > > +#define FD_IRQ_MASK                                0x001
> > > > > +
> > > > > +#define RS_MAX_BUF_SIZE                            2288788
> > > > > +#define FD_MAX_SPEEDUP                             7
> > > > > +#define FD_MAX_POSE_VAL                            0xfffffffffffffff
> > > > > +#define FD_DEF_POSE_VAL                            0x3ff
> > > > > +#define MAX_FD_SEL_NUM                             1026
> > > >
> > > > If that file is supposed to be included by anything beyond the driver
> > > > itself, we need proper prefixing. (same for anything else in here)
> > > >
> > > I will fix it as following:
> > >
> > > #define FD_ENABLE    0x111
> > >
> > > #define FD_REG_OFFSET_HW_ENABLE  0x4
> > > #define FD_REG_OFFSET_INT_EN     0x15c
> > > #define FD_REG_OFFSET_INT_VAL    0x168
> > > #define FD_REG_OFFSET_RESULT     0x178
> > >
> > > #define FD_IRQ_MASK         1
> > > #define FD_MAX_RS_BUF_SIZE  2288788
> > > #define FD_MAX_SPEEDUP      7
> > > #define FD_MAX_RESULT_NUM   1026
> > >
> >
> > I'd suggest the MTK_FD_ prefix.
> >
> Ok, I will use MTK_FD_ prefix.
>
> > > > > diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
> > > > > index 3dcfc61..eae876e 100644
> > > > > --- a/include/uapi/linux/v4l2-controls.h
> > > > > +++ b/include/uapi/linux/v4l2-controls.h
> > > > > @@ -192,6 +192,10 @@ enum v4l2_colorfx {
> > > > >   * We reserve 16 controls for this driver. */
> > > > >  #define V4L2_CID_USER_IMX_BASE                     (V4L2_CID_USER_BASE + 0x10b0)
> > > > >
> > > > > +/* The base for the mediatek FD driver controls */
> > > > > +/* We reserve 16 controls for this driver. */
> > > > > +#define V4L2_CID_USER_MTK_FD_BASE          (V4L2_CID_USER_BASE + 0x10d0)
> > > >
> > > > Why only the base, but not the actual IDs in uapi ?
> > > >
> > > I will put actual IDs in uapi/ for user to reference.

Enrico, any thoughts on the explanation that Jerry provided and
further discussion above?

Thanks,
Tomasz
Jerry-ch Chen Aug. 25, 2019, 9:18 a.m. UTC | #7
Hi Tomasz,

On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote: 
> Hi Jerry,
> 
> On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > 
> > This patch adds the driver of Face Detection (FD) unit in
> > Mediatek camera system, providing face detection function.
> > 
> > The mtk-isp directory will contain drivers for multiple IP
> > blocks found in Mediatek ISP system. It will include ISP Pass 1
> > driver (CAM), sensor interface driver, DIP driver and face
> > detection driver.
> > 
> > Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > ---
> >  drivers/media/platform/Makefile               |    2 +
> >  drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
> >  drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  157 +++
> >  drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1259 +++++++++++++++++++++++++
> >  include/uapi/linux/v4l2-controls.h            |    4 +
> >  5 files changed, 1427 insertions(+)
> >  create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
> >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
> >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> >
> 
> Thanks for the patch! I finally got a chance to fully review the code. Sorry
> for the delay. Please check my comments inline.
> 
I appreciate your comments.
I've fixed most of the comments and verifying them,
Sorry for the delay, here is the reply.

> 
> > diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
> > index e6deb25..8b817cc 100644
> > --- a/drivers/media/platform/Makefile
> > +++ b/drivers/media/platform/Makefile
> > @@ -94,6 +94,8 @@ obj-$(CONFIG_VIDEO_MEDIATEK_MDP)	+= mtk-mdp/
> >  
> >  obj-$(CONFIG_VIDEO_MEDIATEK_JPEG)	+= mtk-jpeg/
> >  
> > +obj-$(CONFIG_VIDEO_MEDIATEK_FD)		+= mtk-isp/fd/
> > +
> >  obj-$(CONFIG_VIDEO_QCOM_CAMSS)		+= qcom/camss/
> >  
> >  obj-$(CONFIG_VIDEO_QCOM_VENUS)		+= qcom/venus/
> > diff --git a/drivers/media/platform/mtk-isp/fd/Makefile b/drivers/media/platform/mtk-isp/fd/Makefile
> > new file mode 100644
> > index 0000000..9b1c501
> > --- /dev/null
> > +++ b/drivers/media/platform/mtk-isp/fd/Makefile
> > @@ -0,0 +1,5 @@
> > +# SPDX-License-Identifier: GPL-2.0
> > +
> > +mtk-fd-objs += mtk_fd_40.o
> > +
> > +obj-$(CONFIG_VIDEO_MEDIATEK_FD) += mtk-fd.o
> > \ No newline at end of file
> > diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > new file mode 100644
> > index 0000000..289999b
> > --- /dev/null
> > +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > @@ -0,0 +1,157 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +//
> > +// Copyright (c) 2018 MediaTek Inc.
> > +
> > +#ifndef __MTK_FD_HW_H__
> > +#define __MTK_FD_HW_H__
> > +
> > +#include <linux/io.h>
> > +#include <linux/types.h>
> > +#include <linux/platform_device.h>
> > +#include <media/v4l2-ctrls.h>
> > +#include <media/v4l2-device.h>
> > +#include <media/videobuf2-v4l2.h>
> > +
> > +#define MTK_FD_OUTPUT_MIN_WIDTH			26U
> > +#define MTK_FD_OUTPUT_MIN_HEIGHT		26U
> > +#define MTK_FD_OUTPUT_MAX_WIDTH			640U
> > +#define MTK_FD_OUTPUT_MAX_HEIGHT		480U
> > +
> > +/* Control the user defined image widths and heights
> > + * to be scaled and performed face detection in FD HW.
> > + * MTK FD support up to 14 user defined image sizes to perform face detection.
> > + */
> > +#define V4L2_CID_MTK_FD_SCALE_IMG_WIDTH		(V4L2_CID_USER_MTK_FD_BASE + 1)
> > +#define V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT	(V4L2_CID_USER_MTK_FD_BASE + 2)
> > +
> > +/* Control the numbers of user defined image sizes.
> > + * The default value is 0 which means user is not going
> > + * to define the specific image sizes.
> > + */
> > +#define V4L2_CID_MTK_FD_SCALE_IMG_NUM		(V4L2_CID_USER_MTK_FD_BASE + 3)
> > +
> > +/* Control the Face Pose to be detected.
> > + * Here describe the value as following:
> > + * {0, detect the front face with rotation from 0 to 270 degrees},
> > + * {1, detect the front face with rotation from 0 to 240 and 300 degrees},
> > + * {2, detect the front face with rotation from 0 to 240 and 330 degrees},
> > + * {3, detect the front face with rotation from 0 to 240 and left side face}.
> > + */
> > +#define V4L2_CID_MTK_FD_DETECT_POSE		(V4L2_CID_USER_MTK_FD_BASE + 4)
> > +#define V4L2_CID_MTK_FD_DETECT_SPEEDUP		(V4L2_CID_USER_MTK_FD_BASE + 5)
> > +#define V4L2_CID_MTK_FD_EXTRA_MODEL		(V4L2_CID_USER_MTK_FD_BASE + 6)
> > +
> > +/* We reserve 16 controls for this driver. */
> > +#define V4L2_CID_MTK_FD_MAX			16
> > +
> > +#define ENABLE_FD				0x111
> > +#define FD_HW_ENABLE				0x4
> > +#define FD_INT_EN				0x15c
> > +#define FD_INT					0x168
> > +#define FD_RESULT				0x178
> > +#define FD_IRQ_MASK				0x001
> > +
> > +#define RS_MAX_BUF_SIZE				2288788
> > +#define FD_MAX_SPEEDUP				7
> > +#define FD_MAX_POSE_VAL				0xfffffffffffffff
> > +#define FD_DEF_POSE_VAL				0x3ff
> > +#define MAX_FD_SEL_NUM				1026
> > +
> > +/* The max. number of face sizes could be detected, for feature scaling */
> > +#define FACE_SIZE_NUM_MAX			14
> > +/* FACE_SIZE_NUM_MAX + 1, first scale for input image W/H */
> > +#define FD_SCALE_NUM				15
> > +
> > +enum fd_state {
> > +	FD_ENQ,
> > +	FD_CBD,
> > +};
> > +
> > +enum fd_img_format {
> > +	FMT_VYUY = 2,
> > +	FMT_UYVY,
> > +	FMT_YVYU,
> > +	FMT_YUYV,
> > +	FMT_UNKNOWN
> > +};
> 
> Please use #define macros for hardware/firmware interface definitions.
> 
Ok, I will refine as following
#define MTK_FD_HW_FMT_VYUY    2
#define MTK_FD_HW_FMT_UYVY    3
#define MTK_FD_HW_FMT_YVYU    4
#define MTK_FD_HW_FMT_YUYV    5
#define MTK_FD_HW_FMT_UNKNOWN    6

> > +
> > +struct fd_buffer {
> > +	__u32 scp_addr;	/* used by SCP */
> > +	__u32 dma_addr;	/* used by DMA HW */
> > +} __packed;
> > +
> > +enum fd_scp_cmd {
> > +	FD_CMD_INIT,
> > +	FD_CMD_ENQUEUE,
> > +};
> 
> Ditto.
Ok, I will refine as following 
#define MTK_FD_IPI_CMD_INIT 0
#define MTK_FD_IPI_CMD_ENQUEUE 1 
> 
> > +
> > +struct fd_user_output {
> > +	__u64 results[MAX_FD_SEL_NUM];
> > +	__u16 number;
> > +};
> > +
> > +struct user_param {
> > +	u8 fd_pose;
> > +	u8 fd_speedup;
> > +	u8 fd_extra_model;
> > +	u8 scale_img_num;
> > +	u8 src_img_fmt;
> > +	__u16 scale_img_width[FD_SCALE_NUM];
> > +	__u16 scale_img_height[FD_SCALE_NUM];
> > +} __packed;
> > +
> > +struct fd_hw_param {
> > +	struct fd_buffer src_img;
> > +	struct fd_buffer user_result;
> > +	struct user_param user_param;
> > +} __packed;
> > +
> > +struct cmd_init_info {
> > +	struct fd_buffer fd_manager;
> > +	__u32 rs_dma_addr;
> > +} __packed;
> > +
> > +struct ipi_message {
> > +	u8 cmd_id;
> > +	union {
> > +		struct cmd_init_info init_param;
> > +		struct fd_hw_param hw_param;
> > +	};
> > +} __packed;
> > +
> > +struct mtk_fd_hw {
> > +	struct clk *fd_clk;
> > +	struct rproc *rproc_handle;
> > +	struct platform_device *scp_pdev;
> > +	struct fd_buffer scp_mem;
> > +	wait_queue_head_t wq;
> > +	void __iomem *fd_base;
> > +	atomic_t fd_user_cnt;
> > +	enum fd_state state;
> > +	u32 fd_irq_result;
> > +	/* Ensure only one job in hw */
> > +	struct mutex fd_hw_lock;
> > +};
> > +
> > +struct mtk_fd_dev {
> > +	struct platform_device *pdev;
> > +	struct v4l2_device v4l2_dev;
> > +	struct v4l2_m2m_dev *m2m_dev;
> > +	struct media_device mdev;
> > +	struct video_device vfd;
> > +	struct mtk_fd_hw fd_hw;
> 
> Could we just put the contents of that struct directly here? That should
> simplify dereference chains in the code.
> 
Ok, I will fix it.

> > +	/* Lock for V4L2 operations */
> > +	struct mutex vfd_lock;
> > +};
> > +
> > +struct mtk_fd_ctx {
> > +	struct mtk_fd_dev *fd_dev;
> > +	struct device *dev;
> > +	struct v4l2_fh fh;
> > +	struct v4l2_ctrl_handler hdl;
> > +	struct v4l2_pix_format_mplane src_fmt;
> > +	struct v4l2_meta_format dst_fmt;
> > +	struct user_param user_param;
> > +};
> > +
> > +#endif/*__MTK_FD_HW_H__*/
> > diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c b/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> > new file mode 100644
> > index 0000000..246d3aa
> > --- /dev/null
> > +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> > @@ -0,0 +1,1259 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +//
> > +// Copyright (c) 2018 MediaTek Inc.
> > +
> > +#include <linux/clk.h>
> > +#include <linux/dma-mapping.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/jiffies.h>
> > +#include <linux/module.h>
> > +#include <linux/of_address.h>
> > +#include <linux/of_irq.h>
> > +#include <linux/of_platform.h>
> > +#include <linux/platform_data/mtk_scp.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/pm_runtime.h>
> > +#include <linux/remoteproc.h>
> > +#include <media/videobuf2-dma-contig.h>
> > +#include <media/videobuf2-v4l2.h>
> > +#include <linux/wait.h>
> > +#include <media/v4l2-ioctl.h>
> > +#include <media/v4l2-event.h>
> > +#include <media/v4l2-ctrls.h>
> > +#include <media/v4l2-mem2mem.h>
> > +#include <media/videobuf2-core.h>
> > +
> > +#include "mtk_fd.h"
> > +
> > +static struct v4l2_meta_format fw_param_fmts[] = {
> 
> const?
> 
> Also, isn't this the buffer with the detected faces rather than params?

This struct will be removed. Yes, it's the buffer with detected faces.

> 
> > +	{
> > +		.dataformat = V4L2_META_FMT_MTISP_PARAMS,
> 
> This should use its own format.
Ok, I will define  V4L2_META_FMT_MTFD_RESULT in videodev2.h  

> 
> > +		.buffersize = 1024 * 30,
> 
> Please define a C struct for this buffer and use sizeof() here.
> 
Ok, that will be sizeof(struct fd_user_output);

> > +	},
> > +};
> 
> Actually, is there a reason to have this array at all if there is only 1
> format supported? I think we could just put the right constants directly in
> the code.
> 
I agree, I'll remove this array and use the constants in the code.

> > +
> > +static const struct v4l2_pix_format_mplane in_img_fmts[] = {
> > +	{
> > +		.width = MTK_FD_OUTPUT_MAX_WIDTH,
> > +		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
> > +		.pixelformat = V4L2_PIX_FMT_VYUY,
> > +		.colorspace = V4L2_COLORSPACE_BT2020,
> > +		.field = V4L2_FIELD_NONE,
> > +		.num_planes = 1,
> > +	},
> > +	{
> > +		.width = MTK_FD_OUTPUT_MAX_WIDTH,
> > +		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
> > +		.pixelformat = V4L2_PIX_FMT_YUYV,
> > +		.colorspace = V4L2_COLORSPACE_BT2020,
> > +		.field = V4L2_FIELD_NONE,
> > +		.num_planes = 1,
> > +	},
> > +	{
> > +		.width = MTK_FD_OUTPUT_MAX_WIDTH,
> > +		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
> > +		.pixelformat = V4L2_PIX_FMT_YVYU,
> > +		.colorspace = V4L2_COLORSPACE_BT2020,
> > +		.field = V4L2_FIELD_NONE,
> > +		.num_planes = 1,
> > +	},
> > +	{
> > +		.width = MTK_FD_OUTPUT_MAX_WIDTH,
> > +		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
> > +		.pixelformat = V4L2_PIX_FMT_UYVY,
> > +		.colorspace = V4L2_COLORSPACE_BT2020,
> > +		.field = V4L2_FIELD_NONE,
> > +		.num_planes = 1,
> 
> If all the formats have the same values for a field, there is no reason to
> have the field initialized here. Just make initialize them to the constant
> values inside TRY_FMT.
> 
Ok, I will do so.

> Hmm, are the interleaved YUV formats the only formats supported by this
> hardware? That would be quite unfortunate given the formats supported by the
> other IP blocks on this SoC use the more typical planar formats.
> 
The supported YUV 1 plane formats are listed above.
The rest supported format is YUV 2 plane format, which should be
V4L2_PIX_FMT_NV16M.
For the in_img_fmts[], I will refine as following:

static const struct v4l2_pix_format_mplane mtk_fd_img_fmts[] = {
{
.pixelformat = V4L2_PIX_FMT_VYUY,
.num_planes = 1,
},
{
.pixelformat = V4L2_PIX_FMT_YUYV,
.num_planes = 1,
},
{
.pixelformat = V4L2_PIX_FMT_YVYU,
.num_planes = 1,
},
{
.pixelformat = V4L2_PIX_FMT_UYVY,
.num_planes = 1,
},
{
.pixelformat = V4L2_PIX_FMT_NV16M,
.num_planes = 2,
},
{
.pixelformat = V4L2_PIX_FMT_NV61M,
.num_planes = 2,
},
};

> > +	},
> > +};
> > +
> > +#define NUM_FORMATS ARRAY_SIZE(in_img_fmts)
> > +
> > +static inline struct mtk_fd_dev *mtk_fd_hw_to_dev(struct mtk_fd_hw *fd_hw)
> > +{
> > +	return container_of(fd_hw, struct mtk_fd_dev, fd_hw);
> > +}
> > +
> > +static inline struct mtk_fd_ctx *fh_to_ctx(struct v4l2_fh *fh)
> > +{
> > +	return container_of(fh, struct mtk_fd_ctx, fh);
> > +}
> > +
> > +static int mtk_fd_load_scp(struct mtk_fd_hw *fd_hw)
> > +{
> > +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> > +	struct device *dev = &fd_dev->pdev->dev;
> > +	phandle rproc_phandle;
> > +	int ret;
> > +
> > +	/* init scp */
> > +	fd_hw->scp_pdev = scp_get_pdev(fd_dev->pdev);
> > +	if (!fd_hw->scp_pdev) {
> > +		dev_err(dev, "Failed to get scp device\n");
> > +		return -ENODEV;
> > +	}
> > +
> > +	if (of_property_read_u32(fd_dev->pdev->dev.of_node, "mediatek,scp",
> > +				 &rproc_phandle)) {
> > +		dev_err(dev, "Could not get scp device\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	fd_hw->rproc_handle = rproc_get_by_phandle(rproc_phandle);
> > +	if (!fd_hw->rproc_handle) {
> > +		dev_err(dev, "Could not get FD's rproc_handle\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	ret = rproc_boot(fd_hw->rproc_handle);
> > +	if (ret < 0) {
> > +		/**
> > +		 * Return 0 if downloading firmware successfully,
> > +		 * otherwise it is failed
> > +		 */
> > +		dev_err(dev, "rproc_boot failed\n");
> > +		return -ENODEV;
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static dma_addr_t mtk_fd_hw_alloc_rs_dma_addr(struct mtk_fd_hw *fd_hw)
> > +{
> > +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> > +	struct device *dev = &fd_dev->pdev->dev;
> > +	void *va;
> > +	dma_addr_t dma_handle;
> > +
> > +	va = dma_alloc_coherent(dev, RS_MAX_BUF_SIZE, &dma_handle, GFP_KERNEL);
> 
> I see this allocated here, but I don't see this freed anywhere.
> 
I will fix it in next version.

> > +	if (!va) {
> > +		dev_err(dev, "dma_alloc null va\n");
> > +		return -ENOMEM;
> > +	}
> > +	memset(va, 0, RS_MAX_BUF_SIZE);
> > +
> > +	return dma_handle;
> > +}
> > +
> > +static int mtk_fd_send_ipi_init(struct mtk_fd_hw *fd_hw)
> > +{
> > +	struct ipi_message fd_init_msg;
> > +	dma_addr_t rs_dma_addr;
> > +
> > +	fd_init_msg.cmd_id = FD_CMD_INIT;
> > +
> > +	fd_init_msg.init_param.fd_manager.scp_addr = fd_hw->scp_mem.scp_addr;
> > +	fd_init_msg.init_param.fd_manager.dma_addr = fd_hw->scp_mem.dma_addr;
> > +
> > +	rs_dma_addr = mtk_fd_hw_alloc_rs_dma_addr(fd_hw);
> > +	if (!rs_dma_addr)
> > +		return -ENOMEM;
> > +
> > +	fd_init_msg.init_param.rs_dma_addr = rs_dma_addr;
> > +
> > +	return scp_ipi_send(fd_hw->scp_pdev, SCP_IPI_FD_CMD, &fd_init_msg,
> > +			    sizeof(fd_init_msg), 0);
> > +}
> > +
> > +static int mtk_fd_hw_enable(struct mtk_fd_hw *fd_hw)
> > +{
> > +	int ret;
> > +
> > +	ret = mtk_fd_load_scp(fd_hw);
> > +	if (ret)
> > +		return ret;
> > +
> > +	ret = mtk_fd_send_ipi_init(fd_hw);
> > +	if (ret)
> > +		return ret;
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_hw_connect(struct mtk_fd_hw *fd_hw)
> > +{
> > +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> > +	s32 usercount;
> > +
> > +	mutex_lock(&fd_hw->fd_hw_lock);
> 
> First of all, we don't need a mutex here, because all the V4L2 ioctls are
> already running with the video device mutex.
Ok, I will remove the using of the mutex here.

> 
> > +	usercount = atomic_inc_return(&fd_hw->fd_user_cnt);
> 
> If we already use a mutex, there is no point in using atomic types.
> 
Ok, I will replace the atomic types with normal types

> > +	if (usercount == 1) {
> > +		pm_runtime_get_sync(&fd_dev->pdev->dev);
> > +		if (mtk_fd_hw_enable(fd_hw)) {
> > +			pm_runtime_put_sync(&fd_dev->pdev->dev);
> > +			atomic_dec_return(&fd_hw->fd_user_cnt);
> > +			mutex_unlock(&fd_hw->fd_hw_lock);
> > +			return -EINVAL;
> > +		}
> > +	}
> 
> This is a simple mem-to-mem device, so there is no reason to keep it active
> all the time it's streaming. Please just get the runtime PM counter when
> queuing a job to the hardware and release it when the job finishes.
> 
> I guess we might still want to do the costly operations like rproc_boot()
> when we start streaming, though.
> 
Do you mean by moving the pm_runtime_get/put stuff to the job execution
and job finish place?
the rproc_boot() operation will be done here.

> > +	mutex_unlock(&fd_hw->fd_hw_lock);
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_wait_irq(struct mtk_fd_hw *fd_hw)
> > +{
> > +	int timeout;
> > +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> > +	struct device *dev = &fd_dev->pdev->dev;
> > +
> > +	timeout = wait_event_interruptible_timeout
> > +		(fd_hw->wq, (fd_hw->fd_irq_result & FD_IRQ_MASK),
> > +		 usecs_to_jiffies(1000000));
> > +	if (!timeout) {
> > +		dev_err(dev, "%s timeout, %d\n", __func__,
> > +			fd_hw->fd_irq_result);
> > +		return -EAGAIN;
> > +	} else if (!(fd_hw->fd_irq_result & FD_IRQ_MASK)) {
> > +		dev_err(dev, "%s No IRQ mask:0x%8x\n",
> > +			__func__, fd_hw->fd_irq_result);
> > +		return -EINVAL;
> > +	}
> > +	fd_hw->fd_irq_result = 0;
> > +
> > +	return 0;
> > +}
> > +
> > +static void mtk_fd_hw_disconnect(struct mtk_fd_hw *fd_hw)
> > +{
> > +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> > +	s32 usercount;
> > +
> > +	mutex_lock(&fd_hw->fd_hw_lock);
> > +	atomic_dec_return(&fd_hw->fd_user_cnt);
> > +	usercount = atomic_read(&fd_hw->fd_user_cnt);
> > +	if (usercount == 0) {
> > +		if (fd_hw->state == FD_ENQ)
> > +			mtk_fd_wait_irq(fd_hw);
> 
> This shouldn't be possible to happen as the queues should be already stopped
> at this point.
> 
Ok, I will remove it. 
> > +
> > +		pm_runtime_put_sync(&fd_dev->pdev->dev);
> 
> Any reason to use pm_runtime_put_sync() over pm_runtime_put()?
> 
No special reason to do so, the pm_runtime_put_sync here will be moved
to the place each job finished.

> > +		rproc_shutdown(fd_hw->rproc_handle);
> > +		rproc_put(fd_hw->rproc_handle);
> > +	}
> > +	mutex_unlock(&fd_hw->fd_hw_lock);
> > +}
> > +
> > +static void mtk_fd_hw_job_finish(struct mtk_fd_hw *fd_hw,
> > +				 struct fd_hw_param *fd_param,
> > +				 enum vb2_buffer_state vb_state)
> > +{
> > +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> > +	struct mtk_fd_ctx *ctx;
> > +	struct device *dev = &fd_dev->pdev->dev;
> > +	struct vb2_buffer *src_vb, *dst_vb;
> > +	struct vb2_v4l2_buffer *src_vbuf = NULL, *dst_vbuf = NULL;
> > +
> > +	ctx = v4l2_m2m_get_curr_priv(fd_dev->m2m_dev);
> > +	if (!ctx) {
> > +		dev_err(dev, "Instance released before end of transaction\n");
> > +		return;
> > +	}
> 
> This shouldn't be possible. I suspect you're seeing this because
> .stop_streaming is not implemented correctly. See my comment there.
> 
Ok, I will remove this. 
> > +
> > +	src_vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> > +	if (WARN_ON(!src_vb))
> > +		return;
> 
> This shouldn't be possible.
> 
Ditto. 
> > +	src_vbuf = to_vb2_v4l2_buffer(src_vb);
> > +
> > +	dst_vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> > +	if (WARN_ON(!dst_vb))
> > +		return;
> 
> Ditto.
> 
Ditto. 
> > +	dst_vbuf = to_vb2_v4l2_buffer(dst_vb);
> > +
> > +	dst_vbuf->vb2_buf.timestamp = src_vbuf->vb2_buf.timestamp;
> > +	dst_vbuf->timecode = src_vbuf->timecode;
> > +	dst_vbuf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
> > +	dst_vbuf->flags |= src_vbuf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
> 
> We should be able to just use v4l2_m2m_buf_copy_metadata().
> 
Ok, I will refine as:
v4l2_m2m_buf_copy_metadata(src_vbuf, dst_vbuf,
   V4L2_BUF_FLAG_TSTAMP_SRC_MASK); 
> > +
> > +	v4l2_m2m_buf_done(src_vbuf, vb_state);
> > +	v4l2_m2m_buf_done(dst_vbuf, vb_state);
> > +	v4l2_m2m_job_finish(fd_dev->m2m_dev, ctx->fh.m2m_ctx);
> > +}
> > +
> > +static int mtk_fd_hw_job_exec(struct mtk_fd_hw *fd_hw,
> > +			      struct fd_hw_param *fd_param,
> > +			      void *output_vaddr)
> > +{
> > +	struct fd_user_output *fd_output;
> > +	struct ipi_message fd_ipi_msg;
> > +	int ret;
> > +	u32 num;
> > +
> > +	if (fd_param->user_param.src_img_fmt == FMT_UNKNOWN)
> > +		goto param_err;
> 
> Is this possible?
> 
Only if user set wrong format, I will remove this.

> > +
> > +	mutex_lock(&fd_hw->fd_hw_lock);
> > +	fd_hw->state = FD_ENQ;
> 
> What is this state for?
> 
It was for checking status, if a job is processing, the state is
FD_ENQ, 
then we should wait for the last job to be done when pm_suspend().

> > +	fd_ipi_msg.cmd_id = FD_CMD_ENQUEUE;
> > +	memcpy(&fd_ipi_msg.hw_param, fd_param, sizeof(fd_ipi_msg.hw_param));
> > +	ret = scp_ipi_send(fd_hw->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
> > +			   sizeof(fd_ipi_msg), 0);
> > +	if (ret)
> > +		goto buf_err;
> > +
> > +	ret = mtk_fd_wait_irq(fd_hw);
> > +	if (ret)
> > +		goto buf_err;
> 
> This function is called from device_run and it shouldn't be synchronous. It
> should only queue the job to the hardware to be handled asynchronously when
> it finishes.
> 
Ok, I will fix it.

> > +
> > +	num = readl(fd_hw->fd_base + FD_RESULT);
> > +	/* Disable FD ISR */
> > +	writel(0x0, fd_hw->fd_base + FD_INT_EN);
> > +
> > +	fd_output = (struct fd_user_output *)output_vaddr;
> > +	fd_output->number = num;
> > +	fd_hw->state = FD_CBD;
> > +	mutex_unlock(&fd_hw->fd_hw_lock);
> > +
> > +	mtk_fd_hw_job_finish(fd_hw, fd_param, VB2_BUF_STATE_DONE);
> > +	return 0;
> > +
> > +buf_err:
> > +	mutex_unlock(&fd_hw->fd_hw_lock);
> > +param_err:
> > +	mtk_fd_hw_job_finish(fd_hw, fd_param, VB2_BUF_STATE_ERROR);
> > +	return ret;
> > +}
> > +
> > +static int mtk_fd_vb2_buf_out_validate(struct vb2_buffer *vb)
> > +{
> > +	struct vb2_v4l2_buffer *v4l2_buf = to_vb2_v4l2_buffer(vb);
> > +
> > +	v4l2_buf->field = V4L2_FIELD_NONE;
> 
> We need to fail with -EINVAL if v4l2_buf->field was different than
> V4L2_FIELD_ANY or _NONE.
> 

Ok, I will refine as following:

if (v4l2_buf->field == V4L2_FIELD_ANY)
v4l2_buf->field = V4L2_FIELD_NONE;
if (v4l2_buf->field != V4L2_FIELD_NONE)
return -EINVAL;

> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_vb2_buf_prepare(struct vb2_buffer *vb)
> > +{
> > +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> > +	struct vb2_queue *vq = vb->vb2_queue;
> > +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > +	struct device *dev = ctx->dev;
> > +	struct v4l2_pix_format_mplane *pixfmt;
> > +
> > +	switch (vq->type) {
> > +	case V4L2_BUF_TYPE_META_CAPTURE:
> > +		if (vb2_plane_size(vb, 0) < ctx->dst_fmt.buffersize) {
> > +			dev_err(dev, "meta size %d is too small\n");
> 
> Please make this dev_dbg(), because userspace misbehavior must not trigger
> kernel error logs.
> 

Ok, fixed.

> > +			return -EINVAL;
> > +		}
> > +		break;
> > +	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > +		pixfmt = &ctx->src_fmt;
> > +
> > +		if (vbuf->field == V4L2_FIELD_ANY)
> > +			vbuf->field = V4L2_FIELD_NONE;
> > +
> > +		if (vb->num_planes != 1 || vbuf->field != V4L2_FIELD_NONE) {
> > +			dev_err(dev, "plane or field %d not supported\n",
> > +				vb->num_planes, vbuf->field);
> 
> Ditto.
> 

Done.

> > +			return -EINVAL;
> > +		}
> > +		if (vb2_plane_size(vb, 0) < pixfmt->plane_fmt[0].sizeimage) {
> > +			dev_err(dev, "plane %d is too small\n");
> 
> Ditto.
> 

Done.

> > +			return -EINVAL;
> > +		}
> > +		break;
> > +	default:
> > +		dev_err(dev, "invalid queue type: %d\n", vq->type);
> > +		return -EINVAL;
> 
> We shouldn't need to handle this.
> 

Ok, I will remove this.

> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void mtk_fd_vb2_buf_queue(struct vb2_buffer *vb)
> > +{
> > +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> > +	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
> > +
> > +	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
> > +}
> > +
> > +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > +				  unsigned int *num_buffers,
> > +				  unsigned int *num_planes,
> > +				  unsigned int sizes[],
> > +				  struct device *alloc_devs[])
> > +{
> > +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > +	struct device *dev = ctx->dev;
> > +	unsigned int size;
> > +
> > +	switch (vq->type) {
> > +	case V4L2_BUF_TYPE_META_CAPTURE:
> > +		size = ctx->dst_fmt.buffersize;
> > +		break;
> > +	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > +		size = ctx->src_fmt.plane_fmt[0].sizeimage;
> > +		break;
> > +	default:
> > +		dev_err(dev, "invalid queue type: %d\n", vq->type);
> 
> We should need to handle this.
> 
Do you mean by giving a size instead of return -EINVAL?

> > +		return -EINVAL;
> > +	}
> > +
> > +	if (!*num_planes) {
> > +		*num_planes = 1;
> > +		sizes[0] = size;
> > +	}
> 
> We need to handle the case when *num_planes is non-zero. We then need to
> check if it's 1 and *sizes >= size and fail with -EINVAL if not.
> 
Ok, I will refine as following:
if (*num_planes) {
if (sizes[0] < size || *num_planes != 1)
return -EINVAL;
} else {
*num_planes = 1;
sizes[0] = size;
} 
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_vb2_start_streaming(struct vb2_queue *vq, unsigned int count)
> > +{
> > +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > +
> > +	return mtk_fd_hw_connect(&ctx->fd_dev->fd_hw);
> 
> This would be called twice for every context, once for the VIDEO_OUTPUT
> queue and once for the META_CAPTURE queue. Perhaps it would make sense to
> just do this for the VIDEO_OUTPUT queue?
> 
Ok, I will refine as:
if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
return mtk_fd_hw_connect(&ctx->fd_dev->fd_hw);
else
return 0;

> > +}
> > +
> > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > +{
> > +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > +	struct vb2_buffer *vb;
> 
> How do we guarantee here that the hardware isn't still accessing the buffers
> removed below?
> 
Maybe we can check the driver state flag and aborting the unfinished
jobs?
(fd_hw->state == FD_ENQ)

> > +
> > +	if (V4L2_TYPE_IS_OUTPUT(vq->type))
> > +		vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> > +	else
> > +		vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> > +
> > +	while (vb) {
> > +		v4l2_m2m_buf_done(to_vb2_v4l2_buffer(vb), VB2_BUF_STATE_ERROR);
> > +		if (V4L2_TYPE_IS_OUTPUT(vq->type))
> > +			vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> > +		else
> > +			vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> > +	}
> 
> We can use v4l2_m2m_buf_remove(). Also we can move the call into the loop
> condition:
> 
> while ((vb == v4l2_m2m_buf_remove(...)))
> 	v4l2_m2m_buf_done(...);
> 
Ok, I will refine as following:

while ((vb = v4l2_m2m_buf_remove(V4L2_TYPE_IS_OUTPUT(vq->type)?
  &m2m_ctx->out_q_ctx :
  &m2m_ctx->cap_q_ctx)))
v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);

> > +
> > +	mtk_fd_hw_disconnect(&ctx->fd_dev->fd_hw);
> 
> This should also probably be done only for 1 queue. Since the VIDEO_OUTPUT
> queue supports requestes, I'd consider it the master one.
> 
Ok, only for output queue to trigger disconnect.

if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
mtk_fd_hw_disconnect(&ctx->fd_dev->fd_hw); 
> > +}
> > +
> > +static void mtk_fd_vb2_request_complete(struct vb2_buffer *vb)
> > +{
> > +	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> > +
> > +	v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
> > +}
> > +
> > +static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> > +				  const struct v4l2_pix_format_mplane *sfmt)
> > +{
> > +	dfmt->width = sfmt->width;
> > +	dfmt->height = sfmt->height;
> > +	dfmt->pixelformat = sfmt->pixelformat;
> > +	dfmt->field = sfmt->field;
> > +	dfmt->colorspace = sfmt->colorspace;
> > +	dfmt->num_planes = sfmt->num_planes;
> > +
> > +	/* Use default */
> > +	dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > +	dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> > +	dfmt->xfer_func =
> > +		V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> > +	dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> > +	dfmt->plane_fmt[0].sizeimage =
> > +		dfmt->height * dfmt->plane_fmt[0].bytesperline;
> > +	memset(dfmt->reserved, 0, sizeof(dfmt->reserved));
> > +}
> 
> Could we unify this function with mtk_fd_m2m_try_fmt_out_mp()? That function
> should be almost directly reusable for the default format initialization +/-
> the arguments passed to it.
> 
Ok, I will try to reuse it as following:

static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
  const struct v4l2_pix_format_mplane *sfmt)
{
dfmt->field = V4L2_FIELD_NONE;
dfmt->colorspace = V4L2_COLORSPACE_BT2020;
dfmt->num_planes = sfmt->num_planes;
dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
dfmt->xfer_func =
V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);

/* Keep user setting as possible */
dfmt->width = clamp(dfmt->width,
    MTK_FD_OUTPUT_MIN_WIDTH,
    MTK_FD_OUTPUT_MAX_WIDTH);
dfmt->height = clamp(dfmt->height,
     MTK_FD_OUTPUT_MIN_HEIGHT,
     MTK_FD_OUTPUT_MAX_HEIGHT);

if (sfmt->num_planes == 2) {
/* NV16M and NV61M has 1 byte per pixel */
dfmt->plane_fmt[0].bytesperline = dfmt->width;
dfmt->plane_fmt[1].bytesperline = dfmt->width;
} else {
/* 2 bytes per pixel */
dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
}

dfmt->plane_fmt[0].sizeimage =
dfmt->height * dfmt->plane_fmt[0].bytesperline;
}

> > +
> > +static const struct v4l2_pix_format_mplane *mtk_fd_find_fmt(u32 format)
> > +{
> > +	unsigned int i;
> > +	const struct v4l2_pix_format_mplane *dev_fmt;
> > +
> > +	for (i = 0; i < NUM_FORMATS; i++) {
> > +		dev_fmt = &in_img_fmts[i];
> > +		if (dev_fmt->pixelformat == format)
> > +			return dev_fmt;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +static int mtk_fd_m2m_querycap(struct file *file, void *fh,
> > +			       struct v4l2_capability *cap)
> > +{
> > +	struct mtk_fd_dev *fd_dev = video_drvdata(file);
> > +	struct device *dev = &fd_dev->pdev->dev;
> > +
> > +	strscpy(cap->driver, dev->driver->name, sizeof(cap->driver));
> 
> Please use dev_driver_string().
> 
Done,

> > +	strscpy(cap->card, fd_dev->vfd.name, sizeof(cap->card));
> 
> I think we can also make this dev_driver_string().
> 
Done.

> > +	snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s",
> > +		 dev_name(&fd_dev->pdev->dev));
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_m2m_enum_fmt_out_mp(struct file *file, void *fh,
> > +				      struct v4l2_fmtdesc *f)
> > +{
> > +	int i;
> > +
> > +	for (i = 0; i < NUM_FORMATS; ++i) {
> > +		if (i == f->index) {
> > +			f->pixelformat = in_img_fmts[i].pixelformat;
> > +			return 0;
> > +		}
> > +	}
> 
> Why don't we just check if f->index is within the [0, ARRAY_SIZE()-1] bounds
> and then just use it to index the array directly?
> 
I will refine as :

static int mtk_fd_m2m_enum_fmt_out_mp(struct file *file, void *fh,
      struct v4l2_fmtdesc *f)
{
if ((f->index >= 0) && (f->index < NUM_FORMATS)) {
f->pixelformat = in_img_fmts[f->index].pixelformat;
return 0;
}

return -EINVAL;
} 
> > +
> > +	return -EINVAL;
> > +}
> > +
> > +static int mtk_fd_m2m_try_fmt_out_mp(struct file *file,
> > +				     void *fh,
> > +				     struct v4l2_format *f)
> 
> I think we could just shorten the function prefixes to "mtk_fd_".
> 
Do you mean by replace mtk_fd_m2m_* with mtk_fd_ ?

> > +{
> > +	struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
> > +	const struct v4l2_pix_format_mplane *fmt;
> > +
> > +	fmt = mtk_fd_find_fmt(pix_mp->pixelformat);
> > +	if (!fmt) {
> > +		/* Get default img fmt */
> > +		fmt = &in_img_fmts[0];
> > +		f->fmt.pix.pixelformat = fmt->pixelformat;
> > +	}
> > +
> > +	/* Use default */
> > +	pix_mp->field = fmt->field;
> > +	pix_mp->colorspace = fmt->colorspace;
> > +	pix_mp->num_planes = fmt->num_planes;
> > +	pix_mp->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > +	pix_mp->quantization = V4L2_QUANTIZATION_DEFAULT;
> > +	pix_mp->xfer_func =
> > +		V4L2_MAP_XFER_FUNC_DEFAULT(pix_mp->colorspace);
> > +
> > +	/* Keep user setting as possible */
> > +	pix_mp->width = clamp(pix_mp->width,
> > +			      MTK_FD_OUTPUT_MIN_WIDTH,
> > +			      MTK_FD_OUTPUT_MAX_WIDTH);
> > +	pix_mp->height = clamp(pix_mp->height,
> > +			       MTK_FD_OUTPUT_MIN_HEIGHT,
> > +			       MTK_FD_OUTPUT_MAX_HEIGHT);
> > +
> > +	pix_mp->plane_fmt[0].bytesperline = pix_mp->width * 2;
> 
> Please add a comment explaining why this is always 2.
> 
Done.
/* 2 bytes per pixel */ 
> > +	pix_mp->plane_fmt[0].sizeimage =
> > +		pix_mp->plane_fmt[0].bytesperline * pix_mp->height;
> > +	memset(pix_mp->plane_fmt[0].reserved, 0,
> > +	       sizeof(pix_mp->plane_fmt[0].reserved));
> 
> No need to zero this here. The core will handle it.
> 
Ok, I will remove it.

> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_m2m_g_fmt_out_mp(struct file *file, void *fh,
> > +				   struct v4l2_format *f)
> > +{
> > +	struct mtk_fd_ctx *ctx = fh_to_ctx(fh);
> > +
> > +	f->fmt.pix_mp = ctx->src_fmt;
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_m2m_s_fmt_out_mp(struct file *file, void *fh,
> > +				   struct v4l2_format *f)
> > +{
> > +	struct mtk_fd_ctx *ctx = fh_to_ctx(fh);
> > +	struct vb2_queue *vq;
> > +
> > +	/* Change not allowed if queue is streaming. */
> > +	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
> > +	if (vb2_is_streaming(vq) || vb2_is_busy(vq)) {
> 
> vb2_is_busy() is a superset of vb2_is_streaming(), so it's enough to just
> check the former.
> 
Ok, I will just check the former.

> > +		dev_dbg(ctx->dev, "vb2_is_streaming or vb2_is_busy");
> > +		return -EBUSY;
> > +	}
> > +
> > +	mtk_fd_m2m_try_fmt_out_mp(file, fh, f);
> > +	ctx->src_fmt = f->fmt.pix_mp;
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_m2m_enum_fmt_meta_cap(struct file *file, void *fh,
> > +					struct v4l2_fmtdesc *f)
> > +{
> > +	if (f->index)
> > +		return -EINVAL;
> > +
> > +	strscpy(f->description, "Face detection result",
> > +		sizeof(f->description));
> > +	f->pixelformat = fw_param_fmts[0].dataformat;
> > +	f->flags = 0;
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_m2m_g_fmt_meta_cap(struct file *file, void *fh,
> > +				     struct v4l2_format *f)
> > +{
> > +	f->fmt.meta.dataformat = fw_param_fmts[0].dataformat;
> > +	f->fmt.meta.buffersize = fw_param_fmts[0].buffersize;
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct vb2_ops mtk_fd_vb2_ops = {
> > +	.queue_setup = mtk_fd_vb2_queue_setup,
> > +	.buf_out_validate = mtk_fd_vb2_buf_out_validate,
> > +	.buf_prepare  = mtk_fd_vb2_buf_prepare,
> > +	.buf_queue = mtk_fd_vb2_buf_queue,
> > +	.start_streaming = mtk_fd_vb2_start_streaming,
> > +	.stop_streaming = mtk_fd_vb2_stop_streaming,
> > +	.wait_prepare = vb2_ops_wait_prepare,
> > +	.wait_finish = vb2_ops_wait_finish,
> > +	.buf_request_complete = mtk_fd_vb2_request_complete,
> > +};
> > +
> > +static const struct v4l2_ioctl_ops mtk_fd_v4l2_video_out_ioctl_ops = {
> > +	.vidioc_querycap = mtk_fd_m2m_querycap,
> > +	.vidioc_enum_fmt_vid_out_mplane = mtk_fd_m2m_enum_fmt_out_mp,
> > +	.vidioc_g_fmt_vid_out_mplane = mtk_fd_m2m_g_fmt_out_mp,
> > +	.vidioc_s_fmt_vid_out_mplane = mtk_fd_m2m_s_fmt_out_mp,
> > +	.vidioc_try_fmt_vid_out_mplane = mtk_fd_m2m_try_fmt_out_mp,
> > +	.vidioc_enum_fmt_meta_cap = mtk_fd_m2m_enum_fmt_meta_cap,
> > +	.vidioc_g_fmt_meta_cap = mtk_fd_m2m_g_fmt_meta_cap,
> > +	.vidioc_s_fmt_meta_cap = mtk_fd_m2m_g_fmt_meta_cap,
> > +	.vidioc_try_fmt_meta_cap = mtk_fd_m2m_g_fmt_meta_cap,
> > +	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
> > +	.vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs,
> > +	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
> > +	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
> > +	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
> > +	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
> > +	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
> > +	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
> > +	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
> > +	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
> > +	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
> > +};
> > +
> > +static int
> > +mtk_fd_queue_init(void *priv, struct vb2_queue *src_vq,
> > +		  struct vb2_queue *dst_vq)
> > +{
> > +	struct mtk_fd_ctx *ctx = priv;
> > +	int ret;
> > +
> > +	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
> > +	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> > +	src_vq->supports_requests = true;
> > +	src_vq->drv_priv = ctx;
> > +	src_vq->ops = &mtk_fd_vb2_ops;
> > +	src_vq->mem_ops = &vb2_dma_contig_memops;
> > +	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> > +	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > +	src_vq->lock = &ctx->fd_dev->vfd_lock;
> > +	src_vq->dev = ctx->fd_dev->v4l2_dev.dev;
> > +
> > +	ret = vb2_queue_init(src_vq);
> > +	if (ret)
> > +		return ret;
> > +
> > +	dst_vq->type = V4L2_BUF_TYPE_META_CAPTURE;
> > +	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
> > +	dst_vq->drv_priv = ctx;
> > +	dst_vq->ops = &mtk_fd_vb2_ops;
> > +	dst_vq->mem_ops = &vb2_dma_contig_memops;
> > +	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
> > +	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > +	dst_vq->lock = &ctx->fd_dev->vfd_lock;
> > +	dst_vq->dev = ctx->fd_dev->v4l2_dev.dev;
> > +
> > +	return vb2_queue_init(dst_vq);
> > +}
> > +
> > +static int mtk_fd_dev_g_ctrl(struct v4l2_ctrl *ctrl)
> > +{
> > +	struct mtk_fd_ctx *ctx = ctrl->priv;
> > +	int i;
> > +
> > +	switch (ctrl->id) {
> > +	case V4L2_CID_MTK_FD_SCALE_IMG_WIDTH:
> > +		for (i = 0; i < ctrl->elems; i++)
> > +			ctrl->p_new.p_u16[i] =
> > +				ctx->user_param.scale_img_width[i];
> > +		break;
> > +	case V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT:
> > +		for (i = 0; i < ctrl->elems; i++)
> > +			ctrl->p_new.p_u16[i] =
> > +				ctx->user_param.scale_img_height[i];
> > +		break;
> > +	case V4L2_CID_MTK_FD_DETECT_POSE:
> > +		ctrl->val = ctx->user_param.fd_pose;
> > +		break;
> > +	case V4L2_CID_MTK_FD_DETECT_SPEEDUP:
> > +		ctrl->val = ctx->user_param.fd_speedup;
> > +		break;
> > +	case V4L2_CID_MTK_FD_SCALE_IMG_NUM:
> > +		ctrl->val = ctx->user_param.scale_img_num;
> > +		break;
> > +	case V4L2_CID_MTK_FD_EXTRA_MODEL:
> > +		ctrl->val = ctx->user_param.fd_extra_model;
> > +		break;
> > +	default:
> > +		dev_dbg(ctx->dev, "%s: unexpected control: 0x%x:%d",
> > +			__func__, ctrl->id, ctrl->val);
> > +		return -EINVAL;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_dev_s_ctrl(struct v4l2_ctrl *ctrl)
> > +{
> > +	struct mtk_fd_ctx *ctx = ctrl->priv;
> > +	int i;
> > +
> > +	switch (ctrl->id) {
> > +	case V4L2_CID_MTK_FD_SCALE_IMG_WIDTH:
> > +		for (i = 0; i < ctrl->elems; i++)
> > +			ctx->user_param.scale_img_width[i] =
> > +				ctrl->p_new.p_u16[i];
> > +		break;
> > +	case V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT:
> > +		for (i = 0; i < ctrl->elems; i++)
> > +			ctx->user_param.scale_img_height[i] =
> > +				ctrl->p_new.p_u16[i];
> > +		break;
> > +	case V4L2_CID_MTK_FD_DETECT_POSE:
> > +		ctx->user_param.fd_pose = ctrl->val;
> > +		break;
> > +	case V4L2_CID_MTK_FD_DETECT_SPEEDUP:
> > +		ctx->user_param.fd_speedup = ctrl->val;
> > +		break;
> > +	case V4L2_CID_MTK_FD_SCALE_IMG_NUM:
> > +		ctx->user_param.scale_img_num = ctrl->val;
> > +		break;
> > +	case V4L2_CID_MTK_FD_EXTRA_MODEL:
> > +		ctx->user_param.fd_extra_model = ctrl->val;
> > +		break;
> > +	default:
> > +		dev_dbg(ctx->dev, "%s: unexpected control: 0x%x:%d",
> > +			__func__, ctrl->id, ctrl->val);
> > +		return -EINVAL;
> 
> No need to handle this, as the framework will only pass the controls we
> registered earlier to this function.
> 
Ok, I will remove this .

> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct v4l2_ctrl_ops mtk_fd_dev_ctrl_ops = {
> > +	.g_volatile_ctrl = mtk_fd_dev_g_ctrl,
> 
> I don't see any volatile controls below. No need to implement this callback.
> 
Ok, I will remove this .

> > +	.s_ctrl = mtk_fd_dev_s_ctrl,
> 
> I think you might not even need to implement this function (or just provide
> a dummy one that returns 0 if its required) if you just use the controls
> directly when preparing the job for the hardware. Check v4l2_ctrl_find().
> 
I will remove the mtk_fd_dev_ctrl_ops, and use the following function to
get control values when preparing the job for HW,

static void mtk_fd_fill_user_param(struct user_param *user_param,
   struct v4l2_ctrl_handler *hdl)
{
struct v4l2_ctrl *ctrl;
int i;
ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_IMG_WIDTH);
if (ctrl)
for (i = 0; i < ctrl->elems; i++)
user_param->scale_img_width[i] = ctrl->p_new.p_u16[i];

ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT);
if (ctrl)
for (i = 0; i < ctrl->elems; i++)
user_param->scale_img_height[i] = ctrl->p_new.p_u16[i];

ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECT_POSE);
if (ctrl)
user_param->fd_pose = ctrl->val;
pr_info("pose:%d\n", user_param->fd_pose);
ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_DETECT_SPEEDUP);
if (ctrl)
user_param->fd_speedup = ctrl->val;

ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_SCALE_IMG_NUM);
if (ctrl)
user_param->scale_img_num = ctrl->val;

ctrl = v4l2_ctrl_find(hdl, V4L2_CID_MTK_FD_EXTRA_MODEL);
if (ctrl)
user_param->fd_extra_model = ctrl->val;
}


> > +};
> > +
> > +struct v4l2_ctrl_config mtk_fd_controls[] = {
> > +	{
> > +	.ops = &mtk_fd_dev_ctrl_ops,
> > +	.id = V4L2_CID_MTK_FD_SCALE_IMG_WIDTH,
> > +	.name = "FD scale image widths",
> > +	.type = V4L2_CTRL_TYPE_U16,
> > +	.min = MTK_FD_OUTPUT_MIN_WIDTH,
> > +	.max = MTK_FD_OUTPUT_MAX_WIDTH,
> > +	.step = 1,
> > +	.def = MTK_FD_OUTPUT_MAX_WIDTH,
> > +	.dims = { FD_SCALE_NUM },
> 
> Something wrong with indentation here.
> 
Done.

> > +	},
> > +	{
> > +	.ops = &mtk_fd_dev_ctrl_ops,
> > +	.id = V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT,
> > +	.name = "FD scale image heights",
> > +	.type = V4L2_CTRL_TYPE_U16,
> > +	.min = MTK_FD_OUTPUT_MIN_HEIGHT,
> > +	.max = MTK_FD_OUTPUT_MAX_HEIGHT,
> > +	.step = 1,
> > +	.def = MTK_FD_OUTPUT_MAX_HEIGHT,
> > +	.dims = { FD_SCALE_NUM },
> > +	},
> > +	{
> > +	.ops = &mtk_fd_dev_ctrl_ops,
> > +	.id = V4L2_CID_MTK_FD_SCALE_IMG_NUM,
> > +	.name = "FD scale size counts",
> > +	.type = V4L2_CTRL_TYPE_INTEGER,
> > +	.min = 0,
> > +	.max = FACE_SIZE_NUM_MAX,
> > +	.step = 1,
> > +	.def = 0,
> > +	},
> > +	{
> > +	.ops = &mtk_fd_dev_ctrl_ops,
> > +	.id = V4L2_CID_MTK_FD_DETECT_POSE,
> > +	.name = "FD detect face pose",
> > +	.type = V4L2_CTRL_TYPE_INTEGER,
> > +	.min = 0,
> > +	.max = 3,
> > +	.step = 1,
> > +	.def = 0,
> > +	},
> > +	{
> > +	.ops = &mtk_fd_dev_ctrl_ops,
> > +	.id = V4L2_CID_MTK_FD_DETECT_SPEEDUP,
> > +	.name = "FD detection speedup",
> > +	.type = V4L2_CTRL_TYPE_INTEGER,
> > +	.min = 0,
> > +	.max = FD_MAX_SPEEDUP,
> > +	.step = 1,
> > +	.def = 0,
> > +	},
> > +	{
> > +	.ops = &mtk_fd_dev_ctrl_ops,
> > +	.id = V4L2_CID_MTK_FD_EXTRA_MODEL,
> > +	.name = "FD use extra model",
> > +	.type = V4L2_CTRL_TYPE_BOOLEAN,
> > +	.min = 0,
> > +	.max = 1,
> > +	.step = 1,
> > +	.def = 0,
> > +	},
> > +};
> > +
> > +static int mtk_fd_ctrls_setup(struct mtk_fd_ctx *ctx)
> > +{
> > +	struct v4l2_ctrl_handler *hdl = &ctx->hdl;
> > +	struct v4l2_ctrl *ctl;
> > +	int i;
> > +
> > +	v4l2_ctrl_handler_init(hdl, V4L2_CID_MTK_FD_MAX);
> > +	if (hdl->error)
> > +		return hdl->error;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(mtk_fd_controls); i++) {
> > +		ctl = v4l2_ctrl_new_custom(hdl, &mtk_fd_controls[i], ctx);
> > +		if (hdl->error) {
> > +			v4l2_ctrl_handler_free(hdl);
> > +			dev_err(ctx->dev, "Failed to register controls:%d", i);
> > +			return hdl->error;
> > +		}
> > +	}
> > +
> > +	ctx->fh.ctrl_handler = &ctx->hdl;
> > +	v4l2_ctrl_handler_setup(hdl);
> > +
> > +	return 0;
> > +}
> > +
> > +static unsigned int get_fd_img_fmt(unsigned int fourcc)
> > +{
> > +	switch (fourcc) {
> > +	case V4L2_PIX_FMT_VYUY:
> > +		return FMT_VYUY;
> > +	case V4L2_PIX_FMT_YUYV:
> > +		return FMT_YUYV;
> > +	case V4L2_PIX_FMT_YVYU:
> > +		return FMT_YVYU;
> > +	case V4L2_PIX_FMT_UYVY:
> > +		return FMT_UYVY;
> > +	default:
> > +		return FMT_UNKNOWN;
> > +	}
> > +}
> > +
> > +static void init_ctx_fmt(struct mtk_fd_ctx *ctx)
> > +{
> > +	const struct v4l2_pix_format_mplane *fmt;
> > +
> > +	/* Initialize M2M source fmt */
> > +	fmt = &in_img_fmts[0];
> > +	mtk_fd_fill_pixfmt_mp(&ctx->src_fmt, fmt);
> > +
> > +	/* Initialize M2M destination fmt */
> > +	ctx->dst_fmt.buffersize = fw_param_fmts[0].buffersize;
> > +	ctx->dst_fmt.dataformat = fw_param_fmts[0].dataformat;
> 
> Why not just make dst_fmt a pointer and assign &fw_params_fmt[0] there?
> 
fw_params_fmt[] will be removed and assign the const value here.

ctx->dst_fmt.buffersize = sizeof(struct fd_user_output);
ctx->dst_fmt.dataformat = V4L2_META_FMT_MTFD_RESULT; 
> > +}
> > +
> > +/*
> > + * V4L2 file operations.
> > + */
> > +static int mtk_vfd_open(struct file *filp)
> > +{
> > +	struct mtk_fd_dev *fd_dev = video_drvdata(filp);
> > +	struct video_device *vdev = video_devdata(filp);
> > +	struct mtk_fd_ctx *ctx;
> > +	int ret;
> > +
> > +	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
> > +	if (!ctx)
> > +		return -ENOMEM;
> > +
> > +	ctx->fd_dev = fd_dev;
> > +	ctx->dev = &fd_dev->pdev->dev;
> > +
> > +	v4l2_fh_init(&ctx->fh, vdev);
> > +	filp->private_data = &ctx->fh;
> > +
> > +	init_ctx_fmt(ctx);
> > +
> > +	ret = mtk_fd_ctrls_setup(ctx);
> > +	if (ret) {
> > +		dev_err(ctx->dev, "Failed to set up controls:%d\n", ret);
> > +		goto err_fh_ctrl;
> > +	}
> > +
> > +	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(fd_dev->m2m_dev, ctx,
> > +					    &mtk_fd_queue_init);
> > +	if (IS_ERR(ctx->fh.m2m_ctx)) {
> > +		ret = PTR_ERR(ctx->fh.m2m_ctx);
> > +		goto err_init_ctx;
> > +	}
> > +
> > +	v4l2_fh_add(&ctx->fh);
> > +
> > +	return 0;
> > +
> > +err_init_ctx:
> > +	v4l2_ctrl_handler_free(&ctx->hdl);
> > +err_fh_ctrl:
> > +	v4l2_fh_exit(&ctx->fh);
> > +	kfree(ctx);
> > +
> > +	return ret;
> > +}
> > +
> > +static int mtk_vfd_release(struct file *filp)
> > +{
> > +	struct mtk_fd_ctx *ctx = container_of(filp->private_data,
> > +					      struct mtk_fd_ctx, fh);
> > +
> > +	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
> > +
> > +	v4l2_ctrl_handler_free(&ctx->hdl);
> > +	v4l2_fh_del(&ctx->fh);
> > +	v4l2_fh_exit(&ctx->fh);
> > +
> > +	kfree(ctx);
> > +
> > +	return 0;
> > +}
> > +
> > +static const struct v4l2_file_operations fd_video_fops = {
> > +	.owner = THIS_MODULE,
> > +	.open = mtk_vfd_open,
> > +	.release = mtk_vfd_release,
> > +	.poll = v4l2_m2m_fop_poll,
> > +	.unlocked_ioctl = video_ioctl2,
> > +	.mmap = v4l2_m2m_fop_mmap,
> 
> We also need the compat_ioctl here for 32-bit userspace on 64-bit kernels.
> 
Done.

> > +};
> > +
> > +static void mtk_fd_device_run(void *priv)
> > +{
> > +	struct mtk_fd_ctx *ctx = priv;
> > +	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> > +	struct fd_hw_param fd_param;
> > +	void *fd_result_vaddr;
> > +
> > +	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> > +	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
> > +
> > +	memset(&fd_param, 0, sizeof(fd_param));
> > +
> > +	fd_param.src_img.dma_addr =
> > +		vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
> > +	fd_param.user_result.dma_addr =
> > +		vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
> > +	fd_result_vaddr = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
> > +
> > +	ctx->user_param.src_img_fmt = get_fd_img_fmt(ctx->src_fmt.pixelformat);
> > +	memcpy(&fd_param.user_param, &ctx->user_param, sizeof(ctx->user_param));
> > +
> > +	/* Complete request controls if any */
> > +	v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
> > +
> > +	mtk_fd_hw_job_exec(&ctx->fd_dev->fd_hw, &fd_param, fd_result_vaddr);
> > +}
> > +
> > +static struct v4l2_m2m_ops fd_m2m_ops = {
> > +	.device_run = mtk_fd_device_run,
> > +};
> > +
> > +static int mtk_fd_request_validate(struct media_request *req)
> > +{
> > +	unsigned int count;
> > +
> > +	count = vb2_request_buffer_cnt(req);
> > +	if (!count)
> > +		return -ENOENT;
> 
> Why -ENOENT?
> 
Reference the return code in vb2_request_validate()
I consider refining as following:
static int mtk_fd_request_validate(struct media_request *req)
{
if (vb2_request_buffer_cnt(req) > 1)
return -EINVAL;

return vb2_request_validate(req);
}
or maybe I don't need to worry the request count greater than 1,
just remove this function and set vb2_request_validate as .req_validate
directly?

> > +	else if (count > 1)
> > +		return -EINVAL;
> > +
> > +	return vb2_request_validate(req);
> > +}
> > +
> > +static const struct media_device_ops fd_m2m_media_ops = {
> > +	.req_validate	= mtk_fd_request_validate,
> > +	.req_queue	= v4l2_m2m_request_queue,
> > +};
> > +
> > +static int mtk_fd_video_device_register(struct mtk_fd_dev *fd_dev)
> > +{
> > +	struct video_device *vfd = &fd_dev->vfd;
> > +	struct v4l2_m2m_dev *m2m_dev = fd_dev->m2m_dev;
> > +	struct device *dev = &fd_dev->pdev->dev;
> > +	int function, ret;
> > +
> > +	vfd->fops = &fd_video_fops;
> > +	vfd->release = video_device_release;
> > +	vfd->lock = &fd_dev->vfd_lock;
> > +	vfd->v4l2_dev = &fd_dev->v4l2_dev;
> > +	vfd->vfl_dir = VFL_DIR_M2M;
> > +	vfd->device_caps = V4L2_CAP_STREAMING  | V4L2_CAP_VIDEO_OUTPUT_MPLANE |
> > +		V4L2_CAP_META_CAPTURE;
> > +	vfd->ioctl_ops = &mtk_fd_v4l2_video_out_ioctl_ops;
> > +
> > +	strscpy(vfd->name, "MTK-FD-V4L2", sizeof(vfd->name));
> 
> Please use dev_driver_string().
> 
Done.

> > +	video_set_drvdata(vfd, fd_dev);
> > +
> > +	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to register video device\n");
> > +		goto err_free_dev;
> > +	}
> > +
> > +	function = MEDIA_ENT_F_PROC_VIDEO_DECODER;
> 
> MEDIA_ENT_F_PROC_VIDEO_STATISTICS fits here much more.
> 
> Also nit: Any reason to have this assigned to this intermediate variable
> rather than just directly passed to the function?
> 
Too long for 80 columns, I will remove the intermediate variable and
refine as following:
ret = v4l2_m2m_register_media_controller(m2m_dev, vfd,
MEDIA_ENT_F_PROC_VIDEO_STATISTICS);

> > +	ret = v4l2_m2m_register_media_controller(m2m_dev, vfd, function);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to init mem2mem media controller\n");
> > +		goto err_unreg_video;
> > +	}
> > +	return 0;
> > +
> > +err_unreg_video:
> > +	video_unregister_device(vfd);
> > +err_free_dev:
> > +	video_device_release(vfd);
> > +	return ret;
> > +}
> > +
> > +static int mtk_fd_dev_v4l2_init(struct mtk_fd_dev *fd_dev)
> > +{
> > +	struct media_device *mdev = &fd_dev->mdev;
> > +	struct device *dev = &fd_dev->pdev->dev;
> > +	int ret;
> > +
> > +	ret = v4l2_device_register(dev, &fd_dev->v4l2_dev);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to register v4l2 device\n");
> > +		return ret;
> > +	}
> > +
> > +	fd_dev->m2m_dev = v4l2_m2m_init(&fd_m2m_ops);
> > +	if (IS_ERR(fd_dev->m2m_dev)) {
> > +		dev_err(dev, "Failed to init mem2mem device\n");
> > +		ret = PTR_ERR(fd_dev->m2m_dev);
> > +		goto fail_m2m_dev;
> 
> Please name the labels after the next clean-up step to be done, not the
> failure.
> 
Ok, I will refine as following:
err_unreg_vdev:
v4l2_m2m_unregister_media_controller(fd_dev->m2m_dev);
video_unregister_device(&fd_dev->vfd);
video_device_release(&fd_dev->vfd);
err_cleanup_mdev:
media_device_cleanup(mdev);
v4l2_m2m_release(fd_dev->m2m_dev);
err_unreg_v4l2_dev:
v4l2_device_unregister(&fd_dev->v4l2_dev);

> > +	}
> > +
> > +	mdev->dev = dev;
> > +	strscpy(mdev->model, "MTK-FD-V4L2", sizeof(mdev->model));
> 
> Could we just use dev_driver_string() here instead?
> 
Done.

> > +	snprintf(mdev->bus_info, sizeof(mdev->bus_info),
> > +		 "platform:%s", dev_name(dev));
> > +	media_device_init(mdev);
> > +	mdev->ops = &fd_m2m_media_ops;
> > +	fd_dev->v4l2_dev.mdev = mdev;
> > +
> > +	ret = mtk_fd_video_device_register(fd_dev);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to register video device\n");
> > +		goto err_vdev;
> > +	}
> > +
> > +	ret = media_device_register(mdev);
> > +	if (ret) {
> > +		dev_err(dev, "Failed to register mem2mem media device\n");
> > +		goto fail_mdev;
> > +	}
> > +
> > +	return 0;
> > +
> > +fail_mdev:
> > +	v4l2_m2m_unregister_media_controller(fd_dev->m2m_dev);
> > +	video_unregister_device(&fd_dev->vfd);
> > +	video_device_release(&fd_dev->vfd);
> > +err_vdev:
> > +	media_device_cleanup(mdev);
> > +	v4l2_m2m_release(fd_dev->m2m_dev);
> > +fail_m2m_dev:
> > +	v4l2_device_unregister(&fd_dev->v4l2_dev);
> > +	return ret;
> > +}
> > +
> > +static void mtk_fd_dev_v4l2_release(struct mtk_fd_dev *fd_dev)
> > +{
> > +	v4l2_m2m_unregister_media_controller(fd_dev->m2m_dev);
> > +	video_unregister_device(&fd_dev->vfd);
> > +	video_device_release(&fd_dev->vfd);
> > +	media_device_cleanup(&fd_dev->mdev);
> > +	v4l2_m2m_release(fd_dev->m2m_dev);
> > +	v4l2_device_unregister(&fd_dev->v4l2_dev);
> > +}
> > +
> > +static irqreturn_t mtk_fd_irq(int irq, void *data)
> > +{
> > +	struct mtk_fd_hw *fd_hw = (struct mtk_fd_hw *)data;
> > +
> > +	fd_hw->fd_irq_result = readl(fd_hw->fd_base + FD_INT);
> 
> We should be able to just handle the job completion directly from here.
> 
Ok, I will try to handle the job completion from here.

> > +	wake_up_interruptible(&fd_hw->wq);
> > +	return IRQ_HANDLED;
> > +}
> > +
> > +static int mtk_fd_hw_get_scp_mem(struct mtk_fd_hw *fd_hw,
> > +				 struct fd_buffer *scp_mem)
> > +{
> > +	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
> > +	struct device *dev = &fd_dev->pdev->dev;
> > +	dma_addr_t addr;
> > +	u32 size;
> > +
> > +	scp_mem->scp_addr = scp_get_reserve_mem_phys(SCP_FD_MEM_ID);
> > +	size = scp_get_reserve_mem_size(SCP_FD_MEM_ID);
> > +	if (!scp_mem->scp_addr || !size)
> > +		return -EPROBE_DEFER;
> 
> Why -EPROBE_DEFER? Should we have some way to distinguish between the SCP
> driver not being initialized yet and some other error?
> 
Ok, I will fix it to -ENOMEM.
Not being initialized or some other error can be found during device
probe. 
(will also load scp stuff there)

> > +
> > +	/* get dma addr address */
> > +	addr = dma_map_page_attrs(dev, phys_to_page(scp_mem->scp_addr), 0,
> > +				  size, DMA_BIDIRECTIONAL,
> 
> Should this be really bidirectional?
> 
I will fix it to DMA_TO_DEVICE

> > +				  DMA_ATTR_SKIP_CPU_SYNC);
> > +	if (dma_mapping_error(dev, addr)) {
> > +		scp_mem->scp_addr = 0;
> > +		dev_err(dev, "Failed to map scp addr\n");
> > +		return -ENOMEM;
> > +	}
> > +	scp_mem->dma_addr = addr;
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_probe(struct platform_device *pdev)
> > +{
> > +	struct mtk_fd_dev *fd_dev;
> 
> nit: Perhaps just fd, to have shorter code?
> 
Ok, I will refine as 
    struct mtk_fd_dev *fd; 
> > +	struct device *dev = &pdev->dev;
> > +	struct mtk_fd_hw *fd_hw;
> > +	struct resource *res;
> > +	int irq;
> > +	int ret;
> > +
> > +	fd_dev = devm_kzalloc(&pdev->dev, sizeof(*fd_dev), GFP_KERNEL);
> > +	if (!fd_dev)
> > +		return -ENOMEM;
> > +
> > +	dev_set_drvdata(dev, fd_dev);
> > +	fd_hw = &fd_dev->fd_hw;
> > +	fd_dev->pdev = pdev;
> 
> Is pdev useful for anything? Perhaps you want to store &pdev->dev instead?
> 
Yes, I will store that instead. 
> > +
> > +	irq = platform_get_irq(pdev, 0);
> > +	if (irq < 0) {
> > +		dev_err(dev, "no IRQ:%d resource info\n", irq);
> > +		return irq;
> > +	}
> > +	ret = devm_request_irq(dev, irq, mtk_fd_irq, IRQF_SHARED,
> > +			       dev_driver_string(dev),
> > +			       fd_hw);
> 
> Why not just fd_dev?
> 
Ok, I will use fd_dev here.

> > +	if (ret) {
> > +		dev_err(dev, "req_irq fail:%d\n", irq);
> > +		return ret;
> > +	}
> > +
> > +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> > +	fd_hw->fd_base = devm_ioremap_resource(dev, res);
> > +	if (!fd_hw->fd_base) {
> > +		dev_err(dev, "unable to get fd reg base\n");
> > +		return PTR_ERR(fd_hw->fd_base);
> > +	}
> > +
> > +	fd_hw->fd_clk = devm_clk_get(dev, "fd");
> > +	if (IS_ERR(fd_hw->fd_clk)) {
> > +		dev_err(dev, "cannot get fd_clk_img_fd clock\n");
> > +		return PTR_ERR(fd_hw->fd_clk);
> > +	}
> > +
> > +	ret = mtk_fd_hw_get_scp_mem(fd_hw, &fd_hw->scp_mem);
> > +	if (ret) {
> > +		dev_err(dev, "scp memory init failed: %d\n", ret);
> 
> nit: Could we make the error messages a bit more consistent? For example
> "Failed to get IRQ resource (%d)", "Failed to request IRQ (%d)", "Failed to
> ... (%d)", etc.
> 
Ok, I will fix it.

> > +		return ret;
> > +	}
> > +
> > +	atomic_set(&fd_hw->fd_user_cnt, 0);
> > +	init_waitqueue_head(&fd_hw->wq);
> > +	mutex_init(&fd_dev->vfd_lock);
> > +	mutex_init(&fd_hw->fd_hw_lock);
> > +	pm_runtime_enable(dev);
> > +
> > +	ret = mtk_fd_dev_v4l2_init(fd_dev);
> > +	if (ret) {
> > +		mutex_destroy(&fd_dev->fd_hw.fd_hw_lock);
> > +		mutex_destroy(&fd_dev->vfd_lock);
> > +		pm_runtime_disable(&pdev->dev);
> 
> Please move the clean-up to an error path on the bottom of the function.
> 
Done.

> > +		dev_err(dev, "v4l2 init failed: %d\n", ret);
> > +		return ret;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_remove(struct platform_device *pdev)
> > +{
> > +	struct mtk_fd_dev *fd_dev = dev_get_drvdata(&pdev->dev);
> > +
> > +	mtk_fd_dev_v4l2_release(fd_dev);
> > +	mutex_destroy(&fd_dev->fd_hw.fd_hw_lock);
> > +	mutex_destroy(&fd_dev->vfd_lock);
> > +	pm_runtime_disable(&pdev->dev);
> 
> The order of calls in remove should normally be the opposite to probe.
> 
Ok, I will refine as following

mtk_fd_dev_v4l2_release(fd_dev);
pm_runtime_disable(&pdev->dev);
mutex_destroy(&fd_dev->fd_hw.fd_hw_lock);
mutex_destroy(&fd_dev->vfd_lock);
rproc_put(fd_dev->fd_hw.rproc_handle);

> > +
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_suspend(struct device *dev)
> > +{
> > +	struct mtk_fd_dev *fd_dev = dev_get_drvdata(dev);
> > +
> > +	if (pm_runtime_suspended(dev))
> > +		return 0;
> > +
> > +	/* suspend FD HW */
> > +	writel(0x0, fd_dev->fd_hw.fd_base + FD_HW_ENABLE);
> > +	writel(0x0, fd_dev->fd_hw.fd_base + FD_INT_EN);
> > +	clk_disable_unprepare(fd_dev->fd_hw.fd_clk);
> > +	return 0;
> > +}
> > +
> > +static int mtk_fd_resume(struct device *dev)
> > +{
> > +	struct mtk_fd_dev *fd_dev = dev_get_drvdata(dev);
> > +	int ret;
> > +
> > +	if (pm_runtime_suspended(dev))
> > +		return 0;
> > +
> > +	ret = clk_prepare_enable(fd_dev->fd_hw.fd_clk);
> > +	if (ret < 0) {
> > +		dev_dbg(dev, "open fd clk failed\n");
> > +		clk_disable_unprepare(fd_dev->fd_hw.fd_clk);
> > +	}
> > +
> > +	/* resume FD HW */
> > +	writel(ENABLE_FD, fd_dev->fd_hw.fd_base + FD_HW_ENABLE);
> > +	if (fd_dev->fd_hw.state == FD_ENQ)
> > +		writel(0x1, fd_dev->fd_hw.fd_base + FD_INT_EN);
> > +	return 0;
> > +}
> > +
> > +static const struct dev_pm_ops mtk_fd_pm_ops = {
> > +	SET_SYSTEM_SLEEP_PM_OPS(mtk_fd_suspend, mtk_fd_resume)
> > +	SET_RUNTIME_PM_OPS(mtk_fd_suspend, mtk_fd_resume, NULL)
> 
> We need separate runtime and system PM ops. Their behavior is expected to be
> different.
> 
> For runtime PM ops, you the functions should just unconditionally power on
> or power off the device.
> 
> For system PM ops, suspend needs to make sure that no further job is queued
> to the hardware and that any job being already processed by the hardware is
> completed. Resume needs to resume the processing if there are any further
> jobs to be queued to the hardware.
> 
Ok, for runtime suspend/resume I will add the following functions:
static int mtk_fd_runtime_suspend(struct device *dev)
{
struct mtk_fd_dev *fd_dev = dev_get_drvdata(dev);

dev_dbg(dev, "%s:disable clock\n", __func__);
clk_disable_unprepare(fd_dev->fd_hw.fd_clk);

return 0;
}

static int mtk_fd_runtime_resume(struct device *dev)
{
struct mtk_fd_dev *fd_dev = dev_get_drvdata(dev);
int ret;

dev_dbg(dev, "%s:enable clock\n", __func__);

ret = clk_prepare_enable(fd_dev->fd_hw.fd_clk);
if (ret < 0) {
dev_err(dev, "Failed to open fd clk:%d\n", ret);
return ret;
}

return 0;
}

Best regards,
Jerry 
> > +};
> > +
> > +static const struct of_device_id mtk_fd_of_ids[] = {
> > +	{ .compatible = "mediatek,mt8183-fd", },
> > +	{}
> > +};
> > +MODULE_DEVICE_TABLE(of, mtk_fd_of_ids);
> > +
> > +static struct platform_driver mtk_fd_driver = {
> > +	.probe   = mtk_fd_probe,
> > +	.remove  = mtk_fd_remove,
> > +	.driver  = {
> > +		.name  = "mtk-fd-4.0",
> > +		.of_match_table = of_match_ptr(mtk_fd_of_ids),
> > +		.pm = &mtk_fd_pm_ops,
> > +	}
> > +};
> > +module_platform_driver(mtk_fd_driver);
> > +
> > +MODULE_DESCRIPTION("Mediatek FD driver");
> > +MODULE_LICENSE("GPL");
> 
> GPL v2
> 
> Best regards,
> Tomasz
>
Tomasz Figa Aug. 26, 2019, 6:36 a.m. UTC | #8
Hi Jerry,

On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
<Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > Hi Jerry,
> >
> > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > >
> > > This patch adds the driver of Face Detection (FD) unit in
> > > Mediatek camera system, providing face detection function.
> > >
> > > The mtk-isp directory will contain drivers for multiple IP
> > > blocks found in Mediatek ISP system. It will include ISP Pass 1
> > > driver (CAM), sensor interface driver, DIP driver and face
> > > detection driver.
> > >
> > > Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > > ---
> > >  drivers/media/platform/Makefile               |    2 +
> > >  drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
> > >  drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  157 +++
> > >  drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1259 +++++++++++++++++++++++++
> > >  include/uapi/linux/v4l2-controls.h            |    4 +
> > >  5 files changed, 1427 insertions(+)
> > >  create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
> > >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> > >
> >
> > Thanks for the patch! I finally got a chance to fully review the code. Sorry
> > for the delay. Please check my comments inline.
> >
> I appreciate your comments.
> I've fixed most of the comments and verifying them,
> Sorry for the delay, here is the reply.
>

Thanks for replying to all the comments, it's very helpful. I'll snip
the parts that I don't have any further comments.

[snip]

> > > +   if (usercount == 1) {
> > > +           pm_runtime_get_sync(&fd_dev->pdev->dev);
> > > +           if (mtk_fd_hw_enable(fd_hw)) {
> > > +                   pm_runtime_put_sync(&fd_dev->pdev->dev);
> > > +                   atomic_dec_return(&fd_hw->fd_user_cnt);
> > > +                   mutex_unlock(&fd_hw->fd_hw_lock);
> > > +                   return -EINVAL;
> > > +           }
> > > +   }
> >
> > This is a simple mem-to-mem device, so there is no reason to keep it active
> > all the time it's streaming. Please just get the runtime PM counter when
> > queuing a job to the hardware and release it when the job finishes.
> >
> > I guess we might still want to do the costly operations like rproc_boot()
> > when we start streaming, though.
> >
> Do you mean by moving the pm_runtime_get/put stuff to the job execution
> and job finish place?

Yes.

> the rproc_boot() operation will be done here.
>

How much time does the rproc_boot() operation take?

[snip]

> > > +
> > > +           pm_runtime_put_sync(&fd_dev->pdev->dev);
> >
> > Any reason to use pm_runtime_put_sync() over pm_runtime_put()?
> >
> No special reason to do so, the pm_runtime_put_sync here will be moved
> to the place each job finished.
>

If there is no reason, then the _sync() variant shouldn't be used, as
it could affect the performance negatively.

[snip]

> > > +static int mtk_fd_hw_job_exec(struct mtk_fd_hw *fd_hw,
> > > +                         struct fd_hw_param *fd_param,
> > > +                         void *output_vaddr)
> > > +{
> > > +   struct fd_user_output *fd_output;
> > > +   struct ipi_message fd_ipi_msg;
> > > +   int ret;
> > > +   u32 num;
> > > +
> > > +   if (fd_param->user_param.src_img_fmt == FMT_UNKNOWN)
> > > +           goto param_err;
> >
> > Is this possible?
> >
> Only if user set wrong format, I will remove this.
>

It shouldn't be possible to set a wrong format, because TRY_/S_FMT
should adjust what the user set to something that is valid.

> > > +
> > > +   mutex_lock(&fd_hw->fd_hw_lock);
> > > +   fd_hw->state = FD_ENQ;
> >
> > What is this state for?
> >
> It was for checking status, if a job is processing, the state is
> FD_ENQ,
> then we should wait for the last job to be done when pm_suspend().
>

If so, would it be possible to make it a bool and call is_processing?

[snip]

> > > +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > +                             unsigned int *num_buffers,
> > > +                             unsigned int *num_planes,
> > > +                             unsigned int sizes[],
> > > +                             struct device *alloc_devs[])
> > > +{
> > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > +   struct device *dev = ctx->dev;
> > > +   unsigned int size;
> > > +
> > > +   switch (vq->type) {
> > > +   case V4L2_BUF_TYPE_META_CAPTURE:
> > > +           size = ctx->dst_fmt.buffersize;
> > > +           break;
> > > +   case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > +           size = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > +           break;
> > > +   default:
> > > +           dev_err(dev, "invalid queue type: %d\n", vq->type);
> >
> > We should need to handle this.
> >
> Do you mean by giving a size instead of return -EINVAL?
>

Sorry, typo. I meant we shouldn't need to handle it, because we can't
get any other queue type here.

[snip]

> > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > +{
> > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > +   struct vb2_buffer *vb;
> >
> > How do we guarantee here that the hardware isn't still accessing the buffers
> > removed below?
> >
> Maybe we can check the driver state flag and aborting the unfinished
> jobs?
> (fd_hw->state == FD_ENQ)
>

Yes, we need to either cancel or wait for the currently processing
job. It depends on hardware capabilities, but cancelling is generally
preferred for the lower latency.

> > > +
> > > +   if (V4L2_TYPE_IS_OUTPUT(vq->type))
> > > +           vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> > > +   else
> > > +           vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> > > +
> > > +   while (vb) {
> > > +           v4l2_m2m_buf_done(to_vb2_v4l2_buffer(vb), VB2_BUF_STATE_ERROR);
> > > +           if (V4L2_TYPE_IS_OUTPUT(vq->type))
> > > +                   vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> > > +           else
> > > +                   vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> > > +   }
> >
> > We can use v4l2_m2m_buf_remove(). Also we can move the call into the loop
> > condition:
> >
> > while ((vb == v4l2_m2m_buf_remove(...)))
> >       v4l2_m2m_buf_done(...);
> >
> Ok, I will refine as following:
>
> while ((vb = v4l2_m2m_buf_remove(V4L2_TYPE_IS_OUTPUT(vq->type)?
>   &m2m_ctx->out_q_ctx :
>   &m2m_ctx->cap_q_ctx)))
> v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);

Please move the queue type check before the loop and save the queue
context in a local variable.

[snip]

> > > +}
> > > +
> > > +static void mtk_fd_vb2_request_complete(struct vb2_buffer *vb)
> > > +{
> > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> > > +
> > > +   v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
> > > +}
> > > +
> > > +static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> > > +                             const struct v4l2_pix_format_mplane *sfmt)
> > > +{
> > > +   dfmt->width = sfmt->width;
> > > +   dfmt->height = sfmt->height;
> > > +   dfmt->pixelformat = sfmt->pixelformat;
> > > +   dfmt->field = sfmt->field;
> > > +   dfmt->colorspace = sfmt->colorspace;
> > > +   dfmt->num_planes = sfmt->num_planes;
> > > +
> > > +   /* Use default */
> > > +   dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > > +   dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> > > +   dfmt->xfer_func =
> > > +           V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> > > +   dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> > > +   dfmt->plane_fmt[0].sizeimage =
> > > +           dfmt->height * dfmt->plane_fmt[0].bytesperline;
> > > +   memset(dfmt->reserved, 0, sizeof(dfmt->reserved));
> > > +}
> >
> > Could we unify this function with mtk_fd_m2m_try_fmt_out_mp()? That function
> > should be almost directly reusable for the default format initialization +/-
> > the arguments passed to it.
> >
> Ok, I will try to reuse it as following:
>
> static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
>   const struct v4l2_pix_format_mplane *sfmt)
> {
> dfmt->field = V4L2_FIELD_NONE;
> dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> dfmt->num_planes = sfmt->num_planes;
> dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> dfmt->xfer_func =
> V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
>
> /* Keep user setting as possible */
> dfmt->width = clamp(dfmt->width,
>     MTK_FD_OUTPUT_MIN_WIDTH,
>     MTK_FD_OUTPUT_MAX_WIDTH);
> dfmt->height = clamp(dfmt->height,
>      MTK_FD_OUTPUT_MIN_HEIGHT,
>      MTK_FD_OUTPUT_MAX_HEIGHT);
>
> if (sfmt->num_planes == 2) {
> /* NV16M and NV61M has 1 byte per pixel */
> dfmt->plane_fmt[0].bytesperline = dfmt->width;
> dfmt->plane_fmt[1].bytesperline = dfmt->width;
> } else {
> /* 2 bytes per pixel */
> dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> }
>
> dfmt->plane_fmt[0].sizeimage =
> dfmt->height * dfmt->plane_fmt[0].bytesperline;
> }

How would the implementation of TRY_FMT look in this case?

[snip]

> > > +static int mtk_fd_m2m_enum_fmt_out_mp(struct file *file, void *fh,
> > > +                                 struct v4l2_fmtdesc *f)
> > > +{
> > > +   int i;
> > > +
> > > +   for (i = 0; i < NUM_FORMATS; ++i) {
> > > +           if (i == f->index) {
> > > +                   f->pixelformat = in_img_fmts[i].pixelformat;
> > > +                   return 0;
> > > +           }
> > > +   }
> >
> > Why don't we just check if f->index is within the [0, ARRAY_SIZE()-1] bounds
> > and then just use it to index the array directly?
> >
> I will refine as :
>
> static int mtk_fd_m2m_enum_fmt_out_mp(struct file *file, void *fh,
>       struct v4l2_fmtdesc *f)
> {
> if ((f->index >= 0) && (f->index < NUM_FORMATS)) {

f->index is unsigned

> f->pixelformat = in_img_fmts[f->index].pixelformat;
> return 0;
> }
>
> return -EINVAL;
> }

nit: The usual convention is to check for invalid values and return early, i.e.

if (f->index >= NUM_FORMATS)
    return -EINVAL;

f->pixelformat = in_img_fmts[f->index].pixelformat;
return 0;

> > > +
> > > +   return -EINVAL;
> > > +}
> > > +
> > > +static int mtk_fd_m2m_try_fmt_out_mp(struct file *file,
> > > +                                void *fh,
> > > +                                struct v4l2_format *f)
> >
> > I think we could just shorten the function prefixes to "mtk_fd_".
> >
> Do you mean by replace mtk_fd_m2m_* with mtk_fd_ ?
>

Yes.

[snip]

> > > +static int mtk_fd_request_validate(struct media_request *req)
> > > +{
> > > +   unsigned int count;
> > > +
> > > +   count = vb2_request_buffer_cnt(req);
> > > +   if (!count)
> > > +           return -ENOENT;
> >
> > Why -ENOENT?
> >
> Reference the return code in vb2_request_validate()

You're right, -ENOENT seems to be the right error code here.

> I consider refining as following:
> static int mtk_fd_request_validate(struct media_request *req)
> {
> if (vb2_request_buffer_cnt(req) > 1)
> return -EINVAL;
>
> return vb2_request_validate(req);
> }
> or maybe I don't need to worry the request count greater than 1,
> just remove this function and set vb2_request_validate as .req_validate
> directly?
>

Given that we only have 1 queue handling requests, we should be able
to just use vb2_request_validate() indeed.

Best regards,
Tomasz
Jerry-ch Chen Aug. 28, 2019, 2 a.m. UTC | #9
Hi Tomasz,

On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> Hi Jerry,
> 
> On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > Hi Jerry,
> > >
> > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > > >
> > > > This patch adds the driver of Face Detection (FD) unit in
> > > > Mediatek camera system, providing face detection function.
> > > >
> > > > The mtk-isp directory will contain drivers for multiple IP
> > > > blocks found in Mediatek ISP system. It will include ISP Pass 1
> > > > driver (CAM), sensor interface driver, DIP driver and face
> > > > detection driver.
> > > >
> > > > Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > > > ---
> > > >  drivers/media/platform/Makefile               |    2 +
> > > >  drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
> > > >  drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  157 +++
> > > >  drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1259 +++++++++++++++++++++++++
> > > >  include/uapi/linux/v4l2-controls.h            |    4 +
> > > >  5 files changed, 1427 insertions(+)
> > > >  create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
> > > >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > > >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> > > >
> > >
> > > Thanks for the patch! I finally got a chance to fully review the code. Sorry
> > > for the delay. Please check my comments inline.
> > >
> > I appreciate your comments.
> > I've fixed most of the comments and verifying them,
> > Sorry for the delay, here is the reply.
> >
> 
> Thanks for replying to all the comments, it's very helpful. I'll snip
> the parts that I don't have any further comments.
> 
> [snip]
> 
> > > > +   if (usercount == 1) {
> > > > +           pm_runtime_get_sync(&fd_dev->pdev->dev);
> > > > +           if (mtk_fd_hw_enable(fd_hw)) {
> > > > +                   pm_runtime_put_sync(&fd_dev->pdev->dev);
> > > > +                   atomic_dec_return(&fd_hw->fd_user_cnt);
> > > > +                   mutex_unlock(&fd_hw->fd_hw_lock);
> > > > +                   return -EINVAL;
> > > > +           }
> > > > +   }
> > >
> > > This is a simple mem-to-mem device, so there is no reason to keep it active
> > > all the time it's streaming. Please just get the runtime PM counter when
> > > queuing a job to the hardware and release it when the job finishes.
> > >
> > > I guess we might still want to do the costly operations like rproc_boot()
> > > when we start streaming, though.
> > >
> > Do you mean by moving the pm_runtime_get/put stuff to the job execution
> > and job finish place?
> 
> Yes.
> 
> > the rproc_boot() operation will be done here.
> >
> 
> How much time does the rproc_boot() operation take?
> 

About 0.7~1.3ms, average 0.8ms (14 measurements)

> [snip]
> 
> > > > +
> > > > +           pm_runtime_put_sync(&fd_dev->pdev->dev);
> > >
> > > Any reason to use pm_runtime_put_sync() over pm_runtime_put()?
> > >
> > No special reason to do so, the pm_runtime_put_sync here will be moved
> > to the place each job finished.
> >
> 
> If there is no reason, then the _sync() variant shouldn't be used, as
> it could affect the performance negatively.
> 

Ok, I will use pm_runtime_put();

> [snip]
> 
> > > > +static int mtk_fd_hw_job_exec(struct mtk_fd_hw *fd_hw,
> > > > +                         struct fd_hw_param *fd_param,
> > > > +                         void *output_vaddr)
> > > > +{
> > > > +   struct fd_user_output *fd_output;
> > > > +   struct ipi_message fd_ipi_msg;
> > > > +   int ret;
> > > > +   u32 num;
> > > > +
> > > > +   if (fd_param->user_param.src_img_fmt == FMT_UNKNOWN)
> > > > +           goto param_err;
> > >
> > > Is this possible?
> > >
> > Only if user set wrong format, I will remove this.
> >
> 
> It shouldn't be possible to set a wrong format, because TRY_/S_FMT
> should adjust what the user set to something that is valid.
> 

Ok, this will be removed.

> > > > +
> > > > +   mutex_lock(&fd_hw->fd_hw_lock);
> > > > +   fd_hw->state = FD_ENQ;
> > >
> > > What is this state for?
> > >
> > It was for checking status, if a job is processing, the state is
> > FD_ENQ,
> > then we should wait for the last job to be done when pm_suspend().
> >
> 
> If so, would it be possible to make it a bool and call is_processing?
> 
> [snip]
> 
> > > > +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > > +                             unsigned int *num_buffers,
> > > > +                             unsigned int *num_planes,
> > > > +                             unsigned int sizes[],
> > > > +                             struct device *alloc_devs[])
> > > > +{
> > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > +   struct device *dev = ctx->dev;
> > > > +   unsigned int size;
> > > > +
> > > > +   switch (vq->type) {
> > > > +   case V4L2_BUF_TYPE_META_CAPTURE:
> > > > +           size = ctx->dst_fmt.buffersize;
> > > > +           break;
> > > > +   case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > > +           size = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > > +           break;
> > > > +   default:
> > > > +           dev_err(dev, "invalid queue type: %d\n", vq->type);
> > >
> > > We should need to handle this.
> > >
> > Do you mean by giving a size instead of return -EINVAL?
> >
> 
> Sorry, typo. I meant we shouldn't need to handle it, because we can't
> get any other queue type here.
> 

Ok, I see, I will remove it.
also, this function will be updated as following due to the 2 plane
case.

static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
				  unsigned int *num_buffers,
				  unsigned int *num_planes,
				  unsigned int sizes[],
				  struct device *alloc_devs[])
{
	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
	struct device *dev = ctx->dev;
	unsigned int size[2];

	switch (vq->type) {
	case V4L2_BUF_TYPE_META_CAPTURE:
		size[0] = ctx->dst_fmt.buffersize;
		break;
	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
		size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
		if (*num_planes == 2)
			size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
		break;
	}

	if (*num_planes == 1) {
		if (sizes[0] < size[0])
			return -EINVAL;
	} else if (*num_planes == 2) {
		if ((sizes[0] < size[0]) && (sizes[1] < size[1]))
			return -EINVAL;
	} else {
		*num_planes = 1;
		sizes[0] = size[0];
	}

	return 0;
}

> [snip]
> 
> > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > +{
> > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > +   struct vb2_buffer *vb;
> > >
> > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > removed below?
> > >
> > Maybe we can check the driver state flag and aborting the unfinished
> > jobs?
> > (fd_hw->state == FD_ENQ)
> >
> 
> Yes, we need to either cancel or wait for the currently processing
> job. It depends on hardware capabilities, but cancelling is generally
> preferred for the lower latency.
> 
Ok, it the state is ENQ, then we can disable the FD hw by controlling
the registers.

for example:
	writel(0x0, fd->fd_base + FD_HW_ENABLE);
	writel(0x0, fd->fd_base + FD_INT_EN);

> > > > +
> > > > +   if (V4L2_TYPE_IS_OUTPUT(vq->type))
> > > > +           vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> > > > +   else
> > > > +           vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> > > > +
> > > > +   while (vb) {
> > > > +           v4l2_m2m_buf_done(to_vb2_v4l2_buffer(vb), VB2_BUF_STATE_ERROR);
> > > > +           if (V4L2_TYPE_IS_OUTPUT(vq->type))
> > > > +                   vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> > > > +           else
> > > > +                   vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
> > > > +   }
> > >
> > > We can use v4l2_m2m_buf_remove(). Also we can move the call into the loop
> > > condition:
> > >
> > > while ((vb == v4l2_m2m_buf_remove(...)))
> > >       v4l2_m2m_buf_done(...);
> > >
> > Ok, I will refine as following:
> >
> > while ((vb = v4l2_m2m_buf_remove(V4L2_TYPE_IS_OUTPUT(vq->type)?
> >   &m2m_ctx->out_q_ctx :
> >   &m2m_ctx->cap_q_ctx)))
> > v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> 
> Please move the queue type check before the loop and save the queue
> context in a local variable.
> 

Ok, I will refine as following:

	struct v4l2_m2m_queue_ctx *queue_ctx;

	queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
					&m2m_ctx->out_q_ctx :
					&m2m_ctx->cap_q_ctx;
	while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
		v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);

> [snip]
> 
> > > > +}
> > > > +
> > > > +static void mtk_fd_vb2_request_complete(struct vb2_buffer *vb)
> > > > +{
> > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> > > > +
> > > > +   v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
> > > > +}
> > > > +
> > > > +static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> > > > +                             const struct v4l2_pix_format_mplane *sfmt)
> > > > +{
> > > > +   dfmt->width = sfmt->width;
> > > > +   dfmt->height = sfmt->height;
> > > > +   dfmt->pixelformat = sfmt->pixelformat;
> > > > +   dfmt->field = sfmt->field;
> > > > +   dfmt->colorspace = sfmt->colorspace;
> > > > +   dfmt->num_planes = sfmt->num_planes;
> > > > +
> > > > +   /* Use default */
> > > > +   dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > > > +   dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> > > > +   dfmt->xfer_func =
> > > > +           V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> > > > +   dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> > > > +   dfmt->plane_fmt[0].sizeimage =
> > > > +           dfmt->height * dfmt->plane_fmt[0].bytesperline;
> > > > +   memset(dfmt->reserved, 0, sizeof(dfmt->reserved));
> > > > +}
> > >
> > > Could we unify this function with mtk_fd_m2m_try_fmt_out_mp()? That function
> > > should be almost directly reusable for the default format initialization +/-
> > > the arguments passed to it.
> > >
> > Ok, I will try to reuse it as following:
> >
> > static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> >   const struct v4l2_pix_format_mplane *sfmt)
> > {
> > dfmt->field = V4L2_FIELD_NONE;
> > dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> > dfmt->num_planes = sfmt->num_planes;
> > dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> > dfmt->xfer_func =
> > V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> >
> > /* Keep user setting as possible */
> > dfmt->width = clamp(dfmt->width,
> >     MTK_FD_OUTPUT_MIN_WIDTH,
> >     MTK_FD_OUTPUT_MAX_WIDTH);
> > dfmt->height = clamp(dfmt->height,
> >      MTK_FD_OUTPUT_MIN_HEIGHT,
> >      MTK_FD_OUTPUT_MAX_HEIGHT);
> >
> > if (sfmt->num_planes == 2) {
> > /* NV16M and NV61M has 1 byte per pixel */
> > dfmt->plane_fmt[0].bytesperline = dfmt->width;
> > dfmt->plane_fmt[1].bytesperline = dfmt->width;
> > } else {
> > /* 2 bytes per pixel */
> > dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> > }
> >
> > dfmt->plane_fmt[0].sizeimage =
> > dfmt->height * dfmt->plane_fmt[0].bytesperline;
> > }
> 
> How would the implementation of TRY_FMT look in this case?
> 

It will be looked like:

static int mtk_fd_try_fmt_out_mp(struct file *file,
				     void *fh,
				     struct v4l2_format *f)
{
	struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
	const struct v4l2_pix_format_mplane *fmt;

	fmt = mtk_fd_find_fmt(pix_mp->pixelformat);
	if (!fmt)
		fmt = &mtk_fd_img_fmts[0];	/* Get default img fmt */

	mtk_fd_fill_pixfmt_mp(pix_mp, fmt);
	return 0;
}

> [snip]
> 
> > > > +static int mtk_fd_m2m_enum_fmt_out_mp(struct file *file, void *fh,
> > > > +                                 struct v4l2_fmtdesc *f)
> > > > +{
> > > > +   int i;
> > > > +
> > > > +   for (i = 0; i < NUM_FORMATS; ++i) {
> > > > +           if (i == f->index) {
> > > > +                   f->pixelformat = in_img_fmts[i].pixelformat;
> > > > +                   return 0;
> > > > +           }
> > > > +   }
> > >
> > > Why don't we just check if f->index is within the [0, ARRAY_SIZE()-1] bounds
> > > and then just use it to index the array directly?
> > >
> > I will refine as :
> >
> > static int mtk_fd_m2m_enum_fmt_out_mp(struct file *file, void *fh,
> >       struct v4l2_fmtdesc *f)
> > {
> > if ((f->index >= 0) && (f->index < NUM_FORMATS)) {
> 
> f->index is unsigned
> 
> > f->pixelformat = in_img_fmts[f->index].pixelformat;
> > return 0;
> > }
> >
> > return -EINVAL;
> > }
> 
> nit: The usual convention is to check for invalid values and return early, i.e.
> 
> if (f->index >= NUM_FORMATS)
>     return -EINVAL;
> 
> f->pixelformat = in_img_fmts[f->index].pixelformat;
> return 0;
> 
Ok, Done.

> > > > +
> > > > +   return -EINVAL;
> > > > +}
> > > > +
> > > > +static int mtk_fd_m2m_try_fmt_out_mp(struct file *file,
> > > > +                                void *fh,
> > > > +                                struct v4l2_format *f)
> > >
> > > I think we could just shorten the function prefixes to "mtk_fd_".
> > >
> > Do you mean by replace mtk_fd_m2m_* with mtk_fd_ ?
> >
> 
> Yes.
> 
Done.

> [snip]
> 
> > > > +static int mtk_fd_request_validate(struct media_request *req)
> > > > +{
> > > > +   unsigned int count;
> > > > +
> > > > +   count = vb2_request_buffer_cnt(req);
> > > > +   if (!count)
> > > > +           return -ENOENT;
> > >
> > > Why -ENOENT?
> > >
> > Reference the return code in vb2_request_validate()
> 
> You're right, -ENOENT seems to be the right error code here.
> 
> > I consider refining as following:
> > static int mtk_fd_request_validate(struct media_request *req)
> > {
> > if (vb2_request_buffer_cnt(req) > 1)
> > return -EINVAL;
> >
> > return vb2_request_validate(req);
> > }
> > or maybe I don't need to worry the request count greater than 1,
> > just remove this function and set vb2_request_validate as .req_validate
> > directly?
> >
> 
> Given that we only have 1 queue handling requests, we should be able
> to just use vb2_request_validate() indeed.
> 

Ok, it will be refined as following:

static const struct media_device_ops fd_m2m_media_ops = {
	.req_validate	= vb2_request_validate,
	.req_queue	= v4l2_m2m_request_queue,
};

Thanks and best regards,
Jerry

> Best regards,
> Tomasz
Tomasz Figa Aug. 30, 2019, 8:33 a.m. UTC | #10
On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
<Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > Hi Jerry,
> >
> > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > Hi Jerry,
> > > >
> > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > > From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > > > >
> > > > > This patch adds the driver of Face Detection (FD) unit in
> > > > > Mediatek camera system, providing face detection function.
> > > > >
> > > > > The mtk-isp directory will contain drivers for multiple IP
> > > > > blocks found in Mediatek ISP system. It will include ISP Pass 1
> > > > > driver (CAM), sensor interface driver, DIP driver and face
> > > > > detection driver.
> > > > >
> > > > > Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > > > > ---
> > > > >  drivers/media/platform/Makefile               |    2 +
> > > > >  drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
> > > > >  drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  157 +++
> > > > >  drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1259 +++++++++++++++++++++++++
> > > > >  include/uapi/linux/v4l2-controls.h            |    4 +
> > > > >  5 files changed, 1427 insertions(+)
> > > > >  create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
> > > > >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > > > >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> > > > >
> > > >
> > > > Thanks for the patch! I finally got a chance to fully review the code. Sorry
> > > > for the delay. Please check my comments inline.
> > > >
> > > I appreciate your comments.
> > > I've fixed most of the comments and verifying them,
> > > Sorry for the delay, here is the reply.
> > >
> >
> > Thanks for replying to all the comments, it's very helpful. I'll snip
> > the parts that I don't have any further comments.
> >
> > [snip]
> >
> > > > > +   if (usercount == 1) {
> > > > > +           pm_runtime_get_sync(&fd_dev->pdev->dev);
> > > > > +           if (mtk_fd_hw_enable(fd_hw)) {
> > > > > +                   pm_runtime_put_sync(&fd_dev->pdev->dev);
> > > > > +                   atomic_dec_return(&fd_hw->fd_user_cnt);
> > > > > +                   mutex_unlock(&fd_hw->fd_hw_lock);
> > > > > +                   return -EINVAL;
> > > > > +           }
> > > > > +   }
> > > >
> > > > This is a simple mem-to-mem device, so there is no reason to keep it active
> > > > all the time it's streaming. Please just get the runtime PM counter when
> > > > queuing a job to the hardware and release it when the job finishes.
> > > >
> > > > I guess we might still want to do the costly operations like rproc_boot()
> > > > when we start streaming, though.
> > > >
> > > Do you mean by moving the pm_runtime_get/put stuff to the job execution
> > > and job finish place?
> >
> > Yes.
> >
> > > the rproc_boot() operation will be done here.
> > >
> >
> > How much time does the rproc_boot() operation take?
> >
>
> About 0.7~1.3ms, average 0.8ms (14 measurements)
>

Okay, I think we may want to call it when we start streaming then.

[snip]

> > > > > +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > > > +                             unsigned int *num_buffers,
> > > > > +                             unsigned int *num_planes,
> > > > > +                             unsigned int sizes[],
> > > > > +                             struct device *alloc_devs[])
> > > > > +{
> > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > +   struct device *dev = ctx->dev;
> > > > > +   unsigned int size;
> > > > > +
> > > > > +   switch (vq->type) {
> > > > > +   case V4L2_BUF_TYPE_META_CAPTURE:
> > > > > +           size = ctx->dst_fmt.buffersize;
> > > > > +           break;
> > > > > +   case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > > > +           size = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > > > +           break;
> > > > > +   default:
> > > > > +           dev_err(dev, "invalid queue type: %d\n", vq->type);
> > > >
> > > > We should need to handle this.
> > > >
> > > Do you mean by giving a size instead of return -EINVAL?
> > >
> >
> > Sorry, typo. I meant we shouldn't need to handle it, because we can't
> > get any other queue type here.
> >
>
> Ok, I see, I will remove it.
> also, this function will be updated as following due to the 2 plane
> case.
>
> static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
>                                   unsigned int *num_buffers,
>                                   unsigned int *num_planes,
>                                   unsigned int sizes[],
>                                   struct device *alloc_devs[])
> {
>         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
>         struct device *dev = ctx->dev;
>         unsigned int size[2];
>
>         switch (vq->type) {
>         case V4L2_BUF_TYPE_META_CAPTURE:
>                 size[0] = ctx->dst_fmt.buffersize;
>                 break;
>         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
>                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
>                 if (*num_planes == 2)
>                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
>                 break;
>         }
>
>         if (*num_planes == 1) {
>                 if (sizes[0] < size[0])
>                         return -EINVAL;
>         } else if (*num_planes == 2) {
>                 if ((sizes[0] < size[0]) && (sizes[1] < size[1]))
>                         return -EINVAL;

Can we just use a loop here and combine the 2 cases above?

Also, we need to fail with -EINVAL if *num_planes is > 2.

>         } else {
>                 *num_planes = 1;
>                 sizes[0] = size[0];

This should be the case if *num_planes == 0 and the number of planes
and sizes should match the currently active format.

>         }
>
>         return 0;
> }
>
> > [snip]
> >
> > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > +{
> > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > +   struct vb2_buffer *vb;
> > > >
> > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > removed below?
> > > >
> > > Maybe we can check the driver state flag and aborting the unfinished
> > > jobs?
> > > (fd_hw->state == FD_ENQ)
> > >
> >
> > Yes, we need to either cancel or wait for the currently processing
> > job. It depends on hardware capabilities, but cancelling is generally
> > preferred for the lower latency.
> >
> Ok, it the state is ENQ, then we can disable the FD hw by controlling
> the registers.
>
> for example:
>         writel(0x0, fd->fd_base + FD_HW_ENABLE);
>         writel(0x0, fd->fd_base + FD_INT_EN);
>

What's exactly the effect of writing 0 to FD_HW_ENABLE?

[snip]

> > > > > +}
> > > > > +
> > > > > +static void mtk_fd_vb2_request_complete(struct vb2_buffer *vb)
> > > > > +{
> > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
> > > > > +
> > > > > +   v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
> > > > > +}
> > > > > +
> > > > > +static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> > > > > +                             const struct v4l2_pix_format_mplane *sfmt)
> > > > > +{
> > > > > +   dfmt->width = sfmt->width;
> > > > > +   dfmt->height = sfmt->height;
> > > > > +   dfmt->pixelformat = sfmt->pixelformat;
> > > > > +   dfmt->field = sfmt->field;
> > > > > +   dfmt->colorspace = sfmt->colorspace;
> > > > > +   dfmt->num_planes = sfmt->num_planes;
> > > > > +
> > > > > +   /* Use default */
> > > > > +   dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > > > > +   dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> > > > > +   dfmt->xfer_func =
> > > > > +           V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> > > > > +   dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> > > > > +   dfmt->plane_fmt[0].sizeimage =
> > > > > +           dfmt->height * dfmt->plane_fmt[0].bytesperline;
> > > > > +   memset(dfmt->reserved, 0, sizeof(dfmt->reserved));
> > > > > +}
> > > >
> > > > Could we unify this function with mtk_fd_m2m_try_fmt_out_mp()? That function
> > > > should be almost directly reusable for the default format initialization +/-
> > > > the arguments passed to it.
> > > >
> > > Ok, I will try to reuse it as following:
> > >
> > > static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
> > >   const struct v4l2_pix_format_mplane *sfmt)
> > > {
> > > dfmt->field = V4L2_FIELD_NONE;
> > > dfmt->colorspace = V4L2_COLORSPACE_BT2020;
> > > dfmt->num_planes = sfmt->num_planes;
> > > dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
> > > dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
> > > dfmt->xfer_func =
> > > V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
> > >
> > > /* Keep user setting as possible */
> > > dfmt->width = clamp(dfmt->width,
> > >     MTK_FD_OUTPUT_MIN_WIDTH,
> > >     MTK_FD_OUTPUT_MAX_WIDTH);
> > > dfmt->height = clamp(dfmt->height,
> > >      MTK_FD_OUTPUT_MIN_HEIGHT,
> > >      MTK_FD_OUTPUT_MAX_HEIGHT);
> > >
> > > if (sfmt->num_planes == 2) {
> > > /* NV16M and NV61M has 1 byte per pixel */
> > > dfmt->plane_fmt[0].bytesperline = dfmt->width;
> > > dfmt->plane_fmt[1].bytesperline = dfmt->width;
> > > } else {
> > > /* 2 bytes per pixel */
> > > dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
> > > }
> > >
> > > dfmt->plane_fmt[0].sizeimage =
> > > dfmt->height * dfmt->plane_fmt[0].bytesperline;
> > > }
> >
> > How would the implementation of TRY_FMT look in this case?
> >
>
> It will be looked like:
>
> static int mtk_fd_try_fmt_out_mp(struct file *file,
>                                      void *fh,
>                                      struct v4l2_format *f)
> {
>         struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
>         const struct v4l2_pix_format_mplane *fmt;
>
>         fmt = mtk_fd_find_fmt(pix_mp->pixelformat);
>         if (!fmt)
>                 fmt = &mtk_fd_img_fmts[0];      /* Get default img fmt */
>
>         mtk_fd_fill_pixfmt_mp(pix_mp, fmt);
>         return 0;
> }
>

Okay, thanks!

Best regards,
Tomasz
Jerry-ch Chen Sept. 2, 2019, 11:47 a.m. UTC | #11
Hi Tomasz,

On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > Hi Jerry,
> > >
> > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > Hi Jerry,
> > > > >
> > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > > > From: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > > > > >
> > > > > > This patch adds the driver of Face Detection (FD) unit in
> > > > > > Mediatek camera system, providing face detection function.
> > > > > >
> > > > > > The mtk-isp directory will contain drivers for multiple IP
> > > > > > blocks found in Mediatek ISP system. It will include ISP Pass 1
> > > > > > driver (CAM), sensor interface driver, DIP driver and face
> > > > > > detection driver.
> > > > > >
> > > > > > Signed-off-by: Jerry-ch Chen <jerry-ch.chen@mediatek.com>
> > > > > > ---
> > > > > >  drivers/media/platform/Makefile               |    2 +
> > > > > >  drivers/media/platform/mtk-isp/fd/Makefile    |    5 +
> > > > > >  drivers/media/platform/mtk-isp/fd/mtk_fd.h    |  157 +++
> > > > > >  drivers/media/platform/mtk-isp/fd/mtk_fd_40.c | 1259 +++++++++++++++++++++++++
> > > > > >  include/uapi/linux/v4l2-controls.h            |    4 +
> > > > > >  5 files changed, 1427 insertions(+)
> > > > > >  create mode 100644 drivers/media/platform/mtk-isp/fd/Makefile
> > > > > >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd.h
> > > > > >  create mode 100644 drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
> > > > > >
> > > > >
> > > > > Thanks for the patch! I finally got a chance to fully review the code. Sorry
> > > > > for the delay. Please check my comments inline.
> > > > >
> > > > I appreciate your comments.
> > > > I've fixed most of the comments and verifying them,
> > > > Sorry for the delay, here is the reply.
> > > >
> > >
> > > Thanks for replying to all the comments, it's very helpful. I'll snip
> > > the parts that I don't have any further comments.
> > >
> > > [snip]
> > >
> > > > > > +   if (usercount == 1) {
> > > > > > +           pm_runtime_get_sync(&fd_dev->pdev->dev);
> > > > > > +           if (mtk_fd_hw_enable(fd_hw)) {
> > > > > > +                   pm_runtime_put_sync(&fd_dev->pdev->dev);
> > > > > > +                   atomic_dec_return(&fd_hw->fd_user_cnt);
> > > > > > +                   mutex_unlock(&fd_hw->fd_hw_lock);
> > > > > > +                   return -EINVAL;
> > > > > > +           }
> > > > > > +   }
> > > > >
> > > > > This is a simple mem-to-mem device, so there is no reason to keep it active
> > > > > all the time it's streaming. Please just get the runtime PM counter when
> > > > > queuing a job to the hardware and release it when the job finishes.
> > > > >
> > > > > I guess we might still want to do the costly operations like rproc_boot()
> > > > > when we start streaming, though.
> > > > >
> > > > Do you mean by moving the pm_runtime_get/put stuff to the job execution
> > > > and job finish place?
> > >
> > > Yes.
> > >
> > > > the rproc_boot() operation will be done here.
> > > >
> > >
> > > How much time does the rproc_boot() operation take?
> > >
> >
> > About 0.7~1.3ms, average 0.8ms (14 measurements)
> >
> 
> Okay, I think we may want to call it when we start streaming then.
> 
Done.

> [snip]
> 
> > > > > > +static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > > > > +                             unsigned int *num_buffers,
> > > > > > +                             unsigned int *num_planes,
> > > > > > +                             unsigned int sizes[],
> > > > > > +                             struct device *alloc_devs[])
> > > > > > +{
> > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > +   struct device *dev = ctx->dev;
> > > > > > +   unsigned int size;
> > > > > > +
> > > > > > +   switch (vq->type) {
> > > > > > +   case V4L2_BUF_TYPE_META_CAPTURE:
> > > > > > +           size = ctx->dst_fmt.buffersize;
> > > > > > +           break;
> > > > > > +   case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > > > > +           size = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > > > > +           break;
> > > > > > +   default:
> > > > > > +           dev_err(dev, "invalid queue type: %d\n", vq->type);
> > > > >
> > > > > We should need to handle this.
> > > > >
> > > > Do you mean by giving a size instead of return -EINVAL?
> > > >
> > >
> > > Sorry, typo. I meant we shouldn't need to handle it, because we can't
> > > get any other queue type here.
> > >
> >
> > Ok, I see, I will remove it.
> > also, this function will be updated as following due to the 2 plane
> > case.
> >
> > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> >                                   unsigned int *num_buffers,
> >                                   unsigned int *num_planes,
> >                                   unsigned int sizes[],
> >                                   struct device *alloc_devs[])
> > {
> >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> >         struct device *dev = ctx->dev;
> >         unsigned int size[2];
> >
> >         switch (vq->type) {
> >         case V4L2_BUF_TYPE_META_CAPTURE:
> >                 size[0] = ctx->dst_fmt.buffersize;
> >                 break;
> >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> >                 if (*num_planes == 2)
> >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> >                 break;
> >         }
> >
> >         if (*num_planes == 1) {
> >                 if (sizes[0] < size[0])
> >                         return -EINVAL;
> >         } else if (*num_planes == 2) {
> >                 if ((sizes[0] < size[0]) && (sizes[1] < size[1]))
> >                         return -EINVAL;
> 
> Can we just use a loop here and combine the 2 cases above?
> 
> Also, we need to fail with -EINVAL if *num_planes is > 2.
> 
> >         } else {
> >                 *num_planes = 1;
> >                 sizes[0] = size[0];
> 
> This should be the case if *num_planes == 0 and the number of planes
> and sizes should match the currently active format.
> 
I appreciate your comments,

Ok, I will update as following:
static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
				  unsigned int *num_buffers,
				  unsigned int *num_planes,
				  unsigned int sizes[],
				  struct device *alloc_devs[])
{
	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
	unsigned int size[2];
	unsigned int plane;

	switch (vq->type) {
	case V4L2_BUF_TYPE_META_CAPTURE:
		size[0] = ctx->dst_fmt.buffersize;
		break;
	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
		size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
		if (*num_planes == 2)
			size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
		break;
	}

	if (*num_planes > 2)
		return -EINVAL;
	if (*num_planes == 0) {
		if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
			sizes[0] = ctx->dst_fmt.buffersize;
			*num_planes = 1;
			return 0;
		}

		*num_planes = ctx->src_fmt.num_planes;
		for (plane = 0; plane < *num_planes; plane++)
			sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
		return 0;
	}

	for (plane = 0; plane < *num_planes; plane++) {
		if(sizes[plane] < size[plane])
			return -EINVAL;
	}
	return 0;
}

> >         }
> >
> >         return 0;
> > }
> >
> > > [snip]
> > >
> > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > +{
> > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > +   struct vb2_buffer *vb;
> > > > >
> > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > removed below?
> > > > >
> > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > jobs?
> > > > (fd_hw->state == FD_ENQ)
> > > >
> > >
> > > Yes, we need to either cancel or wait for the currently processing
> > > job. It depends on hardware capabilities, but cancelling is generally
> > > preferred for the lower latency.
> > >
> > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > the registers.
> >
> > for example:
> >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> >         writel(0x0, fd->fd_base + FD_INT_EN);
> >
> 
> What's exactly the effect of writing 0 to FD_HW_ENABLE?
> 
Sorry, my last reply didn't solve the question,
we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().

which is able to readl_poll_timeout_atomic()
and check the HW busy bits in the register FD_INT_EN;

if they are not cleared until timeout, we could handle the last
processing job.
Otherwise, the FD irq handler should have handled the last processing
job and we could continue the stop_streaming().

For job_abort():
static void mtk_fd_job_abort(void *priv)
{
	struct mtk_fd_ctx *ctx = priv;
	struct mtk_fd_dev *fd = ctx->fd_dev;
	u32 val;
	u32 ret;

	ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
					val,
					(val & MTK_FD_HW_BUSY_MASK) ==
					MTK_FD_HW_STATE_IS_BUSY,
					USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
	if (ret) {
		dev_warn(fd->dev, "FD HW timeout\n");
		mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
	}
	writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
	writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
}

Best Regards,
Jerry
Tomasz Figa Sept. 3, 2019, 5:19 a.m. UTC | #12
On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > Hi Jerry,
> > > >
> > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > > > >
> > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > Hi Jerry,
> > > > > >
> > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
[snip]
> > > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > >                                   unsigned int *num_buffers,
> > >                                   unsigned int *num_planes,
> > >                                   unsigned int sizes[],
> > >                                   struct device *alloc_devs[])
> > > {
> > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > >         struct device *dev = ctx->dev;
> > >         unsigned int size[2];
> > >
> > >         switch (vq->type) {
> > >         case V4L2_BUF_TYPE_META_CAPTURE:
> > >                 size[0] = ctx->dst_fmt.buffersize;
> > >                 break;
> > >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > >                 if (*num_planes == 2)
> > >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > >                 break;
> > >         }
> > >
> > >         if (*num_planes == 1) {
> > >                 if (sizes[0] < size[0])
> > >                         return -EINVAL;
> > >         } else if (*num_planes == 2) {
> > >                 if ((sizes[0] < size[0]) && (sizes[1] < size[1]))
> > >                         return -EINVAL;
> >
> > Can we just use a loop here and combine the 2 cases above?
> >
> > Also, we need to fail with -EINVAL if *num_planes is > 2.
> >
> > >         } else {
> > >                 *num_planes = 1;
> > >                 sizes[0] = size[0];
> >
> > This should be the case if *num_planes == 0 and the number of planes
> > and sizes should match the currently active format.
> >
> I appreciate your comments,
>
> Ok, I will update as following:
> static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
>                                   unsigned int *num_buffers,
>                                   unsigned int *num_planes,
>                                   unsigned int sizes[],
>                                   struct device *alloc_devs[])
> {
>         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
>         unsigned int size[2];
>         unsigned int plane;
>
>         switch (vq->type) {
>         case V4L2_BUF_TYPE_META_CAPTURE:
>                 size[0] = ctx->dst_fmt.buffersize;
>                 break;
>         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
>                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
>                 if (*num_planes == 2)
>                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
>                 break;
>         }
>
>         if (*num_planes > 2)
>                 return -EINVAL;
>         if (*num_planes == 0) {
>                 if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
>                         sizes[0] = ctx->dst_fmt.buffersize;
>                         *num_planes = 1;
>                         return 0;
>                 }
>
>                 *num_planes = ctx->src_fmt.num_planes;
>                 for (plane = 0; plane < *num_planes; plane++)
>                         sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
>                 return 0;
>         }
>
>         for (plane = 0; plane < *num_planes; plane++) {
>                 if(sizes[plane] < size[plane])
>                         return -EINVAL;
>         }
>         return 0;
> }
>

Looks good, thanks!

> > >         }
> > >
> > >         return 0;
> > > }
> > >
> > > > [snip]
> > > >
> > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > +{
> > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > +   struct vb2_buffer *vb;
> > > > > >
> > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > removed below?
> > > > > >
> > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > jobs?
> > > > > (fd_hw->state == FD_ENQ)
> > > > >
> > > >
> > > > Yes, we need to either cancel or wait for the currently processing
> > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > preferred for the lower latency.
> > > >
> > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > the registers.
> > >
> > > for example:
> > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > >
> >
> > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> >
> Sorry, my last reply didn't solve the question,
> we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
>
> which is able to readl_poll_timeout_atomic()
> and check the HW busy bits in the register FD_INT_EN;
>
> if they are not cleared until timeout, we could handle the last
> processing job.
> Otherwise, the FD irq handler should have handled the last processing
> job and we could continue the stop_streaming().
>
> For job_abort():
> static void mtk_fd_job_abort(void *priv)
> {
>         struct mtk_fd_ctx *ctx = priv;
>         struct mtk_fd_dev *fd = ctx->fd_dev;
>         u32 val;
>         u32 ret;
>
>         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
>                                         val,
>                                         (val & MTK_FD_HW_BUSY_MASK) ==
>                                         MTK_FD_HW_STATE_IS_BUSY,
>                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);

Hmm, would it be possible to avoid the busy wait by having a
completion that could be signalled from the interrupt handler?

Best regards,
Tomasz
Jerry-ch Chen Sept. 3, 2019, 6:44 a.m. UTC | #13
On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > Hi Jerry,
> > > > >
> > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > Hi Tomasz,
> > > > > >
> > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > Hi Jerry,
> > > > > > >
> > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> [snip]
> > > > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > >                                   unsigned int *num_buffers,
> > > >                                   unsigned int *num_planes,
> > > >                                   unsigned int sizes[],
> > > >                                   struct device *alloc_devs[])
> > > > {
> > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > >         struct device *dev = ctx->dev;
> > > >         unsigned int size[2];
> > > >
> > > >         switch (vq->type) {
> > > >         case V4L2_BUF_TYPE_META_CAPTURE:
> > > >                 size[0] = ctx->dst_fmt.buffersize;
> > > >                 break;
> > > >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > >                 if (*num_planes == 2)
> > > >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > > >                 break;
> > > >         }
> > > >
> > > >         if (*num_planes == 1) {
> > > >                 if (sizes[0] < size[0])
> > > >                         return -EINVAL;
> > > >         } else if (*num_planes == 2) {
> > > >                 if ((sizes[0] < size[0]) && (sizes[1] < size[1]))
> > > >                         return -EINVAL;
> > >
> > > Can we just use a loop here and combine the 2 cases above?
> > >
> > > Also, we need to fail with -EINVAL if *num_planes is > 2.
> > >
> > > >         } else {
> > > >                 *num_planes = 1;
> > > >                 sizes[0] = size[0];
> > >
> > > This should be the case if *num_planes == 0 and the number of planes
> > > and sizes should match the currently active format.
> > >
> > I appreciate your comments,
> >
> > Ok, I will update as following:
> > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> >                                   unsigned int *num_buffers,
> >                                   unsigned int *num_planes,
> >                                   unsigned int sizes[],
> >                                   struct device *alloc_devs[])
> > {
> >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> >         unsigned int size[2];
> >         unsigned int plane;
> >
> >         switch (vq->type) {
> >         case V4L2_BUF_TYPE_META_CAPTURE:
> >                 size[0] = ctx->dst_fmt.buffersize;
> >                 break;
> >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> >                 if (*num_planes == 2)
> >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> >                 break;
> >         }
> >
> >         if (*num_planes > 2)
> >                 return -EINVAL;
> >         if (*num_planes == 0) {
> >                 if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
> >                         sizes[0] = ctx->dst_fmt.buffersize;
> >                         *num_planes = 1;
> >                         return 0;
> >                 }
> >
> >                 *num_planes = ctx->src_fmt.num_planes;
> >                 for (plane = 0; plane < *num_planes; plane++)
> >                         sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
> >                 return 0;
> >         }
> >
> >         for (plane = 0; plane < *num_planes; plane++) {
> >                 if(sizes[plane] < size[plane])
> >                         return -EINVAL;
> >         }
> >         return 0;
> > }
> >
> 
> Looks good, thanks!
> 
> > > >         }
> > > >
> > > >         return 0;
> > > > }
> > > >
> > > > > [snip]
> > > > >
> > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > +{
> > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > +   struct vb2_buffer *vb;
> > > > > > >
> > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > removed below?
> > > > > > >
> > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > jobs?
> > > > > > (fd_hw->state == FD_ENQ)
> > > > > >
> > > > >
> > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > preferred for the lower latency.
> > > > >
> > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > the registers.
> > > >
> > > > for example:
> > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > >
> > >
> > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > >
> > Sorry, my last reply didn't solve the question,
> > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> >
> > which is able to readl_poll_timeout_atomic()
> > and check the HW busy bits in the register FD_INT_EN;
> >
> > if they are not cleared until timeout, we could handle the last
> > processing job.
> > Otherwise, the FD irq handler should have handled the last processing
> > job and we could continue the stop_streaming().
> >
> > For job_abort():
> > static void mtk_fd_job_abort(void *priv)
> > {
> >         struct mtk_fd_ctx *ctx = priv;
> >         struct mtk_fd_dev *fd = ctx->fd_dev;
> >         u32 val;
> >         u32 ret;
> >
> >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> >                                         val,
> >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> >                                         MTK_FD_HW_STATE_IS_BUSY,
> >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> 
> Hmm, would it be possible to avoid the busy wait by having a
> completion that could be signalled from the interrupt handler?
> 
> Best regards,
> Tomasz

I suppose that would be wakeup a wait queue in the interrupt handler,
the the wait_event_interrupt_timeout() will be used in here and system
suspend e.g. mtk_fd_suspend().
Or do you suggest to wait_event_interrupt_timeout() every frame in the
mtk_fd_ipi_handler()?
I think maybe the readl_poll_timeout_atomic would be good enough.


One more thing, for the mtk_fd_video_device_register()
Sorry that I would need to use intermediate variable here since the 80
columns check.

	function = MEDIA_ENT_F_PROC_VIDEO_STATISTICS;
	ret = v4l2_m2m_register_media_controller(m2m_dev, vfd, function);
	if (ret) {
		dev_err(dev, "Failed to init mem2mem media controller\n");
		goto err_unreg_video;
	}


Thanks and Best regards,
Jerry
Tomasz Figa Sept. 3, 2019, 7:04 a.m. UTC | #14
On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
>
> On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > > > >
> > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > Hi Jerry,
> > > > > >
> > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > Hi Tomasz,
> > > > > > >
> > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > Hi Jerry,
> > > > > > > >
> > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > [snip]
> > > > > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > > >                                   unsigned int *num_buffers,
> > > > >                                   unsigned int *num_planes,
> > > > >                                   unsigned int sizes[],
> > > > >                                   struct device *alloc_devs[])
> > > > > {
> > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > >         struct device *dev = ctx->dev;
> > > > >         unsigned int size[2];
> > > > >
> > > > >         switch (vq->type) {
> > > > >         case V4L2_BUF_TYPE_META_CAPTURE:
> > > > >                 size[0] = ctx->dst_fmt.buffersize;
> > > > >                 break;
> > > > >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > > >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > > >                 if (*num_planes == 2)
> > > > >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > > > >                 break;
> > > > >         }
> > > > >
> > > > >         if (*num_planes == 1) {
> > > > >                 if (sizes[0] < size[0])
> > > > >                         return -EINVAL;
> > > > >         } else if (*num_planes == 2) {
> > > > >                 if ((sizes[0] < size[0]) && (sizes[1] < size[1]))
> > > > >                         return -EINVAL;
> > > >
> > > > Can we just use a loop here and combine the 2 cases above?
> > > >
> > > > Also, we need to fail with -EINVAL if *num_planes is > 2.
> > > >
> > > > >         } else {
> > > > >                 *num_planes = 1;
> > > > >                 sizes[0] = size[0];
> > > >
> > > > This should be the case if *num_planes == 0 and the number of planes
> > > > and sizes should match the currently active format.
> > > >
> > > I appreciate your comments,
> > >
> > > Ok, I will update as following:
> > > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > >                                   unsigned int *num_buffers,
> > >                                   unsigned int *num_planes,
> > >                                   unsigned int sizes[],
> > >                                   struct device *alloc_devs[])
> > > {
> > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > >         unsigned int size[2];
> > >         unsigned int plane;
> > >
> > >         switch (vq->type) {
> > >         case V4L2_BUF_TYPE_META_CAPTURE:
> > >                 size[0] = ctx->dst_fmt.buffersize;
> > >                 break;
> > >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > >                 if (*num_planes == 2)
> > >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > >                 break;
> > >         }
> > >
> > >         if (*num_planes > 2)
> > >                 return -EINVAL;
> > >         if (*num_planes == 0) {
> > >                 if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
> > >                         sizes[0] = ctx->dst_fmt.buffersize;
> > >                         *num_planes = 1;
> > >                         return 0;
> > >                 }
> > >
> > >                 *num_planes = ctx->src_fmt.num_planes;
> > >                 for (plane = 0; plane < *num_planes; plane++)
> > >                         sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
> > >                 return 0;
> > >         }
> > >
> > >         for (plane = 0; plane < *num_planes; plane++) {
> > >                 if(sizes[plane] < size[plane])
> > >                         return -EINVAL;
> > >         }
> > >         return 0;
> > > }
> > >
> >
> > Looks good, thanks!
> >
> > > > >         }
> > > > >
> > > > >         return 0;
> > > > > }
> > > > >
> > > > > > [snip]
> > > > > >
> > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > +{
> > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > >
> > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > removed below?
> > > > > > > >
> > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > jobs?
> > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > >
> > > > > >
> > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > preferred for the lower latency.
> > > > > >
> > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > the registers.
> > > > >
> > > > > for example:
> > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > >
> > > >
> > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > >
> > > Sorry, my last reply didn't solve the question,
> > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > >
> > > which is able to readl_poll_timeout_atomic()
> > > and check the HW busy bits in the register FD_INT_EN;
> > >
> > > if they are not cleared until timeout, we could handle the last
> > > processing job.
> > > Otherwise, the FD irq handler should have handled the last processing
> > > job and we could continue the stop_streaming().
> > >
> > > For job_abort():
> > > static void mtk_fd_job_abort(void *priv)
> > > {
> > >         struct mtk_fd_ctx *ctx = priv;
> > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > >         u32 val;
> > >         u32 ret;
> > >
> > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > >                                         val,
> > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> >
> > Hmm, would it be possible to avoid the busy wait by having a
> > completion that could be signalled from the interrupt handler?
> >
> > Best regards,
> > Tomasz
>
> I suppose that would be wakeup a wait queue in the interrupt handler,
> the the wait_event_interrupt_timeout() will be used in here and system
> suspend e.g. mtk_fd_suspend().

Yes, that should work.

> Or do you suggest to wait_event_interrupt_timeout() every frame in the
> mtk_fd_ipi_handler()?

Nope, we shouldn't need that.

> I think maybe the readl_poll_timeout_atomic would be good enough.
>

Not really. Busy waiting should be avoided as much as possible. What's
the point of entering suspend if you end up burning the power by
spinning the CPU for some milliseconds?

>
> One more thing, for the mtk_fd_video_device_register()
> Sorry that I would need to use intermediate variable here since the 80
> columns check.
>
>         function = MEDIA_ENT_F_PROC_VIDEO_STATISTICS;
>         ret = v4l2_m2m_register_media_controller(m2m_dev, vfd, function);

Why not just make it like this:

ret = v4l2_m2m_register_media_controller(m2m_dev,
                MEDIA_ENT_F_PROC_VIDEO_STATISTICS);

The above line is aligned using tabs so that its end is as close to
the 80 character boundary as possible.

Best regards,
Tomasz
Jerry-ch Chen Sept. 3, 2019, 11:46 a.m. UTC | #15
Hi Tomasz,

On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > Hi Tomasz,
> > > > > >
> > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > Hi Jerry,
> > > > > > >
> > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > >
> > > > > > > > Hi Tomasz,
> > > > > > > >
> > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > Hi Jerry,
> > > > > > > > >
> > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > [snip]
> > > > > > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > > > >                                   unsigned int *num_buffers,
> > > > > >                                   unsigned int *num_planes,
> > > > > >                                   unsigned int sizes[],
> > > > > >                                   struct device *alloc_devs[])
> > > > > > {
> > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > >         struct device *dev = ctx->dev;
> > > > > >         unsigned int size[2];
> > > > > >
> > > > > >         switch (vq->type) {
> > > > > >         case V4L2_BUF_TYPE_META_CAPTURE:
> > > > > >                 size[0] = ctx->dst_fmt.buffersize;
> > > > > >                 break;
> > > > > >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > > > >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > > > >                 if (*num_planes == 2)
> > > > > >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > > > > >                 break;
> > > > > >         }
> > > > > >
> > > > > >         if (*num_planes == 1) {
> > > > > >                 if (sizes[0] < size[0])
> > > > > >                         return -EINVAL;
> > > > > >         } else if (*num_planes == 2) {
> > > > > >                 if ((sizes[0] < size[0]) && (sizes[1] < size[1]))
> > > > > >                         return -EINVAL;
> > > > >
> > > > > Can we just use a loop here and combine the 2 cases above?
> > > > >
> > > > > Also, we need to fail with -EINVAL if *num_planes is > 2.
> > > > >
> > > > > >         } else {
> > > > > >                 *num_planes = 1;
> > > > > >                 sizes[0] = size[0];
> > > > >
> > > > > This should be the case if *num_planes == 0 and the number of planes
> > > > > and sizes should match the currently active format.
> > > > >
> > > > I appreciate your comments,
> > > >
> > > > Ok, I will update as following:
> > > > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > >                                   unsigned int *num_buffers,
> > > >                                   unsigned int *num_planes,
> > > >                                   unsigned int sizes[],
> > > >                                   struct device *alloc_devs[])
> > > > {
> > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > >         unsigned int size[2];
> > > >         unsigned int plane;
> > > >
> > > >         switch (vq->type) {
> > > >         case V4L2_BUF_TYPE_META_CAPTURE:
> > > >                 size[0] = ctx->dst_fmt.buffersize;
> > > >                 break;
> > > >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > >                 if (*num_planes == 2)
> > > >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > > >                 break;
> > > >         }
> > > >
> > > >         if (*num_planes > 2)
> > > >                 return -EINVAL;
> > > >         if (*num_planes == 0) {
> > > >                 if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
> > > >                         sizes[0] = ctx->dst_fmt.buffersize;
> > > >                         *num_planes = 1;
> > > >                         return 0;
> > > >                 }
> > > >
> > > >                 *num_planes = ctx->src_fmt.num_planes;
> > > >                 for (plane = 0; plane < *num_planes; plane++)
> > > >                         sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
> > > >                 return 0;
> > > >         }
> > > >
> > > >         for (plane = 0; plane < *num_planes; plane++) {
> > > >                 if(sizes[plane] < size[plane])
> > > >                         return -EINVAL;
> > > >         }
> > > >         return 0;
> > > > }
> > > >
> > >
> > > Looks good, thanks!
> > >
> > > > > >         }
> > > > > >
> > > > > >         return 0;
> > > > > > }
> > > > > >
> > > > > > > [snip]
> > > > > > >
> > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > +{
> > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > >
> > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > removed below?
> > > > > > > > >
> > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > jobs?
> > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > >
> > > > > > >
> > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > preferred for the lower latency.
> > > > > > >
> > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > the registers.
> > > > > >
> > > > > > for example:
> > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > >
> > > > >
> > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > >
> > > > Sorry, my last reply didn't solve the question,
> > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > >
> > > > which is able to readl_poll_timeout_atomic()
> > > > and check the HW busy bits in the register FD_INT_EN;
> > > >
> > > > if they are not cleared until timeout, we could handle the last
> > > > processing job.
> > > > Otherwise, the FD irq handler should have handled the last processing
> > > > job and we could continue the stop_streaming().
> > > >
> > > > For job_abort():
> > > > static void mtk_fd_job_abort(void *priv)
> > > > {
> > > >         struct mtk_fd_ctx *ctx = priv;
> > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > >         u32 val;
> > > >         u32 ret;
> > > >
> > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > >                                         val,
> > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > >
> > > Hmm, would it be possible to avoid the busy wait by having a
> > > completion that could be signalled from the interrupt handler?
> > >
> > > Best regards,
> > > Tomasz
> >
> > I suppose that would be wakeup a wait queue in the interrupt handler,
> > the the wait_event_interrupt_timeout() will be used in here and system
> > suspend e.g. mtk_fd_suspend().
> 
> Yes, that should work.
> 
> > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > mtk_fd_ipi_handler()?
> 
> Nope, we shouldn't need that.
> 
> > I think maybe the readl_poll_timeout_atomic would be good enough.
> >
> 
> Not really. Busy waiting should be avoided as much as possible. What's
> the point of entering suspend if you end up burning the power by
> spinning the CPU for some milliseconds?
> 
Ok, I see, busy waiting is not a good idea, I will use the wait queue
instead. the job abort will refine as following:

static void mtk_fd_job_abort(void *priv)
{
	struct mtk_fd_ctx *ctx = priv;
	struct mtk_fd_dev *fd = ctx->fd_dev;
	u32 ret;

	ret = wait_event_interruptible_timeout
		(fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
		 usecs_to_jiffies(50000));
	if (ret)
		mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
	dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);

	fd->fd_irq_result = 0;
}

static struct v4l2_m2m_ops fd_m2m_ops = {
	.device_run = mtk_fd_device_run,
	.job_abort = mtk_fd_job_abort,
};

and we could use it in suspend.
static int mtk_fd_suspend(struct device *dev)
{
	struct mtk_fd_dev *fd = dev_get_drvdata(dev);

	if (pm_runtime_suspended(dev))
		return 0;

	if (fd->fd_stream_count)
		mtk_fd_job_abort(fd->ctx);

	/* suspend FD HW */
	writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
	writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
	clk_disable_unprepare(fd->fd_clk);
	dev_dbg(dev, "%s:disable clock\n", __func__);

	return 0;
}

static irqreturn_t mtk_fd_irq(int irq, void *data)
{
	struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;

	fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
	wake_up_interruptible(&fd->wq);
	fd->output->number = readl(fd->fd_base + MTK_FD_REG_OFFSET_RESULT);
	dev_dbg(fd->dev, "mtk_fd_face_num:%d\n", fd->output->number);

	pm_runtime_put((fd->dev));
	mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_DONE);
	return IRQ_HANDLED;
}
> >
> > One more thing, for the mtk_fd_video_device_register()
> > Sorry that I would need to use intermediate variable here since the 80
> > columns check.
> >
> >         function = MEDIA_ENT_F_PROC_VIDEO_STATISTICS;
> >         ret = v4l2_m2m_register_media_controller(m2m_dev, vfd, function);
> 
> Why not just make it like this:
> 
> ret = v4l2_m2m_register_media_controller(m2m_dev,
>                 MEDIA_ENT_F_PROC_VIDEO_STATISTICS);
> 
> The above line is aligned using tabs so that its end is as close to
> the 80 character boundary as possible.
> 
I tried but the checkpatch script still gave me a check saying align to
the scope, I will refine as following:

	ret = v4l2_m2m_register_media_controller
		(m2m_dev, vfd, MEDIA_ENT_F_PROC_VIDEO_STATISTICS);


Thanks and best regards,
Jerry
> Best regards,
> Tomasz
Tomasz Figa Sept. 3, 2019, 12:05 p.m. UTC | #16
On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > > > >
> > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > Hi Tomasz,
> > > > > > >
> > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > Hi Jerry,
> > > > > > > >
> > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > >
> > > > > > > > > Hi Tomasz,
> > > > > > > > >
> > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > Hi Jerry,
> > > > > > > > > >
> > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > [snip]
> > > > > > > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > > > > >                                   unsigned int *num_buffers,
> > > > > > >                                   unsigned int *num_planes,
> > > > > > >                                   unsigned int sizes[],
> > > > > > >                                   struct device *alloc_devs[])
> > > > > > > {
> > > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > >         struct device *dev = ctx->dev;
> > > > > > >         unsigned int size[2];
> > > > > > >
> > > > > > >         switch (vq->type) {
> > > > > > >         case V4L2_BUF_TYPE_META_CAPTURE:
> > > > > > >                 size[0] = ctx->dst_fmt.buffersize;
> > > > > > >                 break;
> > > > > > >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > > > > >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > > > > >                 if (*num_planes == 2)
> > > > > > >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > > > > > >                 break;
> > > > > > >         }
> > > > > > >
> > > > > > >         if (*num_planes == 1) {
> > > > > > >                 if (sizes[0] < size[0])
> > > > > > >                         return -EINVAL;
> > > > > > >         } else if (*num_planes == 2) {
> > > > > > >                 if ((sizes[0] < size[0]) && (sizes[1] < size[1]))
> > > > > > >                         return -EINVAL;
> > > > > >
> > > > > > Can we just use a loop here and combine the 2 cases above?
> > > > > >
> > > > > > Also, we need to fail with -EINVAL if *num_planes is > 2.
> > > > > >
> > > > > > >         } else {
> > > > > > >                 *num_planes = 1;
> > > > > > >                 sizes[0] = size[0];
> > > > > >
> > > > > > This should be the case if *num_planes == 0 and the number of planes
> > > > > > and sizes should match the currently active format.
> > > > > >
> > > > > I appreciate your comments,
> > > > >
> > > > > Ok, I will update as following:
> > > > > static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
> > > > >                                   unsigned int *num_buffers,
> > > > >                                   unsigned int *num_planes,
> > > > >                                   unsigned int sizes[],
> > > > >                                   struct device *alloc_devs[])
> > > > > {
> > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > >         unsigned int size[2];
> > > > >         unsigned int plane;
> > > > >
> > > > >         switch (vq->type) {
> > > > >         case V4L2_BUF_TYPE_META_CAPTURE:
> > > > >                 size[0] = ctx->dst_fmt.buffersize;
> > > > >                 break;
> > > > >         case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
> > > > >                 size[0] = ctx->src_fmt.plane_fmt[0].sizeimage;
> > > > >                 if (*num_planes == 2)
> > > > >                         size[1] = ctx->src_fmt.plane_fmt[1].sizeimage;
> > > > >                 break;
> > > > >         }
> > > > >
> > > > >         if (*num_planes > 2)
> > > > >                 return -EINVAL;
> > > > >         if (*num_planes == 0) {
> > > > >                 if (vq->type == V4L2_BUF_TYPE_META_CAPTURE) {
> > > > >                         sizes[0] = ctx->dst_fmt.buffersize;
> > > > >                         *num_planes = 1;
> > > > >                         return 0;
> > > > >                 }
> > > > >
> > > > >                 *num_planes = ctx->src_fmt.num_planes;
> > > > >                 for (plane = 0; plane < *num_planes; plane++)
> > > > >                         sizes[plane] = ctx->src_fmt.plane_fmt[plane].sizeimage;
> > > > >                 return 0;
> > > > >         }
> > > > >
> > > > >         for (plane = 0; plane < *num_planes; plane++) {
> > > > >                 if(sizes[plane] < size[plane])
> > > > >                         return -EINVAL;
> > > > >         }
> > > > >         return 0;
> > > > > }
> > > > >
> > > >
> > > > Looks good, thanks!
> > > >
> > > > > > >         }
> > > > > > >
> > > > > > >         return 0;
> > > > > > > }
> > > > > > >
> > > > > > > > [snip]
> > > > > > > >
> > > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > +{
> > > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > > >
> > > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > > removed below?
> > > > > > > > > >
> > > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > > jobs?
> > > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > > >
> > > > > > > >
> > > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > > preferred for the lower latency.
> > > > > > > >
> > > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > > the registers.
> > > > > > >
> > > > > > > for example:
> > > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > > >
> > > > > >
> > > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > > >
> > > > > Sorry, my last reply didn't solve the question,
> > > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > > >
> > > > > which is able to readl_poll_timeout_atomic()
> > > > > and check the HW busy bits in the register FD_INT_EN;
> > > > >
> > > > > if they are not cleared until timeout, we could handle the last
> > > > > processing job.
> > > > > Otherwise, the FD irq handler should have handled the last processing
> > > > > job and we could continue the stop_streaming().
> > > > >
> > > > > For job_abort():
> > > > > static void mtk_fd_job_abort(void *priv)
> > > > > {
> > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > >         u32 val;
> > > > >         u32 ret;
> > > > >
> > > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > > >                                         val,
> > > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > > >
> > > > Hmm, would it be possible to avoid the busy wait by having a
> > > > completion that could be signalled from the interrupt handler?
> > > >
> > > > Best regards,
> > > > Tomasz
> > >
> > > I suppose that would be wakeup a wait queue in the interrupt handler,
> > > the the wait_event_interrupt_timeout() will be used in here and system
> > > suspend e.g. mtk_fd_suspend().
> >
> > Yes, that should work.
> >
> > > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > > mtk_fd_ipi_handler()?
> >
> > Nope, we shouldn't need that.
> >
> > > I think maybe the readl_poll_timeout_atomic would be good enough.
> > >
> >
> > Not really. Busy waiting should be avoided as much as possible. What's
> > the point of entering suspend if you end up burning the power by
> > spinning the CPU for some milliseconds?
> >
> Ok, I see, busy waiting is not a good idea, I will use the wait queue
> instead. the job abort will refine as following:
>
> static void mtk_fd_job_abort(void *priv)
> {
>         struct mtk_fd_ctx *ctx = priv;
>         struct mtk_fd_dev *fd = ctx->fd_dev;
>         u32 ret;
>
>         ret = wait_event_interruptible_timeout
>                 (fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
>                  usecs_to_jiffies(50000));
>         if (ret)
>                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
>         dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);
>
>         fd->fd_irq_result = 0;
> }
>
> static struct v4l2_m2m_ops fd_m2m_ops = {
>         .device_run = mtk_fd_device_run,
>         .job_abort = mtk_fd_job_abort,

I'm not sure we should be using the functon above as the .job_abort
callback. It's expected to abort the job, but we're just waiting for
it to finish. I think we should just call mtk_fd_job_abort() manually
from .stop_streaming.

> };
>
> and we could use it in suspend.
> static int mtk_fd_suspend(struct device *dev)
> {
>         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
>
>         if (pm_runtime_suspended(dev))
>                 return 0;
>
>         if (fd->fd_stream_count)
>                 mtk_fd_job_abort(fd->ctx);
>
>         /* suspend FD HW */
>         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
>         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
>         clk_disable_unprepare(fd->fd_clk);
>         dev_dbg(dev, "%s:disable clock\n", __func__);
>
>         return 0;
> }
>
> static irqreturn_t mtk_fd_irq(int irq, void *data)
> {
>         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
>
>         fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
>         wake_up_interruptible(&fd->wq);

The wake up should be done at the very end of this function. Otherwise
we end up with m2m framework racing with the mtk_fd_hw_job_finish()
below.

Also, I'd use a completion here rather than an open coded wait and
wake-up. The driver could reinit_completion() before queuing a job to
the hardware and the IRQ handler would complete(). There would be no
need to store the IRQ flags in driver data anymore.

>         fd->output->number = readl(fd->fd_base + MTK_FD_REG_OFFSET_RESULT);
>         dev_dbg(fd->dev, "mtk_fd_face_num:%d\n", fd->output->number);
>
>         pm_runtime_put((fd->dev));
>         mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_DONE);
>         return IRQ_HANDLED;
> }
> > >
> > > One more thing, for the mtk_fd_video_device_register()
> > > Sorry that I would need to use intermediate variable here since the 80
> > > columns check.
> > >
> > >         function = MEDIA_ENT_F_PROC_VIDEO_STATISTICS;
> > >         ret = v4l2_m2m_register_media_controller(m2m_dev, vfd, function);
> >
> > Why not just make it like this:
> >
> > ret = v4l2_m2m_register_media_controller(m2m_dev,
> >                 MEDIA_ENT_F_PROC_VIDEO_STATISTICS);
> >
> > The above line is aligned using tabs so that its end is as close to
> > the 80 character boundary as possible.
> >
> I tried but the checkpatch script still gave me a check saying align to
> the scope, I will refine as following:
>
>         ret = v4l2_m2m_register_media_controller
>                 (m2m_dev, vfd, MEDIA_ENT_F_PROC_VIDEO_STATISTICS);

Please ignore that checkpatch warning, it sometimes gives false
positives. The above looks clearly worse and less consistent with
kernel coding style than what I suggested.

Best regards,
Tomasz
Jerry-ch Chen Sept. 4, 2019, 3:38 a.m. UTC | #17
Hi Tomasz,

On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > Hi Tomasz,
> > > > > >
> > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > >
> > > > > > > > Hi Tomasz,
> > > > > > > >
> > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > Hi Jerry,
> > > > > > > > >
> > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > >
> > > > > > > > > > Hi Tomasz,
> > > > > > > > > >
> > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > Hi Jerry,
> > > > > > > > > > >
> > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
[snip]
> > > > > > > > > [snip]
> > > > > > > > >
> > > > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > > +{
> > > > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > > > >
> > > > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > > > removed below?
> > > > > > > > > > >
> > > > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > > > jobs?
> > > > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > > > preferred for the lower latency.
> > > > > > > > >
> > > > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > > > the registers.
> > > > > > > >
> > > > > > > > for example:
> > > > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > > > >
> > > > > > >
> > > > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > > > >
> > > > > > Sorry, my last reply didn't solve the question,
> > > > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > > > >
> > > > > > which is able to readl_poll_timeout_atomic()
> > > > > > and check the HW busy bits in the register FD_INT_EN;
> > > > > >
> > > > > > if they are not cleared until timeout, we could handle the last
> > > > > > processing job.
> > > > > > Otherwise, the FD irq handler should have handled the last processing
> > > > > > job and we could continue the stop_streaming().
> > > > > >
> > > > > > For job_abort():
> > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > {
> > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > >         u32 val;
> > > > > >         u32 ret;
> > > > > >
> > > > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > > > >                                         val,
> > > > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > > > >
> > > > > Hmm, would it be possible to avoid the busy wait by having a
> > > > > completion that could be signalled from the interrupt handler?
> > > > >
> > > > > Best regards,
> > > > > Tomasz
> > > >
> > > > I suppose that would be wakeup a wait queue in the interrupt handler,
> > > > the the wait_event_interrupt_timeout() will be used in here and system
> > > > suspend e.g. mtk_fd_suspend().
> > >
> > > Yes, that should work.
> > >
> > > > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > > > mtk_fd_ipi_handler()?
> > >
> > > Nope, we shouldn't need that.
> > >
> > > > I think maybe the readl_poll_timeout_atomic would be good enough.
> > > >
> > >
> > > Not really. Busy waiting should be avoided as much as possible. What's
> > > the point of entering suspend if you end up burning the power by
> > > spinning the CPU for some milliseconds?
> > >
> > Ok, I see, busy waiting is not a good idea, I will use the wait queue
> > instead. the job abort will refine as following:
> >
> > static void mtk_fd_job_abort(void *priv)
> > {
> >         struct mtk_fd_ctx *ctx = priv;
> >         struct mtk_fd_dev *fd = ctx->fd_dev;
> >         u32 ret;
> >
> >         ret = wait_event_interruptible_timeout
> >                 (fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
> >                  usecs_to_jiffies(50000));
> >         if (ret)
> >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> >         dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);
> >
> >         fd->fd_irq_result = 0;
> > }
> >
> > static struct v4l2_m2m_ops fd_m2m_ops = {
> >         .device_run = mtk_fd_device_run,
> >         .job_abort = mtk_fd_job_abort,
> 
> I'm not sure we should be using the functon above as the .job_abort
> callback. It's expected to abort the job, but we're just waiting for
> it to finish. I think we should just call mtk_fd_job_abort() manually
> from .stop_streaming.
> 

Ok, I will fix it.

> > };
> >
> > and we could use it in suspend.
> > static int mtk_fd_suspend(struct device *dev)
> > {
> >         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
> >
> >         if (pm_runtime_suspended(dev))
> >                 return 0;
> >
> >         if (fd->fd_stream_count)
> >                 mtk_fd_job_abort(fd->ctx);
> >
> >         /* suspend FD HW */
> >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
> >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
> >         clk_disable_unprepare(fd->fd_clk);
> >         dev_dbg(dev, "%s:disable clock\n", __func__);
> >
> >         return 0;
> > }
> >
> > static irqreturn_t mtk_fd_irq(int irq, void *data)
> > {
> >         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> >
> >         fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> >         wake_up_interruptible(&fd->wq);
> 
> The wake up should be done at the very end of this function. Otherwise
> we end up with m2m framework racing with the mtk_fd_hw_job_finish()
> below.
> 
> Also, I'd use a completion here rather than an open coded wait and
> wake-up. The driver could reinit_completion() before queuing a job to
> the hardware and the IRQ handler would complete(). There would be no
> need to store the IRQ flags in driver data anymore.
Ok, It will be refined as following:

suspend and stop streaming will call mtk_fd_job_abort()

static void mtk_fd_job_abort(struct mtk_fd_dev *fd)
{
	u32 ret;

	ret = wait_for_completion_timeout(&fd->fd_irq_done,
					  msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
	if (ret)
		mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
}

complete at irq handler

static irqreturn_t mtk_fd_irq(int irq, void *data)
{
	struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;

	/* must read this register otherwise HW will keep sending irq */
	readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
	fd->output->number = readl(fd->fd_base + MTK_FD_REG_OFFSET_RESULT);
	dev_dbg(fd->dev, "mtk_fd_face_num:%d\n", fd->output->number);

	pm_runtime_put((fd->dev));
	mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_DONE);
	complete(&fd->fd_irq_done);
	return IRQ_HANDLED;
}

and reinit_completion before time before sending a job to HW

static int mtk_fd_hw_job_exec(struct mtk_fd_dev *fd,
			      struct fd_enq_param *fd_param)
{
	struct ipi_message fd_ipi_msg;
	int ret;

	pm_runtime_get_sync((fd->dev));

	reinit_completion(&fd->fd_irq_done);
	fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_ENQUEUE;
	memcpy(&fd_ipi_msg.fd_enq_param, fd_param, sizeof(struct
fd_enq_param));
	ret = scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
			   sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT);
	if (ret) {
		pm_runtime_put((fd->dev));
		mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
		return ret;
	}
	return 0;
}

> 
> >         fd->output->number = readl(fd->fd_base + MTK_FD_REG_OFFSET_RESULT);
> >         dev_dbg(fd->dev, "mtk_fd_face_num:%d\n", fd->output->number);
> >
> >         pm_runtime_put((fd->dev));
> >         mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_DONE);
> >         return IRQ_HANDLED;
> > }
> > > >
> > > > One more thing, for the mtk_fd_video_device_register()
> > > > Sorry that I would need to use intermediate variable here since the 80
> > > > columns check.
> > > >
> > > >         function = MEDIA_ENT_F_PROC_VIDEO_STATISTICS;
> > > >         ret = v4l2_m2m_register_media_controller(m2m_dev, vfd, function);
> > >
> > > Why not just make it like this:
> > >
> > > ret = v4l2_m2m_register_media_controller(m2m_dev,
> > >                 MEDIA_ENT_F_PROC_VIDEO_STATISTICS);
> > >
> > > The above line is aligned using tabs so that its end is as close to
> > > the 80 character boundary as possible.
> > >
> > I tried but the checkpatch script still gave me a check saying align to
> > the scope, I will refine as following:
> >
> >         ret = v4l2_m2m_register_media_controller
> >                 (m2m_dev, vfd, MEDIA_ENT_F_PROC_VIDEO_STATISTICS);
> 
> Please ignore that checkpatch warning, it sometimes gives false
> positives. The above looks clearly worse and less consistent with
> kernel coding style than what I suggested.
> 
Ok, I see, I will fix it.

Thanks and best regards,
Jerry

> Best regards,
> Tomasz
Tomasz Figa Sept. 4, 2019, 4:15 a.m. UTC | #18
On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
<Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > Hi Tomasz,
> > > > > > >
> > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > >
> > > > > > > > > Hi Tomasz,
> > > > > > > > >
> > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > Hi Jerry,
> > > > > > > > > >
> > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > >
> > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > >
> > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> [snip]
> > > > > > > > > > [snip]
> > > > > > > > > >
> > > > > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > > > +{
> > > > > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > > > > >
> > > > > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > > > > removed below?
> > > > > > > > > > > >
> > > > > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > > > > jobs?
> > > > > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > > > > preferred for the lower latency.
> > > > > > > > > >
> > > > > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > > > > the registers.
> > > > > > > > >
> > > > > > > > > for example:
> > > > > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > > > > >
> > > > > > > >
> > > > > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > > > > >
> > > > > > > Sorry, my last reply didn't solve the question,
> > > > > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > > > > >
> > > > > > > which is able to readl_poll_timeout_atomic()
> > > > > > > and check the HW busy bits in the register FD_INT_EN;
> > > > > > >
> > > > > > > if they are not cleared until timeout, we could handle the last
> > > > > > > processing job.
> > > > > > > Otherwise, the FD irq handler should have handled the last processing
> > > > > > > job and we could continue the stop_streaming().
> > > > > > >
> > > > > > > For job_abort():
> > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > {
> > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > >         u32 val;
> > > > > > >         u32 ret;
> > > > > > >
> > > > > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > > > > >                                         val,
> > > > > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > > > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > > > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > > > > >
> > > > > > Hmm, would it be possible to avoid the busy wait by having a
> > > > > > completion that could be signalled from the interrupt handler?
> > > > > >
> > > > > > Best regards,
> > > > > > Tomasz
> > > > >
> > > > > I suppose that would be wakeup a wait queue in the interrupt handler,
> > > > > the the wait_event_interrupt_timeout() will be used in here and system
> > > > > suspend e.g. mtk_fd_suspend().
> > > >
> > > > Yes, that should work.
> > > >
> > > > > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > > > > mtk_fd_ipi_handler()?
> > > >
> > > > Nope, we shouldn't need that.
> > > >
> > > > > I think maybe the readl_poll_timeout_atomic would be good enough.
> > > > >
> > > >
> > > > Not really. Busy waiting should be avoided as much as possible. What's
> > > > the point of entering suspend if you end up burning the power by
> > > > spinning the CPU for some milliseconds?
> > > >
> > > Ok, I see, busy waiting is not a good idea, I will use the wait queue
> > > instead. the job abort will refine as following:
> > >
> > > static void mtk_fd_job_abort(void *priv)
> > > {
> > >         struct mtk_fd_ctx *ctx = priv;
> > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > >         u32 ret;
> > >
> > >         ret = wait_event_interruptible_timeout
> > >                 (fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
> > >                  usecs_to_jiffies(50000));
> > >         if (ret)
> > >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> > >         dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);
> > >
> > >         fd->fd_irq_result = 0;
> > > }
> > >
> > > static struct v4l2_m2m_ops fd_m2m_ops = {
> > >         .device_run = mtk_fd_device_run,
> > >         .job_abort = mtk_fd_job_abort,
> >
> > I'm not sure we should be using the functon above as the .job_abort
> > callback. It's expected to abort the job, but we're just waiting for
> > it to finish. I think we should just call mtk_fd_job_abort() manually
> > from .stop_streaming.
> >
>
> Ok, I will fix it.
>
> > > };
> > >
> > > and we could use it in suspend.
> > > static int mtk_fd_suspend(struct device *dev)
> > > {
> > >         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
> > >
> > >         if (pm_runtime_suspended(dev))
> > >                 return 0;
> > >
> > >         if (fd->fd_stream_count)
> > >                 mtk_fd_job_abort(fd->ctx);
> > >
> > >         /* suspend FD HW */
> > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
> > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
> > >         clk_disable_unprepare(fd->fd_clk);
> > >         dev_dbg(dev, "%s:disable clock\n", __func__);
> > >
> > >         return 0;
> > > }
> > >
> > > static irqreturn_t mtk_fd_irq(int irq, void *data)
> > > {
> > >         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> > >
> > >         fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> > >         wake_up_interruptible(&fd->wq);
> >
> > The wake up should be done at the very end of this function. Otherwise
> > we end up with m2m framework racing with the mtk_fd_hw_job_finish()
> > below.
> >
> > Also, I'd use a completion here rather than an open coded wait and
> > wake-up. The driver could reinit_completion() before queuing a job to
> > the hardware and the IRQ handler would complete(). There would be no
> > need to store the IRQ flags in driver data anymore.
> Ok, It will be refined as following:
>
> suspend and stop streaming will call mtk_fd_job_abort()
>
> static void mtk_fd_job_abort(struct mtk_fd_dev *fd)
> {
>         u32 ret;
>
>         ret = wait_for_completion_timeout(&fd->fd_irq_done,
>                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
>         if (ret)

For the _timeout variants, !ret means the timeout and ret > 0 means
that the wait ended successfully before.

Also please make sure that the timeout is big enough not to happen
normally on a heavily loaded system. Something like 1 second should be
good.

>                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> }
>
> complete at irq handler
>
> static irqreturn_t mtk_fd_irq(int irq, void *data)
> {
>         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
>
>         /* must read this register otherwise HW will keep sending irq */
>         readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
>         fd->output->number = readl(fd->fd_base + MTK_FD_REG_OFFSET_RESULT);
>         dev_dbg(fd->dev, "mtk_fd_face_num:%d\n", fd->output->number);
>
>         pm_runtime_put((fd->dev));
>         mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_DONE);
>         complete(&fd->fd_irq_done);
>         return IRQ_HANDLED;
> }
>
> and reinit_completion before time before sending a job to HW
>
> static int mtk_fd_hw_job_exec(struct mtk_fd_dev *fd,
>                               struct fd_enq_param *fd_param)
> {
>         struct ipi_message fd_ipi_msg;
>         int ret;
>
>         pm_runtime_get_sync((fd->dev));
>
>         reinit_completion(&fd->fd_irq_done);
>         fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_ENQUEUE;
>         memcpy(&fd_ipi_msg.fd_enq_param, fd_param, sizeof(struct
> fd_enq_param));
>         ret = scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
>                            sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT);
>         if (ret) {
>                 pm_runtime_put((fd->dev));
>                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
>                 return ret;
>         }
>         return 0;
> }

Looks good, thanks. Please also don't forget about init_completion()
in driver probe.

Best regards,
Tomasz
Jerry-ch Chen Sept. 4, 2019, 6:09 a.m. UTC | #19
Hi Tomasz,

On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > >
> > > > > > > > Hi Tomasz,
> > > > > > > >
> > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > >
> > > > > > > > > > Hi Tomasz,
> > > > > > > > > >
> > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > Hi Jerry,
> > > > > > > > > > >
> > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > >
> > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > [snip]
> > > > > > > > > > > [snip]
> > > > > > > > > > >
> > > > > > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > > > > > >
> > > > > > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > > > > > removed below?
> > > > > > > > > > > > >
> > > > > > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > > > > > jobs?
> > > > > > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > > > > > preferred for the lower latency.
> > > > > > > > > > >
> > > > > > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > > > > > the registers.
> > > > > > > > > >
> > > > > > > > > > for example:
> > > > > > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > > > > > >
> > > > > > > > Sorry, my last reply didn't solve the question,
> > > > > > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > > > > > >
> > > > > > > > which is able to readl_poll_timeout_atomic()
> > > > > > > > and check the HW busy bits in the register FD_INT_EN;
> > > > > > > >
> > > > > > > > if they are not cleared until timeout, we could handle the last
> > > > > > > > processing job.
> > > > > > > > Otherwise, the FD irq handler should have handled the last processing
> > > > > > > > job and we could continue the stop_streaming().
> > > > > > > >
> > > > > > > > For job_abort():
> > > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > > {
> > > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > >         u32 val;
> > > > > > > >         u32 ret;
> > > > > > > >
> > > > > > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > > > > > >                                         val,
> > > > > > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > > > > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > > > > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > > > > > >
> > > > > > > Hmm, would it be possible to avoid the busy wait by having a
> > > > > > > completion that could be signalled from the interrupt handler?
> > > > > > >
> > > > > > > Best regards,
> > > > > > > Tomasz
> > > > > >
> > > > > > I suppose that would be wakeup a wait queue in the interrupt handler,
> > > > > > the the wait_event_interrupt_timeout() will be used in here and system
> > > > > > suspend e.g. mtk_fd_suspend().
> > > > >
> > > > > Yes, that should work.
> > > > >
> > > > > > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > > > > > mtk_fd_ipi_handler()?
> > > > >
> > > > > Nope, we shouldn't need that.
> > > > >
> > > > > > I think maybe the readl_poll_timeout_atomic would be good enough.
> > > > > >
> > > > >
> > > > > Not really. Busy waiting should be avoided as much as possible. What's
> > > > > the point of entering suspend if you end up burning the power by
> > > > > spinning the CPU for some milliseconds?
> > > > >
> > > > Ok, I see, busy waiting is not a good idea, I will use the wait queue
> > > > instead. the job abort will refine as following:
> > > >
> > > > static void mtk_fd_job_abort(void *priv)
> > > > {
> > > >         struct mtk_fd_ctx *ctx = priv;
> > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > >         u32 ret;
> > > >
> > > >         ret = wait_event_interruptible_timeout
> > > >                 (fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
> > > >                  usecs_to_jiffies(50000));
> > > >         if (ret)
> > > >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> > > >         dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);
> > > >
> > > >         fd->fd_irq_result = 0;
> > > > }
> > > >
> > > > static struct v4l2_m2m_ops fd_m2m_ops = {
> > > >         .device_run = mtk_fd_device_run,
> > > >         .job_abort = mtk_fd_job_abort,
> > >
> > > I'm not sure we should be using the functon above as the .job_abort
> > > callback. It's expected to abort the job, but we're just waiting for
> > > it to finish. I think we should just call mtk_fd_job_abort() manually
> > > from .stop_streaming.
> > >
> >
> > Ok, I will fix it.
> >
> > > > };
> > > >
> > > > and we could use it in suspend.
> > > > static int mtk_fd_suspend(struct device *dev)
> > > > {
> > > >         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
> > > >
> > > >         if (pm_runtime_suspended(dev))
> > > >                 return 0;
> > > >
> > > >         if (fd->fd_stream_count)
> > > >                 mtk_fd_job_abort(fd->ctx);
> > > >
> > > >         /* suspend FD HW */
> > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
> > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
> > > >         clk_disable_unprepare(fd->fd_clk);
> > > >         dev_dbg(dev, "%s:disable clock\n", __func__);
> > > >
> > > >         return 0;
> > > > }
> > > >
> > > > static irqreturn_t mtk_fd_irq(int irq, void *data)
> > > > {
> > > >         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> > > >
> > > >         fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> > > >         wake_up_interruptible(&fd->wq);
> > >
> > > The wake up should be done at the very end of this function. Otherwise
> > > we end up with m2m framework racing with the mtk_fd_hw_job_finish()
> > > below.
> > >
> > > Also, I'd use a completion here rather than an open coded wait and
> > > wake-up. The driver could reinit_completion() before queuing a job to
> > > the hardware and the IRQ handler would complete(). There would be no
> > > need to store the IRQ flags in driver data anymore.
> > Ok, It will be refined as following:
> >
> > suspend and stop streaming will call mtk_fd_job_abort()
> >
> > static void mtk_fd_job_abort(struct mtk_fd_dev *fd)
> > {
> >         u32 ret;
> >
> >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> >         if (ret)
> 
> For the _timeout variants, !ret means the timeout and ret > 0 means
> that the wait ended successfully before.
> 
Thanks, fixed.

> Also please make sure that the timeout is big enough not to happen
> normally on a heavily loaded system. Something like 1 second should be
> good.
> 
Ok, I will make it 1 second
#define MTK_FD_HW_TIMEOUT 1000

Also, I will add a condition in mtk_fd_vb2_stop_streaming(), since it
would be called twice, now it works fine whether I reuse the condition
with mtk_fd_hw_disconnect or not, however, I think do it before buffer
remove looks more reasonable.

static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
{
	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
	struct mtk_fd_dev *fd = ctx->fd_dev;
	struct vb2_v4l2_buffer *vb;
	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
	struct v4l2_m2m_queue_ctx *queue_ctx;

	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
		mtk_fd_job_abort(fd);

	queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
					&m2m_ctx->out_q_ctx :
					&m2m_ctx->cap_q_ctx;
	while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
		v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);

	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
		mtk_fd_hw_disconnect(fd);
}

> >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> > }
> >
> > complete at irq handler
> >
> > static irqreturn_t mtk_fd_irq(int irq, void *data)
> > {
> >         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> >
> >         /* must read this register otherwise HW will keep sending irq */
> >         readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> >         fd->output->number = readl(fd->fd_base + MTK_FD_REG_OFFSET_RESULT);
> >         dev_dbg(fd->dev, "mtk_fd_face_num:%d\n", fd->output->number);
> >
> >         pm_runtime_put((fd->dev));
> >         mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_DONE);
> >         complete(&fd->fd_irq_done);
> >         return IRQ_HANDLED;
> > }
> >
> > and reinit_completion before time before sending a job to HW
> >
> > static int mtk_fd_hw_job_exec(struct mtk_fd_dev *fd,
> >                               struct fd_enq_param *fd_param)
> > {
> >         struct ipi_message fd_ipi_msg;
> >         int ret;
> >
> >         pm_runtime_get_sync((fd->dev));
> >
> >         reinit_completion(&fd->fd_irq_done);
> >         fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_ENQUEUE;
> >         memcpy(&fd_ipi_msg.fd_enq_param, fd_param, sizeof(struct
> > fd_enq_param));
> >         ret = scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
> >                            sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT);
> >         if (ret) {
> >                 pm_runtime_put((fd->dev));
> >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> >                 return ret;
> >         }
> >         return 0;
> > }
> 
> Looks good, thanks. Please also don't forget about init_completion()
> in driver probe.
> 
Yes, thanks for remind.

Best regards,
Jerry

> Best regards,
> Tomasz
Tomasz Figa Sept. 4, 2019, 6:34 a.m. UTC | #20
On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > > > >
> > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > >
> > > > > > > > > Hi Tomasz,
> > > > > > > > >
> > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > >
> > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > >
> > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > [snip]
> > > > > > > > > > > > [snip]
> > > > > > > > > > > >
> > > > > > > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > > > > > > removed below?
> > > > > > > > > > > > > >
> > > > > > > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > > > > > > jobs?
> > > > > > > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > > > > > > preferred for the lower latency.
> > > > > > > > > > > >
> > > > > > > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > > > > > > the registers.
> > > > > > > > > > >
> > > > > > > > > > > for example:
> > > > > > > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > > > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > > > > > > >
> > > > > > > > > Sorry, my last reply didn't solve the question,
> > > > > > > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > > > > > > >
> > > > > > > > > which is able to readl_poll_timeout_atomic()
> > > > > > > > > and check the HW busy bits in the register FD_INT_EN;
> > > > > > > > >
> > > > > > > > > if they are not cleared until timeout, we could handle the last
> > > > > > > > > processing job.
> > > > > > > > > Otherwise, the FD irq handler should have handled the last processing
> > > > > > > > > job and we could continue the stop_streaming().
> > > > > > > > >
> > > > > > > > > For job_abort():
> > > > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > > > {
> > > > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > >         u32 val;
> > > > > > > > >         u32 ret;
> > > > > > > > >
> > > > > > > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > > > > > > >                                         val,
> > > > > > > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > > > > > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > > > > > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > > > > > > >
> > > > > > > > Hmm, would it be possible to avoid the busy wait by having a
> > > > > > > > completion that could be signalled from the interrupt handler?
> > > > > > > >
> > > > > > > > Best regards,
> > > > > > > > Tomasz
> > > > > > >
> > > > > > > I suppose that would be wakeup a wait queue in the interrupt handler,
> > > > > > > the the wait_event_interrupt_timeout() will be used in here and system
> > > > > > > suspend e.g. mtk_fd_suspend().
> > > > > >
> > > > > > Yes, that should work.
> > > > > >
> > > > > > > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > > > > > > mtk_fd_ipi_handler()?
> > > > > >
> > > > > > Nope, we shouldn't need that.
> > > > > >
> > > > > > > I think maybe the readl_poll_timeout_atomic would be good enough.
> > > > > > >
> > > > > >
> > > > > > Not really. Busy waiting should be avoided as much as possible. What's
> > > > > > the point of entering suspend if you end up burning the power by
> > > > > > spinning the CPU for some milliseconds?
> > > > > >
> > > > > Ok, I see, busy waiting is not a good idea, I will use the wait queue
> > > > > instead. the job abort will refine as following:
> > > > >
> > > > > static void mtk_fd_job_abort(void *priv)
> > > > > {
> > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > >         u32 ret;
> > > > >
> > > > >         ret = wait_event_interruptible_timeout
> > > > >                 (fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
> > > > >                  usecs_to_jiffies(50000));
> > > > >         if (ret)
> > > > >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> > > > >         dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);
> > > > >
> > > > >         fd->fd_irq_result = 0;
> > > > > }
> > > > >
> > > > > static struct v4l2_m2m_ops fd_m2m_ops = {
> > > > >         .device_run = mtk_fd_device_run,
> > > > >         .job_abort = mtk_fd_job_abort,
> > > >
> > > > I'm not sure we should be using the functon above as the .job_abort
> > > > callback. It's expected to abort the job, but we're just waiting for
> > > > it to finish. I think we should just call mtk_fd_job_abort() manually
> > > > from .stop_streaming.
> > > >
> > >
> > > Ok, I will fix it.
> > >
> > > > > };
> > > > >
> > > > > and we could use it in suspend.
> > > > > static int mtk_fd_suspend(struct device *dev)
> > > > > {
> > > > >         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
> > > > >
> > > > >         if (pm_runtime_suspended(dev))
> > > > >                 return 0;
> > > > >
> > > > >         if (fd->fd_stream_count)
> > > > >                 mtk_fd_job_abort(fd->ctx);
> > > > >
> > > > >         /* suspend FD HW */
> > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
> > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
> > > > >         clk_disable_unprepare(fd->fd_clk);
> > > > >         dev_dbg(dev, "%s:disable clock\n", __func__);
> > > > >
> > > > >         return 0;
> > > > > }
> > > > >
> > > > > static irqreturn_t mtk_fd_irq(int irq, void *data)
> > > > > {
> > > > >         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> > > > >
> > > > >         fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> > > > >         wake_up_interruptible(&fd->wq);
> > > >
> > > > The wake up should be done at the very end of this function. Otherwise
> > > > we end up with m2m framework racing with the mtk_fd_hw_job_finish()
> > > > below.
> > > >
> > > > Also, I'd use a completion here rather than an open coded wait and
> > > > wake-up. The driver could reinit_completion() before queuing a job to
> > > > the hardware and the IRQ handler would complete(). There would be no
> > > > need to store the IRQ flags in driver data anymore.
> > > Ok, It will be refined as following:
> > >
> > > suspend and stop streaming will call mtk_fd_job_abort()
> > >
> > > static void mtk_fd_job_abort(struct mtk_fd_dev *fd)
> > > {
> > >         u32 ret;
> > >
> > >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > >         if (ret)
> >
> > For the _timeout variants, !ret means the timeout and ret > 0 means
> > that the wait ended successfully before.
> >
> Thanks, fixed.
>
> > Also please make sure that the timeout is big enough not to happen
> > normally on a heavily loaded system. Something like 1 second should be
> > good.
> >
> Ok, I will make it 1 second
> #define MTK_FD_HW_TIMEOUT 1000
>
> Also, I will add a condition in mtk_fd_vb2_stop_streaming(), since it
> would be called twice, now it works fine whether I reuse the condition
> with mtk_fd_hw_disconnect or not, however, I think do it before buffer
> remove looks more reasonable.
>
> static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> {
>         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
>         struct mtk_fd_dev *fd = ctx->fd_dev;
>         struct vb2_v4l2_buffer *vb;
>         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
>         struct v4l2_m2m_queue_ctx *queue_ctx;
>
>         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
>                 mtk_fd_job_abort(fd);

We need to do it regardless of the queue type, otherwise we could
still free CAPTURE buffers before the hardware releases them.

Best regards,
Tomasz
Jerry-ch Chen Sept. 4, 2019, 8:09 a.m. UTC | #21
Hi Tomasz,

On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > Hi Tomasz,
> > > > > >
> > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > >
> > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > >
> > > > > > > > > > Hi Tomasz,
> > > > > > > > > >
> > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > >
> > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > [snip]
> > > > > > > > > > > > > [snip]
> > > > > > > > > > > > >
> > > > > > > > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > > > > > > > removed below?
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > > > > > > > jobs?
> > > > > > > > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > > > > > > > preferred for the lower latency.
> > > > > > > > > > > > >
> > > > > > > > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > > > > > > > the registers.
> > > > > > > > > > > >
> > > > > > > > > > > > for example:
> > > > > > > > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > > > > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > > > > > > > >
> > > > > > > > > > Sorry, my last reply didn't solve the question,
> > > > > > > > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > > > > > > > >
> > > > > > > > > > which is able to readl_poll_timeout_atomic()
> > > > > > > > > > and check the HW busy bits in the register FD_INT_EN;
> > > > > > > > > >
> > > > > > > > > > if they are not cleared until timeout, we could handle the last
> > > > > > > > > > processing job.
> > > > > > > > > > Otherwise, the FD irq handler should have handled the last processing
> > > > > > > > > > job and we could continue the stop_streaming().
> > > > > > > > > >
> > > > > > > > > > For job_abort():
> > > > > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > > > > {
> > > > > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > > >         u32 val;
> > > > > > > > > >         u32 ret;
> > > > > > > > > >
> > > > > > > > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > > > > > > > >                                         val,
> > > > > > > > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > > > > > > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > > > > > > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > > > > > > > >
> > > > > > > > > Hmm, would it be possible to avoid the busy wait by having a
> > > > > > > > > completion that could be signalled from the interrupt handler?
> > > > > > > > >
> > > > > > > > > Best regards,
> > > > > > > > > Tomasz
> > > > > > > >
> > > > > > > > I suppose that would be wakeup a wait queue in the interrupt handler,
> > > > > > > > the the wait_event_interrupt_timeout() will be used in here and system
> > > > > > > > suspend e.g. mtk_fd_suspend().
> > > > > > >
> > > > > > > Yes, that should work.
> > > > > > >
> > > > > > > > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > > > > > > > mtk_fd_ipi_handler()?
> > > > > > >
> > > > > > > Nope, we shouldn't need that.
> > > > > > >
> > > > > > > > I think maybe the readl_poll_timeout_atomic would be good enough.
> > > > > > > >
> > > > > > >
> > > > > > > Not really. Busy waiting should be avoided as much as possible. What's
> > > > > > > the point of entering suspend if you end up burning the power by
> > > > > > > spinning the CPU for some milliseconds?
> > > > > > >
> > > > > > Ok, I see, busy waiting is not a good idea, I will use the wait queue
> > > > > > instead. the job abort will refine as following:
> > > > > >
> > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > {
> > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > >         u32 ret;
> > > > > >
> > > > > >         ret = wait_event_interruptible_timeout
> > > > > >                 (fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
> > > > > >                  usecs_to_jiffies(50000));
> > > > > >         if (ret)
> > > > > >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> > > > > >         dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);
> > > > > >
> > > > > >         fd->fd_irq_result = 0;
> > > > > > }
> > > > > >
> > > > > > static struct v4l2_m2m_ops fd_m2m_ops = {
> > > > > >         .device_run = mtk_fd_device_run,
> > > > > >         .job_abort = mtk_fd_job_abort,
> > > > >
> > > > > I'm not sure we should be using the functon above as the .job_abort
> > > > > callback. It's expected to abort the job, but we're just waiting for
> > > > > it to finish. I think we should just call mtk_fd_job_abort() manually
> > > > > from .stop_streaming.
> > > > >
> > > >
> > > > Ok, I will fix it.
> > > >
> > > > > > };
> > > > > >
> > > > > > and we could use it in suspend.
> > > > > > static int mtk_fd_suspend(struct device *dev)
> > > > > > {
> > > > > >         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
> > > > > >
> > > > > >         if (pm_runtime_suspended(dev))
> > > > > >                 return 0;
> > > > > >
> > > > > >         if (fd->fd_stream_count)
> > > > > >                 mtk_fd_job_abort(fd->ctx);
> > > > > >
> > > > > >         /* suspend FD HW */
> > > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
> > > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
> > > > > >         clk_disable_unprepare(fd->fd_clk);
> > > > > >         dev_dbg(dev, "%s:disable clock\n", __func__);
> > > > > >
> > > > > >         return 0;
> > > > > > }
> > > > > >
> > > > > > static irqreturn_t mtk_fd_irq(int irq, void *data)
> > > > > > {
> > > > > >         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> > > > > >
> > > > > >         fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> > > > > >         wake_up_interruptible(&fd->wq);
> > > > >
> > > > > The wake up should be done at the very end of this function. Otherwise
> > > > > we end up with m2m framework racing with the mtk_fd_hw_job_finish()
> > > > > below.
> > > > >
> > > > > Also, I'd use a completion here rather than an open coded wait and
> > > > > wake-up. The driver could reinit_completion() before queuing a job to
> > > > > the hardware and the IRQ handler would complete(). There would be no
> > > > > need to store the IRQ flags in driver data anymore.
> > > > Ok, It will be refined as following:
> > > >
> > > > suspend and stop streaming will call mtk_fd_job_abort()
> > > >
> > > > static void mtk_fd_job_abort(struct mtk_fd_dev *fd)
> > > > {
> > > >         u32 ret;
> > > >
> > > >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > >         if (ret)
> > >
> > > For the _timeout variants, !ret means the timeout and ret > 0 means
> > > that the wait ended successfully before.
> > >
> > Thanks, fixed.
> >
> > > Also please make sure that the timeout is big enough not to happen
> > > normally on a heavily loaded system. Something like 1 second should be
> > > good.
> > >
> > Ok, I will make it 1 second
> > #define MTK_FD_HW_TIMEOUT 1000
> >
> > Also, I will add a condition in mtk_fd_vb2_stop_streaming(), since it
> > would be called twice, now it works fine whether I reuse the condition
> > with mtk_fd_hw_disconnect or not, however, I think do it before buffer
> > remove looks more reasonable.
> >
> > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > {
> >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> >         struct mtk_fd_dev *fd = ctx->fd_dev;
> >         struct vb2_v4l2_buffer *vb;
> >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> >         struct v4l2_m2m_queue_ctx *queue_ctx;
> >
> >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> >                 mtk_fd_job_abort(fd);
> 
> We need to do it regardless of the queue type, otherwise we could
> still free CAPTURE buffers before the hardware releases them.
> 

I think we may read the fd->fd_irq_done.done and do wait for completion
if it's not being done.
How do you think?

static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
{
	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
	struct mtk_fd_dev *fd = ctx->fd_dev;
	struct vb2_v4l2_buffer *vb;
	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
	struct v4l2_m2m_queue_ctx *queue_ctx;
	u32 ret;

	if (!fd->fd_irq_done.done)
		ret = wait_for_completion_timeout(&fd->fd_irq_done,
					  	  msecs_to_jiffies(
					  	  	MTK_FD_HW_TIMEOUT));
	queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
					&m2m_ctx->out_q_ctx :
					&m2m_ctx->cap_q_ctx;
	while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
		v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);

	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
		mtk_fd_hw_disconnect(fd);
}

I've also tried to wait completion unconditionally for both queues and
the second time will wait until timeout, as a result, it takes longer to
swap the camera every time and close the camera app.

Thanks and best regards,
Jerry

> Best regards,
> Tomasz
Tomasz Figa Sept. 4, 2019, 8:25 a.m. UTC | #22
On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > > > >
> > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > Hi Tomasz,
> > > > > > >
> > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > >
> > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > >
> > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > > [snip]
> > > > > > > > > > > > > > [snip]
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > > > > > > > > removed below?
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > > > > > > > > jobs?
> > > > > > > > > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > > > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > > > > > > > > preferred for the lower latency.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > > > > > > > > the registers.
> > > > > > > > > > > > >
> > > > > > > > > > > > > for example:
> > > > > > > > > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > > > > > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > > > > > > > > >
> > > > > > > > > > > Sorry, my last reply didn't solve the question,
> > > > > > > > > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > > > > > > > > >
> > > > > > > > > > > which is able to readl_poll_timeout_atomic()
> > > > > > > > > > > and check the HW busy bits in the register FD_INT_EN;
> > > > > > > > > > >
> > > > > > > > > > > if they are not cleared until timeout, we could handle the last
> > > > > > > > > > > processing job.
> > > > > > > > > > > Otherwise, the FD irq handler should have handled the last processing
> > > > > > > > > > > job and we could continue the stop_streaming().
> > > > > > > > > > >
> > > > > > > > > > > For job_abort():
> > > > > > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > > > > > {
> > > > > > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > > > >         u32 val;
> > > > > > > > > > >         u32 ret;
> > > > > > > > > > >
> > > > > > > > > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > > > > > > > > >                                         val,
> > > > > > > > > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > > > > > > > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > > > > > > > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > > > > > > > > >
> > > > > > > > > > Hmm, would it be possible to avoid the busy wait by having a
> > > > > > > > > > completion that could be signalled from the interrupt handler?
> > > > > > > > > >
> > > > > > > > > > Best regards,
> > > > > > > > > > Tomasz
> > > > > > > > >
> > > > > > > > > I suppose that would be wakeup a wait queue in the interrupt handler,
> > > > > > > > > the the wait_event_interrupt_timeout() will be used in here and system
> > > > > > > > > suspend e.g. mtk_fd_suspend().
> > > > > > > >
> > > > > > > > Yes, that should work.
> > > > > > > >
> > > > > > > > > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > > > > > > > > mtk_fd_ipi_handler()?
> > > > > > > >
> > > > > > > > Nope, we shouldn't need that.
> > > > > > > >
> > > > > > > > > I think maybe the readl_poll_timeout_atomic would be good enough.
> > > > > > > > >
> > > > > > > >
> > > > > > > > Not really. Busy waiting should be avoided as much as possible. What's
> > > > > > > > the point of entering suspend if you end up burning the power by
> > > > > > > > spinning the CPU for some milliseconds?
> > > > > > > >
> > > > > > > Ok, I see, busy waiting is not a good idea, I will use the wait queue
> > > > > > > instead. the job abort will refine as following:
> > > > > > >
> > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > {
> > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > >         u32 ret;
> > > > > > >
> > > > > > >         ret = wait_event_interruptible_timeout
> > > > > > >                 (fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
> > > > > > >                  usecs_to_jiffies(50000));
> > > > > > >         if (ret)
> > > > > > >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> > > > > > >         dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);
> > > > > > >
> > > > > > >         fd->fd_irq_result = 0;
> > > > > > > }
> > > > > > >
> > > > > > > static struct v4l2_m2m_ops fd_m2m_ops = {
> > > > > > >         .device_run = mtk_fd_device_run,
> > > > > > >         .job_abort = mtk_fd_job_abort,
> > > > > >
> > > > > > I'm not sure we should be using the functon above as the .job_abort
> > > > > > callback. It's expected to abort the job, but we're just waiting for
> > > > > > it to finish. I think we should just call mtk_fd_job_abort() manually
> > > > > > from .stop_streaming.
> > > > > >
> > > > >
> > > > > Ok, I will fix it.
> > > > >
> > > > > > > };
> > > > > > >
> > > > > > > and we could use it in suspend.
> > > > > > > static int mtk_fd_suspend(struct device *dev)
> > > > > > > {
> > > > > > >         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
> > > > > > >
> > > > > > >         if (pm_runtime_suspended(dev))
> > > > > > >                 return 0;
> > > > > > >
> > > > > > >         if (fd->fd_stream_count)
> > > > > > >                 mtk_fd_job_abort(fd->ctx);
> > > > > > >
> > > > > > >         /* suspend FD HW */
> > > > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
> > > > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
> > > > > > >         clk_disable_unprepare(fd->fd_clk);
> > > > > > >         dev_dbg(dev, "%s:disable clock\n", __func__);
> > > > > > >
> > > > > > >         return 0;
> > > > > > > }
> > > > > > >
> > > > > > > static irqreturn_t mtk_fd_irq(int irq, void *data)
> > > > > > > {
> > > > > > >         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> > > > > > >
> > > > > > >         fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> > > > > > >         wake_up_interruptible(&fd->wq);
> > > > > >
> > > > > > The wake up should be done at the very end of this function. Otherwise
> > > > > > we end up with m2m framework racing with the mtk_fd_hw_job_finish()
> > > > > > below.
> > > > > >
> > > > > > Also, I'd use a completion here rather than an open coded wait and
> > > > > > wake-up. The driver could reinit_completion() before queuing a job to
> > > > > > the hardware and the IRQ handler would complete(). There would be no
> > > > > > need to store the IRQ flags in driver data anymore.
> > > > > Ok, It will be refined as following:
> > > > >
> > > > > suspend and stop streaming will call mtk_fd_job_abort()
> > > > >
> > > > > static void mtk_fd_job_abort(struct mtk_fd_dev *fd)
> > > > > {
> > > > >         u32 ret;
> > > > >
> > > > >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > > >         if (ret)
> > > >
> > > > For the _timeout variants, !ret means the timeout and ret > 0 means
> > > > that the wait ended successfully before.
> > > >
> > > Thanks, fixed.
> > >
> > > > Also please make sure that the timeout is big enough not to happen
> > > > normally on a heavily loaded system. Something like 1 second should be
> > > > good.
> > > >
> > > Ok, I will make it 1 second
> > > #define MTK_FD_HW_TIMEOUT 1000
> > >
> > > Also, I will add a condition in mtk_fd_vb2_stop_streaming(), since it
> > > would be called twice, now it works fine whether I reuse the condition
> > > with mtk_fd_hw_disconnect or not, however, I think do it before buffer
> > > remove looks more reasonable.
> > >
> > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > {
> > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > >         struct vb2_v4l2_buffer *vb;
> > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > >
> > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > >                 mtk_fd_job_abort(fd);
> >
> > We need to do it regardless of the queue type, otherwise we could
> > still free CAPTURE buffers before the hardware releases them.
> >
>
> I think we may read the fd->fd_irq_done.done and do wait for completion
> if it's not being done.
> How do you think?
>
> static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> {
>         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
>         struct mtk_fd_dev *fd = ctx->fd_dev;
>         struct vb2_v4l2_buffer *vb;
>         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
>         struct v4l2_m2m_queue_ctx *queue_ctx;
>         u32 ret;
>
>         if (!fd->fd_irq_done.done)

We shouldn't access internal fields of completion.

>                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
>                                                   msecs_to_jiffies(
>                                                         MTK_FD_HW_TIMEOUT));
>         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
>                                         &m2m_ctx->out_q_ctx :
>                                         &m2m_ctx->cap_q_ctx;
>         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
>                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
>
>         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
>                 mtk_fd_hw_disconnect(fd);
> }
>
> I've also tried to wait completion unconditionally for both queues and
> the second time will wait until timeout, as a result, it takes longer to
> swap the camera every time and close the camera app.

I think it should work better if we call complete_all() instead of complete().

Best regards,
Tomasz
Jerry-ch Chen Sept. 4, 2019, 9:01 a.m. UTC | #23
Hi Tomasz,

On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > Hi Tomasz,
> > > > > >
> > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > >
> > > > > > > > Hi Tomasz,
> > > > > > > >
> > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > >
> > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > > > [snip]
> > > > > > > > > > > > > > > [snip]
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > > > > > > > > > removed below?
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > > > > > > > > > jobs?
> > > > > > > > > > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > > > > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > > > > > > > > > preferred for the lower latency.
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > > > > > > > > > the registers.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > for example:
> > > > > > > > > > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > > > > > > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > > > > > > > > > >
> > > > > > > > > > > > Sorry, my last reply didn't solve the question,
> > > > > > > > > > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > > > > > > > > > >
> > > > > > > > > > > > which is able to readl_poll_timeout_atomic()
> > > > > > > > > > > > and check the HW busy bits in the register FD_INT_EN;
> > > > > > > > > > > >
> > > > > > > > > > > > if they are not cleared until timeout, we could handle the last
> > > > > > > > > > > > processing job.
> > > > > > > > > > > > Otherwise, the FD irq handler should have handled the last processing
> > > > > > > > > > > > job and we could continue the stop_streaming().
> > > > > > > > > > > >
> > > > > > > > > > > > For job_abort():
> > > > > > > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > > > > > > {
> > > > > > > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > > > > >         u32 val;
> > > > > > > > > > > >         u32 ret;
> > > > > > > > > > > >
> > > > > > > > > > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > > > > > > > > > >                                         val,
> > > > > > > > > > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > > > > > > > > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > > > > > > > > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > > > > > > > > > >
> > > > > > > > > > > Hmm, would it be possible to avoid the busy wait by having a
> > > > > > > > > > > completion that could be signalled from the interrupt handler?
> > > > > > > > > > >
> > > > > > > > > > > Best regards,
> > > > > > > > > > > Tomasz
> > > > > > > > > >
> > > > > > > > > > I suppose that would be wakeup a wait queue in the interrupt handler,
> > > > > > > > > > the the wait_event_interrupt_timeout() will be used in here and system
> > > > > > > > > > suspend e.g. mtk_fd_suspend().
> > > > > > > > >
> > > > > > > > > Yes, that should work.
> > > > > > > > >
> > > > > > > > > > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > > > > > > > > > mtk_fd_ipi_handler()?
> > > > > > > > >
> > > > > > > > > Nope, we shouldn't need that.
> > > > > > > > >
> > > > > > > > > > I think maybe the readl_poll_timeout_atomic would be good enough.
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Not really. Busy waiting should be avoided as much as possible. What's
> > > > > > > > > the point of entering suspend if you end up burning the power by
> > > > > > > > > spinning the CPU for some milliseconds?
> > > > > > > > >
> > > > > > > > Ok, I see, busy waiting is not a good idea, I will use the wait queue
> > > > > > > > instead. the job abort will refine as following:
> > > > > > > >
> > > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > > {
> > > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > >         u32 ret;
> > > > > > > >
> > > > > > > >         ret = wait_event_interruptible_timeout
> > > > > > > >                 (fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
> > > > > > > >                  usecs_to_jiffies(50000));
> > > > > > > >         if (ret)
> > > > > > > >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> > > > > > > >         dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);
> > > > > > > >
> > > > > > > >         fd->fd_irq_result = 0;
> > > > > > > > }
> > > > > > > >
> > > > > > > > static struct v4l2_m2m_ops fd_m2m_ops = {
> > > > > > > >         .device_run = mtk_fd_device_run,
> > > > > > > >         .job_abort = mtk_fd_job_abort,
> > > > > > >
> > > > > > > I'm not sure we should be using the functon above as the .job_abort
> > > > > > > callback. It's expected to abort the job, but we're just waiting for
> > > > > > > it to finish. I think we should just call mtk_fd_job_abort() manually
> > > > > > > from .stop_streaming.
> > > > > > >
> > > > > >
> > > > > > Ok, I will fix it.
> > > > > >
> > > > > > > > };
> > > > > > > >
> > > > > > > > and we could use it in suspend.
> > > > > > > > static int mtk_fd_suspend(struct device *dev)
> > > > > > > > {
> > > > > > > >         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
> > > > > > > >
> > > > > > > >         if (pm_runtime_suspended(dev))
> > > > > > > >                 return 0;
> > > > > > > >
> > > > > > > >         if (fd->fd_stream_count)
> > > > > > > >                 mtk_fd_job_abort(fd->ctx);
> > > > > > > >
> > > > > > > >         /* suspend FD HW */
> > > > > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
> > > > > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
> > > > > > > >         clk_disable_unprepare(fd->fd_clk);
> > > > > > > >         dev_dbg(dev, "%s:disable clock\n", __func__);
> > > > > > > >
> > > > > > > >         return 0;
> > > > > > > > }
> > > > > > > >
> > > > > > > > static irqreturn_t mtk_fd_irq(int irq, void *data)
> > > > > > > > {
> > > > > > > >         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> > > > > > > >
> > > > > > > >         fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> > > > > > > >         wake_up_interruptible(&fd->wq);
> > > > > > >
> > > > > > > The wake up should be done at the very end of this function. Otherwise
> > > > > > > we end up with m2m framework racing with the mtk_fd_hw_job_finish()
> > > > > > > below.
> > > > > > >
> > > > > > > Also, I'd use a completion here rather than an open coded wait and
> > > > > > > wake-up. The driver could reinit_completion() before queuing a job to
> > > > > > > the hardware and the IRQ handler would complete(). There would be no
> > > > > > > need to store the IRQ flags in driver data anymore.
> > > > > > Ok, It will be refined as following:
> > > > > >
> > > > > > suspend and stop streaming will call mtk_fd_job_abort()
> > > > > >
> > > > > > static void mtk_fd_job_abort(struct mtk_fd_dev *fd)
> > > > > > {
> > > > > >         u32 ret;
> > > > > >
> > > > > >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > > > >         if (ret)
> > > > >
> > > > > For the _timeout variants, !ret means the timeout and ret > 0 means
> > > > > that the wait ended successfully before.
> > > > >
> > > > Thanks, fixed.
> > > >
> > > > > Also please make sure that the timeout is big enough not to happen
> > > > > normally on a heavily loaded system. Something like 1 second should be
> > > > > good.
> > > > >
> > > > Ok, I will make it 1 second
> > > > #define MTK_FD_HW_TIMEOUT 1000
> > > >
> > > > Also, I will add a condition in mtk_fd_vb2_stop_streaming(), since it
> > > > would be called twice, now it works fine whether I reuse the condition
> > > > with mtk_fd_hw_disconnect or not, however, I think do it before buffer
> > > > remove looks more reasonable.
> > > >
> > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > {
> > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > >         struct vb2_v4l2_buffer *vb;
> > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > >
> > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > >                 mtk_fd_job_abort(fd);
> > >
> > > We need to do it regardless of the queue type, otherwise we could
> > > still free CAPTURE buffers before the hardware releases them.
> > >
> >
> > I think we may read the fd->fd_irq_done.done and do wait for completion
> > if it's not being done.
> > How do you think?
> >
> > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > {
> >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> >         struct mtk_fd_dev *fd = ctx->fd_dev;
> >         struct vb2_v4l2_buffer *vb;
> >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> >         struct v4l2_m2m_queue_ctx *queue_ctx;
> >         u32 ret;
> >
> >         if (!fd->fd_irq_done.done)
> 
> We shouldn't access internal fields of completion.
> 
> >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> >                                                   msecs_to_jiffies(
> >                                                         MTK_FD_HW_TIMEOUT));
> >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> >                                         &m2m_ctx->out_q_ctx :
> >                                         &m2m_ctx->cap_q_ctx;
> >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> >
> >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> >                 mtk_fd_hw_disconnect(fd);
> > }
> >
> > I've also tried to wait completion unconditionally for both queues and
> > the second time will wait until timeout, as a result, it takes longer to
> > swap the camera every time and close the camera app.
> 
> I think it should work better if we call complete_all() instead of complete().
> 
Thanks,

I use complete_all(), and it works fine now.

static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
{
	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
	struct mtk_fd_dev *fd = ctx->fd_dev;
	struct vb2_v4l2_buffer *vb;
	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
	struct v4l2_m2m_queue_ctx *queue_ctx;

	wait_for_completion_timeout(&fd->fd_irq_done,
					  msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
	queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
					&m2m_ctx->out_q_ctx :
					&m2m_ctx->cap_q_ctx;
	while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
		v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);

	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
		mtk_fd_hw_disconnect(fd);
}

Best regards,
Jerry

> Best regards,
> Tomasz
Tomasz Figa Sept. 4, 2019, 9:03 a.m. UTC | #24
On Wed, Sep 4, 2019 at 6:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> > On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > > > >
> > > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > Hi Tomasz,
> > > > > > >
> > > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > >
> > > > > > > > > Hi Tomasz,
> > > > > > > > >
> > > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > > > > [snip]
> > > > > > > > > > > > > > > > [snip]
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > +static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > > > > > > > > > +{
> > > > > > > > > > > > > > > > > > > +   struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > > > > > > > > > > +   struct vb2_buffer *vb;
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > How do we guarantee here that the hardware isn't still accessing the buffers
> > > > > > > > > > > > > > > > > > removed below?
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > Maybe we can check the driver state flag and aborting the unfinished
> > > > > > > > > > > > > > > > > jobs?
> > > > > > > > > > > > > > > > > (fd_hw->state == FD_ENQ)
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > Yes, we need to either cancel or wait for the currently processing
> > > > > > > > > > > > > > > > job. It depends on hardware capabilities, but cancelling is generally
> > > > > > > > > > > > > > > > preferred for the lower latency.
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Ok, it the state is ENQ, then we can disable the FD hw by controlling
> > > > > > > > > > > > > > > the registers.
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > for example:
> > > > > > > > > > > > > > >         writel(0x0, fd->fd_base + FD_HW_ENABLE);
> > > > > > > > > > > > > > >         writel(0x0, fd->fd_base + FD_INT_EN);
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > What's exactly the effect of writing 0 to FD_HW_ENABLE?
> > > > > > > > > > > > > >
> > > > > > > > > > > > > Sorry, my last reply didn't solve the question,
> > > > > > > > > > > > > we should implement a mtk_fd_job_abort() for v4l2_m2m_ops().
> > > > > > > > > > > > >
> > > > > > > > > > > > > which is able to readl_poll_timeout_atomic()
> > > > > > > > > > > > > and check the HW busy bits in the register FD_INT_EN;
> > > > > > > > > > > > >
> > > > > > > > > > > > > if they are not cleared until timeout, we could handle the last
> > > > > > > > > > > > > processing job.
> > > > > > > > > > > > > Otherwise, the FD irq handler should have handled the last processing
> > > > > > > > > > > > > job and we could continue the stop_streaming().
> > > > > > > > > > > > >
> > > > > > > > > > > > > For job_abort():
> > > > > > > > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > > > > > > > {
> > > > > > > > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > > > > > >         u32 val;
> > > > > > > > > > > > >         u32 ret;
> > > > > > > > > > > > >
> > > > > > > > > > > > >         ret = readl_poll_timeout_atomic(fd->fd_base + MTK_FD_REG_OFFSET_INT_EN,
> > > > > > > > > > > > >                                         val,
> > > > > > > > > > > > >                                         (val & MTK_FD_HW_BUSY_MASK) ==
> > > > > > > > > > > > >                                         MTK_FD_HW_STATE_IS_BUSY,
> > > > > > > > > > > > >                                         USEC_PER_MSEC, MTK_FD_STOP_HW_TIMEOUT);
> > > > > > > > > > > >
> > > > > > > > > > > > Hmm, would it be possible to avoid the busy wait by having a
> > > > > > > > > > > > completion that could be signalled from the interrupt handler?
> > > > > > > > > > > >
> > > > > > > > > > > > Best regards,
> > > > > > > > > > > > Tomasz
> > > > > > > > > > >
> > > > > > > > > > > I suppose that would be wakeup a wait queue in the interrupt handler,
> > > > > > > > > > > the the wait_event_interrupt_timeout() will be used in here and system
> > > > > > > > > > > suspend e.g. mtk_fd_suspend().
> > > > > > > > > >
> > > > > > > > > > Yes, that should work.
> > > > > > > > > >
> > > > > > > > > > > Or do you suggest to wait_event_interrupt_timeout() every frame in the
> > > > > > > > > > > mtk_fd_ipi_handler()?
> > > > > > > > > >
> > > > > > > > > > Nope, we shouldn't need that.
> > > > > > > > > >
> > > > > > > > > > > I think maybe the readl_poll_timeout_atomic would be good enough.
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Not really. Busy waiting should be avoided as much as possible. What's
> > > > > > > > > > the point of entering suspend if you end up burning the power by
> > > > > > > > > > spinning the CPU for some milliseconds?
> > > > > > > > > >
> > > > > > > > > Ok, I see, busy waiting is not a good idea, I will use the wait queue
> > > > > > > > > instead. the job abort will refine as following:
> > > > > > > > >
> > > > > > > > > static void mtk_fd_job_abort(void *priv)
> > > > > > > > > {
> > > > > > > > >         struct mtk_fd_ctx *ctx = priv;
> > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > >         u32 ret;
> > > > > > > > >
> > > > > > > > >         ret = wait_event_interruptible_timeout
> > > > > > > > >                 (fd->wq, (fd->fd_irq_result & MTK_FD_HW_IRQ_MASK),
> > > > > > > > >                  usecs_to_jiffies(50000));
> > > > > > > > >         if (ret)
> > > > > > > > >                 mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> > > > > > > > >         dev_dbg(fd->dev, "%s, ret:%d\n", __func__, ret);
> > > > > > > > >
> > > > > > > > >         fd->fd_irq_result = 0;
> > > > > > > > > }
> > > > > > > > >
> > > > > > > > > static struct v4l2_m2m_ops fd_m2m_ops = {
> > > > > > > > >         .device_run = mtk_fd_device_run,
> > > > > > > > >         .job_abort = mtk_fd_job_abort,
> > > > > > > >
> > > > > > > > I'm not sure we should be using the functon above as the .job_abort
> > > > > > > > callback. It's expected to abort the job, but we're just waiting for
> > > > > > > > it to finish. I think we should just call mtk_fd_job_abort() manually
> > > > > > > > from .stop_streaming.
> > > > > > > >
> > > > > > >
> > > > > > > Ok, I will fix it.
> > > > > > >
> > > > > > > > > };
> > > > > > > > >
> > > > > > > > > and we could use it in suspend.
> > > > > > > > > static int mtk_fd_suspend(struct device *dev)
> > > > > > > > > {
> > > > > > > > >         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
> > > > > > > > >
> > > > > > > > >         if (pm_runtime_suspended(dev))
> > > > > > > > >                 return 0;
> > > > > > > > >
> > > > > > > > >         if (fd->fd_stream_count)
> > > > > > > > >                 mtk_fd_job_abort(fd->ctx);
> > > > > > > > >
> > > > > > > > >         /* suspend FD HW */
> > > > > > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
> > > > > > > > >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
> > > > > > > > >         clk_disable_unprepare(fd->fd_clk);
> > > > > > > > >         dev_dbg(dev, "%s:disable clock\n", __func__);
> > > > > > > > >
> > > > > > > > >         return 0;
> > > > > > > > > }
> > > > > > > > >
> > > > > > > > > static irqreturn_t mtk_fd_irq(int irq, void *data)
> > > > > > > > > {
> > > > > > > > >         struct mtk_fd_dev *fd = (struct mtk_fd_dev *)data;
> > > > > > > > >
> > > > > > > > >         fd->fd_irq_result = readl(fd->fd_base + MTK_FD_REG_OFFSET_INT_VAL);
> > > > > > > > >         wake_up_interruptible(&fd->wq);
> > > > > > > >
> > > > > > > > The wake up should be done at the very end of this function. Otherwise
> > > > > > > > we end up with m2m framework racing with the mtk_fd_hw_job_finish()
> > > > > > > > below.
> > > > > > > >
> > > > > > > > Also, I'd use a completion here rather than an open coded wait and
> > > > > > > > wake-up. The driver could reinit_completion() before queuing a job to
> > > > > > > > the hardware and the IRQ handler would complete(). There would be no
> > > > > > > > need to store the IRQ flags in driver data anymore.
> > > > > > > Ok, It will be refined as following:
> > > > > > >
> > > > > > > suspend and stop streaming will call mtk_fd_job_abort()
> > > > > > >
> > > > > > > static void mtk_fd_job_abort(struct mtk_fd_dev *fd)
> > > > > > > {
> > > > > > >         u32 ret;
> > > > > > >
> > > > > > >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > > > > >         if (ret)
> > > > > >
> > > > > > For the _timeout variants, !ret means the timeout and ret > 0 means
> > > > > > that the wait ended successfully before.
> > > > > >
> > > > > Thanks, fixed.
> > > > >
> > > > > > Also please make sure that the timeout is big enough not to happen
> > > > > > normally on a heavily loaded system. Something like 1 second should be
> > > > > > good.
> > > > > >
> > > > > Ok, I will make it 1 second
> > > > > #define MTK_FD_HW_TIMEOUT 1000
> > > > >
> > > > > Also, I will add a condition in mtk_fd_vb2_stop_streaming(), since it
> > > > > would be called twice, now it works fine whether I reuse the condition
> > > > > with mtk_fd_hw_disconnect or not, however, I think do it before buffer
> > > > > remove looks more reasonable.
> > > > >
> > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > {
> > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > >         struct vb2_v4l2_buffer *vb;
> > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > >
> > > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > > >                 mtk_fd_job_abort(fd);
> > > >
> > > > We need to do it regardless of the queue type, otherwise we could
> > > > still free CAPTURE buffers before the hardware releases them.
> > > >
> > >
> > > I think we may read the fd->fd_irq_done.done and do wait for completion
> > > if it's not being done.
> > > How do you think?
> > >
> > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > {
> > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > >         struct vb2_v4l2_buffer *vb;
> > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > >         u32 ret;
> > >
> > >         if (!fd->fd_irq_done.done)
> >
> > We shouldn't access internal fields of completion.
> >
> > >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > >                                                   msecs_to_jiffies(
> > >                                                         MTK_FD_HW_TIMEOUT));
> > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > >                                         &m2m_ctx->out_q_ctx :
> > >                                         &m2m_ctx->cap_q_ctx;
> > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > >
> > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > >                 mtk_fd_hw_disconnect(fd);
> > > }
> > >
> > > I've also tried to wait completion unconditionally for both queues and
> > > the second time will wait until timeout, as a result, it takes longer to
> > > swap the camera every time and close the camera app.
> >
> > I think it should work better if we call complete_all() instead of complete().
> >
> Thanks,
>
> I use complete_all(), and it works fine now.
>
> static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> {
>         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
>         struct mtk_fd_dev *fd = ctx->fd_dev;
>         struct vb2_v4l2_buffer *vb;
>         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
>         struct v4l2_m2m_queue_ctx *queue_ctx;
>
>         wait_for_completion_timeout(&fd->fd_irq_done,
>                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));

Shouldn't we still send some command to the hardware to stop? Like a
reset. Otherwise we don't know if it isn't still accessing the memory.

>         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
>                                         &m2m_ctx->out_q_ctx :
>                                         &m2m_ctx->cap_q_ctx;
>         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
>                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
>
>         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
>                 mtk_fd_hw_disconnect(fd);
> }
>
> Best regards,
> Jerry
>
> > Best regards,
> > Tomasz
>
>
Jerry-ch Chen Sept. 4, 2019, 9:26 a.m. UTC | #25
Hi Tomasz,

On Wed, 2019-09-04 at 17:03 +0800, Tomasz Figa wrote:
> On Wed, Sep 4, 2019 at 6:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> > > On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > Hi Tomasz,
> > > > > >
> > > > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > >
> > > > > > > > Hi Tomasz,
> > > > > > > >
> > > > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > >
> > > > > > > > > > Hi Tomasz,
> > > > > > > > > >
> > > > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
[snip]
> > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > {
> > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > >         struct vb2_v4l2_buffer *vb;
> > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > >         u32 ret;
> > > >
> > > >         if (!fd->fd_irq_done.done)
> > >
> > > We shouldn't access internal fields of completion.
> > >
> > > >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > >                                                   msecs_to_jiffies(
> > > >                                                         MTK_FD_HW_TIMEOUT));
> > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > >                                         &m2m_ctx->out_q_ctx :
> > > >                                         &m2m_ctx->cap_q_ctx;
> > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > >
> > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > >                 mtk_fd_hw_disconnect(fd);
> > > > }
> > > >
> > > > I've also tried to wait completion unconditionally for both queues and
> > > > the second time will wait until timeout, as a result, it takes longer to
> > > > swap the camera every time and close the camera app.
> > >
> > > I think it should work better if we call complete_all() instead of complete().
> > >
> > Thanks,
> >
> > I use complete_all(), and it works fine now.
> >
> > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > {
> >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> >         struct mtk_fd_dev *fd = ctx->fd_dev;
> >         struct vb2_v4l2_buffer *vb;
> >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> >         struct v4l2_m2m_queue_ctx *queue_ctx;
> >
> >         wait_for_completion_timeout(&fd->fd_irq_done,
> >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> 
> Shouldn't we still send some command to the hardware to stop? Like a
> reset. Otherwise we don't know if it isn't still accessing the memory.
> 
I thought no more jobs will be enqueued here when stop_streaming so we
don't need it.
We still could send an ipi command to reset the HW, and wait for it's
callback or we could set the register MTK_FD_REG_OFFSET_HW_ENABLE to
zero to disable the HW.

Best regards,
Jerry

> >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> >                                         &m2m_ctx->out_q_ctx :
> >                                         &m2m_ctx->cap_q_ctx;
> >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> >
> >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> >                 mtk_fd_hw_disconnect(fd);
> > }
> >
> > Best regards,
> > Jerry
> >
> > > Best regards,
> > > Tomasz
> >
> >
Tomasz Figa Sept. 4, 2019, 1:12 p.m. UTC | #26
On Wed, Sep 4, 2019 at 6:26 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Wed, 2019-09-04 at 17:03 +0800, Tomasz Figa wrote:
> > On Wed, Sep 4, 2019 at 6:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> > > > On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > > > >
> > > > > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > > > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > Hi Tomasz,
> > > > > > >
> > > > > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > >
> > > > > > > > > Hi Tomasz,
> > > > > > > > >
> > > > > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > >
> > > > > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> [snip]
> > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > {
> > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > >         struct vb2_v4l2_buffer *vb;
> > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > >         u32 ret;
> > > > >
> > > > >         if (!fd->fd_irq_done.done)
> > > >
> > > > We shouldn't access internal fields of completion.
> > > >
> > > > >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > >                                                   msecs_to_jiffies(
> > > > >                                                         MTK_FD_HW_TIMEOUT));
> > > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > > >                                         &m2m_ctx->out_q_ctx :
> > > > >                                         &m2m_ctx->cap_q_ctx;
> > > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > > >
> > > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > > >                 mtk_fd_hw_disconnect(fd);
> > > > > }
> > > > >
> > > > > I've also tried to wait completion unconditionally for both queues and
> > > > > the second time will wait until timeout, as a result, it takes longer to
> > > > > swap the camera every time and close the camera app.
> > > >
> > > > I think it should work better if we call complete_all() instead of complete().
> > > >
> > > Thanks,
> > >
> > > I use complete_all(), and it works fine now.
> > >
> > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > {
> > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > >         struct vb2_v4l2_buffer *vb;
> > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > >
> > >         wait_for_completion_timeout(&fd->fd_irq_done,
> > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> >
> > Shouldn't we still send some command to the hardware to stop? Like a
> > reset. Otherwise we don't know if it isn't still accessing the memory.
> >
> I thought no more jobs will be enqueued here when stop_streaming so we
> don't need it.

That's true for the case when the wait completed successfully, but we
also need to ensure the hardware is stopped even if a timeout happens.

> We still could send an ipi command to reset the HW, and wait for it's
> callback or we could set the register MTK_FD_REG_OFFSET_HW_ENABLE to
> zero to disable the HW.

Since it's for handling a timeout, a reset should be more likely to
bring the hardware back to a reasonable state.

>
> Best regards,
> Jerry
>
> > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > >                                         &m2m_ctx->out_q_ctx :
> > >                                         &m2m_ctx->cap_q_ctx;
> > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > >
> > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > >                 mtk_fd_hw_disconnect(fd);
> > > }
> > >
> > > Best regards,
> > > Jerry
> > >
> > > > Best regards,
> > > > Tomasz
> > >
> > >
>
>
Jerry-ch Chen Sept. 4, 2019, 1:19 p.m. UTC | #27
On Wed, 2019-09-04 at 21:12 +0800, Tomasz Figa wrote:
> On Wed, Sep 4, 2019 at 6:26 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Wed, 2019-09-04 at 17:03 +0800, Tomasz Figa wrote:
> > > On Wed, Sep 4, 2019 at 6:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> > > > > On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > Hi Tomasz,
> > > > > >
> > > > > > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > > > > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > >
> > > > > > > > Hi Tomasz,
> > > > > > > >
> > > > > > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > >
> > > > > > > > > > Hi Tomasz,
> > > > > > > > > >
> > > > > > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > >
> > > > > > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > [snip]
> > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > {
> > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > >         u32 ret;
> > > > > >
> > > > > >         if (!fd->fd_irq_done.done)
> > > > >
> > > > > We shouldn't access internal fields of completion.
> > > > >
> > > > > >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > >                                                   msecs_to_jiffies(
> > > > > >                                                         MTK_FD_HW_TIMEOUT));
> > > > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > > > >                                         &m2m_ctx->out_q_ctx :
> > > > > >                                         &m2m_ctx->cap_q_ctx;
> > > > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > > > >
> > > > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > > > >                 mtk_fd_hw_disconnect(fd);
> > > > > > }
> > > > > >
> > > > > > I've also tried to wait completion unconditionally for both queues and
> > > > > > the second time will wait until timeout, as a result, it takes longer to
> > > > > > swap the camera every time and close the camera app.
> > > > >
> > > > > I think it should work better if we call complete_all() instead of complete().
> > > > >
> > > > Thanks,
> > > >
> > > > I use complete_all(), and it works fine now.
> > > >
> > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > {
> > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > >         struct vb2_v4l2_buffer *vb;
> > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > >
> > > >         wait_for_completion_timeout(&fd->fd_irq_done,
> > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > >
> > > Shouldn't we still send some command to the hardware to stop? Like a
> > > reset. Otherwise we don't know if it isn't still accessing the memory.
> > >
> > I thought no more jobs will be enqueued here when stop_streaming so we
> > don't need it.
> 
> That's true for the case when the wait completed successfully, but we
> also need to ensure the hardware is stopped even if a timeout happens.
> 
> > We still could send an ipi command to reset the HW, and wait for it's
> > callback or we could set the register MTK_FD_REG_OFFSET_HW_ENABLE to
> > zero to disable the HW.
> 
> Since it's for handling a timeout, a reset should be more likely to
> bring the hardware back to a reasonable state.
> 

Ok, I will send the ipi command to reset the HW.

Thanks and best regards,
Jerry
> >
> > Best regards,
> > Jerry
> >
> > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > >                                         &m2m_ctx->out_q_ctx :
> > > >                                         &m2m_ctx->cap_q_ctx;
> > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > >
> > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > >                 mtk_fd_hw_disconnect(fd);
> > > > }
> > > >
> > > > Best regards,
> > > > Jerry
> > > >
> > > > > Best regards,
> > > > > Tomasz
> > > >
> > > >
> >
> >
Jerry-ch Chen Sept. 5, 2019, 7:02 a.m. UTC | #28
Hi Tomasz,

On Wed, 2019-09-04 at 21:19 +0800, Jerry-ch Chen wrote:
> On Wed, 2019-09-04 at 21:12 +0800, Tomasz Figa wrote:
> > On Wed, Sep 4, 2019 at 6:26 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Wed, 2019-09-04 at 17:03 +0800, Tomasz Figa wrote:
> > > > On Wed, Sep 4, 2019 at 6:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > > > >
> > > > > On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> > > > > > On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > Hi Tomasz,
> > > > > > >
> > > > > > > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > > > > > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > >
> > > > > > > > > Hi Tomasz,
> > > > > > > > >
> > > > > > > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > > > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > >
> > > > > > > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > [snip]
> > > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > {
> > > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > > >         u32 ret;
> > > > > > >
> > > > > > >         if (!fd->fd_irq_done.done)
> > > > > >
> > > > > > We shouldn't access internal fields of completion.
> > > > > >
> > > > > > >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > > >                                                   msecs_to_jiffies(
> > > > > > >                                                         MTK_FD_HW_TIMEOUT));
> > > > > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > > > > >                                         &m2m_ctx->out_q_ctx :
> > > > > > >                                         &m2m_ctx->cap_q_ctx;
> > > > > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > > > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > > > > >
> > > > > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > > > > >                 mtk_fd_hw_disconnect(fd);
> > > > > > > }
> > > > > > >
> > > > > > > I've also tried to wait completion unconditionally for both queues and
> > > > > > > the second time will wait until timeout, as a result, it takes longer to
> > > > > > > swap the camera every time and close the camera app.
> > > > > >
> > > > > > I think it should work better if we call complete_all() instead of complete().
> > > > > >
> > > > > Thanks,
> > > > >
> > > > > I use complete_all(), and it works fine now.
> > > > >
> > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > {
> > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > >         struct vb2_v4l2_buffer *vb;
> > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > >
> > > > >         wait_for_completion_timeout(&fd->fd_irq_done,
> > > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > >
> > > > Shouldn't we still send some command to the hardware to stop? Like a
> > > > reset. Otherwise we don't know if it isn't still accessing the memory.
> > > >
> > > I thought no more jobs will be enqueued here when stop_streaming so we
> > > don't need it.
> > 
> > That's true for the case when the wait completed successfully, but we
> > also need to ensure the hardware is stopped even if a timeout happens.
> > 
> > > We still could send an ipi command to reset the HW, and wait for it's
> > > callback or we could set the register MTK_FD_REG_OFFSET_HW_ENABLE to
> > > zero to disable the HW.
> > 
> > Since it's for handling a timeout, a reset should be more likely to
> > bring the hardware back to a reasonable state.
> > 
> 
> Ok, I will send the ipi command to reset the HW.
> 
> Thanks and best regards,
> Jerry
I've tested and will refine as following:

static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
{
	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
	struct mtk_fd_dev *fd = ctx->fd_dev;
	struct vb2_v4l2_buffer *vb;
	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
	struct v4l2_m2m_queue_ctx *queue_ctx;
	u32 ret;

	ret = wait_for_completion_timeout(&fd->fd_irq_done,
					  msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
	/* Disable FD HW */
	if(!ret) {
		struct ipi_message fd_ipi_msg;

		fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_RESET;
		ret = scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
				   sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT);
		if (ret)
			dev_err(fd->dev, "FD Reset HW error\n");
	}
	queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
					&m2m_ctx->out_q_ctx :
					&m2m_ctx->cap_q_ctx;
	while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
		v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);

	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
		mtk_fd_hw_disconnect(fd);
}

If there is no other concern, may I send the RFC v3 patch for review?

Thanks and best regards,
Jerry
> > >
> > > Best regards,
> > > Jerry
> > >
> > > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > > >                                         &m2m_ctx->out_q_ctx :
> > > > >                                         &m2m_ctx->cap_q_ctx;
> > > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > > >
> > > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > > >                 mtk_fd_hw_disconnect(fd);
> > > > > }
> > > > >
> > > > > Best regards,
> > > > > Jerry
> > > > >
> > > > > > Best regards,
> > > > > > Tomasz
> > > > >
> > > > >
> > >
> > >
>
Tomasz Figa Sept. 5, 2019, 7:13 a.m. UTC | #29
On Thu, Sep 5, 2019 at 4:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Wed, 2019-09-04 at 21:19 +0800, Jerry-ch Chen wrote:
> > On Wed, 2019-09-04 at 21:12 +0800, Tomasz Figa wrote:
> > > On Wed, Sep 4, 2019 at 6:26 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Wed, 2019-09-04 at 17:03 +0800, Tomasz Figa wrote:
> > > > > On Wed, Sep 4, 2019 at 6:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > Hi Tomasz,
> > > > > >
> > > > > > On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> > > > > > > On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > >
> > > > > > > > Hi Tomasz,
> > > > > > > >
> > > > > > > > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > > > > > > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > >
> > > > > > > > > > Hi Tomasz,
> > > > > > > > > >
> > > > > > > > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > > > > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > >
> > > > > > > > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > [snip]
> > > > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > {
> > > > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > > > >         u32 ret;
> > > > > > > >
> > > > > > > >         if (!fd->fd_irq_done.done)
> > > > > > >
> > > > > > > We shouldn't access internal fields of completion.
> > > > > > >
> > > > > > > >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > > > >                                                   msecs_to_jiffies(
> > > > > > > >                                                         MTK_FD_HW_TIMEOUT));
> > > > > > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > > > > > >                                         &m2m_ctx->out_q_ctx :
> > > > > > > >                                         &m2m_ctx->cap_q_ctx;
> > > > > > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > > > > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > > > > > >
> > > > > > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > > > > > >                 mtk_fd_hw_disconnect(fd);
> > > > > > > > }
> > > > > > > >
> > > > > > > > I've also tried to wait completion unconditionally for both queues and
> > > > > > > > the second time will wait until timeout, as a result, it takes longer to
> > > > > > > > swap the camera every time and close the camera app.
> > > > > > >
> > > > > > > I think it should work better if we call complete_all() instead of complete().
> > > > > > >
> > > > > > Thanks,
> > > > > >
> > > > > > I use complete_all(), and it works fine now.
> > > > > >
> > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > {
> > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > >
> > > > > >         wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > > >
> > > > > Shouldn't we still send some command to the hardware to stop? Like a
> > > > > reset. Otherwise we don't know if it isn't still accessing the memory.
> > > > >
> > > > I thought no more jobs will be enqueued here when stop_streaming so we
> > > > don't need it.
> > >
> > > That's true for the case when the wait completed successfully, but we
> > > also need to ensure the hardware is stopped even if a timeout happens.
> > >
> > > > We still could send an ipi command to reset the HW, and wait for it's
> > > > callback or we could set the register MTK_FD_REG_OFFSET_HW_ENABLE to
> > > > zero to disable the HW.
> > >
> > > Since it's for handling a timeout, a reset should be more likely to
> > > bring the hardware back to a reasonable state.
> > >
> >
> > Ok, I will send the ipi command to reset the HW.
> >
> > Thanks and best regards,
> > Jerry
> I've tested and will refine as following:
>
> static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> {
>         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
>         struct mtk_fd_dev *fd = ctx->fd_dev;
>         struct vb2_v4l2_buffer *vb;
>         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
>         struct v4l2_m2m_queue_ctx *queue_ctx;
>         u32 ret;
>
>         ret = wait_for_completion_timeout(&fd->fd_irq_done,
>                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
>         /* Disable FD HW */
>         if(!ret) {
>                 struct ipi_message fd_ipi_msg;
>
>                 fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_RESET;
>                 ret = scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
>                                    sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT);
>                 if (ret)
>                         dev_err(fd->dev, "FD Reset HW error\n");
>         }

Would you also put the same code in suspend handler? If so, perhaps
it's better to keep this in a helper function (mtk_fd_job_abort()) as
we had before?

>         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
>                                         &m2m_ctx->out_q_ctx :
>                                         &m2m_ctx->cap_q_ctx;
>         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
>                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
>
>         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
>                 mtk_fd_hw_disconnect(fd);
> }
>
> If there is no other concern, may I send the RFC v3 patch for review?

Thanks, technically it looks good now. Just one comment about avoiding
code duplication above.

Best regards,
Tomasz
Jerry-ch Chen Sept. 5, 2019, 8:16 a.m. UTC | #30
Hi Tomasz,

On Thu, 2019-09-05 at 15:13 +0800, Tomasz Figa wrote:
> On Thu, Sep 5, 2019 at 4:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Wed, 2019-09-04 at 21:19 +0800, Jerry-ch Chen wrote:
> > > On Wed, 2019-09-04 at 21:12 +0800, Tomasz Figa wrote:
> > > > On Wed, Sep 4, 2019 at 6:26 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > >
> > > > > Hi Tomasz,
> > > > >
> > > > > On Wed, 2019-09-04 at 17:03 +0800, Tomasz Figa wrote:
> > > > > > On Wed, Sep 4, 2019 at 6:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > Hi Tomasz,
> > > > > > >
> > > > > > > On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> > > > > > > > On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > >
> > > > > > > > > Hi Tomasz,
> > > > > > > > >
> > > > > > > > > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > > > > > > > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > >
> > > > > > > > > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > > [snip]
> > > > > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > {
> > > > > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > > > > >         u32 ret;
> > > > > > > > >
> > > > > > > > >         if (!fd->fd_irq_done.done)
> > > > > > > >
> > > > > > > > We shouldn't access internal fields of completion.
> > > > > > > >
> > > > > > > > >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > > > > >                                                   msecs_to_jiffies(
> > > > > > > > >                                                         MTK_FD_HW_TIMEOUT));
> > > > > > > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > > > > > > >                                         &m2m_ctx->out_q_ctx :
> > > > > > > > >                                         &m2m_ctx->cap_q_ctx;
> > > > > > > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > > > > > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > > > > > > >
> > > > > > > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > > > > > > >                 mtk_fd_hw_disconnect(fd);
> > > > > > > > > }
> > > > > > > > >
> > > > > > > > > I've also tried to wait completion unconditionally for both queues and
> > > > > > > > > the second time will wait until timeout, as a result, it takes longer to
> > > > > > > > > swap the camera every time and close the camera app.
> > > > > > > >
> > > > > > > > I think it should work better if we call complete_all() instead of complete().
> > > > > > > >
> > > > > > > Thanks,
> > > > > > >
> > > > > > > I use complete_all(), and it works fine now.
> > > > > > >
> > > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > {
> > > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > > >
> > > > > > >         wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > > > >
> > > > > > Shouldn't we still send some command to the hardware to stop? Like a
> > > > > > reset. Otherwise we don't know if it isn't still accessing the memory.
> > > > > >
> > > > > I thought no more jobs will be enqueued here when stop_streaming so we
> > > > > don't need it.
> > > >
> > > > That's true for the case when the wait completed successfully, but we
> > > > also need to ensure the hardware is stopped even if a timeout happens.
> > > >
> > > > > We still could send an ipi command to reset the HW, and wait for it's
> > > > > callback or we could set the register MTK_FD_REG_OFFSET_HW_ENABLE to
> > > > > zero to disable the HW.
> > > >
> > > > Since it's for handling a timeout, a reset should be more likely to
> > > > bring the hardware back to a reasonable state.
> > > >
> > >
> > > Ok, I will send the ipi command to reset the HW.
> > >
> > > Thanks and best regards,
> > > Jerry
> > I've tested and will refine as following:
> >
> > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > {
> >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> >         struct mtk_fd_dev *fd = ctx->fd_dev;
> >         struct vb2_v4l2_buffer *vb;
> >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> >         struct v4l2_m2m_queue_ctx *queue_ctx;
> >         u32 ret;
> >
> >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> >         /* Disable FD HW */
> >         if(!ret) {
> >                 struct ipi_message fd_ipi_msg;
> >
> >                 fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_RESET;
> >                 ret = scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
> >                                    sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT);
> >                 if (ret)
> >                         dev_err(fd->dev, "FD Reset HW error\n");
> >         }
> 
> Would you also put the same code in suspend handler? If so, perhaps
> it's better to keep this in a helper function (mtk_fd_job_abort()) as
> we had before?
> 

Ok, done, It will reset the HW and return ETIMEOUT if the last job is
timeout, the return value will be used in suspend for further action.

static int mtk_fd_job_abort(struct mtk_fd_dev *fd)
{
	u32 ret;

	ret = wait_for_completion_timeout(&fd->fd_irq_done,
					  msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
	/* Reset FD HW */
	if (!ret) {
		struct ipi_message fd_ipi_msg;

		fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_RESET;
		if (scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
				 sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT))
			dev_err(fd->dev, "FD Reset HW error\n");
		return -ETIMEDOUT;
	}
	return 0;
}

static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
{
	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
	struct mtk_fd_dev *fd = ctx->fd_dev;
	struct vb2_v4l2_buffer *vb;
	struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
	struct v4l2_m2m_queue_ctx *queue_ctx;

	mtk_fd_job_abort(fd);
	queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
					&m2m_ctx->out_q_ctx :
					&m2m_ctx->cap_q_ctx;
	while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
		v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);

	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
		mtk_fd_hw_disconnect(fd);
}

static int mtk_fd_suspend(struct device *dev)
{
	struct mtk_fd_dev *fd = dev_get_drvdata(dev);

	if (pm_runtime_suspended(dev))
		return 0;

	if (fd->fd_stream_count)
		if (mtk_fd_job_abort(fd))
			mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);

	/* suspend FD HW */
	writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
	writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
	clk_disable_unprepare(fd->fd_clk);
	dev_dbg(dev, "%s:disable clock\n", __func__);

	return 0;
}

> >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> >                                         &m2m_ctx->out_q_ctx :
> >                                         &m2m_ctx->cap_q_ctx;
> >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> >
> >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> >                 mtk_fd_hw_disconnect(fd);
> > }
> >
> > If there is no other concern, may I send the RFC v3 patch for review?
> 
> Thanks, technically it looks good now. Just one comment about avoiding
> code duplication above.
> 

Thanks, 

I will send the v3 if the above fix-up is accepted,

Best regards,
Jerry

> Best regards,
> Tomasz
Tomasz Figa Sept. 5, 2019, 8:52 a.m. UTC | #31
On Thu, Sep 5, 2019 at 5:17 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
>
> Hi Tomasz,
>
> On Thu, 2019-09-05 at 15:13 +0800, Tomasz Figa wrote:
> > On Thu, Sep 5, 2019 at 4:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > >
> > > Hi Tomasz,
> > >
> > > On Wed, 2019-09-04 at 21:19 +0800, Jerry-ch Chen wrote:
> > > > On Wed, 2019-09-04 at 21:12 +0800, Tomasz Figa wrote:
> > > > > On Wed, Sep 4, 2019 at 6:26 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > >
> > > > > > Hi Tomasz,
> > > > > >
> > > > > > On Wed, 2019-09-04 at 17:03 +0800, Tomasz Figa wrote:
> > > > > > > On Wed, Sep 4, 2019 at 6:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > >
> > > > > > > > Hi Tomasz,
> > > > > > > >
> > > > > > > > On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> > > > > > > > > On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > >
> > > > > > > > > > Hi Tomasz,
> > > > > > > > > >
> > > > > > > > > > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > > > > > > > > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > >
> > > > > > > > > > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > > > [snip]
> > > > > > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > {
> > > > > > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > > > > > >         u32 ret;
> > > > > > > > > >
> > > > > > > > > >         if (!fd->fd_irq_done.done)
> > > > > > > > >
> > > > > > > > > We shouldn't access internal fields of completion.
> > > > > > > > >
> > > > > > > > > >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > > > > > >                                                   msecs_to_jiffies(
> > > > > > > > > >                                                         MTK_FD_HW_TIMEOUT));
> > > > > > > > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > > > > > > > >                                         &m2m_ctx->out_q_ctx :
> > > > > > > > > >                                         &m2m_ctx->cap_q_ctx;
> > > > > > > > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > > > > > > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > > > > > > > >
> > > > > > > > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > > > > > > > >                 mtk_fd_hw_disconnect(fd);
> > > > > > > > > > }
> > > > > > > > > >
> > > > > > > > > > I've also tried to wait completion unconditionally for both queues and
> > > > > > > > > > the second time will wait until timeout, as a result, it takes longer to
> > > > > > > > > > swap the camera every time and close the camera app.
> > > > > > > > >
> > > > > > > > > I think it should work better if we call complete_all() instead of complete().
> > > > > > > > >
> > > > > > > > Thanks,
> > > > > > > >
> > > > > > > > I use complete_all(), and it works fine now.
> > > > > > > >
> > > > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > {
> > > > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > > > >
> > > > > > > >         wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > > > > >
> > > > > > > Shouldn't we still send some command to the hardware to stop? Like a
> > > > > > > reset. Otherwise we don't know if it isn't still accessing the memory.
> > > > > > >
> > > > > > I thought no more jobs will be enqueued here when stop_streaming so we
> > > > > > don't need it.
> > > > >
> > > > > That's true for the case when the wait completed successfully, but we
> > > > > also need to ensure the hardware is stopped even if a timeout happens.
> > > > >
> > > > > > We still could send an ipi command to reset the HW, and wait for it's
> > > > > > callback or we could set the register MTK_FD_REG_OFFSET_HW_ENABLE to
> > > > > > zero to disable the HW.
> > > > >
> > > > > Since it's for handling a timeout, a reset should be more likely to
> > > > > bring the hardware back to a reasonable state.
> > > > >
> > > >
> > > > Ok, I will send the ipi command to reset the HW.
> > > >
> > > > Thanks and best regards,
> > > > Jerry
> > > I've tested and will refine as following:
> > >
> > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > {
> > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > >         struct vb2_v4l2_buffer *vb;
> > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > >         u32 ret;
> > >
> > >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > >         /* Disable FD HW */
> > >         if(!ret) {
> > >                 struct ipi_message fd_ipi_msg;
> > >
> > >                 fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_RESET;
> > >                 ret = scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
> > >                                    sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT);
> > >                 if (ret)
> > >                         dev_err(fd->dev, "FD Reset HW error\n");
> > >         }
> >
> > Would you also put the same code in suspend handler? If so, perhaps
> > it's better to keep this in a helper function (mtk_fd_job_abort()) as
> > we had before?
> >
>
> Ok, done, It will reset the HW and return ETIMEOUT if the last job is
> timeout, the return value will be used in suspend for further action.
>
> static int mtk_fd_job_abort(struct mtk_fd_dev *fd)
> {
>         u32 ret;
>
>         ret = wait_for_completion_timeout(&fd->fd_irq_done,
>                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
>         /* Reset FD HW */
>         if (!ret) {
>                 struct ipi_message fd_ipi_msg;
>
>                 fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_RESET;
>                 if (scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
>                                  sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT))
>                         dev_err(fd->dev, "FD Reset HW error\n");
>                 return -ETIMEDOUT;
>         }
>         return 0;
> }
>
> static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> {
>         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
>         struct mtk_fd_dev *fd = ctx->fd_dev;
>         struct vb2_v4l2_buffer *vb;
>         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
>         struct v4l2_m2m_queue_ctx *queue_ctx;
>
>         mtk_fd_job_abort(fd);
>         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
>                                         &m2m_ctx->out_q_ctx :
>                                         &m2m_ctx->cap_q_ctx;
>         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
>                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
>
>         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
>                 mtk_fd_hw_disconnect(fd);
> }
>
> static int mtk_fd_suspend(struct device *dev)
> {
>         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
>
>         if (pm_runtime_suspended(dev))
>                 return 0;
>
>         if (fd->fd_stream_count)
>                 if (mtk_fd_job_abort(fd))
>                         mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);

Wouldn't this cause the next job to be run?

>
>         /* suspend FD HW */
>         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
>         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
>         clk_disable_unprepare(fd->fd_clk);
>         dev_dbg(dev, "%s:disable clock\n", __func__);
>
>         return 0;
> }
>
> > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > >                                         &m2m_ctx->out_q_ctx :
> > >                                         &m2m_ctx->cap_q_ctx;
> > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > >
> > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > >                 mtk_fd_hw_disconnect(fd);
> > > }
> > >
> > > If there is no other concern, may I send the RFC v3 patch for review?
> >
> > Thanks, technically it looks good now. Just one comment about avoiding
> > code duplication above.
> >
>
> Thanks,
>
> I will send the v3 if the above fix-up is accepted,

I think there is a bigger issue here actually, related to how the m2m
helpers work. Let's just keep the code as you proposed and post v3.

We can continue the discussion there.

Best regards,
Tomasz
Laurent Pinchart Sept. 5, 2019, 12:35 p.m. UTC | #32
Hello,

On Fri, Aug 09, 2019 at 05:07:21PM +0900, Tomasz Figa wrote:
> On Mon, Jul 29, 2019 at 8:58 PM Jerry-ch Chen wrote:
> > On Mon, 2019-07-29 at 17:57 +0800, Tomasz Figa wrote:
> >> On Mon, Jul 29, 2019 at 3:01 PM Jerry-ch Chen wrote:
> >>> On Tue, 2019-07-09 at 18:56 +0800, Enrico Weigelt, metux IT consult wrote:
> >>>> On 09.07.19 10:41, Jerry-ch Chen wrote:
> >>>>> diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> >>>>> new file mode 100644
> >>>>> index 0000000..289999b
> >>>>> --- /dev/null
> >>>>> +++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
> >>>>> @@ -0,0 +1,157 @@
> >>>>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>>>> +//
> >>>>> +// Copyright (c) 2018 MediaTek Inc.
> >>>>> +
> >>>>> +#ifndef __MTK_FD_HW_H__
> >>>>> +#define __MTK_FD_HW_H__
> >>>>> +
> >>>>> +#include <linux/io.h>
> >>>>> +#include <linux/types.h>
> >>>>> +#include <linux/platform_device.h>
> >>>>> +#include <media/v4l2-ctrls.h>
> >>>>> +#include <media/v4l2-device.h>
> >>>>> +#include <media/videobuf2-v4l2.h>
> >>>>> +
> >>>>> +#define MTK_FD_OUTPUT_MIN_WIDTH                    26U
> >>>>> +#define MTK_FD_OUTPUT_MIN_HEIGHT           26U
> >>>>> +#define MTK_FD_OUTPUT_MAX_WIDTH                    640U
> >>>>> +#define MTK_FD_OUTPUT_MAX_HEIGHT           480U
> >>>>> +
> >>>>> +/* Control the user defined image widths and heights
> >>>>> + * to be scaled and performed face detection in FD HW.
> >>>>> + * MTK FD support up to 14 user defined image sizes to perform face detection.
> >>>>> + */
> >>>>> +#define V4L2_CID_MTK_FD_SCALE_IMG_WIDTH            (V4L2_CID_USER_MTK_FD_BASE + 1)
> >>>>> +#define V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT   (V4L2_CID_USER_MTK_FD_BASE + 2)
> >>>>
> >>>> I've got a *really* bad feeling about introducing chip specific
> >>>> uapi stuff. (by the way: uapi stuff belongs into include/uapi/...)
> >>>
> >>> Thanks for your comments,
> >>>
> >>> If we remain chip-specific control IDs, I will move the uapi stuff into
> >>> inlcude/uapi/mtk_fd.h (filename TBD)
> >>>
> >>>> Maybe you could tell us what that's *really* about, so we can find some
> >>>> standard / chip-independent api for these things. That's one of the
> >>>> major point of the kernel: hardware abstraction.
> >>>
> >>> I am not sure if it is possible for us to add some standard
> >>> v4l2-controls for face detection, a further explanations of controls are
> >>> listed below.
> >>>
> >>> In v4l2-controls, there exists V4L2_CID_DETECT_CLASS, but I haven't
> >>> found the standards or api that can be used for face detection yet.
> >>> https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/v4l2-controls.h#L1092
> >>>
> >>> For detecting certain face angle and head direction, we would need
> >>> V4L2_CID_DETECT_ANGLE, V4L2_CID_DETECT_DIRECTION controls for user to
> >>> specify the angle and direction to be detected.
> >>> In MTK FD driver, we support the following angles and directions to be
> >>> selected by user, and they are both multiple selected .
> >>> FD_angle_table[] = {-90, -45, 0 , 45, 90}
> >>> FD_direction_table[] = {0, 30, 60, 90, 120, 150, ..., 330}
> >>>
> >>> Assuming these v4l2-controls are array of V4L2_CTRL_TYPE_U16 with
> >>> dimension 5 and 12.
> >>> User can select the desired angle and directions to be detected into
> >>> arrays and bring it to driver by these controls, however, the more they
> >>> select, the longer execution time needed by HW.
> >>
> >> Sounds like we need some kind of a menu bitmask control here, but I
> >> don't see V4L2 having anything like that.
> >>
> >> Hans, Sakari, any ideas?
> >>
> >>> For detecting different sizes of faces and increase the detection speed,
> >>> FD driver might need to scales down the input image into different
> >>> smaller sizes
> >>
> >> Do you mean the FD hardware would do the scaling or the driver code
> >> itself? It would be undesirable to do such scaling in a kernel driver,
> >> so if that's not something handled by the hardware, the downscaled
> >> image might need to be provided from the userspace.
> >
> > Thanks for your comments.
> >
> > Yes, FD hardware will do the scaling itself, so driver could set the
> > sizes.
> >
> >> >, besides driver default values, user or proprietary
> >>> algorithm library can manually set the desired image sizes, therefore,
> >>> we would need the following controls:
> >>> V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH and
> >>> V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT.
> >>> In MTK FD driver, we implement these controls as array of
> >>> V4L2_CTRL_TYPE_U16 with the dimension 15.
> >>
> >> Why 15?
> >
> > It consists of one input image size and 14 down-scaled image sizes,
> > the amount 15 (or say 14) is defined by the MTK FD algo library,
> > therefore I remain the number of 15 here for communicate with the
> > library.
> > Maybe it should be defined as following?
> > MTK_FD_MAX_SCALE_SIZE_NUM               14
> > and
> > MTK_FD_SCALE_ARR_NUM                    15
> >
> >>> For controlling detection speed, we would need the
> >>> V4L2_CID_DETECT_SPEED, the faster speedup implies the lower accuracy of
> >>> detection, In MTK FD driver, the max level of speedup is 7, and default
> >>> value is 0.
> >>>
> >>> For MTK FD algorithm user library, they would need select extra
> >>> detection features(models) used in HW, we need
> >>> V4L2_CID_MTK_FD_EXTRA_MODEL, this will be set to 1 for using extra
> >>> model. However, we are considering make this control more
> >>> chip-independent and can be added into standard.
> >>> for example, V4L2_CID_DETECTION_FD_MODEL or ...FD_ALGO,
> >>> drivers can define the detection algorithm or detection model to be used
> >>> for users to select. How do you think?
> >>
> >> Sounds like something that could be a menu control, so it could vary
> >> between drivers.
> >
> > Ok, and maybe it should be created by v4l2_ctrl_new_int_menu(...)?
> >
> >>> In short, I summery the control IDs as following:
> >>> V4L2_CID_DETECT_ANGLE: set the angle of face in degrees. 90 ~ -90
> >>> degrees.
> >>> V4L2_CID_DETECT_DIRECTION: set the rotation of the head in degrees.
> >>> 0~330 degrees.
> >>> V4L2_CID_DETECT_SCALE_DOWN_IMG_WIDTH: set the image widths for an input
> >>> image to be scaled down for face detection
> >>> V4L2_CID_DETECT_SCALE_DOWN_IMG_HEIGHT: set the image heights for an
> >>> input image to be scaled down for face detection
> >>> V4L2_CID_DETECT_SPEED: set the detection speed, usually reducing
> >>> accuracy.
> >>> V4L2_CID_DETECTION_FD_MODEL: select the detection model or algorithm to
> >>> be used by face detection driver.
> >>>
> >>>>> +#define ENABLE_FD                          0x111
> >>>>> +#define FD_HW_ENABLE                               0x4
> >>>>> +#define FD_INT_EN                          0x15c
> >>>>> +#define FD_INT                                     0x168
> >>>>> +#define FD_RESULT                          0x178
> >>>>> +#define FD_IRQ_MASK                                0x001
> >>>>> +
> >>>>> +#define RS_MAX_BUF_SIZE                            2288788
> >>>>> +#define FD_MAX_SPEEDUP                             7
> >>>>> +#define FD_MAX_POSE_VAL                            0xfffffffffffffff
> >>>>> +#define FD_DEF_POSE_VAL                            0x3ff
> >>>>> +#define MAX_FD_SEL_NUM                             1026
> >>>>
> >>>> If that file is supposed to be included by anything beyond the driver
> >>>> itself, we need proper prefixing. (same for anything else in here)
> >>>
> >>> I will fix it as following:
> >>>
> >>> #define FD_ENABLE    0x111
> >>>
> >>> #define FD_REG_OFFSET_HW_ENABLE  0x4
> >>> #define FD_REG_OFFSET_INT_EN     0x15c
> >>> #define FD_REG_OFFSET_INT_VAL    0x168
> >>> #define FD_REG_OFFSET_RESULT     0x178
> >>>
> >>> #define FD_IRQ_MASK         1
> >>> #define FD_MAX_RS_BUF_SIZE  2288788
> >>> #define FD_MAX_SPEEDUP      7
> >>> #define FD_MAX_RESULT_NUM   1026
> >>
> >> I'd suggest the MTK_FD_ prefix.
> >
> > Ok, I will use MTK_FD_ prefix.
> >
> >>>>> diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
> >>>>> index 3dcfc61..eae876e 100644
> >>>>> --- a/include/uapi/linux/v4l2-controls.h
> >>>>> +++ b/include/uapi/linux/v4l2-controls.h
> >>>>> @@ -192,6 +192,10 @@ enum v4l2_colorfx {
> >>>>>   * We reserve 16 controls for this driver. */
> >>>>>  #define V4L2_CID_USER_IMX_BASE                     (V4L2_CID_USER_BASE + 0x10b0)
> >>>>>
> >>>>> +/* The base for the mediatek FD driver controls */
> >>>>> +/* We reserve 16 controls for this driver. */
> >>>>> +#define V4L2_CID_USER_MTK_FD_BASE          (V4L2_CID_USER_BASE + 0x10d0)
> >>>>
> >>>> Why only the base, but not the actual IDs in uapi ?
> >>>>
> >>> I will put actual IDs in uapi/ for user to reference.
> 
> Enrico, any thoughts on the explanation that Jerry provided and
> further discussion above?

I agree with Enrico that standardising the face detection API is the way
to go.
Jerry-ch Chen Sept. 5, 2019, 1:36 p.m. UTC | #33
On Thu, 2019-09-05 at 16:52 +0800, Tomasz Figa wrote:
> On Thu, Sep 5, 2019 at 5:17 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> >
> > Hi Tomasz,
> >
> > On Thu, 2019-09-05 at 15:13 +0800, Tomasz Figa wrote:
> > > On Thu, Sep 5, 2019 at 4:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > >
> > > > Hi Tomasz,
> > > >
> > > > On Wed, 2019-09-04 at 21:19 +0800, Jerry-ch Chen wrote:
> > > > > On Wed, 2019-09-04 at 21:12 +0800, Tomasz Figa wrote:
> > > > > > On Wed, Sep 4, 2019 at 6:26 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > >
> > > > > > > Hi Tomasz,
> > > > > > >
> > > > > > > On Wed, 2019-09-04 at 17:03 +0800, Tomasz Figa wrote:
> > > > > > > > On Wed, Sep 4, 2019 at 6:02 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > >
> > > > > > > > > Hi Tomasz,
> > > > > > > > >
> > > > > > > > > On Wed, 2019-09-04 at 16:25 +0800, Tomasz Figa wrote:
> > > > > > > > > > On Wed, Sep 4, 2019 at 5:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > >
> > > > > > > > > > > On Wed, 2019-09-04 at 14:34 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > On Wed, Sep 4, 2019 at 3:09 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Wed, 2019-09-04 at 12:15 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > On Wed, Sep 4, 2019 at 12:38 PM Jerry-ch Chen
> > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > On Tue, 2019-09-03 at 20:05 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > On Tue, Sep 3, 2019 at 8:46 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > On Tue, 2019-09-03 at 15:04 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > On Tue, Sep 3, 2019 at 3:44 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > On Tue, 2019-09-03 at 13:19 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > On Mon, Sep 2, 2019 at 8:47 PM Jerry-ch Chen <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > On Fri, 2019-08-30 at 16:33 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > > On Wed, Aug 28, 2019 at 11:00 AM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > On Mon, 2019-08-26 at 14:36 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > > On Sun, Aug 25, 2019 at 6:18 PM Jerry-ch Chen
> > > > > > > > > > > > > > > > > > > > > > > > <Jerry-ch.Chen@mediatek.com> wrote:
> > > > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > > > Hi Tomasz,
> > > > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > > > On Fri, 2019-08-02 at 16:28 +0800, Tomasz Figa wrote:
> > > > > > > > > > > > > > > > > > > > > > > > > > Hi Jerry,
> > > > > > > > > > > > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > > > > > > > > > > > On Tue, Jul 09, 2019 at 04:41:12PM +0800, Jerry-ch Chen wrote:
> > > > > > > [snip]
> > > > > > > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > > > {
> > > > > > > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > > > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > > > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > > > > > > >         u32 ret;
> > > > > > > > > > >
> > > > > > > > > > >         if (!fd->fd_irq_done.done)
> > > > > > > > > >
> > > > > > > > > > We shouldn't access internal fields of completion.
> > > > > > > > > >
> > > > > > > > > > >                 ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > > > > > > >                                                   msecs_to_jiffies(
> > > > > > > > > > >                                                         MTK_FD_HW_TIMEOUT));
> > > > > > > > > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > > > > > > > > >                                         &m2m_ctx->out_q_ctx :
> > > > > > > > > > >                                         &m2m_ctx->cap_q_ctx;
> > > > > > > > > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > > > > > > > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > > > > > > > > >
> > > > > > > > > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > > > > > > > > >                 mtk_fd_hw_disconnect(fd);
> > > > > > > > > > > }
> > > > > > > > > > >
> > > > > > > > > > > I've also tried to wait completion unconditionally for both queues and
> > > > > > > > > > > the second time will wait until timeout, as a result, it takes longer to
> > > > > > > > > > > swap the camera every time and close the camera app.
> > > > > > > > > >
> > > > > > > > > > I think it should work better if we call complete_all() instead of complete().
> > > > > > > > > >
> > > > > > > > > Thanks,
> > > > > > > > >
> > > > > > > > > I use complete_all(), and it works fine now.
> > > > > > > > >
> > > > > > > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > > > > > > {
> > > > > > > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > > > > > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > > > > > > >         struct vb2_v4l2_buffer *vb;
> > > > > > > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > > > > > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > > > > > > >
> > > > > > > > >         wait_for_completion_timeout(&fd->fd_irq_done,
> > > > > > > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > > > > > >
> > > > > > > > Shouldn't we still send some command to the hardware to stop? Like a
> > > > > > > > reset. Otherwise we don't know if it isn't still accessing the memory.
> > > > > > > >
> > > > > > > I thought no more jobs will be enqueued here when stop_streaming so we
> > > > > > > don't need it.
> > > > > >
> > > > > > That's true for the case when the wait completed successfully, but we
> > > > > > also need to ensure the hardware is stopped even if a timeout happens.
> > > > > >
> > > > > > > We still could send an ipi command to reset the HW, and wait for it's
> > > > > > > callback or we could set the register MTK_FD_REG_OFFSET_HW_ENABLE to
> > > > > > > zero to disable the HW.
> > > > > >
> > > > > > Since it's for handling a timeout, a reset should be more likely to
> > > > > > bring the hardware back to a reasonable state.
> > > > > >
> > > > >
> > > > > Ok, I will send the ipi command to reset the HW.
> > > > >
> > > > > Thanks and best regards,
> > > > > Jerry
> > > > I've tested and will refine as following:
> > > >
> > > > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > > > {
> > > >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> > > >         struct mtk_fd_dev *fd = ctx->fd_dev;
> > > >         struct vb2_v4l2_buffer *vb;
> > > >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> > > >         struct v4l2_m2m_queue_ctx *queue_ctx;
> > > >         u32 ret;
> > > >
> > > >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> > > >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> > > >         /* Disable FD HW */
> > > >         if(!ret) {
> > > >                 struct ipi_message fd_ipi_msg;
> > > >
> > > >                 fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_RESET;
> > > >                 ret = scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
> > > >                                    sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT);
> > > >                 if (ret)
> > > >                         dev_err(fd->dev, "FD Reset HW error\n");
> > > >         }
> > >
> > > Would you also put the same code in suspend handler? If so, perhaps
> > > it's better to keep this in a helper function (mtk_fd_job_abort()) as
> > > we had before?
> > >
> >
> > Ok, done, It will reset the HW and return ETIMEOUT if the last job is
> > timeout, the return value will be used in suspend for further action.
> >
> > static int mtk_fd_job_abort(struct mtk_fd_dev *fd)
> > {
> >         u32 ret;
> >
> >         ret = wait_for_completion_timeout(&fd->fd_irq_done,
> >                                           msecs_to_jiffies(MTK_FD_HW_TIMEOUT));
> >         /* Reset FD HW */
> >         if (!ret) {
> >                 struct ipi_message fd_ipi_msg;
> >
> >                 fd_ipi_msg.cmd_id = MTK_FD_IPI_CMD_RESET;
> >                 if (scp_ipi_send(fd->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
> >                                  sizeof(fd_ipi_msg), MTK_FD_IPI_SEND_TIMEOUT))
> >                         dev_err(fd->dev, "FD Reset HW error\n");
> >                 return -ETIMEDOUT;
> >         }
> >         return 0;
> > }
> >
> > static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
> > {
> >         struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
> >         struct mtk_fd_dev *fd = ctx->fd_dev;
> >         struct vb2_v4l2_buffer *vb;
> >         struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx;
> >         struct v4l2_m2m_queue_ctx *queue_ctx;
> >
> >         mtk_fd_job_abort(fd);
> >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> >                                         &m2m_ctx->out_q_ctx :
> >                                         &m2m_ctx->cap_q_ctx;
> >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> >
> >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> >                 mtk_fd_hw_disconnect(fd);
> > }
> >
> > static int mtk_fd_suspend(struct device *dev)
> > {
> >         struct mtk_fd_dev *fd = dev_get_drvdata(dev);
> >
> >         if (pm_runtime_suspended(dev))
> >                 return 0;
> >
> >         if (fd->fd_stream_count)
> >                 if (mtk_fd_job_abort(fd))
> >                         mtk_fd_hw_job_finish(fd, VB2_BUF_STATE_ERROR);
> 
> Wouldn't this cause the next job to be run?
> 
> >
> >         /* suspend FD HW */
> >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_INT_EN);
> >         writel(0x0, fd->fd_base + MTK_FD_REG_OFFSET_HW_ENABLE);
> >         clk_disable_unprepare(fd->fd_clk);
> >         dev_dbg(dev, "%s:disable clock\n", __func__);
> >
> >         return 0;
> > }
> >
> > > >         queue_ctx = V4L2_TYPE_IS_OUTPUT(vq->type) ?
> > > >                                         &m2m_ctx->out_q_ctx :
> > > >                                         &m2m_ctx->cap_q_ctx;
> > > >         while ((vb = v4l2_m2m_buf_remove(queue_ctx)))
> > > >                 v4l2_m2m_buf_done(vb, VB2_BUF_STATE_ERROR);
> > > >
> > > >         if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > > >                 mtk_fd_hw_disconnect(fd);
> > > > }
> > > >
> > > > If there is no other concern, may I send the RFC v3 patch for review?
> > >
> > > Thanks, technically it looks good now. Just one comment about avoiding
> > > code duplication above.
> > >
> >
> > Thanks,
> >
> > I will send the v3 if the above fix-up is accepted,
> 
> I think there is a bigger issue here actually, related to how the m2m
> helpers work. Let's just keep the code as you proposed and post v3.
> 
> We can continue the discussion there.
> 

Ok, I will send v3 soon.

Thanks and best regards,
Jerry

> Best regards,
> Tomasz
diff mbox series

Patch

diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
index e6deb25..8b817cc 100644
--- a/drivers/media/platform/Makefile
+++ b/drivers/media/platform/Makefile
@@ -94,6 +94,8 @@  obj-$(CONFIG_VIDEO_MEDIATEK_MDP)	+= mtk-mdp/
 
 obj-$(CONFIG_VIDEO_MEDIATEK_JPEG)	+= mtk-jpeg/
 
+obj-$(CONFIG_VIDEO_MEDIATEK_FD)		+= mtk-isp/fd/
+
 obj-$(CONFIG_VIDEO_QCOM_CAMSS)		+= qcom/camss/
 
 obj-$(CONFIG_VIDEO_QCOM_VENUS)		+= qcom/venus/
diff --git a/drivers/media/platform/mtk-isp/fd/Makefile b/drivers/media/platform/mtk-isp/fd/Makefile
new file mode 100644
index 0000000..9b1c501
--- /dev/null
+++ b/drivers/media/platform/mtk-isp/fd/Makefile
@@ -0,0 +1,5 @@ 
+# SPDX-License-Identifier: GPL-2.0
+
+mtk-fd-objs += mtk_fd_40.o
+
+obj-$(CONFIG_VIDEO_MEDIATEK_FD) += mtk-fd.o
\ No newline at end of file
diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd.h b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
new file mode 100644
index 0000000..289999b
--- /dev/null
+++ b/drivers/media/platform/mtk-isp/fd/mtk_fd.h
@@ -0,0 +1,157 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+//
+// Copyright (c) 2018 MediaTek Inc.
+
+#ifndef __MTK_FD_HW_H__
+#define __MTK_FD_HW_H__
+
+#include <linux/io.h>
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-device.h>
+#include <media/videobuf2-v4l2.h>
+
+#define MTK_FD_OUTPUT_MIN_WIDTH			26U
+#define MTK_FD_OUTPUT_MIN_HEIGHT		26U
+#define MTK_FD_OUTPUT_MAX_WIDTH			640U
+#define MTK_FD_OUTPUT_MAX_HEIGHT		480U
+
+/* Control the user defined image widths and heights
+ * to be scaled and performed face detection in FD HW.
+ * MTK FD support up to 14 user defined image sizes to perform face detection.
+ */
+#define V4L2_CID_MTK_FD_SCALE_IMG_WIDTH		(V4L2_CID_USER_MTK_FD_BASE + 1)
+#define V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT	(V4L2_CID_USER_MTK_FD_BASE + 2)
+
+/* Control the numbers of user defined image sizes.
+ * The default value is 0 which means user is not going
+ * to define the specific image sizes.
+ */
+#define V4L2_CID_MTK_FD_SCALE_IMG_NUM		(V4L2_CID_USER_MTK_FD_BASE + 3)
+
+/* Control the Face Pose to be detected.
+ * Here describe the value as following:
+ * {0, detect the front face with rotation from 0 to 270 degrees},
+ * {1, detect the front face with rotation from 0 to 240 and 300 degrees},
+ * {2, detect the front face with rotation from 0 to 240 and 330 degrees},
+ * {3, detect the front face with rotation from 0 to 240 and left side face}.
+ */
+#define V4L2_CID_MTK_FD_DETECT_POSE		(V4L2_CID_USER_MTK_FD_BASE + 4)
+#define V4L2_CID_MTK_FD_DETECT_SPEEDUP		(V4L2_CID_USER_MTK_FD_BASE + 5)
+#define V4L2_CID_MTK_FD_EXTRA_MODEL		(V4L2_CID_USER_MTK_FD_BASE + 6)
+
+/* We reserve 16 controls for this driver. */
+#define V4L2_CID_MTK_FD_MAX			16
+
+#define ENABLE_FD				0x111
+#define FD_HW_ENABLE				0x4
+#define FD_INT_EN				0x15c
+#define FD_INT					0x168
+#define FD_RESULT				0x178
+#define FD_IRQ_MASK				0x001
+
+#define RS_MAX_BUF_SIZE				2288788
+#define FD_MAX_SPEEDUP				7
+#define FD_MAX_POSE_VAL				0xfffffffffffffff
+#define FD_DEF_POSE_VAL				0x3ff
+#define MAX_FD_SEL_NUM				1026
+
+/* The max. number of face sizes could be detected, for feature scaling */
+#define FACE_SIZE_NUM_MAX			14
+/* FACE_SIZE_NUM_MAX + 1, first scale for input image W/H */
+#define FD_SCALE_NUM				15
+
+enum fd_state {
+	FD_ENQ,
+	FD_CBD,
+};
+
+enum fd_img_format {
+	FMT_VYUY = 2,
+	FMT_UYVY,
+	FMT_YVYU,
+	FMT_YUYV,
+	FMT_UNKNOWN
+};
+
+struct fd_buffer {
+	__u32 scp_addr;	/* used by SCP */
+	__u32 dma_addr;	/* used by DMA HW */
+} __packed;
+
+enum fd_scp_cmd {
+	FD_CMD_INIT,
+	FD_CMD_ENQUEUE,
+};
+
+struct fd_user_output {
+	__u64 results[MAX_FD_SEL_NUM];
+	__u16 number;
+};
+
+struct user_param {
+	u8 fd_pose;
+	u8 fd_speedup;
+	u8 fd_extra_model;
+	u8 scale_img_num;
+	u8 src_img_fmt;
+	__u16 scale_img_width[FD_SCALE_NUM];
+	__u16 scale_img_height[FD_SCALE_NUM];
+} __packed;
+
+struct fd_hw_param {
+	struct fd_buffer src_img;
+	struct fd_buffer user_result;
+	struct user_param user_param;
+} __packed;
+
+struct cmd_init_info {
+	struct fd_buffer fd_manager;
+	__u32 rs_dma_addr;
+} __packed;
+
+struct ipi_message {
+	u8 cmd_id;
+	union {
+		struct cmd_init_info init_param;
+		struct fd_hw_param hw_param;
+	};
+} __packed;
+
+struct mtk_fd_hw {
+	struct clk *fd_clk;
+	struct rproc *rproc_handle;
+	struct platform_device *scp_pdev;
+	struct fd_buffer scp_mem;
+	wait_queue_head_t wq;
+	void __iomem *fd_base;
+	atomic_t fd_user_cnt;
+	enum fd_state state;
+	u32 fd_irq_result;
+	/* Ensure only one job in hw */
+	struct mutex fd_hw_lock;
+};
+
+struct mtk_fd_dev {
+	struct platform_device *pdev;
+	struct v4l2_device v4l2_dev;
+	struct v4l2_m2m_dev *m2m_dev;
+	struct media_device mdev;
+	struct video_device vfd;
+	struct mtk_fd_hw fd_hw;
+	/* Lock for V4L2 operations */
+	struct mutex vfd_lock;
+};
+
+struct mtk_fd_ctx {
+	struct mtk_fd_dev *fd_dev;
+	struct device *dev;
+	struct v4l2_fh fh;
+	struct v4l2_ctrl_handler hdl;
+	struct v4l2_pix_format_mplane src_fmt;
+	struct v4l2_meta_format dst_fmt;
+	struct user_param user_param;
+};
+
+#endif/*__MTK_FD_HW_H__*/
diff --git a/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c b/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
new file mode 100644
index 0000000..246d3aa
--- /dev/null
+++ b/drivers/media/platform/mtk-isp/fd/mtk_fd_40.c
@@ -0,0 +1,1259 @@ 
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2018 MediaTek Inc.
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_data/mtk_scp.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/remoteproc.h>
+#include <media/videobuf2-dma-contig.h>
+#include <media/videobuf2-v4l2.h>
+#include <linux/wait.h>
+#include <media/v4l2-ioctl.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-mem2mem.h>
+#include <media/videobuf2-core.h>
+
+#include "mtk_fd.h"
+
+static struct v4l2_meta_format fw_param_fmts[] = {
+	{
+		.dataformat = V4L2_META_FMT_MTISP_PARAMS,
+		.buffersize = 1024 * 30,
+	},
+};
+
+static const struct v4l2_pix_format_mplane in_img_fmts[] = {
+	{
+		.width = MTK_FD_OUTPUT_MAX_WIDTH,
+		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
+		.pixelformat = V4L2_PIX_FMT_VYUY,
+		.colorspace = V4L2_COLORSPACE_BT2020,
+		.field = V4L2_FIELD_NONE,
+		.num_planes = 1,
+	},
+	{
+		.width = MTK_FD_OUTPUT_MAX_WIDTH,
+		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
+		.pixelformat = V4L2_PIX_FMT_YUYV,
+		.colorspace = V4L2_COLORSPACE_BT2020,
+		.field = V4L2_FIELD_NONE,
+		.num_planes = 1,
+	},
+	{
+		.width = MTK_FD_OUTPUT_MAX_WIDTH,
+		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
+		.pixelformat = V4L2_PIX_FMT_YVYU,
+		.colorspace = V4L2_COLORSPACE_BT2020,
+		.field = V4L2_FIELD_NONE,
+		.num_planes = 1,
+	},
+	{
+		.width = MTK_FD_OUTPUT_MAX_WIDTH,
+		.height = MTK_FD_OUTPUT_MAX_HEIGHT,
+		.pixelformat = V4L2_PIX_FMT_UYVY,
+		.colorspace = V4L2_COLORSPACE_BT2020,
+		.field = V4L2_FIELD_NONE,
+		.num_planes = 1,
+	},
+};
+
+#define NUM_FORMATS ARRAY_SIZE(in_img_fmts)
+
+static inline struct mtk_fd_dev *mtk_fd_hw_to_dev(struct mtk_fd_hw *fd_hw)
+{
+	return container_of(fd_hw, struct mtk_fd_dev, fd_hw);
+}
+
+static inline struct mtk_fd_ctx *fh_to_ctx(struct v4l2_fh *fh)
+{
+	return container_of(fh, struct mtk_fd_ctx, fh);
+}
+
+static int mtk_fd_load_scp(struct mtk_fd_hw *fd_hw)
+{
+	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
+	struct device *dev = &fd_dev->pdev->dev;
+	phandle rproc_phandle;
+	int ret;
+
+	/* init scp */
+	fd_hw->scp_pdev = scp_get_pdev(fd_dev->pdev);
+	if (!fd_hw->scp_pdev) {
+		dev_err(dev, "Failed to get scp device\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_u32(fd_dev->pdev->dev.of_node, "mediatek,scp",
+				 &rproc_phandle)) {
+		dev_err(dev, "Could not get scp device\n");
+		return -EINVAL;
+	}
+
+	fd_hw->rproc_handle = rproc_get_by_phandle(rproc_phandle);
+	if (!fd_hw->rproc_handle) {
+		dev_err(dev, "Could not get FD's rproc_handle\n");
+		return -EINVAL;
+	}
+
+	ret = rproc_boot(fd_hw->rproc_handle);
+	if (ret < 0) {
+		/**
+		 * Return 0 if downloading firmware successfully,
+		 * otherwise it is failed
+		 */
+		dev_err(dev, "rproc_boot failed\n");
+		return -ENODEV;
+	}
+
+	return ret;
+}
+
+static dma_addr_t mtk_fd_hw_alloc_rs_dma_addr(struct mtk_fd_hw *fd_hw)
+{
+	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
+	struct device *dev = &fd_dev->pdev->dev;
+	void *va;
+	dma_addr_t dma_handle;
+
+	va = dma_alloc_coherent(dev, RS_MAX_BUF_SIZE, &dma_handle, GFP_KERNEL);
+	if (!va) {
+		dev_err(dev, "dma_alloc null va\n");
+		return -ENOMEM;
+	}
+	memset(va, 0, RS_MAX_BUF_SIZE);
+
+	return dma_handle;
+}
+
+static int mtk_fd_send_ipi_init(struct mtk_fd_hw *fd_hw)
+{
+	struct ipi_message fd_init_msg;
+	dma_addr_t rs_dma_addr;
+
+	fd_init_msg.cmd_id = FD_CMD_INIT;
+
+	fd_init_msg.init_param.fd_manager.scp_addr = fd_hw->scp_mem.scp_addr;
+	fd_init_msg.init_param.fd_manager.dma_addr = fd_hw->scp_mem.dma_addr;
+
+	rs_dma_addr = mtk_fd_hw_alloc_rs_dma_addr(fd_hw);
+	if (!rs_dma_addr)
+		return -ENOMEM;
+
+	fd_init_msg.init_param.rs_dma_addr = rs_dma_addr;
+
+	return scp_ipi_send(fd_hw->scp_pdev, SCP_IPI_FD_CMD, &fd_init_msg,
+			    sizeof(fd_init_msg), 0);
+}
+
+static int mtk_fd_hw_enable(struct mtk_fd_hw *fd_hw)
+{
+	int ret;
+
+	ret = mtk_fd_load_scp(fd_hw);
+	if (ret)
+		return ret;
+
+	ret = mtk_fd_send_ipi_init(fd_hw);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int mtk_fd_hw_connect(struct mtk_fd_hw *fd_hw)
+{
+	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
+	s32 usercount;
+
+	mutex_lock(&fd_hw->fd_hw_lock);
+	usercount = atomic_inc_return(&fd_hw->fd_user_cnt);
+	if (usercount == 1) {
+		pm_runtime_get_sync(&fd_dev->pdev->dev);
+		if (mtk_fd_hw_enable(fd_hw)) {
+			pm_runtime_put_sync(&fd_dev->pdev->dev);
+			atomic_dec_return(&fd_hw->fd_user_cnt);
+			mutex_unlock(&fd_hw->fd_hw_lock);
+			return -EINVAL;
+		}
+	}
+	mutex_unlock(&fd_hw->fd_hw_lock);
+
+	return 0;
+}
+
+static int mtk_fd_wait_irq(struct mtk_fd_hw *fd_hw)
+{
+	int timeout;
+	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
+	struct device *dev = &fd_dev->pdev->dev;
+
+	timeout = wait_event_interruptible_timeout
+		(fd_hw->wq, (fd_hw->fd_irq_result & FD_IRQ_MASK),
+		 usecs_to_jiffies(1000000));
+	if (!timeout) {
+		dev_err(dev, "%s timeout, %d\n", __func__,
+			fd_hw->fd_irq_result);
+		return -EAGAIN;
+	} else if (!(fd_hw->fd_irq_result & FD_IRQ_MASK)) {
+		dev_err(dev, "%s No IRQ mask:0x%8x\n",
+			__func__, fd_hw->fd_irq_result);
+		return -EINVAL;
+	}
+	fd_hw->fd_irq_result = 0;
+
+	return 0;
+}
+
+static void mtk_fd_hw_disconnect(struct mtk_fd_hw *fd_hw)
+{
+	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
+	s32 usercount;
+
+	mutex_lock(&fd_hw->fd_hw_lock);
+	atomic_dec_return(&fd_hw->fd_user_cnt);
+	usercount = atomic_read(&fd_hw->fd_user_cnt);
+	if (usercount == 0) {
+		if (fd_hw->state == FD_ENQ)
+			mtk_fd_wait_irq(fd_hw);
+
+		pm_runtime_put_sync(&fd_dev->pdev->dev);
+		rproc_shutdown(fd_hw->rproc_handle);
+		rproc_put(fd_hw->rproc_handle);
+	}
+	mutex_unlock(&fd_hw->fd_hw_lock);
+}
+
+static void mtk_fd_hw_job_finish(struct mtk_fd_hw *fd_hw,
+				 struct fd_hw_param *fd_param,
+				 enum vb2_buffer_state vb_state)
+{
+	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
+	struct mtk_fd_ctx *ctx;
+	struct device *dev = &fd_dev->pdev->dev;
+	struct vb2_buffer *src_vb, *dst_vb;
+	struct vb2_v4l2_buffer *src_vbuf = NULL, *dst_vbuf = NULL;
+
+	ctx = v4l2_m2m_get_curr_priv(fd_dev->m2m_dev);
+	if (!ctx) {
+		dev_err(dev, "Instance released before end of transaction\n");
+		return;
+	}
+
+	src_vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+	if (WARN_ON(!src_vb))
+		return;
+	src_vbuf = to_vb2_v4l2_buffer(src_vb);
+
+	dst_vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+	if (WARN_ON(!dst_vb))
+		return;
+	dst_vbuf = to_vb2_v4l2_buffer(dst_vb);
+
+	dst_vbuf->vb2_buf.timestamp = src_vbuf->vb2_buf.timestamp;
+	dst_vbuf->timecode = src_vbuf->timecode;
+	dst_vbuf->flags &= ~V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
+	dst_vbuf->flags |= src_vbuf->flags & V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
+
+	v4l2_m2m_buf_done(src_vbuf, vb_state);
+	v4l2_m2m_buf_done(dst_vbuf, vb_state);
+	v4l2_m2m_job_finish(fd_dev->m2m_dev, ctx->fh.m2m_ctx);
+}
+
+static int mtk_fd_hw_job_exec(struct mtk_fd_hw *fd_hw,
+			      struct fd_hw_param *fd_param,
+			      void *output_vaddr)
+{
+	struct fd_user_output *fd_output;
+	struct ipi_message fd_ipi_msg;
+	int ret;
+	u32 num;
+
+	if (fd_param->user_param.src_img_fmt == FMT_UNKNOWN)
+		goto param_err;
+
+	mutex_lock(&fd_hw->fd_hw_lock);
+	fd_hw->state = FD_ENQ;
+	fd_ipi_msg.cmd_id = FD_CMD_ENQUEUE;
+	memcpy(&fd_ipi_msg.hw_param, fd_param, sizeof(fd_ipi_msg.hw_param));
+	ret = scp_ipi_send(fd_hw->scp_pdev, SCP_IPI_FD_CMD, &fd_ipi_msg,
+			   sizeof(fd_ipi_msg), 0);
+	if (ret)
+		goto buf_err;
+
+	ret = mtk_fd_wait_irq(fd_hw);
+	if (ret)
+		goto buf_err;
+
+	num = readl(fd_hw->fd_base + FD_RESULT);
+	/* Disable FD ISR */
+	writel(0x0, fd_hw->fd_base + FD_INT_EN);
+
+	fd_output = (struct fd_user_output *)output_vaddr;
+	fd_output->number = num;
+	fd_hw->state = FD_CBD;
+	mutex_unlock(&fd_hw->fd_hw_lock);
+
+	mtk_fd_hw_job_finish(fd_hw, fd_param, VB2_BUF_STATE_DONE);
+	return 0;
+
+buf_err:
+	mutex_unlock(&fd_hw->fd_hw_lock);
+param_err:
+	mtk_fd_hw_job_finish(fd_hw, fd_param, VB2_BUF_STATE_ERROR);
+	return ret;
+}
+
+static int mtk_fd_vb2_buf_out_validate(struct vb2_buffer *vb)
+{
+	struct vb2_v4l2_buffer *v4l2_buf = to_vb2_v4l2_buffer(vb);
+
+	v4l2_buf->field = V4L2_FIELD_NONE;
+
+	return 0;
+}
+
+static int mtk_fd_vb2_buf_prepare(struct vb2_buffer *vb)
+{
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+	struct vb2_queue *vq = vb->vb2_queue;
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
+	struct device *dev = ctx->dev;
+	struct v4l2_pix_format_mplane *pixfmt;
+
+	switch (vq->type) {
+	case V4L2_BUF_TYPE_META_CAPTURE:
+		if (vb2_plane_size(vb, 0) < ctx->dst_fmt.buffersize) {
+			dev_err(dev, "meta size %d is too small\n");
+			return -EINVAL;
+		}
+		break;
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
+		pixfmt = &ctx->src_fmt;
+
+		if (vbuf->field == V4L2_FIELD_ANY)
+			vbuf->field = V4L2_FIELD_NONE;
+
+		if (vb->num_planes != 1 || vbuf->field != V4L2_FIELD_NONE) {
+			dev_err(dev, "plane or field %d not supported\n",
+				vb->num_planes, vbuf->field);
+			return -EINVAL;
+		}
+		if (vb2_plane_size(vb, 0) < pixfmt->plane_fmt[0].sizeimage) {
+			dev_err(dev, "plane %d is too small\n");
+			return -EINVAL;
+		}
+		break;
+	default:
+		dev_err(dev, "invalid queue type: %d\n", vq->type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void mtk_fd_vb2_buf_queue(struct vb2_buffer *vb)
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
+
+	v4l2_m2m_buf_queue(ctx->fh.m2m_ctx, vbuf);
+}
+
+static int mtk_fd_vb2_queue_setup(struct vb2_queue *vq,
+				  unsigned int *num_buffers,
+				  unsigned int *num_planes,
+				  unsigned int sizes[],
+				  struct device *alloc_devs[])
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
+	struct device *dev = ctx->dev;
+	unsigned int size;
+
+	switch (vq->type) {
+	case V4L2_BUF_TYPE_META_CAPTURE:
+		size = ctx->dst_fmt.buffersize;
+		break;
+	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
+		size = ctx->src_fmt.plane_fmt[0].sizeimage;
+		break;
+	default:
+		dev_err(dev, "invalid queue type: %d\n", vq->type);
+		return -EINVAL;
+	}
+
+	if (!*num_planes) {
+		*num_planes = 1;
+		sizes[0] = size;
+	}
+
+	return 0;
+}
+
+static int mtk_fd_vb2_start_streaming(struct vb2_queue *vq, unsigned int count)
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
+
+	return mtk_fd_hw_connect(&ctx->fd_dev->fd_hw);
+}
+
+static void mtk_fd_vb2_stop_streaming(struct vb2_queue *vq)
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vq);
+	struct vb2_buffer *vb;
+
+	if (V4L2_TYPE_IS_OUTPUT(vq->type))
+		vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+	else
+		vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+
+	while (vb) {
+		v4l2_m2m_buf_done(to_vb2_v4l2_buffer(vb), VB2_BUF_STATE_ERROR);
+		if (V4L2_TYPE_IS_OUTPUT(vq->type))
+			vb = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
+		else
+			vb = v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx);
+	}
+
+	mtk_fd_hw_disconnect(&ctx->fd_dev->fd_hw);
+}
+
+static void mtk_fd_vb2_request_complete(struct vb2_buffer *vb)
+{
+	struct mtk_fd_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue);
+
+	v4l2_ctrl_request_complete(vb->req_obj.req, &ctx->hdl);
+}
+
+static void mtk_fd_fill_pixfmt_mp(struct v4l2_pix_format_mplane *dfmt,
+				  const struct v4l2_pix_format_mplane *sfmt)
+{
+	dfmt->width = sfmt->width;
+	dfmt->height = sfmt->height;
+	dfmt->pixelformat = sfmt->pixelformat;
+	dfmt->field = sfmt->field;
+	dfmt->colorspace = sfmt->colorspace;
+	dfmt->num_planes = sfmt->num_planes;
+
+	/* Use default */
+	dfmt->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
+	dfmt->quantization = V4L2_QUANTIZATION_DEFAULT;
+	dfmt->xfer_func =
+		V4L2_MAP_XFER_FUNC_DEFAULT(dfmt->colorspace);
+	dfmt->plane_fmt[0].bytesperline = dfmt->width * 2;
+	dfmt->plane_fmt[0].sizeimage =
+		dfmt->height * dfmt->plane_fmt[0].bytesperline;
+	memset(dfmt->reserved, 0, sizeof(dfmt->reserved));
+}
+
+static const struct v4l2_pix_format_mplane *mtk_fd_find_fmt(u32 format)
+{
+	unsigned int i;
+	const struct v4l2_pix_format_mplane *dev_fmt;
+
+	for (i = 0; i < NUM_FORMATS; i++) {
+		dev_fmt = &in_img_fmts[i];
+		if (dev_fmt->pixelformat == format)
+			return dev_fmt;
+	}
+
+	return NULL;
+}
+
+static int mtk_fd_m2m_querycap(struct file *file, void *fh,
+			       struct v4l2_capability *cap)
+{
+	struct mtk_fd_dev *fd_dev = video_drvdata(file);
+	struct device *dev = &fd_dev->pdev->dev;
+
+	strscpy(cap->driver, dev->driver->name, sizeof(cap->driver));
+	strscpy(cap->card, fd_dev->vfd.name, sizeof(cap->card));
+	snprintf(cap->bus_info, sizeof(cap->bus_info), "platform:%s",
+		 dev_name(&fd_dev->pdev->dev));
+
+	return 0;
+}
+
+static int mtk_fd_m2m_enum_fmt_out_mp(struct file *file, void *fh,
+				      struct v4l2_fmtdesc *f)
+{
+	int i;
+
+	for (i = 0; i < NUM_FORMATS; ++i) {
+		if (i == f->index) {
+			f->pixelformat = in_img_fmts[i].pixelformat;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static int mtk_fd_m2m_try_fmt_out_mp(struct file *file,
+				     void *fh,
+				     struct v4l2_format *f)
+{
+	struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
+	const struct v4l2_pix_format_mplane *fmt;
+
+	fmt = mtk_fd_find_fmt(pix_mp->pixelformat);
+	if (!fmt) {
+		/* Get default img fmt */
+		fmt = &in_img_fmts[0];
+		f->fmt.pix.pixelformat = fmt->pixelformat;
+	}
+
+	/* Use default */
+	pix_mp->field = fmt->field;
+	pix_mp->colorspace = fmt->colorspace;
+	pix_mp->num_planes = fmt->num_planes;
+	pix_mp->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
+	pix_mp->quantization = V4L2_QUANTIZATION_DEFAULT;
+	pix_mp->xfer_func =
+		V4L2_MAP_XFER_FUNC_DEFAULT(pix_mp->colorspace);
+
+	/* Keep user setting as possible */
+	pix_mp->width = clamp(pix_mp->width,
+			      MTK_FD_OUTPUT_MIN_WIDTH,
+			      MTK_FD_OUTPUT_MAX_WIDTH);
+	pix_mp->height = clamp(pix_mp->height,
+			       MTK_FD_OUTPUT_MIN_HEIGHT,
+			       MTK_FD_OUTPUT_MAX_HEIGHT);
+
+	pix_mp->plane_fmt[0].bytesperline = pix_mp->width * 2;
+	pix_mp->plane_fmt[0].sizeimage =
+		pix_mp->plane_fmt[0].bytesperline * pix_mp->height;
+	memset(pix_mp->plane_fmt[0].reserved, 0,
+	       sizeof(pix_mp->plane_fmt[0].reserved));
+
+	return 0;
+}
+
+static int mtk_fd_m2m_g_fmt_out_mp(struct file *file, void *fh,
+				   struct v4l2_format *f)
+{
+	struct mtk_fd_ctx *ctx = fh_to_ctx(fh);
+
+	f->fmt.pix_mp = ctx->src_fmt;
+
+	return 0;
+}
+
+static int mtk_fd_m2m_s_fmt_out_mp(struct file *file, void *fh,
+				   struct v4l2_format *f)
+{
+	struct mtk_fd_ctx *ctx = fh_to_ctx(fh);
+	struct vb2_queue *vq;
+
+	/* Change not allowed if queue is streaming. */
+	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type);
+	if (vb2_is_streaming(vq) || vb2_is_busy(vq)) {
+		dev_dbg(ctx->dev, "vb2_is_streaming or vb2_is_busy");
+		return -EBUSY;
+	}
+
+	mtk_fd_m2m_try_fmt_out_mp(file, fh, f);
+	ctx->src_fmt = f->fmt.pix_mp;
+
+	return 0;
+}
+
+static int mtk_fd_m2m_enum_fmt_meta_cap(struct file *file, void *fh,
+					struct v4l2_fmtdesc *f)
+{
+	if (f->index)
+		return -EINVAL;
+
+	strscpy(f->description, "Face detection result",
+		sizeof(f->description));
+	f->pixelformat = fw_param_fmts[0].dataformat;
+	f->flags = 0;
+
+	return 0;
+}
+
+static int mtk_fd_m2m_g_fmt_meta_cap(struct file *file, void *fh,
+				     struct v4l2_format *f)
+{
+	f->fmt.meta.dataformat = fw_param_fmts[0].dataformat;
+	f->fmt.meta.buffersize = fw_param_fmts[0].buffersize;
+
+	return 0;
+}
+
+static const struct vb2_ops mtk_fd_vb2_ops = {
+	.queue_setup = mtk_fd_vb2_queue_setup,
+	.buf_out_validate = mtk_fd_vb2_buf_out_validate,
+	.buf_prepare  = mtk_fd_vb2_buf_prepare,
+	.buf_queue = mtk_fd_vb2_buf_queue,
+	.start_streaming = mtk_fd_vb2_start_streaming,
+	.stop_streaming = mtk_fd_vb2_stop_streaming,
+	.wait_prepare = vb2_ops_wait_prepare,
+	.wait_finish = vb2_ops_wait_finish,
+	.buf_request_complete = mtk_fd_vb2_request_complete,
+};
+
+static const struct v4l2_ioctl_ops mtk_fd_v4l2_video_out_ioctl_ops = {
+	.vidioc_querycap = mtk_fd_m2m_querycap,
+	.vidioc_enum_fmt_vid_out_mplane = mtk_fd_m2m_enum_fmt_out_mp,
+	.vidioc_g_fmt_vid_out_mplane = mtk_fd_m2m_g_fmt_out_mp,
+	.vidioc_s_fmt_vid_out_mplane = mtk_fd_m2m_s_fmt_out_mp,
+	.vidioc_try_fmt_vid_out_mplane = mtk_fd_m2m_try_fmt_out_mp,
+	.vidioc_enum_fmt_meta_cap = mtk_fd_m2m_enum_fmt_meta_cap,
+	.vidioc_g_fmt_meta_cap = mtk_fd_m2m_g_fmt_meta_cap,
+	.vidioc_s_fmt_meta_cap = mtk_fd_m2m_g_fmt_meta_cap,
+	.vidioc_try_fmt_meta_cap = mtk_fd_m2m_g_fmt_meta_cap,
+	.vidioc_reqbufs = v4l2_m2m_ioctl_reqbufs,
+	.vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs,
+	.vidioc_expbuf = v4l2_m2m_ioctl_expbuf,
+	.vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf,
+	.vidioc_querybuf = v4l2_m2m_ioctl_querybuf,
+	.vidioc_qbuf = v4l2_m2m_ioctl_qbuf,
+	.vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf,
+	.vidioc_streamon = v4l2_m2m_ioctl_streamon,
+	.vidioc_streamoff = v4l2_m2m_ioctl_streamoff,
+	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
+	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
+};
+
+static int
+mtk_fd_queue_init(void *priv, struct vb2_queue *src_vq,
+		  struct vb2_queue *dst_vq)
+{
+	struct mtk_fd_ctx *ctx = priv;
+	int ret;
+
+	src_vq->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
+	src_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+	src_vq->supports_requests = true;
+	src_vq->drv_priv = ctx;
+	src_vq->ops = &mtk_fd_vb2_ops;
+	src_vq->mem_ops = &vb2_dma_contig_memops;
+	src_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+	src_vq->lock = &ctx->fd_dev->vfd_lock;
+	src_vq->dev = ctx->fd_dev->v4l2_dev.dev;
+
+	ret = vb2_queue_init(src_vq);
+	if (ret)
+		return ret;
+
+	dst_vq->type = V4L2_BUF_TYPE_META_CAPTURE;
+	dst_vq->io_modes = VB2_MMAP | VB2_DMABUF;
+	dst_vq->drv_priv = ctx;
+	dst_vq->ops = &mtk_fd_vb2_ops;
+	dst_vq->mem_ops = &vb2_dma_contig_memops;
+	dst_vq->buf_struct_size = sizeof(struct v4l2_m2m_buffer);
+	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
+	dst_vq->lock = &ctx->fd_dev->vfd_lock;
+	dst_vq->dev = ctx->fd_dev->v4l2_dev.dev;
+
+	return vb2_queue_init(dst_vq);
+}
+
+static int mtk_fd_dev_g_ctrl(struct v4l2_ctrl *ctrl)
+{
+	struct mtk_fd_ctx *ctx = ctrl->priv;
+	int i;
+
+	switch (ctrl->id) {
+	case V4L2_CID_MTK_FD_SCALE_IMG_WIDTH:
+		for (i = 0; i < ctrl->elems; i++)
+			ctrl->p_new.p_u16[i] =
+				ctx->user_param.scale_img_width[i];
+		break;
+	case V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT:
+		for (i = 0; i < ctrl->elems; i++)
+			ctrl->p_new.p_u16[i] =
+				ctx->user_param.scale_img_height[i];
+		break;
+	case V4L2_CID_MTK_FD_DETECT_POSE:
+		ctrl->val = ctx->user_param.fd_pose;
+		break;
+	case V4L2_CID_MTK_FD_DETECT_SPEEDUP:
+		ctrl->val = ctx->user_param.fd_speedup;
+		break;
+	case V4L2_CID_MTK_FD_SCALE_IMG_NUM:
+		ctrl->val = ctx->user_param.scale_img_num;
+		break;
+	case V4L2_CID_MTK_FD_EXTRA_MODEL:
+		ctrl->val = ctx->user_param.fd_extra_model;
+		break;
+	default:
+		dev_dbg(ctx->dev, "%s: unexpected control: 0x%x:%d",
+			__func__, ctrl->id, ctrl->val);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int mtk_fd_dev_s_ctrl(struct v4l2_ctrl *ctrl)
+{
+	struct mtk_fd_ctx *ctx = ctrl->priv;
+	int i;
+
+	switch (ctrl->id) {
+	case V4L2_CID_MTK_FD_SCALE_IMG_WIDTH:
+		for (i = 0; i < ctrl->elems; i++)
+			ctx->user_param.scale_img_width[i] =
+				ctrl->p_new.p_u16[i];
+		break;
+	case V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT:
+		for (i = 0; i < ctrl->elems; i++)
+			ctx->user_param.scale_img_height[i] =
+				ctrl->p_new.p_u16[i];
+		break;
+	case V4L2_CID_MTK_FD_DETECT_POSE:
+		ctx->user_param.fd_pose = ctrl->val;
+		break;
+	case V4L2_CID_MTK_FD_DETECT_SPEEDUP:
+		ctx->user_param.fd_speedup = ctrl->val;
+		break;
+	case V4L2_CID_MTK_FD_SCALE_IMG_NUM:
+		ctx->user_param.scale_img_num = ctrl->val;
+		break;
+	case V4L2_CID_MTK_FD_EXTRA_MODEL:
+		ctx->user_param.fd_extra_model = ctrl->val;
+		break;
+	default:
+		dev_dbg(ctx->dev, "%s: unexpected control: 0x%x:%d",
+			__func__, ctrl->id, ctrl->val);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct v4l2_ctrl_ops mtk_fd_dev_ctrl_ops = {
+	.g_volatile_ctrl = mtk_fd_dev_g_ctrl,
+	.s_ctrl = mtk_fd_dev_s_ctrl,
+};
+
+struct v4l2_ctrl_config mtk_fd_controls[] = {
+	{
+	.ops = &mtk_fd_dev_ctrl_ops,
+	.id = V4L2_CID_MTK_FD_SCALE_IMG_WIDTH,
+	.name = "FD scale image widths",
+	.type = V4L2_CTRL_TYPE_U16,
+	.min = MTK_FD_OUTPUT_MIN_WIDTH,
+	.max = MTK_FD_OUTPUT_MAX_WIDTH,
+	.step = 1,
+	.def = MTK_FD_OUTPUT_MAX_WIDTH,
+	.dims = { FD_SCALE_NUM },
+	},
+	{
+	.ops = &mtk_fd_dev_ctrl_ops,
+	.id = V4L2_CID_MTK_FD_SCALE_IMG_HEIGHT,
+	.name = "FD scale image heights",
+	.type = V4L2_CTRL_TYPE_U16,
+	.min = MTK_FD_OUTPUT_MIN_HEIGHT,
+	.max = MTK_FD_OUTPUT_MAX_HEIGHT,
+	.step = 1,
+	.def = MTK_FD_OUTPUT_MAX_HEIGHT,
+	.dims = { FD_SCALE_NUM },
+	},
+	{
+	.ops = &mtk_fd_dev_ctrl_ops,
+	.id = V4L2_CID_MTK_FD_SCALE_IMG_NUM,
+	.name = "FD scale size counts",
+	.type = V4L2_CTRL_TYPE_INTEGER,
+	.min = 0,
+	.max = FACE_SIZE_NUM_MAX,
+	.step = 1,
+	.def = 0,
+	},
+	{
+	.ops = &mtk_fd_dev_ctrl_ops,
+	.id = V4L2_CID_MTK_FD_DETECT_POSE,
+	.name = "FD detect face pose",
+	.type = V4L2_CTRL_TYPE_INTEGER,
+	.min = 0,
+	.max = 3,
+	.step = 1,
+	.def = 0,
+	},
+	{
+	.ops = &mtk_fd_dev_ctrl_ops,
+	.id = V4L2_CID_MTK_FD_DETECT_SPEEDUP,
+	.name = "FD detection speedup",
+	.type = V4L2_CTRL_TYPE_INTEGER,
+	.min = 0,
+	.max = FD_MAX_SPEEDUP,
+	.step = 1,
+	.def = 0,
+	},
+	{
+	.ops = &mtk_fd_dev_ctrl_ops,
+	.id = V4L2_CID_MTK_FD_EXTRA_MODEL,
+	.name = "FD use extra model",
+	.type = V4L2_CTRL_TYPE_BOOLEAN,
+	.min = 0,
+	.max = 1,
+	.step = 1,
+	.def = 0,
+	},
+};
+
+static int mtk_fd_ctrls_setup(struct mtk_fd_ctx *ctx)
+{
+	struct v4l2_ctrl_handler *hdl = &ctx->hdl;
+	struct v4l2_ctrl *ctl;
+	int i;
+
+	v4l2_ctrl_handler_init(hdl, V4L2_CID_MTK_FD_MAX);
+	if (hdl->error)
+		return hdl->error;
+
+	for (i = 0; i < ARRAY_SIZE(mtk_fd_controls); i++) {
+		ctl = v4l2_ctrl_new_custom(hdl, &mtk_fd_controls[i], ctx);
+		if (hdl->error) {
+			v4l2_ctrl_handler_free(hdl);
+			dev_err(ctx->dev, "Failed to register controls:%d", i);
+			return hdl->error;
+		}
+	}
+
+	ctx->fh.ctrl_handler = &ctx->hdl;
+	v4l2_ctrl_handler_setup(hdl);
+
+	return 0;
+}
+
+static unsigned int get_fd_img_fmt(unsigned int fourcc)
+{
+	switch (fourcc) {
+	case V4L2_PIX_FMT_VYUY:
+		return FMT_VYUY;
+	case V4L2_PIX_FMT_YUYV:
+		return FMT_YUYV;
+	case V4L2_PIX_FMT_YVYU:
+		return FMT_YVYU;
+	case V4L2_PIX_FMT_UYVY:
+		return FMT_UYVY;
+	default:
+		return FMT_UNKNOWN;
+	}
+}
+
+static void init_ctx_fmt(struct mtk_fd_ctx *ctx)
+{
+	const struct v4l2_pix_format_mplane *fmt;
+
+	/* Initialize M2M source fmt */
+	fmt = &in_img_fmts[0];
+	mtk_fd_fill_pixfmt_mp(&ctx->src_fmt, fmt);
+
+	/* Initialize M2M destination fmt */
+	ctx->dst_fmt.buffersize = fw_param_fmts[0].buffersize;
+	ctx->dst_fmt.dataformat = fw_param_fmts[0].dataformat;
+}
+
+/*
+ * V4L2 file operations.
+ */
+static int mtk_vfd_open(struct file *filp)
+{
+	struct mtk_fd_dev *fd_dev = video_drvdata(filp);
+	struct video_device *vdev = video_devdata(filp);
+	struct mtk_fd_ctx *ctx;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->fd_dev = fd_dev;
+	ctx->dev = &fd_dev->pdev->dev;
+
+	v4l2_fh_init(&ctx->fh, vdev);
+	filp->private_data = &ctx->fh;
+
+	init_ctx_fmt(ctx);
+
+	ret = mtk_fd_ctrls_setup(ctx);
+	if (ret) {
+		dev_err(ctx->dev, "Failed to set up controls:%d\n", ret);
+		goto err_fh_ctrl;
+	}
+
+	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(fd_dev->m2m_dev, ctx,
+					    &mtk_fd_queue_init);
+	if (IS_ERR(ctx->fh.m2m_ctx)) {
+		ret = PTR_ERR(ctx->fh.m2m_ctx);
+		goto err_init_ctx;
+	}
+
+	v4l2_fh_add(&ctx->fh);
+
+	return 0;
+
+err_init_ctx:
+	v4l2_ctrl_handler_free(&ctx->hdl);
+err_fh_ctrl:
+	v4l2_fh_exit(&ctx->fh);
+	kfree(ctx);
+
+	return ret;
+}
+
+static int mtk_vfd_release(struct file *filp)
+{
+	struct mtk_fd_ctx *ctx = container_of(filp->private_data,
+					      struct mtk_fd_ctx, fh);
+
+	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
+
+	v4l2_ctrl_handler_free(&ctx->hdl);
+	v4l2_fh_del(&ctx->fh);
+	v4l2_fh_exit(&ctx->fh);
+
+	kfree(ctx);
+
+	return 0;
+}
+
+static const struct v4l2_file_operations fd_video_fops = {
+	.owner = THIS_MODULE,
+	.open = mtk_vfd_open,
+	.release = mtk_vfd_release,
+	.poll = v4l2_m2m_fop_poll,
+	.unlocked_ioctl = video_ioctl2,
+	.mmap = v4l2_m2m_fop_mmap,
+};
+
+static void mtk_fd_device_run(void *priv)
+{
+	struct mtk_fd_ctx *ctx = priv;
+	struct vb2_v4l2_buffer *src_buf, *dst_buf;
+	struct fd_hw_param fd_param;
+	void *fd_result_vaddr;
+
+	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
+	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
+
+	memset(&fd_param, 0, sizeof(fd_param));
+
+	fd_param.src_img.dma_addr =
+		vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
+	fd_param.user_result.dma_addr =
+		vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+	fd_result_vaddr = vb2_plane_vaddr(&dst_buf->vb2_buf, 0);
+
+	ctx->user_param.src_img_fmt = get_fd_img_fmt(ctx->src_fmt.pixelformat);
+	memcpy(&fd_param.user_param, &ctx->user_param, sizeof(ctx->user_param));
+
+	/* Complete request controls if any */
+	v4l2_ctrl_request_complete(src_buf->vb2_buf.req_obj.req, &ctx->hdl);
+
+	mtk_fd_hw_job_exec(&ctx->fd_dev->fd_hw, &fd_param, fd_result_vaddr);
+}
+
+static struct v4l2_m2m_ops fd_m2m_ops = {
+	.device_run = mtk_fd_device_run,
+};
+
+static int mtk_fd_request_validate(struct media_request *req)
+{
+	unsigned int count;
+
+	count = vb2_request_buffer_cnt(req);
+	if (!count)
+		return -ENOENT;
+	else if (count > 1)
+		return -EINVAL;
+
+	return vb2_request_validate(req);
+}
+
+static const struct media_device_ops fd_m2m_media_ops = {
+	.req_validate	= mtk_fd_request_validate,
+	.req_queue	= v4l2_m2m_request_queue,
+};
+
+static int mtk_fd_video_device_register(struct mtk_fd_dev *fd_dev)
+{
+	struct video_device *vfd = &fd_dev->vfd;
+	struct v4l2_m2m_dev *m2m_dev = fd_dev->m2m_dev;
+	struct device *dev = &fd_dev->pdev->dev;
+	int function, ret;
+
+	vfd->fops = &fd_video_fops;
+	vfd->release = video_device_release;
+	vfd->lock = &fd_dev->vfd_lock;
+	vfd->v4l2_dev = &fd_dev->v4l2_dev;
+	vfd->vfl_dir = VFL_DIR_M2M;
+	vfd->device_caps = V4L2_CAP_STREAMING  | V4L2_CAP_VIDEO_OUTPUT_MPLANE |
+		V4L2_CAP_META_CAPTURE;
+	vfd->ioctl_ops = &mtk_fd_v4l2_video_out_ioctl_ops;
+
+	strscpy(vfd->name, "MTK-FD-V4L2", sizeof(vfd->name));
+	video_set_drvdata(vfd, fd_dev);
+
+	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
+	if (ret) {
+		dev_err(dev, "Failed to register video device\n");
+		goto err_free_dev;
+	}
+
+	function = MEDIA_ENT_F_PROC_VIDEO_DECODER;
+	ret = v4l2_m2m_register_media_controller(m2m_dev, vfd, function);
+	if (ret) {
+		dev_err(dev, "Failed to init mem2mem media controller\n");
+		goto err_unreg_video;
+	}
+	return 0;
+
+err_unreg_video:
+	video_unregister_device(vfd);
+err_free_dev:
+	video_device_release(vfd);
+	return ret;
+}
+
+static int mtk_fd_dev_v4l2_init(struct mtk_fd_dev *fd_dev)
+{
+	struct media_device *mdev = &fd_dev->mdev;
+	struct device *dev = &fd_dev->pdev->dev;
+	int ret;
+
+	ret = v4l2_device_register(dev, &fd_dev->v4l2_dev);
+	if (ret) {
+		dev_err(dev, "Failed to register v4l2 device\n");
+		return ret;
+	}
+
+	fd_dev->m2m_dev = v4l2_m2m_init(&fd_m2m_ops);
+	if (IS_ERR(fd_dev->m2m_dev)) {
+		dev_err(dev, "Failed to init mem2mem device\n");
+		ret = PTR_ERR(fd_dev->m2m_dev);
+		goto fail_m2m_dev;
+	}
+
+	mdev->dev = dev;
+	strscpy(mdev->model, "MTK-FD-V4L2", sizeof(mdev->model));
+	snprintf(mdev->bus_info, sizeof(mdev->bus_info),
+		 "platform:%s", dev_name(dev));
+	media_device_init(mdev);
+	mdev->ops = &fd_m2m_media_ops;
+	fd_dev->v4l2_dev.mdev = mdev;
+
+	ret = mtk_fd_video_device_register(fd_dev);
+	if (ret) {
+		dev_err(dev, "Failed to register video device\n");
+		goto err_vdev;
+	}
+
+	ret = media_device_register(mdev);
+	if (ret) {
+		dev_err(dev, "Failed to register mem2mem media device\n");
+		goto fail_mdev;
+	}
+
+	return 0;
+
+fail_mdev:
+	v4l2_m2m_unregister_media_controller(fd_dev->m2m_dev);
+	video_unregister_device(&fd_dev->vfd);
+	video_device_release(&fd_dev->vfd);
+err_vdev:
+	media_device_cleanup(mdev);
+	v4l2_m2m_release(fd_dev->m2m_dev);
+fail_m2m_dev:
+	v4l2_device_unregister(&fd_dev->v4l2_dev);
+	return ret;
+}
+
+static void mtk_fd_dev_v4l2_release(struct mtk_fd_dev *fd_dev)
+{
+	v4l2_m2m_unregister_media_controller(fd_dev->m2m_dev);
+	video_unregister_device(&fd_dev->vfd);
+	video_device_release(&fd_dev->vfd);
+	media_device_cleanup(&fd_dev->mdev);
+	v4l2_m2m_release(fd_dev->m2m_dev);
+	v4l2_device_unregister(&fd_dev->v4l2_dev);
+}
+
+static irqreturn_t mtk_fd_irq(int irq, void *data)
+{
+	struct mtk_fd_hw *fd_hw = (struct mtk_fd_hw *)data;
+
+	fd_hw->fd_irq_result = readl(fd_hw->fd_base + FD_INT);
+	wake_up_interruptible(&fd_hw->wq);
+	return IRQ_HANDLED;
+}
+
+static int mtk_fd_hw_get_scp_mem(struct mtk_fd_hw *fd_hw,
+				 struct fd_buffer *scp_mem)
+{
+	struct mtk_fd_dev *fd_dev = mtk_fd_hw_to_dev(fd_hw);
+	struct device *dev = &fd_dev->pdev->dev;
+	dma_addr_t addr;
+	u32 size;
+
+	scp_mem->scp_addr = scp_get_reserve_mem_phys(SCP_FD_MEM_ID);
+	size = scp_get_reserve_mem_size(SCP_FD_MEM_ID);
+	if (!scp_mem->scp_addr || !size)
+		return -EPROBE_DEFER;
+
+	/* get dma addr address */
+	addr = dma_map_page_attrs(dev, phys_to_page(scp_mem->scp_addr), 0,
+				  size, DMA_BIDIRECTIONAL,
+				  DMA_ATTR_SKIP_CPU_SYNC);
+	if (dma_mapping_error(dev, addr)) {
+		scp_mem->scp_addr = 0;
+		dev_err(dev, "Failed to map scp addr\n");
+		return -ENOMEM;
+	}
+	scp_mem->dma_addr = addr;
+
+	return 0;
+}
+
+static int mtk_fd_probe(struct platform_device *pdev)
+{
+	struct mtk_fd_dev *fd_dev;
+	struct device *dev = &pdev->dev;
+	struct mtk_fd_hw *fd_hw;
+	struct resource *res;
+	int irq;
+	int ret;
+
+	fd_dev = devm_kzalloc(&pdev->dev, sizeof(*fd_dev), GFP_KERNEL);
+	if (!fd_dev)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, fd_dev);
+	fd_hw = &fd_dev->fd_hw;
+	fd_dev->pdev = pdev;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "no IRQ:%d resource info\n", irq);
+		return irq;
+	}
+	ret = devm_request_irq(dev, irq, mtk_fd_irq, IRQF_SHARED,
+			       dev_driver_string(dev),
+			       fd_hw);
+	if (ret) {
+		dev_err(dev, "req_irq fail:%d\n", irq);
+		return ret;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fd_hw->fd_base = devm_ioremap_resource(dev, res);
+	if (!fd_hw->fd_base) {
+		dev_err(dev, "unable to get fd reg base\n");
+		return PTR_ERR(fd_hw->fd_base);
+	}
+
+	fd_hw->fd_clk = devm_clk_get(dev, "fd");
+	if (IS_ERR(fd_hw->fd_clk)) {
+		dev_err(dev, "cannot get fd_clk_img_fd clock\n");
+		return PTR_ERR(fd_hw->fd_clk);
+	}
+
+	ret = mtk_fd_hw_get_scp_mem(fd_hw, &fd_hw->scp_mem);
+	if (ret) {
+		dev_err(dev, "scp memory init failed: %d\n", ret);
+		return ret;
+	}
+
+	atomic_set(&fd_hw->fd_user_cnt, 0);
+	init_waitqueue_head(&fd_hw->wq);
+	mutex_init(&fd_dev->vfd_lock);
+	mutex_init(&fd_hw->fd_hw_lock);
+	pm_runtime_enable(dev);
+
+	ret = mtk_fd_dev_v4l2_init(fd_dev);
+	if (ret) {
+		mutex_destroy(&fd_dev->fd_hw.fd_hw_lock);
+		mutex_destroy(&fd_dev->vfd_lock);
+		pm_runtime_disable(&pdev->dev);
+		dev_err(dev, "v4l2 init failed: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int mtk_fd_remove(struct platform_device *pdev)
+{
+	struct mtk_fd_dev *fd_dev = dev_get_drvdata(&pdev->dev);
+
+	mtk_fd_dev_v4l2_release(fd_dev);
+	mutex_destroy(&fd_dev->fd_hw.fd_hw_lock);
+	mutex_destroy(&fd_dev->vfd_lock);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static int mtk_fd_suspend(struct device *dev)
+{
+	struct mtk_fd_dev *fd_dev = dev_get_drvdata(dev);
+
+	if (pm_runtime_suspended(dev))
+		return 0;
+
+	/* suspend FD HW */
+	writel(0x0, fd_dev->fd_hw.fd_base + FD_HW_ENABLE);
+	writel(0x0, fd_dev->fd_hw.fd_base + FD_INT_EN);
+	clk_disable_unprepare(fd_dev->fd_hw.fd_clk);
+	return 0;
+}
+
+static int mtk_fd_resume(struct device *dev)
+{
+	struct mtk_fd_dev *fd_dev = dev_get_drvdata(dev);
+	int ret;
+
+	if (pm_runtime_suspended(dev))
+		return 0;
+
+	ret = clk_prepare_enable(fd_dev->fd_hw.fd_clk);
+	if (ret < 0) {
+		dev_dbg(dev, "open fd clk failed\n");
+		clk_disable_unprepare(fd_dev->fd_hw.fd_clk);
+	}
+
+	/* resume FD HW */
+	writel(ENABLE_FD, fd_dev->fd_hw.fd_base + FD_HW_ENABLE);
+	if (fd_dev->fd_hw.state == FD_ENQ)
+		writel(0x1, fd_dev->fd_hw.fd_base + FD_INT_EN);
+	return 0;
+}
+
+static const struct dev_pm_ops mtk_fd_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mtk_fd_suspend, mtk_fd_resume)
+	SET_RUNTIME_PM_OPS(mtk_fd_suspend, mtk_fd_resume, NULL)
+};
+
+static const struct of_device_id mtk_fd_of_ids[] = {
+	{ .compatible = "mediatek,mt8183-fd", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mtk_fd_of_ids);
+
+static struct platform_driver mtk_fd_driver = {
+	.probe   = mtk_fd_probe,
+	.remove  = mtk_fd_remove,
+	.driver  = {
+		.name  = "mtk-fd-4.0",
+		.of_match_table = of_match_ptr(mtk_fd_of_ids),
+		.pm = &mtk_fd_pm_ops,
+	}
+};
+module_platform_driver(mtk_fd_driver);
+
+MODULE_DESCRIPTION("Mediatek FD driver");
+MODULE_LICENSE("GPL");
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 3dcfc61..eae876e 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -192,6 +192,10 @@  enum v4l2_colorfx {
  * We reserve 16 controls for this driver. */
 #define V4L2_CID_USER_IMX_BASE			(V4L2_CID_USER_BASE + 0x10b0)
 
+/* The base for the mediatek FD driver controls */
+/* We reserve 16 controls for this driver. */
+#define V4L2_CID_USER_MTK_FD_BASE		(V4L2_CID_USER_BASE + 0x10d0)
+
 /* MPEG-class control IDs */
 /* The MPEG controls are applicable to all codec controls
  * and the 'MPEG' part of the define is historical */