diff mbox series

[3/3] rcar-vin: handle transfer errors from subdevices and stop streaming if required

Message ID 1652983210-1194-4-git-send-email-mrodin@de.adit-jv.com (mailing list archive)
State New, archived
Headers show
Series [1/3] media: videobuf2: Add a transfer error event | expand

Commit Message

Michael Rodin May 19, 2022, 6 p.m. UTC
When a subdevice sends a transfer error event during streaming and we can
not capture new frames, then we know for sure that this is an unrecoverable
failure and not just a temporary glitch. In this case we can not ignore the
transfer error any more and have to notify userspace. In response to the
transfer error event userspace can try to restart streaming and hope that
it works again.

This patch is based on the patch [1] from Niklas Söderlund, however it adds
more logic to check whether the VIN hardware module is actually affected by
the transfer errors reported by the usptream device. For this it takes some
ideas from the imx driver where EOF interrupts are monitored by the
eof_timeout_timer added by commit 4a34ec8e470c ("[media] media: imx: Add
CSI subdev driver").

[1] https://lore.kernel.org/linux-renesas-soc/20211108160220.767586-4-niklas.soderlund+renesas@ragnatech.se/

Signed-off-by: Michael Rodin <mrodin@de.adit-jv.com>
---
 drivers/media/platform/renesas/rcar-vin/rcar-dma.c | 34 ++++++++++++++++++++++
 .../media/platform/renesas/rcar-vin/rcar-v4l2.c    | 18 +++++++++++-
 drivers/media/platform/renesas/rcar-vin/rcar-vin.h |  7 +++++
 3 files changed, 58 insertions(+), 1 deletion(-)

Comments

Niklas Söderlund May 19, 2022, 9 p.m. UTC | #1
Hi Michael,

Thanks for your work.

I like this patch, I think it captures the issue discussed in the 
previous thread quiet nicely. One small nit below.

On 2022-05-19 20:00:09 +0200, Michael Rodin wrote:
> When a subdevice sends a transfer error event during streaming and we can
> not capture new frames, then we know for sure that this is an unrecoverable
> failure and not just a temporary glitch. In this case we can not ignore the
> transfer error any more and have to notify userspace. In response to the
> transfer error event userspace can try to restart streaming and hope that
> it works again.
> 
> This patch is based on the patch [1] from Niklas Söderlund, however it adds
> more logic to check whether the VIN hardware module is actually affected by
> the transfer errors reported by the usptream device. For this it takes some
> ideas from the imx driver where EOF interrupts are monitored by the
> eof_timeout_timer added by commit 4a34ec8e470c ("[media] media: imx: Add
> CSI subdev driver").
> 
> [1] https://lore.kernel.org/linux-renesas-soc/20211108160220.767586-4-niklas.soderlund+renesas@ragnatech.se/
> 
> Signed-off-by: Michael Rodin <mrodin@de.adit-jv.com>
> ---
>  drivers/media/platform/renesas/rcar-vin/rcar-dma.c | 34 ++++++++++++++++++++++
>  .../media/platform/renesas/rcar-vin/rcar-v4l2.c    | 18 +++++++++++-
>  drivers/media/platform/renesas/rcar-vin/rcar-vin.h |  7 +++++
>  3 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-dma.c b/drivers/media/platform/renesas/rcar-vin/rcar-dma.c
> index 2272f1c..596a367 100644
> --- a/drivers/media/platform/renesas/rcar-vin/rcar-dma.c
> +++ b/drivers/media/platform/renesas/rcar-vin/rcar-dma.c
> @@ -13,6 +13,7 @@
>  #include <linux/delay.h>
>  #include <linux/interrupt.h>
>  #include <linux/pm_runtime.h>
> +#include <media/v4l2-event.h>
>  
>  #include <media/videobuf2-dma-contig.h>
>  
> @@ -1060,6 +1061,9 @@ static irqreturn_t rvin_irq(int irq, void *data)
>  		vin_dbg(vin, "Dropping frame %u\n", vin->sequence);
>  	}
>  
> +	cancel_delayed_work(&vin->frame_timeout);
> +	schedule_delayed_work(&vin->frame_timeout, msecs_to_jiffies(FRAME_TIMEOUT_MS));
> +
>  	vin->sequence++;
>  
>  	/* Prepare for next frame */
> @@ -1283,6 +1287,7 @@ int rvin_start_streaming(struct rvin_dev *vin)
>  	spin_lock_irqsave(&vin->qlock, flags);
>  
>  	vin->sequence = 0;
> +	vin->xfer_error = false;
>  
>  	ret = rvin_capture_start(vin);
>  	if (ret)
> @@ -1290,6 +1295,10 @@ int rvin_start_streaming(struct rvin_dev *vin)
>  
>  	spin_unlock_irqrestore(&vin->qlock, flags);
>  
> +	/* We start the frame watchdog only after we have successfully started streaming */
> +	if (!ret)
> +		schedule_delayed_work(&vin->frame_timeout, msecs_to_jiffies(FRAME_TIMEOUT_MS));
> +
>  	return ret;
>  }
>  
> @@ -1332,6 +1341,12 @@ void rvin_stop_streaming(struct rvin_dev *vin)
>  	}
>  
>  	vin->state = STOPPING;
> +	/*
> +	 * Since we are now stopping and don't expect more frames to be captured, make sure that
> +	 * there is no pending work for error handling.
> +	 */
> +	cancel_delayed_work_sync(&vin->frame_timeout);
> +	vin->xfer_error = false;

Do we need to set xfer_error to false here? The delayed work is canceled 
and we reset the xfer_error when we start in rvin_start_streaming().

>  
>  	/* Wait until only scratch buffer is used, max 3 interrupts. */
>  	retries = 0;
> @@ -1424,6 +1439,23 @@ void rvin_dma_unregister(struct rvin_dev *vin)
>  	v4l2_device_unregister(&vin->v4l2_dev);
>  }
>  
> +static void rvin_frame_timeout(struct work_struct *work)
> +{
> +	struct delayed_work *dwork = to_delayed_work(work);
> +	struct rvin_dev *vin = container_of(dwork, struct rvin_dev, frame_timeout);
> +	struct v4l2_event event = {
> +		.type = V4L2_EVENT_XFER_ERROR,
> +	};
> +
> +	vin_dbg(vin, "Frame timeout!\n");
> +
> +	if (!vin->xfer_error)
> +		return;
> +	vin_err(vin, "Unrecoverable transfer error detected, stopping streaming\n");
> +	vb2_queue_error(&vin->queue);
> +	v4l2_event_queue(&vin->vdev, &event);
> +}
> +
>  int rvin_dma_register(struct rvin_dev *vin, int irq)
>  {
>  	struct vb2_queue *q = &vin->queue;
> @@ -1470,6 +1502,8 @@ int rvin_dma_register(struct rvin_dev *vin, int irq)
>  		goto error;
>  	}
>  
> +	INIT_DELAYED_WORK(&vin->frame_timeout, rvin_frame_timeout);
> +
>  	return 0;
>  error:
>  	rvin_dma_unregister(vin);
> diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c b/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c
> index 2e2aa9d..bd7f6fe2 100644
> --- a/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c
> +++ b/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c
> @@ -648,6 +648,8 @@ static int rvin_subscribe_event(struct v4l2_fh *fh,
>  	switch (sub->type) {
>  	case V4L2_EVENT_SOURCE_CHANGE:
>  		return v4l2_event_subscribe(fh, sub, 4, NULL);
> +	case V4L2_EVENT_XFER_ERROR:
> +		return v4l2_event_subscribe(fh, sub, 1, NULL);
>  	}
>  	return v4l2_ctrl_subscribe_event(fh, sub);
>  }
> @@ -1000,9 +1002,23 @@ void rvin_v4l2_unregister(struct rvin_dev *vin)
>  static void rvin_notify_video_device(struct rvin_dev *vin,
>  				     unsigned int notification, void *arg)
>  {
> +	const struct v4l2_event *event;
> +
>  	switch (notification) {
>  	case V4L2_DEVICE_NOTIFY_EVENT:
> -		v4l2_event_queue(&vin->vdev, arg);
> +		event = arg;
> +
> +		switch (event->type) {
> +		case V4L2_EVENT_XFER_ERROR:
> +			if (vin->state != STOPPED && vin->state != STOPPING) {
> +				vin_dbg(vin, "Subdevice signaled transfer error.\n");
> +				vin->xfer_error = true;
> +			}
> +			break;
> +		default:
> +			break;
> +		}
> +
>  		break;
>  	default:
>  		break;
> diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-vin.h b/drivers/media/platform/renesas/rcar-vin/rcar-vin.h
> index 1f94589..4726a69 100644
> --- a/drivers/media/platform/renesas/rcar-vin/rcar-vin.h
> +++ b/drivers/media/platform/renesas/rcar-vin/rcar-vin.h
> @@ -31,6 +31,9 @@
>  /* Max number on VIN instances that can be in a system */
>  #define RCAR_VIN_NUM 32
>  
> +/* maximum time we wait before signalling an error to userspace */
> +#define FRAME_TIMEOUT_MS 1000
> +
>  struct rvin_group;
>  
>  enum model_id {
> @@ -207,6 +210,8 @@ struct rvin_info {
>   * @std:		active video standard of the video source
>   *
>   * @alpha:		Alpha component to fill in for supported pixel formats
> + * @xfer_error:		Indicates if any transfer errors occurred in the current streaming session.
> + * @frame_timeout:	Watchdog for monitoring regular capturing of frames in rvin_irq.
>   */
>  struct rvin_dev {
>  	struct device *dev;
> @@ -251,6 +256,8 @@ struct rvin_dev {
>  	v4l2_std_id std;
>  
>  	unsigned int alpha;
> +	bool xfer_error;
> +	struct delayed_work frame_timeout;
>  };
>  
>  #define vin_to_source(vin)		((vin)->parallel.subdev)
> -- 
> 2.7.4
>
Michael Rodin May 20, 2022, 7:50 p.m. UTC | #2
Hi Niklas,

On Thu, May 19, 2022 at 11:00:20PM +0200, Niklas Söderlund wrote:
> Hi Michael,
> 
> Thanks for your work.
> 
> I like this patch, I think it captures the issue discussed in the 
> previous thread quiet nicely. One small nit below.
> 
> On 2022-05-19 20:00:09 +0200, Michael Rodin wrote:
> > When a subdevice sends a transfer error event during streaming and we can
> > not capture new frames, then we know for sure that this is an unrecoverable
> > failure and not just a temporary glitch. In this case we can not ignore the
> > transfer error any more and have to notify userspace. In response to the
> > transfer error event userspace can try to restart streaming and hope that
> > it works again.
> > 
> > This patch is based on the patch [1] from Niklas Söderlund, however it adds
> > more logic to check whether the VIN hardware module is actually affected by
> > the transfer errors reported by the usptream device. For this it takes some
> > ideas from the imx driver where EOF interrupts are monitored by the
> > eof_timeout_timer added by commit 4a34ec8e470c ("[media] media: imx: Add
> > CSI subdev driver").
> > 
> > [1] https://urldefense.proofpoint.com/v2/url?u=https-3A__lore.kernel.org_linux-2Drenesas-2Dsoc_20211108160220.767586-2D4-2Dniklas.soderlund-2Brenesas-40ragnatech.se_&d=DwIDAw&c=euGZstcaTDllvimEN8b7jXrwqOf-v5A_CdpgnVfiiMM&r=sWsgk3pKkv5GeIDM2RZlPY8TjNFU2D0oBeOj6QNBadE&m=on7B_2z5sGrhiuvQgbA4XC0_qMRWNTZoWGRMzD9N0Ag&s=_LetePiuy8odH72QwAj6k-I0YOANjzkNwTnqqFr0_ck&e=
> > 
> > Signed-off-by: Michael Rodin <mrodin@de.adit-jv.com>
> > ---
> >  drivers/media/platform/renesas/rcar-vin/rcar-dma.c | 34 ++++++++++++++++++++++
> >  .../media/platform/renesas/rcar-vin/rcar-v4l2.c    | 18 +++++++++++-
> >  drivers/media/platform/renesas/rcar-vin/rcar-vin.h |  7 +++++
> >  3 files changed, 58 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-dma.c b/drivers/media/platform/renesas/rcar-vin/rcar-dma.c
> > index 2272f1c..596a367 100644
> > --- a/drivers/media/platform/renesas/rcar-vin/rcar-dma.c
> > +++ b/drivers/media/platform/renesas/rcar-vin/rcar-dma.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/delay.h>
> >  #include <linux/interrupt.h>
> >  #include <linux/pm_runtime.h>
> > +#include <media/v4l2-event.h>
> >  
> >  #include <media/videobuf2-dma-contig.h>
> >  
> > @@ -1060,6 +1061,9 @@ static irqreturn_t rvin_irq(int irq, void *data)
> >  		vin_dbg(vin, "Dropping frame %u\n", vin->sequence);
> >  	}
> >  
> > +	cancel_delayed_work(&vin->frame_timeout);
> > +	schedule_delayed_work(&vin->frame_timeout, msecs_to_jiffies(FRAME_TIMEOUT_MS));
> > +
> >  	vin->sequence++;
> >  
> >  	/* Prepare for next frame */
> > @@ -1283,6 +1287,7 @@ int rvin_start_streaming(struct rvin_dev *vin)
> >  	spin_lock_irqsave(&vin->qlock, flags);
> >  
> >  	vin->sequence = 0;
> > +	vin->xfer_error = false;
> >  
> >  	ret = rvin_capture_start(vin);
> >  	if (ret)
> > @@ -1290,6 +1295,10 @@ int rvin_start_streaming(struct rvin_dev *vin)
> >  
> >  	spin_unlock_irqrestore(&vin->qlock, flags);
> >  
> > +	/* We start the frame watchdog only after we have successfully started streaming */
> > +	if (!ret)
> > +		schedule_delayed_work(&vin->frame_timeout, msecs_to_jiffies(FRAME_TIMEOUT_MS));
> > +
> >  	return ret;
> >  }
> >  
> > @@ -1332,6 +1341,12 @@ void rvin_stop_streaming(struct rvin_dev *vin)
> >  	}
> >  
> >  	vin->state = STOPPING;
> > +	/*
> > +	 * Since we are now stopping and don't expect more frames to be captured, make sure that
> > +	 * there is no pending work for error handling.
> > +	 */
> > +	cancel_delayed_work_sync(&vin->frame_timeout);
> > +	vin->xfer_error = false;
> 
> Do we need to set xfer_error to false here? The delayed work is canceled 
> and we reset the xfer_error when we start in rvin_start_streaming().
> 

You are right, this seems to be redundant. But I think that there might be
a different case where we have to reset xfer_error:

 1. A non-critical transfer error has occurred during streaming from a
    HDMI source.
 2. Frames are still captured for an hour without any further problems,
    since it was just a short glitch
 3. Now the source (e.g. HDMI signal generator) has been powered off by the
    user so it does not send new frames.
 4. Timeout occurs due to 3 but since xfer_error has been set 1 hour ago,
    userspace is notified about a transfer error and assumes that streaming
    has been stopped because of this.

To avoid this scenario I think maybe we have to restrict validity of
xfer_error. Maybe it would be better to make xfer_error a counter which is
set after a transfer error to e.g. 10 frames and then decremented after
each captured frame so after 10 successfully captured frames we know that a
timeout has occurred definitely not due to a transfer error?

Another possible improvement might be to make FRAME_TIMEOUT_MS configurable,
maybe via a v4l2 control from userspace? Or we could also define the timeout
as a multiple of the frame interval of the source. This would allow us to
reduce the timeout further based on the particular source so the userspace
does not have to wait for a second until it knows that it has to restart
streaming.

What do you think?

> >  
> >  	/* Wait until only scratch buffer is used, max 3 interrupts. */
> >  	retries = 0;
> > @@ -1424,6 +1439,23 @@ void rvin_dma_unregister(struct rvin_dev *vin)
> >  	v4l2_device_unregister(&vin->v4l2_dev);
> >  }
> >  
> > +static void rvin_frame_timeout(struct work_struct *work)
> > +{
> > +	struct delayed_work *dwork = to_delayed_work(work);
> > +	struct rvin_dev *vin = container_of(dwork, struct rvin_dev, frame_timeout);
> > +	struct v4l2_event event = {
> > +		.type = V4L2_EVENT_XFER_ERROR,
> > +	};
> > +
> > +	vin_dbg(vin, "Frame timeout!\n");
> > +
> > +	if (!vin->xfer_error)
> > +		return;
> > +	vin_err(vin, "Unrecoverable transfer error detected, stopping streaming\n");
> > +	vb2_queue_error(&vin->queue);
> > +	v4l2_event_queue(&vin->vdev, &event);
> > +}
> > +
> >  int rvin_dma_register(struct rvin_dev *vin, int irq)
> >  {
> >  	struct vb2_queue *q = &vin->queue;
> > @@ -1470,6 +1502,8 @@ int rvin_dma_register(struct rvin_dev *vin, int irq)
> >  		goto error;
> >  	}
> >  
> > +	INIT_DELAYED_WORK(&vin->frame_timeout, rvin_frame_timeout);
> > +
> >  	return 0;
> >  error:
> >  	rvin_dma_unregister(vin);
> > diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c b/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c
> > index 2e2aa9d..bd7f6fe2 100644
> > --- a/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c
> > +++ b/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c
> > @@ -648,6 +648,8 @@ static int rvin_subscribe_event(struct v4l2_fh *fh,
> >  	switch (sub->type) {
> >  	case V4L2_EVENT_SOURCE_CHANGE:
> >  		return v4l2_event_subscribe(fh, sub, 4, NULL);
> > +	case V4L2_EVENT_XFER_ERROR:
> > +		return v4l2_event_subscribe(fh, sub, 1, NULL);
> >  	}
> >  	return v4l2_ctrl_subscribe_event(fh, sub);
> >  }
> > @@ -1000,9 +1002,23 @@ void rvin_v4l2_unregister(struct rvin_dev *vin)
> >  static void rvin_notify_video_device(struct rvin_dev *vin,
> >  				     unsigned int notification, void *arg)
> >  {
> > +	const struct v4l2_event *event;
> > +
> >  	switch (notification) {
> >  	case V4L2_DEVICE_NOTIFY_EVENT:
> > -		v4l2_event_queue(&vin->vdev, arg);
> > +		event = arg;
> > +
> > +		switch (event->type) {
> > +		case V4L2_EVENT_XFER_ERROR:
> > +			if (vin->state != STOPPED && vin->state != STOPPING) {
> > +				vin_dbg(vin, "Subdevice signaled transfer error.\n");
> > +				vin->xfer_error = true;
> > +			}
> > +			break;
> > +		default:
> > +			break;
> > +		}
> > +
> >  		break;
> >  	default:
> >  		break;
> > diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-vin.h b/drivers/media/platform/renesas/rcar-vin/rcar-vin.h
> > index 1f94589..4726a69 100644
> > --- a/drivers/media/platform/renesas/rcar-vin/rcar-vin.h
> > +++ b/drivers/media/platform/renesas/rcar-vin/rcar-vin.h
> > @@ -31,6 +31,9 @@
> >  /* Max number on VIN instances that can be in a system */
> >  #define RCAR_VIN_NUM 32
> >  
> > +/* maximum time we wait before signalling an error to userspace */
> > +#define FRAME_TIMEOUT_MS 1000
> > +
> >  struct rvin_group;
> >  
> >  enum model_id {
> > @@ -207,6 +210,8 @@ struct rvin_info {
> >   * @std:		active video standard of the video source
> >   *
> >   * @alpha:		Alpha component to fill in for supported pixel formats
> > + * @xfer_error:		Indicates if any transfer errors occurred in the current streaming session.
> > + * @frame_timeout:	Watchdog for monitoring regular capturing of frames in rvin_irq.
> >   */
> >  struct rvin_dev {
> >  	struct device *dev;
> > @@ -251,6 +256,8 @@ struct rvin_dev {
> >  	v4l2_std_id std;
> >  
> >  	unsigned int alpha;
> > +	bool xfer_error;
> > +	struct delayed_work frame_timeout;
> >  };
> >  
> >  #define vin_to_source(vin)		((vin)->parallel.subdev)
> > -- 
> > 2.7.4
> > 
> 
> -- 
> Kind Regards,
> Niklas Söderlund
Niklas Söderlund June 8, 2022, 9:04 p.m. UTC | #3
Hi Michael,

On 2022-05-20 21:50:41 +0200, Michael Rodin wrote:

[snip]

> > 
> > Do we need to set xfer_error to false here? The delayed work is canceled 
> > and we reset the xfer_error when we start in rvin_start_streaming().
> > 
> 
> You are right, this seems to be redundant. But I think that there might be
> a different case where we have to reset xfer_error:
> 
>  1. A non-critical transfer error has occurred during streaming from a
>     HDMI source.
>  2. Frames are still captured for an hour without any further problems,
>     since it was just a short glitch
>  3. Now the source (e.g. HDMI signal generator) has been powered off by the
>     user so it does not send new frames.
>  4. Timeout occurs due to 3 but since xfer_error has been set 1 hour ago,
>     userspace is notified about a transfer error and assumes that streaming
>     has been stopped because of this.
> 
> To avoid this scenario I think maybe we have to restrict validity of
> xfer_error. Maybe it would be better to make xfer_error a counter which is
> set after a transfer error to e.g. 10 frames and then decremented after
> each captured frame so after 10 successfully captured frames we know that a
> timeout has occurred definitely not due to a transfer error?
> 
> Another possible improvement might be to make FRAME_TIMEOUT_MS configurable,
> maybe via a v4l2 control from userspace? Or we could also define the timeout
> as a multiple of the frame interval of the source. This would allow us to
> reduce the timeout further based on the particular source so the userspace
> does not have to wait for a second until it knows that it has to restart
> streaming.
> 
> What do you think?

I discussed this problem last week at a conference and the consensus was 
that this problem of timeouts and the like should in the first hand be 
handled in user-space. The reason being that there might be use-cases 
that are better dealt with there.

If the monitor thread is is strictly needed for some reason in kernel 
thread it should likely be moved to the V4L2 core as all drivers would 
then be able to use it instead of deeding on slightly different 
implementations in each driver.

So I fear we are back to only try to signal xfer errors in the driver 
and then leave it to either user-space or some new V4L2 code to help 
monitoring.

Sorry for only understanding this so late in the review, it took some 
time for me to understand it but once explained to me it made sens.
Michael Rodin June 28, 2022, 6 p.m. UTC | #4
Hello,

this series is a followup to the other series [1] started by Niklas Söderlund
where only the first patch has been merged. The overall idea is to be more
compliant with the Renesas hardware manual which requires a reset or stop
of capture in the VIN module before reset of CSI2. Another goal is to be
more resilient with respect to non-critical CSI2 errors so the driver does
not end in an endless restart loop. Compared to the previous version [2] of
this series the patch 3 is replaced based on the conclusion in [3] so now
userspace has to take care of figuring out if a transfer error was harmless
or unrecoverable. Other patches are adapted accordingly so no assumptions
about criticality of transfer errors are made in the kernel and the
decision is left up to userspace.

[1] https://lore.kernel.org/linux-renesas-soc/20211108160220.767586-1-niklas.soderlund+renesas@ragnatech.se/
[2] https://lore.kernel.org/all/1652983210-1194-1-git-send-email-mrodin@de.adit-jv.com/
[3] https://lore.kernel.org/all/YqEO3%2FKekkZhVjW+@oden.dyn.berto.se/
Niklas Söderlund July 5, 2022, 9:46 a.m. UTC | #5
Hi Michael,

Thanks for your persistent work with this series.

On 2022-06-28 20:00:19 +0200, Michael Rodin wrote:
> Hello,
> 
> this series is a followup to the other series [1] started by Niklas Söderlund
> where only the first patch has been merged. The overall idea is to be more
> compliant with the Renesas hardware manual which requires a reset or stop
> of capture in the VIN module before reset of CSI2. Another goal is to be
> more resilient with respect to non-critical CSI2 errors so the driver does
> not end in an endless restart loop. Compared to the previous version [2] of
> this series the patch 3 is replaced based on the conclusion in [3] so now
> userspace has to take care of figuring out if a transfer error was harmless
> or unrecoverable. Other patches are adapted accordingly so no assumptions
> about criticality of transfer errors are made in the kernel and the
> decision is left up to userspace.

I like this solution as it truly pushes the decision to user-space. What 
bugs me a little bit is that we don't have a way to communicate errors 
that we know are unrecoverable (it was for this case the work in this 
area started) and ones that could be recoverable (the use-case added on 
top).

I would also like to hear what Hans thinks as he had good suggestions 
for how to handle the cases we know can't be recovers in [4].

> 
> [1] https://lore.kernel.org/linux-renesas-soc/20211108160220.767586-1-niklas.soderlund+renesas@ragnatech.se/
> [2] https://lore.kernel.org/all/1652983210-1194-1-git-send-email-mrodin@de.adit-jv.com/
> [3] https://lore.kernel.org/all/YqEO3%2FKekkZhVjW+@oden.dyn.berto.se/

4. https://lore.kernel.org/all/1fddc966-5a23-63b4-185e-c17aa6d65b54@xs4all.nl/
Michael Rodin July 15, 2022, 1:42 p.m. UTC | #6
Hi Niklas, Hans,

On Tue, Jul 05, 2022 at 11:46:22AM +0200, Niklas Söderlund wrote:
> Hi Michael,
> 
> Thanks for your persistent work with this series.

Thank you for the feedback!

> On 2022-06-28 20:00:19 +0200, Michael Rodin wrote:
> > Hello,
> > 
> > this series is a followup to the other series [1] started by Niklas Söderlund
> > where only the first patch has been merged. The overall idea is to be more
> > compliant with the Renesas hardware manual which requires a reset or stop
> > of capture in the VIN module before reset of CSI2. Another goal is to be
> > more resilient with respect to non-critical CSI2 errors so the driver does
> > not end in an endless restart loop. Compared to the previous version [2] of
> > this series the patch 3 is replaced based on the conclusion in [3] so now
> > userspace has to take care of figuring out if a transfer error was harmless
> > or unrecoverable. Other patches are adapted accordingly so no assumptions
> > about criticality of transfer errors are made in the kernel and the
> > decision is left up to userspace.
> 
> I like this solution as it truly pushes the decision to user-space. What 
> bugs me a little bit is that we don't have a way to communicate errors 
> that we know are unrecoverable (it was for this case the work in this 
> area started) and ones that could be recoverable (the use-case added on 
> top).

Yes, it's not nice that V4L2_EVENT_XFER_ERROR does not tell userspace
whether an error is recoverable (i.e. the event can be ignored) or not
(i.e. a restart of streaming is required) but the other possible option
would be (as concluded in [3]) to implement a frame timeout monitoring
thread in v4l2 core. I am not sure if it is possible to implement this
second option cleanly...

> I would also like to hear what Hans thinks as he had good suggestions 
> for how to handle the cases we know can't be recovers in [4].

A a new function vb2_queue_error_with_event() suggested by Hans seems to be
redundant now, since it would not be used by rcar-vin (unless we implement
frame timeout monitoring in the v4l2 core). Or do you have an idea, which
drivers could be the first users of it, e.g. staging/media/imx I mentioned
before?

> > 
> > [1] https://urldefense.proofpoint.com/v2/url?u=https-3A__lore.kernel.org_linux-2Drenesas-2Dsoc_20211108160220.767586-2D1-2Dniklas.soderlund-2Brenesas-40ragnatech.se_&d=DwIDAw&c=euGZstcaTDllvimEN8b7jXrwqOf-v5A_CdpgnVfiiMM&r=sWsgk3pKkv5GeIDM2RZlPY8TjNFU2D0oBeOj6QNBadE&m=ecX7IwfatUO7SNPiyQ6x_8K9t2eWJf3y8GNuNHJ_0W0&s=Cli6jADEgMmCOLVoFekRRXzmty9WBXtoSF9utZJNMXY&e=
> > [2] https://urldefense.proofpoint.com/v2/url?u=https-3A__lore.kernel.org_all_1652983210-2D1194-2D1-2Dgit-2Dsend-2Demail-2Dmrodin-40de.adit-2Djv.com_&d=DwIDAw&c=euGZstcaTDllvimEN8b7jXrwqOf-v5A_CdpgnVfiiMM&r=sWsgk3pKkv5GeIDM2RZlPY8TjNFU2D0oBeOj6QNBadE&m=ecX7IwfatUO7SNPiyQ6x_8K9t2eWJf3y8GNuNHJ_0W0&s=6CysfSY0OoAenEwCzigeyPOb8vyaa4GgzkJSR-ny83U&e=
> > [3] https://urldefense.proofpoint.com/v2/url?u=https-3A__lore.kernel.org_all_YqEO3-252FKekkZhVjW-2B-40oden.dyn.berto.se_&d=DwIDAw&c=euGZstcaTDllvimEN8b7jXrwqOf-v5A_CdpgnVfiiMM&r=sWsgk3pKkv5GeIDM2RZlPY8TjNFU2D0oBeOj6QNBadE&m=ecX7IwfatUO7SNPiyQ6x_8K9t2eWJf3y8GNuNHJ_0W0&s=67JE_QR4x7omrtC7wzbpn2OgW75TAR80-R8WQyE-bVo&e=
> 
> 4. https://urldefense.proofpoint.com/v2/url?u=https-3A__lore.kernel.org_all_1fddc966-2D5a23-2D63b4-2D185e-2Dc17aa6d65b54-40xs4all.nl_&d=DwIDAw&c=euGZstcaTDllvimEN8b7jXrwqOf-v5A_CdpgnVfiiMM&r=sWsgk3pKkv5GeIDM2RZlPY8TjNFU2D0oBeOj6QNBadE&m=ecX7IwfatUO7SNPiyQ6x_8K9t2eWJf3y8GNuNHJ_0W0&s=I18yWgde2UKZY4AiwB5s-Lf12eebHOcHFZFOlTcO2oQ&e=
> 
> -- 
> Kind Regards,
> Niklas Söderlund
diff mbox series

Patch

diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-dma.c b/drivers/media/platform/renesas/rcar-vin/rcar-dma.c
index 2272f1c..596a367 100644
--- a/drivers/media/platform/renesas/rcar-vin/rcar-dma.c
+++ b/drivers/media/platform/renesas/rcar-vin/rcar-dma.c
@@ -13,6 +13,7 @@ 
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
+#include <media/v4l2-event.h>
 
 #include <media/videobuf2-dma-contig.h>
 
@@ -1060,6 +1061,9 @@  static irqreturn_t rvin_irq(int irq, void *data)
 		vin_dbg(vin, "Dropping frame %u\n", vin->sequence);
 	}
 
+	cancel_delayed_work(&vin->frame_timeout);
+	schedule_delayed_work(&vin->frame_timeout, msecs_to_jiffies(FRAME_TIMEOUT_MS));
+
 	vin->sequence++;
 
 	/* Prepare for next frame */
@@ -1283,6 +1287,7 @@  int rvin_start_streaming(struct rvin_dev *vin)
 	spin_lock_irqsave(&vin->qlock, flags);
 
 	vin->sequence = 0;
+	vin->xfer_error = false;
 
 	ret = rvin_capture_start(vin);
 	if (ret)
@@ -1290,6 +1295,10 @@  int rvin_start_streaming(struct rvin_dev *vin)
 
 	spin_unlock_irqrestore(&vin->qlock, flags);
 
+	/* We start the frame watchdog only after we have successfully started streaming */
+	if (!ret)
+		schedule_delayed_work(&vin->frame_timeout, msecs_to_jiffies(FRAME_TIMEOUT_MS));
+
 	return ret;
 }
 
@@ -1332,6 +1341,12 @@  void rvin_stop_streaming(struct rvin_dev *vin)
 	}
 
 	vin->state = STOPPING;
+	/*
+	 * Since we are now stopping and don't expect more frames to be captured, make sure that
+	 * there is no pending work for error handling.
+	 */
+	cancel_delayed_work_sync(&vin->frame_timeout);
+	vin->xfer_error = false;
 
 	/* Wait until only scratch buffer is used, max 3 interrupts. */
 	retries = 0;
@@ -1424,6 +1439,23 @@  void rvin_dma_unregister(struct rvin_dev *vin)
 	v4l2_device_unregister(&vin->v4l2_dev);
 }
 
+static void rvin_frame_timeout(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct rvin_dev *vin = container_of(dwork, struct rvin_dev, frame_timeout);
+	struct v4l2_event event = {
+		.type = V4L2_EVENT_XFER_ERROR,
+	};
+
+	vin_dbg(vin, "Frame timeout!\n");
+
+	if (!vin->xfer_error)
+		return;
+	vin_err(vin, "Unrecoverable transfer error detected, stopping streaming\n");
+	vb2_queue_error(&vin->queue);
+	v4l2_event_queue(&vin->vdev, &event);
+}
+
 int rvin_dma_register(struct rvin_dev *vin, int irq)
 {
 	struct vb2_queue *q = &vin->queue;
@@ -1470,6 +1502,8 @@  int rvin_dma_register(struct rvin_dev *vin, int irq)
 		goto error;
 	}
 
+	INIT_DELAYED_WORK(&vin->frame_timeout, rvin_frame_timeout);
+
 	return 0;
 error:
 	rvin_dma_unregister(vin);
diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c b/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c
index 2e2aa9d..bd7f6fe2 100644
--- a/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c
+++ b/drivers/media/platform/renesas/rcar-vin/rcar-v4l2.c
@@ -648,6 +648,8 @@  static int rvin_subscribe_event(struct v4l2_fh *fh,
 	switch (sub->type) {
 	case V4L2_EVENT_SOURCE_CHANGE:
 		return v4l2_event_subscribe(fh, sub, 4, NULL);
+	case V4L2_EVENT_XFER_ERROR:
+		return v4l2_event_subscribe(fh, sub, 1, NULL);
 	}
 	return v4l2_ctrl_subscribe_event(fh, sub);
 }
@@ -1000,9 +1002,23 @@  void rvin_v4l2_unregister(struct rvin_dev *vin)
 static void rvin_notify_video_device(struct rvin_dev *vin,
 				     unsigned int notification, void *arg)
 {
+	const struct v4l2_event *event;
+
 	switch (notification) {
 	case V4L2_DEVICE_NOTIFY_EVENT:
-		v4l2_event_queue(&vin->vdev, arg);
+		event = arg;
+
+		switch (event->type) {
+		case V4L2_EVENT_XFER_ERROR:
+			if (vin->state != STOPPED && vin->state != STOPPING) {
+				vin_dbg(vin, "Subdevice signaled transfer error.\n");
+				vin->xfer_error = true;
+			}
+			break;
+		default:
+			break;
+		}
+
 		break;
 	default:
 		break;
diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-vin.h b/drivers/media/platform/renesas/rcar-vin/rcar-vin.h
index 1f94589..4726a69 100644
--- a/drivers/media/platform/renesas/rcar-vin/rcar-vin.h
+++ b/drivers/media/platform/renesas/rcar-vin/rcar-vin.h
@@ -31,6 +31,9 @@ 
 /* Max number on VIN instances that can be in a system */
 #define RCAR_VIN_NUM 32
 
+/* maximum time we wait before signalling an error to userspace */
+#define FRAME_TIMEOUT_MS 1000
+
 struct rvin_group;
 
 enum model_id {
@@ -207,6 +210,8 @@  struct rvin_info {
  * @std:		active video standard of the video source
  *
  * @alpha:		Alpha component to fill in for supported pixel formats
+ * @xfer_error:		Indicates if any transfer errors occurred in the current streaming session.
+ * @frame_timeout:	Watchdog for monitoring regular capturing of frames in rvin_irq.
  */
 struct rvin_dev {
 	struct device *dev;
@@ -251,6 +256,8 @@  struct rvin_dev {
 	v4l2_std_id std;
 
 	unsigned int alpha;
+	bool xfer_error;
+	struct delayed_work frame_timeout;
 };
 
 #define vin_to_source(vin)		((vin)->parallel.subdev)