diff mbox series

[v5,6/9] media: uvcvideo: Move decode processing to process context

Message ID 926c5590f922810b52851415803c01e32ebeae7b.1541534872.git-series.kieran.bingham@ideasonboard.com (mailing list archive)
State New, archived
Headers show
Series Asynchronous UVC | expand

Commit Message

Kieran Bingham Nov. 6, 2018, 9:27 p.m. UTC
From: Kieran Bingham <kieran.bingham@ideasonboard.com>

Newer high-definition cameras, and cameras with multiple lenses such as
the range of stereo-vision cameras now available, have ever-increasing
data rates.

The inclusion of a variable length packet header in URB packets means
that we must memcpy the frame data out to our destination 'manually'.
This can result in data rates of up to 2 gigabits per second being
processed.

To improve efficiency, and maximise throughput, handle the URB decode
processing through a work queue to move it from interrupt context, and
allow multiple processors to work on URBs in parallel.

Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>

---
v2:
 - Lock full critical section of usb_submit_urb()

v3:
 - Fix race on submitting uvc_video_decode_data_work() to work queue.
 - Rename uvc_decode_op -> uvc_copy_op (Generic to encode/decode)
 - Rename decodes -> copy_operations
 - Don't queue work if there is no async task
 - obtain copy op structure directly in uvc_video_decode_data()
 - uvc_video_decode_data_work() -> uvc_video_copy_data_work()

v4:
 - Provide for_each_uvc_urb()
 - Simplify fix for shutdown race to flush queue before freeing URBs
 - Rebase to v4.16-rc4 (linux-media/master) adjusting for metadata
   conflicts.

v5:
 - Rebase to media/v4.20-2
 - Use GFP_KERNEL allocation in uvc_video_copy_data_work()
 - Fix function documentation for uvc_video_copy_data_work()
 - Add periods to the end of sentences
 - Rename 'decode' variable to 'op' in uvc_video_decode_data()
 - Move uvc_urb->async_operations initialisation to before use
 - Move async workqueue to match uvc_streaming lifetime instead of
   streamon/streamoff

 drivers/media/usb/uvc/uvc_driver.c |   2 +-
 drivers/media/usb/uvc/uvc_video.c  | 110 +++++++++++++++++++++++-------
 drivers/media/usb/uvc/uvcvideo.h   |  28 ++++++++-
 3 files changed, 116 insertions(+), 24 deletions(-)

Comments

Laurent Pinchart Nov. 6, 2018, 10:58 p.m. UTC | #1
Hi Kieran,

Thank you for the patch.

On Tuesday, 6 November 2018 23:27:17 EET Kieran Bingham wrote:
> From: Kieran Bingham <kieran.bingham@ideasonboard.com>
> 
> Newer high definition cameras, and cameras with multiple lenses such as
> the range of stereo-vision cameras now available have ever increasing
> data rates.
> 
> The inclusion of a variable length packet header in URB packets mean
> that we must memcpy the frame data out to our destination 'manually'.
> This can result in data rates of up to 2 gigabits per second being
> processed.
> 
> To improve efficiency, and maximise throughput, handle the URB decode
> processing through a work queue to move it from interrupt context, and
> allow multiple processors to work on URBs in parallel.
> 
> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>

Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>

I wonder if we shouldn't, as a future improvement, only queue async work when 
the quantity of data to be copied is above a certain threshold.

> ---
> v2:
>  - Lock full critical section of usb_submit_urb()
> 
> v3:
>  - Fix race on submitting uvc_video_decode_data_work() to work queue.
>  - Rename uvc_decode_op -> uvc_copy_op (Generic to encode/decode)
>  - Rename decodes -> copy_operations
>  - Don't queue work if there is no async task
>  - obtain copy op structure directly in uvc_video_decode_data()
>  - uvc_video_decode_data_work() -> uvc_video_copy_data_work()
> 
> v4:
>  - Provide for_each_uvc_urb()
>  - Simplify fix for shutdown race to flush queue before freeing URBs
>  - Rebase to v4.16-rc4 (linux-media/master) adjusting for metadata
>    conflicts.
> 
> v5:
>  - Rebase to media/v4.20-2
>  - Use GFP_KERNEL allocation in uvc_video_copy_data_work()
>  - Fix function documentation for uvc_video_copy_data_work()
>  - Add periods to the end of sentences
>  - Rename 'decode' variable to 'op' in uvc_video_decode_data()
>  - Move uvc_urb->async_operations initialisation to before use
>  - Move async workqueue to match uvc_streaming lifetime instead of
>    streamon/streamoff
> 
>  drivers/media/usb/uvc/uvc_driver.c |   2 +-
>  drivers/media/usb/uvc/uvc_video.c  | 110 +++++++++++++++++++++++-------
>  drivers/media/usb/uvc/uvcvideo.h   |  28 ++++++++-
>  3 files changed, 116 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/media/usb/uvc/uvc_driver.c
> b/drivers/media/usb/uvc/uvc_driver.c index bc369a0934a3..e61a6d26e812
> 100644
> --- a/drivers/media/usb/uvc/uvc_driver.c
> +++ b/drivers/media/usb/uvc/uvc_driver.c
> @@ -1883,6 +1883,8 @@ static void uvc_unregister_video(struct uvc_device
> *dev) video_unregister_device(&stream->vdev);
>  		video_unregister_device(&stream->meta.vdev);
> 
> +		destroy_workqueue(stream->async_wq);
> +
>  		uvc_debugfs_cleanup_stream(stream);
>  	}
>  }
> diff --git a/drivers/media/usb/uvc/uvc_video.c
> b/drivers/media/usb/uvc/uvc_video.c index 7a7779e1b466..ce9e40444507 100644
> --- a/drivers/media/usb/uvc/uvc_video.c
> +++ b/drivers/media/usb/uvc/uvc_video.c
> @@ -1094,21 +1094,54 @@ static int uvc_video_decode_start(struct
> uvc_streaming *stream, return data[0];
>  }
> 
> -static void uvc_video_decode_data(struct uvc_streaming *stream,
> +/*
> + * uvc_video_decode_data_work: Asynchronous memcpy processing
> + *
> + * Copy URB data to video buffers in process context, releasing buffer
> + * references and requeuing the URB when done.
> + */
> +static void uvc_video_copy_data_work(struct work_struct *work)
> +{
> +	struct uvc_urb *uvc_urb = container_of(work, struct uvc_urb, work);
> +	unsigned int i;
> +	int ret;
> +
> +	for (i = 0; i < uvc_urb->async_operations; i++) {
> +		struct uvc_copy_op *op = &uvc_urb->copy_operations[i];
> +
> +		memcpy(op->dst, op->src, op->len);
> +
> +		/* Release reference taken on this buffer. */
> +		uvc_queue_buffer_release(op->buf);
> +	}
> +
> +	ret = usb_submit_urb(uvc_urb->urb, GFP_KERNEL);
> +	if (ret < 0)
> +		uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
> +			   ret);
> +}
> +
> +static void uvc_video_decode_data(struct uvc_urb *uvc_urb,
>  		struct uvc_buffer *buf, const u8 *data, int len)
>  {
> -	unsigned int maxlen, nbytes;
> -	void *mem;
> +	unsigned int active_op = uvc_urb->async_operations;
> +	struct uvc_copy_op *op = &uvc_urb->copy_operations[active_op];
> +	unsigned int maxlen;
> 
>  	if (len <= 0)
>  		return;
> 
> -	/* Copy the video data to the buffer. */
>  	maxlen = buf->length - buf->bytesused;
> -	mem = buf->mem + buf->bytesused;
> -	nbytes = min((unsigned int)len, maxlen);
> -	memcpy(mem, data, nbytes);
> -	buf->bytesused += nbytes;
> +
> +	/* Take a buffer reference for async work. */
> +	kref_get(&buf->ref);
> +
> +	op->buf = buf;
> +	op->src = data;
> +	op->dst = buf->mem + buf->bytesused;
> +	op->len = min_t(unsigned int, len, maxlen);
> +
> +	buf->bytesused += op->len;
> 
>  	/* Complete the current frame if the buffer size was exceeded. */
>  	if (len > maxlen) {
> @@ -1116,6 +1149,8 @@ static void uvc_video_decode_data(struct uvc_streaming
> *stream, buf->error = 1;
>  		buf->state = UVC_BUF_STATE_READY;
>  	}
> +
> +	uvc_urb->async_operations++;
>  }
> 
>  static void uvc_video_decode_end(struct uvc_streaming *stream,
> @@ -1324,7 +1359,7 @@ static void uvc_video_decode_isoc(struct uvc_urb
> *uvc_urb, uvc_video_decode_meta(stream, meta_buf, mem, ret);
> 
>  		/* Decode the payload data. */
> -		uvc_video_decode_data(stream, buf, mem + ret,
> +		uvc_video_decode_data(uvc_urb, buf, mem + ret,
>  			urb->iso_frame_desc[i].actual_length - ret);
> 
>  		/* Process the header again. */
> @@ -1384,9 +1419,9 @@ static void uvc_video_decode_bulk(struct uvc_urb
> *uvc_urb, * sure buf is never dereferenced if NULL.
>  	 */
> 
> -	/* Process video data. */
> +	/* Prepare video data for processing. */
>  	if (!stream->bulk.skip_payload && buf != NULL)
> -		uvc_video_decode_data(stream, buf, mem, len);
> +		uvc_video_decode_data(uvc_urb, buf, mem, len);
> 
>  	/* Detect the payload end by a URB smaller than the maximum size (or
>  	 * a payload size equal to the maximum) and process the header again.
> @@ -1472,7 +1507,7 @@ static void uvc_video_complete(struct urb *urb)
>  		uvc_printk(KERN_WARNING, "Non-zero status (%d) in video "
>  			"completion handler.\n", urb->status);
>  		/* fall through */
> -	case -ENOENT:		/* usb_kill_urb() called. */
> +	case -ENOENT:		/* usb_poison_urb() called. */
>  		if (stream->frozen)
>  			return;
>  		/* fall through */
> @@ -1494,12 +1529,26 @@ static void uvc_video_complete(struct urb *urb)
>  		spin_unlock_irqrestore(&qmeta->irqlock, flags);
>  	}
> 
> +	/* Re-initialise the URB async work. */
> +	uvc_urb->async_operations = 0;
> +
> +	/*
> +	 * Process the URB headers, and optionally queue expensive memcpy tasks
> +	 * to be deferred to a work queue.
> +	 */
>  	stream->decode(uvc_urb, buf, buf_meta);
> 
> -	if ((ret = usb_submit_urb(urb, GFP_ATOMIC)) < 0) {
> -		uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
> -			ret);
> +	/* If no async work is needed, resubmit the URB immediately. */
> +	if (!uvc_urb->async_operations) {
> +		ret = usb_submit_urb(uvc_urb->urb, GFP_ATOMIC);
> +		if (ret < 0)
> +			uvc_printk(KERN_ERR,
> +				   "Failed to resubmit video URB (%d).\n",
> +				   ret);
> +		return;
>  	}
> +
> +	queue_work(stream->async_wq, &uvc_urb->work);
>  }
> 
>  /*
> @@ -1594,20 +1643,22 @@ static int uvc_alloc_urb_buffers(struct
> uvc_streaming *stream, */
>  static void uvc_uninit_video(struct uvc_streaming *stream, int
> free_buffers) {
> -	struct urb *urb;
> -	unsigned int i;
> +	struct uvc_urb *uvc_urb;
> 
>  	uvc_video_stats_stop(stream);
> 
> -	for (i = 0; i < UVC_URBS; ++i) {
> -		struct uvc_urb *uvc_urb = &stream->uvc_urb[i];
> +	/*
> +	 * We must poison the URBs rather than kill them to ensure that even
> +	 * after the completion handler returns, any asynchronous workqueues
> +	 * will be prevented from resubmitting the URBs.
> +	 */
> +	for_each_uvc_urb(uvc_urb, stream)
> +		usb_poison_urb(uvc_urb->urb);
> 
> -		urb = uvc_urb->urb;
> -		if (urb == NULL)
> -			continue;
> +	flush_workqueue(stream->async_wq);
> 
> -		usb_kill_urb(urb);
> -		usb_free_urb(urb);
> +	for_each_uvc_urb(uvc_urb, stream) {
> +		usb_free_urb(uvc_urb->urb);
>  		uvc_urb->urb = NULL;
>  	}
> 
> @@ -1932,6 +1983,7 @@ int uvc_video_init(struct uvc_streaming *stream)
>  	struct uvc_streaming_control *probe = &stream->ctrl;
>  	struct uvc_format *format = NULL;
>  	struct uvc_frame *frame = NULL;
> +	struct uvc_urb *uvc_urb;
>  	unsigned int i;
>  	int ret;
> 
> @@ -2017,6 +2069,16 @@ int uvc_video_init(struct uvc_streaming *stream)
>  		}
>  	}
> 
> +	/* Allocate a stream specific work queue for asynchronous tasks. */
> +	stream->async_wq = alloc_workqueue("uvcvideo", WQ_UNBOUND | WQ_HIGHPRI,
> +					   0);
> +	if (!stream->async_wq)
> +		return -ENOMEM;
> +
> +	/* Prepare asynchronous work items. */
> +	for_each_uvc_urb(uvc_urb, stream)
> +		INIT_WORK(&uvc_urb->work, uvc_video_copy_data_work);
> +
>  	return 0;
>  }
> 
> diff --git a/drivers/media/usb/uvc/uvcvideo.h
> b/drivers/media/usb/uvc/uvcvideo.h index 1bc17da7f3d4..0953e2e59a79 100644
> --- a/drivers/media/usb/uvc/uvcvideo.h
> +++ b/drivers/media/usb/uvc/uvcvideo.h
> @@ -491,12 +491,30 @@ struct uvc_stats_stream {
>  #define UVC_METATADA_BUF_SIZE 1024
> 
>  /**
> + * struct uvc_copy_op: Context structure to schedule asynchronous memcpy
> + *
> + * @buf: active buf object for this operation
> + * @dst: copy destination address
> + * @src: copy source address
> + * @len: copy length
> + */
> +struct uvc_copy_op {
> +	struct uvc_buffer *buf;
> +	void *dst;
> +	const __u8 *src;
> +	size_t len;
> +};
> +
> +/**
>   * struct uvc_urb - URB context management structure
>   *
>   * @urb: the URB described by this context structure
>   * @stream: UVC streaming context
>   * @buffer: memory storage for the URB
>   * @dma: DMA coherent addressing for the urb_buffer
> + * @async_operations: counter to indicate the number of copy operations
> + * @copy_operations: work descriptors for asynchronous copy operations
> + * @work: work queue entry for asynchronous decode
>   */
>  struct uvc_urb {
>  	struct urb *urb;
> @@ -504,6 +522,10 @@ struct uvc_urb {
> 
>  	char *buffer;
>  	dma_addr_t dma;
> +
> +	unsigned int async_operations;
> +	struct uvc_copy_op copy_operations[UVC_MAX_PACKETS];
> +	struct work_struct work;
>  };
> 
>  struct uvc_streaming {
> @@ -536,6 +558,7 @@ struct uvc_streaming {
>  	/* Buffers queue. */
>  	unsigned int frozen : 1;
>  	struct uvc_video_queue queue;
> +	struct workqueue_struct *async_wq;
>  	void (*decode)(struct uvc_urb *uvc_urb, struct uvc_buffer *buf,
>  		       struct uvc_buffer *meta_buf);
> 
> @@ -589,6 +612,11 @@ struct uvc_streaming {
>  	} clock;
>  };
> 
> +#define for_each_uvc_urb(uvc_urb, uvc_streaming) \
> +	for ((uvc_urb) = &(uvc_streaming)->uvc_urb[0]; \
> +	     (uvc_urb) < &(uvc_streaming)->uvc_urb[UVC_URBS]; \
> +	     ++(uvc_urb))
> +
>  struct uvc_device_info {
>  	u32	quirks;
>  	u32	meta_format;
Kieran Bingham Nov. 7, 2018, 12:22 p.m. UTC | #2
On 06/11/2018 22:58, Laurent Pinchart wrote:
> Hi Kieran,
> 
> Thank you for the patch.
> 
> On Tuesday, 6 November 2018 23:27:17 EET Kieran Bingham wrote:
>> From: Kieran Bingham <kieran.bingham@ideasonboard.com>
>>
>> Newer high definition cameras, and cameras with multiple lenses such as
>> the range of stereo-vision cameras now available have ever increasing
>> data rates.
>>
>> The inclusion of a variable length packet header in URB packets mean
>> that we must memcpy the frame data out to our destination 'manually'.
>> This can result in data rates of up to 2 gigabits per second being
>> processed.
>>
>> To improve efficiency, and maximise throughput, handle the URB decode
>> processing through a work queue to move it from interrupt context, and
>> allow multiple processors to work on URBs in parallel.
>>
>> Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
> 
> Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> 
> I wonder if we shouldn't, as a future improvement, only queue async work when 
> the quantity of data to be copied is above a certain threshold.


Possibly - let's keep it in mind for when we get back to looking at
Keiichi's patch and any cache management for further performance
improvements.

--
Kieran

> 
>> ---
>> v2:
>>  - Lock full critical section of usb_submit_urb()
>>
>> v3:
>>  - Fix race on submitting uvc_video_decode_data_work() to work queue.
>>  - Rename uvc_decode_op -> uvc_copy_op (Generic to encode/decode)
>>  - Rename decodes -> copy_operations
>>  - Don't queue work if there is no async task
>>  - obtain copy op structure directly in uvc_video_decode_data()
>>  - uvc_video_decode_data_work() -> uvc_video_copy_data_work()
>>
>> v4:
>>  - Provide for_each_uvc_urb()
>>  - Simplify fix for shutdown race to flush queue before freeing URBs
>>  - Rebase to v4.16-rc4 (linux-media/master) adjusting for metadata
>>    conflicts.
>>
>> v5:
>>  - Rebase to media/v4.20-2
>>  - Use GFP_KERNEL allocation in uvc_video_copy_data_work()
>>  - Fix function documentation for uvc_video_copy_data_work()
>>  - Add periods to the end of sentences
>>  - Rename 'decode' variable to 'op' in uvc_video_decode_data()
>>  - Move uvc_urb->async_operations initialisation to before use
>>  - Move async workqueue to match uvc_streaming lifetime instead of
>>    streamon/streamoff
>>
>>  drivers/media/usb/uvc/uvc_driver.c |   2 +-
>>  drivers/media/usb/uvc/uvc_video.c  | 110 +++++++++++++++++++++++-------
>>  drivers/media/usb/uvc/uvcvideo.h   |  28 ++++++++-
>>  3 files changed, 116 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/media/usb/uvc/uvc_driver.c
>> b/drivers/media/usb/uvc/uvc_driver.c index bc369a0934a3..e61a6d26e812
>> 100644
>> --- a/drivers/media/usb/uvc/uvc_driver.c
>> +++ b/drivers/media/usb/uvc/uvc_driver.c
>> @@ -1883,6 +1883,8 @@ static void uvc_unregister_video(struct uvc_device
>> *dev) video_unregister_device(&stream->vdev);
>>  		video_unregister_device(&stream->meta.vdev);
>>
>> +		destroy_workqueue(stream->async_wq);
>> +
>>  		uvc_debugfs_cleanup_stream(stream);
>>  	}
>>  }
>> diff --git a/drivers/media/usb/uvc/uvc_video.c
>> b/drivers/media/usb/uvc/uvc_video.c index 7a7779e1b466..ce9e40444507 100644
>> --- a/drivers/media/usb/uvc/uvc_video.c
>> +++ b/drivers/media/usb/uvc/uvc_video.c
>> @@ -1094,21 +1094,54 @@ static int uvc_video_decode_start(struct
>> uvc_streaming *stream, return data[0];
>>  }
>>
>> -static void uvc_video_decode_data(struct uvc_streaming *stream,
>> +/*
>> + * uvc_video_decode_data_work: Asynchronous memcpy processing
>> + *
>> + * Copy URB data to video buffers in process context, releasing buffer
>> + * references and requeuing the URB when done.
>> + */
>> +static void uvc_video_copy_data_work(struct work_struct *work)
>> +{
>> +	struct uvc_urb *uvc_urb = container_of(work, struct uvc_urb, work);
>> +	unsigned int i;
>> +	int ret;
>> +
>> +	for (i = 0; i < uvc_urb->async_operations; i++) {
>> +		struct uvc_copy_op *op = &uvc_urb->copy_operations[i];
>> +
>> +		memcpy(op->dst, op->src, op->len);
>> +
>> +		/* Release reference taken on this buffer. */
>> +		uvc_queue_buffer_release(op->buf);
>> +	}
>> +
>> +	ret = usb_submit_urb(uvc_urb->urb, GFP_KERNEL);
>> +	if (ret < 0)
>> +		uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
>> +			   ret);
>> +}
>> +
>> +static void uvc_video_decode_data(struct uvc_urb *uvc_urb,
>>  		struct uvc_buffer *buf, const u8 *data, int len)
>>  {
>> -	unsigned int maxlen, nbytes;
>> -	void *mem;
>> +	unsigned int active_op = uvc_urb->async_operations;
>> +	struct uvc_copy_op *op = &uvc_urb->copy_operations[active_op];
>> +	unsigned int maxlen;
>>
>>  	if (len <= 0)
>>  		return;
>>
>> -	/* Copy the video data to the buffer. */
>>  	maxlen = buf->length - buf->bytesused;
>> -	mem = buf->mem + buf->bytesused;
>> -	nbytes = min((unsigned int)len, maxlen);
>> -	memcpy(mem, data, nbytes);
>> -	buf->bytesused += nbytes;
>> +
>> +	/* Take a buffer reference for async work. */
>> +	kref_get(&buf->ref);
>> +
>> +	op->buf = buf;
>> +	op->src = data;
>> +	op->dst = buf->mem + buf->bytesused;
>> +	op->len = min_t(unsigned int, len, maxlen);
>> +
>> +	buf->bytesused += op->len;
>>
>>  	/* Complete the current frame if the buffer size was exceeded. */
>>  	if (len > maxlen) {
>> @@ -1116,6 +1149,8 @@ static void uvc_video_decode_data(struct uvc_streaming
>> *stream, buf->error = 1;
>>  		buf->state = UVC_BUF_STATE_READY;
>>  	}
>> +
>> +	uvc_urb->async_operations++;
>>  }
>>
>>  static void uvc_video_decode_end(struct uvc_streaming *stream,
>> @@ -1324,7 +1359,7 @@ static void uvc_video_decode_isoc(struct uvc_urb
>> *uvc_urb, uvc_video_decode_meta(stream, meta_buf, mem, ret);
>>
>>  		/* Decode the payload data. */
>> -		uvc_video_decode_data(stream, buf, mem + ret,
>> +		uvc_video_decode_data(uvc_urb, buf, mem + ret,
>>  			urb->iso_frame_desc[i].actual_length - ret);
>>
>>  		/* Process the header again. */
>> @@ -1384,9 +1419,9 @@ static void uvc_video_decode_bulk(struct uvc_urb
>> *uvc_urb, * sure buf is never dereferenced if NULL.
>>  	 */
>>
>> -	/* Process video data. */
>> +	/* Prepare video data for processing. */
>>  	if (!stream->bulk.skip_payload && buf != NULL)
>> -		uvc_video_decode_data(stream, buf, mem, len);
>> +		uvc_video_decode_data(uvc_urb, buf, mem, len);
>>
>>  	/* Detect the payload end by a URB smaller than the maximum size (or
>>  	 * a payload size equal to the maximum) and process the header again.
>> @@ -1472,7 +1507,7 @@ static void uvc_video_complete(struct urb *urb)
>>  		uvc_printk(KERN_WARNING, "Non-zero status (%d) in video "
>>  			"completion handler.\n", urb->status);
>>  		/* fall through */
>> -	case -ENOENT:		/* usb_kill_urb() called. */
>> +	case -ENOENT:		/* usb_poison_urb() called. */
>>  		if (stream->frozen)
>>  			return;
>>  		/* fall through */
>> @@ -1494,12 +1529,26 @@ static void uvc_video_complete(struct urb *urb)
>>  		spin_unlock_irqrestore(&qmeta->irqlock, flags);
>>  	}
>>
>> +	/* Re-initialise the URB async work. */
>> +	uvc_urb->async_operations = 0;
>> +
>> +	/*
>> +	 * Process the URB headers, and optionally queue expensive memcpy tasks
>> +	 * to be deferred to a work queue.
>> +	 */
>>  	stream->decode(uvc_urb, buf, buf_meta);
>>
>> -	if ((ret = usb_submit_urb(urb, GFP_ATOMIC)) < 0) {
>> -		uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
>> -			ret);
>> +	/* If no async work is needed, resubmit the URB immediately. */
>> +	if (!uvc_urb->async_operations) {
>> +		ret = usb_submit_urb(uvc_urb->urb, GFP_ATOMIC);
>> +		if (ret < 0)
>> +			uvc_printk(KERN_ERR,
>> +				   "Failed to resubmit video URB (%d).\n",
>> +				   ret);
>> +		return;
>>  	}
>> +
>> +	queue_work(stream->async_wq, &uvc_urb->work);
>>  }
>>
>>  /*
>> @@ -1594,20 +1643,22 @@ static int uvc_alloc_urb_buffers(struct
>> uvc_streaming *stream, */
>>  static void uvc_uninit_video(struct uvc_streaming *stream, int
>> free_buffers) {
>> -	struct urb *urb;
>> -	unsigned int i;
>> +	struct uvc_urb *uvc_urb;
>>
>>  	uvc_video_stats_stop(stream);
>>
>> -	for (i = 0; i < UVC_URBS; ++i) {
>> -		struct uvc_urb *uvc_urb = &stream->uvc_urb[i];
>> +	/*
>> +	 * We must poison the URBs rather than kill them to ensure that even
>> +	 * after the completion handler returns, any asynchronous workqueues
>> +	 * will be prevented from resubmitting the URBs.
>> +	 */
>> +	for_each_uvc_urb(uvc_urb, stream)
>> +		usb_poison_urb(uvc_urb->urb);
>>
>> -		urb = uvc_urb->urb;
>> -		if (urb == NULL)
>> -			continue;
>> +	flush_workqueue(stream->async_wq);
>>
>> -		usb_kill_urb(urb);
>> -		usb_free_urb(urb);
>> +	for_each_uvc_urb(uvc_urb, stream) {
>> +		usb_free_urb(uvc_urb->urb);
>>  		uvc_urb->urb = NULL;
>>  	}
>>
>> @@ -1932,6 +1983,7 @@ int uvc_video_init(struct uvc_streaming *stream)
>>  	struct uvc_streaming_control *probe = &stream->ctrl;
>>  	struct uvc_format *format = NULL;
>>  	struct uvc_frame *frame = NULL;
>> +	struct uvc_urb *uvc_urb;
>>  	unsigned int i;
>>  	int ret;
>>
>> @@ -2017,6 +2069,16 @@ int uvc_video_init(struct uvc_streaming *stream)
>>  		}
>>  	}
>>
>> +	/* Allocate a stream specific work queue for asynchronous tasks. */
>> +	stream->async_wq = alloc_workqueue("uvcvideo", WQ_UNBOUND | WQ_HIGHPRI,
>> +					   0);
>> +	if (!stream->async_wq)
>> +		return -ENOMEM;
>> +
>> +	/* Prepare asynchronous work items. */
>> +	for_each_uvc_urb(uvc_urb, stream)
>> +		INIT_WORK(&uvc_urb->work, uvc_video_copy_data_work);
>> +
>>  	return 0;
>>  }
>>
>> diff --git a/drivers/media/usb/uvc/uvcvideo.h
>> b/drivers/media/usb/uvc/uvcvideo.h index 1bc17da7f3d4..0953e2e59a79 100644
>> --- a/drivers/media/usb/uvc/uvcvideo.h
>> +++ b/drivers/media/usb/uvc/uvcvideo.h
>> @@ -491,12 +491,30 @@ struct uvc_stats_stream {
>>  #define UVC_METATADA_BUF_SIZE 1024
>>
>>  /**
>> + * struct uvc_copy_op: Context structure to schedule asynchronous memcpy
>> + *
>> + * @buf: active buf object for this operation
>> + * @dst: copy destination address
>> + * @src: copy source address
>> + * @len: copy length
>> + */
>> +struct uvc_copy_op {
>> +	struct uvc_buffer *buf;
>> +	void *dst;
>> +	const __u8 *src;
>> +	size_t len;
>> +};
>> +
>> +/**
>>   * struct uvc_urb - URB context management structure
>>   *
>>   * @urb: the URB described by this context structure
>>   * @stream: UVC streaming context
>>   * @buffer: memory storage for the URB
>>   * @dma: DMA coherent addressing for the urb_buffer
>> + * @async_operations: counter to indicate the number of copy operations
>> + * @copy_operations: work descriptors for asynchronous copy operations
>> + * @work: work queue entry for asynchronous decode
>>   */
>>  struct uvc_urb {
>>  	struct urb *urb;
>> @@ -504,6 +522,10 @@ struct uvc_urb {
>>
>>  	char *buffer;
>>  	dma_addr_t dma;
>> +
>> +	unsigned int async_operations;
>> +	struct uvc_copy_op copy_operations[UVC_MAX_PACKETS];
>> +	struct work_struct work;
>>  };
>>
>>  struct uvc_streaming {
>> @@ -536,6 +558,7 @@ struct uvc_streaming {
>>  	/* Buffers queue. */
>>  	unsigned int frozen : 1;
>>  	struct uvc_video_queue queue;
>> +	struct workqueue_struct *async_wq;
>>  	void (*decode)(struct uvc_urb *uvc_urb, struct uvc_buffer *buf,
>>  		       struct uvc_buffer *meta_buf);
>>
>> @@ -589,6 +612,11 @@ struct uvc_streaming {
>>  	} clock;
>>  };
>>
>> +#define for_each_uvc_urb(uvc_urb, uvc_streaming) \
>> +	for ((uvc_urb) = &(uvc_streaming)->uvc_urb[0]; \
>> +	     (uvc_urb) < &(uvc_streaming)->uvc_urb[UVC_URBS]; \
>> +	     ++(uvc_urb))
>> +
>>  struct uvc_device_info {
>>  	u32	quirks;
>>  	u32	meta_format;
> 
>
diff mbox series

Patch

diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c
index bc369a0934a3..e61a6d26e812 100644
--- a/drivers/media/usb/uvc/uvc_driver.c
+++ b/drivers/media/usb/uvc/uvc_driver.c
@@ -1883,6 +1883,8 @@  static void uvc_unregister_video(struct uvc_device *dev)
 		video_unregister_device(&stream->vdev);
 		video_unregister_device(&stream->meta.vdev);
 
+		destroy_workqueue(stream->async_wq);
+
 		uvc_debugfs_cleanup_stream(stream);
 	}
 }
diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index 7a7779e1b466..ce9e40444507 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -1094,21 +1094,54 @@  static int uvc_video_decode_start(struct uvc_streaming *stream,
 	return data[0];
 }
 
-static void uvc_video_decode_data(struct uvc_streaming *stream,
+/*
+ * uvc_video_copy_data_work: Asynchronous memcpy processing
+ *
+ * Copy URB data to video buffers in process context, releasing buffer
+ * references and requeuing the URB when done.
+ */
+static void uvc_video_copy_data_work(struct work_struct *work)
+{
+	struct uvc_urb *uvc_urb = container_of(work, struct uvc_urb, work);
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < uvc_urb->async_operations; i++) {
+		struct uvc_copy_op *op = &uvc_urb->copy_operations[i];
+
+		memcpy(op->dst, op->src, op->len);
+
+		/* Release reference taken on this buffer. */
+		uvc_queue_buffer_release(op->buf);
+	}
+
+	ret = usb_submit_urb(uvc_urb->urb, GFP_KERNEL);
+	if (ret < 0)
+		uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
+			   ret);
+}
+
+static void uvc_video_decode_data(struct uvc_urb *uvc_urb,
 		struct uvc_buffer *buf, const u8 *data, int len)
 {
-	unsigned int maxlen, nbytes;
-	void *mem;
+	unsigned int active_op = uvc_urb->async_operations;
+	struct uvc_copy_op *op = &uvc_urb->copy_operations[active_op];
+	unsigned int maxlen;
 
 	if (len <= 0)
 		return;
 
-	/* Copy the video data to the buffer. */
 	maxlen = buf->length - buf->bytesused;
-	mem = buf->mem + buf->bytesused;
-	nbytes = min((unsigned int)len, maxlen);
-	memcpy(mem, data, nbytes);
-	buf->bytesused += nbytes;
+
+	/* Take a buffer reference for async work. */
+	kref_get(&buf->ref);
+
+	op->buf = buf;
+	op->src = data;
+	op->dst = buf->mem + buf->bytesused;
+	op->len = min_t(unsigned int, len, maxlen);
+
+	buf->bytesused += op->len;
 
 	/* Complete the current frame if the buffer size was exceeded. */
 	if (len > maxlen) {
@@ -1116,6 +1149,8 @@  static void uvc_video_decode_data(struct uvc_streaming *stream,
 		buf->error = 1;
 		buf->state = UVC_BUF_STATE_READY;
 	}
+
+	uvc_urb->async_operations++;
 }
 
 static void uvc_video_decode_end(struct uvc_streaming *stream,
@@ -1324,7 +1359,7 @@  static void uvc_video_decode_isoc(struct uvc_urb *uvc_urb,
 		uvc_video_decode_meta(stream, meta_buf, mem, ret);
 
 		/* Decode the payload data. */
-		uvc_video_decode_data(stream, buf, mem + ret,
+		uvc_video_decode_data(uvc_urb, buf, mem + ret,
 			urb->iso_frame_desc[i].actual_length - ret);
 
 		/* Process the header again. */
@@ -1384,9 +1419,9 @@  static void uvc_video_decode_bulk(struct uvc_urb *uvc_urb,
 	 * sure buf is never dereferenced if NULL.
 	 */
 
-	/* Process video data. */
+	/* Prepare video data for processing. */
 	if (!stream->bulk.skip_payload && buf != NULL)
-		uvc_video_decode_data(stream, buf, mem, len);
+		uvc_video_decode_data(uvc_urb, buf, mem, len);
 
 	/* Detect the payload end by a URB smaller than the maximum size (or
 	 * a payload size equal to the maximum) and process the header again.
@@ -1472,7 +1507,7 @@  static void uvc_video_complete(struct urb *urb)
 		uvc_printk(KERN_WARNING, "Non-zero status (%d) in video "
 			"completion handler.\n", urb->status);
 		/* fall through */
-	case -ENOENT:		/* usb_kill_urb() called. */
+	case -ENOENT:		/* usb_poison_urb() called. */
 		if (stream->frozen)
 			return;
 		/* fall through */
@@ -1494,12 +1529,26 @@  static void uvc_video_complete(struct urb *urb)
 		spin_unlock_irqrestore(&qmeta->irqlock, flags);
 	}
 
+	/* Re-initialise the URB async work. */
+	uvc_urb->async_operations = 0;
+
+	/*
+	 * Process the URB headers, and optionally queue expensive memcpy tasks
+	 * to be deferred to a work queue.
+	 */
 	stream->decode(uvc_urb, buf, buf_meta);
 
-	if ((ret = usb_submit_urb(urb, GFP_ATOMIC)) < 0) {
-		uvc_printk(KERN_ERR, "Failed to resubmit video URB (%d).\n",
-			ret);
+	/* If no async work is needed, resubmit the URB immediately. */
+	if (!uvc_urb->async_operations) {
+		ret = usb_submit_urb(uvc_urb->urb, GFP_ATOMIC);
+		if (ret < 0)
+			uvc_printk(KERN_ERR,
+				   "Failed to resubmit video URB (%d).\n",
+				   ret);
+		return;
 	}
+
+	queue_work(stream->async_wq, &uvc_urb->work);
 }
 
 /*
@@ -1594,20 +1643,22 @@  static int uvc_alloc_urb_buffers(struct uvc_streaming *stream,
  */
 static void uvc_uninit_video(struct uvc_streaming *stream, int free_buffers)
 {
-	struct urb *urb;
-	unsigned int i;
+	struct uvc_urb *uvc_urb;
 
 	uvc_video_stats_stop(stream);
 
-	for (i = 0; i < UVC_URBS; ++i) {
-		struct uvc_urb *uvc_urb = &stream->uvc_urb[i];
+	/*
+	 * We must poison the URBs rather than kill them to ensure that even
+	 * after the completion handler returns, any asynchronous workqueues
+	 * will be prevented from resubmitting the URBs.
+	 */
+	for_each_uvc_urb(uvc_urb, stream)
+		usb_poison_urb(uvc_urb->urb);
 
-		urb = uvc_urb->urb;
-		if (urb == NULL)
-			continue;
+	flush_workqueue(stream->async_wq);
 
-		usb_kill_urb(urb);
-		usb_free_urb(urb);
+	for_each_uvc_urb(uvc_urb, stream) {
+		usb_free_urb(uvc_urb->urb);
 		uvc_urb->urb = NULL;
 	}
 
@@ -1932,6 +1983,7 @@  int uvc_video_init(struct uvc_streaming *stream)
 	struct uvc_streaming_control *probe = &stream->ctrl;
 	struct uvc_format *format = NULL;
 	struct uvc_frame *frame = NULL;
+	struct uvc_urb *uvc_urb;
 	unsigned int i;
 	int ret;
 
@@ -2017,6 +2069,16 @@  int uvc_video_init(struct uvc_streaming *stream)
 		}
 	}
 
+	/* Allocate a stream specific work queue for asynchronous tasks. */
+	stream->async_wq = alloc_workqueue("uvcvideo", WQ_UNBOUND | WQ_HIGHPRI,
+					   0);
+	if (!stream->async_wq)
+		return -ENOMEM;
+
+	/* Prepare asynchronous work items. */
+	for_each_uvc_urb(uvc_urb, stream)
+		INIT_WORK(&uvc_urb->work, uvc_video_copy_data_work);
+
 	return 0;
 }
 
diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h
index 1bc17da7f3d4..0953e2e59a79 100644
--- a/drivers/media/usb/uvc/uvcvideo.h
+++ b/drivers/media/usb/uvc/uvcvideo.h
@@ -491,12 +491,30 @@  struct uvc_stats_stream {
 #define UVC_METATADA_BUF_SIZE 1024
 
 /**
+ * struct uvc_copy_op: Context structure to schedule asynchronous memcpy
+ *
+ * @buf: active buf object for this operation
+ * @dst: copy destination address
+ * @src: copy source address
+ * @len: copy length
+ */
+struct uvc_copy_op {
+	struct uvc_buffer *buf;
+	void *dst;
+	const __u8 *src;
+	size_t len;
+};
+
+/**
  * struct uvc_urb - URB context management structure
  *
  * @urb: the URB described by this context structure
  * @stream: UVC streaming context
  * @buffer: memory storage for the URB
  * @dma: DMA coherent addressing for the urb_buffer
+ * @async_operations: counter to indicate the number of copy operations
+ * @copy_operations: work descriptors for asynchronous copy operations
+ * @work: work queue entry for asynchronous decode
  */
 struct uvc_urb {
 	struct urb *urb;
@@ -504,6 +522,10 @@  struct uvc_urb {
 
 	char *buffer;
 	dma_addr_t dma;
+
+	unsigned int async_operations;
+	struct uvc_copy_op copy_operations[UVC_MAX_PACKETS];
+	struct work_struct work;
 };
 
 struct uvc_streaming {
@@ -536,6 +558,7 @@  struct uvc_streaming {
 	/* Buffers queue. */
 	unsigned int frozen : 1;
 	struct uvc_video_queue queue;
+	struct workqueue_struct *async_wq;
 	void (*decode)(struct uvc_urb *uvc_urb, struct uvc_buffer *buf,
 		       struct uvc_buffer *meta_buf);
 
@@ -589,6 +612,11 @@  struct uvc_streaming {
 	} clock;
 };
 
+#define for_each_uvc_urb(uvc_urb, uvc_streaming) \
+	for ((uvc_urb) = &(uvc_streaming)->uvc_urb[0]; \
+	     (uvc_urb) < &(uvc_streaming)->uvc_urb[UVC_URBS]; \
+	     ++(uvc_urb))
+
 struct uvc_device_info {
 	u32	quirks;
 	u32	meta_format;