diff mbox

[v3,2/4] videobuf2-dma-streaming: new videobuf2 memory allocator

Message ID 1348484332-8106-2-git-send-email-federico.vaga@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Federico Vaga Sept. 24, 2012, 10:58 a.m. UTC
The DMA streaming allocator is similar to the DMA contig one, but it uses the
DMA streaming interface (dma_map_single, dma_unmap_single). The
allocator allocates buffers and immediately maps the memory for DMA
transfer. For each buffer prepare/finish it does a DMA synchronization.

Signed-off-by: Federico Vaga <federico.vaga@gmail.com>
---
 drivers/media/v4l2-core/Kconfig                   |   5 +
 drivers/media/v4l2-core/Makefile                  |   1 +
 drivers/media/v4l2-core/videobuf2-dma-streaming.c | 205 ++++++++++++++++++++++
 include/media/videobuf2-dma-streaming.h           |  32 ++++
 4 file modificati, 243 inserzioni(+)
 create mode 100644 drivers/media/v4l2-core/videobuf2-dma-streaming.c
 create mode 100644 include/media/videobuf2-dma-streaming.h

Comments

Marek Szyprowski Sept. 24, 2012, 12:44 p.m. UTC | #1
Hello,

On Monday, September 24, 2012 12:59 PM Federico Vaga wrote:

> The DMA streaming allocator is similar to the DMA contig but it use the
> DMA streaming interface (dma_map_single, dma_unmap_single). The
> allocator allocates buffers and immediately map the memory for DMA
> transfer. For each buffer prepare/finish it does a DMA synchronization.
> 
> Signed-off-by: Federico Vaga <federico.vaga@gmail.com>

Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>

> ---
>  drivers/media/v4l2-core/Kconfig                   |   5 +
>  drivers/media/v4l2-core/Makefile                  |   1 +
>  drivers/media/v4l2-core/videobuf2-dma-streaming.c | 205 ++++++++++++++++++++++
>  include/media/videobuf2-dma-streaming.h           |  32 ++++
>  4 file modificati, 243 inserzioni(+)
>  create mode 100644 drivers/media/v4l2-core/videobuf2-dma-streaming.c
>  create mode 100644 include/media/videobuf2-dma-streaming.h
> 
> diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig
> index 0c54e19..60548a7 100644
> --- a/drivers/media/v4l2-core/Kconfig
> +++ b/drivers/media/v4l2-core/Kconfig
> @@ -79,3 +79,8 @@ config VIDEOBUF2_DMA_SG
>  	#depends on HAS_DMA
>  	select VIDEOBUF2_CORE
>  	select VIDEOBUF2_MEMOPS
> +
> +config VIDEOBUF2_DMA_STREAMING
> +	select VIDEOBUF2_CORE
> +	select VIDEOBUF2_MEMOPS
> +	tristate
> diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
> index c2d61d4..0b2756f 100644
> --- a/drivers/media/v4l2-core/Makefile
> +++ b/drivers/media/v4l2-core/Makefile
> @@ -28,6 +28,7 @@ obj-$(CONFIG_VIDEOBUF2_MEMOPS) += videobuf2-memops.o
>  obj-$(CONFIG_VIDEOBUF2_VMALLOC) += videobuf2-vmalloc.o
>  obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o
>  obj-$(CONFIG_VIDEOBUF2_DMA_SG) += videobuf2-dma-sg.o
> +obj-$(CONFIG_VIDEOBUF2_DMA_STREAMING) += videobuf2-dma-streaming.o
> 
>  ccflags-y += -I$(srctree)/drivers/media/dvb-core
>  ccflags-y += -I$(srctree)/drivers/media/dvb-frontends
> diff --git a/drivers/media/v4l2-core/videobuf2-dma-streaming.c b/drivers/media/v4l2-
> core/videobuf2-dma-streaming.c
> new file mode 100644
> index 0000000..c839e05
> --- /dev/null
> +++ b/drivers/media/v4l2-core/videobuf2-dma-streaming.c
> @@ -0,0 +1,205 @@
> +/*
> + * videobuf2-dma-streaming.c - DMA streaming memory allocator for videobuf2
> + *
> + * Copyright (C) 2012 Federico Vaga <federico.vaga@gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/pagemap.h>
> +#include <linux/dma-mapping.h>
> +
> +#include <media/videobuf2-core.h>
> +#include <media/videobuf2-dma-streaming.h>
> +#include <media/videobuf2-memops.h>
> +
> +struct vb2_streaming_conf {
> +	struct device			*dev;
> +};
> +struct vb2_streaming_buf {
> +	struct vb2_streaming_conf	*conf;
> +	void				*vaddr;
> +
> +	dma_addr_t			dma_handle;
> +
> +	unsigned long			size;
> +	struct vm_area_struct		*vma;
> +
> +	atomic_t			refcount;
> +	struct vb2_vmarea_handler	handler;
> +};
> +
> +static void vb2_dma_streaming_put(void *buf_priv)
> +{
> +	struct vb2_streaming_buf *buf = buf_priv;
> +
> +	if (atomic_dec_and_test(&buf->refcount)) {
> +		dma_unmap_single(buf->conf->dev, buf->dma_handle, buf->size,
> +				 DMA_FROM_DEVICE);
> +		free_pages_exact(buf->vaddr, buf->size);
> +		kfree(buf);
> +	}
> +
> +}
> +
> +static void *vb2_dma_streaming_alloc(void *alloc_ctx, unsigned long size)
> +{
> +	struct vb2_streaming_conf *conf = alloc_ctx;
> +	struct vb2_streaming_buf *buf;
> +	int err;
> +
> +	buf = kzalloc(sizeof(struct vb2_streaming_buf), GFP_KERNEL);
> +	if (!buf)
> +		return ERR_PTR(-ENOMEM);
> +	buf->vaddr = alloc_pages_exact(size, GFP_KERNEL | GFP_DMA);
> +	if (!buf->vaddr) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +	buf->dma_handle = dma_map_single(conf->dev, buf->vaddr, size,
> +					 DMA_FROM_DEVICE);
> +	err = dma_mapping_error(conf->dev, buf->dma_handle);
> +	if (err) {
> +		dev_err(conf->dev, "dma_map_single failed\n");
> +
> +		free_pages_exact(buf->vaddr, size);
> +		buf->vaddr = NULL;
> +		goto out_pages;
> +	}
> +	buf->conf = conf;
> +	buf->size = size;
> +	buf->handler.refcount = &buf->refcount;
> +	buf->handler.put = vb2_dma_streaming_put;
> +	buf->handler.arg = buf;
> +
> +	atomic_inc(&buf->refcount);
> +	return buf;
> +
> +out_pages:
> +	free_pages_exact(buf->vaddr, buf->size);
> +out:
> +	kfree(buf);
> +	return ERR_PTR(err);
> +}
> +
> +static void *vb2_dma_streaming_cookie(void *buf_priv)
> +{
> +	struct vb2_streaming_buf *buf = buf_priv;
> +
> +	return &buf->dma_handle;
> +}
> +
> +static void *vb2_dma_streaming_vaddr(void *buf_priv)
> +{
> +	struct vb2_streaming_buf *buf = buf_priv;
> +
> +	if (!buf)
> +		return NULL;
> +	return buf->vaddr;
> +}
> +
> +static unsigned int vb2_dma_streaming_num_users(void *buf_priv)
> +{
> +	struct vb2_streaming_buf *buf = buf_priv;
> +
> +	return atomic_read(&buf->refcount);
> +}
> +
> +static int vb2_dma_streaming_mmap(void *buf_priv, struct vm_area_struct *vma)
> +{
> +	struct vb2_streaming_buf *buf = buf_priv;
> +	unsigned long pos, start = vma->vm_start;
> +	unsigned long size;
> +	struct page *page;
> +	int err;
> +
> +	/* Try to remap memory */
> +	size = vma->vm_end - vma->vm_start;
> +	size = (size < buf->size) ? size : buf->size;
> +	pos = (unsigned long)buf->vaddr;
> +
> +	while (size > 0) {
> +		page = virt_to_page((void *)pos);
> +		if (!page) {
> +			dev_err(buf->conf->dev, "mmap: virt_to_page failed\n");
> +			return -ENOMEM;
> +		}
> +		err = vm_insert_page(vma, start, page);
> +		if (err) {
> +			dev_err(buf->conf->dev, "mmap: insert failed %d\n", err);
> +			return -ENOMEM;
> +		}
> +		start += PAGE_SIZE;
> +		pos += PAGE_SIZE;
> +
> +		if (size > PAGE_SIZE)
> +			size -= PAGE_SIZE;
> +		else
> +			size = 0;
> +	}
> +
> +
> +	vma->vm_ops = &vb2_common_vm_ops;
> +	vma->vm_flags |= VM_DONTEXPAND;
> +	vma->vm_private_data = &buf->handler;
> +
> +	vma->vm_ops->open(vma);
> +
> +	return 0;
> +}
> +
> +static void vb2_dma_streaming_prepare(void *buf_priv)
> +{
> +	struct vb2_streaming_buf *buf = buf_priv;
> +
> +	dma_sync_single_for_device(buf->conf->dev, buf->dma_handle,
> +				   buf->size, DMA_FROM_DEVICE);
> +}
> +
> +static void vb2_dma_streaming_finish(void *buf_priv)
> +{
> +	struct vb2_streaming_buf *buf = buf_priv;
> +
> +	dma_sync_single_for_cpu(buf->conf->dev, buf->dma_handle,
> +				buf->size, DMA_FROM_DEVICE);
> +}
> +
> +const struct vb2_mem_ops vb2_dma_streaming_memops = {
> +	.alloc		= vb2_dma_streaming_alloc,
> +	.put		= vb2_dma_streaming_put,
> +	.cookie		= vb2_dma_streaming_cookie,
> +	.vaddr		= vb2_dma_streaming_vaddr,
> +	.mmap		= vb2_dma_streaming_mmap,
> +	.num_users	= vb2_dma_streaming_num_users,
> +	.prepare	= vb2_dma_streaming_prepare,
> +	.finish		= vb2_dma_streaming_finish,
> +};
> +EXPORT_SYMBOL_GPL(vb2_dma_streaming_memops);
> +
> +void *vb2_dma_streaming_init_ctx(struct device *dev)
> +{
> +	struct vb2_streaming_conf *conf;
> +
> +	conf = kmalloc(sizeof(struct vb2_streaming_conf), GFP_KERNEL);
> +	if (!conf)
> +		return ERR_PTR(-ENOMEM);
> +
> +	conf->dev = dev;
> +
> +	return conf;
> +}
> +EXPORT_SYMBOL_GPL(vb2_dma_streaming_init_ctx);
> +
> +void vb2_dma_streaming_cleanup_ctx(void *alloc_ctx)
> +{
> +	kfree(alloc_ctx);
> +}
> +EXPORT_SYMBOL_GPL(vb2_dma_streaming_cleanup_ctx);
> +
> +MODULE_DESCRIPTION("DMA-streaming memory allocator for videobuf2");
> +MODULE_AUTHOR("Federico Vaga <federico.vaga@gmail.com>");
> +MODULE_LICENSE("GPL v2");
> diff --git a/include/media/videobuf2-dma-streaming.h b/include/media/videobuf2-dma-streaming.h
> new file mode 100644
> index 0000000..2a62d93
> --- /dev/null
> +++ b/include/media/videobuf2-dma-streaming.h
> @@ -0,0 +1,32 @@
> +/*
> + * videobuf2-dma-streaming.h - DMA streaming memory allocator for videobuf2
> + *
> + * Copyright (C) 2012 Federico Vaga
> + *
> + * Author: Federico Vaga <federico.vaga@gmail.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation.
> + */
> +
> +#ifndef _MEDIA_VIDEOBUF2_DMA_STREAMING_H
> +#define _MEDIA_VIDEOBUF2_DMA_STREAMING_H
> +
> +#include <media/videobuf2-core.h>
> +#include <linux/dma-mapping.h>
> +
> +void *vb2_dma_streaming_init_ctx(struct device *dev);
> +void vb2_dma_streaming_cleanup_ctx(void *alloc_ctx);
> +
> +extern const struct vb2_mem_ops vb2_dma_streaming_memops;
> +
> +static inline dma_addr_t
> +vb2_dma_streaming_plane_paddr(struct vb2_buffer *vb, unsigned int plane_no)
> +{
> +	dma_addr_t *dma_addr = vb2_plane_cookie(vb, plane_no);
> +
> +	return *dma_addr;
> +}
> +
> +#endif
> --
> 1.7.11.4

Best regards
Mauro Carvalho Chehab Dec. 4, 2012, 4:04 p.m. UTC | #2
Em 24-09-2012 09:44, Marek Szyprowski escreveu:
> Hello,
>
> On Monday, September 24, 2012 12:59 PM Federico Vaga wrote:
>
>> The DMA streaming allocator is similar to the DMA contig but it use the
>> DMA streaming interface (dma_map_single, dma_unmap_single). The
>> allocator allocates buffers and immediately map the memory for DMA
>> transfer. For each buffer prepare/finish it does a DMA synchronization.

Hmm.. the explanation didn't convince me, e.g.:
	1) why is it needed;
	2) why vb2-dma-contig can't be patched to use dma_map_single
(possibly using a different vb2_io_modes bit?);
	3) what are the use cases for it.

Could you please detail it? Without that, someone who needs to
write a driver will have serious doubts about which would be the right
driver for their usage. Also, please document it in the driver itself.

Thanks!
Mauro

>>
>> Signed-off-by: Federico Vaga <federico.vaga@gmail.com>
>
> Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
>
>> ---
>>   drivers/media/v4l2-core/Kconfig                   |   5 +
>>   drivers/media/v4l2-core/Makefile                  |   1 +
>>   drivers/media/v4l2-core/videobuf2-dma-streaming.c | 205 ++++++++++++++++++++++
>>   include/media/videobuf2-dma-streaming.h           |  32 ++++
>>   4 file modificati, 243 inserzioni(+)
>>   create mode 100644 drivers/media/v4l2-core/videobuf2-dma-streaming.c
>>   create mode 100644 include/media/videobuf2-dma-streaming.h
>>
>> diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig
>> index 0c54e19..60548a7 100644
>> --- a/drivers/media/v4l2-core/Kconfig
>> +++ b/drivers/media/v4l2-core/Kconfig
>> @@ -79,3 +79,8 @@ config VIDEOBUF2_DMA_SG
>>   	#depends on HAS_DMA
>>   	select VIDEOBUF2_CORE
>>   	select VIDEOBUF2_MEMOPS
>> +
>> +config VIDEOBUF2_DMA_STREAMING
>> +	select VIDEOBUF2_CORE
>> +	select VIDEOBUF2_MEMOPS
>> +	tristate
>> diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
>> index c2d61d4..0b2756f 100644
>> --- a/drivers/media/v4l2-core/Makefile
>> +++ b/drivers/media/v4l2-core/Makefile
>> @@ -28,6 +28,7 @@ obj-$(CONFIG_VIDEOBUF2_MEMOPS) += videobuf2-memops.o
>>   obj-$(CONFIG_VIDEOBUF2_VMALLOC) += videobuf2-vmalloc.o
>>   obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o
>>   obj-$(CONFIG_VIDEOBUF2_DMA_SG) += videobuf2-dma-sg.o
>> +obj-$(CONFIG_VIDEOBUF2_DMA_STREAMING) += videobuf2-dma-streaming.o
>>
>>   ccflags-y += -I$(srctree)/drivers/media/dvb-core
>>   ccflags-y += -I$(srctree)/drivers/media/dvb-frontends
>> diff --git a/drivers/media/v4l2-core/videobuf2-dma-streaming.c b/drivers/media/v4l2-
>> core/videobuf2-dma-streaming.c
>> new file mode 100644
>> index 0000000..c839e05
>> --- /dev/null
>> +++ b/drivers/media/v4l2-core/videobuf2-dma-streaming.c
>> @@ -0,0 +1,205 @@
>> +/*
>> + * videobuf2-dma-streaming.c - DMA streaming memory allocator for videobuf2
>> + *
>> + * Copyright (C) 2012 Federico Vaga <federico.vaga@gmail.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/module.h>
>> +#include <linux/slab.h>
>> +#include <linux/pagemap.h>
>> +#include <linux/dma-mapping.h>
>> +
>> +#include <media/videobuf2-core.h>
>> +#include <media/videobuf2-dma-streaming.h>
>> +#include <media/videobuf2-memops.h>
>> +
>> +struct vb2_streaming_conf {
>> +	struct device			*dev;
>> +};
>> +struct vb2_streaming_buf {
>> +	struct vb2_streaming_conf	*conf;
>> +	void				*vaddr;
>> +
>> +	dma_addr_t			dma_handle;
>> +
>> +	unsigned long			size;
>> +	struct vm_area_struct		*vma;
>> +
>> +	atomic_t			refcount;
>> +	struct vb2_vmarea_handler	handler;
>> +};
>> +
>> +static void vb2_dma_streaming_put(void *buf_priv)
>> +{
>> +	struct vb2_streaming_buf *buf = buf_priv;
>> +
>> +	if (atomic_dec_and_test(&buf->refcount)) {
>> +		dma_unmap_single(buf->conf->dev, buf->dma_handle, buf->size,
>> +				 DMA_FROM_DEVICE);
>> +		free_pages_exact(buf->vaddr, buf->size);
>> +		kfree(buf);
>> +	}
>> +
>> +}
>> +
>> +static void *vb2_dma_streaming_alloc(void *alloc_ctx, unsigned long size)
>> +{
>> +	struct vb2_streaming_conf *conf = alloc_ctx;
>> +	struct vb2_streaming_buf *buf;
>> +	int err;
>> +
>> +	buf = kzalloc(sizeof(struct vb2_streaming_buf), GFP_KERNEL);
>> +	if (!buf)
>> +		return ERR_PTR(-ENOMEM);
>> +	buf->vaddr = alloc_pages_exact(size, GFP_KERNEL | GFP_DMA);
>> +	if (!buf->vaddr) {
>> +		err = -ENOMEM;
>> +		goto out;
>> +	}
>> +	buf->dma_handle = dma_map_single(conf->dev, buf->vaddr, size,
>> +					 DMA_FROM_DEVICE);
>> +	err = dma_mapping_error(conf->dev, buf->dma_handle);
>> +	if (err) {
>> +		dev_err(conf->dev, "dma_map_single failed\n");
>> +
>> +		free_pages_exact(buf->vaddr, size);
>> +		buf->vaddr = NULL;
>> +		goto out_pages;
>> +	}
>> +	buf->conf = conf;
>> +	buf->size = size;
>> +	buf->handler.refcount = &buf->refcount;
>> +	buf->handler.put = vb2_dma_streaming_put;
>> +	buf->handler.arg = buf;
>> +
>> +	atomic_inc(&buf->refcount);
>> +	return buf;
>> +
>> +out_pages:
>> +	free_pages_exact(buf->vaddr, buf->size);
>> +out:
>> +	kfree(buf);
>> +	return ERR_PTR(err);
>> +}
>> +
>> +static void *vb2_dma_streaming_cookie(void *buf_priv)
>> +{
>> +	struct vb2_streaming_buf *buf = buf_priv;
>> +
>> +	return &buf->dma_handle;
>> +}
>> +
>> +static void *vb2_dma_streaming_vaddr(void *buf_priv)
>> +{
>> +	struct vb2_streaming_buf *buf = buf_priv;
>> +
>> +	if (!buf)
>> +		return NULL;
>> +	return buf->vaddr;
>> +}
>> +
>> +static unsigned int vb2_dma_streaming_num_users(void *buf_priv)
>> +{
>> +	struct vb2_streaming_buf *buf = buf_priv;
>> +
>> +	return atomic_read(&buf->refcount);
>> +}
>> +
>> +static int vb2_dma_streaming_mmap(void *buf_priv, struct vm_area_struct *vma)
>> +{
>> +	struct vb2_streaming_buf *buf = buf_priv;
>> +	unsigned long pos, start = vma->vm_start;
>> +	unsigned long size;
>> +	struct page *page;
>> +	int err;
>> +
>> +	/* Try to remap memory */
>> +	size = vma->vm_end - vma->vm_start;
>> +	size = (size < buf->size) ? size : buf->size;
>> +	pos = (unsigned long)buf->vaddr;
>> +
>> +	while (size > 0) {
>> +		page = virt_to_page((void *)pos);
>> +		if (!page) {
>> +			dev_err(buf->conf->dev, "mmap: virt_to_page failed\n");
>> +			return -ENOMEM;
>> +		}
>> +		err = vm_insert_page(vma, start, page);
>> +		if (err) {
>> +			dev_err(buf->conf->dev, "mmap: insert failed %d\n", err);
>> +			return -ENOMEM;
>> +		}
>> +		start += PAGE_SIZE;
>> +		pos += PAGE_SIZE;
>> +
>> +		if (size > PAGE_SIZE)
>> +			size -= PAGE_SIZE;
>> +		else
>> +			size = 0;
>> +	}
>> +
>> +
>> +	vma->vm_ops = &vb2_common_vm_ops;
>> +	vma->vm_flags |= VM_DONTEXPAND;
>> +	vma->vm_private_data = &buf->handler;
>> +
>> +	vma->vm_ops->open(vma);
>> +
>> +	return 0;
>> +}
>> +
>> +static void vb2_dma_streaming_prepare(void *buf_priv)
>> +{
>> +	struct vb2_streaming_buf *buf = buf_priv;
>> +
>> +	dma_sync_single_for_device(buf->conf->dev, buf->dma_handle,
>> +				   buf->size, DMA_FROM_DEVICE);
>> +}
>> +
>> +static void vb2_dma_streaming_finish(void *buf_priv)
>> +{
>> +	struct vb2_streaming_buf *buf = buf_priv;
>> +
>> +	dma_sync_single_for_cpu(buf->conf->dev, buf->dma_handle,
>> +				buf->size, DMA_FROM_DEVICE);
>> +}
>> +
>> +const struct vb2_mem_ops vb2_dma_streaming_memops = {
>> +	.alloc		= vb2_dma_streaming_alloc,
>> +	.put		= vb2_dma_streaming_put,
>> +	.cookie		= vb2_dma_streaming_cookie,
>> +	.vaddr		= vb2_dma_streaming_vaddr,
>> +	.mmap		= vb2_dma_streaming_mmap,
>> +	.num_users	= vb2_dma_streaming_num_users,
>> +	.prepare	= vb2_dma_streaming_prepare,
>> +	.finish		= vb2_dma_streaming_finish,
>> +};
>> +EXPORT_SYMBOL_GPL(vb2_dma_streaming_memops);
>> +
>> +void *vb2_dma_streaming_init_ctx(struct device *dev)
>> +{
>> +	struct vb2_streaming_conf *conf;
>> +
>> +	conf = kmalloc(sizeof(struct vb2_streaming_conf), GFP_KERNEL);
>> +	if (!conf)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	conf->dev = dev;
>> +
>> +	return conf;
>> +}
>> +EXPORT_SYMBOL_GPL(vb2_dma_streaming_init_ctx);
>> +
>> +void vb2_dma_streaming_cleanup_ctx(void *alloc_ctx)
>> +{
>> +	kfree(alloc_ctx);
>> +}
>> +EXPORT_SYMBOL_GPL(vb2_dma_streaming_cleanup_ctx);
>> +
>> +MODULE_DESCRIPTION("DMA-streaming memory allocator for videobuf2");
>> +MODULE_AUTHOR("Federico Vaga <federico.vaga@gmail.com>");
>> +MODULE_LICENSE("GPL v2");
>> diff --git a/include/media/videobuf2-dma-streaming.h b/include/media/videobuf2-dma-streaming.h
>> new file mode 100644
>> index 0000000..2a62d93
>> --- /dev/null
>> +++ b/include/media/videobuf2-dma-streaming.h
>> @@ -0,0 +1,32 @@
>> +/*
>> + * videobuf2-dma-streaming.h - DMA streaming memory allocator for videobuf2
>> + *
>> + * Copyright (C) 2012 Federico Vaga
>> + *
>> + * Author: Federico Vaga <federico.vaga@gmail.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation.
>> + */
>> +
>> +#ifndef _MEDIA_VIDEOBUF2_DMA_STREAMING_H
>> +#define _MEDIA_VIDEOBUF2_DMA_STREAMING_H
>> +
>> +#include <media/videobuf2-core.h>
>> +#include <linux/dma-mapping.h>
>> +
>> +void *vb2_dma_streaming_init_ctx(struct device *dev);
>> +void vb2_dma_streaming_cleanup_ctx(void *alloc_ctx);
>> +
>> +extern const struct vb2_mem_ops vb2_dma_streaming_memops;
>> +
>> +static inline dma_addr_t
>> +vb2_dma_streaming_plane_paddr(struct vb2_buffer *vb, unsigned int plane_no)
>> +{
>> +	dma_addr_t *dma_addr = vb2_plane_cookie(vb, plane_no);
>> +
>> +	return *dma_addr;
>> +}
>> +
>> +#endif
>> --
>> 1.7.11.4
>
> Best regards
>

--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Federico Vaga Dec. 5, 2012, 12:50 p.m. UTC | #3
On Tuesday 04 December 2012 14:04:22 Mauro Carvalho Chehab wrote:
> Em 24-09-2012 09:44, Marek Szyprowski escreveu:
> > Hello,
> > 
> > On Monday, September 24, 2012 12:59 PM Federico Vaga wrote:
> >> The DMA streaming allocator is similar to the DMA contig but it use the
> >> DMA streaming interface (dma_map_single, dma_unmap_single). The
> >> allocator allocates buffers and immediately map the memory for DMA
> >> transfer. For each buffer prepare/finish it does a DMA synchronization.
> 
> Hmm.. the explanation didn't convince me, e. g.:
> 	1) why is it needed;

This allocator is needed because some devices (like the STA2X11 VIP) cannot work
with DMA sg or DMA coherent. Some other devices (like the one used by Jonathan
when he proposed the vb2-dma-nc allocator) can obtain much better performance with
DMA streaming than with coherent.

> 	2) why vb2-dma-config can't be patched to use dma_map_single
> (eventually using a different vb2_io_modes bit?);

I did not modify vb2-dma-contig because I was thinking that each DMA memory 
allocator should reflect a DMA API.

> 	3) what are the usecases for it.
> 
> Could you please detail it? Without that, one that would be needing to
> write a driver will have serious doubts about what would be the right
> driver for its usage. Also, please document it at the driver itself.

I did not write all these details because the reasons to use vb2-dma-contig,
vb2-dma-sg or vb2-dma-streaming are the same reasons for which someone chooses
the SG, coherent or streaming API. This is already documented in the DMA-*.txt
files, so I did not rewrite it, to avoid duplication.
Mauro Carvalho Chehab Dec. 5, 2012, 2:25 p.m. UTC | #4
Em 05-12-2012 10:50, Federico Vaga escreveu:
> On Tuesday 04 December 2012 14:04:22 Mauro Carvalho Chehab wrote:
>> Em 24-09-2012 09:44, Marek Szyprowski escreveu:
>>> Hello,
>>>
>>> On Monday, September 24, 2012 12:59 PM Federico Vaga wrote:
>>>> The DMA streaming allocator is similar to the DMA contig but it use the
>>>> DMA streaming interface (dma_map_single, dma_unmap_single). The
>>>> allocator allocates buffers and immediately map the memory for DMA
>>>> transfer. For each buffer prepare/finish it does a DMA synchronization.
>>
>> Hmm.. the explanation didn't convince me, e. g.:
>> 	1) why is it needed;
>
> This allocator is needed because some device (like STA2X11 VIP) cannot work
> with DMA sg or DMA coherent. Some other device (like the one used by Jonathan
> when he proposes vb2-dma-nc allocator) can obtain much better performance with
> DMA streaming than coherent.

Ok, please add such explanations to the patch description, as it is
important not only for me, but also for others who may need to use it.

>
>> 	2) why vb2-dma-config can't be patched to use dma_map_single
>> (eventually using a different vb2_io_modes bit?);
>
> I did not modify vb2-dma-contig because I was thinking that each DMA memory
> allocator should reflect a DMA API.

The basic reason for having more than one VB low-level handler (vb2 was
inspired by this concept) is that some DMA APIs are very different from
the other ones (see vmalloc vs. DMA S/G, for example).

I didn't make a diff between videobuf2-dma-streaming and videobuf2-dma-contig,
so I can't tell if it makes sense to merge them or not, but the above
argument seems too weak. I was expecting a technical reason why
it wouldn't make sense to merge them.

>
>> 	3) what are the usecases for it.
>>
>> Could you please detail it? Without that, one that would be needing to
>> write a driver will have serious doubts about what would be the right
>> driver for its usage. Also, please document it at the driver itself.
>
> I did not write all this details because the reasons to use vb2-dma-contig,
> vb2-dma-sg or vb2-dma-streaming are the same reasons because someone choose
> SG, coherent or streaming API. This is already documented in the DMA-*.txt
> files, so I did not rewrite it to avoid duplication.

I see. It wouldn't hurt, then, to add a short explanation to the patch description,
pointing to Documentation/DMA-API-HOWTO.txt and describing when using it instead
of vb2-dma-contig (or vb2-dma-sg) would likely give better performance results,
and when the reverse is true.

Btw, from Documentation/DMA-API-HOWTO.txt:

   "Good examples of what to use streaming mappings for are:

	- Networking buffers transmitted/received by a device.
	- Filesystem buffers written/read by a SCSI device.

    The interfaces for using this type of mapping were designed in
    such a way that an implementation can make whatever performance
    optimizations the hardware allows.  To this end, when using
    such mappings you must be explicit about what you want to happen."

I'm not a DMA performance expert. As such, from that comment, it sounded to me
as if replacing dma-contig/dma-sg with dma streaming will always give "performance
optimizations the hardware allows".

If this is always true, why preserve the old vb2-dma-contig/vb2-dma-sg?

In other words, I suspect that the above is just half of the story ;)

On a separate but related issue, while doing DMABUF tests with an Exynos4
hardware, using a s5p sensor, sending data to s5p-tv, I noticed a CPU
consumption of about 42%, which seems too high. Could it be related to
not using the DMA streaming API?

(c/c Sylwester, due to this last comment)

Regards,
Mauro



--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Federico Vaga Dec. 11, 2012, 1:54 p.m. UTC | #5
Sorry for the late answer to this.

> > This allocator is needed because some device (like STA2X11 VIP) cannot
> > work
> > with DMA sg or DMA coherent. Some other device (like the one used by
> > Jonathan when he proposes vb2-dma-nc allocator) can obtain much better
> > performance with DMA streaming than coherent.
> 
> Ok, please add such explanations at the patch's descriptions, as it is
> important not only for me, but to others that may need to use it..

OK

> >> 	2) why vb2-dma-config can't be patched to use dma_map_single
> >> 
> >> (eventually using a different vb2_io_modes bit?);
> > 
> > I did not modify vb2-dma-contig because I was thinking that each DMA
> > memory
> > allocator should reflect a DMA API.
> 
> The basic reason for having more than one VB low-level handling (vb2 was
> inspired on this concept) is that some DMA APIs are very different than
> the other ones (see vmalloc x DMA S/G for example).
> 
> I didn't make a diff between videobuf2-dma-streaming and
> videobuf2-dma-contig, so I can't tell if it makes sense to merge them or
> not, but the above argument seems too weak. I was expecting for a technical
> reason why it wouldn't make sense for merging them.

I cannot work on this now. But I think that I can do an integration like the
one that I pushed some months ago (a8f3c203e19b702fa5e8e83a9b6fb3c5a6d1cce4).
Wind River made those changes to videobuf-contig and I tested, fixed and
pushed them.

> >> 	3) what are the usecases for it.
> >> 
> >> Could you please detail it? Without that, one that would be needing to
> >> write a driver will have serious doubts about what would be the right
> >> driver for its usage. Also, please document it at the driver itself.

I don't have a full understanding of the board, so I don't know exactly why
dma_alloc_coherent does not work. I based my development on previous work by
Wind River. I asked Wind River (which did all the work on this board) for
the technical explanation of why coherent doesn't work, but they do not
know. That's why I made the new allocator: coherent doesn't work and the HW
doesn't support SG.

> I'm not a DMA performance expert. As such, from that comment, it sounded to
> me that replacing dma-config/dma-sg by dma streaming will always give
> "performance optimizations the hardware allow".

Me neither: I'm not a DMA performance expert, I'm just a user of the DMA API. My
hardware simply works only with that interface; it is not a performance
problem.

> On a separate but related issue, while doing DMABUF tests with an Exynos4
> hardware, using a s5p sensor, sending data to s5p-tv, I noticed a CPU
> consumption of about 42%, which seems too high. Could it be related to
> not using the DMA streaming API?

As I wrote above, I'm not a DMA performance expert. I skip this
Marek Szyprowski Dec. 18, 2012, 2:41 p.m. UTC | #6
Hello,

I'm sorry for the delay, I've been terribly busy recently.

On 12/11/2012 2:54 PM, Federico Vaga wrote:

>> > This allocator is needed because some device (like STA2X11 VIP) cannot
>> > work
>> > with DMA sg or DMA coherent. Some other device (like the one used by
>> > Jonathan when he proposes vb2-dma-nc allocator) can obtain much better
>> > performance with DMA streaming than coherent.
>>
>> Ok, please add such explanations at the patch's descriptions, as it is
>> important not only for me, but to others that may need to use it..
>
> OK
>
>> >> 	2) why vb2-dma-config can't be patched to use dma_map_single
>> >>
>> >> (eventually using a different vb2_io_modes bit?);
>> >
>> > I did not modify vb2-dma-contig because I was thinking that each DMA
>> > memory allocator should reflect a DMA API.
>>
>> The basic reason for having more than one VB low-level handling (vb2 was
>> inspired on this concept) is that some DMA APIs are very different than
>> the other ones (see vmalloc x DMA S/G for example).
>>
>> I didn't make a diff between videobuf2-dma-streaming and
>> videobuf2-dma-contig, so I can't tell if it makes sense to merge them or
>> not, but the above argument seems too weak. I was expecting for a technical
>> reason why it wouldn't make sense for merging them.
>
> I cannot work on this now. But I think that I can do an integration like the
> one that I pushed some month ago (a8f3c203e19b702fa5e8e83a9b6fb3c5a6d1cce4).
> Wind River made that changes to videobuf-contig and I tested, fixed and
> pushed.
>
>> >> 	3) what are the usecases for it.
>> >>
>> >> Could you please detail it? Without that, one that would be needing to
>> >> write a driver will have serious doubts about what would be the right
>> >> driver for its usage. Also, please document it at the driver itself.
>
> I don't have a full understand of the board so I don't know exactly why
> dma_alloc_coherent does not work. I focused my development on previous work by
> Wind River. I asked to Wind River (which did all the work on this board) for
> the technical explanation about why coherent doesn't work, but they do not
> know. That's why I made the new allocator: coherent doesn't work and HW
> doesn't support SG.

Ok, now I see the whole picture. I was convinced that this so-called
streaming allocator was required for performance reasons, not because of
the broken platform support for coherent calls.

My ultimate goal is to have support for both non-cached (coherent) and
cached (non-coherent) buffers in the dma mapping subsystem on top of the
common API. Then both types of buffers will be easily supported by the
dma-contig vb2 allocator. Currently, support for streaming-style buffers
requires completely different dma mapping calls, although from the
device driver point of view the buffers behave similarly, so
implementing them as a separate allocator seems to be the best idea.

I can take a look at the dma coherent issues with that board, but I will 
need some help as I don't have this hardware.

>> I'm not a DMA performance expert. As such, from that comment, it sounded to
>> me that replacing dma-config/dma-sg by dma streaming will always give
>> "performance optimizations the hardware allow".
>
> me too, I'm not a DMA performance expert. I'm just an user of the DMA API. On
> my hardware simply it works only with that interface, it is not a performance
> problem.
>
>> On a separate but related issue, while doing DMABUF tests with an Exynos4
>> hardware, using a s5p sensor, sending data to s5p-tv, I noticed a CPU
>> consumption of about 42%, which seems too high. Could it be related to
>> not using the DMA streaming API?

This might be related to excessive cpu cache flushing on dma buf
buffers, as there was some misunderstanding about who is responsible for that
(I saw some strange code in drm, but it has been changed a few times). I
will add this issue to my todo list.

Best regards
Federico Vaga Dec. 20, 2012, 3:37 p.m. UTC | #7
> I can take a look at the dma coherent issues with that board, but I 
will
> need some help as I don't have this hardware.

I have the hardware, but I don't have full knowledge of the 
boards. As I said before, I asked Wind River, which developed the 
software for the whole board, but they cannot help me.
Mauro Carvalho Chehab Jan. 1, 2013, 12:52 p.m. UTC | #8
Hi Federico,

Em Thu, 20 Dec 2012 16:37:50 +0100
Federico Vaga <federico.vaga@gmail.com> escreveu:

> > I can take a look at the dma coherent issues with that board, but I 
> will
> > need some help as I don't have this hardware.
> 
> I have the hardware, but I don't have the full knowledge of the 
> boards. As I told before, I asked to windriver which develop the 
> software for the whole board, but they cannot help me.
> 

After all those discussions, I'm OK with adding this new driver, but please
add a summary of those discussions to the patch description. As I said,
the reason why this driver is needed is not obvious. So, it needs to be
very well described.

Your new "v3 3/4" patch seems OK to my eyes (I can't test it, as I don't
have the hardware). Yet, there was one merge conflict on it.

Patch 1/4 of this series doesn't apply anymore (maybe it was already
applied?).

So, could you please send us a v4, rebased on the top of staging/for_v3.9
branch of the media-tree?

Thanks!
Mauro
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Federico Vaga Jan. 3, 2013, 4:13 p.m. UTC | #9
> After all those discussions, I'm ok on adding this new driver, but please
> add a summary of those discussions at the patch description. As I said,
> the reason why this driver is needed is not obvious. So, it needs to be
> very well described.

ack. I will ask ST for more information about the board, because the 
architecture side is not in the kernel mainline, but it should be.

> Patch 1/4 of this series doesn't apply anymore (maybe it were already
> applied?).

Probably already applied

> So, could you please send us a v4, rebased on the top of staging/for_v3.9
> branch of the media-tree?

I will do it
Federico Vaga Jan. 4, 2013, 1:30 p.m. UTC | #10
On Thursday 03 January 2013 17:13:14 Federico Vaga wrote:
> > After all those discussions, I'm ok on adding this new driver, but please
> > add a summary of those discussions at the patch description. As I said,
> > the reason why this driver is needed is not obvious. So, it needs to be
> > very well described.
> 
> ack. I will ask more information to ST about the board because the
> architecture side it is not in the kernel mainline, but it should be.

I have more information about DMA on the board that I'm using; probably, I can 
make dma-contig work with my device. Unfortunately, I cannot test at the 
moment; I hope to do a test on Monday.
Federico Vaga Jan. 6, 2013, 5:04 p.m. UTC | #11
> I have more information about DMA on the board that I'm using; probably, I
> can make dma-contig work with my device.

Ok, the STA2X11 driver now works with a patched dma-contig allocator. So, my 
streaming allocator is not mandatory. 

I based my work on the previous work made by Windriver, but now I understand 
the DMA problem, and the solution is easy.
I investigated this DMA issue (I asked Alessandro Rubini, who worked on this 
board). The problem is that on the sta2x11 architecture only the first 
512MB are available through the PCI bus, but the allocator can allocate memory 
for DMA above this limit. By using the GFP_DMA flag the allocation takes place 
below 16MB, so it works.

If you think that the streaming allocator can be useful for someone else (who 
has performance problem with uncached DMA like Jonathan when he did dma-nc 
allocator), I can resend the patch.
I cannot do performance test at the moment because I don't have the time, so I 
cannot personally justify the presence of a new allocator. I think that I will 
do some performance test with this driver; if I will find that dma-streaming 
works better I will propose it again.

I will propose V4 patches soon.
Alessandro Rubini Jan. 6, 2013, 11:09 p.m. UTC | #12
> The problem is that on the sta2x11 architecture only the first 
> 512MB are available through the PCI bus, but the allocator can allocate memory 
> for DMA above this limit. By using GFP_DMA flags the allocation take place 
> under the 16MB so it works.

Still, you are not running the upstream allocator.  IIUC, you added a
"gfp_t" field in the platform data or somewhere, so the sta2x11 can
request GFP_DMA to be OR'd, while other users remain unaffected.  Will
you please submit the patch to achieve that?

> I cannot do performance test at the moment because I don't have the time, so I 
> cannot personally justify the presence of a new allocator.

I don't expect you'll see serious performance differences on the PC. I
think ARM users will have better benefits, due to the different cache
architecture.  You told me Jon measured meaningful figures on a Marvel
CPU.

> I will propose V4 patches soon.

thanks
/alessandro
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jonathan Corbet Jan. 7, 2013, 7:40 p.m. UTC | #13
On Mon, 7 Jan 2013 00:09:47 +0100
Alessandro Rubini <rubini@gnudd.com> wrote:

> I don't expect you'll see serious performance differences on the PC. I
> think ARM users will have better benefits, due to the different cache
> architecture.  You told me Jon measured meaningful figures on a Marvel
> CPU.

It made the difference between 10 frames per second with the CPU running
flat out and 30fps mostly idle.  I think that probably counts as
meaningful, yeah...:)

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mauro Carvalho Chehab Jan. 7, 2013, 8:15 p.m. UTC | #14
Em Mon, 7 Jan 2013 12:40:50 -0700
Jonathan Corbet <corbet@lwn.net> escreveu:

> On Mon, 7 Jan 2013 00:09:47 +0100
> Alessandro Rubini <rubini@gnudd.com> wrote:
> 
> > I don't expect you'll see serious performance differences on the PC. I
> > think ARM users will have better benefits, due to the different cache
> > architecture.  You told me Jon measured meaningful figures on a Marvel
> > CPU.
> 
> It made the difference between 10 frames per second with the CPU running
> flat out and 30fps mostly idle.  I think that probably counts as
> meaningful, yeah...:)

Couldn't this performance difference be due to the usage of GFP_DMA inside
the VB2 code, like Federico's new patch series is proposing?

If not, why is there such a large performance penalty?

Regards,
Mauro
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marek Szyprowski Jan. 8, 2013, 6:50 a.m. UTC | #15
On 1/7/2013 9:15 PM, Mauro Carvalho Chehab wrote:
> Em Mon, 7 Jan 2013 12:40:50 -0700
> Jonathan Corbet <corbet@lwn.net> escreveu:
>
> > On Mon, 7 Jan 2013 00:09:47 +0100
> > Alessandro Rubini <rubini@gnudd.com> wrote:
> >
> > > I don't expect you'll see serious performance differences on the PC. I
> > > think ARM users will have better benefits, due to the different cache
> > > architecture.  You told me Jon measured meaningful figures on a Marvel
> > > CPU.
> >
> > It made the difference between 10 frames per second with the CPU running
> > flat out and 30fps mostly idle.  I think that probably counts as
> > meaningful, yeah...:)
>
> Couldn't this performance difference be due to the usage of GFP_DMA inside
> the VB2 code, like Federico's new patch series is proposing?
>
> If not, why are there a so large performance penalty?

Nope, this was caused rather by a very poor CPU access to non-cached (aka
'coherent') memory and the way the video data has been accessed/read 
with CPU.

Best regards
Jonathan Corbet Jan. 8, 2013, 2:31 p.m. UTC | #16
On Tue, 08 Jan 2013 07:50:41 +0100
Marek Szyprowski <m.szyprowski@samsung.com> wrote:

> > Couldn't this performance difference be due to the usage of GFP_DMA inside
> > the VB2 code, like Federico's new patch series is proposing?
> >
> > If not, why are there a so large performance penalty?  
> 
> Nope, this was caused rather by a very poor CPU access to non-cached (aka
> 'coherent') memory and the way the video data has been accessed/read 
> with CPU.

Exactly.  Uncached memory *hurts*, especially if you're having to touch it
all with the CPU.

jon
--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Olbrich Jan. 9, 2013, 7:48 a.m. UTC | #17
On Tue, Jan 08, 2013 at 07:31:30AM -0700, Jonathan Corbet wrote:
> On Tue, 08 Jan 2013 07:50:41 +0100
> Marek Szyprowski <m.szyprowski@samsung.com> wrote:
> 
> > > Couldn't this performance difference be due to the usage of GFP_DMA inside
> > > the VB2 code, like Federico's new patch series is proposing?
> > >
> > > If not, why are there a so large performance penalty?  
> > 
> > Nope, this was caused rather by a very poor CPU access to non-cached (aka
> > 'coherent') memory and the way the video data has been accessed/read 
> > with CPU.
> 
> Exactly.  Uncached memory *hurts*, especially if you're having to touch it
> all with the CPU.

Even worse, on ARMv7 (at least) the cache implements or is necessary for
(I'm not an expert here) unaligned access. I've seen applications crash
on non-cached memory with a bus error because gcc assumes unaligned access
works. And there isn't even an exception handler in the kernel, probably for
the same reason.

Michael
diff mbox

Patch

diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig
index 0c54e19..60548a7 100644
--- a/drivers/media/v4l2-core/Kconfig
+++ b/drivers/media/v4l2-core/Kconfig
@@ -79,3 +79,8 @@  config VIDEOBUF2_DMA_SG
 	#depends on HAS_DMA
 	select VIDEOBUF2_CORE
 	select VIDEOBUF2_MEMOPS
+
+config VIDEOBUF2_DMA_STREAMING
+	tristate
+	select VIDEOBUF2_CORE
+	select VIDEOBUF2_MEMOPS
diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
index c2d61d4..0b2756f 100644
--- a/drivers/media/v4l2-core/Makefile
+++ b/drivers/media/v4l2-core/Makefile
@@ -28,6 +28,7 @@  obj-$(CONFIG_VIDEOBUF2_MEMOPS) += videobuf2-memops.o
 obj-$(CONFIG_VIDEOBUF2_VMALLOC) += videobuf2-vmalloc.o
 obj-$(CONFIG_VIDEOBUF2_DMA_CONTIG) += videobuf2-dma-contig.o
 obj-$(CONFIG_VIDEOBUF2_DMA_SG) += videobuf2-dma-sg.o
+obj-$(CONFIG_VIDEOBUF2_DMA_STREAMING) += videobuf2-dma-streaming.o
 
 ccflags-y += -I$(srctree)/drivers/media/dvb-core
 ccflags-y += -I$(srctree)/drivers/media/dvb-frontends
diff --git a/drivers/media/v4l2-core/videobuf2-dma-streaming.c b/drivers/media/v4l2-core/videobuf2-dma-streaming.c
new file mode 100644
index 0000000..c839e05
--- /dev/null
+++ b/drivers/media/v4l2-core/videobuf2-dma-streaming.c
@@ -0,0 +1,205 @@ 
+/*
+ * videobuf2-dma-streaming.c - DMA streaming memory allocator for videobuf2
+ *
+ * Copyright (C) 2012 Federico Vaga <federico.vaga@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/dma-mapping.h>
+
+#include <media/videobuf2-core.h>
+#include <media/videobuf2-dma-streaming.h>
+#include <media/videobuf2-memops.h>
+
+struct vb2_streaming_conf {	/* allocator context built by vb2_dma_streaming_init_ctx() */
+	struct device			*dev;	/* device passed to the dma_* calls */
+};
+struct vb2_streaming_buf {	/* per-buffer state returned by the .alloc op */
+	struct vb2_streaming_conf	*conf;	/* context the buffer was allocated with */
+	void				*vaddr;	/* kernel address from alloc_pages_exact() */
+
+	dma_addr_t			dma_handle;	/* result of dma_map_single() on vaddr */
+
+	unsigned long			size;	/* size requested at allocation, in bytes */
+	struct vm_area_struct		*vma;	/* not referenced by any function in this file */
+
+	atomic_t			refcount;	/* users of the buffer; freed when it drops to 0 */
+	struct vb2_vmarea_handler	handler;	/* stored in vm_private_data by the .mmap op */
+};
+
+/*
+ * Drop one reference on the buffer. When the last reference goes away the
+ * DMA mapping is torn down, the pages are released and the descriptor is
+ * freed.
+ */
+static void vb2_dma_streaming_put(void *buf_priv)
+{
+	struct vb2_streaming_buf *sbuf = buf_priv;
+
+	/* Other users are still holding the buffer: nothing to release yet. */
+	if (!atomic_dec_and_test(&sbuf->refcount))
+		return;
+
+	dma_unmap_single(sbuf->conf->dev, sbuf->dma_handle, sbuf->size,
+			 DMA_FROM_DEVICE);
+	free_pages_exact(sbuf->vaddr, sbuf->size);
+	kfree(sbuf);
+}
+
+/*
+ * Allocate a buffer of @size bytes and map it for device-to-memory DMA.
+ *
+ * @alloc_ctx: context returned by vb2_dma_streaming_init_ctx()
+ * @size:      buffer size in bytes
+ *
+ * Returns the new buffer descriptor holding one reference, or an ERR_PTR()
+ * with a negative errno on failure.
+ */
+static void *vb2_dma_streaming_alloc(void *alloc_ctx, unsigned long size)
+{
+	struct vb2_streaming_conf *conf = alloc_ctx;
+	struct vb2_streaming_buf *buf;
+	int err;
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	buf->vaddr = alloc_pages_exact(size, GFP_KERNEL | GFP_DMA);
+	if (!buf->vaddr) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	buf->dma_handle = dma_map_single(conf->dev, buf->vaddr, size,
+					 DMA_FROM_DEVICE);
+	if (dma_mapping_error(conf->dev, buf->dma_handle)) {
+		dev_err(conf->dev, "dma_map_single failed\n");
+		/*
+		 * dma_mapping_error() only promises a non-zero value on
+		 * failure, not a negative errno, so its result must not be
+		 * fed to ERR_PTR(): callers testing IS_ERR() would take a
+		 * bogus pointer for a valid buffer.
+		 */
+		err = -ENOMEM;
+		goto out_pages;
+	}
+
+	buf->conf = conf;
+	buf->size = size;
+	buf->handler.refcount = &buf->refcount;
+	buf->handler.put = vb2_dma_streaming_put;
+	buf->handler.arg = buf;
+
+	/* kzalloc() zeroed the counter, so this takes the first reference. */
+	atomic_inc(&buf->refcount);
+	return buf;
+
+out_pages:
+	/* buf->size is not set yet on this path, so free with @size. */
+	free_pages_exact(buf->vaddr, size);
+out:
+	kfree(buf);
+	return ERR_PTR(err);
+}
+
+/* Return the allocator cookie: a pointer to the buffer's DMA handle. */
+static void *vb2_dma_streaming_cookie(void *buf_priv)
+{
+	struct vb2_streaming_buf *sbuf = buf_priv;
+	dma_addr_t *handle = &sbuf->dma_handle;
+
+	return handle;
+}
+
+/* Return the kernel virtual address of the buffer, or NULL without one. */
+static void *vb2_dma_streaming_vaddr(void *buf_priv)
+{
+	struct vb2_streaming_buf *sbuf = buf_priv;
+
+	return sbuf ? sbuf->vaddr : NULL;
+}
+
+/* Report the current value of the buffer's reference counter. */
+static unsigned int vb2_dma_streaming_num_users(void *buf_priv)
+{
+	struct vb2_streaming_buf *sbuf = buf_priv;
+	unsigned int users = atomic_read(&sbuf->refcount);
+
+	return users;
+}
+
+/*
+ * Map the buffer into a userspace VMA, one page at a time.
+ *
+ * At most buf->size bytes are mapped, even if the VMA is larger. On success
+ * the VMA is bound to vb2_common_vm_ops and its open() callback is invoked.
+ *
+ * Returns 0 on success or a negative errno; a vm_insert_page() failure is
+ * reported with the error code that call returned.
+ */
+static int vb2_dma_streaming_mmap(void *buf_priv, struct vm_area_struct *vma)
+{
+	struct vb2_streaming_buf *buf = buf_priv;
+	unsigned long uaddr = vma->vm_start;
+	unsigned long kaddr = (unsigned long)buf->vaddr;
+	unsigned long size = vma->vm_end - vma->vm_start;
+	unsigned long off;
+	struct page *page;
+	int err;
+
+	/* Never map beyond the end of the buffer. */
+	if (size > buf->size)
+		size = buf->size;
+
+	for (off = 0; off < size; off += PAGE_SIZE) {
+		page = virt_to_page((void *)(kaddr + off));
+		if (!page) {
+			dev_err(buf->conf->dev, "mmap: virt_to_page failed\n");
+			return -ENOMEM;
+		}
+		err = vm_insert_page(vma, uaddr + off, page);
+		if (err) {
+			dev_err(buf->conf->dev, "mmap: insert failed %d\n", err);
+			/* Propagate the real cause instead of a blanket -ENOMEM. */
+			return err;
+		}
+	}
+
+	vma->vm_ops = &vb2_common_vm_ops;
+	vma->vm_flags |= VM_DONTEXPAND;
+	vma->vm_private_data = &buf->handler;
+
+	vma->vm_ops->open(vma);
+
+	return 0;
+}
+
+/* vb2 .prepare hook: sync the streaming mapping for device access. */
+static void vb2_dma_streaming_prepare(void *buf_priv)
+{
+	struct vb2_streaming_buf *sbuf = buf_priv;
+	struct device *dev = sbuf->conf->dev;
+
+	dma_sync_single_for_device(dev, sbuf->dma_handle, sbuf->size,
+				   DMA_FROM_DEVICE);
+}
+
+/* vb2 .finish hook: sync the streaming mapping for CPU access. */
+static void vb2_dma_streaming_finish(void *buf_priv)
+{
+	struct vb2_streaming_buf *sbuf = buf_priv;
+	struct device *dev = sbuf->conf->dev;
+
+	dma_sync_single_for_cpu(dev, sbuf->dma_handle, sbuf->size,
+				DMA_FROM_DEVICE);
+}
+
+const struct vb2_mem_ops vb2_dma_streaming_memops = {	/* exported ops table of this allocator */
+	.alloc		= vb2_dma_streaming_alloc,	/* allocate pages and dma_map_single() them */
+	.put		= vb2_dma_streaming_put,	/* drop a reference; unmap and free on the last one */
+	.cookie		= vb2_dma_streaming_cookie,	/* pointer to the buffer's dma_addr_t */
+	.vaddr		= vb2_dma_streaming_vaddr,	/* kernel virtual address of the buffer */
+	.mmap		= vb2_dma_streaming_mmap,	/* insert the buffer pages into a user VMA */
+	.num_users	= vb2_dma_streaming_num_users,	/* current reference count */
+	.prepare	= vb2_dma_streaming_prepare,	/* dma_sync_single_for_device() */
+	.finish		= vb2_dma_streaming_finish,	/* dma_sync_single_for_cpu() */
+};
+EXPORT_SYMBOL_GPL(vb2_dma_streaming_memops);
+
+/*
+ * Create an allocator context bound to @dev.
+ *
+ * Returns the context, or ERR_PTR(-ENOMEM) if it cannot be allocated.
+ * Release it with vb2_dma_streaming_cleanup_ctx().
+ */
+void *vb2_dma_streaming_init_ctx(struct device *dev)
+{
+	struct vb2_streaming_conf *ctx;
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ctx->dev = dev;
+	return ctx;
+}
+EXPORT_SYMBOL_GPL(vb2_dma_streaming_init_ctx);
+
+/* Free an allocator context obtained from vb2_dma_streaming_init_ctx(). */
+void vb2_dma_streaming_cleanup_ctx(void *alloc_ctx)
+{
+	struct vb2_streaming_conf *ctx = alloc_ctx;
+
+	kfree(ctx);
+}
+EXPORT_SYMBOL_GPL(vb2_dma_streaming_cleanup_ctx);
+
+MODULE_DESCRIPTION("DMA-streaming memory allocator for videobuf2");
+MODULE_AUTHOR("Federico Vaga <federico.vaga@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/media/videobuf2-dma-streaming.h b/include/media/videobuf2-dma-streaming.h
new file mode 100644
index 0000000..2a62d93
--- /dev/null
+++ b/include/media/videobuf2-dma-streaming.h
@@ -0,0 +1,32 @@ 
+/*
+ * videobuf2-dma-streaming.h - DMA streaming memory allocator for videobuf2
+ *
+ * Copyright (C) 2012 Federico Vaga
+ *
+ * Author: Federico Vaga <federico.vaga@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef _MEDIA_VIDEOBUF2_DMA_STREAMING_H
+#define _MEDIA_VIDEOBUF2_DMA_STREAMING_H
+
+#include <media/videobuf2-core.h>
+#include <linux/dma-mapping.h>
+
+void *vb2_dma_streaming_init_ctx(struct device *dev);
+void vb2_dma_streaming_cleanup_ctx(void *alloc_ctx);
+
+extern const struct vb2_mem_ops vb2_dma_streaming_memops;
+
+/*
+ * Return the DMA address of plane @plane_no of @vb by dereferencing the
+ * plane cookie as a dma_addr_t pointer. The cookie is not checked for NULL.
+ */
+static inline dma_addr_t
+vb2_dma_streaming_plane_paddr(struct vb2_buffer *vb, unsigned int plane_no)
+{
+	dma_addr_t *cookie = vb2_plane_cookie(vb, plane_no);
+	dma_addr_t paddr = *cookie;
+
+	return paddr;
+}
+
+#endif