
[v6,3/5] dma-buf: Add ioctls to allow userspace to flush

Message ID 1450304743-6571-4-git-send-email-tiago.vignatti@intel.com (mailing list archive)
State New, archived

Commit Message

Tiago Vignatti Dec. 16, 2015, 10:25 p.m. UTC
From: Daniel Vetter <daniel.vetter@ffwll.ch>

Userspace might need some sort of cache coherency management, e.g. when CPU
and GPU domains are being accessed through dma-buf at the same time. To
circumvent this problem there are begin/end coherency markers that forward
directly to the existing dma-buf device drivers' vfunc hooks. Userspace can make
use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be
used as follows:
     - mmap dma-buf fd
     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
       want (with the new data being consumed by the GPU or say scanout device)
     - munmap once you don't need the buffer any more
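
For illustration, a minimal userspace sketch of one such cycle (assuming the
uapi header added by this patch is installed as <linux/dma-buf.h>; how the
dma-buf fd and its size are obtained -- e.g. via a PRIME export -- is not
shown, and error handling is omitted):

#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/types.h>        /* the proposed header doesn't pull in __u64 itself */
#include <linux/dma-buf.h>

static void upload_cycle(int dmabuf_fd, size_t size, const void *src)
{
        struct dma_buf_sync sync = { 0 };
        void *map;

        map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                   dmabuf_fd, 0);

        /* 1. begin CPU access (SYNC_START is 0, so only the direction is set) */
        sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
        ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);

        /* 2. CPU writes to the mapped area */
        memcpy(map, src, size);

        /* 3. end CPU access so the GPU or scanout device sees coherent data */
        sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
        ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);

        /* steps 1-3 can be repeated per drawing/upload cycle; munmap when done */
        munmap(map, size);
}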

v2 (Tiago): Fix header file type names (u64 -> __u64)
v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end
dma-buf functions. Check for overflows in start/length.
v4 (Tiago): use 2d regions for sync.
v5 (Tiago): forget about 2d regions (v4); use _IOW in DMA_BUF_IOCTL_SYNC and
remove range information from struct dma_buf_sync.
v6 (Tiago): use __u64 structured padded flags instead of enum. Adjust
documentation about the recommendation on using sync ioctls.

Cc: Sumit Semwal <sumit.semwal@linaro.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Tiago Vignatti <tiago.vignatti@intel.com>
---
 Documentation/dma-buf-sharing.txt | 22 +++++++++++++++++++-
 drivers/dma-buf/dma-buf.c         | 43 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/dma-buf.h      | 38 ++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 include/uapi/linux/dma-buf.h

Comments

Alex Deucher Dec. 17, 2015, 6:19 p.m. UTC | #1
On Wed, Dec 16, 2015 at 5:25 PM, Tiago Vignatti
<tiago.vignatti@intel.com> wrote:
> From: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> The userspace might need some sort of cache coherency management e.g. when CPU
> and GPU domains are being accessed through dma-buf at the same time. To
> circumvent this problem there are begin/end coherency markers, that forward
> directly to existing dma-buf device drivers vfunc hooks. Userspace can make use
> of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be
> used like following:
>      - mmap dma-buf fd
>      - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
>        to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
>        want (with the new data being consumed by the GPU or say scanout device)
>      - munmap once you don't need the buffer any more
>
> v2 (Tiago): Fix header file type names (u64 -> __u64)
> v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end
> dma-buf functions. Check for overflows in start/length.
> v4 (Tiago): use 2d regions for sync.
> v5 (Tiago): forget about 2d regions (v4); use _IOW in DMA_BUF_IOCTL_SYNC and
> remove range information from struct dma_buf_sync.
> v6 (Tiago): use __u64 structured padded flags instead enum. Adjust
> documentation about the recommendation on using sync ioctls.
>
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> Signed-off-by: Tiago Vignatti <tiago.vignatti@intel.com>
> ---
>  Documentation/dma-buf-sharing.txt | 22 +++++++++++++++++++-
>  drivers/dma-buf/dma-buf.c         | 43 +++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/dma-buf.h      | 38 ++++++++++++++++++++++++++++++++++
>  3 files changed, 102 insertions(+), 1 deletion(-)
>  create mode 100644 include/uapi/linux/dma-buf.h
>
> diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
> index 4f4a84b..2ddd4b2 100644
> --- a/Documentation/dma-buf-sharing.txt
> +++ b/Documentation/dma-buf-sharing.txt
> @@ -350,7 +350,27 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
>     handles, too). So it's beneficial to support this in a similar fashion on
>     dma-buf to have a good transition path for existing Android userspace.
>
> -   No special interfaces, userspace simply calls mmap on the dma-buf fd.
> +   No special interfaces, userspace simply calls mmap on the dma-buf fd. Very
> +   important to note though is that, even if it is not mandatory, the userspace
> +   is strongly recommended to always use the cache synchronization ioctl
> +   (DMA_BUF_IOCTL_SYNC) discussed next.
> +
> +   Some systems might need some sort of cache coherency management e.g. when
> +   CPU and GPU domains are being accessed through dma-buf at the same time. To
> +   circumvent this problem there are begin/end coherency markers, that forward
> +   directly to existing dma-buf device drivers vfunc hooks. Userspace can make
> +   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
> +   would be used like following:
> +     - mmap dma-buf fd
> +     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
> +       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
> +       want (with the new data being consumed by the GPU or say scanout device)
> +     - munmap once you don't need the buffer any more
> +
> +    In principle systems with the memory cache shared by the GPU and CPU may
> +    not need SYNC_START and SYNC_END but still, userspace is always encouraged
> +    to use these ioctls before and after, respectively, when accessing the
> +    mapped address.
>
>  2. Supporting existing mmap interfaces in importers
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index b2ac13b..9a298bd 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -34,6 +34,8 @@
>  #include <linux/poll.h>
>  #include <linux/reservation.h>
>
> +#include <uapi/linux/dma-buf.h>
> +
>  static inline int is_dma_buf_file(struct file *);
>
>  struct dma_buf_list {
> @@ -251,11 +253,52 @@ out:
>         return events;
>  }
>
> +static long dma_buf_ioctl(struct file *file,
> +                         unsigned int cmd, unsigned long arg)
> +{
> +       struct dma_buf *dmabuf;
> +       struct dma_buf_sync sync;
> +       enum dma_data_direction direction;
> +
> +       dmabuf = file->private_data;
> +
> +       if (!is_dma_buf_file(file))
> +               return -EINVAL;
> +
> +       switch (cmd) {
> +       case DMA_BUF_IOCTL_SYNC:
> +               if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
> +                       return -EFAULT;
> +
> +               if (sync.flags & DMA_BUF_SYNC_RW)
> +                       direction = DMA_BIDIRECTIONAL;
> +               else if (sync.flags & DMA_BUF_SYNC_READ)
> +                       direction = DMA_FROM_DEVICE;
> +               else if (sync.flags & DMA_BUF_SYNC_WRITE)
> +                       direction = DMA_TO_DEVICE;
> +               else
> +                       return -EINVAL;
> +
> +               if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
> +                       return -EINVAL;
> +
> +               if (sync.flags & DMA_BUF_SYNC_END)
> +                       dma_buf_end_cpu_access(dmabuf, direction);
> +               else
> +                       dma_buf_begin_cpu_access(dmabuf, direction);
> +
> +               return 0;
> +       default:
> +               return -ENOTTY;
> +       }
> +}
> +
>  static const struct file_operations dma_buf_fops = {
>         .release        = dma_buf_release,
>         .mmap           = dma_buf_mmap_internal,
>         .llseek         = dma_buf_llseek,
>         .poll           = dma_buf_poll,
> +       .unlocked_ioctl = dma_buf_ioctl,
>  };
>
>  /*
> diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
> new file mode 100644
> index 0000000..bd195f2
> --- /dev/null
> +++ b/include/uapi/linux/dma-buf.h
> @@ -0,0 +1,38 @@
> +/*
> + * Framework for buffer objects that can be shared across devices/subsystems.
> + *
> + * Copyright(C) 2015 Intel Ltd
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef _DMA_BUF_UAPI_H_
> +#define _DMA_BUF_UAPI_H_
> +
> +/* begin/end dma-buf functions used for userspace mmap. */
> +struct dma_buf_sync {
> +       __u64 flags;
> +};
> +
> +#define DMA_BUF_SYNC_READ      (1 << 0)
> +#define DMA_BUF_SYNC_WRITE     (2 << 0)
> +#define DMA_BUF_SYNC_RW        (3 << 0)

Maybe make this:

#define DMA_BUF_SYNC_RW        (DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE)

or maybe:

#define DMA_BUF_SYNC_RW_MASK        (3 << 0)

> +#define DMA_BUF_SYNC_START     (0 << 2)
> +#define DMA_BUF_SYNC_END       (1 << 2)

if you go with the mask above, maybe:
#define DMA_BUF_SYNC_MASK     (1 << 2)
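
For illustration, with the first of these suggestions applied the flag block
would read something like this (a sketch of the proposal above, not
necessarily the final naming):

#define DMA_BUF_SYNC_READ      (1 << 0)
#define DMA_BUF_SYNC_WRITE     (2 << 0)
#define DMA_BUF_SYNC_RW        (DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE)
#define DMA_BUF_SYNC_START     (0 << 2)
#define DMA_BUF_SYNC_END       (1 << 2)
#define DMA_BUF_SYNC_VALID_FLAGS_MASK \
        (DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END)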

> +#define DMA_BUF_SYNC_VALID_FLAGS_MASK \
> +       (DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END)
> +
> +#define DMA_BUF_BASE           'b'
> +#define DMA_BUF_IOCTL_SYNC     _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
> +
> +#endif
> --
> 2.1.4
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
Thomas Hellstrom Dec. 17, 2015, 9:58 p.m. UTC | #2
On 12/16/2015 11:25 PM, Tiago Vignatti wrote:
> From: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> The userspace might need some sort of cache coherency management e.g. when CPU
> and GPU domains are being accessed through dma-buf at the same time. To
> circumvent this problem there are begin/end coherency markers, that forward
> directly to existing dma-buf device drivers vfunc hooks. Userspace can make use
> of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be
> used like following:
>      - mmap dma-buf fd
>      - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
>        to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
>        want (with the new data being consumed by the GPU or say scanout device)
>      - munmap once you don't need the buffer any more
>
> v2 (Tiago): Fix header file type names (u64 -> __u64)
> v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end
> dma-buf functions. Check for overflows in start/length.
> v4 (Tiago): use 2d regions for sync.
> v5 (Tiago): forget about 2d regions (v4); use _IOW in DMA_BUF_IOCTL_SYNC and
> remove range information from struct dma_buf_sync.
> v6 (Tiago): use __u64 structured padded flags instead enum. Adjust
> documentation about the recommendation on using sync ioctls.
>
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> Signed-off-by: Tiago Vignatti <tiago.vignatti@intel.com>
> ---
>  Documentation/dma-buf-sharing.txt | 22 +++++++++++++++++++-
>  drivers/dma-buf/dma-buf.c         | 43 +++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/dma-buf.h      | 38 ++++++++++++++++++++++++++++++++++
>  3 files changed, 102 insertions(+), 1 deletion(-)
>  create mode 100644 include/uapi/linux/dma-buf.h
>
> diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
> index 4f4a84b..2ddd4b2 100644
> --- a/Documentation/dma-buf-sharing.txt
> +++ b/Documentation/dma-buf-sharing.txt
> @@ -350,7 +350,27 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
>     handles, too). So it's beneficial to support this in a similar fashion on
>     dma-buf to have a good transition path for existing Android userspace.
>  
> -   No special interfaces, userspace simply calls mmap on the dma-buf fd.
> +   No special interfaces, userspace simply calls mmap on the dma-buf fd. Very
> +   important to note though is that, even if it is not mandatory, the userspace
> +   is strongly recommended to always use the cache synchronization ioctl
> +   (DMA_BUF_IOCTL_SYNC) discussed next.
> +
> +   Some systems might need some sort of cache coherency management e.g. when
> +   CPU and GPU domains are being accessed through dma-buf at the same time. To
> +   circumvent this problem there are begin/end coherency markers, that forward
> +   directly to existing dma-buf device drivers vfunc hooks. Userspace can make
> +   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
> +   would be used like following:
> +     - mmap dma-buf fd
> +     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
> +       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
> +       want (with the new data being consumed by the GPU or say scanout device)
> +     - munmap once you don't need the buffer any more
> +
> +    In principle systems with the memory cache shared by the GPU and CPU may
> +    not need SYNC_START and SYNC_END but still, userspace is always encouraged
> +    to use these ioctls before and after, respectively, when accessing the
> +    mapped address.
>  

I think the wording here is far too weak. If this is a generic
user-space interface and syncing is required for
a) Correctness: then syncing must be mandatory.
b) Optimal performance: then an implementation must generate expected
results also in the absence of SYNC ioctls, but is allowed to rely on
correct pairing of SYNC_START and SYNC_END to render correctly.


Also recalling the discussion of multidimensional sync, which we said we
would add when it was needed, my worst nightmare is when (not if) there
are a number of important applications starting to abuse this interface
for small writes or reads to large DMA buffers. It will work flawlessly
on coherent architectures, and probably some incoherent architectures as
well, but will probably be mostly useless on VMware. What is the plan
for adding 2D sync to make it work? How do we persuade app developers to
think of damage regions, and can we count on support from the DRM
community when this happens?

/Thomas

>  2. Supporting existing mmap interfaces in importers
>  
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index b2ac13b..9a298bd 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -34,6 +34,8 @@
>  #include <linux/poll.h>
>  #include <linux/reservation.h>
>  
> +#include <uapi/linux/dma-buf.h>
> +
>  static inline int is_dma_buf_file(struct file *);
>  
>  struct dma_buf_list {
> @@ -251,11 +253,52 @@ out:
>  	return events;
>  }
>  
> +static long dma_buf_ioctl(struct file *file,
> +			  unsigned int cmd, unsigned long arg)
> +{
> +	struct dma_buf *dmabuf;
> +	struct dma_buf_sync sync;
> +	enum dma_data_direction direction;
> +
> +	dmabuf = file->private_data;
> +
> +	if (!is_dma_buf_file(file))
> +		return -EINVAL;
> +
> +	switch (cmd) {
> +	case DMA_BUF_IOCTL_SYNC:
> +		if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
> +			return -EFAULT;
> +
> +		if (sync.flags & DMA_BUF_SYNC_RW)
> +			direction = DMA_BIDIRECTIONAL;
> +		else if (sync.flags & DMA_BUF_SYNC_READ)
> +			direction = DMA_FROM_DEVICE;
> +		else if (sync.flags & DMA_BUF_SYNC_WRITE)
> +			direction = DMA_TO_DEVICE;
> +		else
> +			return -EINVAL;
> +
> +		if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
> +			return -EINVAL;
> +
> +		if (sync.flags & DMA_BUF_SYNC_END)
> +			dma_buf_end_cpu_access(dmabuf, direction);
> +		else
> +			dma_buf_begin_cpu_access(dmabuf, direction);
> +
> +		return 0;
> +	default:
> +		return -ENOTTY;
> +	}
> +}
> +
>  static const struct file_operations dma_buf_fops = {
>  	.release	= dma_buf_release,
>  	.mmap		= dma_buf_mmap_internal,
>  	.llseek		= dma_buf_llseek,
>  	.poll		= dma_buf_poll,
> +	.unlocked_ioctl	= dma_buf_ioctl,
>  };
>  
>  /*
> diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
> new file mode 100644
> index 0000000..bd195f2
> --- /dev/null
> +++ b/include/uapi/linux/dma-buf.h
> @@ -0,0 +1,38 @@
> +/*
> + * Framework for buffer objects that can be shared across devices/subsystems.
> + *
> + * Copyright(C) 2015 Intel Ltd
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#ifndef _DMA_BUF_UAPI_H_
> +#define _DMA_BUF_UAPI_H_
> +
> +/* begin/end dma-buf functions used for userspace mmap. */
> +struct dma_buf_sync {
> +	__u64 flags;
> +};
> +
> +#define DMA_BUF_SYNC_READ      (1 << 0)
> +#define DMA_BUF_SYNC_WRITE     (2 << 0)
> +#define DMA_BUF_SYNC_RW        (3 << 0)
> +#define DMA_BUF_SYNC_START     (0 << 2)
> +#define DMA_BUF_SYNC_END       (1 << 2)
> +#define DMA_BUF_SYNC_VALID_FLAGS_MASK \
> +	(DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END)
> +
> +#define DMA_BUF_BASE		'b'
> +#define DMA_BUF_IOCTL_SYNC	_IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
> +
> +#endif
Daniel Vetter Dec. 18, 2015, 3:29 p.m. UTC | #3
On Thu, Dec 17, 2015 at 10:58:20PM +0100, Thomas Hellstrom wrote:
> On 12/16/2015 11:25 PM, Tiago Vignatti wrote:
> > From: Daniel Vetter <daniel.vetter@ffwll.ch>
> >
> > The userspace might need some sort of cache coherency management e.g. when CPU
> > and GPU domains are being accessed through dma-buf at the same time. To
> > circumvent this problem there are begin/end coherency markers, that forward
> > directly to existing dma-buf device drivers vfunc hooks. Userspace can make use
> > of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be
> > used like following:
> >      - mmap dma-buf fd
> >      - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
> >        to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
> >        want (with the new data being consumed by the GPU or say scanout device)
> >      - munmap once you don't need the buffer any more
> >
> > v2 (Tiago): Fix header file type names (u64 -> __u64)
> > v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end
> > dma-buf functions. Check for overflows in start/length.
> > v4 (Tiago): use 2d regions for sync.
> > v5 (Tiago): forget about 2d regions (v4); use _IOW in DMA_BUF_IOCTL_SYNC and
> > remove range information from struct dma_buf_sync.
> > v6 (Tiago): use __u64 structured padded flags instead enum. Adjust
> > documentation about the recommendation on using sync ioctls.
> >
> > Cc: Sumit Semwal <sumit.semwal@linaro.org>
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > Signed-off-by: Tiago Vignatti <tiago.vignatti@intel.com>
> > ---
> >  Documentation/dma-buf-sharing.txt | 22 +++++++++++++++++++-
> >  drivers/dma-buf/dma-buf.c         | 43 +++++++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/dma-buf.h      | 38 ++++++++++++++++++++++++++++++++++
> >  3 files changed, 102 insertions(+), 1 deletion(-)
> >  create mode 100644 include/uapi/linux/dma-buf.h
> >
> > diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
> > index 4f4a84b..2ddd4b2 100644
> > --- a/Documentation/dma-buf-sharing.txt
> > +++ b/Documentation/dma-buf-sharing.txt
> > @@ -350,7 +350,27 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
> >     handles, too). So it's beneficial to support this in a similar fashion on
> >     dma-buf to have a good transition path for existing Android userspace.
> >  
> > -   No special interfaces, userspace simply calls mmap on the dma-buf fd.
> > +   No special interfaces, userspace simply calls mmap on the dma-buf fd. Very
> > +   important to note though is that, even if it is not mandatory, the userspace
> > +   is strongly recommended to always use the cache synchronization ioctl
> > +   (DMA_BUF_IOCTL_SYNC) discussed next.
> > +
> > +   Some systems might need some sort of cache coherency management e.g. when
> > +   CPU and GPU domains are being accessed through dma-buf at the same time. To
> > +   circumvent this problem there are begin/end coherency markers, that forward
> > +   directly to existing dma-buf device drivers vfunc hooks. Userspace can make
> > +   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
> > +   would be used like following:
> > +     - mmap dma-buf fd
> > +     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
> > +       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
> > +       want (with the new data being consumed by the GPU or say scanout device)
> > +     - munmap once you don't need the buffer any more
> > +
> > +    In principle systems with the memory cache shared by the GPU and CPU may
> > +    not need SYNC_START and SYNC_END but still, userspace is always encouraged
> > +    to use these ioctls before and after, respectively, when accessing the
> > +    mapped address.
> >  
> 
> I think the wording here is far too weak. If this is a generic
> user-space interface and syncing
> is required for
> a) Correctness: then syncing must be mandatory.
> b) Optimal performance then an implementation must generate expected
> results also in the absence of SYNC ioctls, but is allowed to rely on
> correct pairing of SYNC_START and SYNC_END to render correctly.

Yeah I guess the wording should be stronger to make it clear you'd better
call these, always.

> Also recalling the discussion of multidimensional sync, which we said we
> would add when it was needed, my worst nightmare is when (not if) there
> are a number of important applications starting to abuse this interface
> for small writes or reads to large DMA buffers. It will work flawlessly
> on coherent architectures, and probably some incoherent architectures as
> well, but will probably  be mostly useless on VMware. What is the plan
> for adding 2D sync to make it work? How do we pursuade app developers to
> think of damage regions, and can we count on support from the DRM
> community when this happens?

The current use-case in cros seems to be to do full-blown buffer uploads,
nothing partial. I don't think there's anything we can do really (flushing
for small uploads will make i915 crawl too, albeit you can still see a
slideshow and it's not a complete stop), except maybe improve the docs
with a statement that trying to use dma-buf mmap for small uploads will
result in horrible performance. I think the inevitable reality is that some
truly horrible software will always ship. I am pretty hopeful though that
this won't be too common, since the main users for anything dma-buf are on
mobile, and those systems (at least on i915) are all incoherent. So as
long as it'll run acceptably on i915 I think it should run at least ok-ish
on vmwgfx too.
-Daniel
Tiago Vignatti Dec. 18, 2015, 6:46 p.m. UTC | #4
On 12/17/2015 04:19 PM, Alex Deucher wrote:
> On Wed, Dec 16, 2015 at 5:25 PM, Tiago Vignatti
> <tiago.vignatti@intel.com> wrote:
>> From: Daniel Vetter <daniel.vetter@ffwll.ch>
>>
>> The userspace might need some sort of cache coherency management e.g. when CPU
>> and GPU domains are being accessed through dma-buf at the same time. To
>> circumvent this problem there are begin/end coherency markers, that forward
>> directly to existing dma-buf device drivers vfunc hooks. Userspace can make use
>> of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be
>> used like following:
>>       - mmap dma-buf fd
>>       - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
>>         to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
>>         want (with the new data being consumed by the GPU or say scanout device)
>>       - munmap once you don't need the buffer any more
>>
>> v2 (Tiago): Fix header file type names (u64 -> __u64)
>> v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end
>> dma-buf functions. Check for overflows in start/length.
>> v4 (Tiago): use 2d regions for sync.
>> v5 (Tiago): forget about 2d regions (v4); use _IOW in DMA_BUF_IOCTL_SYNC and
>> remove range information from struct dma_buf_sync.
>> v6 (Tiago): use __u64 structured padded flags instead enum. Adjust
>> documentation about the recommendation on using sync ioctls.
>>
>> Cc: Sumit Semwal <sumit.semwal@linaro.org>
>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>> Signed-off-by: Tiago Vignatti <tiago.vignatti@intel.com>
>> ---
>>   Documentation/dma-buf-sharing.txt | 22 +++++++++++++++++++-
>>   drivers/dma-buf/dma-buf.c         | 43 +++++++++++++++++++++++++++++++++++++++
>>   include/uapi/linux/dma-buf.h      | 38 ++++++++++++++++++++++++++++++++++
>>   3 files changed, 102 insertions(+), 1 deletion(-)
>>   create mode 100644 include/uapi/linux/dma-buf.h
>>
>> diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
>> index 4f4a84b..2ddd4b2 100644
>> --- a/Documentation/dma-buf-sharing.txt
>> +++ b/Documentation/dma-buf-sharing.txt
>> @@ -350,7 +350,27 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
>>      handles, too). So it's beneficial to support this in a similar fashion on
>>      dma-buf to have a good transition path for existing Android userspace.
>>
>> -   No special interfaces, userspace simply calls mmap on the dma-buf fd.
>> +   No special interfaces, userspace simply calls mmap on the dma-buf fd. Very
>> +   important to note though is that, even if it is not mandatory, the userspace
>> +   is strongly recommended to always use the cache synchronization ioctl
>> +   (DMA_BUF_IOCTL_SYNC) discussed next.
>> +
>> +   Some systems might need some sort of cache coherency management e.g. when
>> +   CPU and GPU domains are being accessed through dma-buf at the same time. To
>> +   circumvent this problem there are begin/end coherency markers, that forward
>> +   directly to existing dma-buf device drivers vfunc hooks. Userspace can make
>> +   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
>> +   would be used like following:
>> +     - mmap dma-buf fd
>> +     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
>> +       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
>> +       want (with the new data being consumed by the GPU or say scanout device)
>> +     - munmap once you don't need the buffer any more
>> +
>> +    In principle systems with the memory cache shared by the GPU and CPU may
>> +    not need SYNC_START and SYNC_END but still, userspace is always encouraged
>> +    to use these ioctls before and after, respectively, when accessing the
>> +    mapped address.
>>
>>   2. Supporting existing mmap interfaces in importers
>>
>> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
>> index b2ac13b..9a298bd 100644
>> --- a/drivers/dma-buf/dma-buf.c
>> +++ b/drivers/dma-buf/dma-buf.c
>> @@ -34,6 +34,8 @@
>>   #include <linux/poll.h>
>>   #include <linux/reservation.h>
>>
>> +#include <uapi/linux/dma-buf.h>
>> +
>>   static inline int is_dma_buf_file(struct file *);
>>
>>   struct dma_buf_list {
>> @@ -251,11 +253,52 @@ out:
>>          return events;
>>   }
>>
>> +static long dma_buf_ioctl(struct file *file,
>> +                         unsigned int cmd, unsigned long arg)
>> +{
>> +       struct dma_buf *dmabuf;
>> +       struct dma_buf_sync sync;
>> +       enum dma_data_direction direction;
>> +
>> +       dmabuf = file->private_data;
>> +
>> +       if (!is_dma_buf_file(file))
>> +               return -EINVAL;
>> +
>> +       switch (cmd) {
>> +       case DMA_BUF_IOCTL_SYNC:
>> +               if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
>> +                       return -EFAULT;
>> +
>> +               if (sync.flags & DMA_BUF_SYNC_RW)
>> +                       direction = DMA_BIDIRECTIONAL;
>> +               else if (sync.flags & DMA_BUF_SYNC_READ)
>> +                       direction = DMA_FROM_DEVICE;
>> +               else if (sync.flags & DMA_BUF_SYNC_WRITE)
>> +                       direction = DMA_TO_DEVICE;
>> +               else
>> +                       return -EINVAL;
>> +
>> +               if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
>> +                       return -EINVAL;
>> +
>> +               if (sync.flags & DMA_BUF_SYNC_END)
>> +                       dma_buf_end_cpu_access(dmabuf, direction);
>> +               else
>> +                       dma_buf_begin_cpu_access(dmabuf, direction);
>> +
>> +               return 0;
>> +       default:
>> +               return -ENOTTY;
>> +       }
>> +}
>> +
>>   static const struct file_operations dma_buf_fops = {
>>          .release        = dma_buf_release,
>>          .mmap           = dma_buf_mmap_internal,
>>          .llseek         = dma_buf_llseek,
>>          .poll           = dma_buf_poll,
>> +       .unlocked_ioctl = dma_buf_ioctl,
>>   };
>>
>>   /*
>> diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
>> new file mode 100644
>> index 0000000..bd195f2
>> --- /dev/null
>> +++ b/include/uapi/linux/dma-buf.h
>> @@ -0,0 +1,38 @@
>> +/*
>> + * Framework for buffer objects that can be shared across devices/subsystems.
>> + *
>> + * Copyright(C) 2015 Intel Ltd
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published by
>> + * the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License along with
>> + * this program.  If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#ifndef _DMA_BUF_UAPI_H_
>> +#define _DMA_BUF_UAPI_H_
>> +
>> +/* begin/end dma-buf functions used for userspace mmap. */
>> +struct dma_buf_sync {
>> +       __u64 flags;
>> +};
>> +
>> +#define DMA_BUF_SYNC_READ      (1 << 0)
>> +#define DMA_BUF_SYNC_WRITE     (2 << 0)
>> +#define DMA_BUF_SYNC_RW        (3 << 0)
>
> Maybe make this:
>
> #define DMA_BUF_SYNC_RW        (DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE)
>
> or maybe:
>
> #define DMA_BUF_SYNC_RW_MASK        (3 << 0)

Thanks Alex. I think I like your first option since it's a bit more
descriptive than the other -- I'm switching to it now.

Tiago
Tiago Vignatti Dec. 18, 2015, 7:50 p.m. UTC | #5
On 12/17/2015 07:58 PM, Thomas Hellstrom wrote:
> On 12/16/2015 11:25 PM, Tiago Vignatti wrote:
>> From: Daniel Vetter <daniel.vetter@ffwll.ch>
>>
>> The userspace might need some sort of cache coherency management e.g. when CPU
>> and GPU domains are being accessed through dma-buf at the same time. To
>> circumvent this problem there are begin/end coherency markers, that forward
>> directly to existing dma-buf device drivers vfunc hooks. Userspace can make use
>> of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence would be
>> used like following:
>>       - mmap dma-buf fd
>>       - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
>>         to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
>>         want (with the new data being consumed by the GPU or say scanout device)
>>       - munmap once you don't need the buffer any more
>>
>> v2 (Tiago): Fix header file type names (u64 -> __u64)
>> v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the begin/end
>> dma-buf functions. Check for overflows in start/length.
>> v4 (Tiago): use 2d regions for sync.
>> v5 (Tiago): forget about 2d regions (v4); use _IOW in DMA_BUF_IOCTL_SYNC and
>> remove range information from struct dma_buf_sync.
>> v6 (Tiago): use __u64 structured padded flags instead enum. Adjust
>> documentation about the recommendation on using sync ioctls.
>>
>> Cc: Sumit Semwal <sumit.semwal@linaro.org>
>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>> Signed-off-by: Tiago Vignatti <tiago.vignatti@intel.com>
>> ---
>>   Documentation/dma-buf-sharing.txt | 22 +++++++++++++++++++-
>>   drivers/dma-buf/dma-buf.c         | 43 +++++++++++++++++++++++++++++++++++++++
>>   include/uapi/linux/dma-buf.h      | 38 ++++++++++++++++++++++++++++++++++
>>   3 files changed, 102 insertions(+), 1 deletion(-)
>>   create mode 100644 include/uapi/linux/dma-buf.h
>>
>> diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
>> index 4f4a84b..2ddd4b2 100644
>> --- a/Documentation/dma-buf-sharing.txt
>> +++ b/Documentation/dma-buf-sharing.txt
>> @@ -350,7 +350,27 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
>>      handles, too). So it's beneficial to support this in a similar fashion on
>>      dma-buf to have a good transition path for existing Android userspace.
>>
>> -   No special interfaces, userspace simply calls mmap on the dma-buf fd.
>> +   No special interfaces, userspace simply calls mmap on the dma-buf fd. Very
>> +   important to note though is that, even if it is not mandatory, the userspace
>> +   is strongly recommended to always use the cache synchronization ioctl
>> +   (DMA_BUF_IOCTL_SYNC) discussed next.
>> +
>> +   Some systems might need some sort of cache coherency management e.g. when
>> +   CPU and GPU domains are being accessed through dma-buf at the same time. To
>> +   circumvent this problem there are begin/end coherency markers, that forward
>> +   directly to existing dma-buf device drivers vfunc hooks. Userspace can make
>> +   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
>> +   would be used like following:
>> +     - mmap dma-buf fd
>> +     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
>> +       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
>> +       want (with the new data being consumed by the GPU or say scanout device)
>> +     - munmap once you don't need the buffer any more
>> +
>> +    In principle systems with the memory cache shared by the GPU and CPU may
>> +    not need SYNC_START and SYNC_END but still, userspace is always encouraged
>> +    to use these ioctls before and after, respectively, when accessing the
>> +    mapped address.
>>
>
> I think the wording here is far too weak. If this is a generic
> user-space interface and syncing
> is required for
> a) Correctness: then syncing must be mandatory.
> b) Optimal performance then an implementation must generate expected
> results also in the absence of SYNC ioctls, but is allowed to rely on
> correct pairing of SYNC_START and SYNC_END to render correctly.

Thomas, do you think the following write-up captures this?


-   No special interfaces, userspace simply calls mmap on the dma-buf fd.
+   No special interfaces, userspace simply calls mmap on the dma-buf fd, making
+   sure that the cache synchronization ioctl (DMA_BUF_IOCTL_SYNC) is *always*
+   used when the access happens. This is discussed next paragraphs.
+
+   Some systems might need some sort of cache coherency management e.g. when
+   CPU and GPU domains are being accessed through dma-buf at the same time. To
+   circumvent this problem there are begin/end coherency markers, that forward
+   directly to existing dma-buf device drivers vfunc hooks. Userspace can make
+   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
+   would be used like following:
+     - mmap dma-buf fd
+     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
+       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
+       want (with the new data being consumed by the GPU or say scanout device)
+     - munmap once you don't need the buffer any more
+
+    Therefore, for correctness and optimal performance, systems with the memory
+    cache shared by the GPU and CPU i.e. the "coherent" and also "incoherent"
+    systems are always required to use SYNC_START and SYNC_END before and
+    after, respectively, when accessing the mapped address.


Thank you,

Tiago
Thomas Hellstrom Dec. 21, 2015, 9:38 a.m. UTC | #6
On 12/18/2015 08:50 PM, Tiago Vignatti wrote:
> On 12/17/2015 07:58 PM, Thomas Hellstrom wrote:
>> On 12/16/2015 11:25 PM, Tiago Vignatti wrote:
>>> From: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>
>>> The userspace might need some sort of cache coherency management
>>> e.g. when CPU
>>> and GPU domains are being accessed through dma-buf at the same time. To
>>> circumvent this problem there are begin/end coherency markers, that
>>> forward
>>> directly to existing dma-buf device drivers vfunc hooks. Userspace
>>> can make use
>>> of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
>>> would be
>>> used like following:
>>>       - mmap dma-buf fd
>>>       - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2.
>>> read/write
>>>         to mmap area 3. SYNC_END ioctl. This can be repeated as
>>> often as you
>>>         want (with the new data being consumed by the GPU or say
>>> scanout device)
>>>       - munmap once you don't need the buffer any more
>>>
>>> v2 (Tiago): Fix header file type names (u64 -> __u64)
>>> v3 (Tiago): Add documentation. Use enum dma_buf_sync_flags to the
>>> begin/end
>>> dma-buf functions. Check for overflows in start/length.
>>> v4 (Tiago): use 2d regions for sync.
>>> v5 (Tiago): forget about 2d regions (v4); use _IOW in
>>> DMA_BUF_IOCTL_SYNC and
>>> remove range information from struct dma_buf_sync.
>>> v6 (Tiago): use __u64 structured padded flags instead enum. Adjust
>>> documentation about the recommendation on using sync ioctls.
>>>
>>> Cc: Sumit Semwal <sumit.semwal@linaro.org>
>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>> Signed-off-by: Tiago Vignatti <tiago.vignatti@intel.com>
>>> ---
>>>   Documentation/dma-buf-sharing.txt | 22 +++++++++++++++++++-
>>>   drivers/dma-buf/dma-buf.c         | 43
>>> +++++++++++++++++++++++++++++++++++++++
>>>   include/uapi/linux/dma-buf.h      | 38
>>> ++++++++++++++++++++++++++++++++++
>>>   3 files changed, 102 insertions(+), 1 deletion(-)
>>>   create mode 100644 include/uapi/linux/dma-buf.h
>>>
>>> diff --git a/Documentation/dma-buf-sharing.txt
>>> b/Documentation/dma-buf-sharing.txt
>>> index 4f4a84b..2ddd4b2 100644
>>> --- a/Documentation/dma-buf-sharing.txt
>>> +++ b/Documentation/dma-buf-sharing.txt
>>> @@ -350,7 +350,27 @@ Being able to mmap an export dma-buf buffer
>>> object has 2 main use-cases:
>>>      handles, too). So it's beneficial to support this in a similar
>>> fashion on
>>>      dma-buf to have a good transition path for existing Android
>>> userspace.
>>>
>>> -   No special interfaces, userspace simply calls mmap on the
>>> dma-buf fd.
>>> +   No special interfaces, userspace simply calls mmap on the
>>> dma-buf fd. Very
>>> +   important to note though is that, even if it is not mandatory,
>>> the userspace
>>> +   is strongly recommended to always use the cache synchronization
>>> ioctl
>>> +   (DMA_BUF_IOCTL_SYNC) discussed next.
>>> +
>>> +   Some systems might need some sort of cache coherency management
>>> e.g. when
>>> +   CPU and GPU domains are being accessed through dma-buf at the
>>> same time. To
>>> +   circumvent this problem there are begin/end coherency markers,
>>> that forward
>>> +   directly to existing dma-buf device drivers vfunc hooks.
>>> Userspace can make
>>> +   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The
>>> sequence
>>> +   would be used like following:
>>> +     - mmap dma-buf fd
>>> +     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2.
>>> read/write
>>> +       to mmap area 3. SYNC_END ioctl. This can be repeated as
>>> often as you
>>> +       want (with the new data being consumed by the GPU or say
>>> scanout device)
>>> +     - munmap once you don't need the buffer any more
>>> +
>>> +    In principle systems with the memory cache shared by the GPU
>>> and CPU may
>>> +    not need SYNC_START and SYNC_END but still, userspace is always
>>> encouraged
>>> +    to use these ioctls before and after, respectively, when
>>> accessing the
>>> +    mapped address.
>>>
>>
>> I think the wording here is far too weak. If this is a generic
>> user-space interface and syncing
>> is required for
>> a) Correctness: then syncing must be mandatory.
>> b) Optimal performance then an implementation must generate expected
>> results also in the absence of SYNC ioctls, but is allowed to rely on
>> correct pairing of SYNC_START and SYNC_END to render correctly.
>
> Thomas, do you think the following write-up captures this?
>
>
> -   No special interfaces, userspace simply calls mmap on the dma-buf fd.
> +   No special interfaces, userspace simply calls mmap on the dma-buf
> fd, making
> +   sure that the cache synchronization ioctl (DMA_BUF_IOCTL_SYNC) is
> *always*
> +   used when the access happens. This is discussed next paragraphs.
> +
> +   Some systems might need some sort of cache coherency management
> e.g. when
> +   CPU and GPU domains are being accessed through dma-buf at the same
> time. To
> +   circumvent this problem there are begin/end coherency markers,
> that forward
> +   directly to existing dma-buf device drivers vfunc hooks. Userspace
> can make
> +   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The
> sequence
> +   would be used like following:
> +     - mmap dma-buf fd
> +     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2.
> read/write
> +       to mmap area 3. SYNC_END ioctl. This can be repeated as often
> as you
> +       want (with the new data being consumed by the GPU or say
> scanout device)
> +     - munmap once you don't need the buffer any more
> +
> +    Therefore, for correctness and optimal performance, systems with
> the memory
> +    cache shared by the GPU and CPU i.e. the "coherent" and also
> "incoherent"
> +    systems are always required to use SYNC_START and SYNC_END before
> and
> +    after, respectively, when accessing the mapped address.
>
>
> Thank you,
>
> Tiago
Yes, that sounds better,

Thanks,
Thomas

Patch

diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 4f4a84b..2ddd4b2 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -350,7 +350,27 @@  Being able to mmap an export dma-buf buffer object has 2 main use-cases:
    handles, too). So it's beneficial to support this in a similar fashion on
    dma-buf to have a good transition path for existing Android userspace.
 
-   No special interfaces, userspace simply calls mmap on the dma-buf fd.
+   No special interfaces, userspace simply calls mmap on the dma-buf fd. Very
+   important to note though is that, even if it is not mandatory, the userspace
+   is strongly recommended to always use the cache synchronization ioctl
+   (DMA_BUF_IOCTL_SYNC) discussed next.
+
+   Some systems might need some sort of cache coherency management e.g. when
+   CPU and GPU domains are being accessed through dma-buf at the same time. To
+   circumvent this problem there are begin/end coherency markers, that forward
+   directly to existing dma-buf device drivers vfunc hooks. Userspace can make
+   use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
+   would be used like following:
+     - mmap dma-buf fd
+     - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
+       to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
+       want (with the new data being consumed by the GPU or say scanout device)
+     - munmap once you don't need the buffer any more
+
+    In principle systems with the memory cache shared by the GPU and CPU may
+    not need SYNC_START and SYNC_END but still, userspace is always encouraged
+    to use these ioctls before and after, respectively, when accessing the
+    mapped address.
 
 2. Supporting existing mmap interfaces in importers
 
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index b2ac13b..9a298bd 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -34,6 +34,8 @@ 
 #include <linux/poll.h>
 #include <linux/reservation.h>
 
+#include <uapi/linux/dma-buf.h>
+
 static inline int is_dma_buf_file(struct file *);
 
 struct dma_buf_list {
@@ -251,11 +253,52 @@  out:
 	return events;
 }
 
+static long dma_buf_ioctl(struct file *file,
+			  unsigned int cmd, unsigned long arg)
+{
+	struct dma_buf *dmabuf;
+	struct dma_buf_sync sync;
+	enum dma_data_direction direction;
+
+	dmabuf = file->private_data;
+
+	if (!is_dma_buf_file(file))
+		return -EINVAL;
+
+	switch (cmd) {
+	case DMA_BUF_IOCTL_SYNC:
+		if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
+			return -EFAULT;
+
+		if (sync.flags & DMA_BUF_SYNC_RW)
+			direction = DMA_BIDIRECTIONAL;
+		else if (sync.flags & DMA_BUF_SYNC_READ)
+			direction = DMA_FROM_DEVICE;
+		else if (sync.flags & DMA_BUF_SYNC_WRITE)
+			direction = DMA_TO_DEVICE;
+		else
+			return -EINVAL;
+
+		if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
+			return -EINVAL;
+
+		if (sync.flags & DMA_BUF_SYNC_END)
+			dma_buf_end_cpu_access(dmabuf, direction);
+		else
+			dma_buf_begin_cpu_access(dmabuf, direction);
+
+		return 0;
+	default:
+		return -ENOTTY;
+	}
+}
+
 static const struct file_operations dma_buf_fops = {
 	.release	= dma_buf_release,
 	.mmap		= dma_buf_mmap_internal,
 	.llseek		= dma_buf_llseek,
 	.poll		= dma_buf_poll,
+	.unlocked_ioctl	= dma_buf_ioctl,
 };
 
 /*
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
new file mode 100644
index 0000000..bd195f2
--- /dev/null
+++ b/include/uapi/linux/dma-buf.h
@@ -0,0 +1,38 @@ 
+/*
+ * Framework for buffer objects that can be shared across devices/subsystems.
+ *
+ * Copyright(C) 2015 Intel Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _DMA_BUF_UAPI_H_
+#define _DMA_BUF_UAPI_H_
+
+/* begin/end dma-buf functions used for userspace mmap. */
+struct dma_buf_sync {
+	__u64 flags;
+};
+
+#define DMA_BUF_SYNC_READ      (1 << 0)
+#define DMA_BUF_SYNC_WRITE     (2 << 0)
+#define DMA_BUF_SYNC_RW        (3 << 0)
+#define DMA_BUF_SYNC_START     (0 << 2)
+#define DMA_BUF_SYNC_END       (1 << 2)
+#define DMA_BUF_SYNC_VALID_FLAGS_MASK \
+	(DMA_BUF_SYNC_RW | DMA_BUF_SYNC_END)
+
+#define DMA_BUF_BASE		'b'
+#define DMA_BUF_IOCTL_SYNC	_IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
+
+#endif