
[RFC] dma-buf: Import/export the implicit fences on the dma-buf

Message ID 1468274378-23081-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson July 11, 2016, 9:59 p.m. UTC
When dealing with user interfaces that utilize explicit fences, it is
convenient to sometimes create those fences after the fact, i.e. to
query the dma-buf for the current set of implicit fences, encapsulate
those into a sync_file and hand that fd back to userspace.
Correspondingly, being able to add an explicit fence back into the mix
of fences being tracked by the dma-buf allows that userspace fence to be
included in any implicit tracking.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: dri-devel@lists.freedesktop.org
---

Gustavo, could you look at the sync-file/fence-array handling? There's
definitely room for a cleaner sync_file_create() API.

I was thinking about this for the "why not sync-file" question Gustavo
posed for the vgem_fences. I wanted to add an ioctl to the vgem to allow
exporting and creating fences from sync-file for testing passing
explicit fences around between drivers, and realised that I was just
writing a generic mechanism that only required dma-buf.

Whilst this interface could be used for lazily creating explicit fences,
drivers will also likely want to support a specific ioctl to skip the
dmabuf creation. I think this interface will be useful with the vgem
fences for testing sync_file handling in drivers (on i915 at the moment,
my tests for sync_file involve sleeping and a few white lies), so it
would fulfil a similar role in driver testing to debugfs/sw_sync?
(sw_sync is still more apt for testing timelines etc, vgem feels more
apt for ease of testing rendering.)
-Chris

---
 drivers/dma-buf/dma-buf.c    | 100 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/dma-buf.h |   9 ++++
 2 files changed, 109 insertions(+)
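
For illustration, a rough sketch of how userspace might drive the two
proposed ioctls. This is hypothetical usage based purely on the uapi
additions below (it assumes a <linux/dma-buf.h> with this patch applied)
and is not code from the patch:

#include <stdbool.h>
#include <sys/ioctl.h>
#include <linux/dma-buf.h>

/* Export the fences currently tracked by a dma-buf as a sync_file fd. */
static int export_fences(int dmabuf_fd, bool write)
{
        struct dma_buf_fence cmd = {
                /* WRITE selects the exclusive fence, otherwise the shared ones */
                .flags = write ? DMA_BUF_FENCE_WRITE : 0,
        };

        if (ioctl(dmabuf_fd, DMA_BUF_IOCTL_EXPORT_FENCE, &cmd))
                return -1;

        return cmd.fd; /* sync_file fd: poll/wait on it, or pass it on */
}

/* Import an explicit fence (a sync_file fd) back into the dma-buf. */
static int import_fence(int dmabuf_fd, int sync_file_fd, bool write)
{
        struct dma_buf_fence cmd = {
                .fd = sync_file_fd,
                /* WRITE installs it as the exclusive fence, otherwise shared */
                .flags = write ? DMA_BUF_FENCE_WRITE : 0,
        };

        return ioctl(dmabuf_fd, DMA_BUF_IOCTL_IMPORT_FENCE, &cmd);
}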

Comments

Christian König July 12, 2016, 11:53 a.m. UTC | #1
Am 11.07.2016 um 23:59 schrieb Chris Wilson:
> When dealing with user interfaces that utilize explicit fences, it is
> convenient to sometimes create those fences after the fact, i.e. to
> query the dma-buf for the current set of implicit fences, encapsulate
> those into a sync_file and hand that fd back to userspace.
> Correspondingly, being able to add an explicit fence back into the mix
> of fences being tracked by the dma-buf allows that userspace fence to be
> included in any implicit tracking.

Well I think that this is a rather questionable interface.

For example, how do you deal with race conditions? E.g. between querying
the fences from the reservation object and adding a new one, we could
gain new fences because the kernel swapped something out or another
thread made a submission with this buffer.

In addition to that, IIRC reservation_object_add_excl_fence() currently
replaces all shared fences with the exclusive one. A malicious
application could use this to trick the kernel driver into thinking that
this buffer object is idle while it is still being accessed by some
operation. At least with GPU operations you can easily take over the
system if you manage to get access to a page table this way.

Regards,
Christian.

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
> Cc: Rob Clark <robdclark@gmail.com>
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: dri-devel@lists.freedesktop.org
> ---
>
> Gustavo, could you look at the sync-file/fence-array handling? There's
> definitely room for a cleaner sync_file_create() API.
>
> I was thinking about this for the "why not sync-file" question Gustavo
> posed for the vgem_fences. I wanted to add an ioctl to the vgem to allow
> exporting and creating fences from sync-file for testing passing
> explicit userspaces around between drivers, and realised that I was just
> writing a generic mechanism that only required dma-buf.
>
> Whilst this interface could be used for lazily creating explicit fences,
> drivers will also likely want to support specific ioctl to skip the
> dmabuf creation, I think this interfaces will be useful with the vgem
> fences for testing sync_file handling in drivers (on i915 at the moment,
> my tests for sync_file involve sleeping and a few white lies). So
> fulfilling a similar role in driver testing to debugfs/sw_sync?
> (sw_sync is still more apt for testing timelines etc, vgem feels more
> apt for ease of testing rendering.)
> -Chris
>
> ---
>   drivers/dma-buf/dma-buf.c    | 100 +++++++++++++++++++++++++++++++++++++++++++
>   include/uapi/linux/dma-buf.h |   9 ++++
>   2 files changed, 109 insertions(+)
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 41fbce0c273a..6f066a8affd7 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -26,11 +26,13 @@
>   #include <linux/slab.h>
>   #include <linux/dma-buf.h>
>   #include <linux/fence.h>
> +#include <linux/fence-array.h>
>   #include <linux/anon_inodes.h>
>   #include <linux/export.h>
>   #include <linux/debugfs.h>
>   #include <linux/module.h>
>   #include <linux/seq_file.h>
> +#include <linux/sync_file.h>
>   #include <linux/poll.h>
>   #include <linux/reservation.h>
>   #include <linux/mm.h>
> @@ -254,6 +256,97 @@ out:
>   	return events;
>   }
>   
> +static long dma_buf_import_fence_ioctl(struct dma_buf *dmabuf,
> +				       struct dma_buf_fence  __user *arg)
> +{
> +	struct reservation_object *resv = dmabuf->resv;
> +	struct dma_buf_fence cmd;
> +	struct fence *fence;
> +	int ret;
> +
> +	if (copy_from_user(&cmd, arg, sizeof(cmd)))
> +		return -EFAULT;
> +
> +	fence = NULL;
> +	//fence = sync_file_get_fence(cmd.fd);
> +	if (!fence)
> +		return -EINVAL;
> +
> +	mutex_lock(&resv->lock.base);
> +	if (cmd.flags & DMA_BUF_FENCE_WRITE)
> +		reservation_object_add_excl_fence(resv, fence);
> +	else if ((ret = reservation_object_reserve_shared(resv)) == 0)
> +		reservation_object_add_shared_fence(resv, fence);
> +	mutex_unlock(&resv->lock.base);
> +
> +	fence_put(fence);
> +	return ret;
> +}
> +
> +static long dma_buf_export_fence_ioctl(struct dma_buf *dmabuf,
> +				       struct dma_buf_fence  __user *arg)
> +{
> +	struct reservation_object *resv = dmabuf->resv;
> +	struct dma_buf_fence cmd;
> +	struct fence *excl, **shared;
> +	struct sync_file *sync = NULL;
> +	unsigned int count, n;
> +	int ret;
> +
> +	if (get_user(cmd.flags, &arg->flags))
> +		return -EFAULT;
> +
> +	ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
> +	if (ret)
> +		return ret;
> +
> +	if (cmd.flags & DMA_BUF_FENCE_WRITE) {
> +		if (excl) {
> +			sync = sync_file_create(excl);
> +			if (!sync) {
> +				ret = -ENOMEM;
> +				fence_put(excl);
> +			}
> +		}
> +		for (n = 0; n < count; n++)
> +			fence_put(shared[n]);
> +		kfree(shared);
> +	} else {
> +		if (count) {
> +			struct fence_array *array;
> +
> +			array = fence_array_create(count, shared, 0, 0, false);
> +			if (!array) {
> +				for (n = 0; n < count; n++)
> +					fence_put(shared[n]);
> +				kfree(shared);
> +			} else
> +				sync = sync_file_create(&array->base);
> +			if (!sync) {
> +				ret = -ENOMEM;
> +				fence_put(&array->base);
> +			}
> +		}
> +		fence_put(excl);
> +	}
> +	if (ret)
> +		return ret;
> +
> +	cmd.fd = get_unused_fd_flags(O_CLOEXEC);
> +	if (cmd.fd < 0) {
> +		fput(sync->file);
> +		return cmd.fd;
> +	}
> +
> +	if (put_user(cmd.fd, &arg->fd)) {
> +		fput(sync->file);
> +		return -EFAULT;
> +	}
> +
> +	fd_install(cmd.fd, sync->file);
> +	return 0;
> +}
> +
>   static long dma_buf_ioctl(struct file *file,
>   			  unsigned int cmd, unsigned long arg)
>   {
> @@ -292,6 +385,13 @@ static long dma_buf_ioctl(struct file *file,
>   			ret = dma_buf_begin_cpu_access(dmabuf, direction);
>   
>   		return ret;
> +
> +	case DMA_BUF_IOCTL_IMPORT_FENCE:
> +		return dma_buf_import_fence_ioctl(dmabuf, (void __user *)arg);
> +
> +	case DMA_BUF_IOCTL_EXPORT_FENCE:
> +		return dma_buf_export_fence_ioctl(dmabuf, (void __user *)arg);
> +
>   	default:
>   		return -ENOTTY;
>   	}
> diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
> index fb0dedb7c121..8d9a0d73ebaa 100644
> --- a/include/uapi/linux/dma-buf.h
> +++ b/include/uapi/linux/dma-buf.h
> @@ -37,4 +37,13 @@ struct dma_buf_sync {
>   #define DMA_BUF_BASE		'b'
>   #define DMA_BUF_IOCTL_SYNC	_IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
>   
> +struct dma_buf_fence {
> +	__s32 fd;
> +	__u32 flags;
> +#define DMA_BUF_FENCE_WRITE	(1 << 0)
> +};
> +
> +#define DMA_BUF_IOCTL_IMPORT_FENCE _IOW(DMA_BUF_BASE, 1, struct dma_buf_fence)
> +#define DMA_BUF_IOCTL_EXPORT_FENCE _IOWR(DMA_BUF_BASE, 2, struct dma_buf_fence)
> +
>   #endif
Chris Wilson July 12, 2016, 12:14 p.m. UTC | #2
On Tue, Jul 12, 2016 at 01:53:56PM +0200, Christian König wrote:
> Am 11.07.2016 um 23:59 schrieb Chris Wilson:
> >When dealing with user interfaces that utilize explicit fences, it is
> >convenient to sometimes create those fences after the fact, i.e. to
> >query the dma-buf for the current set of implicit fences, encapsulate
> >those into a sync_file and hand that fd back to userspace.
> >Correspondingly, being able to add an explicit fence back into the mix
> >of fences being tracked by the dma-buf allows that userspace fence to be
> >included in any implicit tracking.
> 
> Well I think that this is a rather questionable interface.
> 
> For example how do you deal with race conditions? E.g. between
> querying the fences from the reservation object and adding a new one
> we could gain new fences because of the kernel swapping things out
> or another thread making some submission with this buffer.
> 
> Additional to that IIRC reservation_object_add_excl_fence()
> currently replaces all shared fences with the exclusive one. A
> malicious application could use this to trick the kernel driver into
> thinking that this buffer object is idle while it is still accessed
> by some operation. At least with GPU operations you can easily take
> over the system when you manage to get access to a page table with
> this.

The only difference here is that we trust the GPU drivers to enforce
the ordering between each other. So we can either insert a wait before
adding the exclusive fence, or we can just not expose an import ioctl.
Extracting the set of fences isn't an issue? (That's the part that has a
more legitimate usecase.)
-Chris
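
A minimal sketch of the first option (wait for the fences already on the
reservation object before installing the imported one as the exclusive
fence), reusing only the helpers visible in the patch plus
reservation_object_wait_timeout_rcu(). The function name is made up, and
new work can still slip in between the wait and the lock, which is the
race Christian points out:

static int dma_buf_import_excl_fence_synced(struct dma_buf *dmabuf,
                                            struct fence *fence)
{
        struct reservation_object *resv = dmabuf->resv;
        long ret;

        /* wait for every fence already tracked, shared and exclusive */
        ret = reservation_object_wait_timeout_rcu(resv, true, true,
                                                  MAX_SCHEDULE_TIMEOUT);
        if (ret < 0)
                return ret;

        ww_mutex_lock(&resv->lock, NULL);
        reservation_object_add_excl_fence(resv, fence);
        ww_mutex_unlock(&resv->lock);

        return 0;
}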
Daniel Vetter July 12, 2016, 2:31 p.m. UTC | #3
On Tue, Jul 12, 2016 at 01:14:41PM +0100, Chris Wilson wrote:
> On Tue, Jul 12, 2016 at 01:53:56PM +0200, Christian König wrote:
> > Am 11.07.2016 um 23:59 schrieb Chris Wilson:
> > >When dealing with user interfaces that utilize explicit fences, it is
> > >convenient to sometimes create those fences after the fact, i.e. to
> > >query the dma-buf for the current set of implicit fences, encapsulate
> > >those into a sync_file and hand that fd back to userspace.
> > >Correspondingly, being able to add an explicit fence back into the mix
> > >of fences being tracked by the dma-buf allows that userspace fence to be
> > >included in any implicit tracking.
> > 
> > Well I think that this is a rather questionable interface.
> > 
> > For example how do you deal with race conditions? E.g. between
> > querying the fences from the reservation object and adding a new one
> > we could gain new fences because of the kernel swapping things out
> > or another thread making some submission with this buffer.
> > 
> > Additional to that IIRC reservation_object_add_excl_fence()
> > currently replaces all shared fences with the exclusive one. A
> > malicious application could use this to trick the kernel driver into
> > thinking that this buffer object is idle while it is still accessed
> > by some operation. At least with GPU operations you can easily take
> > over the system when you manage to get access to a page table with
> > this.
> 
> The only difference here is that we believe the GPU drivers to enforce
> the ordering between each other. So we can either insert a wait before
> adding the exclusive fence, or we can just not expose an import ioctl.
> Extracting the set of fences isn't an issue? (That's the part that has a
> more legitimate usecase.)

If we change the kernel to just merge everything together when importing a
new fence, I think it should be perfectly safe. I.e.
1) grab reservation lock
2) assemble a fence_array with the current fences + the new one passed in
through sync_file.
3) put that into the right slot
4) unlock

If we switch reservations over to fence_array it might even be somewhat
pretty.
-Daniel
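
A rough sketch of those four steps, using only the helpers already
visible in the patch plus fence_array_create(); the function name is
hypothetical and the reference handling is untested:

static int dma_buf_import_fence_merged(struct dma_buf *dmabuf,
                                       struct fence *fence)
{
        struct reservation_object *resv = dmabuf->resv;
        struct fence *excl, **fences, **all;
        struct fence_array *array;
        unsigned int count, n;
        int ret;

        /* 1) grab the reservation lock so the fence set cannot change */
        ww_mutex_lock(&resv->lock, NULL);

        /* 2) snapshot everything currently tracked (we own the references) */
        ret = reservation_object_get_fences_rcu(resv, &excl, &count, &fences);
        if (ret)
                goto unlock;

        /* make room for the old exclusive fence and the imported one */
        all = krealloc(fences, (count + 2) * sizeof(*all), GFP_KERNEL);
        if (!all) {
                ret = -ENOMEM;
                goto put_fences;
        }
        if (excl)
                all[count++] = excl;            /* transfer our reference */
        all[count++] = fence_get(fence);        /* the array keeps this one */

        /* fence_array_create() takes ownership of @all and its references */
        array = fence_array_create(count, all,
                                   fence_context_alloc(1), 1, false);
        if (!array) {
                fences = all;
                excl = NULL;    /* already inside @all, avoid a double put */
                ret = -ENOMEM;
                goto put_fences;
        }

        /* 3) install the merged set in the exclusive slot */
        reservation_object_add_excl_fence(resv, &array->base);
        fence_put(&array->base);
        ret = 0;
        goto unlock;

put_fences:
        for (n = 0; n < count; n++)
                fence_put(fences[n]);
        kfree(fences);
        fence_put(excl);
unlock:
        /* 4) unlock */
        ww_mutex_unlock(&resv->lock);
        return ret;
}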
Christian König July 12, 2016, 3:36 p.m. UTC | #4
Am 12.07.2016 um 16:31 schrieb Daniel Vetter:
> On Tue, Jul 12, 2016 at 01:14:41PM +0100, Chris Wilson wrote:
>> On Tue, Jul 12, 2016 at 01:53:56PM +0200, Christian König wrote:
>>> Am 11.07.2016 um 23:59 schrieb Chris Wilson:
>>>> When dealing with user interfaces that utilize explicit fences, it is
>>>> convenient to sometimes create those fences after the fact, i.e. to
>>>> query the dma-buf for the current set of implicit fences, encapsulate
>>>> those into a sync_file and hand that fd back to userspace.
>>>> Correspondingly, being able to add an explicit fence back into the mix
>>>> of fences being tracked by the dma-buf allows that userspace fence to be
>>>> included in any implicit tracking.
>>> Well I think that this is a rather questionable interface.
>>>
>>> For example how do you deal with race conditions? E.g. between
>>> querying the fences from the reservation object and adding a new one
>>> we could gain new fences because of the kernel swapping things out
>>> or another thread making some submission with this buffer.
>>>
>>> Additional to that IIRC reservation_object_add_excl_fence()
>>> currently replaces all shared fences with the exclusive one. A
>>> malicious application could use this to trick the kernel driver into
>>> thinking that this buffer object is idle while it is still accessed
>>> by some operation. At least with GPU operations you can easily take
>>> over the system when you manage to get access to a page table with
>>> this.
>> The only difference here is that we believe the GPU drivers to enforce
>> the ordering between each other. So we can either insert a wait before
>> adding the exclusive fence, or we can just not expose an import ioctl.
>> Extracting the set of fences isn't an issue? (That's the part that has a
>> more legitimate usecase.)
> If we change the kernel to just merge everything together when importing a
> new fence I think it should be perfectly save. I.e.
> 1) grab reservation lock
> 2) assemble a fence_array with the current fences + the new one passed in
> through sync_file.
> 3) put that into the right slot
> 4) unlock

Yeah, that sounds like it should work.

> If we switch reservations over to fence_array it might even be somewhat
> pretty.

Actually, what I wanted to suggest instead is that we use something like
the amdgpu_sync object for the reservation object.

E.g. a collection-like interface where fences can be added later on and
only the newest one is kept around for each context.

The only problem with that approach is that it is a bit tricky to do
without locking, i.e. with only RCU.

Christian.

> -Daniel
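
A toy sketch of such a collection (hypothetical, loosely modelled on the
idea rather than on the actual amdgpu_sync code; plain list plus
GFP_KERNEL allocation, so it deliberately side-steps the RCU problem
mentioned above):

struct fence_collection {
        struct list_head fences;        /* of struct fence_collection_entry */
};

struct fence_collection_entry {
        struct list_head head;
        struct fence *fence;
};

/* keep at most one fence per context, always the most recent one */
static int fence_collection_add(struct fence_collection *coll,
                                struct fence *fence)
{
        struct fence_collection_entry *e;

        list_for_each_entry(e, &coll->fences, head) {
                if (e->fence->context != fence->context)
                        continue;

                /* same timeline: the later fence implies the earlier one */
                if (fence_is_later(fence, e->fence)) {
                        fence_put(e->fence);
                        e->fence = fence_get(fence);
                }
                return 0;
        }

        e = kmalloc(sizeof(*e), GFP_KERNEL);
        if (!e)
                return -ENOMEM;

        e->fence = fence_get(fence);
        list_add_tail(&e->head, &coll->fences);
        return 0;
}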
Daniel Vetter July 12, 2016, 4:16 p.m. UTC | #5
On Tue, Jul 12, 2016 at 05:36:37PM +0200, Christian König wrote:
> Am 12.07.2016 um 16:31 schrieb Daniel Vetter:
> > On Tue, Jul 12, 2016 at 01:14:41PM +0100, Chris Wilson wrote:
> > > On Tue, Jul 12, 2016 at 01:53:56PM +0200, Christian König wrote:
> > > > Am 11.07.2016 um 23:59 schrieb Chris Wilson:
> > > > > When dealing with user interfaces that utilize explicit fences, it is
> > > > > convenient to sometimes create those fences after the fact, i.e. to
> > > > > query the dma-buf for the current set of implicit fences, encapsulate
> > > > > those into a sync_file and hand that fd back to userspace.
> > > > > Correspondingly, being able to add an explicit fence back into the mix
> > > > > of fences being tracked by the dma-buf allows that userspace fence to be
> > > > > included in any implicit tracking.
> > > > Well I think that this is a rather questionable interface.
> > > > 
> > > > For example how do you deal with race conditions? E.g. between
> > > > querying the fences from the reservation object and adding a new one
> > > > we could gain new fences because of the kernel swapping things out
> > > > or another thread making some submission with this buffer.
> > > > 
> > > > Additional to that IIRC reservation_object_add_excl_fence()
> > > > currently replaces all shared fences with the exclusive one. A
> > > > malicious application could use this to trick the kernel driver into
> > > > thinking that this buffer object is idle while it is still accessed
> > > > by some operation. At least with GPU operations you can easily take
> > > > over the system when you manage to get access to a page table with
> > > > this.
> > > The only difference here is that we believe the GPU drivers to enforce
> > > the ordering between each other. So we can either insert a wait before
> > > adding the exclusive fence, or we can just not expose an import ioctl.
> > > Extracting the set of fences isn't an issue? (That's the part that has a
> > > more legitimate usecase.)
> > If we change the kernel to just merge everything together when importing a
> > new fence I think it should be perfectly save. I.e.
> > 1) grab reservation lock
> > 2) assemble a fence_array with the current fences + the new one passed in
> > through sync_file.
> > 3) put that into the right slot
> > 4) unlock
> 
> Yeah that sounds like this should work.
> 
> > If we switch reservations over to fence_array it might even be somewhat
> > pretty.
> 
> Actually I rather wanted to suggest that we use something like the
> amdgpu_sync object for the reservation object instead.
> 
> E.g. a collection like interface where fences can be added later on and only
> the newest one is kept around for each context.

Yeah, imo fence_array should automatically reduce the set of fences to the
minimal set needed and merge all the ones on the same timeline. And we
could rebase reservations on top of fence-array.

Or at least something along those lines.

> The only problem with that approach is that it is a bit tricky to do without
> locking, e.g. only RCU.

I thought RCU is only for reading/waiting on fences, and that for changing
them you always need to acquire the ww mutex behind the reservation?

Cheers, Daniel
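
That split is roughly how the patch above uses the reservation object:
the RCU helpers for lock-free readers, the ww_mutex for anyone mutating
the fence slots. A small illustration (sketch only, the function is made
up):

static void reservation_rules_example(struct dma_buf *dmabuf,
                                      struct fence *new_fence)
{
        struct reservation_object *resv = dmabuf->resv;
        struct fence *excl, **shared;
        unsigned int count, n;

        /* readers: lockless RCU snapshot, no ww_mutex required */
        if (reservation_object_get_fences_rcu(resv, &excl,
                                              &count, &shared) == 0) {
                fence_put(excl);        /* fence_put(NULL) is a no-op */
                for (n = 0; n < count; n++)
                        fence_put(shared[n]);
                kfree(shared);
        }

        /* writers: must hold the ww_mutex embedded in the object */
        ww_mutex_lock(&resv->lock, NULL);
        reservation_object_add_excl_fence(resv, new_fence);
        ww_mutex_unlock(&resv->lock);
}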
Inki Dae July 14, 2016, 3:05 p.m. UTC | #6
2016-07-12 6:59 GMT+09:00 Chris Wilson <chris@chris-wilson.co.uk>:
> When dealing with user interfaces that utilize explicit fences, it is
> convenient to sometimes create those fences after the fact, i.e. to
> query the dma-buf for the current set of implicit fences, encapsulate
> those into a sync_file and hand that fd back to userspace.
> Correspondingly, being able to add an explicit fence back into the mix
> of fences being tracked by the dma-buf allows that userspace fence to be
> included in any implicit tracking.

Regarding the import fence: an explicit fence can correspond to one of
the sub-allocated blocks of a DMA buffer. What if user-space requests an
import with a fence that corresponds to such a sub-allocated block?
As you know, an implicit fence always corresponds to the whole DMA
buffer, not to a sub-block of it. I guess there may be other things you
should consider for the import fence interface.

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
> Cc: Rob Clark <robdclark@gmail.com>
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: dri-devel@lists.freedesktop.org
> ---
>
> Gustavo, could you look at the sync-file/fence-array handling? There's
> definitely room for a cleaner sync_file_create() API.
>
> I was thinking about this for the "why not sync-file" question Gustavo
> posed for the vgem_fences. I wanted to add an ioctl to the vgem to allow
> exporting and creating fences from sync-file for testing passing
> explicit userspaces around between drivers, and realised that I was just
> writing a generic mechanism that only required dma-buf.
>
> Whilst this interface could be used for lazily creating explicit fences,
> drivers will also likely want to support specific ioctl to skip the
> dmabuf creation, I think this interfaces will be useful with the vgem
> fences for testing sync_file handling in drivers (on i915 at the moment,
> my tests for sync_file involve sleeping and a few white lies). So
> fulfilling a similar role in driver testing to debugfs/sw_sync?
> (sw_sync is still more apt for testing timelines etc, vgem feels more
> apt for ease of testing rendering.)
> -Chris
>
> ---
>  drivers/dma-buf/dma-buf.c    | 100 +++++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/dma-buf.h |   9 ++++
>  2 files changed, 109 insertions(+)
>
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 41fbce0c273a..6f066a8affd7 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -26,11 +26,13 @@
>  #include <linux/slab.h>
>  #include <linux/dma-buf.h>
>  #include <linux/fence.h>
> +#include <linux/fence-array.h>
>  #include <linux/anon_inodes.h>
>  #include <linux/export.h>
>  #include <linux/debugfs.h>
>  #include <linux/module.h>
>  #include <linux/seq_file.h>
> +#include <linux/sync_file.h>
>  #include <linux/poll.h>
>  #include <linux/reservation.h>
>  #include <linux/mm.h>
> @@ -254,6 +256,97 @@ out:
>         return events;
>  }
>
> +static long dma_buf_import_fence_ioctl(struct dma_buf *dmabuf,
> +                                      struct dma_buf_fence  __user *arg)
> +{
> +       struct reservation_object *resv = dmabuf->resv;
> +       struct dma_buf_fence cmd;
> +       struct fence *fence;
> +       int ret;
> +
> +       if (copy_from_user(&cmd, arg, sizeof(cmd)))
> +               return -EFAULT;
> +
> +       fence = NULL;
> +       //fence = sync_file_get_fence(cmd.fd);

Your mistake?

Thanks,
Inki Dae

> +       if (!fence)
> +               return -EINVAL;
> +

Patch

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 41fbce0c273a..6f066a8affd7 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -26,11 +26,13 @@ 
 #include <linux/slab.h>
 #include <linux/dma-buf.h>
 #include <linux/fence.h>
+#include <linux/fence-array.h>
 #include <linux/anon_inodes.h>
 #include <linux/export.h>
 #include <linux/debugfs.h>
 #include <linux/module.h>
 #include <linux/seq_file.h>
+#include <linux/sync_file.h>
 #include <linux/poll.h>
 #include <linux/reservation.h>
 #include <linux/mm.h>
@@ -254,6 +256,97 @@  out:
 	return events;
 }
 
+static long dma_buf_import_fence_ioctl(struct dma_buf *dmabuf,
+				       struct dma_buf_fence  __user *arg)
+{
+	struct reservation_object *resv = dmabuf->resv;
+	struct dma_buf_fence cmd;
+	struct fence *fence;
+	int ret;
+
+	if (copy_from_user(&cmd, arg, sizeof(cmd)))
+		return -EFAULT;
+
+	fence = NULL;
+	//fence = sync_file_get_fence(cmd.fd);
+	if (!fence)
+		return -EINVAL;
+
+	mutex_lock(&resv->lock.base);
+	if (cmd.flags & DMA_BUF_FENCE_WRITE)
+		reservation_object_add_excl_fence(resv, fence);
+	else if ((ret = reservation_object_reserve_shared(resv)) == 0)
+		reservation_object_add_shared_fence(resv, fence);
+	mutex_unlock(&resv->lock.base);
+
+	fence_put(fence);
+	return ret;
+}
+
+static long dma_buf_export_fence_ioctl(struct dma_buf *dmabuf,
+				       struct dma_buf_fence  __user *arg)
+{
+	struct reservation_object *resv = dmabuf->resv;
+	struct dma_buf_fence cmd;
+	struct fence *excl, **shared;
+	struct sync_file *sync = NULL;
+	unsigned int count, n;
+	int ret;
+
+	if (get_user(cmd.flags, &arg->flags))
+		return -EFAULT;
+
+	ret = reservation_object_get_fences_rcu(resv, &excl, &count, &shared);
+	if (ret)
+		return ret;
+
+	if (cmd.flags & DMA_BUF_FENCE_WRITE) {
+		if (excl) {
+			sync = sync_file_create(excl);
+			if (!sync) {
+				ret = -ENOMEM;
+				fence_put(excl);
+			}
+		}
+		for (n = 0; n < count; n++)
+			fence_put(shared[n]);
+		kfree(shared);
+	} else {
+		if (count) {
+			struct fence_array *array;
+
+			array = fence_array_create(count, shared, 0, 0, false);
+			if (!array) {
+				for (n = 0; n < count; n++)
+					fence_put(shared[n]);
+				kfree(shared);
+			} else
+				sync = sync_file_create(&array->base);
+			if (!sync) {
+				ret = -ENOMEM;
+				fence_put(&array->base);
+			}
+		}
+		fence_put(excl);
+	}
+	if (ret)
+		return ret;
+
+	cmd.fd = get_unused_fd_flags(O_CLOEXEC);
+	if (cmd.fd < 0) {
+		fput(sync->file);
+		return cmd.fd;
+	}
+
+	if (put_user(cmd.fd, &arg->fd)) {
+		fput(sync->file);
+		return -EFAULT;
+	}
+
+	fd_install(cmd.fd, sync->file);
+	return 0;
+}
+
 static long dma_buf_ioctl(struct file *file,
 			  unsigned int cmd, unsigned long arg)
 {
@@ -292,6 +385,13 @@  static long dma_buf_ioctl(struct file *file,
 			ret = dma_buf_begin_cpu_access(dmabuf, direction);
 
 		return ret;
+
+	case DMA_BUF_IOCTL_IMPORT_FENCE:
+		return dma_buf_import_fence_ioctl(dmabuf, (void __user *)arg);
+
+	case DMA_BUF_IOCTL_EXPORT_FENCE:
+		return dma_buf_export_fence_ioctl(dmabuf, (void __user *)arg);
+
 	default:
 		return -ENOTTY;
 	}
diff --git a/include/uapi/linux/dma-buf.h b/include/uapi/linux/dma-buf.h
index fb0dedb7c121..8d9a0d73ebaa 100644
--- a/include/uapi/linux/dma-buf.h
+++ b/include/uapi/linux/dma-buf.h
@@ -37,4 +37,13 @@  struct dma_buf_sync {
 #define DMA_BUF_BASE		'b'
 #define DMA_BUF_IOCTL_SYNC	_IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
 
+struct dma_buf_fence {
+	__s32 fd;
+	__u32 flags;
+#define DMA_BUF_FENCE_WRITE	(1 << 0)
+};
+
+#define DMA_BUF_IOCTL_IMPORT_FENCE _IOW(DMA_BUF_BASE, 1, struct dma_buf_fence)
+#define DMA_BUF_IOCTL_EXPORT_FENCE _IOWR(DMA_BUF_BASE, 2, struct dma_buf_fence)
+
 #endif