
[RFC,v3] drm/i915: Android native sync support

Message ID 1422358176-27158-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived

Commit Message

Tvrtko Ursulin Jan. 27, 2015, 11:29 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Add Android native sync support with fences exported as file descriptors via
the execbuf ioctl (rsvd2 field is used).

This is a continuation of Jesse Barnes's previous work, squashed to arrive at
the final destination, cleaned up, with some fixes and preliminary light
testing.

GEM requests are extended with fence structures which are associated with
Android sync fences exported to user space via file descriptors. Fences which
are waited upon while exported to userspace are referenced and added to
the irq_queue so they are signalled when requests are completed. There is no
overhead in the case where fences are not requested.

Based on patches by Jesse Barnes:
   drm/i915: Android sync points for i915 v3
   drm/i915: add fences to the request struct
   drm/i915: sync fence fixes/updates

To do:
   * Extend driver data with context id / ring id (TBD).

v2:
   * Code review comments. (Chris Wilson)
   * ring->add_request() was a wrong thing to call - rebase on top of John
     Harrison's (drm/i915: Early alloc request) to ensure correct request is
     present before creating a fence.
   * Take a request reference from signalling path as well to ensure request
     sticks around while fence is on the request completion wait queue.

v3:
   * Use a worker to unreference on completion, since the mutex cannot be
     locked from interrupt context.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/Kconfig               |  14 ++
 drivers/gpu/drm/i915/Makefile              |   1 +
 drivers/gpu/drm/i915/i915_drv.h            |  27 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  24 +++
 drivers/gpu/drm/i915/i915_sync.c           | 254 +++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h                |   8 +-
 6 files changed, 325 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_sync.c
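
For reference, a minimal userspace sketch of the interface this patch adds:
setting I915_EXEC_FENCE_OUT asks for a sync fence fd to be returned in rsvd2
(the execbuf ioctl is switched to DRM_IOWR so the field is copied back).
Buffer setup and opening the DRM fd are elided, and the poll() assumes the
staging sync fd signals POLLIN on completion.

#include <poll.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

static int execbuf_with_fence_out(int drm_fd,
				  struct drm_i915_gem_execbuffer2 *eb)
{
	struct pollfd pfd;
	int fence_fd;

	eb->flags |= I915_EXEC_FENCE_OUT;	/* flag added by this patch */
	eb->rsvd2 = 0;

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, eb))
		return -1;

	fence_fd = (int)eb->rsvd2;	/* fence fd is written back here */

	/* Block until the batch completes, via the exported fence. */
	memset(&pfd, 0, sizeof(pfd));
	pfd.fd = fence_fd;
	pfd.events = POLLIN;
	poll(&pfd, 1, -1);

	close(fence_fd);
	return 0;
}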

Comments

Chris Wilson Jan. 27, 2015, 11:40 a.m. UTC | #1
On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
> +static void i915_gem_request_unreference_worker(struct work_struct *work)
> +{
> +	struct drm_i915_gem_request *req =
> +		container_of(work, struct drm_i915_gem_request, unref_work);
> +	struct drm_device *dev = req->ring->dev;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	i915_gem_request_unreference(req);
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
> +static int
> +i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
> +{
> +	struct drm_i915_gem_request *req = wait->private;
> +	struct intel_engine_cs *ring = req->ring;
> +
> +	if (!i915_gem_request_completed(req, false))
> +		return 0;
> +
> +	fence_signal_locked(&req->fence);
> +
> +	__remove_wait_queue(&ring->irq_queue, wait);
> +	ring->irq_put(ring);
> +
> +	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
> +	schedule_work(&req->unref_work);
> +
> +	return 0;
> +}
> +
> +static bool i915_fence_ring_enable_signaling(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct intel_engine_cs *ring = req->ring;
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	wait_queue_t *wait = &req->wait;
> +
> +	/* queue fence wait queue on irq queue and get fence */
> +	if (i915_gem_request_completed(req, false) ||
> +	    i915_terminally_wedged(&dev_priv->gpu_error))
> +		return false;
> +
> +	if (!ring->irq_get(ring))
> +		return false;
> +
> +	if (i915_gem_request_completed(req, false)) {
> +		ring->irq_put(ring);
> +		return true;
> +	}
> +
> +	wait->flags = 0;
> +	wait->private = req;
> +	wait->func = i915_fence_ring_check;
> +
> +	i915_gem_request_reference(req);
> +
> +	__add_wait_queue(&ring->irq_queue, wait);
> +
> +	return true;
> +}

Explain how fence_release interacts with enable_signalling. Presumably
either the core fence routines cleanup the outstanding signalling, or we
are expected to. Doing so will remove the requirement for the extra
ref/unref (including the worker).
-Chris
Tvrtko Ursulin Jan. 27, 2015, 12:13 p.m. UTC | #2
On 01/27/2015 11:40 AM, Chris Wilson wrote:
> On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
>> +static void i915_gem_request_unreference_worker(struct work_struct *work)
>> +{
>> +	struct drm_i915_gem_request *req =
>> +		container_of(work, struct drm_i915_gem_request, unref_work);
>> +	struct drm_device *dev = req->ring->dev;
>> +
>> +	mutex_lock(&dev->struct_mutex);
>> +	i915_gem_request_unreference(req);
>> +	mutex_unlock(&dev->struct_mutex);
>> +}
>> +
>> +static int
>> +i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
>> +{
>> +	struct drm_i915_gem_request *req = wait->private;
>> +	struct intel_engine_cs *ring = req->ring;
>> +
>> +	if (!i915_gem_request_completed(req, false))
>> +		return 0;
>> +
>> +	fence_signal_locked(&req->fence);
>> +
>> +	__remove_wait_queue(&ring->irq_queue, wait);
>> +	ring->irq_put(ring);
>> +
>> +	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
>> +	schedule_work(&req->unref_work);
>> +
>> +	return 0;
>> +}
>> +
>> +static bool i915_fence_ring_enable_signaling(struct fence *fence)
>> +{
>> +	struct drm_i915_gem_request *req = to_i915_request(fence);
>> +	struct intel_engine_cs *ring = req->ring;
>> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>> +	wait_queue_t *wait = &req->wait;
>> +
>> +	/* queue fence wait queue on irq queue and get fence */
>> +	if (i915_gem_request_completed(req, false) ||
>> +	    i915_terminally_wedged(&dev_priv->gpu_error))
>> +		return false;
>> +
>> +	if (!ring->irq_get(ring))
>> +		return false;
>> +
>> +	if (i915_gem_request_completed(req, false)) {
>> +		ring->irq_put(ring);
>> +		return true;
>> +	}
>> +
>> +	wait->flags = 0;
>> +	wait->private = req;
>> +	wait->func = i915_fence_ring_check;
>> +
>> +	i915_gem_request_reference(req);
>> +
>> +	__add_wait_queue(&ring->irq_queue, wait);
>> +
>> +	return true;
>> +}
>
> Explain how fence_release interacts with enable_signalling. Presumably
> either the core fence routines cleanup the outstanding signalling, or we
> are expected to. Doing so will remove the requirement for the extra
> ref/unref (including the worker).

I think normally we would be expected to use fence_get/put on the 
signaling side of things but that is not possible here since struct 
fence is embedded in the request.

So as it is, sync_fence_release will tear everything down with our 
callback still on the irq_queue which is what taking a request reference 
sorts out.

Regards,

Tvrtko
Chris Wilson Jan. 27, 2015, 12:18 p.m. UTC | #3
On Tue, Jan 27, 2015 at 12:13:14PM +0000, Tvrtko Ursulin wrote:
> On 01/27/2015 11:40 AM, Chris Wilson wrote:
> >On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
> >>+static void i915_gem_request_unreference_worker(struct work_struct *work)
> >>+{
> >>+	struct drm_i915_gem_request *req =
> >>+		container_of(work, struct drm_i915_gem_request, unref_work);
> >>+	struct drm_device *dev = req->ring->dev;
> >>+
> >>+	mutex_lock(&dev->struct_mutex);
> >>+	i915_gem_request_unreference(req);
> >>+	mutex_unlock(&dev->struct_mutex);
> >>+}
> >>+
> >>+static int
> >>+i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
> >>+{
> >>+	struct drm_i915_gem_request *req = wait->private;
> >>+	struct intel_engine_cs *ring = req->ring;
> >>+
> >>+	if (!i915_gem_request_completed(req, false))
> >>+		return 0;
> >>+
> >>+	fence_signal_locked(&req->fence);
> >>+
> >>+	__remove_wait_queue(&ring->irq_queue, wait);
> >>+	ring->irq_put(ring);
> >>+
> >>+	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
> >>+	schedule_work(&req->unref_work);
> >>+
> >>+	return 0;
> >>+}
> >>+
> >>+static bool i915_fence_ring_enable_signaling(struct fence *fence)
> >>+{
> >>+	struct drm_i915_gem_request *req = to_i915_request(fence);
> >>+	struct intel_engine_cs *ring = req->ring;
> >>+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> >>+	wait_queue_t *wait = &req->wait;
> >>+
> >>+	/* queue fence wait queue on irq queue and get fence */
> >>+	if (i915_gem_request_completed(req, false) ||
> >>+	    i915_terminally_wedged(&dev_priv->gpu_error))
> >>+		return false;
> >>+
> >>+	if (!ring->irq_get(ring))
> >>+		return false;
> >>+
> >>+	if (i915_gem_request_completed(req, false)) {
> >>+		ring->irq_put(ring);
> >>+		return true;
> >>+	}
> >>+
> >>+	wait->flags = 0;
> >>+	wait->private = req;
> >>+	wait->func = i915_fence_ring_check;
> >>+
> >>+	i915_gem_request_reference(req);
> >>+
> >>+	__add_wait_queue(&ring->irq_queue, wait);
> >>+
> >>+	return true;
> >>+}
> >
> >Explain how fence_release interacts with enable_signalling. Presumably
> >either the core fence routines cleanup the outstanding signalling, or we
> >are expected to. Doing so will remove the requirement for the extra
> >ref/unref (including the worker).
> 
> I think normally we would be expected to use fence_get/put on the
> signaling side of things but that is not possible here since struct
> fence is embedded in the request.
> 
> So as it is, sync_fence_release will tear everything down with our
> callback still on the irq_queue which is what taking a request
> reference sorts out.

So you are saying that we have a callback for when the fence is
discarded by userspace and we ignore that opportunity for disabling
interrupt generation, and remove the extra request references?
-Chris
Tvrtko Ursulin Jan. 27, 2015, 1:43 p.m. UTC | #4
On 01/27/2015 12:18 PM, Chris Wilson wrote:
> On Tue, Jan 27, 2015 at 12:13:14PM +0000, Tvrtko Ursulin wrote:
>> On 01/27/2015 11:40 AM, Chris Wilson wrote:

[snip]

>>> Explain how fence_release interacts with enable_signalling. Presumably
>>> either the core fence routines cleanup the outstanding signalling, or we
>>> are expected to. Doing so will remove the requirement for the extra
>>> ref/unref (including the worker).
>>
>> I think normally we would be expected to use fence_get/put on the
>> signaling side of things but that is not possible here since struct
>> fence is embedded in the request.
>>
>> So as it is, sync_fence_release will tear everything down with our
>> callback still on the irq_queue which is what taking a request
>> reference sorts out.
>
> So you are saying that we have a callback for when the fence is
> discarded by userspace and we ignore that opportunity for disabling
> interrupt generation, and remove the extra request references?

I can change it to work like that sure, don't see anything obvious 
preventing this rework. Can use the fence lock to avoid wait queue 
removal race and that should be pretty much it.

On a related note, do you have any ideas for submitting a batch which can 
either keep the GPU busy for a configurable time, or until signaled by 
something else (possibly another batch). Gen agnostic ideally? :)

Regards,

Tvrtko
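
A sketch of the rework agreed above: fence release tears down any outstanding
signalling itself, serialised by the fence lock, instead of enable_signaling
taking an extra request reference. Hypothetical code under those assumptions —
wait->private doubles as the "still queued" marker (i915_fence_ring_check()
would clear it under the same lock), and ordering against the irq_queue lock
is glossed over:

static void i915_fence_release(struct fence *fence)
{
	struct drm_i915_gem_request *req = to_i915_request(fence);
	struct intel_engine_cs *ring = req->ring;
	unsigned long flags;

	spin_lock_irqsave(&fence_lock, flags);
	if (req->wait.private) {
		/* The callback never ran: unhook it and drop the irq ref. */
		__remove_wait_queue(&ring->irq_queue, &req->wait);
		ring->irq_put(ring);
		req->wait.private = NULL;
	}
	spin_unlock_irqrestore(&fence_lock, flags);
}
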
Daniel Vetter Jan. 28, 2015, 9:25 a.m. UTC | #5
On Tue, Jan 27, 2015 at 01:43:02PM +0000, Tvrtko Ursulin wrote:
> 
> On 01/27/2015 12:18 PM, Chris Wilson wrote:
> >On Tue, Jan 27, 2015 at 12:13:14PM +0000, Tvrtko Ursulin wrote:
> >>On 01/27/2015 11:40 AM, Chris Wilson wrote:
> 
> [snip]
> 
> >>>Explain how fence_release interacts with enable_signalling. Presumably
> >>>either the core fence routines cleanup the outstanding signalling, or we
> >>>are expected to. Doing so will remove the requirement for the extra
> >>>ref/unref (including the worker).
> >>
> >>I think normally we would be expected to use fence_get/put on the
> >>signaling side of things but that is not possible here since struct
> >>fence is embedded in the request.
> >>
> >>So as it is, sync_fence_release will tear everything down with our
> >>callback still on the irq_queue which is what taking a request
> >>reference sorts out.
> >
> >So you are saying that we have a callback for when the fence is
> >discarded by userspace and we ignore that opportunity for disabling
> >interrupt generation, and remove the extra request references?
> 
> I can change it to work like that sure, don't see anything obvious
> preventing this rework. Can use the fence lock to avoid wait queue removal
> race and that should be pretty much it.
> 
> On a related note, do you have any ideas for submitting a batch which can
> either keep the GPU busy for a configurable time, or until signaled by
> something else (possibly another batch). Gen agnostic ideally? :)

self-tuning busy loads using the blitter/rendercopy. We have bazillions of
existing copypasta in igt tests for this; it would be great if someone
extracted it into a real igt helper library and converted everyone over.

Iirc the one in kms_flip or gem_wait is probably the best starting point.
There's no way afaik to block a batch on an mbox or similar from
userspace.
-Daniel
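
The self-tuning idea in a C sketch, with submit_copies() as a hypothetical
stand-in for the igt blitter/rendercopy copypasta Daniel mentions — it submits
n copy operations in one batch and returns the measured busy time in
nanoseconds:

#include <stdint.h>

extern uint64_t submit_copies(unsigned int n);	/* hypothetical helper */

static unsigned int tune_busy_load(uint64_t target_ns)
{
	uint64_t elapsed = 0;
	unsigned int n;

	/* Grow exponentially until one batch overshoots the target. */
	for (n = 1; n < (1u << 20); n *= 2) {
		elapsed = submit_copies(n);
		if (elapsed >= target_ns)
			break;
	}

	/* Refine linearly from the last measurement. */
	if (elapsed > target_ns)
		n = (unsigned int)(((uint64_t)n * target_ns) / elapsed);

	return n ? n : 1;
}
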
Daniel Vetter Jan. 28, 2015, 9:29 a.m. UTC | #6
On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Add Android native sync support with fences exported as file descriptors via
> the execbuf ioctl (rsvd2 field is used).
> 
> This is a continuation of Jesse Barnes's previous work, squashed to arrive at
> the final destination, cleaned up, with some fixes and preliminary light
> testing.
> 
> GEM requests are extended with fence structures which are associated with
> Android sync fences exported to user space via file descriptors. Fences which
> are waited upon while exported to userspace are referenced and added to
> the irq_queue so they are signalled when requests are completed. There is no
> overhead in the case where fences are not requested.
> 
> Based on patches by Jesse Barnes:
>    drm/i915: Android sync points for i915 v3
>    drm/i915: add fences to the request struct
>    drm/i915: sync fence fixes/updates
> 
> To do:
>    * Extend driver data with context id / ring id (TBD).
> 
> v2:
>    * Code review comments. (Chris Wilson)
>    * ring->add_request() was a wrong thing to call - rebase on top of John
>      Harrison's (drm/i915: Early alloc request) to ensure correct request is
>      present before creating a fence.
>    * Take a request reference from signalling path as well to ensure request
>      sticks around while fence is on the request completion wait queue.
> 
> v3:
>    * Use a worker to unreference on completion, since the mutex cannot be
>      locked from interrupt context.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> Cc: John Harrison <John.C.Harrison@Intel.com>

btw for merging we need to split the conversion to fences out from the
actual userspace interface. And imo we should replace a lot of our
existing usage of i915_gem_request with fences within the driver, too. Not
tacked on the side like in your patch here. All the recent refactoring
have been aiming to match i915 requests to the internal fence interfaces,
so we should be pretty much there.

We also need this to be able to integrate with the scheduler properly - if
that needs to deal both with fences and i915 requests separately it'll be
a bit more messy. If it's all just fences the code should be streamlined a
lot.

Sorry for spotting this only just now; I thought Jesse's old patches
had this already and that you'd carried the approach over.
-Daniel

> ---
>  drivers/gpu/drm/i915/Kconfig               |  14 ++
>  drivers/gpu/drm/i915/Makefile              |   1 +
>  drivers/gpu/drm/i915/i915_drv.h            |  27 +++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  24 +++
>  drivers/gpu/drm/i915/i915_sync.c           | 254 +++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h                |   8 +-
>  6 files changed, 325 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_sync.c
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 4e39ab3..abafe68 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -43,6 +43,20 @@ config DRM_I915_KMS
>  
>  	  If in doubt, say "Y".
>  
> +config DRM_I915_SYNC
> +	bool "Enable explicit sync support"
> +	depends on DRM_I915
> +	default y if STAGING
> +	select STAGING
> +	select ANDROID
> +	select SYNC
> +	help
> +	  Choose this option to enable Android native sync support and the
> +	  corresponding i915 driver code to expose it.  Slightly increases
> +	  driver size and pulls in sync support from staging.
> +
> +	  If in doubt, say "Y".
> +
>  config DRM_I915_FBDEV
>  	bool "Enable legacy fbdev support for the modesetting intel driver"
>  	depends on DRM_I915
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 16e3dc3..bcff481 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -58,6 +58,7 @@ i915-y += intel_audio.o \
>  	  intel_sprite.o
>  i915-$(CONFIG_ACPI)		+= intel_acpi.o intel_opregion.o
>  i915-$(CONFIG_DRM_I915_FBDEV)	+= intel_fbdev.o
> +i915-$(CONFIG_DRM_I915_SYNC)	+= i915_sync.o
>  
>  # modesetting output/encoder code
>  i915-y += dvo_ch7017.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 023f422..c4d254c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -49,6 +49,7 @@
>  #include <linux/intel-iommu.h>
>  #include <linux/kref.h>
>  #include <linux/pm_qos.h>
> +#include <linux/fence.h>
>  
>  /* General customization:
>   */
> @@ -2092,6 +2093,15 @@ struct drm_i915_gem_request {
>  	struct list_head client_list;
>  
>  	uint32_t uniq;
> +
> +#ifdef CONFIG_DRM_I915_SYNC
> +	/* core fence obj for this request, may be exported */
> +	struct fence fence;
> +
> +	wait_queue_t wait;
> +
> +	struct work_struct unref_work;
> +#endif
>  };
>  
>  void i915_gem_request_free(struct kref *req_ref);
> @@ -2583,6 +2593,23 @@ void i915_init_vm(struct drm_i915_private *dev_priv,
>  void i915_gem_free_object(struct drm_gem_object *obj);
>  void i915_gem_vma_destroy(struct i915_vma *vma);
>  
> +/* i915_sync.c */
> +struct sync_fence;
> +
> +#ifdef CONFIG_DRM_I915_SYNC
> +int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
> +				struct intel_context *ctx,
> +				struct sync_fence **sync_fence, int *fence_fd);
> +#else
> +static inline
> +int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
> +				struct intel_context *ctx,
> +				struct sync_fence **sync_fence, int *fence_fd)
> +{
> +	return -ENODEV;
> +}
> +#endif
> +
>  #define PIN_MAPPABLE 0x1
>  #define PIN_NONBLOCK 0x2
>  #define PIN_GLOBAL 0x4
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2a3d1a9..b04157a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -32,6 +32,7 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  #include <linux/dma_remapping.h>
> +#include "../../../staging/android/sync.h"
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> @@ -1356,6 +1357,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	u32 flags;
>  	int ret;
>  	bool need_relocs;
> +	struct sync_fence *sync_fence = NULL;
> +	int fence_fd = -1;
>  
>  	if (!i915_gem_check_execbuffer(args))
>  		return -EINVAL;
> @@ -1509,8 +1512,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	if (ret)
>  		goto req_err;
>  
> +	if (args->flags & I915_EXEC_FENCE_OUT) {
> +		ret = i915_create_sync_fence_ring(ring, ctx,
> +						  &sync_fence, &fence_fd);
> +		if (ret)
> +			goto req_err;
> +	}
> +
>  	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
>  				      &eb->vmas, batch_obj, exec_start, flags);
> +	if (ret)
> +		goto exec_err;
> +
> +	if (sync_fence) {
> +		sync_fence_install(sync_fence, fence_fd);
> +		args->rsvd2 = fence_fd;
> +		sync_fence = NULL;
> +	}
> +
> +exec_err:
> +	if (sync_fence) {
> +		fput(sync_fence->file);
> +		put_unused_fd(fence_fd);
> +	}
>  
>  req_err:
>  	/*
> diff --git a/drivers/gpu/drm/i915/i915_sync.c b/drivers/gpu/drm/i915/i915_sync.c
> new file mode 100644
> index 0000000..77b7bc9
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_sync.c
> @@ -0,0 +1,254 @@
> +/*
> + * Copyright © 2013-2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Jesse Barnes <jbarnes@virtuousgeek.org>
> + *    Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> + *
> + */
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_vma_manager.h>
> +#include <drm/i915_drm.h>
> +#include "i915_drv.h"
> +#include "i915_trace.h"
> +#include "intel_drv.h"
> +#include <linux/oom.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/slab.h>
> +#include <linux/swap.h>
> +#include <linux/pci.h>
> +#include <linux/dma-buf.h>
> +#include "../../../staging/android/sync.h"
> +
> +static DEFINE_SPINLOCK(fence_lock);
> +
> +/*
> + * i915 Android native sync fences.
> + *
> + * We implement sync points in terms of i915 seqnos. They're exposed through
> + * the DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl, and can be mixed and matched
> + * with other Android timelines and aggregated into sync_fences, etc.
> + *
> + * TODO:
> + *   * Display engine fences.
> + *   * Extend driver data with context id / ring id.
> + */
> +
> +#define to_i915_request(x) container_of(x, struct drm_i915_gem_request, fence)
> +
> +static const char *i915_fence_get_driver_name(struct fence *fence)
> +{
> +	return "i915";
> +}
> +
> +static const char *i915_fence_ring_get_timeline_name(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +
> +	return req->ring->name;
> +}
> +
> +static void i915_gem_request_unreference_worker(struct work_struct *work)
> +{
> +	struct drm_i915_gem_request *req =
> +		container_of(work, struct drm_i915_gem_request, unref_work);
> +	struct drm_device *dev = req->ring->dev;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	i915_gem_request_unreference(req);
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
> +static int
> +i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
> +{
> +	struct drm_i915_gem_request *req = wait->private;
> +	struct intel_engine_cs *ring = req->ring;
> +
> +	if (!i915_gem_request_completed(req, false))
> +		return 0;
> +
> +	fence_signal_locked(&req->fence);
> +
> +	__remove_wait_queue(&ring->irq_queue, wait);
> +	ring->irq_put(ring);
> +
> +	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
> +	schedule_work(&req->unref_work);
> +
> +	return 0;
> +}
> +
> +static bool i915_fence_ring_enable_signaling(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct intel_engine_cs *ring = req->ring;
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	wait_queue_t *wait = &req->wait;
> +
> +	/* queue fence wait queue on irq queue and get fence */
> +	if (i915_gem_request_completed(req, false) ||
> +	    i915_terminally_wedged(&dev_priv->gpu_error))
> +		return false;
> +
> +	if (!ring->irq_get(ring))
> +		return false;
> +
> +	if (i915_gem_request_completed(req, false)) {
> +		ring->irq_put(ring);
> +		return true;
> +	}
> +
> +	wait->flags = 0;
> +	wait->private = req;
> +	wait->func = i915_fence_ring_check;
> +
> +	i915_gem_request_reference(req);
> +
> +	__add_wait_queue(&ring->irq_queue, wait);
> +
> +	return true;
> +}
> +
> +static bool i915_fence_ring_signaled(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +
> +	return i915_gem_request_completed(req, false);
> +}
> +
> +static signed long
> +i915_fence_ring_wait(struct fence *fence, bool intr, signed long timeout_js)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
> +	int ret;
> +	s64 timeout;
> +
> +	timeout = jiffies_to_nsecs(timeout_js);
> +
> +	ret = __i915_wait_request(req,
> +				  atomic_read(&dev_priv->gpu_error.reset_counter),
> +				  intr, &timeout, NULL);
> +	if (ret == -ETIME)
> +		return 0;
> +
> +	return ret;
> +}
> +
> +static int
> +i915_fence_ring_fill_driver_data(struct fence *fence, void *data, int size)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +
> +	if (size < sizeof(req->seqno))
> +		return -ENOMEM;
> +
> +	memcpy(data, &req->seqno,
> +	       sizeof(req->seqno));
> +
> +	return sizeof(req->seqno);
> +}
> +
> +static void i915_fence_ring_value_str(struct fence *fence, char *str, int size)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +
> +	snprintf(str, size, "%u", req->seqno);
> +}
> +
> +static void
> +i915_fence_ring_timeline_value_str(struct fence *fence, char *str, int size)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct intel_engine_cs *ring = req->ring;
> +
> +	snprintf(str, size, "%u", ring->get_seqno(ring, false));
> +}
> +
> +static void i915_fence_release(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct drm_device *dev = req->ring->dev;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	i915_gem_request_unreference(req);
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
> +static struct fence_ops i915_fence_ring_ops = {
> +	.get_driver_name =	i915_fence_get_driver_name,
> +	.get_timeline_name =	i915_fence_ring_get_timeline_name,
> +	.enable_signaling =	i915_fence_ring_enable_signaling,
> +	.signaled =		i915_fence_ring_signaled,
> +	.wait =			i915_fence_ring_wait,
> +	.fill_driver_data =	i915_fence_ring_fill_driver_data,
> +	.fence_value_str =	i915_fence_ring_value_str,
> +	.timeline_value_str =	i915_fence_ring_timeline_value_str,
> +	.release =		i915_fence_release
> +};
> +
> +int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
> +				struct intel_context *ctx,
> +				struct sync_fence **sync_fence, int *fence_fd)
> +{
> +	struct drm_i915_gem_request *req;
> +	struct sync_fence *sfence;
> +	char ring_name[6] = "ring0";
> +	int fd;
> +
> +	if (!intel_ring_initialized(ring)) {
> +		DRM_DEBUG("Ring not initialized!\n");
> +		return -EAGAIN;
> +	}
> +
> +	req = ring->outstanding_lazy_request;
> +	if (WARN_ON_ONCE(!req))
> +		return -ECANCELED;
> +
> +	fd = get_unused_fd_flags(O_CLOEXEC);
> +	if (fd < 0) {
> +		DRM_DEBUG("No available file descriptor!\n");
> +		return fd;
> +	}
> +
> +	i915_gem_request_reference(req);
> +
> +	fence_init(&req->fence, &i915_fence_ring_ops, &fence_lock,
> +		   ctx->user_handle, req->seqno);
> +
> +	ring_name[4] += ring->id;
> +	sfence = sync_fence_create_dma(ring_name, &req->fence);
> +	if (!sfence)
> +		goto err;
> +
> +	*sync_fence = sfence;
> +	*fence_fd = fd;
> +
> +	return 0;
> +
> +err:
> +	i915_gem_request_unreference(req);
> +	put_unused_fd(fd);
> +	return -ENOMEM;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 2e559f6e..c197770 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -246,7 +246,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
>  #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
>  #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
> -#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
>  #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>  #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>  #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> @@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
>  #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
>  	__u64 flags;
>  	__u64 rsvd1; /* now used for context info */
> -	__u64 rsvd2;
> +	__u64 rsvd2; /* now used for fence fd */
>  };
>  
>  /** Resets the SO write offset registers for transform feedback on gen7. */
> @@ -750,7 +750,9 @@ struct drm_i915_gem_execbuffer2 {
>   */
>  #define I915_EXEC_HANDLE_LUT		(1<<12)
>  
> -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
> +#define I915_EXEC_FENCE_OUT		(1<<13)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_FENCE_OUT<<1)
>  
>  #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
>  #define i915_execbuffer2_set_context_id(eb2, context) \
> -- 
> 2.2.2
> 
Tvrtko Ursulin Jan. 28, 2015, 4:52 p.m. UTC | #7
On 01/28/2015 09:29 AM, Daniel Vetter wrote:
> On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Add Android native sync support with fences exported as file descriptors via
>> the execbuf ioctl (rsvd2 field is used).
>>
>> This is a continuation of Jesse Barnes's previous work, squashed to arrive at
>> the final destination, cleaned up, with some fixes and preliminary light
>> testing.
>>
>> GEM requests are extended with fence structures which are associated with
>> Android sync fences exported to user space via file descriptors. Fences which
>> are waited upon while exported to userspace are referenced and added to
>> the irq_queue so they are signalled when requests are completed. There is no
>> overhead in the case where fences are not requested.
>>
>> Based on patches by Jesse Barnes:
>>     drm/i915: Android sync points for i915 v3
>>     drm/i915: add fences to the request struct
>>     drm/i915: sync fence fixes/updates
>>
>> To do:
>>     * Extend driver data with context id / ring id (TBD).
>>
>> v2:
>>     * Code review comments. (Chris Wilson)
>>     * ring->add_request() was a wrong thing to call - rebase on top of John
>>       Harrison's (drm/i915: Early alloc request) to ensure correct request is
>>       present before creating a fence.
>>     * Take a request reference from signalling path as well to ensure request
>>       sticks around while fence is on the request completion wait queue.
>>
>> v3:
>>     * Use a worker to unreference on completion, since the mutex cannot be
>>       locked from interrupt context.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
>> Cc: John Harrison <John.C.Harrison@Intel.com>
>
> btw for merging we need to split the conversion to fences out from the
> actual userspace interface. And imo we should replace a lot of our
> existing usage of i915_gem_request with fences within the driver, too. Not
> tacked on the side like in your patch here. All the recent refactoring
> have been aiming to match i915 requests to the internal fence interfaces,
> so we should be pretty much there.

Ok I did not realize this. It did not make sense to me to split it out 
if the only access point to create them is via Android native sync, but 
from what you are saying fences should be initialized and active for all 
requests all the time. With the native sync part established on demand.

In what respects has the refactoring been aligning requests and fences?

> We also need this to be able to integrate with the scheduler properly - if
> that needs to deal both with fences and i915 requests separately it'll be
> a bit more messy. If it's all just fences the code should be streamlined a
> lot.

Requests will remain the main data structure representing the unit of GPU 
work?

If so, it sounds logical that fences are associated (or aggregated) with 
requests. I don't see how the scheduler would work with fences and not requests.

Regards,

Tvrtko

P.S. Thanks for pointing out self-tuning busy loads in another thread - 
it works nicely.
Daniel Vetter Jan. 29, 2015, 4:14 p.m. UTC | #8
On Wed, Jan 28, 2015 at 04:52:53PM +0000, Tvrtko Ursulin wrote:
> 
> On 01/28/2015 09:29 AM, Daniel Vetter wrote:
> >On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>Add Android native sync support with fences exported as file descriptors via
> >>the execbuf ioctl (rsvd2 field is used).
> >>
> >>This is a continuation of Jesse Barnes's previous work, squashed to arrive at
> >>the final destination, cleaned up, with some fixes and preliminary light
> >>testing.
> >>
> >>GEM requests are extended with fence structures which are associated with
> >>Android sync fences exported to user space via file descriptors. Fences which
> >>are waited upon while exported to userspace are referenced and added to
> >>the irq_queue so they are signalled when requests are completed. There is no
> >>overhead in the case where fences are not requested.
> >>
> >>Based on patches by Jesse Barnes:
> >>    drm/i915: Android sync points for i915 v3
> >>    drm/i915: add fences to the request struct
> >>    drm/i915: sync fence fixes/updates
> >>
> >>To do:
> >>    * Extend driver data with context id / ring id (TBD).
> >>
> >>v2:
> >>    * Code review comments. (Chris Wilson)
> >>    * ring->add_request() was a wrong thing to call - rebase on top of John
> >>      Harrison's (drm/i915: Early alloc request) to ensure correct request is
> >>      present before creating a fence.
> >>    * Take a request reference from signalling path as well to ensure request
> >>      sticks around while fence is on the request completion wait queue.
> >>
> >>v3:
> >>    * Use a worker to unreference on completion, since the mutex cannot be
> >>      locked from interrupt context.
> >>
> >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> >>Cc: John Harrison <John.C.Harrison@Intel.com>
> >
> >btw for merging we need to split the conversion to fences out from the
> >actual userspace interface. And imo we should replace a lot of our
> >existing usage of i915_gem_request with fences within the driver, too. Not
> >tacked on the side like in your patch here. All the recent refactoring
> >have been aiming to match i915 requests to the internal fence interfaces,
> >so we should be pretty much there.
> 
> Ok I did not realize this. It did not make sense to me to split it out if
> the only access point to create them is via Android native sync, but from
> what you are saying fences should be initialized and active for all requests
> all the time. With the native sync part established on demand.
> 
> In what respects has the refactoring been aligning requests and fences?

requests are also refcounted, like fences. The actual interface for
waiting might still have a slight mismatch.

> >We also need this to be able to integrate with the scheduler properly - if
> >that needs to deal both with fences and i915 requests separately it'll be
> >a bit more messy. If it's all just fences the code should be streamlined a
> >lot.
> 
> Requests will remain the main data structure representing the unit of GPU
> work?

Yes, requests will be just a subclass of fences (through structure
embedding).

> If so, it sounds logical that fences are associated (or aggregated) with
> requests. I don't see how the scheduler would work with fences and not requests.

The important part is that the scheduler can work with fences which are
_not_ i915 requests (e.g. from the camera pipeline or similar things).
-Daniel
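
A sketch of the structure-embedding direction described here: the fence lives
in every request unconditionally rather than under CONFIG_DRM_I915_SYNC, so
fence-only consumers such as a scheduler can simply be handed &req->fence.
Hypothetical illustration, not part of the patch:

struct drm_i915_gem_request {
	struct fence fence;	/* refcounting and waiting live here */
	struct intel_engine_cs *ring;
	/* ...remaining request state... */
};

static inline struct drm_i915_gem_request *to_request(struct fence *f)
{
	return container_of(f, struct drm_i915_gem_request, fence);
}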

Patch

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 4e39ab3..abafe68 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -43,6 +43,20 @@  config DRM_I915_KMS
 
 	  If in doubt, say "Y".
 
+config DRM_I915_SYNC
+	bool "Enable explicit sync support"
+	depends on DRM_I915
+	default y if STAGING
+	select STAGING
+	select ANDROID
+	select SYNC
+	help
+	  Choose this option to enable Android native sync support and the
+	  corresponding i915 driver code to expose it.  Slightly increases
+	  driver size and pulls in sync support from staging.
+
+	  If in doubt, say "Y".
+
 config DRM_I915_FBDEV
 	bool "Enable legacy fbdev support for the modesetting intel driver"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 16e3dc3..bcff481 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -58,6 +58,7 @@  i915-y += intel_audio.o \
 	  intel_sprite.o
 i915-$(CONFIG_ACPI)		+= intel_acpi.o intel_opregion.o
 i915-$(CONFIG_DRM_I915_FBDEV)	+= intel_fbdev.o
+i915-$(CONFIG_DRM_I915_SYNC)	+= i915_sync.o
 
 # modesetting output/encoder code
 i915-y += dvo_ch7017.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 023f422..c4d254c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -49,6 +49,7 @@ 
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/fence.h>
 
 /* General customization:
  */
@@ -2092,6 +2093,15 @@  struct drm_i915_gem_request {
 	struct list_head client_list;
 
 	uint32_t uniq;
+
+#ifdef CONFIG_DRM_I915_SYNC
+	/* core fence obj for this request, may be exported */
+	struct fence fence;
+
+	wait_queue_t wait;
+
+	struct work_struct unref_work;
+#endif
 };
 
 void i915_gem_request_free(struct kref *req_ref);
@@ -2583,6 +2593,23 @@  void i915_init_vm(struct drm_i915_private *dev_priv,
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
+/* i915_sync.c */
+struct sync_fence;
+
+#ifdef CONFIG_DRM_I915_SYNC
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd);
+#else
+static inline
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd)
+{
+	return -ENODEV;
+}
+#endif
+
 #define PIN_MAPPABLE 0x1
 #define PIN_NONBLOCK 0x2
 #define PIN_GLOBAL 0x4
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2a3d1a9..b04157a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -32,6 +32,7 @@ 
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
+#include "../../../staging/android/sync.h"
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -1356,6 +1357,8 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	u32 flags;
 	int ret;
 	bool need_relocs;
+	struct sync_fence *sync_fence = NULL;
+	int fence_fd = -1;
 
 	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
@@ -1509,8 +1512,29 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto req_err;
 
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		ret = i915_create_sync_fence_ring(ring, ctx,
+						  &sync_fence, &fence_fd);
+		if (ret)
+			goto req_err;
+	}
+
 	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
 				      &eb->vmas, batch_obj, exec_start, flags);
+	if (ret)
+		goto exec_err;
+
+	if (sync_fence) {
+		sync_fence_install(sync_fence, fence_fd);
+		args->rsvd2 = fence_fd;
+		sync_fence = NULL;
+	}
+
+exec_err:
+	if (sync_fence) {
+		fput(sync_fence->file);
+		put_unused_fd(fence_fd);
+	}
 
 req_err:
 	/*
diff --git a/drivers/gpu/drm/i915/i915_sync.c b/drivers/gpu/drm/i915/i915_sync.c
new file mode 100644
index 0000000..77b7bc9
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sync.c
@@ -0,0 +1,254 @@ 
+/*
+ * Copyright © 2013-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jesse Barnes <jbarnes@virtuousgeek.org>
+ *    Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_vma_manager.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/oom.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include "../../../staging/android/sync.h"
+
+static DEFINE_SPINLOCK(fence_lock);
+
+/*
+ * i915 Android native sync fences.
+ *
+ * We implement sync points in terms of i915 seqnos. They're exposed through
+ * the DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl, and can be mixed and matched
+ * with other Android timelines and aggregated into sync_fences, etc.
+ *
+ * TODO:
+ *   * Display engine fences.
+ *   * Extend driver data with context id / ring id.
+ */
+
+#define to_i915_request(x) container_of(x, struct drm_i915_gem_request, fence)
+
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+	return "i915";
+}
+
+static const char *i915_fence_ring_get_timeline_name(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	return req->ring->name;
+}
+
+static void i915_gem_request_unreference_worker(struct work_struct *work)
+{
+	struct drm_i915_gem_request *req =
+		container_of(work, struct drm_i915_gem_request, unref_work);
+	struct drm_device *dev = req->ring->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	i915_gem_request_unreference(req);
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static int
+i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+	struct drm_i915_gem_request *req = wait->private;
+	struct intel_engine_cs *ring = req->ring;
+
+	if (!i915_gem_request_completed(req, false))
+		return 0;
+
+	fence_signal_locked(&req->fence);
+
+	__remove_wait_queue(&ring->irq_queue, wait);
+	ring->irq_put(ring);
+
+	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
+	schedule_work(&req->unref_work);
+
+	return 0;
+}
+
+static bool i915_fence_ring_enable_signaling(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct intel_engine_cs *ring = req->ring;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	wait_queue_t *wait = &req->wait;
+
+	/* queue fence wait queue on irq queue and get fence */
+	if (i915_gem_request_completed(req, false) ||
+	    i915_terminally_wedged(&dev_priv->gpu_error))
+		return false;
+
+	if (!ring->irq_get(ring))
+		return false;
+
+	if (i915_gem_request_completed(req, false)) {
+		ring->irq_put(ring);
+		return true;
+	}
+
+	wait->flags = 0;
+	wait->private = req;
+	wait->func = i915_fence_ring_check;
+
+	i915_gem_request_reference(req);
+
+	__add_wait_queue(&ring->irq_queue, wait);
+
+	return true;
+}
+
+static bool i915_fence_ring_signaled(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	return i915_gem_request_completed(req, false);
+}
+
+static signed long
+i915_fence_ring_wait(struct fence *fence, bool intr, signed long timeout_js)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
+	int ret;
+	s64 timeout;
+
+	timeout = jiffies_to_nsecs(timeout_js);
+
+	ret = __i915_wait_request(req,
+				  atomic_read(&dev_priv->gpu_error.reset_counter),
+				  intr, &timeout, NULL);
+	if (ret == -ETIME)
+		return 0;
+
+	return ret;
+}
+
+static int
+i915_fence_ring_fill_driver_data(struct fence *fence, void *data, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	if (size < sizeof(req->seqno))
+		return -ENOMEM;
+
+	memcpy(data, &req->seqno,
+	       sizeof(req->seqno));
+
+	return sizeof(req->seqno);
+}
+
+static void i915_fence_ring_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	snprintf(str, size, "%u", req->seqno);
+}
+
+static void
+i915_fence_ring_timeline_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct intel_engine_cs *ring = req->ring;
+
+	snprintf(str, size, "%u", ring->get_seqno(ring, false));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct drm_device *dev = req->ring->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	i915_gem_request_unreference(req);
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static struct fence_ops i915_fence_ring_ops = {
+	.get_driver_name =	i915_fence_get_driver_name,
+	.get_timeline_name =	i915_fence_ring_get_timeline_name,
+	.enable_signaling =	i915_fence_ring_enable_signaling,
+	.signaled =		i915_fence_ring_signaled,
+	.wait =			i915_fence_ring_wait,
+	.fill_driver_data =	i915_fence_ring_fill_driver_data,
+	.fence_value_str =	i915_fence_ring_value_str,
+	.timeline_value_str =	i915_fence_ring_timeline_value_str,
+	.release =		i915_fence_release
+};
+
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd)
+{
+	struct drm_i915_gem_request *req;
+	struct sync_fence *sfence;
+	char ring_name[6] = "ring0";
+	int fd;
+
+	if (!intel_ring_initialized(ring)) {
+		DRM_DEBUG("Ring not initialized!\n");
+		return -EAGAIN;
+	}
+
+	req = ring->outstanding_lazy_request;
+	if (WARN_ON_ONCE(!req))
+		return -ECANCELED;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		DRM_DEBUG("No available file descriptor!\n");
+		return fd;
+	}
+
+	i915_gem_request_reference(req);
+
+	fence_init(&req->fence, &i915_fence_ring_ops, &fence_lock,
+		   ctx->user_handle, req->seqno);
+
+	ring_name[4] += ring->id;
+	sfence = sync_fence_create_dma(ring_name, &req->fence);
+	if (!sfence)
+		goto err;
+
+	*sync_fence = sfence;
+	*fence_fd = fd;
+
+	return 0;
+
+err:
+	i915_gem_request_unreference(req);
+	put_unused_fd(fd);
+	return -ENOMEM;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2e559f6e..c197770 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,7 +246,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
-#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -718,7 +718,7 @@  struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
 	__u64 flags;
 	__u64 rsvd1; /* now used for context info */
-	__u64 rsvd2;
+	__u64 rsvd2; /* now used for fence fd */
 };
 
 /** Resets the SO write offset registers for transform feedback on gen7. */
@@ -750,7 +750,9 @@  struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT		(1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+#define I915_EXEC_FENCE_OUT		(1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_FENCE_OUT<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \