diff mbox series

[01/13] drm/msm: Track GPU fences with idr

Message ID 1538397105-19581-2-git-send-email-smasetty@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series drm/msm: Hook up the DRM gpu scheduler | expand

Commit Message

Sharat Masetty Oct. 1, 2018, 12:31 p.m. UTC
Track the GPU fences created at submit time with idr instead of the ring
the sequence number. This helps with easily changing the underlying
fence to something we don't truly own, like the scheduler fence.

Also move the GPU fence allocation to msm_gpu_submit() and have
the function return the fence. This suits well when integrating with the
GPU scheduler.

Additionally remove the non-interruptible wait variant from msm_wait_fence()
as it is not used.

Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
---
 drivers/gpu/drm/msm/msm_drv.c        |  3 +--
 drivers/gpu/drm/msm/msm_fence.c      | 30 ++++++++++++++----------------
 drivers/gpu/drm/msm/msm_fence.h      |  5 +++--
 drivers/gpu/drm/msm/msm_gem.h        |  1 +
 drivers/gpu/drm/msm/msm_gem_submit.c | 26 ++++++++++++++++++--------
 drivers/gpu/drm/msm/msm_gpu.c        | 10 ++++++++--
 drivers/gpu/drm/msm/msm_gpu.h        |  4 ++--
 drivers/gpu/drm/msm/msm_ringbuffer.c |  5 +++++
 drivers/gpu/drm/msm/msm_ringbuffer.h |  1 +
 9 files changed, 53 insertions(+), 32 deletions(-)

Comments

Jordan Crouse Oct. 1, 2018, 7:03 p.m. UTC | #1
On Mon, Oct 01, 2018 at 06:01:33PM +0530, Sharat Masetty wrote:
> Track the GPU fences created at submit time with idr instead of the ring
> the sequence number. This helps with easily changing the underlying
> fence to something we don't truly own, like the scheduler fence.
> 
> Also move the GPU fence allocation to msm_gpu_submit() and have
> the function return the fence. This suits well when integrating with the
> GPU scheduler.
> 
> Additionally remove the non-interruptible wait variant from msm_wait_fence()
> as it is not used.

So basically this is just propping up the msm_wait_fence ioctl a bit more?
At what point should we just deprecate that bad boy and move on with our lives?

> Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
> ---
>  drivers/gpu/drm/msm/msm_drv.c        |  3 +--
>  drivers/gpu/drm/msm/msm_fence.c      | 30 ++++++++++++++----------------
>  drivers/gpu/drm/msm/msm_fence.h      |  5 +++--
>  drivers/gpu/drm/msm/msm_gem.h        |  1 +
>  drivers/gpu/drm/msm/msm_gem_submit.c | 26 ++++++++++++++++++--------
>  drivers/gpu/drm/msm/msm_gpu.c        | 10 ++++++++--
>  drivers/gpu/drm/msm/msm_gpu.h        |  4 ++--
>  drivers/gpu/drm/msm/msm_ringbuffer.c |  5 +++++
>  drivers/gpu/drm/msm/msm_ringbuffer.h |  1 +
>  9 files changed, 53 insertions(+), 32 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 021a0b6..8eaa1bd 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -746,8 +746,7 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
>  	if (!queue)
>  		return -ENOENT;
>  
> -	ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout,
> -		true);
> +	ret = msm_wait_fence(gpu->rb[queue->prio], args->fence, &timeout);
>  
>  	msm_submitqueue_put(queue);
>  	return ret;
> diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
> index 349c12f..0e7912b 100644
> --- a/drivers/gpu/drm/msm/msm_fence.c
> +++ b/drivers/gpu/drm/msm/msm_fence.c
> @@ -50,41 +50,39 @@ static inline bool fence_completed(struct msm_fence_context *fctx, uint32_t fenc
>  }
>  
>  /* legacy path for WAIT_FENCE ioctl: */
> -int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence,
> -		ktime_t *timeout, bool interruptible)
> +int msm_wait_fence(struct msm_ringbuffer *ring, uint32_t fence_id,
> +		ktime_t *timeout)
>  {
> +	struct dma_fence *fence;
>  	int ret;
>  
> -	if (fence > fctx->last_fence) {
> -		DRM_ERROR("%s: waiting on invalid fence: %u (of %u)\n",
> -				fctx->name, fence, fctx->last_fence);
> -		return -EINVAL;
> +	rcu_read_lock();
> +	fence = idr_find(&ring->fence_idr, fence_id);
> +
> +	if (!fence || !dma_fence_get_rcu(fence)) {
> +		rcu_read_unlock();
> +		return 0;
>  	}
> +	rcu_read_unlock();
>  
>  	if (!timeout) {
>  		/* no-wait: */
> -		ret = fence_completed(fctx, fence) ? 0 : -EBUSY;
> +		ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
>  	} else {
>  		unsigned long remaining_jiffies = timeout_to_jiffies(timeout);
>  
> -		if (interruptible)
> -			ret = wait_event_interruptible_timeout(fctx->event,
> -				fence_completed(fctx, fence),
> -				remaining_jiffies);
> -		else
> -			ret = wait_event_timeout(fctx->event,
> -				fence_completed(fctx, fence),
> -				remaining_jiffies);
> +		ret = dma_fence_wait_timeout(fence, true, remaining_jiffies);
>  
>  		if (ret == 0) {
>  			DBG("timeout waiting for fence: %u (completed: %u)",
> -					fence, fctx->completed_fence);
> +					fence_id, ring->memptrs->fence);
>  			ret = -ETIMEDOUT;
>  		} else if (ret != -ERESTARTSYS) {
>  			ret = 0;
>  		}
>  	}
>  
> +	dma_fence_put(fence);
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h
> index b9fe059..a8133f7 100644
> --- a/drivers/gpu/drm/msm/msm_fence.h
> +++ b/drivers/gpu/drm/msm/msm_fence.h
> @@ -19,6 +19,7 @@
>  #define __MSM_FENCE_H__
>  
>  #include "msm_drv.h"
> +#include "msm_ringbuffer.h"
>  
>  struct msm_fence_context {
>  	struct drm_device *dev;
> @@ -35,8 +36,8 @@ struct msm_fence_context * msm_fence_context_alloc(struct drm_device *dev,
>  		const char *name);
>  void msm_fence_context_free(struct msm_fence_context *fctx);
>  
> -int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence,
> -		ktime_t *timeout, bool interruptible);
> +int msm_wait_fence(struct msm_ringbuffer *ring, uint32_t fence_id,
> +		ktime_t *timeout);
>  void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence);
>  
>  struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx);
> diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
> index c5d9bd3..287f974 100644
> --- a/drivers/gpu/drm/msm/msm_gem.h
> +++ b/drivers/gpu/drm/msm/msm_gem.h
> @@ -143,6 +143,7 @@ struct msm_gem_submit {
>  	struct ww_acquire_ctx ticket;
>  	uint32_t seqno;		/* Sequence number of the submit on the ring */
>  	struct dma_fence *fence;
> +	int out_fence_id;
>  	struct msm_gpu_submitqueue *queue;
>  	struct pid *pid;    /* submitting process */
>  	bool valid;         /* true if no cmdstream patching needed */
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index 7bd83e0..00e6347 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -48,6 +48,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
>  	submit->dev = dev;
>  	submit->gpu = gpu;
>  	submit->fence = NULL;
> +	submit->out_fence_id = -1;
>  	submit->pid = get_pid(task_pid(current));
>  	submit->cmd = (void *)&submit->bos[nr_bos];
>  	submit->queue = queue;
> @@ -66,6 +67,8 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
>  
>  void msm_gem_submit_free(struct msm_gem_submit *submit)
>  {
> +	idr_remove(&submit->ring->fence_idr, submit->out_fence_id);
> +
>  	dma_fence_put(submit->fence);
>  	list_del(&submit->node);
>  	put_pid(submit->pid);
> @@ -557,26 +560,33 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
>  
>  	submit->nr_cmds = i;
>  
> -	submit->fence = msm_fence_alloc(ring->fctx);
> +	msm_gpu_submit(gpu, submit, ctx);
>  	if (IS_ERR(submit->fence)) {
>  		ret = PTR_ERR(submit->fence);
>  		submit->fence = NULL;
>  		goto out;
>  	}
>  
> +	/*
> +	 * No protection should be okay here since this is protected by the big
> +	 * GPU lock.
> +	 */
> +	submit->out_fence_id = idr_alloc_cyclic(&ring->fence_idr, submit->fence,
> +			0, INT_MAX, GFP_KERNEL);
> +
> +	if (submit->out_fence_id < 0) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	args->fence = submit->out_fence_id;
> +
>  	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
>  		sync_file = sync_file_create(submit->fence);
>  		if (!sync_file) {
>  			ret = -ENOMEM;
>  			goto out;
>  		}
> -	}
> -
> -	msm_gpu_submit(gpu, submit, ctx);
> -
> -	args->fence = submit->fence->seqno;
> -
> -	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
>  		fd_install(out_fence_fd, sync_file->file);
>  		args->fence_fd = out_fence_fd;
>  	}
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index 1c09acf..eb67172 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -602,8 +602,8 @@ void msm_gpu_retire(struct msm_gpu *gpu)
>  }
>  
>  /* add bo's to gpu's ring, and kick gpu: */
> -void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
> -		struct msm_file_private *ctx)
> +struct dma_fence *msm_gpu_submit(struct msm_gpu *gpu,
> +		struct msm_gem_submit *submit, struct msm_file_private *ctx)
>  {
>  	struct drm_device *dev = gpu->dev;
>  	struct msm_drm_private *priv = dev->dev_private;
> @@ -612,6 +612,10 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
>  
>  	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
>  
> +	submit->fence = msm_fence_alloc(ring->fctx);
> +	if (IS_ERR(submit->fence))
> +		return submit->fence;
> +
>  	pm_runtime_get_sync(&gpu->pdev->dev);
>  
>  	msm_gpu_hw_init(gpu);
> @@ -648,6 +652,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
>  	priv->lastctx = ctx;
>  
>  	hangcheck_timer_reset(gpu);
> +
> +	return submit->fence;
>  }
>  
>  /*
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index b824117..b562b25 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -235,8 +235,8 @@ int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
>  		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
>  
>  void msm_gpu_retire(struct msm_gpu *gpu);
> -void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
> -		struct msm_file_private *ctx);
> +struct dma_fence *msm_gpu_submit(struct msm_gpu *gpu,
> +		struct msm_gem_submit *submit, struct msm_file_private *ctx);
>  
>  int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
>  		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
> index 6f5295b..734f2b8 100644
> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c
> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
> @@ -59,6 +59,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
>  
>  	ring->fctx = msm_fence_context_alloc(gpu->dev, name);
>  
> +	idr_init(&ring->fence_idr);
> +
>  	return ring;
>  
>  fail:
> @@ -78,5 +80,8 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
>  		msm_gem_put_vaddr(ring->bo);
>  		drm_gem_object_put_unlocked(ring->bo);
>  	}
> +
> +	idr_destroy(&ring->fence_idr);
> +
>  	kfree(ring);
>  }
> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
> index cffce09..b74a0a9 100644
> --- a/drivers/gpu/drm/msm/msm_ringbuffer.h
> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
> @@ -40,6 +40,7 @@ struct msm_ringbuffer {
>  	struct msm_rbmemptrs *memptrs;
>  	uint64_t memptrs_iova;
>  	struct msm_fence_context *fctx;
> +	struct idr fence_idr;
>  	spinlock_t lock;
>  };
>  
> -- 
> 1.9.1
>
Rob Clark Oct. 1, 2018, 9:49 p.m. UTC | #2
On Mon, Oct 1, 2018 at 3:04 PM Jordan Crouse <jcrouse@codeaurora.org> wrote:
>
> On Mon, Oct 01, 2018 at 06:01:33PM +0530, Sharat Masetty wrote:
> > Track the GPU fences created at submit time with idr instead of the ring
> > the sequence number. This helps with easily changing the underlying
> > fence to something we don't truly own, like the scheduler fence.
> >
> > Also move the GPU fence allocation to msm_gpu_submit() and have
> > the function return the fence. This suits well when integrating with the
> > GPU scheduler.
> >
> > Additionally remove the non-interruptible wait variant from msm_wait_fence()
> > as it is not used.
>
> So basically this is just propping up the msm_wait_fence ioctl a bit more?
> At what point should we just deprecate that bad boy and move on with our lives?
>

As I mentioned on IRC, wait_fence ioctl is still in use, so
unfortunately I don't think we can just deprecate it.  I guess it is
worth some profiling, and if this shows up as enough overhead to care
about perhaps we can introduce a submit ioctl flag to disable "old"
fences.

Personally, my guestimate about what we want to do for performance
from a kernel standpoint is more towards introducing 64b reloc's
(rather than using 2x 32b relocs), or possibly skip straight ahead to
something like i915's softpin.. there are a couple other things that
I'd started on a few months back to reduce userspace draw-overhead
that I think I need to pick back up, but reloc overhead is something
near the top of the list.  (Reloc and submit overhead is partially
mitigated with freedreno, since the way the driver is structured it
gets pushed off to a background thread, but still I think there is
some room for improvement.)

BR,
-R
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 021a0b6..8eaa1bd 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -746,8 +746,7 @@  static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
 	if (!queue)
 		return -ENOENT;
 
-	ret = msm_wait_fence(gpu->rb[queue->prio]->fctx, args->fence, &timeout,
-		true);
+	ret = msm_wait_fence(gpu->rb[queue->prio], args->fence, &timeout);
 
 	msm_submitqueue_put(queue);
 	return ret;
diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
index 349c12f..0e7912b 100644
--- a/drivers/gpu/drm/msm/msm_fence.c
+++ b/drivers/gpu/drm/msm/msm_fence.c
@@ -50,41 +50,39 @@  static inline bool fence_completed(struct msm_fence_context *fctx, uint32_t fenc
 }
 
 /* legacy path for WAIT_FENCE ioctl: */
-int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence,
-		ktime_t *timeout, bool interruptible)
+int msm_wait_fence(struct msm_ringbuffer *ring, uint32_t fence_id,
+		ktime_t *timeout)
 {
+	struct dma_fence *fence;
 	int ret;
 
-	if (fence > fctx->last_fence) {
-		DRM_ERROR("%s: waiting on invalid fence: %u (of %u)\n",
-				fctx->name, fence, fctx->last_fence);
-		return -EINVAL;
+	rcu_read_lock();
+	fence = idr_find(&ring->fence_idr, fence_id);
+
+	if (!fence || !dma_fence_get_rcu(fence)) {
+		rcu_read_unlock();
+		return 0;
 	}
+	rcu_read_unlock();
 
 	if (!timeout) {
 		/* no-wait: */
-		ret = fence_completed(fctx, fence) ? 0 : -EBUSY;
+		ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY;
 	} else {
 		unsigned long remaining_jiffies = timeout_to_jiffies(timeout);
 
-		if (interruptible)
-			ret = wait_event_interruptible_timeout(fctx->event,
-				fence_completed(fctx, fence),
-				remaining_jiffies);
-		else
-			ret = wait_event_timeout(fctx->event,
-				fence_completed(fctx, fence),
-				remaining_jiffies);
+		ret = dma_fence_wait_timeout(fence, true, remaining_jiffies);
 
 		if (ret == 0) {
 			DBG("timeout waiting for fence: %u (completed: %u)",
-					fence, fctx->completed_fence);
+					fence_id, ring->memptrs->fence);
 			ret = -ETIMEDOUT;
 		} else if (ret != -ERESTARTSYS) {
 			ret = 0;
 		}
 	}
 
+	dma_fence_put(fence);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h
index b9fe059..a8133f7 100644
--- a/drivers/gpu/drm/msm/msm_fence.h
+++ b/drivers/gpu/drm/msm/msm_fence.h
@@ -19,6 +19,7 @@ 
 #define __MSM_FENCE_H__
 
 #include "msm_drv.h"
+#include "msm_ringbuffer.h"
 
 struct msm_fence_context {
 	struct drm_device *dev;
@@ -35,8 +36,8 @@  struct msm_fence_context * msm_fence_context_alloc(struct drm_device *dev,
 		const char *name);
 void msm_fence_context_free(struct msm_fence_context *fctx);
 
-int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence,
-		ktime_t *timeout, bool interruptible);
+int msm_wait_fence(struct msm_ringbuffer *ring, uint32_t fence_id,
+		ktime_t *timeout);
 void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence);
 
 struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx);
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index c5d9bd3..287f974 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -143,6 +143,7 @@  struct msm_gem_submit {
 	struct ww_acquire_ctx ticket;
 	uint32_t seqno;		/* Sequence number of the submit on the ring */
 	struct dma_fence *fence;
+	int out_fence_id;
 	struct msm_gpu_submitqueue *queue;
 	struct pid *pid;    /* submitting process */
 	bool valid;         /* true if no cmdstream patching needed */
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 7bd83e0..00e6347 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -48,6 +48,7 @@  static struct msm_gem_submit *submit_create(struct drm_device *dev,
 	submit->dev = dev;
 	submit->gpu = gpu;
 	submit->fence = NULL;
+	submit->out_fence_id = -1;
 	submit->pid = get_pid(task_pid(current));
 	submit->cmd = (void *)&submit->bos[nr_bos];
 	submit->queue = queue;
@@ -66,6 +67,8 @@  static struct msm_gem_submit *submit_create(struct drm_device *dev,
 
 void msm_gem_submit_free(struct msm_gem_submit *submit)
 {
+	idr_remove(&submit->ring->fence_idr, submit->out_fence_id);
+
 	dma_fence_put(submit->fence);
 	list_del(&submit->node);
 	put_pid(submit->pid);
@@ -557,26 +560,33 @@  int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 
 	submit->nr_cmds = i;
 
-	submit->fence = msm_fence_alloc(ring->fctx);
+	msm_gpu_submit(gpu, submit, ctx);
 	if (IS_ERR(submit->fence)) {
 		ret = PTR_ERR(submit->fence);
 		submit->fence = NULL;
 		goto out;
 	}
 
+	/*
+	 * No protection should be okay here since this is protected by the big
+	 * GPU lock.
+	 */
+	submit->out_fence_id = idr_alloc_cyclic(&ring->fence_idr, submit->fence,
+			0, INT_MAX, GFP_KERNEL);
+
+	if (submit->out_fence_id < 0) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	args->fence = submit->out_fence_id;
+
 	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
 		sync_file = sync_file_create(submit->fence);
 		if (!sync_file) {
 			ret = -ENOMEM;
 			goto out;
 		}
-	}
-
-	msm_gpu_submit(gpu, submit, ctx);
-
-	args->fence = submit->fence->seqno;
-
-	if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) {
 		fd_install(out_fence_fd, sync_file->file);
 		args->fence_fd = out_fence_fd;
 	}
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 1c09acf..eb67172 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -602,8 +602,8 @@  void msm_gpu_retire(struct msm_gpu *gpu)
 }
 
 /* add bo's to gpu's ring, and kick gpu: */
-void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-		struct msm_file_private *ctx)
+struct dma_fence *msm_gpu_submit(struct msm_gpu *gpu,
+		struct msm_gem_submit *submit, struct msm_file_private *ctx)
 {
 	struct drm_device *dev = gpu->dev;
 	struct msm_drm_private *priv = dev->dev_private;
@@ -612,6 +612,10 @@  void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
+	submit->fence = msm_fence_alloc(ring->fctx);
+	if (IS_ERR(submit->fence))
+		return submit->fence;
+
 	pm_runtime_get_sync(&gpu->pdev->dev);
 
 	msm_gpu_hw_init(gpu);
@@ -648,6 +652,8 @@  void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	priv->lastctx = ctx;
 
 	hangcheck_timer_reset(gpu);
+
+	return submit->fence;
 }
 
 /*
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index b824117..b562b25 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -235,8 +235,8 @@  int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
 		uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
 
 void msm_gpu_retire(struct msm_gpu *gpu);
-void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
-		struct msm_file_private *ctx);
+struct dma_fence *msm_gpu_submit(struct msm_gpu *gpu,
+		struct msm_gem_submit *submit, struct msm_file_private *ctx);
 
 int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 6f5295b..734f2b8 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -59,6 +59,8 @@  struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
 
 	ring->fctx = msm_fence_context_alloc(gpu->dev, name);
 
+	idr_init(&ring->fence_idr);
+
 	return ring;
 
 fail:
@@ -78,5 +80,8 @@  void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
 		msm_gem_put_vaddr(ring->bo);
 		drm_gem_object_put_unlocked(ring->bo);
 	}
+
+	idr_destroy(&ring->fence_idr);
+
 	kfree(ring);
 }
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
index cffce09..b74a0a9 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -40,6 +40,7 @@  struct msm_ringbuffer {
 	struct msm_rbmemptrs *memptrs;
 	uint64_t memptrs_iova;
 	struct msm_fence_context *fctx;
+	struct idr fence_idr;
 	spinlock_t lock;
 };