diff mbox

[1/2,v3] drm/i915: reference count for i915_hw_contexts

Message ID 1364942743-6041-1-git-send-email-ben@bwidawsk.net (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky April 2, 2013, 10:45 p.m. UTC
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>

In preparation to do analysis of which context was
guilty of gpu hung, store kreffed context pointer
into request struct.

This allows us to inspect contexts when gpu is reset
even if those contexts would already be released
by userspace.

v2: track i915_hw_context pointers instead of using ctx_ids
    (from Chris Wilson)

v3 (Ben): Get rid of do_release() and handle refcounting more compactly.
(recommended by Chris)

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h            | 10 ++++++++--
 drivers/gpu/drm/i915/i915_gem.c            | 16 +++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.c    | 17 +++++++++++++----
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  7 ++++---
 4 files changed, 40 insertions(+), 10 deletions(-)

Comments

Ben Widawsky April 3, 2013, 1:27 a.m. UTC | #1
On Tue, Apr 02, 2013 at 03:45:42PM -0700, Ben Widawsky wrote:
> From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> 
> In preparation to do analysis of which context was
> guilty of gpu hung, store kreffed context pointer
> into request struct.
> 
> This allows us to inspect contexts when gpu is reset
> even if those contexts would already be released
> by userspace.
> 
> v2: track i915_hw_context pointers instead of using ctx_ids
>     (from Chris Wilson)
> 
> v3 (Ben): Get rid of do_release() and handle refcounting more compactly.
> (recommended by Chris)
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>

Now I remember why my version of reference counting was so much more
complicated. In my case, I want to keep the last context around instead
of the last context object. To do this we can't do a kref_put until
we've switched to the next context (similar to how we manage the context
object). I want to do this since the context stores the PPGTT which will
currently be in use. I need to switch PDEs at context switch time.

So the below isn't really useful to me, I think, and I believe I need a
more complex refcounting mechanism as I described on IRC earlier today.

Daniel, Chris, thoughts?


> ---
>  drivers/gpu/drm/i915/i915_drv.h            | 10 ++++++++--
>  drivers/gpu/drm/i915/i915_gem.c            | 16 +++++++++++++++-
>  drivers/gpu/drm/i915/i915_gem_context.c    | 17 +++++++++++++----
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  7 ++++---
>  4 files changed, 40 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5b0c699..b88b67d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -437,6 +437,7 @@ struct i915_hw_ppgtt {
>  /* This must match up with the value previously used for execbuf2.rsvd1. */
>  #define DEFAULT_CONTEXT_ID 0
>  struct i915_hw_context {
> +	struct kref ref;
>  	int id;
>  	bool is_initialized;
>  	struct drm_i915_file_private *file_priv;
> @@ -1240,6 +1241,9 @@ struct drm_i915_gem_request {
>  	/** Postion in the ringbuffer of the end of the request */
>  	u32 tail;
>  
> +	/** Context related to this request */
> +	struct i915_hw_context *ctx;
> +
>  	/** Time at which this request was emitted, in jiffies. */
>  	unsigned long emitted_jiffies;
>  
> @@ -1630,9 +1634,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev);
>  int __must_check i915_gem_idle(struct drm_device *dev);
>  int i915_do_add_request(struct intel_ring_buffer *ring,
>  			u32 *seqno,
> -			struct drm_file *file);
> +			struct drm_file *file,
> +			struct i915_hw_context *ctx);
>  #define i915_add_request(ring, seqno) \
> -	i915_do_add_request(ring, seqno, NULL)
> +	i915_do_add_request(ring, seqno, NULL, NULL)
>  int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
>  				 uint32_t seqno);
>  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
> @@ -1676,6 +1681,7 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
>  struct i915_hw_context * __must_check
>  i915_switch_context(struct intel_ring_buffer *ring,
>  		    struct drm_file *file, int to_id);
> +void i915_gem_context_free(struct kref *ctx_ref);
>  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  				  struct drm_file *file);
>  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index fbbe7d9..e55c4a8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1997,7 +1997,8 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
>  int
>  i915_do_add_request(struct intel_ring_buffer *ring,
>  		    u32 *out_seqno,
> -		    struct drm_file *file)
> +		    struct drm_file *file,
> +		    struct i915_hw_context *ctx)
>  {
>  	drm_i915_private_t *dev_priv = ring->dev->dev_private;
>  	struct drm_i915_gem_request *request;
> @@ -2037,6 +2038,11 @@ i915_do_add_request(struct intel_ring_buffer *ring,
>  	request->seqno = intel_ring_get_seqno(ring);
>  	request->ring = ring;
>  	request->tail = request_ring_position;
> +	request->ctx = ctx;
> +
> +	if (request->ctx)
> +		kref_get(&request->ctx->ref);
> +
>  	request->emitted_jiffies = jiffies;
>  	was_empty = list_empty(&ring->request_list);
>  	list_add_tail(&request->list, &ring->request_list);
> @@ -2101,6 +2107,10 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
>  
>  		list_del(&request->list);
>  		i915_gem_request_remove_from_client(request);
> +
> +		if (request->ctx)
> +			kref_put(&request->ctx->ref, i915_gem_context_free);
> +
>  		kfree(request);
>  	}
>  
> @@ -2195,6 +2205,10 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
>  
>  		list_del(&request->list);
>  		i915_gem_request_remove_from_client(request);
> +
> +		if (request->ctx)
> +			kref_put(&request->ctx->ref, i915_gem_context_free);
> +
>  		kfree(request);
>  	}
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index ddf26a6..19feeb6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -126,11 +126,18 @@ static int get_context_size(struct drm_device *dev)
>  
>  static void do_destroy(struct i915_hw_context *ctx)
>  {
> +	drm_gem_object_unreference(&ctx->obj->base);
> +	kfree(ctx);
> +}
> +
> +void i915_gem_context_free(struct kref *ctx_ref)
> +{
> +	struct i915_hw_context *ctx = container_of(ctx_ref,
> +						   typeof(*ctx), ref);
>  	if (ctx->file_priv)
>  		idr_remove(&ctx->file_priv->context_idr, ctx->id);
>  
> -	drm_gem_object_unreference(&ctx->obj->base);
> -	kfree(ctx);
> +	do_destroy(ctx);
>  }
>  
>  static struct i915_hw_context *
> @@ -145,6 +152,7 @@ create_hw_context(struct drm_device *dev,
>  	if (ctx == NULL)
>  		return ERR_PTR(-ENOMEM);
>  
> +	kref_init(&ctx->ref);
>  	ctx->obj = i915_gem_alloc_object(dev, dev_priv->hw_context_size);
>  	if (ctx->obj == NULL) {
>  		kfree(ctx);
> @@ -275,7 +283,8 @@ static int context_idr_cleanup(int id, void *p, void *data)
>  
>  	BUG_ON(id == DEFAULT_CONTEXT_ID);
>  
> -	do_destroy(ctx);
> +	ctx->file_priv = NULL;
> +	kref_put(&ctx->ref, i915_gem_context_free);
>  
>  	return 0;
>  }
> @@ -511,7 +520,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>  		return -ENOENT;
>  	}
>  
> -	do_destroy(ctx);
> +	kref_put(&ctx->ref, i915_gem_context_free);
>  
>  	mutex_unlock(&dev->struct_mutex);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index d87b94b..7f58b2e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -794,13 +794,14 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
>  static void
>  i915_gem_execbuffer_retire_commands(struct drm_device *dev,
>  				    struct drm_file *file,
> -				    struct intel_ring_buffer *ring)
> +				    struct intel_ring_buffer *ring,
> +				    struct i915_hw_context *ctx)
>  {
>  	/* Unconditionally force add_request to emit a full flush. */
>  	ring->gpu_caches_dirty = true;
>  
>  	/* Add a breadcrumb for the completion of the batch buffer */
> -	(void)i915_do_add_request(ring, NULL, file);
> +	(void)i915_do_add_request(ring, NULL, file, ctx);
>  }
>  
>  static int
> @@ -1076,7 +1077,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
>  
>  	i915_gem_execbuffer_move_to_active(&eb->objects, ring);
> -	i915_gem_execbuffer_retire_commands(dev, file, ring);
> +	i915_gem_execbuffer_retire_commands(dev, file, ring, ctx);
>  
>  err:
>  	eb_destroy(eb);
> -- 
> 1.8.2
>
Chris Wilson April 3, 2013, 11:56 a.m. UTC | #2
On Tue, Apr 02, 2013 at 06:27:00PM -0700, Ben Widawsky wrote:
> On Tue, Apr 02, 2013 at 03:45:42PM -0700, Ben Widawsky wrote:
> > From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > 
> > In preparation to do analysis of which context was
> > guilty of gpu hung, store kreffed context pointer
> > into request struct.
> > 
> > This allows us to inspect contexts when gpu is reset
> > even if those contexts would already be released
> > by userspace.
> > 
> > v2: track i915_hw_context pointers instead of using ctx_ids
> >     (from Chris Wilson)
> > 
> > v3 (Ben): Get rid of do_release() and handle refcounting more compactly.
> > (recommended by Chris)
> > 
> > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> 
> Now I remember why my version of reference counting was so much more
> complicated. In my case, I want to keep the last context around instead
> of the last context object. To do this we can't do a kref_put until
> we've switched to the next context (similar to how we manage the context
> object). I want to do this since the context stores the PPGTT which will
> currently be in use. I need to switch PDEs at context switch time.

This seems feasible using requests and a callback from retire. The
alternative is something hairy like intel_overlay, hence my desire for
keeping all ring operations as a i915_gem_request.
-Chris
Daniel Vetter April 3, 2013, 4:37 p.m. UTC | #3
On Wed, Apr 03, 2013 at 12:56:11PM +0100, Chris Wilson wrote:
> On Tue, Apr 02, 2013 at 06:27:00PM -0700, Ben Widawsky wrote:
> > On Tue, Apr 02, 2013 at 03:45:42PM -0700, Ben Widawsky wrote:
> > > From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> > > 
> > > In preparation to do analysis of which context was
> > > guilty of gpu hung, store kreffed context pointer
> > > into request struct.
> > > 
> > > This allows us to inspect contexts when gpu is reset
> > > even if those contexts would already be released
> > > by userspace.
> > > 
> > > v2: track i915_hw_context pointers instead of using ctx_ids
> > >     (from Chris Wilson)
> > > 
> > > v3 (Ben): Get rid of do_release() and handle refcounting more compactly.
> > > (recommended by Chris)
> > > 
> > > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > 
> > Now I remember why my version of reference counting was so much more
> > complicated. In my case, I want to keep the last context around instead
> > of the last context object. To do this we can't do a kref_put until
> > we've switched to the next context (similar to how we manage the context
> > object). I want to do this since the context stores the PPGTT which will
> > currently be in use. I need to switch PDEs at context switch time.
> 
> This seems feasible using requests and a callback from retire. The
> alternative is something hairy like intel_overlay, hence my desire for
> keeping all ring operations as a i915_gem_request.

As long as the request object keeps a ref while the request is still
oustanding and the ring itself keeps a ref to whatever is the currently
last scheduled context, everything should work out fine. So I don't think
we need to jump through any complicated hoops here.

One quick bikeshed on the patch itself though: I'd like to see some static
inlines for kref_get/put on contexts ...
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5b0c699..b88b67d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -437,6 +437,7 @@  struct i915_hw_ppgtt {
 /* This must match up with the value previously used for execbuf2.rsvd1. */
 #define DEFAULT_CONTEXT_ID 0
 struct i915_hw_context {
+	struct kref ref;
 	int id;
 	bool is_initialized;
 	struct drm_i915_file_private *file_priv;
@@ -1240,6 +1241,9 @@  struct drm_i915_gem_request {
 	/** Postion in the ringbuffer of the end of the request */
 	u32 tail;
 
+	/** Context related to this request */
+	struct i915_hw_context *ctx;
+
 	/** Time at which this request was emitted, in jiffies. */
 	unsigned long emitted_jiffies;
 
@@ -1630,9 +1634,10 @@  int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_idle(struct drm_device *dev);
 int i915_do_add_request(struct intel_ring_buffer *ring,
 			u32 *seqno,
-			struct drm_file *file);
+			struct drm_file *file,
+			struct i915_hw_context *ctx);
 #define i915_add_request(ring, seqno) \
-	i915_do_add_request(ring, seqno, NULL)
+	i915_do_add_request(ring, seqno, NULL, NULL)
 int __must_check i915_wait_seqno(struct intel_ring_buffer *ring,
 				 uint32_t seqno);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
@@ -1676,6 +1681,7 @@  void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 struct i915_hw_context * __must_check
 i915_switch_context(struct intel_ring_buffer *ring,
 		    struct drm_file *file, int to_id);
+void i915_gem_context_free(struct kref *ctx_ref);
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fbbe7d9..e55c4a8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1997,7 +1997,8 @@  i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
 int
 i915_do_add_request(struct intel_ring_buffer *ring,
 		    u32 *out_seqno,
-		    struct drm_file *file)
+		    struct drm_file *file,
+		    struct i915_hw_context *ctx)
 {
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
@@ -2037,6 +2038,11 @@  i915_do_add_request(struct intel_ring_buffer *ring,
 	request->seqno = intel_ring_get_seqno(ring);
 	request->ring = ring;
 	request->tail = request_ring_position;
+	request->ctx = ctx;
+
+	if (request->ctx)
+		kref_get(&request->ctx->ref);
+
 	request->emitted_jiffies = jiffies;
 	was_empty = list_empty(&ring->request_list);
 	list_add_tail(&request->list, &ring->request_list);
@@ -2101,6 +2107,10 @@  static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
 
 		list_del(&request->list);
 		i915_gem_request_remove_from_client(request);
+
+		if (request->ctx)
+			kref_put(&request->ctx->ref, i915_gem_context_free);
+
 		kfree(request);
 	}
 
@@ -2195,6 +2205,10 @@  i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 
 		list_del(&request->list);
 		i915_gem_request_remove_from_client(request);
+
+		if (request->ctx)
+			kref_put(&request->ctx->ref, i915_gem_context_free);
+
 		kfree(request);
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ddf26a6..19feeb6 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -126,11 +126,18 @@  static int get_context_size(struct drm_device *dev)
 
 static void do_destroy(struct i915_hw_context *ctx)
 {
+	drm_gem_object_unreference(&ctx->obj->base);
+	kfree(ctx);
+}
+
+void i915_gem_context_free(struct kref *ctx_ref)
+{
+	struct i915_hw_context *ctx = container_of(ctx_ref,
+						   typeof(*ctx), ref);
 	if (ctx->file_priv)
 		idr_remove(&ctx->file_priv->context_idr, ctx->id);
 
-	drm_gem_object_unreference(&ctx->obj->base);
-	kfree(ctx);
+	do_destroy(ctx);
 }
 
 static struct i915_hw_context *
@@ -145,6 +152,7 @@  create_hw_context(struct drm_device *dev,
 	if (ctx == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	kref_init(&ctx->ref);
 	ctx->obj = i915_gem_alloc_object(dev, dev_priv->hw_context_size);
 	if (ctx->obj == NULL) {
 		kfree(ctx);
@@ -275,7 +283,8 @@  static int context_idr_cleanup(int id, void *p, void *data)
 
 	BUG_ON(id == DEFAULT_CONTEXT_ID);
 
-	do_destroy(ctx);
+	ctx->file_priv = NULL;
+	kref_put(&ctx->ref, i915_gem_context_free);
 
 	return 0;
 }
@@ -511,7 +520,7 @@  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 
-	do_destroy(ctx);
+	kref_put(&ctx->ref, i915_gem_context_free);
 
 	mutex_unlock(&dev->struct_mutex);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d87b94b..7f58b2e 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -794,13 +794,14 @@  i915_gem_execbuffer_move_to_active(struct list_head *objects,
 static void
 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 				    struct drm_file *file,
-				    struct intel_ring_buffer *ring)
+				    struct intel_ring_buffer *ring,
+				    struct i915_hw_context *ctx)
 {
 	/* Unconditionally force add_request to emit a full flush. */
 	ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)i915_do_add_request(ring, NULL, file);
+	(void)i915_do_add_request(ring, NULL, file, ctx);
 }
 
 static int
@@ -1076,7 +1077,7 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
 
 	i915_gem_execbuffer_move_to_active(&eb->objects, ring);
-	i915_gem_execbuffer_retire_commands(dev, file, ring);
+	i915_gem_execbuffer_retire_commands(dev, file, ring, ctx);
 
 err:
 	eb_destroy(eb);