Message ID | 1364942743-6041-1-git-send-email-ben@bwidawsk.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Apr 02, 2013 at 03:45:42PM -0700, Ben Widawsky wrote: > From: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > In preparation to do analysis of which context was > guilty of gpu hung, store kreffed context pointer > into request struct. > > This allows us to inspect contexts when gpu is reset > even if those contexts would already be released > by userspace. > > v2: track i915_hw_context pointers instead of using ctx_ids > (from Chris Wilson) > > v3 (Ben): Get rid of do_release() and handle refcounting more compactly. > (recommended by Chris) > > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> Now I remember why my version of reference counting was so much more complicated. In my case, I want to keep the last context around instead of the last context object. To do this we can't do a kref_put until we've switched to the next context (similar to how we manage the context object). I want to do this since the context stores the PPGTT which will currently be in use. I need to switch PDEs at context switch time. So the below isn't really useful to me, I think, and I believe I need a more complex refcounting mechanism as I described on IRC earlier today. Daniel, Chris, thoughts? > --- > drivers/gpu/drm/i915/i915_drv.h | 10 ++++++++-- > drivers/gpu/drm/i915/i915_gem.c | 16 +++++++++++++++- > drivers/gpu/drm/i915/i915_gem_context.c | 17 +++++++++++++---- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 ++++--- > 4 files changed, 40 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 5b0c699..b88b67d 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -437,6 +437,7 @@ struct i915_hw_ppgtt { > /* This must match up with the value previously used for execbuf2.rsvd1. */ > #define DEFAULT_CONTEXT_ID 0 > struct i915_hw_context { > + struct kref ref; > int id; > bool is_initialized; > struct drm_i915_file_private *file_priv; > @@ -1240,6 +1241,9 @@ struct drm_i915_gem_request { > /** Postion in the ringbuffer of the end of the request */ > u32 tail; > > + /** Context related to this request */ > + struct i915_hw_context *ctx; > + > /** Time at which this request was emitted, in jiffies. */ > unsigned long emitted_jiffies; > > @@ -1630,9 +1634,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev); > int __must_check i915_gem_idle(struct drm_device *dev); > int i915_do_add_request(struct intel_ring_buffer *ring, > u32 *seqno, > - struct drm_file *file); > + struct drm_file *file, > + struct i915_hw_context *ctx); > #define i915_add_request(ring, seqno) \ > - i915_do_add_request(ring, seqno, NULL) > + i915_do_add_request(ring, seqno, NULL, NULL) > int __must_check i915_wait_seqno(struct intel_ring_buffer *ring, > uint32_t seqno); > int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); > @@ -1676,6 +1681,7 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); > struct i915_hw_context * __must_check > i915_switch_context(struct intel_ring_buffer *ring, > struct drm_file *file, int to_id); > +void i915_gem_context_free(struct kref *ctx_ref); > int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index fbbe7d9..e55c4a8 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1997,7 +1997,8 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) > int > i915_do_add_request(struct intel_ring_buffer *ring, > u32 *out_seqno, > - struct drm_file *file) > + struct drm_file *file, > + struct i915_hw_context *ctx) > { > drm_i915_private_t *dev_priv = ring->dev->dev_private; > struct drm_i915_gem_request *request; > @@ -2037,6 +2038,11 @@ i915_do_add_request(struct intel_ring_buffer *ring, > request->seqno = intel_ring_get_seqno(ring); > request->ring = ring; > request->tail = request_ring_position; > + request->ctx = ctx; > + > + if (request->ctx) > + kref_get(&request->ctx->ref); > + > request->emitted_jiffies = jiffies; > was_empty = list_empty(&ring->request_list); > list_add_tail(&request->list, &ring->request_list); > @@ -2101,6 +2107,10 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, > > list_del(&request->list); > i915_gem_request_remove_from_client(request); > + > + if (request->ctx) > + kref_put(&request->ctx->ref, i915_gem_context_free); > + > kfree(request); > } > > @@ -2195,6 +2205,10 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) > > list_del(&request->list); > i915_gem_request_remove_from_client(request); > + > + if (request->ctx) > + kref_put(&request->ctx->ref, i915_gem_context_free); > + > kfree(request); > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index ddf26a6..19feeb6 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -126,11 +126,18 @@ static int get_context_size(struct drm_device *dev) > > static void do_destroy(struct i915_hw_context *ctx) > { > + drm_gem_object_unreference(&ctx->obj->base); > + kfree(ctx); > +} > + > +void i915_gem_context_free(struct kref *ctx_ref) > +{ > + struct i915_hw_context *ctx = container_of(ctx_ref, > + typeof(*ctx), ref); > if (ctx->file_priv) > idr_remove(&ctx->file_priv->context_idr, ctx->id); > > - drm_gem_object_unreference(&ctx->obj->base); > - kfree(ctx); > + do_destroy(ctx); > } > > static struct i915_hw_context * > @@ -145,6 +152,7 @@ create_hw_context(struct drm_device *dev, > if (ctx == NULL) > return ERR_PTR(-ENOMEM); > > + kref_init(&ctx->ref); > ctx->obj = i915_gem_alloc_object(dev, dev_priv->hw_context_size); > if (ctx->obj == NULL) { > kfree(ctx); > @@ -275,7 +283,8 @@ static int context_idr_cleanup(int id, void *p, void *data) > > BUG_ON(id == DEFAULT_CONTEXT_ID); > > - do_destroy(ctx); > + ctx->file_priv = NULL; > + kref_put(&ctx->ref, i915_gem_context_free); > > return 0; > } > @@ -511,7 +520,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, > return -ENOENT; > } > > - do_destroy(ctx); > + kref_put(&ctx->ref, i915_gem_context_free); > > mutex_unlock(&dev->struct_mutex); > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index d87b94b..7f58b2e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -794,13 +794,14 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, > static void > i915_gem_execbuffer_retire_commands(struct drm_device *dev, > struct drm_file *file, > - struct intel_ring_buffer *ring) > + struct intel_ring_buffer *ring, > + struct i915_hw_context *ctx) > { > /* Unconditionally force add_request to emit a full flush. */ > ring->gpu_caches_dirty = true; > > /* Add a breadcrumb for the completion of the batch buffer */ > - (void)i915_do_add_request(ring, NULL, file); > + (void)i915_do_add_request(ring, NULL, file, ctx); > } > > static int > @@ -1076,7 +1077,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); > > i915_gem_execbuffer_move_to_active(&eb->objects, ring); > - i915_gem_execbuffer_retire_commands(dev, file, ring); > + i915_gem_execbuffer_retire_commands(dev, file, ring, ctx); > > err: > eb_destroy(eb); > -- > 1.8.2 >
On Tue, Apr 02, 2013 at 06:27:00PM -0700, Ben Widawsky wrote: > On Tue, Apr 02, 2013 at 03:45:42PM -0700, Ben Widawsky wrote: > > From: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > > > In preparation to do analysis of which context was > > guilty of gpu hung, store kreffed context pointer > > into request struct. > > > > This allows us to inspect contexts when gpu is reset > > even if those contexts would already be released > > by userspace. > > > > v2: track i915_hw_context pointers instead of using ctx_ids > > (from Chris Wilson) > > > > v3 (Ben): Get rid of do_release() and handle refcounting more compactly. > > (recommended by Chris) > > > > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > > Now I remember why my version of reference counting was so much more > complicated. In my case, I want to keep the last context around instead > of the last context object. To do this we can't do a kref_put until > we've switched to the next context (similar to how we manage the context > object). I want to do this since the context stores the PPGTT which will > currently be in use. I need to switch PDEs at context switch time. This seems feasible using requests and a callback from retire. The alternative is something hairy like intel_overlay, hence my desire for keeping all ring operations as a i915_gem_request. -Chris
On Wed, Apr 03, 2013 at 12:56:11PM +0100, Chris Wilson wrote: > On Tue, Apr 02, 2013 at 06:27:00PM -0700, Ben Widawsky wrote: > > On Tue, Apr 02, 2013 at 03:45:42PM -0700, Ben Widawsky wrote: > > > From: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > > > > > In preparation to do analysis of which context was > > > guilty of gpu hung, store kreffed context pointer > > > into request struct. > > > > > > This allows us to inspect contexts when gpu is reset > > > even if those contexts would already be released > > > by userspace. > > > > > > v2: track i915_hw_context pointers instead of using ctx_ids > > > (from Chris Wilson) > > > > > > v3 (Ben): Get rid of do_release() and handle refcounting more compactly. > > > (recommended by Chris) > > > > > > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> > > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > > > > Now I remember why my version of reference counting was so much more > > complicated. In my case, I want to keep the last context around instead > > of the last context object. To do this we can't do a kref_put until > > we've switched to the next context (similar to how we manage the context > > object). I want to do this since the context stores the PPGTT which will > > currently be in use. I need to switch PDEs at context switch time. > > This seems feasible using requests and a callback from retire. The > alternative is something hairy like intel_overlay, hence my desire for > keeping all ring operations as a i915_gem_request. As long as the request object keeps a ref while the request is still oustanding and the ring itself keeps a ref to whatever is the currently last scheduled context, everything should work out fine. So I don't think we need to jump through any complicated hoops here. One quick bikeshed on the patch itself though: I'd like to see some static inlines for kref_get/put on contexts ... -Daniel
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5b0c699..b88b67d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -437,6 +437,7 @@ struct i915_hw_ppgtt { /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_ID 0 struct i915_hw_context { + struct kref ref; int id; bool is_initialized; struct drm_i915_file_private *file_priv; @@ -1240,6 +1241,9 @@ struct drm_i915_gem_request { /** Postion in the ringbuffer of the end of the request */ u32 tail; + /** Context related to this request */ + struct i915_hw_context *ctx; + /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; @@ -1630,9 +1634,10 @@ int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_idle(struct drm_device *dev); int i915_do_add_request(struct intel_ring_buffer *ring, u32 *seqno, - struct drm_file *file); + struct drm_file *file, + struct i915_hw_context *ctx); #define i915_add_request(ring, seqno) \ - i915_do_add_request(ring, seqno, NULL) + i915_do_add_request(ring, seqno, NULL, NULL) int __must_check i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); @@ -1676,6 +1681,7 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); struct i915_hw_context * __must_check i915_switch_context(struct intel_ring_buffer *ring, struct drm_file *file, int to_id); +void i915_gem_context_free(struct kref *ctx_ref); int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fbbe7d9..e55c4a8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1997,7 +1997,8 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) int i915_do_add_request(struct intel_ring_buffer *ring, u32 *out_seqno, - struct drm_file *file) + struct drm_file *file, + struct i915_hw_context *ctx) { drm_i915_private_t *dev_priv = ring->dev->dev_private; struct drm_i915_gem_request *request; @@ -2037,6 +2038,11 @@ i915_do_add_request(struct intel_ring_buffer *ring, request->seqno = intel_ring_get_seqno(ring); request->ring = ring; request->tail = request_ring_position; + request->ctx = ctx; + + if (request->ctx) + kref_get(&request->ctx->ref); + request->emitted_jiffies = jiffies; was_empty = list_empty(&ring->request_list); list_add_tail(&request->list, &ring->request_list); @@ -2101,6 +2107,10 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, list_del(&request->list); i915_gem_request_remove_from_client(request); + + if (request->ctx) + kref_put(&request->ctx->ref, i915_gem_context_free); + kfree(request); } @@ -2195,6 +2205,10 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) list_del(&request->list); i915_gem_request_remove_from_client(request); + + if (request->ctx) + kref_put(&request->ctx->ref, i915_gem_context_free); + kfree(request); } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index ddf26a6..19feeb6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -126,11 +126,18 @@ static int get_context_size(struct drm_device *dev) static void do_destroy(struct i915_hw_context *ctx) { + drm_gem_object_unreference(&ctx->obj->base); + kfree(ctx); +} + +void i915_gem_context_free(struct kref *ctx_ref) +{ + struct i915_hw_context *ctx = container_of(ctx_ref, + typeof(*ctx), ref); if (ctx->file_priv) idr_remove(&ctx->file_priv->context_idr, ctx->id); - drm_gem_object_unreference(&ctx->obj->base); - kfree(ctx); + do_destroy(ctx); } static struct i915_hw_context * @@ -145,6 +152,7 @@ create_hw_context(struct drm_device *dev, if (ctx == NULL) return ERR_PTR(-ENOMEM); + kref_init(&ctx->ref); ctx->obj = i915_gem_alloc_object(dev, dev_priv->hw_context_size); if (ctx->obj == NULL) { kfree(ctx); @@ -275,7 +283,8 @@ static int context_idr_cleanup(int id, void *p, void *data) BUG_ON(id == DEFAULT_CONTEXT_ID); - do_destroy(ctx); + ctx->file_priv = NULL; + kref_put(&ctx->ref, i915_gem_context_free); return 0; } @@ -511,7 +520,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return -ENOENT; } - do_destroy(ctx); + kref_put(&ctx->ref, i915_gem_context_free); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d87b94b..7f58b2e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -794,13 +794,14 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, static void i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, - struct intel_ring_buffer *ring) + struct intel_ring_buffer *ring, + struct i915_hw_context *ctx) { /* Unconditionally force add_request to emit a full flush. */ ring->gpu_caches_dirty = true; /* Add a breadcrumb for the completion of the batch buffer */ - (void)i915_do_add_request(ring, NULL, file); + (void)i915_do_add_request(ring, NULL, file, ctx); } static int @@ -1076,7 +1077,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); i915_gem_execbuffer_move_to_active(&eb->objects, ring); - i915_gem_execbuffer_retire_commands(dev, file, ring); + i915_gem_execbuffer_retire_commands(dev, file, ring, ctx); err: eb_destroy(eb);