diff mbox

drm/i915: Fix context/engine cleanup order

Message ID 1450110604-19534-1-git-send-email-nicholas.hoath@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nick Hoath Dec. 14, 2015, 4:30 p.m. UTC
Swap the order of context & engine cleanup, so that it is now
contexts, then engines.
This allows the context clean up code to do things like confirm
that ring->dev->struct_mutex is locked without a NULL pointer
dereference.
This came about as a result of the 'intel_ring_initialized() must
be simple and inline' patch now using ring->dev as an initialised
flag.
Rename the cleanup function to reflect what it actually does.
Also clean up some very annoying whitespace issues at the same time.
Previous code did a kunmap() on the wrong page, and didn't account for
the fact that the HWSP and the default context are the different offsets
within the same object.

v2: Also make the fix in i915_load_modeset_init, not just
    in i915_driver_unload (Chris Wilson)
v3: Folded in Dave Gordon's fix for HWSP kunmap issues.

Signed-off-by: Nick Hoath <nicholas.hoath@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Gordon <david.s.gordon@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c         |  4 +--
 drivers/gpu/drm/i915/i915_drv.h         |  2 +-
 drivers/gpu/drm/i915/i915_gem.c         | 23 ++++++-------
 drivers/gpu/drm/i915/i915_gem_context.c |  9 ++++--
 drivers/gpu/drm/i915/intel_lrc.c        | 57 +++++++++++++++++++++++----------
 5 files changed, 62 insertions(+), 33 deletions(-)

Comments

Chris Wilson Dec. 14, 2015, 5:13 p.m. UTC | #1
On Mon, Dec 14, 2015 at 04:30:04PM +0000, Nick Hoath wrote:
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 900ffd0..7df3c7a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -431,17 +431,22 @@ void i915_gem_context_fini(struct drm_device *dev)
>  		i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
>  	}
>  
> -	for (i = 0; i < I915_NUM_RINGS; i++) {
> +	for (i = I915_NUM_RINGS; --i >= 0;) {
>  		struct intel_engine_cs *ring = &dev_priv->ring[i];
>  
>  		if (ring->last_context)
>  			i915_gem_context_unreference(ring->last_context);
>  
> -		ring->default_context = NULL;
>  		ring->last_context = NULL;
>  	}
>  
>  	i915_gem_context_unreference(dctx);
> +
> +	for (i = I915_NUM_RINGS; --i >= 0;) {
> +		struct intel_engine_cs *ring = &dev_priv->ring[i];
> +
> +		ring->default_context = NULL;
> +	}
>  }

Why?
-Chris
Chris Wilson Dec. 14, 2015, 10:17 p.m. UTC | #2
On Mon, Dec 14, 2015 at 05:13:43PM +0000, Chris Wilson wrote:
> On Mon, Dec 14, 2015 at 04:30:04PM +0000, Nick Hoath wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> > index 900ffd0..7df3c7a 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > @@ -431,17 +431,22 @@ void i915_gem_context_fini(struct drm_device *dev)
> >  		i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
> >  	}
> >  
> > -	for (i = 0; i < I915_NUM_RINGS; i++) {
> > +	for (i = I915_NUM_RINGS; --i >= 0;) {
> >  		struct intel_engine_cs *ring = &dev_priv->ring[i];
> >  
> >  		if (ring->last_context)
> >  			i915_gem_context_unreference(ring->last_context);
> >  
> > -		ring->default_context = NULL;
> >  		ring->last_context = NULL;
> >  	}
> >  
> >  	i915_gem_context_unreference(dctx);
> > +
> > +	for (i = I915_NUM_RINGS; --i >= 0;) {
> > +		struct intel_engine_cs *ring = &dev_priv->ring[i];
> > +
> > +		ring->default_context = NULL;
> > +	}
> >  }
> 
> Why?

Ok, as you say intel_lrc needs to iterate over all rings to look at the
default context in context_free. Feels odd, and breaks the onion of
context_init / context_fini. The pecularity is in lrc and I would push
the oddness back there.

That engine->default_context is pinned and mapped is only because of the
HWS and for no other reason does it exist (seqno writes are to address
not relative the per-context HWS index). You could simply allocate a
page for each engine and use that as the global HWS irrespective of
contexts.
-Chris
Dave Gordon Dec. 15, 2015, 7:11 p.m. UTC | #3
On 14/12/15 22:17, Chris Wilson wrote:
> On Mon, Dec 14, 2015 at 05:13:43PM +0000, Chris Wilson wrote:
>> On Mon, Dec 14, 2015 at 04:30:04PM +0000, Nick Hoath wrote:
>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>>> index 900ffd0..7df3c7a 100644
>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>> @@ -431,17 +431,22 @@ void i915_gem_context_fini(struct drm_device *dev)
>>>   		i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
>>>   	}
>>>
>>> -	for (i = 0; i < I915_NUM_RINGS; i++) {
>>> +	for (i = I915_NUM_RINGS; --i >= 0;) {
>>>   		struct intel_engine_cs *ring = &dev_priv->ring[i];
>>>
>>>   		if (ring->last_context)
>>>   			i915_gem_context_unreference(ring->last_context);
>>>
>>> -		ring->default_context = NULL;
>>>   		ring->last_context = NULL;
>>>   	}
>>>
>>>   	i915_gem_context_unreference(dctx);
>>> +
>>> +	for (i = I915_NUM_RINGS; --i >= 0;) {
>>> +		struct intel_engine_cs *ring = &dev_priv->ring[i];
>>> +
>>> +		ring->default_context = NULL;
>>> +	}
>>>   }
>>
>> Why?
>
> Ok, as you say intel_lrc needs to iterate over all rings to look at the
> default context in context_free. Feels odd, and breaks the onion of
> context_init / context_fini. The pecularity is in lrc and I would push
> the oddness back there.
>
> That engine->default_context is pinned and mapped is only because of the
> HWS and for no other reason does it exist (seqno writes are to address
> not relative the per-context HWS index). You could simply allocate a
> page for each engine and use that as the global HWS irrespective of
> contexts.
> -Chris

The default (render) context has to be pinned anyway (nowadays), because 
the GuC can access it asynchronously if it decides to reset an engine. 
Also we use it when we need to give the GuC a valid LRCA for things that 
aren't requests, e.g. suspend/resume et al.

Having a default context that's created and owned by the driver is an 
old (pre-execlist) decision, and it allows us to switch away from all 
user-visible contexts for idling. Leaving it pinned at all times means 
we don't risk being unable to find space when we're trying to idle in 
order to reclaim space!

Seqno writes are relative to the global HWSP independent of which 
context is doing them. User code could be using the PPHWSP for its own 
purposes.

I agree that it might be nicer to have a separate object for the global 
HWSP, as we do in legacy mode. That would cost an extra page per engine, 
but meh.

However a simpler solution for now might be to put a flag inside the 
default context, rather than the LRC code deducing that that it's the 
default one because the engine structure points back at it. That removes 
the dependency of the LRC-specific code on knowing what the ring 
(engine!) management code is doing :)

I'll discuss with Nick tomorrow, and I think we'll find a neater patch :)

.Dave.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 84e2b20..4dad121 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -449,8 +449,8 @@  static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_gem:
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_cleanup_ringbuffer(dev);
 	i915_gem_context_fini(dev);
+	i915_gem_cleanup_engines(dev);
 	mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
 	intel_guc_ucode_fini(dev);
@@ -1188,8 +1188,8 @@  int i915_driver_unload(struct drm_device *dev)
 
 	intel_guc_ucode_fini(dev);
 	mutex_lock(&dev->struct_mutex);
-	i915_gem_cleanup_ringbuffer(dev);
 	i915_gem_context_fini(dev);
+	i915_gem_cleanup_engines(dev);
 	mutex_unlock(&dev->struct_mutex);
 	intel_fbc_cleanup_cfb(dev_priv);
 	i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5edd393..e317f88 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3016,7 +3016,7 @@  int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
-void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
+void i915_gem_cleanup_engines(struct drm_device *dev);
 int __must_check i915_gpu_idle(struct drm_device *dev);
 int __must_check i915_gem_suspend(struct drm_device *dev);
 void __i915_add_request(struct drm_i915_gem_request *req,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8e2acde..04a22db 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4823,7 +4823,7 @@  i915_gem_init_hw(struct drm_device *dev)
 
 		ret = i915_gem_request_alloc(ring, ring->default_context, &req);
 		if (ret) {
-			i915_gem_cleanup_ringbuffer(dev);
+			i915_gem_cleanup_engines(dev);
 			goto out;
 		}
 
@@ -4836,7 +4836,7 @@  i915_gem_init_hw(struct drm_device *dev)
 		if (ret && ret != -EIO) {
 			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
 			i915_gem_request_cancel(req);
-			i915_gem_cleanup_ringbuffer(dev);
+			i915_gem_cleanup_engines(dev);
 			goto out;
 		}
 
@@ -4844,7 +4844,7 @@  i915_gem_init_hw(struct drm_device *dev)
 		if (ret && ret != -EIO) {
 			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
 			i915_gem_request_cancel(req);
-			i915_gem_cleanup_ringbuffer(dev);
+			i915_gem_cleanup_engines(dev);
 			goto out;
 		}
 
@@ -4919,7 +4919,7 @@  out_unlock:
 }
 
 void
-i915_gem_cleanup_ringbuffer(struct drm_device *dev)
+i915_gem_cleanup_engines(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
@@ -4928,13 +4928,14 @@  i915_gem_cleanup_ringbuffer(struct drm_device *dev)
 	for_each_ring(ring, dev_priv, i)
 		dev_priv->gt.cleanup_ring(ring);
 
-    if (i915.enable_execlists)
-            /*
-             * Neither the BIOS, ourselves or any other kernel
-             * expects the system to be in execlists mode on startup,
-             * so we need to reset the GPU back to legacy mode.
-             */
-            intel_gpu_reset(dev);
+	if (i915.enable_execlists) {
+		/*
+		 * Neither the BIOS, ourselves or any other kernel
+		 * expects the system to be in execlists mode on startup,
+		 * so we need to reset the GPU back to legacy mode.
+		 */
+		intel_gpu_reset(dev);
+	}
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 900ffd0..7df3c7a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -431,17 +431,22 @@  void i915_gem_context_fini(struct drm_device *dev)
 		i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state);
 	}
 
-	for (i = 0; i < I915_NUM_RINGS; i++) {
+	for (i = I915_NUM_RINGS; --i >= 0;) {
 		struct intel_engine_cs *ring = &dev_priv->ring[i];
 
 		if (ring->last_context)
 			i915_gem_context_unreference(ring->last_context);
 
-		ring->default_context = NULL;
 		ring->last_context = NULL;
 	}
 
 	i915_gem_context_unreference(dctx);
+
+	for (i = I915_NUM_RINGS; --i >= 0;) {
+		struct intel_engine_cs *ring = &dev_priv->ring[i];
+
+		ring->default_context = NULL;
+	}
 }
 
 int i915_gem_context_enable(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7644c48..ad12304 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -226,9 +226,8 @@  enum {
 #define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT  0x17
 
 static int intel_lr_context_pin(struct drm_i915_gem_request *rq);
-static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
-		struct drm_i915_gem_object *default_ctx_obj);
-
+static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring);
+static void lrc_teardown_hardware_status_page(struct intel_engine_cs *ring);
 
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
@@ -1473,8 +1472,7 @@  static int gen8_init_common_ring(struct intel_engine_cs *ring)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u8 next_context_status_buffer_hw;
 
-	lrc_setup_hardware_status_page(ring,
-				ring->default_context->engine[ring->id].state);
+	lrc_setup_hardware_status_page(ring);
 
 	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
 	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
@@ -1905,10 +1903,9 @@  void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
 	i915_cmd_parser_fini_ring(ring);
 	i915_gem_batch_pool_fini(&ring->batch_pool);
 
-	if (ring->status_page.obj) {
-		kunmap(sg_page(ring->status_page.obj->pages->sgl));
-		ring->status_page.obj = NULL;
-	}
+	/* Status page should have gone already */
+	WARN_ON(ring->status_page.page_addr);
+	WARN_ON(ring->status_page.obj);
 
 	lrc_destroy_wa_ctx_obj(ring);
 	ring->dev = NULL;
@@ -2370,7 +2367,7 @@  void intel_lr_context_free(struct intel_context *ctx)
 {
 	int i;
 
-	for (i = 0; i < I915_NUM_RINGS; i++) {
+	for (i = I915_NUM_RINGS; --i >= 0;) {
 		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
 
 		if (ctx_obj) {
@@ -2379,6 +2376,11 @@  void intel_lr_context_free(struct intel_context *ctx)
 			struct intel_engine_cs *ring = ringbuf->ring;
 
 			if (ctx == ring->default_context) {
+				/*
+				 * The HWSP is part of the default context
+				 * object in LRC mode.
+				 */
+				lrc_teardown_hardware_status_page(ring);
 				intel_unpin_ringbuffer_obj(ringbuf);
 				i915_gem_object_ggtt_unpin(ctx_obj);
 			}
@@ -2413,24 +2415,45 @@  static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
 	return ret;
 }
 
-static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
-		struct drm_i915_gem_object *default_ctx_obj)
+static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct intel_context *dctx = ring->default_context;
+	struct drm_i915_gem_object *dctx_obj = dctx->engine[ring->id].state;
+	u64 dctx_addr = i915_gem_obj_ggtt_offset(dctx_obj);
 	struct page *page;
 
-	/* The HWSP is part of the default context object in LRC mode. */
-	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
-			+ LRC_PPHWSP_PN * PAGE_SIZE;
-	page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
+	/*
+	 * The HWSP is part of the default context object in LRC mode.
+	 * Note that it doesn't contribute to the refcount!
+	 */
+	page = i915_gem_object_get_page(dctx_obj, LRC_PPHWSP_PN);
 	ring->status_page.page_addr = kmap(page);
-	ring->status_page.obj = default_ctx_obj;
+	ring->status_page.gfx_addr = dctx_addr + LRC_PPHWSP_PN * PAGE_SIZE;
+	ring->status_page.obj = dctx_obj;
 
 	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
 			(u32)ring->status_page.gfx_addr);
 	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
 }
 
+/* This should be called before the default context is destroyed */
+static void lrc_teardown_hardware_status_page(struct intel_engine_cs *ring)
+{
+	struct intel_context *dctx = ring->default_context;
+	struct drm_i915_gem_object *dctx_obj = dctx->engine[ring->id].state;
+	struct page *page;
+
+	if (ring->status_page.page_addr) {
+		page = i915_gem_object_get_page(dctx_obj, LRC_PPHWSP_PN);
+		kunmap(page);
+		ring->status_page.page_addr = NULL;
+	}
+
+	ring->status_page.obj = NULL;
+}
+
+
 /**
  * intel_lr_context_deferred_alloc() - create the LRC specific bits of a context
  * @ctx: LR context to create.