
[CI-ping,14/15] drm/i915: Reorganise legacy context switch to cope with late failure

Message ID 1460491389-8602-14-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson April 12, 2016, 8:03 p.m. UTC
After mi_set_context() succeeds, we need to update the state of the
engine's last_context. This ensures that we hold a pin on the context
whilst the hardware may write to it. However, since we didn't complete
the post-switch setup of the context, we need to force the subsequent
use of the same context to complete the setup (which means updating
should_skip_switch()).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c | 215 ++++++++++++++++----------------
 1 file changed, 109 insertions(+), 106 deletions(-)
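
For orientation before the review below, here is an abbreviated outline of the
error handling this patch ends up with in do_rcs_switch(), reconstructed from
the diff further down; the "..." elisions and the comments are added here for
explanation and are not part of the patch:

static int do_rcs_switch(struct drm_i915_gem_request *req)
{
	...
	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
		ret = mi_set_context(req, hw_flags);
		if (ret)
			goto unpin_out; /* last_context not updated, we still own the pin */
	}

	/* The hardware may now write to 'to', so transfer the engine's
	 * reference (and with it the pin) before attempting the rest of
	 * the setup. */
	i915_gem_context_reference(to);
	engine->last_context = to;

	/* From here on a failure simply returns: 'to' stays pinned via
	 * last_context, and because legacy_hw_ctx.initialized, remap_slice
	 * and pd_dirty_rings are left untouched, should_skip_switch() makes
	 * the next request on this context finish the setup. */
	if (needs_pd_load_post(to, hw_flags)) { ... }
	/* ... L3 slice remapping ... */
	if (!to->legacy_hw_ctx.initialized) {
		/* engine->init_context(req) may still fail and return */
		to->legacy_hw_ctx.initialized = true;
	}
	return 0;

unpin_out:
	i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
	return ret;
}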

Comments

Daniel Vetter April 13, 2016, 9:59 a.m. UTC | #1
On Tue, Apr 12, 2016 at 09:03:08PM +0100, Chris Wilson wrote:
> After mi_set_context() succeeds, we need to update the state of the
> engine's last_context. This ensures that we hold a pin on the context
> whilst the hardware may write to it. However, since we didn't complete
> the post-switch setup of the context, we need to force the subsequent
> use of the same context to complete the setup (which means updating
> should_skip_switch()).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>

Specializing do_rcs_switch() makes sense, but it shouldn't be intermingled
with the bugfix. Assuming I'm reading things correctly, the real bugfix is
adding the check for hw_ctx.initialized to should_skip_switch()? If so,
please split this into two patches: the first adds just that check, the
second does the RCS specialization.
-Daniel
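
For illustration, a minimal first patch along the lines Daniel suggests would
touch only should_skip_switch(), keeping its existing signature and leaving
the RCS specialization for the follow-up. A sketch assembled from the hunks
below (not an actually posted patch):

static inline bool should_skip_switch(struct intel_engine_cs *engine,
				      struct intel_context *from,
				      struct intel_context *to)
{
	if (to->remap_slice)
		return false;

	/* Never skip a context whose hardware setup has not completed. */
	if (!to->legacy_hw_ctx.initialized)
		return false;

	if (to->ppgtt && from == to &&
	    !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
		return true;

	return false;
}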


Chris Wilson April 13, 2016, 10:05 a.m. UTC | #2
On Wed, Apr 13, 2016 at 11:59:06AM +0200, Daniel Vetter wrote:
> On Tue, Apr 12, 2016 at 09:03:08PM +0100, Chris Wilson wrote:
> > After mi_set_context() succeeds, we need to update the state of the
> > engine's last_context. This ensures that we hold a pin on the context
> > whilst the hardware may write to it. However, since we didn't complete
> > the post-switch setup of the context, we need to force the subsequent
> > use of the same context to complete the setup (which means updating
> > should_skip_switch()).
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> 
> Specializing do_rcs_switch makes sense, but shouldn't be intermingled with
> the bugfix. Assuming I'm reading things correctly the real bugfix is to
> add the check for hw_ctx.initialized to should_skip_switch()? If so,
> please split into 2 patches - first to add just that check, 2nd to do the
> RCS specialization.

Not quite. That is a critical step, but we also have to set
engine->last_context earlier. That gets quite muddled when the !rcs path is
threaded through it. I felt that separating the two paths and making it
clear what happens to each was much easier to understand. The proviso, as
always, is that we duplicate some code (later patches will ease some of
that, but nothing can eliminate the fact that RCS is special).
-Chris
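
To make the interplay concrete, below is a small self-contained toy model in
plain C of the bookkeeping described above; every name in it is invented for
illustration and none of this is driver code. The engine's last_context takes
over the pin as soon as the hardware switch has been emitted, and a failure in
the later setup leaves the initialized flag clear, so the next switch is not
skipped and completes the job.

#include <stdbool.h>
#include <stdio.h>

struct toy_context {
	int pin_count;
	bool initialized;
};

struct toy_engine {
	struct toy_context *last_context;
};

/* Mirror of should_skip_switch(): only skip a context that is both current
 * and fully set up. */
static bool toy_should_skip(struct toy_context *from, struct toy_context *to)
{
	return to->initialized && to == from;
}

/* 'fail_setup' simulates a failure in the post-switch setup (PD reload,
 * L3 remap or init_context in the real driver). */
static int toy_switch(struct toy_engine *engine, struct toy_context *to,
		      bool fail_setup)
{
	struct toy_context *from = engine->last_context;

	if (toy_should_skip(from, to))
		return 0;

	to->pin_count++;		/* pin the new state object */

	/* The "hardware switch" has been emitted: move the engine's
	 * reference before attempting the remaining setup. */
	if (from)
		from->pin_count--;
	engine->last_context = to;

	if (fail_setup)
		return -5;	/* 'to' stays pinned, initialized stays false */

	to->initialized = true;
	return 0;
}

int main(void)
{
	struct toy_context ctx = { 0 };
	struct toy_engine engine = { 0 };

	/* First attempt fails after the hardware switch ... */
	toy_switch(&engine, &ctx, true);
	printf("after failure: pinned=%d initialized=%d skip-next=%d\n",
	       ctx.pin_count, ctx.initialized,
	       toy_should_skip(engine.last_context, &ctx));

	/* ... so the second attempt is not skipped and finishes the setup. */
	toy_switch(&engine, &ctx, false);
	printf("after retry:   pinned=%d initialized=%d\n",
	       ctx.pin_count, ctx.initialized);
	return 0;
}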

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index e5ad7b21e356..361959b1d53a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -609,50 +609,40 @@  mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	return ret;
 }
 
-static inline bool should_skip_switch(struct intel_engine_cs *engine,
-				      struct intel_context *from,
+static inline bool should_skip_switch(struct intel_context *from,
 				      struct intel_context *to)
 {
 	if (to->remap_slice)
 		return false;
 
-	if (to->ppgtt && from == to &&
-	    !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings))
-		return true;
+	if (!to->legacy_hw_ctx.initialized)
+		return false;
 
-	return false;
+	if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS))
+		return false;
+
+	return to == from;
 }
 
 static bool
-needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to)
+needs_pd_load_pre(struct intel_context *to)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-
 	if (!to->ppgtt)
 		return false;
 
-	if (INTEL_INFO(engine->dev)->gen < 8)
-		return true;
-
-	if (engine != &dev_priv->engine[RCS])
+	if (INTEL_INFO(to->i915)->gen < 8)
 		return true;
 
 	return false;
 }
 
 static bool
-needs_pd_load_post(struct intel_engine_cs *engine, struct intel_context *to,
-		   u32 hw_flags)
+needs_pd_load_post(struct intel_context *to, u32 hw_flags)
 {
-	struct drm_i915_private *dev_priv = engine->dev->dev_private;
-
 	if (!to->ppgtt)
 		return false;
 
-	if (!IS_GEN8(engine->dev))
-		return false;
-
-	if (engine != &dev_priv->engine[RCS])
+	if (!IS_GEN8(to->i915))
 		return false;
 
 	if (hw_flags & MI_RESTORE_INHIBIT)
@@ -661,60 +651,33 @@  needs_pd_load_post(struct intel_engine_cs *engine, struct intel_context *to,
 	return false;
 }
 
-static int do_switch(struct drm_i915_gem_request *req)
+static int do_rcs_switch(struct drm_i915_gem_request *req)
 {
 	struct intel_context *to = req->ctx;
 	struct intel_engine_cs *engine = req->engine;
-	struct drm_i915_private *dev_priv = req->i915;
-	struct intel_context *from = engine->last_context;
-	u32 hw_flags = 0;
-	bool uninitialized = false;
+	struct intel_context *from;
+	u32 hw_flags;
 	int ret, i;
 
-	if (from != NULL && engine == &dev_priv->engine[RCS]) {
-		BUG_ON(from->legacy_hw_ctx.rcs_state == NULL);
-		BUG_ON(!i915_gem_obj_is_pinned(from->legacy_hw_ctx.rcs_state));
-	}
-
-	if (should_skip_switch(engine, from, to))
+	if (should_skip_switch(engine->last_context, to))
 		return 0;
 
 	/* Trying to pin first makes error handling easier. */
-	if (engine == &dev_priv->engine[RCS]) {
-		ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
-					    get_context_alignment(engine->dev),
-					    0);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
+				    get_context_alignment(engine->dev),
+				    0);
+	if (ret)
+		return ret;
 
 	/*
 	 * Pin can switch back to the default context if we end up calling into
 	 * evict_everything - as a last ditch gtt defrag effort that also
 	 * switches to the default context. Hence we need to reload from here.
+	 *
+	 * XXX: Doing so is painfully broken!
 	 */
 	from = engine->last_context;
 
-	if (needs_pd_load_pre(engine, to)) {
-		/* Older GENs and non render rings still want the load first,
-		 * "PP_DCLV followed by PP_DIR_BASE register through Load
-		 * Register Immediate commands in Ring Buffer before submitting
-		 * a context."*/
-		trace_switch_mm(engine, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, req);
-		if (ret)
-			goto unpin_out;
-
-		/* Doing a PD load always reloads the page dirs */
-		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
-	}
-
-	if (engine != &dev_priv->engine[RCS]) {
-		if (from)
-			i915_gem_context_unreference(from);
-		goto done;
-	}
-
 	/*
 	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
 	 * that thanks to write = false in this call and us not setting any gpu
@@ -727,55 +690,46 @@  static int do_switch(struct drm_i915_gem_request *req)
 	if (ret)
 		goto unpin_out;
 
-	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) {
-		hw_flags |= MI_RESTORE_INHIBIT;
+	if (needs_pd_load_pre(to)) {
+		/* Older GENs and non render rings still want the load first,
+		 * "PP_DCLV followed by PP_DIR_BASE register through Load
+		 * Register Immediate commands in Ring Buffer before submitting
+		 * a context."*/
+		trace_switch_mm(engine, to);
+		ret = to->ppgtt->switch_mm(to->ppgtt, req);
+		if (ret)
+			goto unpin_out;
+
+		/* Doing a PD load always reloads the page dirs */
+		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
+	}
+
+	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
 		/* NB: If we inhibit the restore, the context is not allowed to
 		 * die because future work may end up depending on valid address
 		 * space. This means we must enforce that a page table load
 		 * occur when this occurs. */
-	} else if (to->ppgtt &&
-		   (intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) {
-		hw_flags |= MI_FORCE_RESTORE;
-		to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
-	}
+		hw_flags = MI_RESTORE_INHIBIT;
+	else if (to->ppgtt && to->ppgtt->pd_dirty_rings & (1 << RCS))
+		hw_flags = MI_FORCE_RESTORE;
+	else
+		hw_flags = 0;
 
 	/* We should never emit switch_mm more than once */
-	WARN_ON(needs_pd_load_pre(engine, to) &&
-		needs_pd_load_post(engine, to, hw_flags));
+	WARN_ON(needs_pd_load_pre(to) && needs_pd_load_post(to, hw_flags));
 
-	ret = mi_set_context(req, hw_flags);
-	if (ret)
-		goto unpin_out;
-
-	/* GEN8 does *not* require an explicit reload if the PDPs have been
-	 * setup, and we do not wish to move them.
-	 */
-	if (needs_pd_load_post(engine, to, hw_flags)) {
-		trace_switch_mm(engine, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, req);
-		/* The hardware context switch is emitted, but we haven't
-		 * actually changed the state - so it's probably safe to bail
-		 * here. Still, let the user know something dangerous has
-		 * happened.
-		 */
+	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
+		ret = mi_set_context(req, hw_flags);
 		if (ret) {
-			DRM_ERROR("Failed to change address space on context switch\n");
+			/* Force the reload of this and the previous mm */
+			if (needs_pd_load_pre(to))
+				to->ppgtt->pd_dirty_rings |= 1 << RCS;
+			if (from && needs_pd_load_pre(from))
+				from->ppgtt->pd_dirty_rings |= 1 << RCS;
 			goto unpin_out;
 		}
 	}
 
-	for (i = 0; i < MAX_L3_SLICES; i++) {
-		if (!(to->remap_slice & (1<<i)))
-			continue;
-
-		ret = i915_gem_l3_remap(req, i);
-		/* If it failed, try again next round */
-		if (ret)
-			DRM_DEBUG_DRIVER("L3 remapping failed\n");
-		else
-			to->remap_slice &= ~(1<<i);
-	}
-
 	/* The backing object for the context is done after switching to the
 	 * *next* context. Therefore we cannot retire the previous context until
 	 * the next context has already started running. In fact, the below code
@@ -798,27 +752,52 @@  static int do_switch(struct drm_i915_gem_request *req)
 		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
 		i915_gem_context_unreference(from);
 	}
-
-	uninitialized = !to->legacy_hw_ctx.initialized;
-	to->legacy_hw_ctx.initialized = true;
-
-done:
 	i915_gem_context_reference(to);
 	engine->last_context = to;
 
-	if (uninitialized) {
+	/* GEN8 does *not* require an explicit reload if the PDPs have been
+	 * setup, and we do not wish to move them.
+	 */
+	if (needs_pd_load_post(to, hw_flags)) {
+		trace_switch_mm(engine, to);
+		ret = to->ppgtt->switch_mm(to->ppgtt, req);
+		/* The hardware context switch is emitted, but we haven't
+		 * actually changed the state - so it's probably safe to bail
+		 * here. Still, let the user know something dangerous has
+		 * happened.
+		 */
+		if (ret)
+			return ret;
+
+		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
+	}
+	if (hw_flags & MI_FORCE_RESTORE)
+		to->ppgtt->pd_dirty_rings &= ~(1 << RCS);
+
+	for (i = 0; i < MAX_L3_SLICES; i++) {
+		if (!(to->remap_slice & (1<<i)))
+			continue;
+
+		ret = i915_gem_l3_remap(req, i);
+		if (ret)
+			return ret;
+
+		to->remap_slice &= ~(1<<i);
+	}
+
+	if (!to->legacy_hw_ctx.initialized) {
 		if (engine->init_context) {
 			ret = engine->init_context(req);
 			if (ret)
-				DRM_ERROR("ring init context: %d\n", ret);
+				return ret;
 		}
+		to->legacy_hw_ctx.initialized = true;
 	}
 
 	return 0;
 
 unpin_out:
-	if (engine->id == RCS)
-		i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
+	i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
 	return ret;
 }
 
@@ -853,7 +832,31 @@  int i915_switch_context(struct drm_i915_gem_request *req)
 		return 0;
 	}
 
-	return do_switch(req);
+	if (engine->id != RCS) {
+		struct intel_context *from = engine->last_context;
+		struct intel_context *to = req->ctx;
+
+		if (to->ppgtt &&
+		    (from != to ||
+		     intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) {
+			int ret;
+
+			trace_switch_mm(engine, to);
+			ret = to->ppgtt->switch_mm(to->ppgtt, req);
+			if (ret)
+				return ret;
+
+			to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+		}
+
+		if (from)
+			i915_gem_context_unreference(from);
+		i915_gem_context_reference(to);
+		engine->last_context = to;
+		return 0;
+	}
+
+	return do_rcs_switch(req);
 }
 
 static bool contexts_enabled(struct drm_device *dev)