diff mbox

[1/4] drm/i915: Switch to kernel context before idling at runtime

Message ID 20180531082246.9763-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 31, 2018, 8:22 a.m. UTC
We can reduce our exposure to random neutrinos by resting on the kernel
context having flushed out the user contexts to system memory and
beyond. The corollary is that we then we require two passes through the
idle handler to go to sleep, which on a truly idle system involves an
extra pass through the slow and irregular retire work handler.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  9 +++++--
 drivers/gpu/drm/i915/i915_gem.c         | 36 +++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
 3 files changed, 39 insertions(+), 8 deletions(-)

Comments

Chris Wilson May 31, 2018, 6:34 p.m. UTC | #1
Quoting Patchwork (2018-05-31 11:04:13)
>     igt@gem_eio@suspend:
>       shard-snb:          PASS -> INCOMPLETE (fdo#105411) +1

I was concerned by this since gem_eio targets the code being changed. In
particular, I was concerned that this was bringing back the snb gem_eio
nightmare (where we would take out the machine). However, running
locally and looking at the previous CI results doesn't show that this is
a persistent problem, so I think this failure was a fluke.

That isn't to say that there isn't something weird happening on snb, but
I don't think it's any weirder than normal.

Thank you all for the reviews, let's just hope we don't have to stare
at suspend again for a few weeks^W days.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a8e7761cdc7d..15e86d34a81c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4226,8 +4226,13 @@  i915_drop_caches_set(void *data, u64 val)
 		i915_gem_shrink_all(dev_priv);
 	fs_reclaim_release(GFP_KERNEL);
 
-	if (val & DROP_IDLE)
-		drain_delayed_work(&dev_priv->gt.idle_work);
+	if (val & DROP_IDLE) {
+		do {
+			if (READ_ONCE(dev_priv->gt.active_requests))
+				flush_delayed_work(&dev_priv->gt.retire_work);
+			drain_delayed_work(&dev_priv->gt.idle_work);
+		} while (READ_ONCE(dev_priv->gt.awake));
+	}
 
 	if (val & DROP_FREED)
 		i915_gem_drain_freed_objects(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 530d6d0109b4..173d1e4ad963 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -139,6 +139,8 @@  int i915_mutex_lock_interruptible(struct drm_device *dev)
 
 static u32 __i915_gem_park(struct drm_i915_private *i915)
 {
+	GEM_TRACE("\n");
+
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(i915->gt.active_requests);
 	GEM_BUG_ON(!list_empty(&i915->gt.active_rings));
@@ -181,6 +183,8 @@  static u32 __i915_gem_park(struct drm_i915_private *i915)
 
 void i915_gem_park(struct drm_i915_private *i915)
 {
+	GEM_TRACE("\n");
+
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(i915->gt.active_requests);
 
@@ -193,6 +197,8 @@  void i915_gem_park(struct drm_i915_private *i915)
 
 void i915_gem_unpark(struct drm_i915_private *i915)
 {
+	GEM_TRACE("\n");
+
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915->gt.active_requests);
 
@@ -3503,6 +3509,24 @@  i915_gem_idle_work_handler(struct work_struct *work)
 	if (!READ_ONCE(dev_priv->gt.awake))
 		return;
 
+	if (READ_ONCE(dev_priv->gt.active_requests))
+		return;
+
+	/*
+	 * Flush out the last user context, leaving only the pinned
+	 * kernel context resident. When we are idling on the kernel_context,
+	 * no more new requests (with a context switch) are emitted and we
+	 * can finally rest. A consequence is that the idle work handler is
+	 * always called at least twice before idling (and if the system is
+	 * idle that implies a round trip through the retire worker).
+	 */
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	i915_gem_switch_to_kernel_context(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
+	GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
+		  READ_ONCE(dev_priv->gt.active_requests));
+
 	/*
 	 * Wait for last execlists context complete, but bail out in case a
 	 * new request is submitted. As we don't trust the hardware, we
@@ -4913,11 +4937,9 @@  static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
-	if (i915_terminally_wedged(&i915->gpu_error)) {
-		mutex_lock(&i915->drm.struct_mutex);
+	mutex_lock(&i915->drm.struct_mutex);
+	if (i915_terminally_wedged(&i915->gpu_error))
 		i915_gem_unset_wedged(i915);
-		mutex_unlock(&i915->drm.struct_mutex);
-	}
 
 	/*
 	 * If we inherit context state from the BIOS or earlier occupants
@@ -4929,6 +4951,9 @@  void i915_gem_sanitize(struct drm_i915_private *i915)
 	 */
 	if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
 		WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
+
+	i915_gem_contexts_lost(i915);
+	mutex_unlock(&i915->drm.struct_mutex);
 }
 
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
@@ -4964,7 +4989,6 @@  int i915_gem_suspend(struct drm_i915_private *dev_priv)
 
 		assert_kernel_context_is_current(dev_priv);
 	}
-	i915_gem_contexts_lost(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 
 	intel_uc_suspend(dev_priv);
@@ -5017,6 +5041,8 @@  int i915_gem_suspend(struct drm_i915_private *dev_priv)
 
 void i915_gem_resume(struct drm_i915_private *i915)
 {
+	GEM_TRACE("\n");
+
 	WARN_ON(i915->gt.awake);
 
 	mutex_lock(&i915->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 45393f6e0208..81f086397d10 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -649,7 +649,7 @@  int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	GEM_TRACE("\n");
+	GEM_TRACE("awake?=%s\n", yesno(i915->gt.awake));
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 	GEM_BUG_ON(!i915->kernel_context);