diff mbox

[2/2] drm/i915: Reset the gpu on takeover

Message ID 20170121145045.27351-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Jan. 21, 2017, 2:50 p.m. UTC
The GPU may be in an unknown state following resume and module load. The
previous occupant may have left contexts loaded, or other dangerous
state, which can cause an immediate GPU hang for us. The only save
course of action is to reset the GPU prior to using it - similarly to
how we reset the GPU prior to unload (before a second user may be
affected by our leftover state).

We need to reset the GPU very early in our load/resume sequence so that
any stale HW pointers are revoked prior to any resource allocations we
make (that may conflict).

A reset should only be a couple of milliseconds on a slow device, a cost
we should easily be able to absorb into our initialisation times.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c |  2 ++
 drivers/gpu/drm/i915/i915_drv.h |  2 ++
 drivers/gpu/drm/i915/i915_gem.c | 27 +++++++++++++++++++++++----
 3 files changed, 27 insertions(+), 4 deletions(-)

Comments

Joonas Lahtinen Jan. 23, 2017, 9:39 a.m. UTC | #1
On la, 2017-01-21 at 14:50 +0000, Chris Wilson wrote:
> The GPU may be in an unknown state following resume and module load. The
> previous occupant may have left contexts loaded, or other dangerous
> state, which can cause an immediate GPU hang for us. The only save
> course of action is to reset the GPU prior to using it - similarly to
> how we reset the GPU prior to unload (before a second user may be
> affected by our leftover state).
> 
> We need to reset the GPU very early in our load/resume sequence so that
> any stale HW pointers are revoked prior to any resource allocations we
> make (that may conflict).
> 
> A reset should only be a couple of milliseconds on a slow device, a cost
> we should easily be able to absorb into our initialisation times.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Can't really suggest better names for the functions, so;

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index b19ec224831c..24e9da1e4caf 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -950,6 +950,7 @@  static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)
 		goto put_bridge;
 
 	intel_uncore_init(dev_priv);
+	i915_gem_init_mmio(dev_priv);
 
 	return 0;
 
@@ -1579,6 +1580,7 @@  static int i915_drm_resume(struct drm_device *dev)
 
 	disable_rpm_wakeref_asserts(dev_priv);
 	intel_sanitize_gt_powersave(dev_priv);
+	i915_gem_sanitize(dev_priv);
 
 	ret = i915_ggtt_enable_hw(dev_priv);
 	if (ret)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2467faa4a9e4..5ee4b778bf04 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3121,6 +3121,7 @@  int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
+void i915_gem_sanitize(struct drm_i915_private *i915);
 int i915_gem_load_init(struct drm_i915_private *dev_priv);
 void i915_gem_load_cleanup(struct drm_i915_private *dev_priv);
 void i915_gem_load_init_fences(struct drm_i915_private *dev_priv);
@@ -3336,6 +3337,7 @@  int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
+void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a07b62732923..2522d4895ff7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4201,6 +4201,23 @@  static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv)
 			   !i915_gem_context_is_kernel(engine->last_retired_context));
 }
 
+void i915_gem_sanitize(struct drm_i915_private *i915)
+{
+	/*
+	 * If we inherit context state from the BIOS or earlier occupants
+	 * of the GPU, the GPU may be in an inconsistent state when we
+	 * try to take over. The only way to remove the earlier state
+	 * is by resetting. However, resetting on earlier gen is tricky as
+	 * it may impact the display and we are uncertain about the stability
+	 * of the reset, so we only reset recent machines with logical
+	 * context support (that must be reset to remove any stray contexts).
+	 */
+	if (HAS_HW_CONTEXTS(i915)) {
+		int reset = intel_gpu_reset(i915, ALL_ENGINES);
+		WARN_ON(reset && reset != -ENODEV);
+	}
+}
+
 int i915_gem_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
@@ -4271,10 +4288,7 @@  int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	 * machines is a good idea, we don't - just in case it leaves the
 	 * machine in an unusable condition.
 	 */
-	if (HAS_HW_CONTEXTS(dev_priv)) {
-		int reset = intel_gpu_reset(dev_priv, ALL_ENGINES);
-		WARN_ON(reset && reset != -ENODEV);
-	}
+	i915_gem_sanitize(dev_priv);
 
 	return 0;
 
@@ -4492,6 +4506,11 @@  int i915_gem_init(struct drm_i915_private *dev_priv)
 	return ret;
 }
 
+void i915_gem_init_mmio(struct drm_i915_private *i915)
+{
+	i915_gem_sanitize(i915);
+}
+
 void
 i915_gem_cleanup_engines(struct drm_i915_private *dev_priv)
 {