diff mbox

[5/5] drm/i915: Only sanitize GEM from late suspend

Message ID 20180529132922.6831-5-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 29, 2018, 1:29 p.m. UTC
During testing we encounter a conflict between SUSPEND_TEST_DEVICES and
disabling reset (gem_eio/suspend). This results in the device continuing
on without being reset, but since it has gone through HW sanitization to
account for the suspend/resume cycle, we have to assume the device has
been reset to its defaults. A simple way around this is to skip the
sanitize phase for SUSPEND_TEST_DEVICES by moving it to suspend-late.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c |  6 +++++-
 drivers/gpu/drm/i915/i915_drv.h |  1 +
 drivers/gpu/drm/i915/i915_gem.c | 22 +++++++++++++---------
 3 files changed, 19 insertions(+), 10 deletions(-)

Comments

Tvrtko Ursulin May 30, 2018, 4:56 p.m. UTC | #1
On 29/05/2018 14:29, Chris Wilson wrote:
> During testing we encounter a conflict between SUSPEND_TEST_DEVICES and
> disabling reset (gem_eio/suspend). This results in the device continuing
> on without being reset, but since it has gone through HW sanitization to

Has some test been failing because of this and since when? gem_eio/suspend?

> account for the suspend/resume cycle, we have to assume the device has
> been reset to its defaults. A simple way around this is to skip the
> sanitize phase for SUSPEND_TEST_DEVICES by moving it to suspend-late.

So suspend_late is not called when suspending via SUSPEND_TEST_DEVICES? 
Sounds weird to me that core allows a "half-way" suspend mode. But I am 
not familiar with that code so don't know.

Either way, if we skip it, that only skips the reset which is skipped 
anyway in gem_eio/suspend so I did not figure out the difference.

Regards,

Tvrtko

> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.c |  6 +++++-
>   drivers/gpu/drm/i915/i915_drv.h |  1 +
>   drivers/gpu/drm/i915/i915_gem.c | 22 +++++++++++++---------
>   3 files changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 8f002ae22e62..0d9b8cc0436d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -636,6 +636,8 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
>   
>   static void i915_gem_fini(struct drm_i915_private *dev_priv)
>   {
> +	i915_gem_suspend_late(dev_priv);
> +
>   	/* Flush any outstanding unpin_work. */
>   	i915_gem_drain_workqueue(dev_priv);
>   
> @@ -1611,7 +1613,6 @@ static int i915_drm_suspend(struct drm_device *dev)
>   	opregion_target_state = suspend_to_idle(dev_priv) ? PCI_D1 : PCI_D3cold;
>   	intel_opregion_notify_adapter(dev_priv, opregion_target_state);
>   
> -	intel_uncore_suspend(dev_priv);
>   	intel_opregion_unregister(dev_priv);
>   
>   	intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
> @@ -1633,7 +1634,10 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
>   
>   	disable_rpm_wakeref_asserts(dev_priv);
>   
> +	i915_gem_suspend_late(dev_priv);
> +
>   	intel_display_set_init_power(dev_priv, false);
> +	intel_uncore_suspend(dev_priv);
>   
>   	/*
>   	 * In case of firmware assisted context save/restore don't manually
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 487922f88b76..4257ffc1bae1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3169,6 +3169,7 @@ void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
>   int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
>   			   unsigned int flags);
>   int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
> +void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
>   void i915_gem_resume(struct drm_i915_private *dev_priv);
>   int i915_gem_fault(struct vm_fault *vmf);
>   int i915_gem_object_wait(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 75bdfafc97a2..874ac1a8ec47 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5050,6 +5050,17 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
>   	if (WARN_ON(!intel_engines_are_idle(dev_priv)))
>   		i915_gem_set_wedged(dev_priv); /* no hope, discard everything */
>   
> +	intel_runtime_pm_put(dev_priv);
> +	return 0;
> +
> +err_unlock:
> +	mutex_unlock(&dev->struct_mutex);
> +	intel_runtime_pm_put(dev_priv);
> +	return ret;
> +}
> +
> +void i915_gem_suspend_late(struct drm_i915_private *i915)
> +{
>   	/*
>   	 * Neither the BIOS, ourselves or any other kernel
>   	 * expects the system to be in execlists mode on startup,
> @@ -5069,16 +5080,9 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
>   	 * machines is a good idea, we don't - just in case it leaves the
>   	 * machine in an unusable condition.
>   	 */
> -	intel_uc_sanitize(dev_priv);
> -	i915_gem_sanitize(dev_priv);
>   
> -	intel_runtime_pm_put(dev_priv);
> -	return 0;
> -
> -err_unlock:
> -	mutex_unlock(&dev->struct_mutex);
> -	intel_runtime_pm_put(dev_priv);
> -	return ret;
> +	intel_uc_sanitize(i915);
> +	i915_gem_sanitize(i915);
>   }
>   
>   void i915_gem_resume(struct drm_i915_private *i915)
>
Chris Wilson May 30, 2018, 5:07 p.m. UTC | #2
Quoting Tvrtko Ursulin (2018-05-30 17:56:20)
> 
> On 29/05/2018 14:29, Chris Wilson wrote:
> > During testing we encounter a conflict between SUSPEND_TEST_DEVICES and
> > disabling reset (gem_eio/suspend). This results in the device continuing
> > on without being reset, but since it has gone through HW sanitization to
> 
> Has some test been failing because of this and since when? gem_eio/suspend?

Yes. It will fail in the future because we'll remove all mmio from the
process_csb(); it fails right now because we lose track of what
interrupts we process across the suspend and may double handle CSB events.
E.g. https://bugs.freedesktop.org/show_bug.cgi?id=106702
 
> > account for the suspend/resume cycle, we have to assume the device has
> > been reset to its defaults. A simple way around this is to skip the
> > sanitize phase for SUSPEND_TEST_DEVICES by moving it to suspend-late.
> 
> So suspend_late is not called when suspending via SUSPEND_TEST_DEVICES? 
> Sounds weird to me that core allows a "half-way" suspend mode. But I am 
> not familiar with that code so don't know.

Yes, weird is one way to describe it.
 
> Either way, if we skip it, that only skips the reset which is skipped 
> anyway in gem_eio/suspend so I did not figure out the difference.

We then also skip the call to reset->reset which we need after a real
resume (when the device is powered up) and/or after the reset in
sanitization. It's fiddly.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 8f002ae22e62..0d9b8cc0436d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -636,6 +636,8 @@  static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
 
 static void i915_gem_fini(struct drm_i915_private *dev_priv)
 {
+	i915_gem_suspend_late(dev_priv);
+
 	/* Flush any outstanding unpin_work. */
 	i915_gem_drain_workqueue(dev_priv);
 
@@ -1611,7 +1613,6 @@  static int i915_drm_suspend(struct drm_device *dev)
 	opregion_target_state = suspend_to_idle(dev_priv) ? PCI_D1 : PCI_D3cold;
 	intel_opregion_notify_adapter(dev_priv, opregion_target_state);
 
-	intel_uncore_suspend(dev_priv);
 	intel_opregion_unregister(dev_priv);
 
 	intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
@@ -1633,7 +1634,10 @@  static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 
 	disable_rpm_wakeref_asserts(dev_priv);
 
+	i915_gem_suspend_late(dev_priv);
+
 	intel_display_set_init_power(dev_priv, false);
+	intel_uncore_suspend(dev_priv);
 
 	/*
 	 * In case of firmware assisted context save/restore don't manually
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 487922f88b76..4257ffc1bae1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3169,6 +3169,7 @@  void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv);
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 			   unsigned int flags);
 int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
+void i915_gem_suspend_late(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
 int i915_gem_fault(struct vm_fault *vmf);
 int i915_gem_object_wait(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 75bdfafc97a2..874ac1a8ec47 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5050,6 +5050,17 @@  int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	if (WARN_ON(!intel_engines_are_idle(dev_priv)))
 		i915_gem_set_wedged(dev_priv); /* no hope, discard everything */
 
+	intel_runtime_pm_put(dev_priv);
+	return 0;
+
+err_unlock:
+	mutex_unlock(&dev->struct_mutex);
+	intel_runtime_pm_put(dev_priv);
+	return ret;
+}
+
+void i915_gem_suspend_late(struct drm_i915_private *i915)
+{
 	/*
 	 * Neither the BIOS, ourselves or any other kernel
 	 * expects the system to be in execlists mode on startup,
@@ -5069,16 +5080,9 @@  int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	 * machines is a good idea, we don't - just in case it leaves the
 	 * machine in an unusable condition.
 	 */
-	intel_uc_sanitize(dev_priv);
-	i915_gem_sanitize(dev_priv);
 
-	intel_runtime_pm_put(dev_priv);
-	return 0;
-
-err_unlock:
-	mutex_unlock(&dev->struct_mutex);
-	intel_runtime_pm_put(dev_priv);
-	return ret;
+	intel_uc_sanitize(i915);
+	i915_gem_sanitize(i915);
 }
 
 void i915_gem_resume(struct drm_i915_private *i915)