drm/i915: Prepare GEM for suspend earlier

Message ID	20180525092629.1456-1-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Fri, 25 May 2018 10:26:29 +0100 Message-Id: <20180525092629.1456-1-chris@chris-wilson.co.uk> In-Reply-To: <20180525065344.26846-1-chris@chris-wilson.co.uk> References: <20180525065344.26846-1-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH] drm/i915: Prepare GEM for suspend earlier Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Message ID

20180525092629.1456-1-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Date: Fri, 25 May 2018 10:26:29 +0100
Message-Id: <20180525092629.1456-1-chris@chris-wilson.co.uk>
In-Reply-To: <20180525065344.26846-1-chris@chris-wilson.co.uk>
References: <20180525065344.26846-1-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH] drm/i915: Prepare GEM for suspend earlier
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Commit Message

Chris Wilson May 25, 2018, 9:26 a.m. UTC

In order to prepare the GPU for sleeping, we may want to submit commands
to it. This is a complicated process that may even require some swapping
in from shmemfs, if the GPU was in the wrong state. As such, we need to
do this preparation step synchronously before the rest of the system has
started to turn off (e.g. swapin fails if scsi is suspended).
Fortunately, we are provided with such a hook, pm_ops.prepare().

v2: Compile cleanup
v3: Fewer asserts, fewer problems?

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106640
Testcase: igt/drv_suspend after igt/gem_tiled_swapping
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c | 41 +++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 10 deletions(-)

Comments

Ville Syrjälä May 25, 2018, 10:51 a.m. UTC | #1

On Fri, May 25, 2018 at 10:26:29AM +0100, Chris Wilson wrote:
> In order to prepare the GPU for sleeping, we may want to submit commands
> to it. This is a complicated process that may even require some swapping
> in from shmemfs, if the GPU was in the wrong state. As such, we need to
> do this preparation step synchronously before the rest of the system has
> started to turn off (e.g. swapin fails if scsi is suspended).
> Fortunately, we are provided with a such a hook, pm_ops.prepare().
> 
> v2: Compile cleanup
> v3: Fewer asserts, fewer problems?
> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106640
> Testcase: igt/drv_suspend after igt/gem_tiled_swapping
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.c | 41 +++++++++++++++++++++++++--------
>  1 file changed, 31 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 9c449b8d8eab..9d6ac7f44812 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1553,12 +1553,24 @@ static bool suspend_to_idle(struct drm_i915_private *dev_priv)
>  	return false;
>  }
>  
> +static int i915_drm_prepare(struct drm_device *dev)
> +{
> +	struct drm_i915_private *i915 = to_i915(dev);
> +	int err;
> +
> +	err = i915_gem_suspend(i915);
> +	if (err)
> +		dev_err(&i915->drm.pdev->dev,
> +			"GEM idle failed, suspend/resume might fail\n");
> +
> +	return err;
> +}
> +
>  static int i915_drm_suspend(struct drm_device *dev)
>  {
>  	struct drm_i915_private *dev_priv = to_i915(dev);
>  	struct pci_dev *pdev = dev_priv->drm.pdev;
>  	pci_power_t opregion_target_state;
> -	int error;
>  
>  	/* ignore lid events during suspend */
>  	mutex_lock(&dev_priv->modeset_restore_lock);
> @@ -1575,13 +1587,6 @@ static int i915_drm_suspend(struct drm_device *dev)
>  
>  	pci_save_state(pdev);
>  
> -	error = i915_gem_suspend(dev_priv);
> -	if (error) {
> -		dev_err(&pdev->dev,
> -			"GEM idle failed, resume might fail\n");
> -		goto out;
> -	}
> -
>  	intel_display_suspend(dev);
>  
>  	intel_dp_mst_suspend(dev);
> @@ -1609,10 +1614,9 @@ static int i915_drm_suspend(struct drm_device *dev)
>  
>  	intel_csr_ucode_suspend(dev_priv);
>  
> -out:
>  	enable_rpm_wakeref_asserts(dev_priv);
>  
> -	return error;
> +	return 0;
>  }
>  
>  static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
> @@ -2081,6 +2085,22 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
>  	return ret;
>  }
>  
> +static int i915_pm_prepare(struct device *kdev)
> +{
> +	struct pci_dev *pdev = to_pci_dev(kdev);
> +	struct drm_device *dev = pci_get_drvdata(pdev);
> +
> +	if (!dev) {
> +		dev_err(kdev, "DRM not initialized, aborting suspend.\n");
> +		return -ENODEV;
> +	}

How can this happen?

IIRC I actually wrote a patch once to move the gem suspend to happen
after display suspend. The idea being that shutting down the display(s)
may require gem services (MI_OVERLAY_OFF being the prime example I
had in mind at the time). Just wondering if we can split the gem suspend
somehow to allow that, or would we need to just move display suspend
earlier as well?

> +
> +	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
> +		return 0;
> +
> +	return i915_drm_prepare(dev);
> +}
> +
>  static int i915_pm_suspend(struct device *kdev)
>  {
>  	struct pci_dev *pdev = to_pci_dev(kdev);
> @@ -2731,6 +2751,7 @@ const struct dev_pm_ops i915_pm_ops = {
>  	 * S0ix (via system suspend) and S3 event handlers [PMSG_SUSPEND,
>  	 * PMSG_RESUME]
>  	 */
> +	.prepare = i915_pm_prepare,
>  	.suspend = i915_pm_suspend,
>  	.suspend_late = i915_pm_suspend_late,
>  	.resume_early = i915_pm_resume_early,
> -- 
> 2.17.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Chris Wilson May 25, 2018, 11:02 a.m. UTC | #2

Quoting Ville Syrjälä (2018-05-25 11:51:13)
> On Fri, May 25, 2018 at 10:26:29AM +0100, Chris Wilson wrote:
> > In order to prepare the GPU for sleeping, we may want to submit commands
> > to it. This is a complicated process that may even require some swapping
> > in from shmemfs, if the GPU was in the wrong state. As such, we need to
> > do this preparation step synchronously before the rest of the system has
> > started to turn off (e.g. swapin fails if scsi is suspended).
> > Fortunately, we are provided with a such a hook, pm_ops.prepare().
> > 
> > v2: Compile cleanup
> > v3: Fewer asserts, fewer problems?
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106640
> > Testcase: igt/drv_suspend after igt/gem_tiled_swapping
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c | 41 +++++++++++++++++++++++++--------
> >  1 file changed, 31 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 9c449b8d8eab..9d6ac7f44812 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1553,12 +1553,24 @@ static bool suspend_to_idle(struct drm_i915_private *dev_priv)
> >       return false;
> >  }
> >  
> > +static int i915_drm_prepare(struct drm_device *dev)
> > +{
> > +     struct drm_i915_private *i915 = to_i915(dev);
> > +     int err;
> > +
> > +     err = i915_gem_suspend(i915);
> > +     if (err)
> > +             dev_err(&i915->drm.pdev->dev,
> > +                     "GEM idle failed, suspend/resume might fail\n");
> > +
> > +     return err;
> > +}
> > +
> >  static int i915_drm_suspend(struct drm_device *dev)
> >  {
> >       struct drm_i915_private *dev_priv = to_i915(dev);
> >       struct pci_dev *pdev = dev_priv->drm.pdev;
> >       pci_power_t opregion_target_state;
> > -     int error;
> >  
> >       /* ignore lid events during suspend */
> >       mutex_lock(&dev_priv->modeset_restore_lock);
> > @@ -1575,13 +1587,6 @@ static int i915_drm_suspend(struct drm_device *dev)
> >  
> >       pci_save_state(pdev);
> >  
> > -     error = i915_gem_suspend(dev_priv);
> > -     if (error) {
> > -             dev_err(&pdev->dev,
> > -                     "GEM idle failed, resume might fail\n");
> > -             goto out;
> > -     }
> > -
> >       intel_display_suspend(dev);
> >  
> >       intel_dp_mst_suspend(dev);
> > @@ -1609,10 +1614,9 @@ static int i915_drm_suspend(struct drm_device *dev)
> >  
> >       intel_csr_ucode_suspend(dev_priv);
> >  
> > -out:
> >       enable_rpm_wakeref_asserts(dev_priv);
> >  
> > -     return error;
> > +     return 0;
> >  }
> >  
> >  static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
> > @@ -2081,6 +2085,22 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
> >       return ret;
> >  }
> >  
> > +static int i915_pm_prepare(struct device *kdev)
> > +{
> > +     struct pci_dev *pdev = to_pci_dev(kdev);
> > +     struct drm_device *dev = pci_get_drvdata(pdev);
> > +
> > +     if (!dev) {
> > +             dev_err(kdev, "DRM not initialized, aborting suspend.\n");
> > +             return -ENODEV;
> > +     }
> 
> How can this happen?

Just copypaste, and not wanting to have to answer too many questions.
 
> IIRC I actually wrote a patch once to move the gem suspend to happen
> after display suspend. The idea being that shutting down the display(s)
> may require gem services (MI_OVERLAY_OFF being the prime example I
> had in mind at the time). Just wondering if we can split the gem suspend
> somehow to allow that, or would we need to just move display suspend
> earlier as well?

That would require doing that portion of display shutdown earlier. The
problem is that starting GPU activity from within the async suspend is
error prone. For overlay, it's not much of an issue, we could just
submit the command and wait on it synchronously, as those systems don't
have the fiddly things like contexts and rc6 to contend with ;)

I can't think of a different way of slicing gem suspend, as its core
operation is to switch the GPU to the kernel context, and that involves
submitting a GPU command. After gem suspend, we shouldn't tolerate any
more requests.

I am splitting the reset at the end of gem suspend into suspend_late
though (mainly so that we don't do it for SUSPEND_TEST_DEVICES as it
conflicts nastily with reset failure testing).
-Chris

Chris Wilson May 25, 2018, 1:55 p.m. UTC | #3

Quoting Ville Syrjälä (2018-05-25 11:51:13)
> On Fri, May 25, 2018 at 10:26:29AM +0100, Chris Wilson wrote:
> > In order to prepare the GPU for sleeping, we may want to submit commands
> > to it. This is a complicated process that may even require some swapping
> > in from shmemfs, if the GPU was in the wrong state. As such, we need to
> > do this preparation step synchronously before the rest of the system has
> > started to turn off (e.g. swapin fails if scsi is suspended).
> > Fortunately, we are provided with a such a hook, pm_ops.prepare().
> > 
> > v2: Compile cleanup
> > v3: Fewer asserts, fewer problems?
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106640
> > Testcase: igt/drv_suspend after igt/gem_tiled_swapping
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c | 41 +++++++++++++++++++++++++--------
> >  1 file changed, 31 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 9c449b8d8eab..9d6ac7f44812 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1553,12 +1553,24 @@ static bool suspend_to_idle(struct drm_i915_private *dev_priv)
> >       return false;
> >  }
> >  
> > +static int i915_drm_prepare(struct drm_device *dev)
> > +{
> > +     struct drm_i915_private *i915 = to_i915(dev);
> > +     int err;
> > +
> > +     err = i915_gem_suspend(i915);
> > +     if (err)
> > +             dev_err(&i915->drm.pdev->dev,
> > +                     "GEM idle failed, suspend/resume might fail\n");
> > +
> > +     return err;
> > +}
> > +
> >  static int i915_drm_suspend(struct drm_device *dev)
> >  {
> >       struct drm_i915_private *dev_priv = to_i915(dev);
> >       struct pci_dev *pdev = dev_priv->drm.pdev;
> >       pci_power_t opregion_target_state;
> > -     int error;
> >  
> >       /* ignore lid events during suspend */
> >       mutex_lock(&dev_priv->modeset_restore_lock);
> > @@ -1575,13 +1587,6 @@ static int i915_drm_suspend(struct drm_device *dev)
> >  
> >       pci_save_state(pdev);
> >  
> > -     error = i915_gem_suspend(dev_priv);
> > -     if (error) {
> > -             dev_err(&pdev->dev,
> > -                     "GEM idle failed, resume might fail\n");
> > -             goto out;
> > -     }
> > -
> >       intel_display_suspend(dev);
> >  
> >       intel_dp_mst_suspend(dev);
> > @@ -1609,10 +1614,9 @@ static int i915_drm_suspend(struct drm_device *dev)
> >  
> >       intel_csr_ucode_suspend(dev_priv);
> >  
> > -out:
> >       enable_rpm_wakeref_asserts(dev_priv);
> >  
> > -     return error;
> > +     return 0;
> >  }
> >  
> >  static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
> > @@ -2081,6 +2085,22 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
> >       return ret;
> >  }
> >  
> > +static int i915_pm_prepare(struct device *kdev)
> > +{
> > +     struct pci_dev *pdev = to_pci_dev(kdev);
> > +     struct drm_device *dev = pci_get_drvdata(pdev);
> > +
> > +     if (!dev) {
> > +             dev_err(kdev, "DRM not initialized, aborting suspend.\n");
> > +             return -ENODEV;
> > +     }
> 
> How can this happen?
> 
> IIRC I actually wrote a patch once to move the gem suspend to happen
> after display suspend. The idea being that shutting down the display(s)
> may require gem services (MI_OVERLAY_OFF being the prime example I
> had in mind at the time). Just wondering if we can split the gem suspend
> somehow to allow that, or would we need to just move display suspend
> earlier as well?

Ville accepted that this didn't really change the status quo (on irc)
and so was ok with postponing such fixes until later. I added
+       /*
+        * NB intel_display_suspend() may issue new requests after we've
+        * ostensibly marked the GPU as ready-to-sleep here. We need to
+        * split out that work and pull it forward so that after this point,
+        * the GPU is not woken again.
+        */
to record the issue so that hopefully we might fix it before anyone
notices.

I pulled in Mika's review from a later thread and pushed so I can close
the bug.

Thanks for the review,
-Chris

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9c449b8d8eab..9d6ac7f44812 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1553,12 +1553,24 @@  static bool suspend_to_idle(struct drm_i915_private *dev_priv)
 	return false;
 }
 
+static int i915_drm_prepare(struct drm_device *dev)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	int err;
+
+	err = i915_gem_suspend(i915);
+	if (err)
+		dev_err(&i915->drm.pdev->dev,
+			"GEM idle failed, suspend/resume might fail\n");
+
+	return err;
+}
+
 static int i915_drm_suspend(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	pci_power_t opregion_target_state;
-	int error;
 
 	/* ignore lid events during suspend */
 	mutex_lock(&dev_priv->modeset_restore_lock);
@@ -1575,13 +1587,6 @@  static int i915_drm_suspend(struct drm_device *dev)
 
 	pci_save_state(pdev);
 
-	error = i915_gem_suspend(dev_priv);
-	if (error) {
-		dev_err(&pdev->dev,
-			"GEM idle failed, resume might fail\n");
-		goto out;
-	}
-
 	intel_display_suspend(dev);
 
 	intel_dp_mst_suspend(dev);
@@ -1609,10 +1614,9 @@  static int i915_drm_suspend(struct drm_device *dev)
 
 	intel_csr_ucode_suspend(dev_priv);
 
-out:
 	enable_rpm_wakeref_asserts(dev_priv);
 
-	return error;
+	return 0;
 }
 
 static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
@@ -2081,6 +2085,22 @@  int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 	return ret;
 }
 
+static int i915_pm_prepare(struct device *kdev)
+{
+	struct pci_dev *pdev = to_pci_dev(kdev);
+	struct drm_device *dev = pci_get_drvdata(pdev);
+
+	if (!dev) {
+		dev_err(kdev, "DRM not initialized, aborting suspend.\n");
+		return -ENODEV;
+	}
+
+	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+		return 0;
+
+	return i915_drm_prepare(dev);
+}
+
 static int i915_pm_suspend(struct device *kdev)
 {
 	struct pci_dev *pdev = to_pci_dev(kdev);
@@ -2731,6 +2751,7 @@  const struct dev_pm_ops i915_pm_ops = {
 	 * S0ix (via system suspend) and S3 event handlers [PMSG_SUSPEND,
 	 * PMSG_RESUME]
 	 */
+	.prepare = i915_pm_prepare,
 	.suspend = i915_pm_suspend,
 	.suspend_late = i915_pm_suspend_late,
 	.resume_early = i915_pm_resume_early,

drm/i915: Prepare GEM for suspend earlier

Commit Message

Comments

Patch