diff mbox

[v8,1/8] drm/i915: Create GEM runtime resume helper and handle GEM suspend/resume errors

Message ID 1506432285-14979-1-git-send-email-sagar.a.kamble@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sagar.a.kamble@intel.com Sept. 26, 2017, 1:24 p.m. UTC
Prepared helper i915_gem_runtime_resume to recreate gem setup.
Returning status from i915_gem_runtime_suspend and i915_gem_resume.
This will be placeholder for handling any errors from uC suspend/resume
in upcoming patches. Restructured the suspend/resume routines w.r.t setup
creation and rollback order.
This also fixes issue of ordering of i915_gem_runtime_resume with
intel_runtime_pm_enable_interrupts.

v2: Fixed return from intel_runtime_resume. (Michał Winiarski)

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Michał Winiarski <michal.winiarski@intel.com>
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 34 ++++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_drv.h |  5 +++--
 drivers/gpu/drm/i915/i915_gem.c | 20 ++++++++++++++++++--
 3 files changed, 41 insertions(+), 18 deletions(-)

Comments

sagar.a.kamble@intel.com Sept. 26, 2017, 1:29 p.m. UTC | #1
And I have forgot to amend the ordering of tags cc, s-o-b etc. Sorry for 
the same.


On 9/26/2017 6:54 PM, Sagar Arun Kamble wrote:
> Prepared helper i915_gem_runtime_resume to recreate gem setup.
> Returning status from i915_gem_runtime_suspend and i915_gem_resume.
> This will be placeholder for handling any errors from uC suspend/resume
> in upcoming patches. Restructured the suspend/resume routines w.r.t setup
> creation and rollback order.
> This also fixes issue of ordering of i915_gem_runtime_resume with
> intel_runtime_pm_enable_interrupts.
>
> v2: Fixed return from intel_runtime_resume. (Michał Winiarski)
>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Imre Deak <imre.deak@intel.com>
> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Michał Winiarski <michal.winiarski@intel.com>
> Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.c | 34 ++++++++++++++++++++--------------
>   drivers/gpu/drm/i915/i915_drv.h |  5 +++--
>   drivers/gpu/drm/i915/i915_gem.c | 20 ++++++++++++++++++--
>   3 files changed, 41 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 7056bb2..a3bbf18 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1655,6 +1655,7 @@ static int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state)
>   static int i915_drm_resume(struct drm_device *dev)
>   {
>   	struct drm_i915_private *dev_priv = to_i915(dev);
> +	struct pci_dev *pdev = dev_priv->drm.pdev;
>   	int ret;
>   
>   	disable_rpm_wakeref_asserts(dev_priv);
> @@ -1666,7 +1667,9 @@ static int i915_drm_resume(struct drm_device *dev)
>   
>   	intel_csr_ucode_resume(dev_priv);
>   
> -	i915_gem_resume(dev_priv);
> +	ret = i915_gem_resume(dev_priv);
> +	if (ret)
> +		dev_err(&pdev->dev, "GEM resume failed\n");
>   
>   	i915_restore_state(dev_priv);
>   	intel_pps_unlock_regs_wa(dev_priv);
> @@ -2495,7 +2498,11 @@ static int intel_runtime_suspend(struct device *kdev)
>   	 * We are safe here against re-faults, since the fault handler takes
>   	 * an RPM reference.
>   	 */
> -	i915_gem_runtime_suspend(dev_priv);
> +	ret = i915_gem_runtime_suspend(dev_priv);
> +	if (ret) {
> +		enable_rpm_wakeref_asserts(dev_priv);
> +		return ret;
> +	}
>   
>   	intel_guc_suspend(dev_priv);
>   
> @@ -2515,6 +2522,8 @@ static int intel_runtime_suspend(struct device *kdev)
>   		DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret);
>   		intel_runtime_pm_enable_interrupts(dev_priv);
>   
> +		intel_guc_resume(dev_priv);
> +		i915_gem_runtime_resume(dev_priv);
>   		enable_rpm_wakeref_asserts(dev_priv);
>   
>   		return ret;
> @@ -2567,7 +2576,7 @@ static int intel_runtime_resume(struct device *kdev)
>   	struct pci_dev *pdev = to_pci_dev(kdev);
>   	struct drm_device *dev = pci_get_drvdata(pdev);
>   	struct drm_i915_private *dev_priv = to_i915(dev);
> -	int ret = 0;
> +	int err = 0, ret;
>   
>   	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv)))
>   		return -ENODEV;
> @@ -2593,16 +2602,9 @@ static int intel_runtime_resume(struct device *kdev)
>   	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
>   		hsw_disable_pc8(dev_priv);
>   	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
> -		ret = vlv_resume_prepare(dev_priv, true);
> +		err = vlv_resume_prepare(dev_priv, true);
>   	}
>   
> -	/*
> -	 * No point of rolling back things in case of an error, as the best
> -	 * we can do is to hope that things will still work (and disable RPM).
> -	 */
> -	i915_gem_init_swizzling(dev_priv);
> -	i915_gem_restore_fences(dev_priv);
> -
>   	intel_runtime_pm_enable_interrupts(dev_priv);
>   
>   	/*
> @@ -2615,14 +2617,18 @@ static int intel_runtime_resume(struct device *kdev)
>   
>   	intel_enable_ipc(dev_priv);
>   
> +	ret = i915_gem_runtime_resume(dev_priv);
> +	if (!err)
> +		err = ret;
> +
>   	enable_rpm_wakeref_asserts(dev_priv);
>   
> -	if (ret)
> -		DRM_ERROR("Runtime resume failed, disabling it (%d)\n", ret);
> +	if (err)
> +		DRM_ERROR("Runtime resume failed, disabling it (%d)\n", err);
>   	else
>   		DRM_DEBUG_KMS("Device resumed\n");
>   
> -	return ret;
> +	return err;
>   }
>   
>   const struct dev_pm_ops i915_pm_ops = {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 61a4be9..69370c1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3471,7 +3471,8 @@ struct i915_vma * __must_check
>   int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
>   void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
>   
> -void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
> +int i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
> +int i915_gem_runtime_resume(struct drm_i915_private *dev_priv);
>   
>   static inline int __sg_page_count(const struct scatterlist *sg)
>   {
> @@ -3674,7 +3675,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
>   int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
>   			   unsigned int flags);
>   int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
> -void i915_gem_resume(struct drm_i915_private *dev_priv);
> +int i915_gem_resume(struct drm_i915_private *dev_priv);
>   int i915_gem_fault(struct vm_fault *vmf);
>   int i915_gem_object_wait(struct drm_i915_gem_object *obj,
>   			 unsigned int flags,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 73eeb6b..dbe181b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2022,7 +2022,7 @@ int i915_gem_fault(struct vm_fault *vmf)
>   	intel_runtime_pm_put(i915);
>   }
>   
> -void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
> +int i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
>   {
>   	struct drm_i915_gem_object *obj, *on;
>   	int i;
> @@ -2065,6 +2065,20 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
>   		GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link));
>   		reg->dirty = true;
>   	}
> +
> +	return 0;
> +}
> +
> +int i915_gem_runtime_resume(struct drm_i915_private *dev_priv)
> +{
> +	/*
> +	 * No point of rolling back things in case of an error, as the best
> +	 * we can do is to hope that things will still work (and disable RPM).
> +	 */
> +	i915_gem_init_swizzling(dev_priv);
> +	i915_gem_restore_fences(dev_priv);
> +
> +	return 0;
>   }
>   
>   static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
> @@ -4587,7 +4601,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
>   	return ret;
>   }
>   
> -void i915_gem_resume(struct drm_i915_private *dev_priv)
> +int i915_gem_resume(struct drm_i915_private *dev_priv)
>   {
>   	struct drm_device *dev = &dev_priv->drm;
>   
> @@ -4603,6 +4617,8 @@ void i915_gem_resume(struct drm_i915_private *dev_priv)
>   	dev_priv->gt.resume(dev_priv);
>   
>   	mutex_unlock(&dev->struct_mutex);
> +
> +	return 0;
>   }
>   
>   void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
Chris Wilson Sept. 26, 2017, 1:37 p.m. UTC | #2
Quoting Sagar Arun Kamble (2017-09-26 14:24:38)
>  drivers/gpu/drm/i915/i915_drv.c | 34 ++++++++++++++++++++--------------
>  drivers/gpu/drm/i915/i915_drv.h |  5 +++--
>  drivers/gpu/drm/i915/i915_gem.c | 20 ++++++++++++++++++--
>  3 files changed, 41 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 7056bb2..a3bbf18 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1655,6 +1655,7 @@ static int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state)
>  static int i915_drm_resume(struct drm_device *dev)
>  {
>         struct drm_i915_private *dev_priv = to_i915(dev);
> +       struct pci_dev *pdev = dev_priv->drm.pdev;
>         int ret;
>  
>         disable_rpm_wakeref_asserts(dev_priv);
> @@ -1666,7 +1667,9 @@ static int i915_drm_resume(struct drm_device *dev)
>  
>         intel_csr_ucode_resume(dev_priv);
>  
> -       i915_gem_resume(dev_priv);
> +       ret = i915_gem_resume(dev_priv);
> +       if (ret)
> +               dev_err(&pdev->dev, "GEM resume failed\n");
>  
>         i915_restore_state(dev_priv);
>         intel_pps_unlock_regs_wa(dev_priv);
> @@ -2615,14 +2617,18 @@ static int intel_runtime_resume(struct device *kdev)
>  
>         intel_enable_ipc(dev_priv);
>  
> +       ret = i915_gem_runtime_resume(dev_priv);
> +       if (!err)
> +               err = ret;
> +
>         enable_rpm_wakeref_asserts(dev_priv);
>  
> -       if (ret)
> -               DRM_ERROR("Runtime resume failed, disabling it (%d)\n", ret);
> +       if (err)
> +               DRM_ERROR("Runtime resume failed, disabling it (%d)\n", err);
>         else
>                 DRM_DEBUG_KMS("Device resumed\n");
>  
> -       return ret;
> +       return err;

What we've tried to do is to limit the damage that can happen if we
fail to re-enable GEM. We have tried to let the device resume, but
mark the device as wedged to prevent future execution, and so let the
device live long enough to be able to show error messages and whatnot
(system critical clients should also survive and fallover to alternative
paths).
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 7056bb2..a3bbf18 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1655,6 +1655,7 @@  static int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state)
 static int i915_drm_resume(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct pci_dev *pdev = dev_priv->drm.pdev;
 	int ret;
 
 	disable_rpm_wakeref_asserts(dev_priv);
@@ -1666,7 +1667,9 @@  static int i915_drm_resume(struct drm_device *dev)
 
 	intel_csr_ucode_resume(dev_priv);
 
-	i915_gem_resume(dev_priv);
+	ret = i915_gem_resume(dev_priv);
+	if (ret)
+		dev_err(&pdev->dev, "GEM resume failed\n");
 
 	i915_restore_state(dev_priv);
 	intel_pps_unlock_regs_wa(dev_priv);
@@ -2495,7 +2498,11 @@  static int intel_runtime_suspend(struct device *kdev)
 	 * We are safe here against re-faults, since the fault handler takes
 	 * an RPM reference.
 	 */
-	i915_gem_runtime_suspend(dev_priv);
+	ret = i915_gem_runtime_suspend(dev_priv);
+	if (ret) {
+		enable_rpm_wakeref_asserts(dev_priv);
+		return ret;
+	}
 
 	intel_guc_suspend(dev_priv);
 
@@ -2515,6 +2522,8 @@  static int intel_runtime_suspend(struct device *kdev)
 		DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret);
 		intel_runtime_pm_enable_interrupts(dev_priv);
 
+		intel_guc_resume(dev_priv);
+		i915_gem_runtime_resume(dev_priv);
 		enable_rpm_wakeref_asserts(dev_priv);
 
 		return ret;
@@ -2567,7 +2576,7 @@  static int intel_runtime_resume(struct device *kdev)
 	struct pci_dev *pdev = to_pci_dev(kdev);
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	int ret = 0;
+	int err = 0, ret;
 
 	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev_priv)))
 		return -ENODEV;
@@ -2593,16 +2602,9 @@  static int intel_runtime_resume(struct device *kdev)
 	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
 		hsw_disable_pc8(dev_priv);
 	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-		ret = vlv_resume_prepare(dev_priv, true);
+		err = vlv_resume_prepare(dev_priv, true);
 	}
 
-	/*
-	 * No point of rolling back things in case of an error, as the best
-	 * we can do is to hope that things will still work (and disable RPM).
-	 */
-	i915_gem_init_swizzling(dev_priv);
-	i915_gem_restore_fences(dev_priv);
-
 	intel_runtime_pm_enable_interrupts(dev_priv);
 
 	/*
@@ -2615,14 +2617,18 @@  static int intel_runtime_resume(struct device *kdev)
 
 	intel_enable_ipc(dev_priv);
 
+	ret = i915_gem_runtime_resume(dev_priv);
+	if (!err)
+		err = ret;
+
 	enable_rpm_wakeref_asserts(dev_priv);
 
-	if (ret)
-		DRM_ERROR("Runtime resume failed, disabling it (%d)\n", ret);
+	if (err)
+		DRM_ERROR("Runtime resume failed, disabling it (%d)\n", err);
 	else
 		DRM_DEBUG_KMS("Device resumed\n");
 
-	return ret;
+	return err;
 }
 
 const struct dev_pm_ops i915_pm_ops = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 61a4be9..69370c1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3471,7 +3471,8 @@  struct i915_vma * __must_check
 int i915_gem_object_unbind(struct drm_i915_gem_object *obj);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 
-void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
+int i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
+int i915_gem_runtime_resume(struct drm_i915_private *dev_priv);
 
 static inline int __sg_page_count(const struct scatterlist *sg)
 {
@@ -3674,7 +3675,7 @@  void i915_gem_reset_engine(struct intel_engine_cs *engine,
 int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 			   unsigned int flags);
 int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
-void i915_gem_resume(struct drm_i915_private *dev_priv);
+int i915_gem_resume(struct drm_i915_private *dev_priv);
 int i915_gem_fault(struct vm_fault *vmf);
 int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 			 unsigned int flags,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 73eeb6b..dbe181b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2022,7 +2022,7 @@  int i915_gem_fault(struct vm_fault *vmf)
 	intel_runtime_pm_put(i915);
 }
 
-void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
+int i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_gem_object *obj, *on;
 	int i;
@@ -2065,6 +2065,20 @@  void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 		GEM_BUG_ON(!list_empty(&reg->vma->obj->userfault_link));
 		reg->dirty = true;
 	}
+
+	return 0;
+}
+
+int i915_gem_runtime_resume(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * No point of rolling back things in case of an error, as the best
+	 * we can do is to hope that things will still work (and disable RPM).
+	 */
+	i915_gem_init_swizzling(dev_priv);
+	i915_gem_restore_fences(dev_priv);
+
+	return 0;
 }
 
 static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
@@ -4587,7 +4601,7 @@  int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	return ret;
 }
 
-void i915_gem_resume(struct drm_i915_private *dev_priv)
+int i915_gem_resume(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
 
@@ -4603,6 +4617,8 @@  void i915_gem_resume(struct drm_i915_private *dev_priv)
 	dev_priv->gt.resume(dev_priv);
 
 	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
 }
 
 void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)