diff mbox series

drm/i915: Set wedged if enable guc communication failed

Message ID 20230224231724.769343-1-zhanjun.dong@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Set wedged if enable guc communication failed | expand

Commit Message

Dong, Zhanjun Feb. 24, 2023, 11:17 p.m. UTC
Add an error-code check for guc_enable_communication() on the resume path, and set the GT wedged if it fails.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_pm.c | 5 ++++-
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 +++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

Comments

Jani Nikula Feb. 27, 2023, 11:30 a.m. UTC | #1
On Fri, 24 Feb 2023, Zhanjun Dong <zhanjun.dong@intel.com> wrote:
> Add err code check for enable_communication on resume path, set wedged if failed.

I can see that this is *what* the code does, but the commit message
should answer the question *why*.

>
> Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_pm.c | 5 ++++-
>  drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 +++++++--
>  2 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> index cef3d6f5c34e..f3bb7cbbd293 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> @@ -401,8 +401,11 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
>  	intel_ggtt_restore_fences(gt->ggtt);
>  
>  	ret = intel_uc_runtime_resume(&gt->uc);
> -	if (ret)
> +	if (ret) {
> +		/* Set wedge if uc resume failed */

This comment is just a reiteration of the C code in English, but doesn't
provide any useful additional information.

BR,
Jani.

> +		intel_gt_set_wedged(gt);
>  		return ret;
> +	}
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 6648691bd645..d4f428acf20a 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -698,8 +698,13 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication)
>  	/* Make sure we enable communication if and only if it's disabled */
>  	GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct));
>  
> -	if (enable_communication)
> -		guc_enable_communication(guc);
> +	if (enable_communication) {
> +		err = guc_enable_communication(guc);
> +		if (err) {
> +			guc_dbg(guc, "Failed to resume, %pe", ERR_PTR(err));
> +			return err;
> +		}
> +	}
>  
>  	/* If we are only resuming GuC communication but not reloading
>  	 * GuC, we need to ensure the ARAT timer interrupt is enabled
Dong, Zhanjun March 2, 2023, 10:08 p.m. UTC | #2
Thanks Jani.
I have sent an updated patch; let me know if you have any comments.

Regards,
Zhanjun

> -----Original Message-----
> From: Jani Nikula <jani.nikula@linux.intel.com>
> Sent: February 27, 2023 6:30 AM
> To: Dong, Zhanjun <zhanjun.dong@intel.com>; intel-
> gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org
> Cc: Dong, Zhanjun <zhanjun.dong@intel.com>
> Subject: Re: [PATCH] drm/i915: Set wedged if enable guc communication
> failed
> 
> On Fri, 24 Feb 2023, Zhanjun Dong <zhanjun.dong@intel.com> wrote:
> > Add err code check for enable_communication on resume path, set
> wedged if failed.
> 
> I can see that this is *what* the code does, but the commit message should
> answer the question *why*.
> 
> >
> > Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt_pm.c | 5 ++++-
> > drivers/gpu/drm/i915/gt/uc/intel_uc.c | 9 +++++++--
> >  2 files changed, 11 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> > b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> > index cef3d6f5c34e..f3bb7cbbd293 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
> > @@ -401,8 +401,11 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
> >  	intel_ggtt_restore_fences(gt->ggtt);
> >
> >  	ret = intel_uc_runtime_resume(&gt->uc);
> > -	if (ret)
> > +	if (ret) {
> > +		/* Set wedge if uc resume failed */
> 
> This comment is just a reiteration of the C code in English, but doesn't
> provide any useful additional information.
> 
> BR,
> Jani.
> 
> > +		intel_gt_set_wedged(gt);
> >  		return ret;
> > +	}
> >
> >  	return 0;
> >  }
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> > b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> > index 6648691bd645..d4f428acf20a 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> > @@ -698,8 +698,13 @@ static int __uc_resume(struct intel_uc *uc, bool
> enable_communication)
> >  	/* Make sure we enable communication if and only if it's disabled */
> >  	GEM_BUG_ON(enable_communication ==
> intel_guc_ct_enabled(&guc->ct));
> >
> > -	if (enable_communication)
> > -		guc_enable_communication(guc);
> > +	if (enable_communication) {
> > +		err = guc_enable_communication(guc);
> > +		if (err) {
> > +			guc_dbg(guc, "Failed to resume, %pe", ERR_PTR(err));
> > +			return err;
> > +		}
> > +	}
> >
> >  	/* If we are only resuming GuC communication but not reloading
> >  	 * GuC, we need to ensure the ARAT timer interrupt is enabled
> 
> --
> Jani Nikula, Intel Open Source Graphics Center
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index cef3d6f5c34e..f3bb7cbbd293 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -401,8 +401,11 @@  int intel_gt_runtime_resume(struct intel_gt *gt)
 	intel_ggtt_restore_fences(gt->ggtt);
 
 	ret = intel_uc_runtime_resume(&gt->uc);
-	if (ret)
+	if (ret) {
+		/* Set wedge if uc resume failed */
+		intel_gt_set_wedged(gt);
 		return ret;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 6648691bd645..d4f428acf20a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -698,8 +698,13 @@  static int __uc_resume(struct intel_uc *uc, bool enable_communication)
 	/* Make sure we enable communication if and only if it's disabled */
 	GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct));
 
-	if (enable_communication)
-		guc_enable_communication(guc);
+	if (enable_communication) {
+		err = guc_enable_communication(guc);
+		if (err) {
+			guc_dbg(guc, "Failed to resume, %pe", ERR_PTR(err));
+			return err;
+		}
+	}
 
 	/* If we are only resuming GuC communication but not reloading
 	 * GuC, we need to ensure the ARAT timer interrupt is enabled