diff mbox series

[1/2] drm/i915/guc: Don't check CT descriptor status before CT write / read

Message ID 20220120183655.3228-2-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series A few CT updates | expand

Commit Message

Matthew Brost Jan. 20, 2022, 6:36 p.m. UTC
Don't check CT descriptor status, unless CONFIG_DRM_I915_DEBUG_GUC is
set, before CT write / read as this could result in a read across the
PCIe bus thus adding latency to every CT write / read. On well-behaved
systems this value should always read as zero. If for some reason it
doesn't, the CT channel is broken and will eventually recover from a GT
reset, albeit the GT reset will not be triggered immediately by seeing
that descriptor status is non-zero.

v2:
 (CI)
  - Fix build error (hide corrupted label in write function behind
    CONFIG_DRM_I915_DEBUG_GUC)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 ++++++
 1 file changed, 6 insertions(+)

Comments

Jani Nikula Jan. 21, 2022, 7:28 a.m. UTC | #1
On Thu, 20 Jan 2022, Matthew Brost <matthew.brost@intel.com> wrote:
> Don't check CT descriptor status, unless CONFIG_DRM_I915_DEBUG_GUC is
> set, before CT write / read as this could result in a read across the
> PCIe bus thus adding latency to every CT write / read. On well behavied
> systems this vaue should always read as zero. For some reason it doesn't
> the CT channel is broken and will eventually recover from a GT reset,
> albeit the GT reset will not be triggered immediately by seeing that
> descriptor status is non-zero.
>
> v2:
>  (CI)
>   - Fix build error (hide corrupted label in write function behind
>     CONFIG_DRM_I915_DEBUG_GUC)
>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 ++++++
>  1 file changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> index de89d40abd38d..948cf31429412 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> @@ -379,8 +379,10 @@ static int ct_write(struct intel_guc_ct *ct,
>  	u32 *cmds = ctb->cmds;
>  	unsigned int i;
>  
> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>  	if (unlikely(desc->status))
>  		goto corrupted;
> +#endif

Please don't add #ifdefs inline. You can use
IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) in if statements, but otherwise
the code needs to be split out to a separate function.

BR,
Jani.

>  
>  	GEM_BUG_ON(tail > size);
>  
> @@ -445,11 +447,13 @@ static int ct_write(struct intel_guc_ct *ct,
>  
>  	return 0;
>  
> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>  corrupted:
>  	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>  		 desc->head, desc->tail, desc->status);
>  	ctb->broken = true;
>  	return -EPIPE;
> +#endif
>  }
>  
>  /**
> @@ -815,8 +819,10 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>  	if (unlikely(ctb->broken))
>  		return -EPIPE;
>  
> +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>  	if (unlikely(desc->status))
>  		goto corrupted;
> +#endif
>  
>  	GEM_BUG_ON(head > size);
Matthew Brost Jan. 21, 2022, 9:03 p.m. UTC | #2
On Fri, Jan 21, 2022 at 09:28:46AM +0200, Jani Nikula wrote:
> On Thu, 20 Jan 2022, Matthew Brost <matthew.brost@intel.com> wrote:
> > Don't check CT descriptor status, unless CONFIG_DRM_I915_DEBUG_GUC is
> > set, before CT write / read as this could result in a read across the
> > PCIe bus thus adding latency to every CT write / read. On well behavied
> > systems this vaue should always read as zero. For some reason it doesn't
> > the CT channel is broken and will eventually recover from a GT reset,
> > albeit the GT reset will not be triggered immediately by seeing that
> > descriptor status is non-zero.
> >
> > v2:
> >  (CI)
> >   - Fix build error (hide corrupted label in write function behind
> >     CONFIG_DRM_I915_DEBUG_GUC)
> >
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 ++++++
> >  1 file changed, 6 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > index de89d40abd38d..948cf31429412 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
> > @@ -379,8 +379,10 @@ static int ct_write(struct intel_guc_ct *ct,
> >  	u32 *cmds = ctb->cmds;
> >  	unsigned int i;
> >  
> > +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> >  	if (unlikely(desc->status))
> >  		goto corrupted;
> > +#endif
> 
> Please don't add #ifdefs inline. You can use
> IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) in if statements, but otherwise
> the code needs to be split out to a separate function.
> 

Sure, but I feel like I've actually been told by someone else not to use
the IS_ENABLED macro and to use inline ifdefs instead...

Matt

> BR,
> Jani.
> 
> >  
> >  	GEM_BUG_ON(tail > size);
> >  
> > @@ -445,11 +447,13 @@ static int ct_write(struct intel_guc_ct *ct,
> >  
> >  	return 0;
> >  
> > +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> >  corrupted:
> >  	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
> >  		 desc->head, desc->tail, desc->status);
> >  	ctb->broken = true;
> >  	return -EPIPE;
> > +#endif
> >  }
> >  
> >  /**
> > @@ -815,8 +819,10 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
> >  	if (unlikely(ctb->broken))
> >  		return -EPIPE;
> >  
> > +#ifdef CONFIG_DRM_I915_DEBUG_GUC
> >  	if (unlikely(desc->status))
> >  		goto corrupted;
> > +#endif
> >  
> >  	GEM_BUG_ON(head > size);
> 
> -- 
> Jani Nikula, Intel Open Source Graphics Center
Jani Nikula Jan. 24, 2022, 1:07 p.m. UTC | #3
On Fri, 21 Jan 2022, Matthew Brost <matthew.brost@intel.com> wrote:
> On Fri, Jan 21, 2022 at 09:28:46AM +0200, Jani Nikula wrote:
>> On Thu, 20 Jan 2022, Matthew Brost <matthew.brost@intel.com> wrote:
>> > Don't check CT descriptor status, unless CONFIG_DRM_I915_DEBUG_GUC is
>> > set, before CT write / read as this could result in a read across the
>> > PCIe bus thus adding latency to every CT write / read. On well behavied
>> > systems this vaue should always read as zero. For some reason it doesn't
>> > the CT channel is broken and will eventually recover from a GT reset,
>> > albeit the GT reset will not be triggered immediately by seeing that
>> > descriptor status is non-zero.
>> >
>> > v2:
>> >  (CI)
>> >   - Fix build error (hide corrupted label in write function behind
>> >     CONFIG_DRM_I915_DEBUG_GUC)
>> >
>> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 ++++++
>> >  1 file changed, 6 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> > index de89d40abd38d..948cf31429412 100644
>> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
>> > @@ -379,8 +379,10 @@ static int ct_write(struct intel_guc_ct *ct,
>> >  	u32 *cmds = ctb->cmds;
>> >  	unsigned int i;
>> >  
>> > +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>> >  	if (unlikely(desc->status))
>> >  		goto corrupted;
>> > +#endif
>> 
>> Please don't add #ifdefs inline. You can use
>> IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC) in if statements, but otherwise
>> the code needs to be split out to a separate function.
>> 
>
> Sure, but I feel like I've actually been by someone else to not use the
> IS_ENABLED macro and use ifdefs inlines...

Citation needed.

Basically never use #if/#ifdef inline. Only use them at the top level
like this:

#if IS_ENABLED(CONFIG_FOO)
static int bar(void)
{
	/* implementation with foo */
}
#else
static int bar(void)
{
	/* implementation without foo */
}
#endif

Sometimes you can avoid the above boilerplate with IS_ENABLED() inline:

	if (IS_ENABLED(CONFIG_FOO))
		...

Basically if you think #if/#ifdef inline is the easiest, you need to
refactor the code to do it cleanly without them.

BR,
Jani.


>
> Matt
>
>> BR,
>> Jani.
>> 
>> >  
>> >  	GEM_BUG_ON(tail > size);
>> >  
>> > @@ -445,11 +447,13 @@ static int ct_write(struct intel_guc_ct *ct,
>> >  
>> >  	return 0;
>> >  
>> > +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>> >  corrupted:
>> >  	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
>> >  		 desc->head, desc->tail, desc->status);
>> >  	ctb->broken = true;
>> >  	return -EPIPE;
>> > +#endif
>> >  }
>> >  
>> >  /**
>> > @@ -815,8 +819,10 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
>> >  	if (unlikely(ctb->broken))
>> >  		return -EPIPE;
>> >  
>> > +#ifdef CONFIG_DRM_I915_DEBUG_GUC
>> >  	if (unlikely(desc->status))
>> >  		goto corrupted;
>> > +#endif
>> >  
>> >  	GEM_BUG_ON(head > size);
>> 
>> -- 
>> Jani Nikula, Intel Open Source Graphics Center
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index de89d40abd38d..948cf31429412 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -379,8 +379,10 @@  static int ct_write(struct intel_guc_ct *ct,
 	u32 *cmds = ctb->cmds;
 	unsigned int i;
 
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
 	if (unlikely(desc->status))
 		goto corrupted;
+#endif
 
 	GEM_BUG_ON(tail > size);
 
@@ -445,11 +447,13 @@  static int ct_write(struct intel_guc_ct *ct,
 
 	return 0;
 
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
 corrupted:
 	CT_ERROR(ct, "Corrupted descriptor head=%u tail=%u status=%#x\n",
 		 desc->head, desc->tail, desc->status);
 	ctb->broken = true;
 	return -EPIPE;
+#endif
 }
 
 /**
@@ -815,8 +819,10 @@  static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
 	if (unlikely(ctb->broken))
 		return -EPIPE;
 
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
 	if (unlikely(desc->status))
 		goto corrupted;
+#endif
 
 	GEM_BUG_ON(head > size);