diff mbox series

[02/27] drm/i915/guc: Fix outstanding G2H accounting

Message ID 20210826032327.18078-3-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series Clean up GuC CI failures, simplify locking, and kernel DOC | expand

Commit Message

Matthew Brost Aug. 26, 2021, 3:23 a.m. UTC
A small race that could result in incorrect accounting of the number
of outstanding G2H. Basically prior to this patch we did not increment
the number of outstanding G2H if we encoutered a GT reset while sending
a H2G. This was incorrect as the context state had already been updated
to anticipate a G2H response thus the counter should be incremented.

Also always use helper when decrementing this value.

Fixes: f4eb1f3fe946 ("drm/i915/guc: Ensure G2H response has space in buffer")
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Cc: <stable@vger.kernel.org>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

Comments

Daniele Ceraolo Spurio Aug. 26, 2021, 11:09 p.m. UTC | #1
On 8/25/2021 8:23 PM, Matthew Brost wrote:
> A small race that could result in incorrect accounting of the number
> of outstanding G2H. Basically prior to this patch we did not increment
> the number of outstanding G2H if we encoutered a GT reset while sending
> a H2G. This was incorrect as the context state had already been updated
> to anticipate a G2H response thus the counter should be incremented.
>
> Also always use helper when decrementing this value.
>
> Fixes: f4eb1f3fe946 ("drm/i915/guc: Ensure G2H response has space in buffer")
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> Cc: <stable@vger.kernel.org>
> ---
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 23 ++++++++++---------
>   1 file changed, 12 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 69faa39da178..03a86da6011e 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -352,6 +352,12 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
>   	xa_unlock_irqrestore(&guc->context_lookup, flags);
>   }
>   
> +static void decr_outstanding_submission_g2h(struct intel_guc *guc)
> +{
> +	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
> +		wake_up_all(&guc->ct.wq);
> +}
> +
>   static int guc_submission_send_busy_loop(struct intel_guc *guc,
>   					 const u32 *action,
>   					 u32 len,
> @@ -360,11 +366,12 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
>   {
>   	int err;
>   
> -	err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
> -
> -	if (!err && g2h_len_dw)
> +	if (g2h_len_dw)
>   		atomic_inc(&guc->outstanding_submission_g2h);
>   
> +	err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
> +	GEM_BUG_ON(g2h_len_dw && err == -EBUSY);

AFAICS having a return g2h is not tied to not returning EBUSY, the only 
way to avoid  EBUSY seems to be for loop to be true. maybe have instead:

GEM_BUG_ON(g2h_len_dw && !loop);

earlier on?

Daniele

> +
>   	return err;
>   }
>   
> @@ -616,7 +623,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
>   		init_sched_state(ce);
>   
>   		if (pending_enable || destroyed || deregister) {
> -			atomic_dec(&guc->outstanding_submission_g2h);
> +			decr_outstanding_submission_g2h(guc);
>   			if (deregister)
>   				guc_signal_context_fence(ce);
>   			if (destroyed) {
> @@ -635,7 +642,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
>   				intel_engine_signal_breadcrumbs(ce->engine);
>   			}
>   			intel_context_sched_disable_unpin(ce);
> -			atomic_dec(&guc->outstanding_submission_g2h);
> +			decr_outstanding_submission_g2h(guc);
>   			spin_lock_irqsave(&ce->guc_state.lock, flags);
>   			guc_blocked_fence_complete(ce);
>   			spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> @@ -2583,12 +2590,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
>   	return ce;
>   }
>   
> -static void decr_outstanding_submission_g2h(struct intel_guc *guc)
> -{
> -	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
> -		wake_up_all(&guc->ct.wq);
> -}
> -
>   int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
>   					  const u32 *msg,
>   					  u32 len)
Matthew Brost Aug. 27, 2021, 1:36 a.m. UTC | #2
On Thu, Aug 26, 2021 at 04:09:59PM -0700, Daniele Ceraolo Spurio wrote:
> 
> 
> On 8/25/2021 8:23 PM, Matthew Brost wrote:
> > A small race that could result in incorrect accounting of the number
> > of outstanding G2H. Basically prior to this patch we did not increment
> > the number of outstanding G2H if we encoutered a GT reset while sending
> > a H2G. This was incorrect as the context state had already been updated
> > to anticipate a G2H response thus the counter should be incremented.
> > 
> > Also always use helper when decrementing this value.
> > 
> > Fixes: f4eb1f3fe946 ("drm/i915/guc: Ensure G2H response has space in buffer")
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > Cc: <stable@vger.kernel.org>
> > ---
> >   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 23 ++++++++++---------
> >   1 file changed, 12 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index 69faa39da178..03a86da6011e 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -352,6 +352,12 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
> >   	xa_unlock_irqrestore(&guc->context_lookup, flags);
> >   }
> > +static void decr_outstanding_submission_g2h(struct intel_guc *guc)
> > +{
> > +	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
> > +		wake_up_all(&guc->ct.wq);
> > +}
> > +
> >   static int guc_submission_send_busy_loop(struct intel_guc *guc,
> >   					 const u32 *action,
> >   					 u32 len,
> > @@ -360,11 +366,12 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
> >   {
> >   	int err;
> > -	err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
> > -
> > -	if (!err && g2h_len_dw)
> > +	if (g2h_len_dw)
> >   		atomic_inc(&guc->outstanding_submission_g2h);
> > +	err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
> > +	GEM_BUG_ON(g2h_len_dw && err == -EBUSY);
> 
> AFAICS having a return g2h is not tied to not returning EBUSY, the only way
> to avoid  EBUSY seems to be for loop to be true. maybe have instead:
> 
> GEM_BUG_ON(g2h_len_dw && !loop);
> 
> earlier on?
> 

Yep, that is better. Can you respin this for me while I'm out?

Matt

> Daniele
> 
> > +
> >   	return err;
> >   }
> > @@ -616,7 +623,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
> >   		init_sched_state(ce);
> >   		if (pending_enable || destroyed || deregister) {
> > -			atomic_dec(&guc->outstanding_submission_g2h);
> > +			decr_outstanding_submission_g2h(guc);
> >   			if (deregister)
> >   				guc_signal_context_fence(ce);
> >   			if (destroyed) {
> > @@ -635,7 +642,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
> >   				intel_engine_signal_breadcrumbs(ce->engine);
> >   			}
> >   			intel_context_sched_disable_unpin(ce);
> > -			atomic_dec(&guc->outstanding_submission_g2h);
> > +			decr_outstanding_submission_g2h(guc);
> >   			spin_lock_irqsave(&ce->guc_state.lock, flags);
> >   			guc_blocked_fence_complete(ce);
> >   			spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> > @@ -2583,12 +2590,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
> >   	return ce;
> >   }
> > -static void decr_outstanding_submission_g2h(struct intel_guc *guc)
> > -{
> > -	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
> > -		wake_up_all(&guc->ct.wq);
> > -}
> > -
> >   int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
> >   					  const u32 *msg,
> >   					  u32 len)
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 69faa39da178..03a86da6011e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -352,6 +352,12 @@  static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id,
 	xa_unlock_irqrestore(&guc->context_lookup, flags);
 }
 
+static void decr_outstanding_submission_g2h(struct intel_guc *guc)
+{
+	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
+		wake_up_all(&guc->ct.wq);
+}
+
 static int guc_submission_send_busy_loop(struct intel_guc *guc,
 					 const u32 *action,
 					 u32 len,
@@ -360,11 +366,12 @@  static int guc_submission_send_busy_loop(struct intel_guc *guc,
 {
 	int err;
 
-	err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
-
-	if (!err && g2h_len_dw)
+	if (g2h_len_dw)
 		atomic_inc(&guc->outstanding_submission_g2h);
 
+	err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+	GEM_BUG_ON(g2h_len_dw && err == -EBUSY);
+
 	return err;
 }
 
@@ -616,7 +623,7 @@  static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 		init_sched_state(ce);
 
 		if (pending_enable || destroyed || deregister) {
-			atomic_dec(&guc->outstanding_submission_g2h);
+			decr_outstanding_submission_g2h(guc);
 			if (deregister)
 				guc_signal_context_fence(ce);
 			if (destroyed) {
@@ -635,7 +642,7 @@  static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
 				intel_engine_signal_breadcrumbs(ce->engine);
 			}
 			intel_context_sched_disable_unpin(ce);
-			atomic_dec(&guc->outstanding_submission_g2h);
+			decr_outstanding_submission_g2h(guc);
 			spin_lock_irqsave(&ce->guc_state.lock, flags);
 			guc_blocked_fence_complete(ce);
 			spin_unlock_irqrestore(&ce->guc_state.lock, flags);
@@ -2583,12 +2590,6 @@  g2h_context_lookup(struct intel_guc *guc, u32 desc_idx)
 	return ce;
 }
 
-static void decr_outstanding_submission_g2h(struct intel_guc *guc)
-{
-	if (atomic_dec_and_test(&guc->outstanding_submission_g2h))
-		wake_up_all(&guc->ct.wq);
-}
-
 int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
 					  const u32 *msg,
 					  u32 len)