
[v2,2/3] drm/i915/guc: Close deregister-context race against CT-loss

Message ID 20230815011210.1188379-3-alan.previn.teres.alexis@intel.com (mailing list archive)
State New, archived
Series Resolve suspend-resume racing with GuC destroy-context-worker

Commit Message

Teres Alexis, Alan Previn Aug. 15, 2023, 1:12 a.m. UTC
If we are at the end of suspend or very early in resume,
it's possible an async fence signal could lead us to the
execution of the context destruction worker (after the
prior worker flush).

Even if we check that the CT is enabled before calling
destroyed_worker_func, guc_lrc_desc_unpin may still fail
because, in corner cases, the CT can get disabled while we
traverse the GuC's context-destroy list, right before we
call the GuC's CT send function.

We've witnessed this race condition once every ~6000-8000
suspend-resume cycles while ensuring that workloads which
render something onscreen are continuously started just
before we suspend (and the workload is small enough to
complete and trigger the queued engine/context free-up
either very late in suspend or very early in resume).

In such a case, we need to unroll the unpin process because
guc-lrc-unpin takes a gt wakeref which only gets released in
the G2H IRQ reply that never comes through in this corner
case. That will cascade into a kernel hang later at the tail
end of suspend in this function:

   intel_wakeref_wait_for_idle(&gt->wakeref)
   (called by) - intel_gt_pm_wait_for_idle
   (called by) - wait_for_suspend

Doing this unroll and keeping the context in the GuC's
destroy-list will allow the context to get picked up on
the next destroy worker invocation or purged as part of a
major GuC sanitization or reset flow.

Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 40 +++++++++++++++++--
 1 file changed, 36 insertions(+), 4 deletions(-)

Comments

Rodrigo Vivi Aug. 15, 2023, 1:56 p.m. UTC | #1
On Mon, Aug 14, 2023 at 06:12:09PM -0700, Alan Previn wrote:
> If we are at the end of suspend or very early in resume,
> it's possible an async fence signal could lead us to the
> execution of the context destruction worker (after the
> prior worker flush).
> 
> Even if we check that the CT is enabled before calling
> destroyed_worker_func, guc_lrc_desc_unpin may still fail
> because, in corner cases, the CT can get disabled while we
> traverse the GuC's context-destroy list, right before we
> call the GuC's CT send function.
> 
> We've witnessed this race condition once every ~6000-8000
> suspend-resume cycles while ensuring that workloads which
> render something onscreen are continuously started just
> before we suspend (and the workload is small enough to
> complete and trigger the queued engine/context free-up
> either very late in suspend or very early in resume).
> 
> In such a case, we need to unroll the unpin process because
> guc-lrc-unpin takes a gt wakeref which only gets released in
> the G2H IRQ reply that never comes through in this corner
> case. That will cascade into a kernel hang later at the tail
> end of suspend in this function:
> 
>    intel_wakeref_wait_for_idle(&gt->wakeref)
>    (called by) - intel_gt_pm_wait_for_idle
>    (called by) - wait_for_suspend
> 
> Doing this unroll and keeping the context in the GuC's
> destroy-list will allow the context to get picked up on
> the next destroy worker invocation or purged as part of a
> major GuC sanitization or reset flow.
> 
> Signed-off-by: Alan Previn <alan.previn.teres.alexis@intel.com>
> ---
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 40 +++++++++++++++++--
>  1 file changed, 36 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 050572bb8dbe..ddb4ee4c4e51 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -235,6 +235,13 @@ set_context_destroyed(struct intel_context *ce)
>  	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
>  }
>  
> +static inline void
> +clr_context_destroyed(struct intel_context *ce)
> +{
> +	lockdep_assert_held(&ce->guc_state.lock);
> +	ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
> +}
> +
>  static inline bool context_pending_disable(struct intel_context *ce)
>  {
>  	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
> @@ -3175,7 +3182,7 @@ static void guc_context_close(struct intel_context *ce)
>  	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
>  }
>  
> -static inline void guc_lrc_desc_unpin(struct intel_context *ce)
> +static inline int guc_lrc_desc_unpin(struct intel_context *ce)
>  {
>  	struct intel_guc *guc = ce_to_guc(ce);
>  	struct intel_gt *gt = guc_to_gt(guc);
> @@ -3199,10 +3206,20 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
>  	if (unlikely(disabled)) {
>  		release_guc_id(guc, ce);
>  		__guc_context_destroy(ce);
> -		return;
> +		return 0;
> +	}
> +
> +	if (deregister_context(ce, ce->guc_id.id)) {
> +		/* Seal race with concurrent suspend by unrolling */
> +		spin_lock_irqsave(&ce->guc_state.lock, flags);
> +		set_context_registered(ce);
> +		clr_context_destroyed(ce);
> +		intel_gt_pm_put(gt);

how sure are we that this is not calling unbalanced puts?
could we wrap this in some existing function to make this clear?

> +		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> +		return -ENODEV;
>  	}
>  
> -	deregister_context(ce, ce->guc_id.id);
> +	return 0;
>  }
>  
>  static void __guc_context_destroy(struct intel_context *ce)
> @@ -3270,7 +3287,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc)
>  		if (!ce)
>  			break;
>  
> -		guc_lrc_desc_unpin(ce);
> +		if (guc_lrc_desc_unpin(ce)) {
> +			/*
> +			 * This means GuC's CT link severed mid-way which could happen
> +			 * in suspend-resume corner cases. In this case, put the
> +			 * context back into the destroyed_contexts list which will
> +			 * get picked up on the next context deregistration event or
> +			 * purged in a GuC sanitization event (reset/unload/wedged/...).
> +			 */
> +			spin_lock_irqsave(&guc->submission_state.lock, flags);
> +			list_add_tail(&ce->destroyed_link,
> +				      &guc->submission_state.destroyed_contexts);
> +			spin_unlock_irqrestore(&guc->submission_state.lock, flags);
> +			/* Bail now since the list might never be emptied if h2gs fail */
> +			break;
> +		}
> +
>  	}
>  }
>  
> -- 
> 2.39.0
>
Teres Alexis, Alan Previn Aug. 15, 2023, 7:08 p.m. UTC | #2
On Tue, 2023-08-15 at 09:56 -0400, Vivi, Rodrigo wrote:
> On Mon, Aug 14, 2023 at 06:12:09PM -0700, Alan Previn wrote:
> > If we are at the end of suspend or very early in resume,
> > it's possible an async fence signal could lead us to the
> > execution of the context destruction worker (after the
> > prior worker flush).

[snip]
> 
> > @@ -3199,10 +3206,20 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
> >  	if (unlikely(disabled)) {
> >  		release_guc_id(guc, ce);
> >  		__guc_context_destroy(ce);
> > -		return;
> > +		return 0;
> > +	}
> > +
> > +	if (deregister_context(ce, ce->guc_id.id)) {
> > +		/* Seal race with concurrent suspend by unrolling */
> > +		spin_lock_irqsave(&ce->guc_state.lock, flags);
> > +		set_context_registered(ce);
> > +		clr_context_destroyed(ce);
> > +		intel_gt_pm_put(gt);
> 
> how sure are we that this is not calling unbalanced puts?
alan: in this function (guc_lrc_desc_unpin), the summarized flow is:

	check-status-stuff
	if guc-is-not-disabled, take-pm, change ctx-state-bits
	[implied else] if guc-is-disabled, scrub-ctx and return

thus deregister_context is only called if we didn't exit, i.e. when guc-is-not-disabled, i.e. after the pm was taken.
> could we wrap this in some existing function to make this clear?
alan: yeah - it's not as readable as it could be - let me tweak this function and make it cleaner

> 
> > +		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
> > +		return -ENODEV;
> >  	}
> >  
> > -	deregister_context(ce, ce->guc_id.id);
> > +	return 0;
> >  }
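
For reference, a minimal sketch of the get/put pairing described above, stitched together from the hunks in this thread plus the flow summary; the locked section that takes the pm ref is not shown in the posted hunks, so those lines (and helpers such as submission_disabled and __intel_gt_pm_get) are assumptions about the surrounding code, not the actual follow-up patch:

	static inline int guc_lrc_desc_unpin(struct intel_context *ce)
	{
		struct intel_guc *guc = ce_to_guc(ce);
		struct intel_gt *gt = guc_to_gt(guc);
		unsigned long flags;
		bool disabled;

		/* check-status-stuff, then under the context lock: */
		spin_lock_irqsave(&ce->guc_state.lock, flags);
		disabled = submission_disabled(guc);
		if (!disabled) {
			/* take-pm + change ctx-state-bits */
			__intel_gt_pm_get(gt);
			set_context_destroyed(ce);
			clr_context_registered(ce);
		}
		spin_unlock_irqrestore(&ce->guc_state.lock, flags);

		if (unlikely(disabled)) {
			/* guc-is-disabled: scrub the context; no pm ref was taken */
			release_guc_id(guc, ce);
			__guc_context_destroy(ce);
			return 0;
		}

		/* only reached when the pm ref was taken above */
		if (deregister_context(ce, ce->guc_id.id)) {
			/* unroll: drop exactly the ref taken in this function */
			spin_lock_irqsave(&ce->guc_state.lock, flags);
			set_context_registered(ce);
			clr_context_destroyed(ce);
			intel_gt_pm_put(gt);
			spin_unlock_irqrestore(&ce->guc_state.lock, flags);
			return -ENODEV;
		}

		return 0;
	}
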
Teres Alexis, Alan Previn Aug. 25, 2023, 6:54 p.m. UTC | #3
just a follow up note-to-self:

On Tue, 2023-08-15 at 12:08 -0700, Teres Alexis, Alan Previn wrote:
> On Tue, 2023-08-15 at 09:56 -0400, Vivi, Rodrigo wrote:
> > On Mon, Aug 14, 2023 at 06:12:09PM -0700, Alan Previn wrote:
> > > 
[snip]

in guc_submission_send_busy_loop, we are incrementing the following
that needs to be decremented if the function fails.

atomic_inc(&guc->outstanding_submission_g2h);

also, it seems that even with this unroll design - we are still
leaking a wakeref elsewhere. this is despite a cleaner redesign of
flows in function "guc_lrc_desc_unpin"
(discussed earlier as not being very readable).

will re-rev today but will probably need more follow-ups
tracking that one more leaking gt-wakeref (once in thousands of cycles),
but at least now we are not hanging mid-suspend - we bail from suspend
with useful kernel messages.
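
As a rough illustration of the accounting unwind mentioned above - assuming guc_submission_send_busy_loop keeps its current shape as a thin wrapper around intel_guc_send_busy_loop - one possible form (a sketch, not the posted fix):

	static int guc_submission_send_busy_loop(struct intel_guc *guc,
						 const u32 *action, u32 len,
						 u32 g2h_len_dw, bool loop)
	{
		int ret;

		if (g2h_len_dw)
			atomic_inc(&guc->outstanding_submission_g2h);

		ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
		if (ret && g2h_len_dw) {
			/* the expected G2H reply will never arrive; undo the count */
			atomic_dec(&guc->outstanding_submission_g2h);
		}

		return ret;
	}
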
Teres Alexis, Alan Previn Aug. 28, 2023, 9:06 p.m. UTC | #4
Additional update from the most recent testing.

When relying solely on guc_lrc_desc_unpin getting a failure back from deregister_context
as the means of identifying that we are in the "deregister-context-vs-suspend-late" race,
it is too late a point to handle this safely. This is because one of the
first things destroyed_worker_func does is to take a gt pm wakeref - which triggers
the gt_unpark function, which in turn runs a whole bunch of other flows, including
triggering more workers and taking additional refs. That said, it's best to not even call
deregister_destroyed_contexts from the worker when !intel_guc_is_ready (ct-is-disabled).

...alan
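
A sketch of that suggestion - the early intel_guc_is_ready() bail before any gt pm is taken is the point being made; the rest of the worker body is reconstructed and may not match the tree exactly:

	static void destroyed_worker_func(struct work_struct *w)
	{
		struct intel_guc *guc = container_of(w, struct intel_guc,
						     submission_state.destroyed_worker);
		struct intel_gt *gt = guc_to_gt(guc);
		int tmp;

		/*
		 * If the CT is already down (suspend-late corner case), skip the
		 * walk entirely so we never take the gt pm wakeref / trigger
		 * gt_unpark here; the contexts stay on destroyed_contexts and get
		 * picked up on a later worker run or purged in the next GuC
		 * sanitization (reset/unload/wedged).
		 */
		if (!intel_guc_is_ready(guc))
			return;

		with_intel_gt_pm(gt, tmp)
			deregister_destroyed_contexts(guc);
	}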


On Fri, 2023-08-25 at 11:54 -0700, Teres Alexis, Alan Previn wrote:
> just a follow up note-to-self:
> 
> On Tue, 2023-08-15 at 12:08 -0700, Teres Alexis, Alan Previn wrote:
> > On Tue, 2023-08-15 at 09:56 -0400, Vivi, Rodrigo wrote:
> > > On Mon, Aug 14, 2023 at 06:12:09PM -0700, Alan Previn wrote:
> > > > 
> [snip]
> 
> in guc_submission_send_busy_loop, we are incrementing the following
> that needs to be decremented if the function fails.
> 
> atomic_inc(&guc->outstanding_submission_g2h);
> 
> also, it seems that even with this unroll design - we are still
> leaking a wakeref elsewhere. this is despite a cleaner redesign of
> flows in function "guc_lrc_desc_unpin"
> (discussed earlier as not being very readable).
> 
> will re-rev today but will probably need more follow-ups
> tracking that one more leaking gt-wakeref (once in thousands of cycles),
> but at least now we are not hanging mid-suspend - we bail from suspend
> with useful kernel messages.
> 
> 
> 
>

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 050572bb8dbe..ddb4ee4c4e51 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -235,6 +235,13 @@  set_context_destroyed(struct intel_context *ce)
 	ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
 }
 
+static inline void
+clr_context_destroyed(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
+}
+
 static inline bool context_pending_disable(struct intel_context *ce)
 {
 	return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
@@ -3175,7 +3182,7 @@  static void guc_context_close(struct intel_context *ce)
 	spin_unlock_irqrestore(&ce->guc_state.lock, flags);
 }
 
-static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+static inline int guc_lrc_desc_unpin(struct intel_context *ce)
 {
 	struct intel_guc *guc = ce_to_guc(ce);
 	struct intel_gt *gt = guc_to_gt(guc);
@@ -3199,10 +3206,20 @@  static inline void guc_lrc_desc_unpin(struct intel_context *ce)
 	if (unlikely(disabled)) {
 		release_guc_id(guc, ce);
 		__guc_context_destroy(ce);
-		return;
+		return 0;
+	}
+
+	if (deregister_context(ce, ce->guc_id.id)) {
+		/* Seal race with concurrent suspend by unrolling */
+		spin_lock_irqsave(&ce->guc_state.lock, flags);
+		set_context_registered(ce);
+		clr_context_destroyed(ce);
+		intel_gt_pm_put(gt);
+		spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+		return -ENODEV;
 	}
 
-	deregister_context(ce, ce->guc_id.id);
+	return 0;
 }
 
 static void __guc_context_destroy(struct intel_context *ce)
@@ -3270,7 +3287,22 @@  static void deregister_destroyed_contexts(struct intel_guc *guc)
 		if (!ce)
 			break;
 
-		guc_lrc_desc_unpin(ce);
+		if (guc_lrc_desc_unpin(ce)) {
+			/*
+			 * This means GuC's CT link severed mid-way which could happen
+			 * in suspend-resume corner cases. In this case, put the
+			 * context back into the destroyed_contexts list which will
+			 * get picked up on the next context deregistration event or
+			 * purged in a GuC sanitization event (reset/unload/wedged/...).
+			 */
+			spin_lock_irqsave(&guc->submission_state.lock, flags);
+			list_add_tail(&ce->destroyed_link,
+				      &guc->submission_state.destroyed_contexts);
+			spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+			/* Bail now since the list might never be emptied if h2gs fail */
+			break;
+		}
+
 	}
 }