@@ -235,6 +235,13 @@ set_context_destroyed(struct intel_context *ce)
ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}
+static inline void
+clr_context_destroyed(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
+}
+
static inline bool context_pending_disable(struct intel_context *ce)
{
return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
@@ -612,6 +619,8 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
u32 g2h_len_dw,
bool loop)
{
+ int ret;
+
/*
* We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
* so we don't handle the case where we don't get a reply because we
@@ -622,7 +631,11 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
if (g2h_len_dw)
atomic_inc(&guc->outstanding_submission_g2h);
- return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ if (ret)
+ atomic_dec(&guc->outstanding_submission_g2h);
+
+ return ret;
}
int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
@@ -3173,12 +3186,13 @@ static void guc_context_close(struct intel_context *ce)
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
}
-static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+static inline int guc_lrc_desc_unpin(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
struct intel_gt *gt = guc_to_gt(guc);
unsigned long flags;
bool disabled;
+ int ret;
GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
@@ -3188,19 +3202,33 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
/* Seal race with Reset */
spin_lock_irqsave(&ce->guc_state.lock, flags);
disabled = submission_disabled(guc);
- if (likely(!disabled)) {
- __intel_gt_pm_get(gt);
- set_context_destroyed(ce);
- clr_context_registered(ce);
- }
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
if (unlikely(disabled)) {
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
release_guc_id(guc, ce);
__guc_context_destroy(ce);
- return;
+ return 0;
}
- deregister_context(ce, ce->guc_id.id);
+ /* GuC is active, lets destroy this context,
+ * but at this point we can still be racing with
+ * suspend, so we undo everything if the H2G fails
+ */
+
+ /* Change context state to destroyed and get gt-pm */
+ __intel_gt_pm_get(gt);
+ set_context_destroyed(ce);
+ clr_context_registered(ce);
+
+ ret = deregister_context(ce, ce->guc_id.id);
+ if (ret) {
+ /* Undo the state change and put gt-pm if that failed */
+ set_context_registered(ce);
+ clr_context_destroyed(ce);
+ intel_gt_pm_put(gt);
+ }
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+ return 0;
}
static void __guc_context_destroy(struct intel_context *ce)
@@ -3268,7 +3296,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc)
if (!ce)
break;
- guc_lrc_desc_unpin(ce);
+ if (guc_lrc_desc_unpin(ce)) {
+ /*
+ * This means GuC's CT link severed mid-way which could happen
+ * in suspend-resume corner cases. In this case, put the
+ * context back into the destroyed_contexts list which will
+ * get picked up on the next context deregistration event or
+ * purged in a GuC sanitization event (reset/unload/wedged/...).
+ */
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ list_add_tail(&ce->destroyed_link,
+ &guc->submission_state.destroyed_contexts);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+ /* Bail now since the list might never be emptied if h2gs fail */
+ break;
+ }
+
}
}
@@ -3279,6 +3322,17 @@ static void destroyed_worker_func(struct work_struct *w)
struct intel_gt *gt = guc_to_gt(guc);
int tmp;
+ /*
+ * In rare cases we can get here via async context-free fence-signals that
+ * come very late in suspend flow or very early in resume flows. In these
+ * cases, GuC won't be ready but just skipping it here is fine as these
+ * pending-destroy-contexts get destroyed totally at GuC reset time at the
+ * end of suspend.. OR.. this worker can be picked up later on the next
+ * context destruction trigger after resume-completes
+ */
+ if (!intel_guc_is_ready(guc))
+ return;
+
with_intel_gt_pm(gt, tmp)
deregister_destroyed_contexts(guc);
}