@@ -373,8 +373,13 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
intel_ggtt_restore_fences(gt->ggtt);
ret = intel_uc_runtime_resume(&gt->uc);
- if (ret)
+ if (ret && intel_uc_uses_guc_submission(&gt->uc)) {
+ /* Resume failed while GuC submission is in use, so the GPU can no longer
+ * be used; mark it as wedged.
+ */
+ intel_gt_set_wedged_flag(gt);
return ret;
+ }
return 0;
}
@@ -962,6 +962,20 @@ static void nop_submit_request(struct i915_request *request)
}
}
+/**
+ * intel_gt_set_wedged_flag - flag a GT as wedged and stub out submission
+ * @gt: the GT to mark as wedged
+ *
+ * If I915_WEDGED is already set in @gt->reset.flags this is a no-op.
+ * Otherwise every engine's submit_request hook is replaced with
+ * nop_submit_request and only then is the I915_WEDGED bit set.
+ *
+ * This helper performs no hardware reset, no RCU synchronisation and no
+ * cancellation of in-flight requests; callers needing those must do them
+ * themselves.
+ */
+void intel_gt_set_wedged_flag(struct intel_gt *gt)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/* Already wedged: the submit hooks were replaced on an earlier call. */
+	if (test_bit(I915_WEDGED, &gt->reset.flags))
+		return;
+
+	/* Redirect submission first, then publish the wedged state. */
+	for_each_engine(engine, gt, id)
+		engine->submit_request = nop_submit_request;
+
+	set_bit(I915_WEDGED, &gt->reset.flags);
+}
+
static void __intel_gt_set_wedged(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
@@ -984,8 +998,8 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
__intel_gt_reset(gt, ALL_ENGINES);
- for_each_engine(engine, gt, id)
- engine->submit_request = nop_submit_request;
+
+ intel_gt_set_wedged_flag(gt);
/*
* Make sure no request can slip through without getting completed by
@@ -993,7 +1007,6 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
* in nop_submit_request.
*/
synchronize_rcu_expedited();
- set_bit(I915_WEDGED, &gt->reset.flags);
/* Mark all executing requests as skipped */
local_bh_disable();
@@ -42,6 +42,7 @@ int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
int __must_check intel_gt_reset_lock_interruptible(struct intel_gt *gt, int *srcu);
void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
+void intel_gt_set_wedged_flag(struct intel_gt *gt);
void intel_gt_set_wedged(struct intel_gt *gt);
bool intel_gt_unset_wedged(struct intel_gt *gt);
int intel_gt_terminally_wedged(struct intel_gt *gt);
@@ -700,8 +700,13 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication)
/* Make sure we enable communication if and only if it's disabled */
GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct));
- if (enable_communication)
- guc_enable_communication(guc);
+ if (enable_communication) {
+ err = guc_enable_communication(guc);
+ if (err) {
+ DRM_DEBUG_DRIVER("Failed to enable communication, %pe", ERR_PTR(err));
+ return err;
+ }
+ }
/* If we are only resuming GuC communication but not reloading
* GuC, we need to ensure the ARAT timer interrupt is enabled
Check the error code returned by guc_enable_communication() on the resume
path. If resume fails, the GPU can no longer be used, so mark it as wedged.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_pm.c |  7 ++++++-
 drivers/gpu/drm/i915/gt/intel_reset.c | 19 ++++++++++++++++---
 drivers/gpu/drm/i915/gt/intel_reset.h |  1 +
 drivers/gpu/drm/i915/gt/uc/intel_uc.c |  9 +++++++--
 4 files changed, 30 insertions(+), 6 deletions(-)