@@ -1564,6 +1564,7 @@ struct i915_gpu_error {
unsigned long flags;
#define I915_RESET_BACKOFF 0
#define I915_RESET_HANDOFF 1
+#define I915_RESET_MODESET 2
#define I915_WEDGED (BITS_PER_LONG - 1)
#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES)
@@ -3471,10 +3471,9 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv)
!gpu_reset_clobbers_display(dev_priv))
return;
- /* We have a modeset vs reset deadlock, defensively unbreak it.
- *
- * FIXME: We can do a _lot_ better, this is just a first iteration.*/
- i915_gem_set_wedged(dev_priv);
+ /* We have a modeset vs reset deadlock, defensively unbreak it. */
+ set_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags);
+ wake_up_all(&dev_priv->gpu_error.wait_queue);
/*
* Need mode_config.mutex so that we don't
@@ -3572,6 +3571,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv)
drm_modeset_drop_locks(ctx);
drm_modeset_acquire_fini(ctx);
mutex_unlock(&dev->mode_config.mutex);
+
+ clear_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags);
}
static bool abort_flip_on_reset(struct intel_crtc *crtc)
@@ -12377,6 +12378,30 @@ static void intel_atomic_helper_free_state_worker(struct work_struct *work)
intel_atomic_helper_free_state(dev_priv);
}
+static void intel_atomic_commit_fence_wait(struct intel_atomic_state *intel_state)
+{
+ wait_queue_t wait_fence, wait_reset;
+ struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev);
+
+ init_wait_entry(&wait_fence, 0);
+ init_wait_entry(&wait_reset, 0);
+ for (;;) {
+ prepare_to_wait(&intel_state->commit_ready.wait,
+ &wait_fence, TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(&dev_priv->gpu_error.wait_queue,
+ &wait_reset, TASK_UNINTERRUPTIBLE);
+
+
+ if (i915_sw_fence_done(&intel_state->commit_ready)
+ || (dev_priv->gpu_error.flags & I915_RESET_MODESET))
+ break;
+
+ schedule();
+ }
+ finish_wait(&intel_state->commit_ready.wait, &wait_fence);
+ finish_wait(&dev_priv->gpu_error.wait_queue, &wait_reset);
+}
+
static void intel_atomic_commit_tail(struct drm_atomic_state *state)
{
struct drm_device *dev = state->dev;
@@ -12390,7 +12415,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
unsigned crtc_vblank_mask = 0;
int i;
- i915_sw_fence_wait(&intel_state->commit_ready);
+ intel_atomic_commit_fence_wait(intel_state);
drm_atomic_helper_wait_for_dependencies(state);
There's no reason to entirely wedge the gpu, for the minimal deadlock bugfix we only need to unbreak/decouple the atomic commit from the gpu reset. The simplest wait to fix that is by replacing the unconditional fence wait a the top of commit_tail by a wait which completes either when the fences are done (normal case, or when a reset doesn't need to touch the display state). Or when the gpu reset needs to force-unblock all pending modeset states. Note that in both cases TDR itself keeps working, so from a userspace pov this trickery isn't observable. Users themselvs might spot a short glitch while the rendering is catching up again, but that's still better than pre-TDR where we've thrown away all the rendering, including innocent batches. Also, this fixes the regression TDR introduced of making gpu resets deadlock-prone when we do need to touch the display. One thing I noticed is that gpu_error.flags seems to use both our own wait-queue in gpu_error.wait_queue, and the generic wait_on_bit facilities. Not entirely sure why this inconsistency exists, I just picked one style. A possible future avenue could be to insert the gpu reset in-between ongoing modeset changes, which would avoid the momentary glitch. But that's a lot more work to implement in the atomic commit machinery, and given that we only need this for pre-g4x hw, of questionable utility just for the sake of polishing gpu reset even more on those old boxes. It might be useful for other features though. Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 35 ++++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-)