diff mbox

[v8,01/12] drm/i915: Park before resetting the submission backend

Message ID 20180409122332.24788-1-michal.wajdeczko@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michal Wajdeczko April 9, 2018, 12:23 p.m. UTC
From: Chris Wilson <chris@chris-wilson.co.uk>

As different backends may have different park/unpark callbacks, we
should only ever switch backends (reset_default_submission on wedge
recovery, or on enabling the guc) while parked.

v2: Remove the assert from the guc code, as we are currently trying to
modify the engine vfuncs pointer on a live system after reset (not just
wedging). We will just have to hope that the system is balanced.
v3: Rebase onto __i915_gem_park and improve grammar.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c        | 15 ++++++++++++---
 drivers/gpu/drm/i915/intel_engine_cs.c |  3 +++
 2 files changed, 15 insertions(+), 3 deletions(-)

Comments

Michal Wajdeczko April 9, 2018, 3:32 p.m. UTC | #1
On Mon, 09 Apr 2018 17:09:18 +0200, Patchwork  
<patchwork@emeril.freedesktop.org> wrote:

> == Series Details ==
>
> Series: series starting with [v8,01/12] drm/i915: Park before resetting  
> the submission backend
> URL   : https://patchwork.freedesktop.org/series/41365/
> State : failure
>
> == Summary ==
>
> ---- Possible new issues:

two variants:

>
> Test drm_mm:
>         Subgroup sanitycheck:
>                 pass       -> INCOMPLETE (shard-apl)

#1:

<0>[  400.245461] drv_self-5775    1.... 400208508us :  
intel_guc_submission_disable: intel_guc_submission_disable:1255  
GEM_BUG_ON(dev_priv->gt.awake)

<4>[  400.245871] Call Trace:
<4>[  400.245959]  intel_uc_fini_hw+0x4b/0xe0 [i915]
<4>[  400.246047]  i915_gem_fini_hw+0x16/0x30 [i915]
<4>[  400.246129]  i915_reset+0x1e8/0x2b0 [i915]
<4>[  400.246222]  igt_global_reset+0x38/0xe0 [i915]

> Test drv_hangman:
>         Subgroup error-state-capture-blt:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup error-state-capture-bsd:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup error-state-capture-render:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup error-state-capture-vebox:
>                 pass       -> INCOMPLETE (shard-apl)
> Test drv_selftest:
>         Subgroup live_guc:
>                 pass       -> SKIP       (shard-apl)
>         Subgroup live_hangcheck:
>                 pass       -> DMESG-FAIL (shard-apl)
> Test gem_eio:
>         Subgroup execbuf:
>                 pass       -> INCOMPLETE (shard-apl)

#2:

<3>[  227.833798] intel_engine_unpin_breadcrumbs_irq:219  
GEM_BUG_ON(!b->irq_enabled)

<4>[  227.834607] Call Trace:
<4>[  227.834691]  intel_engines_park+0xef/0x180 [i915]
<4>[  227.834709]  ? synchronize_irq+0x3e/0xb0
<4>[  227.834781]  __i915_gem_park+0x3e/0x160 [i915]
<4>[  227.834850]  i915_gem_idle_work_handler+0x1cd/0x220 [i915]
<4>[  227.834868]  process_one_work+0x21a/0x640


>         Subgroup in-flight-external:
>                 pass       -> INCOMPLETE (shard-apl)
> Test gem_mocs_settings:
>         Subgroup mocs-reset-dirty-render:
>                 pass       -> INCOMPLETE (shard-apl)
> Test gem_request_retire:
>         Subgroup retire-vma-not-inactive:
>                 pass       -> INCOMPLETE (shard-apl)
> Test gem_workarounds:
>         Subgroup reset-context:
>                 pass       -> INCOMPLETE (shard-apl)
> Test kms_vblank:
>         Subgroup pipe-a-query-idle-hang:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup pipe-a-ts-continuation-idle-hang:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup pipe-a-wait-busy-hang:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup pipe-a-wait-forked-busy-hang:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup pipe-a-wait-idle-hang:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup pipe-b-query-forked-hang:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup pipe-c-query-busy-hang:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup pipe-c-query-forked-busy-hang:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup pipe-c-query-forked-hang:
>                 pass       -> INCOMPLETE (shard-apl)
>         Subgroup pipe-c-ts-continuation-idle-hang:
>                 pass       -> INCOMPLETE (shard-apl)
> Test perf:
>         Subgroup gen8-unprivileged-single-ctx-counters:
>                 pass       -> FAIL       (shard-apl)
>
> ---- Known issues:
>
> Test drv_missed_irq:
>                 pass       -> SKIP       (shard-apl) fdo#103199
> Test gem_eio:
>         Subgroup in-flight-suspend:
>                 pass       -> INCOMPLETE (shard-apl) fdo#103375
> Test kms_flip:
>         Subgroup flip-vs-expired-vblank:
>                 fail       -> PASS       (shard-hsw) fdo#102887
>         Subgroup modeset-vs-vblank-race-interruptible:
>                 pass       -> FAIL       (shard-hsw) fdo#103060
> Test kms_plane_multiple:
>         Subgroup atomic-pipe-c-tiling-x:
>                 pass       -> FAIL       (shard-apl) fdo#103166
> Test kms_rotation_crc:
>         Subgroup sprite-rotation-90:
>                 fail       -> PASS       (shard-apl) fdo#103925
>
> fdo#103199 https://bugs.freedesktop.org/show_bug.cgi?id=103199
> fdo#103375 https://bugs.freedesktop.org/show_bug.cgi?id=103375
> fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887
> fdo#103060 https://bugs.freedesktop.org/show_bug.cgi?id=103060
> fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
> fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
>
> shard-apl        total:1541 pass:1003 dwarn:1   dfail:1   fail:9    
> skip:497 time:2569s
> shard-hsw        total:2680 pass:1784 dwarn:1   dfail:0   fail:3    
> skip:891 time:11411s
> Blacklisted hosts:
> shard-kbl        total:1439 pass:1014 dwarn:1   dfail:1   fail:6    
> skip:386 time:1390s
> shard-snb        total:2680 pass:1378 dwarn:1   dfail:0   fail:3    
> skip:1298 time:6927s
>
> == Logs ==
>
> For more details see:  
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_8640/shards.html
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 28ab0be..dd3e292 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -144,8 +144,6 @@  static u32 __i915_gem_park(struct drm_i915_private *i915)
 	if (!i915->gt.awake)
 		return I915_EPOCH_INVALID;
 
-	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
-
 	/*
 	 * Be paranoid and flush a concurrent interrupt to make sure
 	 * we don't reactivate any irq tasklets after parking.
@@ -173,6 +171,7 @@  static u32 __i915_gem_park(struct drm_i915_private *i915)
 
 	intel_runtime_pm_put(i915);
 
+	GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID);
 	return i915->gt.epoch;
 }
 
@@ -3435,7 +3434,17 @@  bool i915_gem_unset_wedged(struct drm_i915_private *i915)
 		}
 	}
 	i915_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
+
+	/*
+	 * Park before disengaging the old submit mechanism as different
+	 * backends may have different park/unpark callbacks.
+	 *
+	 * We are idle; the idle-worker will be queued, but we need to run
+	 * it now. As we already hold the struct mutex, we can park the GPU
+	 * right away, letting the lazy worker see that we are already active
+	 * again by the time it acquires the mutex.
+	 */
+	__i915_gem_park(i915);
 
 	/*
 	 * Undo nop_submit_request. We prevent all new i915 requests from
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 12486d8..b4ea77a 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1651,6 +1651,9 @@  void intel_engines_reset_default_submission(struct drm_i915_private *i915)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	/* Must be parked first! */
+	GEM_BUG_ON(i915->gt.awake);
+
 	for_each_engine(engine, i915, id)
 		engine->set_default_submission(engine);
 }