Message ID | 20180716080332.32283-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 16/07/2018 09:03, Chris Wilson wrote: > Inject a failure into preemption completion to pretend as if the HW > didn't successfully handle preemption and we are forced to do a reset in > the middle. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/intel_guc_submission.c | 3 + > drivers/gpu/drm/i915/intel_lrc.c | 3 + > drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + > drivers/gpu/drm/i915/selftests/intel_lrc.c | 101 ++++++++++++++++++++ > 4 files changed, 109 insertions(+) > > diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c > index cc444dc5f3ad..de57cf6085d1 100644 > --- a/drivers/gpu/drm/i915/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/intel_guc_submission.c > @@ -628,6 +628,9 @@ static void complete_preempt_context(struct intel_engine_cs *engine) > > GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); > > + if (I915_SELFTEST_ONLY(execlists->preempt_hang)) > + return; > + > execlists_cancel_port_requests(execlists); > execlists_unwind_incomplete_requests(execlists); > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 8fd8de71c2b5..703b76dcfcd2 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -559,6 +559,9 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists) > { > GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); > > + if (I915_SELFTEST_ONLY(execlists->preempt_hang)) > + return; > + > execlists_cancel_port_requests(execlists); > __unwind_incomplete_requests(container_of(execlists, > struct intel_engine_cs, > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index d1eee08e5f6b..37a7c819435a 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -333,6 +333,8 @@ struct intel_engine_execlists { > * @csb_head: context 
status buffer head > */ > u8 csb_head; > + > + I915_SELFTEST_DECLARE(bool preempt_hang;) > }; > > #define INTEL_ENGINE_CS_MAX_NAME 8 > diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c > index 636cb68191e3..d642e78ef145 100644 > --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c > +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c > @@ -451,12 +451,113 @@ static int live_late_preempt(void *arg) > goto err_ctx_lo; > } > > +static int live_preempt_hang(void *arg) > +{ > + struct drm_i915_private *i915 = arg; > + struct i915_gem_context *ctx_hi, *ctx_lo; > + struct spinner spin_hi, spin_lo; > + struct intel_engine_cs *engine; > + enum intel_engine_id id; > + int err = -ENOMEM; > + > + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) > + return 0; > + > + if (!intel_has_reset_engine(i915)) > + return 0; > + > + mutex_lock(&i915->drm.struct_mutex); > + > + if (spinner_init(&spin_hi, i915)) > + goto err_unlock; > + > + if (spinner_init(&spin_lo, i915)) > + goto err_spin_hi; > + > + ctx_hi = kernel_context(i915); > + if (!ctx_hi) > + goto err_spin_lo; > + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; Use more than max user to be more robust against possible scheduling policy changes in the future? > + > + ctx_lo = kernel_context(i915); > + if (!ctx_lo) > + goto err_ctx_hi; > + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; > + > + for_each_engine(engine, i915, id) { > + struct i915_request *rq; > + > + rq = spinner_create_request(&spin_lo, ctx_lo, engine, > + MI_ARB_CHECK); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + goto err_ctx_lo; > + } > + > + i915_request_add(rq); > + if (!wait_for_spinner(&spin_lo, rq)) { > + GEM_TRACE("lo spinner failed to start\n"); > + GEM_TRACE_DUMP(); > + i915_gem_set_wedged(i915); > + err = -EIO; > + goto err_ctx_lo; > + } > + > + rq = spinner_create_request(&spin_hi, ctx_hi, engine, > + MI_ARB_CHECK); This one doesn't need to be preemptable but doesn't matter. 
> + if (IS_ERR(rq)) { > + spinner_end(&spin_lo); > + err = PTR_ERR(rq); > + goto err_ctx_lo; > + } > + > + engine->execlists.preempt_hang = true; > + i915_request_add(rq); Wait for timeout waiting for preempting spinner to run? Otherwise we are less sure that preempt_hang injection worked. > + > + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); > + i915_reset_engine(engine, NULL); > + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); > + > + engine->execlists.preempt_hang = false; > + > + if (!wait_for_spinner(&spin_hi, rq)) { > + GEM_TRACE("hi spinner failed to start\n"); > + GEM_TRACE_DUMP(); > + i915_gem_set_wedged(i915); > + err = -EIO; > + goto err_ctx_lo; > + } > + > + spinner_end(&spin_hi); > + spinner_end(&spin_lo); > + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { > + err = -EIO; > + goto err_ctx_lo; > + } > + } > + > + err = 0; > +err_ctx_lo: > + kernel_context_close(ctx_lo); > +err_ctx_hi: > + kernel_context_close(ctx_hi); > +err_spin_lo: > + spinner_fini(&spin_lo); > +err_spin_hi: > + spinner_fini(&spin_hi); > +err_unlock: > + igt_flush_test(i915, I915_WAIT_LOCKED); > + mutex_unlock(&i915->drm.struct_mutex); > + return err; > +} > + > int intel_execlists_live_selftests(struct drm_i915_private *i915) > { > static const struct i915_subtest tests[] = { > SUBTEST(live_sanitycheck), > SUBTEST(live_preempt), > SUBTEST(live_late_preempt), > + SUBTEST(live_preempt_hang), > }; > > if (!HAS_EXECLISTS(i915)) > Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index cc444dc5f3ad..de57cf6085d1 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -628,6 +628,9 @@ static void complete_preempt_context(struct intel_engine_cs *engine) GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); + if (I915_SELFTEST_ONLY(execlists->preempt_hang)) + return; + execlists_cancel_port_requests(execlists); execlists_unwind_incomplete_requests(execlists); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8fd8de71c2b5..703b76dcfcd2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -559,6 +559,9 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists) { GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); + if (I915_SELFTEST_ONLY(execlists->preempt_hang)) + return; + execlists_cancel_port_requests(execlists); __unwind_incomplete_requests(container_of(execlists, struct intel_engine_cs, diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index d1eee08e5f6b..37a7c819435a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -333,6 +333,8 @@ struct intel_engine_execlists { * @csb_head: context status buffer head */ u8 csb_head; + + I915_SELFTEST_DECLARE(bool preempt_hang;) }; #define INTEL_ENGINE_CS_MAX_NAME 8 diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 636cb68191e3..d642e78ef145 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -451,12 +451,113 @@ static int live_late_preempt(void *arg) goto err_ctx_lo; } +static int live_preempt_hang(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct spinner 
spin_hi, spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + if (!intel_has_reset_engine(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + if (spinner_init(&spin_hi, i915)) + goto err_unlock; + + if (spinner_init(&spin_lo, i915)) + goto err_spin_hi; + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; + + ctx_lo = kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!wait_for_spinner(&spin_lo, rq)) { + GEM_TRACE("lo spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + engine->execlists.preempt_hang = true; + i915_request_add(rq); + + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + i915_reset_engine(engine, NULL); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + + engine->execlists.preempt_hang = false; + + if (!wait_for_spinner(&spin_hi, rq)) { + GEM_TRACE("hi spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + spinner_end(&spin_hi); + spinner_end(&spin_lo); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + spinner_fini(&spin_lo); +err_spin_hi: + spinner_fini(&spin_hi); +err_unlock: + igt_flush_test(i915, 
I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int intel_execlists_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_sanitycheck), SUBTEST(live_preempt), SUBTEST(live_late_preempt), + SUBTEST(live_preempt_hang), }; if (!HAS_EXECLISTS(i915))
Inject a failure into preemption completion to pretend that the HW did not successfully handle the preemption, so that we are forced to do a reset in the middle of it. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/intel_guc_submission.c | 3 + drivers/gpu/drm/i915/intel_lrc.c | 3 + drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + drivers/gpu/drm/i915/selftests/intel_lrc.c | 101 ++++++++++++++++++++ 4 files changed, 109 insertions(+)