[09/12] drm/i915/execlists: Cancel banned contexts on schedule-out

Message ID	20191006165002.30312-9-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=Zxrt=X7=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 1BFC520835 From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Sun, 6 Oct 2019 17:49:59 +0100 Message-Id: <20191006165002.30312-9-chris@chris-wilson.co.uk> In-Reply-To: <20191006165002.30312-1-chris@chris-wilson.co.uk> References: <20191006165002.30312-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 09/12] drm/i915/execlists: Cancel banned contexts on schedule-out Precedence: list Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[01/12] drm/i915/execlists: Fix annotation for decoupling virtual request | expand [01/12] drm/i915/execlists: Fix annotation for decoupling virtual request [02/12] drm/i915/selftests: Appease lockdep [03/12] drm/i915/gt: Restore dropped 'interruptible' flag [04/12] drm/i915/gt: Treat a busy timeline as 'active' while waiting [05/12] drm/i915: Expose engine properties via sysfs [06/12] drm/i915/execlists: Force preemption [07/12] drm/i915: Mark up "sentinel" requests [08/12] drm/i915/gt: Introduce barrier pulses along engines [09/12] drm/i915/execlists: Cancel banned contexts on schedule-out [10/12] drm/i915: Cancel non-persistent contexts on close [11/12] drm/i915: Replace hangcheck by heartbeats [12/12] drm/i915: Flush idle barriers when waiting

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 177b0f6874dd..9cfc436634d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -234,6 +234,9 @@ static void execlists_init_reg_state(u32 *reg_state, const struct intel_engine_cs *engine, const struct intel_ring *ring, bool close); +static void +__execlists_update_reg_state(const struct intel_context *ce, + const struct intel_engine_cs *engine); static void __context_pin_acquire(struct intel_context *ce) { @@ -1022,6 +1025,58 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_schedule(&ve->base.execlists.tasklet); } +static void +mark_complete(struct i915_request *rq, struct intel_engine_cs *engine) +{ + const struct intel_timeline * const tl = rcu_dereference(rq->timeline); + + *(u32 *)tl->hwsp_seqno = rq->fence.seqno; + GEM_BUG_ON(!i915_request_completed(rq)); + + list_for_each_entry_from_reverse(rq, &tl->requests, link) { + if (i915_request_signaled(rq)) + break; + + mark_eio(rq); + } + + intel_engine_queue_breadcrumbs(engine); +} + +static void cancel_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->hw_context; + u32 *regs = ce->lrc_reg_state; + + if (i915_request_completed(rq)) + return; + + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", + __func__, engine->name, rq->fence.context, rq->fence.seqno); + __context_pin_acquire(ce); + + /* Scrub the context image to prevent replaying the previous batch */ + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + execlists_init_reg_state(regs, ce, engine, ce->ring, false); + + /* Ring will be advanced on retire; here we need to reset the context */ + ce->ring->head = intel_ring_wrap(ce->ring, rq->wa_tail); + __execlists_update_reg_state(ce, engine); + + /* We've switched away, so this should be a no-op, 
but intent matters */ + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; + + /* Let everyone know that the request may now be retired */ + rcu_read_lock(); + mark_complete(rq, engine); + rcu_read_unlock(); + + __context_pin_release(ce); +} + static inline void __execlists_schedule_out(struct i915_request *rq, struct intel_engine_cs * const engine) @@ -1032,6 +1087,9 @@ __execlists_schedule_out(struct i915_request *rq, execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put(engine->gt); + if (unlikely(i915_gem_context_is_banned(ce->gem_context))) + cancel_active(rq, engine); + /* * If this is part of a virtual engine, its next request may * have been blocked waiting for access to the active context. diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 198cf2f754f4..1703130ef0ef 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_engine_heartbeat.h" #include "gt/intel_reset.h" #include "i915_selftest.h" @@ -986,6 +987,277 @@ static int live_nopreempt(void *arg) goto err_client_b; } +struct live_preempt_cancel { + struct intel_engine_cs *engine; + struct preempt_client a, b; +}; + +static int __cancel_active0(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP0 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + i915_gem_context_set_banned(arg->a.ctx); + err = 
intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_active1(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[2] = {}; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP1 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* no preemption */ + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); + rq[1] = spinner_create_request(&arg->b.spin, + arg->b.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + i915_gem_context_set_banned(arg->b.ctx); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + igt_spinner_end(&arg->a.spin); + if (i915_request_wait(rq[1], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != 0) { + pr_err("Normal inflight0 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != -EIO) { + pr_err("Cancelled inflight1 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int 
__cancel_queued(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[3] = {}; + struct igt_live_test t; + int err; + + /* Full ELSP and one in the wings */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + clear_bit(CONTEXT_BANNED, &arg->a.ctx->flags); + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + clear_bit(CONTEXT_BANNED, &arg->b.ctx->flags); + rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + rq[2] = spinner_create_request(&arg->b.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[2])) { + err = PTR_ERR(rq[2]); + goto out; + } + + i915_request_get(rq[2]); + err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); + i915_request_add(rq[2]); + if (err) + goto out; + + i915_gem_context_set_banned(arg->a.ctx); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { + err = -EIO; + goto out; + } + + if (rq[0]->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != 0) { + pr_err("Normal inflight1 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[2]->fence.error != -EIO) { + pr_err("Cancelled queued request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[2]); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int 
live_preempt_cancel(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct live_preempt_cancel data; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * To cancel an inflight context, we need to first remove it from the + * GPU. That sounds like preemption! Plus a little bit of bookkeeping. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + if (preempt_client_init(i915, &data.a)) + return -ENOMEM; + if (preempt_client_init(i915, &data.b)) + goto err_client_a; + + for_each_engine(data.engine, i915, id) { + if (!intel_engine_has_preemption(data.engine)) + continue; + + err = __cancel_active0(&data); + if (err) + goto err_wedged; + + err = __cancel_active1(&data); + if (err) + goto err_wedged; + + err = __cancel_queued(&data); + if (err) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&data.b); +err_client_a: + preempt_client_fini(&data.a); + return err; + +err_wedged: + GEM_TRACE_DUMP(); + igt_spinner_end(&data.b.spin); + igt_spinner_end(&data.a.spin); + intel_gt_set_wedged(&i915->gt); + goto err_client_b; +} + static int live_suppress_self_preempt(void *arg) { struct drm_i915_private *i915 = arg; @@ -2270,6 +2542,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt), SUBTEST(live_late_preempt), SUBTEST(live_nopreempt), + SUBTEST(live_preempt_cancel), SUBTEST(live_suppress_self_preempt), SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt),

[09/12] drm/i915/execlists: Cancel banned contexts on schedule-out

Commit Message

Patch