Message ID | 20190301140404.26690-16-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/38] drm/i915/execlists: Suppress redundant preemption | expand |
On 01/03/2019 14:03, Chris Wilson wrote: > In the next patch, we will want to update live state within a context. > As this state may be in use by the GPU and we haven't been explicitly > tracking its activity, we instead attach it to a request we send down > the context setup with its new state and on retiring that request > cleanup the old state as we then know that it is no longer live. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem_context.c | 74 +++++++++++++ > .../gpu/drm/i915/selftests/i915_gem_context.c | 103 ++++++++++++++++++ > 2 files changed, 177 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 3b5145b30d85..91926a407548 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -707,6 +707,80 @@ last_request_on_engine(struct i915_timeline *timeline, > return NULL; > } > > +struct context_barrier_task { > + struct i915_active base; > + void (*task)(void *data); > + void *data; > +}; > + > +static void cb_retire(struct i915_active *base) > +{ > + struct context_barrier_task *cb = container_of(base, typeof(*cb), base); > + > + if (cb->task) > + cb->task(cb->data); > + > + i915_active_fini(&cb->base); > + kfree(cb); > +} > + > +I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault); > +static int context_barrier_task(struct i915_gem_context *ctx, > + unsigned long engines, I'm in two minds about usefulness of intel_engine_mask_t. 
> + void (*task)(void *data), > + void *data) > +{ > + struct drm_i915_private *i915 = ctx->i915; > + struct context_barrier_task *cb; > + struct intel_context *ce; > + intel_wakeref_t wakeref; > + int err = 0; > + > + lockdep_assert_held(&i915->drm.struct_mutex); > + GEM_BUG_ON(!task); > + > + cb = kmalloc(sizeof(*cb), GFP_KERNEL); > + if (!cb) > + return -ENOMEM; > + > + i915_active_init(i915, &cb->base, cb_retire); > + i915_active_acquire(&cb->base); > + > + wakeref = intel_runtime_pm_get(i915); > + list_for_each_entry(ce, &ctx->active_engines, active_link) { > + struct intel_engine_cs *engine = ce->engine; > + struct i915_request *rq; > + > + if (!(ce->engine->mask & engines)) > + continue; > + > + if (I915_SELFTEST_ONLY(context_barrier_inject_fault & > + engine->mask)) { > + err = -ENXIO; > + break; > + } > + > + rq = i915_request_alloc(engine, ctx); > + if (IS_ERR(rq)) { > + err = PTR_ERR(rq); > + break; > + } > + > + err = i915_active_ref(&cb->base, rq->fence.context, rq); > + i915_request_add(rq); > + if (err) > + break; > + } > + intel_runtime_pm_put(i915, wakeref); > + > + cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ > + cb->data = data; > + > + i915_active_release(&cb->base); > + > + return err; > +} > + > int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, > unsigned long mask) > { > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c > index 7ae5033457b6..4f7c04247354 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c > @@ -1594,10 +1594,113 @@ static int igt_switch_to_kernel_context(void *arg) > return err; > } > > +static void mock_barrier_task(void *data) > +{ > + unsigned int *counter = data; > + > + ++*counter; > +} > + > +static int mock_context_barrier(void *arg) > +{ > +#undef pr_fmt > +#define pr_fmt(x) "context_barrier_task():" # x > + struct drm_i915_private *i915 = arg; > + struct i915_gem_context *ctx; > + struct i915_request *rq; > + intel_wakeref_t wakeref; > + unsigned int counter; > + int err; > + > + /* > + * The context barrier provides us with a callback after it emits > + * a request; useful for retiring old state after loading new. > + */ > + > + mutex_lock(&i915->drm.struct_mutex); > + > + ctx = mock_context(i915, "mock"); > + if (IS_ERR(ctx)) { > + err = PTR_ERR(ctx); > + goto unlock; > + } > + > + counter = 0; > + err = context_barrier_task(ctx, 0, mock_barrier_task, &counter); > + if (err) { > + pr_err("Failed at line %d, err=%d\n", __LINE__, err); > + goto out; > + } > + if (counter == 0) { > + pr_err("Did not retire immediately with 0 engines\n"); > + err = -EINVAL; > + goto out; > + } > + > + counter = 0; > + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); > + if (err) { > + pr_err("Failed at line %d, err=%d\n", __LINE__, err); > + goto out; > + } > + if (counter == 0) { > + pr_err("Did not retire immediately for all inactive engines\n"); Why would this one retire immediately? It will send requests down the pipe, no? 
So don't you actually need to wait for the tracker to be signalled and that counter == num_engines? > + err = -EINVAL; > + goto out; > + } > + > + rq = ERR_PTR(-ENODEV); > + with_intel_runtime_pm(i915, wakeref) > + rq = i915_request_alloc(i915->engine[RCS], ctx); > + if (IS_ERR(rq)) { > + pr_err("Request allocation failed!\n"); > + goto out; > + } > + i915_request_add(rq); Doesn't this need to go under the wakeref as well? > + GEM_BUG_ON(list_empty(&ctx->active_engines)); > + > + counter = 0; > + context_barrier_inject_fault = BIT(RCS); > + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); > + context_barrier_inject_fault = 0; > + if (err == -ENXIO) > + err = 0; > + else > + pr_err("Did not hit fault injection!\n"); > + if (counter != 0) { > + pr_err("Invoked callback on error!\n"); > + err = -EIO; > + } > + if (err) > + goto out; > + > + counter = 0; > + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); > + if (err) { > + pr_err("Failed at line %d, err=%d\n", __LINE__, err); > + goto out; > + } > + mock_device_flush(i915); > + if (counter == 0) { > + pr_err("Did not retire on each active engines\n"); > + err = -EINVAL; > + goto out; > + } This one is inline with my understanding, and the context_barrier_task arguments are the same as the one above.. hm.. I am confused. > + > +out: > + mock_context_close(ctx); > +unlock: > + mutex_unlock(&i915->drm.struct_mutex); > + return err; > +#undef pr_fmt > +#define pr_fmt(x) x > +} > + > int i915_gem_context_mock_selftests(void) > { > static const struct i915_subtest tests[] = { > SUBTEST(igt_switch_to_kernel_context), > + SUBTEST(mock_context_barrier), > }; > struct drm_i915_private *i915; > int err; > Regards, Tvrtko
Quoting Tvrtko Ursulin (2019-03-01 16:12:33) > > On 01/03/2019 14:03, Chris Wilson wrote: > > + counter = 0; > > + err = context_barrier_task(ctx, 0, mock_barrier_task, &counter); > > + if (err) { > > + pr_err("Failed at line %d, err=%d\n", __LINE__, err); > > + goto out; > > + } > > + if (counter == 0) { > > + pr_err("Did not retire immediately with 0 engines\n"); > > + err = -EINVAL; > > + goto out; > > + } > > + > > + counter = 0; > > + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); > > + if (err) { > > + pr_err("Failed at line %d, err=%d\n", __LINE__, err); > > + goto out; > > + } > > + if (counter == 0) { > > + pr_err("Did not retire immediately for all inactive engines\n"); > > Why would this one retire immediately? It will send requests down the > pipe, no? So don't you actually need to wait for the tracker to be > signalled and that counter == num_engines? Nothing has used the context at this point, so we don't emit a request on any engine, and the barrier can be immediately executed. > > + err = -EINVAL; > > + goto out; > > + } > > + > > + rq = ERR_PTR(-ENODEV); > > + with_intel_runtime_pm(i915, wakeref) > > + rq = i915_request_alloc(i915->engine[RCS], ctx); > > + if (IS_ERR(rq)) { > > + pr_err("Request allocation failed!\n"); > > + goto out; > > + } > > + i915_request_add(rq); > > Doesn't this need to go under the wakeref as well? No, we only need the wakeref to start (that's only to avoid blocking inside request_alloc --- yeah, that's not exactly how it works, we'll be back later to fix that!). The request then carries the GT wakeref with it. 
> > + GEM_BUG_ON(list_empty(&ctx->active_engines)); > > + > > + counter = 0; > > + context_barrier_inject_fault = BIT(RCS); > > + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); > > + context_barrier_inject_fault = 0; > > + if (err == -ENXIO) > > + err = 0; > > + else > > + pr_err("Did not hit fault injection!\n"); > > + if (counter != 0) { > > + pr_err("Invoked callback on error!\n"); > > + err = -EIO; > > + } > > + if (err) > > + goto out; > > + > > + counter = 0; > > + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); > > + if (err) { > > + pr_err("Failed at line %d, err=%d\n", __LINE__, err); > > + goto out; > > + } > > + mock_device_flush(i915); > > + if (counter == 0) { > > + pr_err("Did not retire on each active engines\n"); > > + err = -EINVAL; > > + goto out; > > + } > > This one is inline with my understanding, and the context_barrier_task > arguments are the same as the one above.. hm.. I am confused. This time it is active, so we actually have to wait as the barrier waits for the GPU before firing. -Chris
Quoting Tvrtko Ursulin (2019-03-01 16:12:33) > > On 01/03/2019 14:03, Chris Wilson wrote: > > +I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault); > > +static int context_barrier_task(struct i915_gem_context *ctx, > > + unsigned long engines, > > I'm in two minds about usefulness of intel_engine_mask_t. I'm thinking if we store it in a struct, then yes, always use intel_engine_mask_t. For function parameters, the natural type is unsigned long and we have some time before worrying about the lack of bits. So it's a problem for future self and I'm ambivalent about the need to enforce it just yet. -Chris
On 01/03/2019 19:03, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2019-03-01 16:12:33) >> >> On 01/03/2019 14:03, Chris Wilson wrote: >>> + counter = 0; >>> + err = context_barrier_task(ctx, 0, mock_barrier_task, &counter); >>> + if (err) { >>> + pr_err("Failed at line %d, err=%d\n", __LINE__, err); >>> + goto out; >>> + } >>> + if (counter == 0) { >>> + pr_err("Did not retire immediately with 0 engines\n"); >>> + err = -EINVAL; >>> + goto out; >>> + } >>> + >>> + counter = 0; >>> + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); >>> + if (err) { >>> + pr_err("Failed at line %d, err=%d\n", __LINE__, err); >>> + goto out; >>> + } >>> + if (counter == 0) { >>> + pr_err("Did not retire immediately for all inactive engines\n"); >> >> Why would this one retire immediately? It will send requests down the >> pipe, no? So don't you actually need to wait for the tracker to be >> signalled and that counter == num_engines? > > Nothing has used the context at this point, so we don't emit a request > on any engine, and the barrier can be immediately executed. Yep, that it uses ctx->active_engines slipped my mind. >>> + err = -EINVAL; >>> + goto out; >>> + } >>> + >>> + rq = ERR_PTR(-ENODEV); >>> + with_intel_runtime_pm(i915, wakeref) >>> + rq = i915_request_alloc(i915->engine[RCS], ctx); >>> + if (IS_ERR(rq)) { >>> + pr_err("Request allocation failed!\n"); >>> + goto out; >>> + } >>> + i915_request_add(rq); >> >> Doesn't this need to go under the wakeref as well? > > No, we only need the wakeref to start (that's only to avoid blocking > inside request_alloc --- yeah, that's not exactly how it works, we'll be > back later to fix that!). The request then carries the GT wakeref with it. 
> >>> + GEM_BUG_ON(list_empty(&ctx->active_engines)); >>> + >>> + counter = 0; >>> + context_barrier_inject_fault = BIT(RCS); >>> + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); >>> + context_barrier_inject_fault = 0; >>> + if (err == -ENXIO) >>> + err = 0; >>> + else >>> + pr_err("Did not hit fault injection!\n"); >>> + if (counter != 0) { >>> + pr_err("Invoked callback on error!\n"); >>> + err = -EIO; >>> + } >>> + if (err) >>> + goto out; >>> + >>> + counter = 0; >>> + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); >>> + if (err) { >>> + pr_err("Failed at line %d, err=%d\n", __LINE__, err); >>> + goto out; >>> + } >>> + mock_device_flush(i915); >>> + if (counter == 0) { >>> + pr_err("Did not retire on each active engines\n"); >>> + err = -EINVAL; >>> + goto out; >>> + } >> >> This one is inline with my understanding, and the context_barrier_task >> arguments are the same as the one above.. hm.. I am confused. > > This time it is active, so we actually have to wait as the barrier waits > for the GPU before firing. Yep. Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 3b5145b30d85..91926a407548 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -707,6 +707,80 @@ last_request_on_engine(struct i915_timeline *timeline, return NULL; } +struct context_barrier_task { + struct i915_active base; + void (*task)(void *data); + void *data; +}; + +static void cb_retire(struct i915_active *base) +{ + struct context_barrier_task *cb = container_of(base, typeof(*cb), base); + + if (cb->task) + cb->task(cb->data); + + i915_active_fini(&cb->base); + kfree(cb); +} + +I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault); +static int context_barrier_task(struct i915_gem_context *ctx, + unsigned long engines, + void (*task)(void *data), + void *data) +{ + struct drm_i915_private *i915 = ctx->i915; + struct context_barrier_task *cb; + struct intel_context *ce; + intel_wakeref_t wakeref; + int err = 0; + + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(!task); + + cb = kmalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + + i915_active_init(i915, &cb->base, cb_retire); + i915_active_acquire(&cb->base); + + wakeref = intel_runtime_pm_get(i915); + list_for_each_entry(ce, &ctx->active_engines, active_link) { + struct intel_engine_cs *engine = ce->engine; + struct i915_request *rq; + + if (!(ce->engine->mask & engines)) + continue; + + if (I915_SELFTEST_ONLY(context_barrier_inject_fault & + engine->mask)) { + err = -ENXIO; + break; + } + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + + err = i915_active_ref(&cb->base, rq->fence.context, rq); + i915_request_add(rq); + if (err) + break; + } + intel_runtime_pm_put(i915, wakeref); + + cb->task = err ? 
NULL : task; /* caller needs to unwind instead */ + cb->data = data; + + i915_active_release(&cb->base); + + return err; +} + int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915, unsigned long mask) { diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 7ae5033457b6..4f7c04247354 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -1594,10 +1594,113 @@ static int igt_switch_to_kernel_context(void *arg) return err; } +static void mock_barrier_task(void *data) +{ + unsigned int *counter = data; + + ++*counter; +} + +static int mock_context_barrier(void *arg) +{ +#undef pr_fmt +#define pr_fmt(x) "context_barrier_task():" # x + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx; + struct i915_request *rq; + intel_wakeref_t wakeref; + unsigned int counter; + int err; + + /* + * The context barrier provides us with a callback after it emits + * a request; useful for retiring old state after loading new. 
+ */ + + mutex_lock(&i915->drm.struct_mutex); + + ctx = mock_context(i915, "mock"); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto unlock; + } + + counter = 0; + err = context_barrier_task(ctx, 0, mock_barrier_task, &counter); + if (err) { + pr_err("Failed at line %d, err=%d\n", __LINE__, err); + goto out; + } + if (counter == 0) { + pr_err("Did not retire immediately with 0 engines\n"); + err = -EINVAL; + goto out; + } + + counter = 0; + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); + if (err) { + pr_err("Failed at line %d, err=%d\n", __LINE__, err); + goto out; + } + if (counter == 0) { + pr_err("Did not retire immediately for all inactive engines\n"); + err = -EINVAL; + goto out; + } + + rq = ERR_PTR(-ENODEV); + with_intel_runtime_pm(i915, wakeref) + rq = i915_request_alloc(i915->engine[RCS], ctx); + if (IS_ERR(rq)) { + pr_err("Request allocation failed!\n"); + goto out; + } + i915_request_add(rq); + GEM_BUG_ON(list_empty(&ctx->active_engines)); + + counter = 0; + context_barrier_inject_fault = BIT(RCS); + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); + context_barrier_inject_fault = 0; + if (err == -ENXIO) + err = 0; + else + pr_err("Did not hit fault injection!\n"); + if (counter != 0) { + pr_err("Invoked callback on error!\n"); + err = -EIO; + } + if (err) + goto out; + + counter = 0; + err = context_barrier_task(ctx, -1, mock_barrier_task, &counter); + if (err) { + pr_err("Failed at line %d, err=%d\n", __LINE__, err); + goto out; + } + mock_device_flush(i915); + if (counter == 0) { + pr_err("Did not retire on each active engines\n"); + err = -EINVAL; + goto out; + } + +out: + mock_context_close(ctx); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +#undef pr_fmt +#define pr_fmt(x) x +} + int i915_gem_context_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_switch_to_kernel_context), + SUBTEST(mock_context_barrier), }; struct drm_i915_private *i915; int err;
In the next patch, we will want to update live state within a context. As this state may be in use by the GPU and we haven't been explicitly tracking its activity, we instead attach it to a request we send down to set up the context with its new state, and on retiring that request we clean up the old state, as we then know that it is no longer live. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_gem_context.c | 74 +++++++++++++ .../gpu/drm/i915/selftests/i915_gem_context.c | 103 ++++++++++++++++++ 2 files changed, 177 insertions(+)