
[16/38] drm/i915: Introduce a context barrier callback

Message ID 20190301140404.26690-16-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [01/38] drm/i915/execlists: Suppress redundant preemption

Commit Message

Chris Wilson March 1, 2019, 2:03 p.m. UTC
In the next patch, we will want to update live state within a context.
As this state may be in use by the GPU and we haven't been explicitly
tracking its activity, we instead attach it to a request that we send
down to set up the context with its new state, and on retiring that
request we clean up the old state, as we then know it is no longer live.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_context.c       |  74 +++++++++++++
 .../gpu/drm/i915/selftests/i915_gem_context.c | 103 ++++++++++++++++++
 2 files changed, 177 insertions(+)
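
The flow is reference-counted: the barrier keeps the old state alive until
every request emitted for the context's busy engines has retired, and only
then runs the cleanup task. Below is a toy, userspace-style model of that
flow in plain C; the names are stand-ins, not the real i915_active or
i915_request API.

/* Toy model of the context barrier: a reference count stands in for
 * i915_active, and "requests" stand in for work the GPU still owns.
 * The task runs only once every tracked request has retired. */
#include <stdio.h>
#include <stdlib.h>

struct barrier_task {
	int refcount;		/* models i915_active acquire/ref/release */
	void (*task)(void *);	/* runs when the last reference drops */
	void *data;
};

static void barrier_put(struct barrier_task *cb)
{
	if (--cb->refcount)
		return;

	if (cb->task)
		cb->task(cb->data);	/* old state is no longer live */
	free(cb);
}

static void free_old_state(void *data)
{
	printf("retiring old state: %s\n", (const char *)data);
}

int main(void)
{
	struct barrier_task *cb = malloc(sizeof(*cb));
	int nengines = 2, i;

	if (!cb)
		return 1;

	cb->refcount = 1;		/* "acquire": hold the barrier open */
	cb->task = free_old_state;
	cb->data = "old context state";

	/* one reference per request emitted on an engine the context uses */
	for (i = 0; i < nengines; i++)
		cb->refcount++;

	barrier_put(cb);		/* "release": drop the setup reference */

	/* later, as each request retires, its reference is dropped;
	 * the last drop fires free_old_state() */
	for (i = 0; i < nengines; i++)
		barrier_put(cb);

	return 0;
}

In the patch itself the per-request references are taken with
i915_active_ref() and cb_retire() is the callback invoked on the final drop.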

Comments

Tvrtko Ursulin March 1, 2019, 4:12 p.m. UTC | #1
On 01/03/2019 14:03, Chris Wilson wrote:
> In the next patch, we will want to update live state within a context.
> As this state may be in use by the GPU and we haven't been explicitly
> tracking its activity, we instead attach it to a request that we send
> down to set up the context with its new state, and on retiring that
> request we clean up the old state, as we then know it is no longer live.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_gem_context.c       |  74 +++++++++++++
>   .../gpu/drm/i915/selftests/i915_gem_context.c | 103 ++++++++++++++++++
>   2 files changed, 177 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 3b5145b30d85..91926a407548 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -707,6 +707,80 @@ last_request_on_engine(struct i915_timeline *timeline,
>   	return NULL;
>   }
>   
> +struct context_barrier_task {
> +	struct i915_active base;
> +	void (*task)(void *data);
> +	void *data;
> +};
> +
> +static void cb_retire(struct i915_active *base)
> +{
> +	struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
> +
> +	if (cb->task)
> +		cb->task(cb->data);
> +
> +	i915_active_fini(&cb->base);
> +	kfree(cb);
> +}
> +
> +I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault);
> +static int context_barrier_task(struct i915_gem_context *ctx,
> +				unsigned long engines,

I'm in two minds about the usefulness of intel_engine_mask_t.

> +				void (*task)(void *data),
> +				void *data)
> +{
> +	struct drm_i915_private *i915 = ctx->i915;
> +	struct context_barrier_task *cb;
> +	struct intel_context *ce;
> +	intel_wakeref_t wakeref;
> +	int err = 0;
> +
> +	lockdep_assert_held(&i915->drm.struct_mutex);
> +	GEM_BUG_ON(!task);
> +
> +	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
> +	if (!cb)
> +		return -ENOMEM;
> +
> +	i915_active_init(i915, &cb->base, cb_retire);
> +	i915_active_acquire(&cb->base);
> +
> +	wakeref = intel_runtime_pm_get(i915);
> +	list_for_each_entry(ce, &ctx->active_engines, active_link) {
> +		struct intel_engine_cs *engine = ce->engine;
> +		struct i915_request *rq;
> +
> +		if (!(ce->engine->mask & engines))
> +			continue;
> +
> +		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
> +				       engine->mask)) {
> +			err = -ENXIO;
> +			break;
> +		}
> +
> +		rq = i915_request_alloc(engine, ctx);
> +		if (IS_ERR(rq)) {
> +			err = PTR_ERR(rq);
> +			break;
> +		}
> +
> +		err = i915_active_ref(&cb->base, rq->fence.context, rq);
> +		i915_request_add(rq);
> +		if (err)
> +			break;
> +	}
> +	intel_runtime_pm_put(i915, wakeref);
> +
> +	cb->task = err ? NULL : task; /* caller needs to unwind instead */
> +	cb->data = data;
> +
> +	i915_active_release(&cb->base);
> +
> +	return err;
> +}
> +
>   int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
>   				      unsigned long mask)
>   {
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 7ae5033457b6..4f7c04247354 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -1594,10 +1594,113 @@ static int igt_switch_to_kernel_context(void *arg)
>   	return err;
>   }
>   
> +static void mock_barrier_task(void *data)
> +{
> +	unsigned int *counter = data;
> +
> +	++*counter;
> +}
> +
> +static int mock_context_barrier(void *arg)
> +{
> +#undef pr_fmt
> +#define pr_fmt(x) "context_barrier_task():" # x
> +	struct drm_i915_private *i915 = arg;
> +	struct i915_gem_context *ctx;
> +	struct i915_request *rq;
> +	intel_wakeref_t wakeref;
> +	unsigned int counter;
> +	int err;
> +
> +	/*
> +	 * The context barrier provides us with a callback after it emits
> +	 * a request; useful for retiring old state after loading new.
> +	 */
> +
> +	mutex_lock(&i915->drm.struct_mutex);
> +
> +	ctx = mock_context(i915, "mock");
> +	if (IS_ERR(ctx)) {
> +		err = PTR_ERR(ctx);
> +		goto unlock;
> +	}
> +
> +	counter = 0;
> +	err = context_barrier_task(ctx, 0, mock_barrier_task, &counter);
> +	if (err) {
> +		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
> +		goto out;
> +	}
> +	if (counter == 0) {
> +		pr_err("Did not retire immediately with 0 engines\n");
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	counter = 0;
> +	err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
> +	if (err) {
> +		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
> +		goto out;
> +	}
> +	if (counter == 0) {
> +		pr_err("Did not retire immediately for all inactive engines\n");

Why would this one retire immediately? It will send requests down the 
pipe, no? So don't you actually need to wait for the tracker to be 
signalled and that counter == num_engines?

> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	rq = ERR_PTR(-ENODEV);
> +	with_intel_runtime_pm(i915, wakeref)
> +		rq = i915_request_alloc(i915->engine[RCS], ctx);
> +	if (IS_ERR(rq)) {
> +		pr_err("Request allocation failed!\n");
> +		goto out;
> +	}
> +	i915_request_add(rq);

Doesn't this need to go under the wakeref as well?

> +	GEM_BUG_ON(list_empty(&ctx->active_engines));
> +
> +	counter = 0;
> +	context_barrier_inject_fault = BIT(RCS);
> +	err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
> +	context_barrier_inject_fault = 0;
> +	if (err == -ENXIO)
> +		err = 0;
> +	else
> +		pr_err("Did not hit fault injection!\n");
> +	if (counter != 0) {
> +		pr_err("Invoked callback on error!\n");
> +		err = -EIO;
> +	}
> +	if (err)
> +		goto out;
> +
> +	counter = 0;
> +	err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
> +	if (err) {
> +		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
> +		goto out;
> +	}
> +	mock_device_flush(i915);
> +	if (counter == 0) {
> +		pr_err("Did not retire on each active engines\n");
> +		err = -EINVAL;
> +		goto out;
> +	}

This one is in line with my understanding, and the context_barrier_task 
arguments are the same as in the call above.. hm.. I am confused.

> +
> +out:
> +	mock_context_close(ctx);
> +unlock:
> +	mutex_unlock(&i915->drm.struct_mutex);
> +	return err;
> +#undef pr_fmt
> +#define pr_fmt(x) x
> +}
> +
>   int i915_gem_context_mock_selftests(void)
>   {
>   	static const struct i915_subtest tests[] = {
>   		SUBTEST(igt_switch_to_kernel_context),
> +		SUBTEST(mock_context_barrier),
>   	};
>   	struct drm_i915_private *i915;
>   	int err;
> 

Regards,

Tvrtko
Chris Wilson March 1, 2019, 7:03 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-03-01 16:12:33)
> 
> On 01/03/2019 14:03, Chris Wilson wrote:
> > +     counter = 0;
> > +     err = context_barrier_task(ctx, 0, mock_barrier_task, &counter);
> > +     if (err) {
> > +             pr_err("Failed at line %d, err=%d\n", __LINE__, err);
> > +             goto out;
> > +     }
> > +     if (counter == 0) {
> > +             pr_err("Did not retire immediately with 0 engines\n");
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +
> > +     counter = 0;
> > +     err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
> > +     if (err) {
> > +             pr_err("Failed at line %d, err=%d\n", __LINE__, err);
> > +             goto out;
> > +     }
> > +     if (counter == 0) {
> > +             pr_err("Did not retire immediately for all inactive engines\n");
> 
> Why would this one retire immediately? It will send requests down the 
> pipe, no? So don't you actually need to wait for the tracker to be 
> signalled and that counter == num_engines?

Nothing has used the context at this point, so we don't emit a request
on any engine, and the barrier can be immediately executed.
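
To spell that out with a toy refcount model (plain C, not the real
i915_active API): with ctx->active_engines empty, the loop takes no extra
references, so the final release drops the only one and the retire callback
runs synchronously, before context_barrier_task() even returns. That is why
the selftest can check the counter immediately.

/* Degenerate case: nothing has used the context, so the engine loop
 * takes no extra references; the final release drops the only one and
 * the task fires inline. Toy model only, not the i915 API. */
struct barrier { int ref; void (*task)(void *); void *data; };

static void put(struct barrier *b)
{
	if (!--b->ref && b->task)
		b->task(b->data);	/* runs right here, no waiting */
}

static void bump(void *data)
{
	++*(unsigned int *)data;
}

int main(void)
{
	unsigned int counter = 0;
	struct barrier b = { .ref = 1, .task = bump, .data = &counter };

	/* loop over ctx->active_engines: empty, so no refs are added */

	put(&b);			/* counter is already 1 on return */
	return counter == 1 ? 0 : 1;
}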

> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> > +
> > +     rq = ERR_PTR(-ENODEV);
> > +     with_intel_runtime_pm(i915, wakeref)
> > +             rq = i915_request_alloc(i915->engine[RCS], ctx);
> > +     if (IS_ERR(rq)) {
> > +             pr_err("Request allocation failed!\n");
> > +             goto out;
> > +     }
> > +     i915_request_add(rq);
> 
> Doesn't this need to go under the wakeref as well?

No, we only need the wakeref to start (that's only to avoid blocking
inside request_alloc --- yeah, that's not exactly how it works, we'll be
back later to fix that!). The request then carries the GT wakeref with it.
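
Roughly, the handover looks like this toy model (illustrative C, not the
actual runtime-PM code): the caller's wakeref only needs to cover
allocation, because the request takes its own reference to the device and
drops it when it retires.

/* Toy model of the wakeref handover: the caller holds a temporary
 * reference only while the request is constructed; the request then
 * pins the device itself until it retires. Not the real i915 code. */
#include <stdio.h>

static int device_refs;			/* stands in for the GT wakeref */

struct request { int holds_ref; };

static void wakeref_get(void) { device_refs++; }
static void wakeref_put(void) { device_refs--; }

static void request_alloc(struct request *rq)
{
	rq->holds_ref = 1;
	wakeref_get();			/* the request keeps the device awake */
}

static void request_retire(struct request *rq)
{
	if (rq->holds_ref)
		wakeref_put();		/* dropped only when the request retires */
	rq->holds_ref = 0;
}

int main(void)
{
	struct request rq = { 0 };

	wakeref_get();			/* caller's temporary wakeref... */
	request_alloc(&rq);
	wakeref_put();			/* ...can be dropped right after alloc */

	/* request_add() and execution would happen here; device stays awake */
	printf("refs while request in flight: %d\n", device_refs);

	request_retire(&rq);
	printf("refs after retire: %d\n", device_refs);
	return 0;
}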

> > +     GEM_BUG_ON(list_empty(&ctx->active_engines));
> > +
> > +     counter = 0;
> > +     context_barrier_inject_fault = BIT(RCS);
> > +     err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
> > +     context_barrier_inject_fault = 0;
> > +     if (err == -ENXIO)
> > +             err = 0;
> > +     else
> > +             pr_err("Did not hit fault injection!\n");
> > +     if (counter != 0) {
> > +             pr_err("Invoked callback on error!\n");
> > +             err = -EIO;
> > +     }
> > +     if (err)
> > +             goto out;
> > +
> > +     counter = 0;
> > +     err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
> > +     if (err) {
> > +             pr_err("Failed at line %d, err=%d\n", __LINE__, err);
> > +             goto out;
> > +     }
> > +     mock_device_flush(i915);
> > +     if (counter == 0) {
> > +             pr_err("Did not retire on each active engines\n");
> > +             err = -EINVAL;
> > +             goto out;
> > +     }
> 
> This one is in line with my understanding, and the context_barrier_task 
> arguments are the same as in the call above.. hm.. I am confused.

This time it is active, so we actually have to wait as the barrier waits
for the GPU before firing.
-Chris
Chris Wilson March 2, 2019, 10:01 a.m. UTC | #3
Quoting Tvrtko Ursulin (2019-03-01 16:12:33)
> 
> On 01/03/2019 14:03, Chris Wilson wrote:
> > +I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault);
> > +static int context_barrier_task(struct i915_gem_context *ctx,
> > +                             unsigned long engines,
> 
> I'm in two minds about the usefulness of intel_engine_mask_t.

I'm thinking if we store it in a struct, then yes, always use
intel_engine_mask_t. For function parameters, the natural type is
unsigned long and we have some time before worrying about the lack of
bits. So it's a problem for future self and I'm ambivalent about the
need to enforce it just yet.
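
For illustration (the actual intel_engine_mask_t definition may differ),
the trade-off looks something like this: a narrow typedef keeps the mask
compact where it lives in a struct, while a function parameter can stay
unsigned long at no cost until the engine count outgrows it.

/* Illustrative only; not the actual i915 definitions. */
#include <stdint.h>
#include <stdio.h>

typedef uint8_t engine_mask_t;		/* compact storage inside structs */

struct engine {
	unsigned int id;
	engine_mask_t mask;		/* BIT(id), kept narrow in the struct */
};

/* A parameter can stay unsigned long: the width costs nothing here and
 * leaves headroom if the number of engines ever grows. */
static int engine_selected(const struct engine *e, unsigned long engines)
{
	return (e->mask & engines) != 0;
}

int main(void)
{
	struct engine rcs = { .id = 0, .mask = 1u << 0 };

	printf("rcs selected by ~0ul: %d\n", engine_selected(&rcs, ~0ul));
	printf("rcs selected by 0:    %d\n", engine_selected(&rcs, 0));
	return 0;
}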
-Chris
Tvrtko Ursulin March 4, 2019, 8:55 a.m. UTC | #4
On 01/03/2019 19:03, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-03-01 16:12:33)
>>
>> On 01/03/2019 14:03, Chris Wilson wrote:
>>> +     counter = 0;
>>> +     err = context_barrier_task(ctx, 0, mock_barrier_task, &counter);
>>> +     if (err) {
>>> +             pr_err("Failed at line %d, err=%d\n", __LINE__, err);
>>> +             goto out;
>>> +     }
>>> +     if (counter == 0) {
>>> +             pr_err("Did not retire immediately with 0 engines\n");
>>> +             err = -EINVAL;
>>> +             goto out;
>>> +     }
>>> +
>>> +     counter = 0;
>>> +     err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
>>> +     if (err) {
>>> +             pr_err("Failed at line %d, err=%d\n", __LINE__, err);
>>> +             goto out;
>>> +     }
>>> +     if (counter == 0) {
>>> +             pr_err("Did not retire immediately for all inactive engines\n");
>>
>> Why would this one retire immediately? It will send requests down the
>> pipe, no? So don't you actually need to wait for the tracker to be
>> signalled and that counter == num_engines?
> 
> Nothing has used the context at this point, so we don't emit a request
> on any engine, and the barrier can be immediately executed.

Yep, that it uses ctx->active_engines slipped my mind.

>>> +             err = -EINVAL;
>>> +             goto out;
>>> +     }
>>> +
>>> +     rq = ERR_PTR(-ENODEV);
>>> +     with_intel_runtime_pm(i915, wakeref)
>>> +             rq = i915_request_alloc(i915->engine[RCS], ctx);
>>> +     if (IS_ERR(rq)) {
>>> +             pr_err("Request allocation failed!\n");
>>> +             goto out;
>>> +     }
>>> +     i915_request_add(rq);
>>
>> Doesn't this need to go under the wakeref as well?
> 
> No, we only need the wakeref to start (that's only to avoid blocking
> inside request_alloc --- yeah, that's not exactly how it works, we'll be
> back later to fix that!). The request then carries the GT wakeref with it.
> 
>>> +     GEM_BUG_ON(list_empty(&ctx->active_engines));
>>> +
>>> +     counter = 0;
>>> +     context_barrier_inject_fault = BIT(RCS);
>>> +     err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
>>> +     context_barrier_inject_fault = 0;
>>> +     if (err == -ENXIO)
>>> +             err = 0;
>>> +     else
>>> +             pr_err("Did not hit fault injection!\n");
>>> +     if (counter != 0) {
>>> +             pr_err("Invoked callback on error!\n");
>>> +             err = -EIO;
>>> +     }
>>> +     if (err)
>>> +             goto out;
>>> +
>>> +     counter = 0;
>>> +     err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
>>> +     if (err) {
>>> +             pr_err("Failed at line %d, err=%d\n", __LINE__, err);
>>> +             goto out;
>>> +     }
>>> +     mock_device_flush(i915);
>>> +     if (counter == 0) {
>>> +             pr_err("Did not retire on each active engines\n");
>>> +             err = -EINVAL;
>>> +             goto out;
>>> +     }
>>
>> This one is in line with my understanding, and the context_barrier_task
>> arguments are the same as in the call above.. hm.. I am confused.
> 
> This time it is active, so we actually have to wait as the barrier waits
> for the GPU before firing.

Yep.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3b5145b30d85..91926a407548 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -707,6 +707,80 @@  last_request_on_engine(struct i915_timeline *timeline,
 	return NULL;
 }
 
+struct context_barrier_task {
+	struct i915_active base;
+	void (*task)(void *data);
+	void *data;
+};
+
+static void cb_retire(struct i915_active *base)
+{
+	struct context_barrier_task *cb = container_of(base, typeof(*cb), base);
+
+	if (cb->task)
+		cb->task(cb->data);
+
+	i915_active_fini(&cb->base);
+	kfree(cb);
+}
+
+I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault);
+static int context_barrier_task(struct i915_gem_context *ctx,
+				unsigned long engines,
+				void (*task)(void *data),
+				void *data)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct context_barrier_task *cb;
+	struct intel_context *ce;
+	intel_wakeref_t wakeref;
+	int err = 0;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+	GEM_BUG_ON(!task);
+
+	cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+	if (!cb)
+		return -ENOMEM;
+
+	i915_active_init(i915, &cb->base, cb_retire);
+	i915_active_acquire(&cb->base);
+
+	wakeref = intel_runtime_pm_get(i915);
+	list_for_each_entry(ce, &ctx->active_engines, active_link) {
+		struct intel_engine_cs *engine = ce->engine;
+		struct i915_request *rq;
+
+		if (!(ce->engine->mask & engines))
+			continue;
+
+		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
+				       engine->mask)) {
+			err = -ENXIO;
+			break;
+		}
+
+		rq = i915_request_alloc(engine, ctx);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			break;
+		}
+
+		err = i915_active_ref(&cb->base, rq->fence.context, rq);
+		i915_request_add(rq);
+		if (err)
+			break;
+	}
+	intel_runtime_pm_put(i915, wakeref);
+
+	cb->task = err ? NULL : task; /* caller needs to unwind instead */
+	cb->data = data;
+
+	i915_active_release(&cb->base);
+
+	return err;
+}
+
 int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 				      unsigned long mask)
 {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 7ae5033457b6..4f7c04247354 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -1594,10 +1594,113 @@  static int igt_switch_to_kernel_context(void *arg)
 	return err;
 }
 
+static void mock_barrier_task(void *data)
+{
+	unsigned int *counter = data;
+
+	++*counter;
+}
+
+static int mock_context_barrier(void *arg)
+{
+#undef pr_fmt
+#define pr_fmt(x) "context_barrier_task():" # x
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *ctx;
+	struct i915_request *rq;
+	intel_wakeref_t wakeref;
+	unsigned int counter;
+	int err;
+
+	/*
+	 * The context barrier provides us with a callback after it emits
+	 * a request; useful for retiring old state after loading new.
+	 */
+
+	mutex_lock(&i915->drm.struct_mutex);
+
+	ctx = mock_context(i915, "mock");
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto unlock;
+	}
+
+	counter = 0;
+	err = context_barrier_task(ctx, 0, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	if (counter == 0) {
+		pr_err("Did not retire immediately with 0 engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	counter = 0;
+	err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	if (counter == 0) {
+		pr_err("Did not retire immediately for all inactive engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	rq = ERR_PTR(-ENODEV);
+	with_intel_runtime_pm(i915, wakeref)
+		rq = i915_request_alloc(i915->engine[RCS], ctx);
+	if (IS_ERR(rq)) {
+		pr_err("Request allocation failed!\n");
+		goto out;
+	}
+	i915_request_add(rq);
+	GEM_BUG_ON(list_empty(&ctx->active_engines));
+
+	counter = 0;
+	context_barrier_inject_fault = BIT(RCS);
+	err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
+	context_barrier_inject_fault = 0;
+	if (err == -ENXIO)
+		err = 0;
+	else
+		pr_err("Did not hit fault injection!\n");
+	if (counter != 0) {
+		pr_err("Invoked callback on error!\n");
+		err = -EIO;
+	}
+	if (err)
+		goto out;
+
+	counter = 0;
+	err = context_barrier_task(ctx, -1, mock_barrier_task, &counter);
+	if (err) {
+		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
+		goto out;
+	}
+	mock_device_flush(i915);
+	if (counter == 0) {
+		pr_err("Did not retire on each active engines\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	mock_context_close(ctx);
+unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+#undef pr_fmt
+#define pr_fmt(x) x
+}
+
 int i915_gem_context_mock_selftests(void)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_switch_to_kernel_context),
+		SUBTEST(mock_context_barrier),
 	};
 	struct drm_i915_private *i915;
 	int err;