[i-g-t] i915/gem_exec_balance: Check chain of submit dependencies
diff mbox series

Message ID 20191127145026.3438876-1-chris@chris-wilson.co.uk
State New
Headers show
Series
  • [i-g-t] i915/gem_exec_balance: Check chain of submit dependencies
Related show

Commit Message

Chris Wilson Nov. 27, 2019, 2:50 p.m. UTC
Check that the submit fence couples into the dependency chain so that
the bonded pair cannot over take any of the earlier requests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/i915/gem_exec_balancer.c | 88 ++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

Comments

Tvrtko Ursulin Nov. 27, 2019, 3:08 p.m. UTC | #1
On 27/11/2019 14:50, Chris Wilson wrote:
> Check that the submit fence couples into the dependency chain so that
> the bonded pair cannot over take any of the earlier requests.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/i915/gem_exec_balancer.c | 88 ++++++++++++++++++++++++++++++++++
>   1 file changed, 88 insertions(+)
> 
> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> index 86028cfdd..ff5bd0134 100644
> --- a/tests/i915/gem_exec_balancer.c
> +++ b/tests/i915/gem_exec_balancer.c
> @@ -855,6 +855,91 @@ static void bonded_slice(int i915)
>   	munmap(stop, 4096);
>   }
>   
> +static void bonded_chain(int i915)
> +{
> +	struct drm_i915_gem_exec_object2 batch = {
> +		.handle = batch_create(i915),
> +	};
> +	uint32_t ctx;
> +
> +	/*
> +	 * Given batches A, B and B', where B and B' are a bonded pair, with
> +	 * B' depending on B with a submit fence and B depending on A as
> +	 * an ordinary fence; prove B' cannot complete before A.
> +	 */
> +
> +	ctx = gem_context_create(i915);
> +
> +	for (int class = 0; class < 32; class++) {
> +		struct i915_engine_class_instance *siblings;
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&batch),
> +			.buffer_count = 1,
> +			.rsvd1 = ctx,
> +		};
> +		unsigned int count;
> +		igt_spin_t *spin;
> +
> +		siblings = list_engines(i915, 1u << class, &count);
> +		if (!siblings)
> +			continue;
> +
> +		if (count < 2) {
> +			free(siblings);
> +			continue;
> +		}
> +
> +		/* A: spin forever on engine 1 */
> +		set_load_balancer(i915, ctx, siblings, count, NULL);
> +		spin = igt_spin_new(i915,
> +				    .ctx = ctx,
> +				    .engine = 1,
> +				    .flags = (IGT_SPIN_POLL_RUN |
> +					      IGT_SPIN_FENCE_OUT));
> +		igt_spin_busywait_until_started(spin);
> +
> +		/* B: waits for A on engine 2 */
> +		set_load_balancer(i915, ctx, siblings, count, NULL);
> +		execbuf.rsvd2 = spin->out_fence;
> +		execbuf.flags = I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
> +		execbuf.flags |= 2; /* opposite engine to spinner */

Why or?

> +		gem_execbuf_wr(i915, &execbuf);
> +
> +		/* B': run in parallel with B on engine 1, i.e. not before A! */
> +		set_load_balancer(i915, ctx, siblings, count, NULL);

Why are you repeating the same set_load_balancer calls?

> +		execbuf.flags = I915_EXEC_FENCE_SUBMIT | I915_EXEC_FENCE_OUT;
> +		execbuf.flags |= 1; /* same engine as spinner */

Engine 3? But there are only two engines in the map, plus the virtual. 
So 0, 1, 2 - how does this work for you?

> +		execbuf.rsvd2 >>= 32;
> +		gem_execbuf_wr(i915, &execbuf);
> +
> +		/* Wait for any magic timeslicing or preemptions... */
> +		igt_assert_eq(sync_fence_wait(execbuf.rsvd2 >> 32, 1000),
> +			      -ETIME);
> +
> +		igt_debugfs_dump(i915, "i915_engine_info");
> +
> +		/*
> +		 * ... which should not have happened, so everything is still
> +		 * waiting on the spinner
> +		 */
> +		igt_assert_eq(sync_fence_status(spin->out_fence), 0);
> +		igt_assert_eq(sync_fence_status(execbuf.rsvd2 & 0xffffffff), 0);
> +		igt_assert_eq(sync_fence_status(execbuf.rsvd2 >> 32), 0);
> +
> +		igt_spin_free(i915, spin);
> +		gem_sync(i915, batch.handle);
> +
> +		igt_assert_eq(sync_fence_status(execbuf.rsvd2 & 0xffffffff), 1);
> +		igt_assert_eq(sync_fence_status(execbuf.rsvd2 >> 32), 1);
> +
> +		close(execbuf.rsvd2);
> +		close(execbuf.rsvd2 >> 32);
> +	}

How evil to lower ctx prio for A only?

> +
> +	gem_context_destroy(i915, ctx);
> +	gem_close(i915, batch.handle);
> +}
> +
>   static void indices(int i915)
>   {
>   	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
> @@ -1580,6 +1665,9 @@ igt_main
>   	igt_subtest("bonded-slice")
>   		bonded_slice(i915);
>   
> +	igt_subtest("bonded-chain")
> +		bonded_chain(i915);
> +
>   	igt_fixture {
>   		igt_stop_hang_detector();
>   	}
> 

Regards,

Tvrtko
Chris Wilson Nov. 27, 2019, 3:15 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-11-27 15:08:25)
> 
> On 27/11/2019 14:50, Chris Wilson wrote:
> > Check that the submit fence couples into the dependency chain so that
> > the bonded pair cannot over take any of the earlier requests.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   tests/i915/gem_exec_balancer.c | 88 ++++++++++++++++++++++++++++++++++
> >   1 file changed, 88 insertions(+)
> > 
> > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> > index 86028cfdd..ff5bd0134 100644
> > --- a/tests/i915/gem_exec_balancer.c
> > +++ b/tests/i915/gem_exec_balancer.c
> > @@ -855,6 +855,91 @@ static void bonded_slice(int i915)
> >       munmap(stop, 4096);
> >   }
> >   
> > +static void bonded_chain(int i915)
> > +{
> > +     struct drm_i915_gem_exec_object2 batch = {
> > +             .handle = batch_create(i915),
> > +     };
> > +     uint32_t ctx;
> > +
> > +     /*
> > +      * Given batches A, B and B', where B and B' are a bonded pair, with
> > +      * B' depending on B with a submit fence and B depending on A as
> > +      * an ordinary fence; prove B' cannot complete before A.
> > +      */
> > +
> > +     ctx = gem_context_create(i915);
> > +
> > +     for (int class = 0; class < 32; class++) {
> > +             struct i915_engine_class_instance *siblings;
> > +             struct drm_i915_gem_execbuffer2 execbuf = {
> > +                     .buffers_ptr = to_user_pointer(&batch),
> > +                     .buffer_count = 1,
> > +                     .rsvd1 = ctx,
> > +             };
> > +             unsigned int count;
> > +             igt_spin_t *spin;
> > +
> > +             siblings = list_engines(i915, 1u << class, &count);
> > +             if (!siblings)
> > +                     continue;
> > +
> > +             if (count < 2) {
> > +                     free(siblings);
> > +                     continue;
> > +             }
> > +
> > +             /* A: spin forever on engine 1 */
> > +             set_load_balancer(i915, ctx, siblings, count, NULL);
> > +             spin = igt_spin_new(i915,
> > +                                 .ctx = ctx,
> > +                                 .engine = 1,
> > +                                 .flags = (IGT_SPIN_POLL_RUN |
> > +                                           IGT_SPIN_FENCE_OUT));
> > +             igt_spin_busywait_until_started(spin);
> > +
> > +             /* B: waits for A on engine 2 */
> > +             set_load_balancer(i915, ctx, siblings, count, NULL);
> > +             execbuf.rsvd2 = spin->out_fence;
> > +             execbuf.flags = I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
> > +             execbuf.flags |= 2; /* opposite engine to spinner */
> 
> Why or?

execbuf.flags = 2 | FENCE_IN | FENCE_OUT

> 
> > +             gem_execbuf_wr(i915, &execbuf);
> > +
> > +             /* B': run in parallel with B on engine 1, i.e. not before A! */
> > +             set_load_balancer(i915, ctx, siblings, count, NULL);
> 
> Why are you repeating the same set_load_balancer calls?

Because we need a new timeline wrt the spinner.
(Well B' does. I originally tried using the virtual engine here, but
semaphores are not guaranteed so it works better by forcing the engine
selection to the known bad pattern.)

> > +             execbuf.flags = I915_EXEC_FENCE_SUBMIT | I915_EXEC_FENCE_OUT;
> > +             execbuf.flags |= 1; /* same engine as spinner */
> 
> Engine 3? But there are only two engines in the map, plus the virtual. 
> So 0, 1, 2 - how does this work for you?

execbuf.flags = 1 | FENCE_SUBMIT | FENCE_OUT;

> > +             execbuf.rsvd2 >>= 32;
> > +             gem_execbuf_wr(i915, &execbuf);
> > +
> > +             /* Wait for any magic timeslicing or preemptions... */
> > +             igt_assert_eq(sync_fence_wait(execbuf.rsvd2 >> 32, 1000),
> > +                           -ETIME);
> > +
> > +             igt_debugfs_dump(i915, "i915_engine_info");
> > +
> > +             /*
> > +              * ... which should not have happened, so everything is still
> > +              * waiting on the spinner
> > +              */
> > +             igt_assert_eq(sync_fence_status(spin->out_fence), 0);
> > +             igt_assert_eq(sync_fence_status(execbuf.rsvd2 & 0xffffffff), 0);
> > +             igt_assert_eq(sync_fence_status(execbuf.rsvd2 >> 32), 0);
> > +
> > +             igt_spin_free(i915, spin);
> > +             gem_sync(i915, batch.handle);
> > +
> > +             igt_assert_eq(sync_fence_status(execbuf.rsvd2 & 0xffffffff), 1);
> > +             igt_assert_eq(sync_fence_status(execbuf.rsvd2 >> 32), 1);
> > +
> > +             close(execbuf.rsvd2);
> > +             close(execbuf.rsvd2 >> 32);
> > +     }
> 
> How evil to lower ctx prio for A only?

A draw. You swap the timeslicing catch for the explicit preemption
check.
-Chris
Tvrtko Ursulin Nov. 27, 2019, 3:20 p.m. UTC | #3
On 27/11/2019 15:15, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-11-27 15:08:25)
>>
>> On 27/11/2019 14:50, Chris Wilson wrote:
>>> Check that the submit fence couples into the dependency chain so that
>>> the bonded pair cannot over take any of the earlier requests.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    tests/i915/gem_exec_balancer.c | 88 ++++++++++++++++++++++++++++++++++
>>>    1 file changed, 88 insertions(+)
>>>
>>> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
>>> index 86028cfdd..ff5bd0134 100644
>>> --- a/tests/i915/gem_exec_balancer.c
>>> +++ b/tests/i915/gem_exec_balancer.c
>>> @@ -855,6 +855,91 @@ static void bonded_slice(int i915)
>>>        munmap(stop, 4096);
>>>    }
>>>    
>>> +static void bonded_chain(int i915)
>>> +{
>>> +     struct drm_i915_gem_exec_object2 batch = {
>>> +             .handle = batch_create(i915),
>>> +     };
>>> +     uint32_t ctx;
>>> +
>>> +     /*
>>> +      * Given batches A, B and B', where B and B' are a bonded pair, with
>>> +      * B' depending on B with a submit fence and B depending on A as
>>> +      * an ordinary fence; prove B' cannot complete before A.
>>> +      */
>>> +
>>> +     ctx = gem_context_create(i915);
>>> +
>>> +     for (int class = 0; class < 32; class++) {
>>> +             struct i915_engine_class_instance *siblings;
>>> +             struct drm_i915_gem_execbuffer2 execbuf = {
>>> +                     .buffers_ptr = to_user_pointer(&batch),
>>> +                     .buffer_count = 1,
>>> +                     .rsvd1 = ctx,
>>> +             };
>>> +             unsigned int count;
>>> +             igt_spin_t *spin;
>>> +
>>> +             siblings = list_engines(i915, 1u << class, &count);
>>> +             if (!siblings)
>>> +                     continue;
>>> +
>>> +             if (count < 2) {
>>> +                     free(siblings);
>>> +                     continue;
>>> +             }
>>> +
>>> +             /* A: spin forever on engine 1 */
>>> +             set_load_balancer(i915, ctx, siblings, count, NULL);
>>> +             spin = igt_spin_new(i915,
>>> +                                 .ctx = ctx,
>>> +                                 .engine = 1,
>>> +                                 .flags = (IGT_SPIN_POLL_RUN |
>>> +                                           IGT_SPIN_FENCE_OUT));
>>> +             igt_spin_busywait_until_started(spin);
>>> +
>>> +             /* B: waits for A on engine 2 */
>>> +             set_load_balancer(i915, ctx, siblings, count, NULL);
>>> +             execbuf.rsvd2 = spin->out_fence;
>>> +             execbuf.flags = I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
>>> +             execbuf.flags |= 2; /* opposite engine to spinner */
>>
>> Why or?
> 
> execbuf.flags = 2 | FENCE_IN | FENCE_OUT

/me facepalms

>>
>>> +             gem_execbuf_wr(i915, &execbuf);
>>> +
>>> +             /* B': run in parallel with B on engine 1, i.e. not before A! */
>>> +             set_load_balancer(i915, ctx, siblings, count, NULL);
>>
>> Why are you repeating the same set_load_balancer calls?
> 
> Because we need a new timeline wrt the spinner.
> (Well B' does. I originally tried using the virtual engine here, but
> semaphores are not guaranteed so it works better by forcing the engine
> selection to the known bad pattern.)

Yeah.. makes sense. Please drop a short comment to that effect and with 
that:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

>>> +             execbuf.flags = I915_EXEC_FENCE_SUBMIT | I915_EXEC_FENCE_OUT;
>>> +             execbuf.flags |= 1; /* same engine as spinner */
>>
>> Engine 3? But there are only two engines in the map, plus the virtual.
>> So 0, 1, 2 - how does this work for you?
> 
> execbuf.flags = 1 | FENCE_SUBMIT | FENCE_OUT;
> 
>>> +             execbuf.rsvd2 >>= 32;
>>> +             gem_execbuf_wr(i915, &execbuf);
>>> +
>>> +             /* Wait for any magic timeslicing or preemptions... */
>>> +             igt_assert_eq(sync_fence_wait(execbuf.rsvd2 >> 32, 1000),
>>> +                           -ETIME);
>>> +
>>> +             igt_debugfs_dump(i915, "i915_engine_info");
>>> +
>>> +             /*
>>> +              * ... which should not have happened, so everything is still
>>> +              * waiting on the spinner
>>> +              */
>>> +             igt_assert_eq(sync_fence_status(spin->out_fence), 0);
>>> +             igt_assert_eq(sync_fence_status(execbuf.rsvd2 & 0xffffffff), 0);
>>> +             igt_assert_eq(sync_fence_status(execbuf.rsvd2 >> 32), 0);
>>> +
>>> +             igt_spin_free(i915, spin);
>>> +             gem_sync(i915, batch.handle);
>>> +
>>> +             igt_assert_eq(sync_fence_status(execbuf.rsvd2 & 0xffffffff), 1);
>>> +             igt_assert_eq(sync_fence_status(execbuf.rsvd2 >> 32), 1);
>>> +
>>> +             close(execbuf.rsvd2);
>>> +             close(execbuf.rsvd2 >> 32);
>>> +     }
>>
>> How evil to lower ctx prio for A only?
> 
> A draw. You swap the timeslicing catch for the explicit preemption
> check.
> -Chris
>

Patch
diff mbox series

diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index 86028cfdd..ff5bd0134 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -855,6 +855,91 @@  static void bonded_slice(int i915)
 	munmap(stop, 4096);
 }
 
+static void bonded_chain(int i915)
+{
+	struct drm_i915_gem_exec_object2 batch = {
+		.handle = batch_create(i915),
+	};
+	uint32_t ctx;
+
+	/*
+	 * Given batches A, B and B', where B and B' are a bonded pair, with
+	 * B' depending on B with a submit fence and B depending on A as
+	 * an ordinary fence; prove B' cannot complete before A.
+	 */
+
+	ctx = gem_context_create(i915);
+
+	for (int class = 0; class < 32; class++) {
+		struct i915_engine_class_instance *siblings;
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&batch),
+			.buffer_count = 1,
+			.rsvd1 = ctx,
+		};
+		unsigned int count;
+		igt_spin_t *spin;
+
+		siblings = list_engines(i915, 1u << class, &count);
+		if (!siblings)
+			continue;
+
+		if (count < 2) {
+			free(siblings);
+			continue;
+		}
+
+		/* A: spin forever on engine 1 */
+		set_load_balancer(i915, ctx, siblings, count, NULL);
+		spin = igt_spin_new(i915,
+				    .ctx = ctx,
+				    .engine = 1,
+				    .flags = (IGT_SPIN_POLL_RUN |
+					      IGT_SPIN_FENCE_OUT));
+		igt_spin_busywait_until_started(spin);
+
+		/* B: waits for A on engine 2 */
+		set_load_balancer(i915, ctx, siblings, count, NULL);
+		execbuf.rsvd2 = spin->out_fence;
+		execbuf.flags = I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
+		execbuf.flags |= 2; /* opposite engine to spinner */
+		gem_execbuf_wr(i915, &execbuf);
+
+		/* B': run in parallel with B on engine 1, i.e. not before A! */
+		set_load_balancer(i915, ctx, siblings, count, NULL);
+		execbuf.flags = I915_EXEC_FENCE_SUBMIT | I915_EXEC_FENCE_OUT;
+		execbuf.flags |= 1; /* same engine as spinner */
+		execbuf.rsvd2 >>= 32;
+		gem_execbuf_wr(i915, &execbuf);
+
+		/* Wait for any magic timeslicing or preemptions... */
+		igt_assert_eq(sync_fence_wait(execbuf.rsvd2 >> 32, 1000),
+			      -ETIME);
+
+		igt_debugfs_dump(i915, "i915_engine_info");
+
+		/*
+		 * ... which should not have happened, so everything is still
+		 * waiting on the spinner
+		 */
+		igt_assert_eq(sync_fence_status(spin->out_fence), 0);
+		igt_assert_eq(sync_fence_status(execbuf.rsvd2 & 0xffffffff), 0);
+		igt_assert_eq(sync_fence_status(execbuf.rsvd2 >> 32), 0);
+
+		igt_spin_free(i915, spin);
+		gem_sync(i915, batch.handle);
+
+		igt_assert_eq(sync_fence_status(execbuf.rsvd2 & 0xffffffff), 1);
+		igt_assert_eq(sync_fence_status(execbuf.rsvd2 >> 32), 1);
+
+		close(execbuf.rsvd2);
+		close(execbuf.rsvd2 >> 32);
+	}
+
+	gem_context_destroy(i915, ctx);
+	gem_close(i915, batch.handle);
+}
+
 static void indices(int i915)
 {
 	I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, I915_EXEC_RING_MASK + 1);
@@ -1580,6 +1665,9 @@  igt_main
 	igt_subtest("bonded-slice")
 		bonded_slice(i915);
 
+	igt_subtest("bonded-chain")
+		bonded_chain(i915);
+
 	igt_fixture {
 		igt_stop_hang_detector();
 	}