[i-g-t] i915/gem_exec_balancer: Randomise bonded submission

Message ID	20200527131409.699882-1-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=AdCD=7J=lists.freedesktop.org=intel-gfx-bounces@kernel.org> DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 3BB9E208DB From: Chris Wilson <chris@chris-wilson.co.uk> To: igt-dev@lists.freedesktop.org Date: Wed, 27 May 2020 14:14:09 +0100 Message-Id: <20200527131409.699882-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH i-g-t] i915/gem_exec_balancer: Randomise bonded submission Precedence: list Cc: intel-gfx@lists.freedesktop.org, Chris Wilson <chris@chris-wilson.co.uk> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>
Series	[i-g-t] i915/gem_exec_balancer: Randomise bonded submission \| expand [i-g-t] i915/gem_exec_balancer: Randomise bonded submission

Message ID

20200527131409.699882-1-chris@chris-wilson.co.uk (mailing list archive)

State

New, archived

Headers

DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 3BB9E208DB
From: Chris Wilson <chris@chris-wilson.co.uk>
To: igt-dev@lists.freedesktop.org
Date: Wed, 27 May 2020 14:14:09 +0100
Message-Id: <20200527131409.699882-1-chris@chris-wilson.co.uk>
MIME-Version: 1.0
Subject: [Intel-gfx] [PATCH i-g-t] i915/gem_exec_balancer: Randomise bonded
 submission
Precedence: list
Cc: intel-gfx@lists.freedesktop.org, Chris Wilson <chris@chris-wilson.co.uk>
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

Series

[i-g-t] i915/gem_exec_balancer: Randomise bonded submission | expand

Commit Message

Chris Wilson May 27, 2020, 1:14 p.m. UTC

Randomly submit a paired spinner and its cancellation as a bonded
(submit fence) pair. Apply congestion to the engine with more bonded
pairs to see if the execution order fails. If we prevent a cancellation
from running, then the spinner will remain spinning forever.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/i915/gem_exec_balancer.c | 108 +++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)

Comments

Tvrtko Ursulin May 27, 2020, 3:51 p.m. UTC | #1

On 27/05/2020 14:14, Chris Wilson wrote:
> Randomly submit a paired spinner and its cancellation as a bonded
> (submit fence) pair. Apply congestion to the engine with more bonded
> pairs to see if the execution order fails. If we prevent a cancellation
> from running, then the spinner will remain spinning forever.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/i915/gem_exec_balancer.c | 108 +++++++++++++++++++++++++++++++++
>   1 file changed, 108 insertions(+)
> 
> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> index 80ae82416..98715d726 100644
> --- a/tests/i915/gem_exec_balancer.c
> +++ b/tests/i915/gem_exec_balancer.c
> @@ -1154,6 +1154,111 @@ static void bonded_semaphore(int i915)
>   	gem_context_destroy(i915, ctx);
>   }
>   
> +static void __bonded_dual(int i915,
> +			  const struct i915_engine_class_instance *siblings,
> +			  unsigned int count)
> +{
> +	struct drm_i915_gem_exec_object2 batch = {};
> +	struct drm_i915_gem_execbuffer2 execbuf = {
> +		.buffers_ptr = to_user_pointer(&batch),
> +		.buffer_count = 1,
> +	};
> +	unsigned long cycles = 0;
> +	uint32_t A, B;
> +
> +	A = gem_context_create(i915);
> +	set_load_balancer(i915, A, siblings, count, NULL);
> +
> +	B = gem_context_create(i915);
> +	set_load_balancer(i915, B, siblings, count, NULL);
> +
> +	igt_until_timeout(5) {
> +		unsigned int master = rand() % count + 1;
> +		int timeline, fence;
> +		igt_spin_t *a, *b;
> +
> +		timeline = sw_sync_timeline_create();
> +		fence = sw_sync_timeline_create_fence(timeline, 1);
> +
> +		a = igt_spin_new(i915, A,
> +				 .engine = master,
> +				 .fence = fence,
> +				 .flags = (IGT_SPIN_FENCE_IN |
> +					   IGT_SPIN_POLL_RUN |
> +					   IGT_SPIN_NO_PREEMPTION |
> +					   IGT_SPIN_FENCE_OUT));
> +		b = igt_spin_new(i915, B,
> +				 .engine = master,
> +				 .fence = fence,
> +				 .flags = (IGT_SPIN_FENCE_IN |
> +					   IGT_SPIN_POLL_RUN |
> +					   IGT_SPIN_NO_PREEMPTION |
> +					   IGT_SPIN_FENCE_OUT));
> +
> +		close(fence);

Does this or close(timeline) releases the submissions?

I suggest a variant without the fence anyway for a slightly different 
profile.

> +
> +		if (rand() % 1)
> +			igt_swap(a, b);
> +
> +		batch.handle = create_semaphore_to_spinner(i915, a);

These will be preemptible right? More so they schedule out on semaphore 
interrupt straight away? So I don't see how slaves can be prevented from 
running because they always are on a different physical engine.

We would need a test flavour where slave spins non-preemptively until 
either the master or CPU signals to stop. Coming up with the exact 
scheme to achieve this might be challenging. I can think about this more 
tomorrow.

And also I don't know why this would fail with my patch! I'll want to 
have a experiment with that tomorrow as well.

Regards,

Tvrtko

> +		execbuf.rsvd1 = a->execbuf.rsvd1;
> +		execbuf.rsvd2 = a->out_fence;
> +		do {
> +			execbuf.flags = rand() % count + 1;
> +		} while (execbuf.flags == master);
> +		execbuf.flags |= I915_EXEC_FENCE_SUBMIT;
> +		gem_execbuf(i915, &execbuf);
> +		gem_close(i915, batch.handle);
> +
> +		batch.handle = create_semaphore_to_spinner(i915, b);
> +		execbuf.rsvd1 = b->execbuf.rsvd1;
> +		execbuf.rsvd2 = b->out_fence;
> +		do {
> +			execbuf.flags = rand() % count + 1;
> +		} while (execbuf.flags == master);
> +		execbuf.flags |= I915_EXEC_FENCE_SUBMIT;
> +		gem_execbuf(i915, &execbuf);
> +		gem_close(i915, batch.handle);
> +
> +		close(timeline);
> +
> +		gem_sync(i915, a->handle);
> +		gem_sync(i915, b->handle);
> +
> +		igt_spin_free(i915, a);
> +		igt_spin_free(i915, b);
> +		cycles++;
> +	}
> +
> +	igt_info("%lu cycles\n", cycles);
> +
> +	gem_context_destroy(i915, A);
> +	gem_context_destroy(i915, B);
> +}
> +
> +static void bonded_dual(int i915)
> +{
> +	/*
> +	 * The purpose of bonded submission is to execute one or more requests
> +	 * concurrently. However, the very nature of that requires coordinated
> +	 * submission across multiple engines.
> +	 */
> +	igt_require(gem_scheduler_has_preemption(i915));
> +
> +	for (int class = 1; class < 32; class++) {
> +		struct i915_engine_class_instance *siblings;
> +		unsigned int count;
> +
> +		siblings = list_engines(i915, 1u << class, &count);
> +		if (count > 1) {
> +			igt_fork(child, count + 1)
> +				__bonded_dual(i915, siblings, count);
> +			igt_waitchildren();
> +		}
> +		free(siblings);
> +	}
> +}
> +
>   static void __bonded_nohang(int i915, uint32_t ctx,
>   			    const struct i915_engine_class_instance *siblings,
>   			    unsigned int count,
> @@ -2284,6 +2389,9 @@ igt_main
>   	igt_subtest("bonded-semaphore")
>   		bonded_semaphore(i915);
>   
> +	igt_subtest("bonded-dual")
> +		bonded_dual(i915);
> +
>   	igt_fixture {
>   		igt_stop_hang_detector();
>   	}
>

Chris Wilson May 27, 2020, 4 p.m. UTC | #2

Quoting Tvrtko Ursulin (2020-05-27 16:51:50)
> 
> On 27/05/2020 14:14, Chris Wilson wrote:
> > Randomly submit a paired spinner and its cancellation as a bonded
> > (submit fence) pair. Apply congestion to the engine with more bonded
> > pairs to see if the execution order fails. If we prevent a cancellation
> > from running, then the spinner will remain spinning forever.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   tests/i915/gem_exec_balancer.c | 108 +++++++++++++++++++++++++++++++++
> >   1 file changed, 108 insertions(+)
> > 
> > diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
> > index 80ae82416..98715d726 100644
> > --- a/tests/i915/gem_exec_balancer.c
> > +++ b/tests/i915/gem_exec_balancer.c
> > @@ -1154,6 +1154,111 @@ static void bonded_semaphore(int i915)
> >       gem_context_destroy(i915, ctx);
> >   }
> >   
> > +static void __bonded_dual(int i915,
> > +                       const struct i915_engine_class_instance *siblings,
> > +                       unsigned int count)
> > +{
> > +     struct drm_i915_gem_exec_object2 batch = {};
> > +     struct drm_i915_gem_execbuffer2 execbuf = {
> > +             .buffers_ptr = to_user_pointer(&batch),
> > +             .buffer_count = 1,
> > +     };
> > +     unsigned long cycles = 0;
> > +     uint32_t A, B;
> > +
> > +     A = gem_context_create(i915);
> > +     set_load_balancer(i915, A, siblings, count, NULL);
> > +
> > +     B = gem_context_create(i915);
> > +     set_load_balancer(i915, B, siblings, count, NULL);
> > +
> > +     igt_until_timeout(5) {
> > +             unsigned int master = rand() % count + 1;
> > +             int timeline, fence;
> > +             igt_spin_t *a, *b;
> > +
> > +             timeline = sw_sync_timeline_create();
> > +             fence = sw_sync_timeline_create_fence(timeline, 1);
> > +
> > +             a = igt_spin_new(i915, A,
> > +                              .engine = master,
> > +                              .fence = fence,
> > +                              .flags = (IGT_SPIN_FENCE_IN |
> > +                                        IGT_SPIN_POLL_RUN |
> > +                                        IGT_SPIN_NO_PREEMPTION |
> > +                                        IGT_SPIN_FENCE_OUT));
> > +             b = igt_spin_new(i915, B,
> > +                              .engine = master,
> > +                              .fence = fence,
> > +                              .flags = (IGT_SPIN_FENCE_IN |
> > +                                        IGT_SPIN_POLL_RUN |
> > +                                        IGT_SPIN_NO_PREEMPTION |
> > +                                        IGT_SPIN_FENCE_OUT));
> > +
> > +             close(fence);
> 
> Does this or close(timeline) releases the submissions?

The timeline is the release.
 
> I suggest a variant without the fence anyway for a slightly different 
> profile.

It didn't make any difference. But yes I contemplated the variant.
 
> > +
> > +             if (rand() % 1)
> > +                     igt_swap(a, b);
> > +
> > +             batch.handle = create_semaphore_to_spinner(i915, a);
> 
> These will be preemptible right? More so they schedule out on semaphore 
> interrupt straight away? So I don't see how slaves can be prevented from 
> running because they always are on a different physical engine.

Right, as I understood your description the masters could only be on one
engine with the bonded requests on the other.

And I don't know how to wait from the GPU other than with the
preemptible semaphore spin. A non preemptible wait would be another
spinner, but that would not coordinate across the bond. To reproduce the
non-preemptible HW might require the actual instruction flow.

> We would need a test flavour where slave spins non-preemptively until 
> either the master or CPU signals to stop. Coming up with the exact 
> scheme to achieve this might be challenging. I can think about this more 
> tomorrow.
> 
> And also I don't know why this would fail with my patch! I'll want to 
> have a experiment with that tomorrow as well.

That was indeed mysterious :) I don't have an explanation either yet.
-Chris

Chris Wilson May 27, 2020, 4:43 p.m. UTC | #3

Quoting Chris Wilson (2020-05-27 17:00:55)
> Quoting Tvrtko Ursulin (2020-05-27 16:51:50)
> > 
> > On 27/05/2020 14:14, Chris Wilson wrote:
> > > +
> > > +             if (rand() % 1)
> > > +                     igt_swap(a, b);
> > > +
> > > +             batch.handle = create_semaphore_to_spinner(i915, a);
> > 
> > These will be preemptible right? More so they schedule out on semaphore 
> > interrupt straight away? So I don't see how slaves can be prevented from 
> > running because they always are on a different physical engine.
> 
> Right, as I understood your description the masters could only be on one
> engine with the bonded requests on the other.
> 
> And I don't know how to wait from the GPU other than with the
> preemptible semaphore spin. A non preemptible wait would be another
> spinner, but that would not coordinate across the bond. To reproduce the
> non-preemptible HW might require the actual instruction flow.

The most obvious way is the same igt_spin_t submitted to both engines
with a submit fence. But that still requires the CPU to terminate the
spinner. And has similarities to the existing tests.

Now we could have one spinner that counted or watched the timestamp,
looping back on itself with a predicated jump, and on completion
terminated the bonded spinner. That way we could have two
non-preemptible spinner of finite duration. But the coupling via memory
is still very loose and doesn't require both requests to be running
concurrently. Slightly better than would be to rewrite the jump target
from A and then go in another spin on A waiting for a similar update
from B. It still does not explode if either is preempted out, but more
likely to be caught spinning forever.
-Chris

diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index 80ae82416..98715d726 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -1154,6 +1154,111 @@  static void bonded_semaphore(int i915)
 	gem_context_destroy(i915, ctx);
 }
 
+static void __bonded_dual(int i915,
+			  const struct i915_engine_class_instance *siblings,
+			  unsigned int count)
+{
+	struct drm_i915_gem_exec_object2 batch = {};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&batch),
+		.buffer_count = 1,
+	};
+	unsigned long cycles = 0;
+	uint32_t A, B;
+
+	A = gem_context_create(i915);
+	set_load_balancer(i915, A, siblings, count, NULL);
+
+	B = gem_context_create(i915);
+	set_load_balancer(i915, B, siblings, count, NULL);
+
+	igt_until_timeout(5) {
+		unsigned int master = rand() % count + 1;
+		int timeline, fence;
+		igt_spin_t *a, *b;
+
+		timeline = sw_sync_timeline_create();
+		fence = sw_sync_timeline_create_fence(timeline, 1);
+
+		a = igt_spin_new(i915, A,
+				 .engine = master,
+				 .fence = fence,
+				 .flags = (IGT_SPIN_FENCE_IN |
+					   IGT_SPIN_POLL_RUN |
+					   IGT_SPIN_NO_PREEMPTION |
+					   IGT_SPIN_FENCE_OUT));
+		b = igt_spin_new(i915, B,
+				 .engine = master,
+				 .fence = fence,
+				 .flags = (IGT_SPIN_FENCE_IN |
+					   IGT_SPIN_POLL_RUN |
+					   IGT_SPIN_NO_PREEMPTION |
+					   IGT_SPIN_FENCE_OUT));
+
+		close(fence);
+
+		if (rand() % 1)
+			igt_swap(a, b);
+
+		batch.handle = create_semaphore_to_spinner(i915, a);
+		execbuf.rsvd1 = a->execbuf.rsvd1;
+		execbuf.rsvd2 = a->out_fence;
+		do {
+			execbuf.flags = rand() % count + 1;
+		} while (execbuf.flags == master);
+		execbuf.flags |= I915_EXEC_FENCE_SUBMIT;
+		gem_execbuf(i915, &execbuf);
+		gem_close(i915, batch.handle);
+
+		batch.handle = create_semaphore_to_spinner(i915, b);
+		execbuf.rsvd1 = b->execbuf.rsvd1;
+		execbuf.rsvd2 = b->out_fence;
+		do {
+			execbuf.flags = rand() % count + 1;
+		} while (execbuf.flags == master);
+		execbuf.flags |= I915_EXEC_FENCE_SUBMIT;
+		gem_execbuf(i915, &execbuf);
+		gem_close(i915, batch.handle);
+
+		close(timeline);
+
+		gem_sync(i915, a->handle);
+		gem_sync(i915, b->handle);
+
+		igt_spin_free(i915, a);
+		igt_spin_free(i915, b);
+		cycles++;
+	}
+
+	igt_info("%lu cycles\n", cycles);
+
+	gem_context_destroy(i915, A);
+	gem_context_destroy(i915, B);
+}
+
+static void bonded_dual(int i915)
+{
+	/*
+	 * The purpose of bonded submission is to execute one or more requests
+	 * concurrently. However, the very nature of that requires coordinated
+	 * submission across multiple engines.
+	 */
+	igt_require(gem_scheduler_has_preemption(i915));
+
+	for (int class = 1; class < 32; class++) {
+		struct i915_engine_class_instance *siblings;
+		unsigned int count;
+
+		siblings = list_engines(i915, 1u << class, &count);
+		if (count > 1) {
+			igt_fork(child, count + 1)
+				__bonded_dual(i915, siblings, count);
+			igt_waitchildren();
+		}
+		free(siblings);
+	}
+}
+
 static void __bonded_nohang(int i915, uint32_t ctx,
 			    const struct i915_engine_class_instance *siblings,
 			    unsigned int count,
@@ -2284,6 +2389,9 @@  igt_main
 	igt_subtest("bonded-semaphore")
 		bonded_semaphore(i915);
 
+	igt_subtest("bonded-dual")
+		bonded_dual(i915);
+
 	igt_fixture {
 		igt_stop_hang_detector();
 	}

[i-g-t] i915/gem_exec_balancer: Randomise bonded submission

Commit Message

Comments

Patch