diff mbox series

[i-g-t,05/24] i915/gem_exec_schedule: Verify that using HW semaphores doesn't block

Message ID 20190322092155.1656-5-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [i-g-t,01/24] i915/gem_exec_latency: Measure the latency of context switching | expand

Commit Message

Chris Wilson March 22, 2019, 9:21 a.m. UTC
We may use HW semaphores to schedule nearly-ready work such that they
are already spinning on the GPU waiting for the completion on another
engine. However, we don't want for that spinning task to actually block
any real work should it be scheduled.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_schedule.c | 87 ++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

Comments

Tvrtko Ursulin March 26, 2019, 9:19 a.m. UTC | #1
On 22/03/2019 09:21, Chris Wilson wrote:
> We may use HW semaphores to schedule nearly-ready work such that they
> are already spinning on the GPU waiting for the completion on another
> engine. However, we don't want for that spinning task to actually block
> any real work should it be scheduled.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_exec_schedule.c | 87 ++++++++++++++++++++++++++++++++++
>   1 file changed, 87 insertions(+)
> 
> diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> index 4f0577b4e..ae850c4a3 100644
> --- a/tests/i915/gem_exec_schedule.c
> +++ b/tests/i915/gem_exec_schedule.c
> @@ -48,6 +48,10 @@
>   
>   #define MAX_CONTEXTS 1024
>   
> +#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
> +#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> +#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
> +
>   IGT_TEST_DESCRIPTION("Check that we can control the order of execution");
>   
>   static inline
> @@ -320,6 +324,86 @@ static void smoketest(int fd, unsigned ring, unsigned timeout)
>   	}
>   }
>   
> +static uint32_t __batch_create(int i915, uint32_t offset)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle;
> +
> +	handle = gem_create(i915, ALIGN(offset + 4, 4096));
> +	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> +
> +	return handle;
> +}
> +
> +static uint32_t batch_create(int i915)
> +{
> +	return __batch_create(i915, 0);
> +}
> +
> +static void semaphore_userlock(int i915)
> +{
> +	struct drm_i915_gem_exec_object2 obj = {
> +		.handle = batch_create(i915),
> +	};
> +	igt_spin_t *spin = NULL;
> +	unsigned int engine;
> +	uint32_t scratch;
> +
> +	igt_require(gem_scheduler_has_preemption(i915));
> +
> +	/*
> +	 * Given the use of semaphores to govern parallel submission
> +	 * of nearly-ready work to HW, we still want to run actually
> +	 * ready work immediately. Without semaphores, the dependent
> +	 * work wouldn't be submitted so our ready work will run.
> +	 */
> +
> +	scratch = gem_create(i915, 4096);
> +	for_each_physical_engine(i915, engine) {
> +		if (!spin) {
> +			spin = igt_spin_batch_new(i915,
> +						  .dependency = scratch,
> +						  .engine = engine);
> +		} else {
> +			typeof(spin->execbuf.flags) saved = spin->execbuf.flags;

u64 reads better and struct eb won't change anyway.

> +
> +			spin->execbuf.flags &= ~ENGINE_MASK;
> +			spin->execbuf.flags |= engine;
> +
> +			gem_execbuf(i915, &spin->execbuf);

Do you need to wait for spinner to be running before submitting these 
ones, to make sure the logic emits a semaphore poll for them and submits 
them straight away?

> +
> +			spin->execbuf.flags = saved;
> +		}
> +	}
> +	igt_require(spin);
> +	gem_close(i915, scratch);
> +
> +	/*
> +	 * On all dependent engines, the request may be executing (busywaiting
> +	 * on a HW semaphore) but it should not prevent any real work from
> +	 * taking precedence.
> +	 */
> +	scratch = gem_context_create(i915);
> +	for_each_physical_engine(i915, engine) {
> +		struct drm_i915_gem_execbuffer2 execbuf = {
> +			.buffers_ptr = to_user_pointer(&obj),
> +			.buffer_count = 1,
> +			.flags = engine,
> +			.rsvd1 = scratch,
> +		};
> +
> +		if (engine == (spin->execbuf.flags & ENGINE_MASK))
> +			continue;

Ugh saving and restoring eb flags to find the spinning engine here I 
feel will be a land mine for the upcoming for_each_physical_engine 
conversion but what can we do.

> +
> +		gem_execbuf(i915, &execbuf);
> +	}
> +	gem_context_destroy(i915, scratch);
> +	gem_sync(i915, obj.handle); /* to hang unless we can preempt */

I got lost - how does this work if the spinner is still keeping the 
obj.handle busy?

> +	gem_close(i915, obj.handle);
> +
> +	igt_spin_batch_free(i915, spin);
> +}
> +
>   static void reorder(int fd, unsigned ring, unsigned flags)
>   #define EQUAL 1
>   {
> @@ -1307,6 +1391,9 @@ igt_main
>   			igt_require(gem_scheduler_has_ctx_priority(fd));
>   		}
>   
> +		igt_subtest("semaphore-user")
> +			semaphore_userlock(fd);
> +
>   		igt_subtest("smoketest-all")
>   			smoketest(fd, ALL_ENGINES, 30);
>   
> 

Regards,

Tvrtko
Chris Wilson March 26, 2019, 10:03 a.m. UTC | #2
Quoting Tvrtko Ursulin (2019-03-26 09:19:33)
> 
> 
> On 22/03/2019 09:21, Chris Wilson wrote:
> > We may use HW semaphores to schedule nearly-ready work such that they
> > are already spinning on the GPU waiting for the completion on another
> > engine. However, we don't want for that spinning task to actually block
> > any real work should it be scheduled.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >   tests/i915/gem_exec_schedule.c | 87 ++++++++++++++++++++++++++++++++++
> >   1 file changed, 87 insertions(+)
> > 
> > diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> > index 4f0577b4e..ae850c4a3 100644
> > --- a/tests/i915/gem_exec_schedule.c
> > +++ b/tests/i915/gem_exec_schedule.c
> > @@ -48,6 +48,10 @@
> >   
> >   #define MAX_CONTEXTS 1024
> >   
> > +#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
> > +#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
> > +#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
> > +
> >   IGT_TEST_DESCRIPTION("Check that we can control the order of execution");
> >   
> >   static inline
> > @@ -320,6 +324,86 @@ static void smoketest(int fd, unsigned ring, unsigned timeout)
> >       }
> >   }
> >   
> > +static uint32_t __batch_create(int i915, uint32_t offset)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     uint32_t handle;
> > +
> > +     handle = gem_create(i915, ALIGN(offset + 4, 4096));
> > +     gem_write(i915, handle, offset, &bbe, sizeof(bbe));
> > +
> > +     return handle;
> > +}
> > +
> > +static uint32_t batch_create(int i915)
> > +{
> > +     return __batch_create(i915, 0);
> > +}
> > +
> > +static void semaphore_userlock(int i915)
> > +{
> > +     struct drm_i915_gem_exec_object2 obj = {
> > +             .handle = batch_create(i915),
> > +     };
> > +     igt_spin_t *spin = NULL;
> > +     unsigned int engine;
> > +     uint32_t scratch;
> > +
> > +     igt_require(gem_scheduler_has_preemption(i915));
> > +
> > +     /*
> > +      * Given the use of semaphores to govern parallel submission
> > +      * of nearly-ready work to HW, we still want to run actually
> > +      * ready work immediately. Without semaphores, the dependent
> > +      * work wouldn't be submitted so our ready work will run.
> > +      */
> > +
> > +     scratch = gem_create(i915, 4096);
> > +     for_each_physical_engine(i915, engine) {
> > +             if (!spin) {
> > +                     spin = igt_spin_batch_new(i915,
> > +                                               .dependency = scratch,
> > +                                               .engine = engine);
> > +             } else {
> > +                     typeof(spin->execbuf.flags) saved = spin->execbuf.flags;
> 
> u64 reads better and struct eb won't change anyway.

If it were only u64!

> > +                     spin->execbuf.flags &= ~ENGINE_MASK;
> > +                     spin->execbuf.flags |= engine;
> > +
> > +                     gem_execbuf(i915, &spin->execbuf);
> 
> Do you need to wait for spinner to be running before submitting these 
> ones, to make sure the logic emits a semaphore poll for them and submits 
> them straight away?

Not required, the semaphores are emitted based on completion status.

> > +                     spin->execbuf.flags = saved;
> > +             }
> > +     }
> > +     igt_require(spin);
> > +     gem_close(i915, scratch);
> > +
> > +     /*
> > +      * On all dependent engines, the request may be executing (busywaiting
> > +      * on a HW semaphore) but it should not prevent any real work from
> > +      * taking precedence.
> > +      */
> > +     scratch = gem_context_create(i915);
> > +     for_each_physical_engine(i915, engine) {
> > +             struct drm_i915_gem_execbuffer2 execbuf = {
> > +                     .buffers_ptr = to_user_pointer(&obj),
> > +                     .buffer_count = 1,
> > +                     .flags = engine,
> > +                     .rsvd1 = scratch,
> > +             };
> > +
> > +             if (engine == (spin->execbuf.flags & ENGINE_MASK))
> > +                     continue;
> 
> Ugh saving and restoring eb flags to find the spinning engine here I 
> feel will be a land mine for the upcoming for_each_physical_engine 
> conversion but what can we do.

It will be fine. Unless you plan to randomise discovery just to make
things interesting. :)

We can make reuse of engines explicit if use ctx->engines[] instead of
for_each_physical_engine().

> > +             gem_execbuf(i915, &execbuf);
> > +     }
> > +     gem_context_destroy(i915, scratch);
> > +     gem_sync(i915, obj.handle); /* to hang unless we can preempt */
> 
> I got lost - how does this work if the spinner is still keeping the 
> obj.handle busy?

obj.handle and spinner are separate, and on different contexts. So we
fill all engines with the spinner + semaphores. Submit a new batch that
has no dependencies, and our expectation is that it is able to run ahead
of the semaphore busywait. Is that a reasonable expectation for
userspace? (Note that this demonstrates a subtle change in the ABI with
the introduction of plain semaphores, as without preemption that patch
causes this test to hang. So whether or not it is a reasonable
expectation, the change in behaviour is unwanted, but may have gone
unnoticed)
-Chris
diff mbox series

Patch

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index 4f0577b4e..ae850c4a3 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -48,6 +48,10 @@ 
 
 #define MAX_CONTEXTS 1024
 
+#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
+#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
 IGT_TEST_DESCRIPTION("Check that we can control the order of execution");
 
 static inline
@@ -320,6 +324,86 @@  static void smoketest(int fd, unsigned ring, unsigned timeout)
 	}
 }
 
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, ALIGN(offset + 4, 4096));
+	gem_write(i915, handle, offset, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static uint32_t batch_create(int i915)
+{
+	return __batch_create(i915, 0);
+}
+
+static void semaphore_userlock(int i915)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+	};
+	igt_spin_t *spin = NULL;
+	unsigned int engine;
+	uint32_t scratch;
+
+	igt_require(gem_scheduler_has_preemption(i915));
+
+	/*
+	 * Given the use of semaphores to govern parallel submission
+	 * of nearly-ready work to HW, we still want to run actually
+	 * ready work immediately. Without semaphores, the dependent
+	 * work wouldn't be submitted so our ready work will run.
+	 */
+
+	scratch = gem_create(i915, 4096);
+	for_each_physical_engine(i915, engine) {
+		if (!spin) {
+			spin = igt_spin_batch_new(i915,
+						  .dependency = scratch,
+						  .engine = engine);
+		} else {
+			typeof(spin->execbuf.flags) saved = spin->execbuf.flags;
+
+			spin->execbuf.flags &= ~ENGINE_MASK;
+			spin->execbuf.flags |= engine;
+
+			gem_execbuf(i915, &spin->execbuf);
+
+			spin->execbuf.flags = saved;
+		}
+	}
+	igt_require(spin);
+	gem_close(i915, scratch);
+
+	/*
+	 * On all dependent engines, the request may be executing (busywaiting
+	 * on a HW semaphore) but it should not prevent any real work from
+	 * taking precedence.
+	 */
+	scratch = gem_context_create(i915);
+	for_each_physical_engine(i915, engine) {
+		struct drm_i915_gem_execbuffer2 execbuf = {
+			.buffers_ptr = to_user_pointer(&obj),
+			.buffer_count = 1,
+			.flags = engine,
+			.rsvd1 = scratch,
+		};
+
+		if (engine == (spin->execbuf.flags & ENGINE_MASK))
+			continue;
+
+		gem_execbuf(i915, &execbuf);
+	}
+	gem_context_destroy(i915, scratch);
+	gem_sync(i915, obj.handle); /* to hang unless we can preempt */
+	gem_close(i915, obj.handle);
+
+	igt_spin_batch_free(i915, spin);
+}
+
 static void reorder(int fd, unsigned ring, unsigned flags)
 #define EQUAL 1
 {
@@ -1307,6 +1391,9 @@  igt_main
 			igt_require(gem_scheduler_has_ctx_priority(fd));
 		}
 
+		igt_subtest("semaphore-user")
+			semaphore_userlock(fd);
+
 		igt_subtest("smoketest-all")
 			smoketest(fd, ALL_ENGINES, 30);