diff mbox series

[i-g-t,2/7] igt: Use COND_BBEND for busy spinning on gen9

Message ID 20191113154913.8787-2-mika.kuoppala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series [i-g-t,1/7] lib/igt_dummyload: Send batch as first | expand

Commit Message

Mika Kuoppala Nov. 13, 2019, 3:49 p.m. UTC
From: Jon Bloomfield <jon.bloomfield@intel.com>

gen9+ introduces a cmdparser for the BLT engine which copies the
incoming BB to a kmd owned buffer for submission (to prevent changes
being made after the bb has been safely scanned). This breaks the
spin functionality because it relies on changing the submitted spin
buffers in order to terminate them.

Instead, for gen9+, we change the semantics by introducing a COND_BB_END
into the infinite loop, to wait until a memory flag (in anothe bo) is
cleared.

v2: Correct nop length to avoid overwriting bb_end instr when using
    a dependency bo (cork)
v3: fix conflicts on igt_dummyload (Mika)
v4: s/bool running/uint32_t running, fix r->delta (Mika)
v5: remove overzealous assert (Mika)
v6: rebase on top of lib changes (Mika)
v7: rework on top of public igt lib changes (Mika)
v8: rebase

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com> (v2)
Cc: Joonas Lahtinen <joonas.lahtinen@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
---
 lib/i830_reg.h                          |  2 +-
 lib/igt_dummyload.c                     | 46 +++++++++++++++++++++++--
 lib/igt_dummyload.h                     |  3 ++
 lib/intel_reg.h                         |  3 ++
 tests/i915/gem_double_irq_loop.c        |  2 --
 tests/i915/gem_write_read_ring_switch.c |  2 +-
 6 files changed, 52 insertions(+), 6 deletions(-)

Comments

Chris Wilson Nov. 13, 2019, 4:02 p.m. UTC | #1
Quoting Mika Kuoppala (2019-11-13 15:49:08)
> From: Jon Bloomfield <jon.bloomfield@intel.com>
> 
> gen9+ introduces a cmdparser for the BLT engine which copies the
> incoming BB to a kmd owned buffer for submission (to prevent changes
> being made after the bb has been safely scanned). This breaks the
> spin functionality because it relies on changing the submitted spin
> buffers in order to terminate them.
> 
> Instead, for gen9+, we change the semantics by introducing a COND_BB_END
> into the infinite loop, to wait until a memory flag (in anothe bo) is
> cleared.
> 
> v2: Correct nop length to avoid overwriting bb_end instr when using
>     a dependency bo (cork)
> v3: fix conflicts on igt_dummyload (Mika)
> v4: s/bool running/uint32_t running, fix r->delta (Mika)
> v5: remove overzealous assert (Mika)
> v6: rebase on top of lib changes (Mika)
> v7: rework on top of public igt lib changes (Mika)
> v8: rebase
> 
> Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com> (v2)
> Cc: Joonas Lahtinen <joonas.lahtinen@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
> ---
>  lib/i830_reg.h                          |  2 +-
>  lib/igt_dummyload.c                     | 46 +++++++++++++++++++++++--
>  lib/igt_dummyload.h                     |  3 ++
>  lib/intel_reg.h                         |  3 ++
>  tests/i915/gem_double_irq_loop.c        |  2 --
>  tests/i915/gem_write_read_ring_switch.c |  2 +-
>  6 files changed, 52 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/i830_reg.h b/lib/i830_reg.h
> index a57691c7..41020256 100644
> --- a/lib/i830_reg.h
> +++ b/lib/i830_reg.h
> @@ -43,7 +43,7 @@
>  /* broadwater flush bits */
>  #define BRW_MI_GLOBAL_SNAPSHOT_RESET   (1 << 3)
>  
> -#define MI_COND_BATCH_BUFFER_END       (0x36<<23 | 1)
> +#define MI_COND_BATCH_BUFFER_END       (0x36 << 23)
>  #define MI_DO_COMPARE                  (1<<21)
>  
>  #define MI_BATCH_BUFFER_END    (0xA << 23)
> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
> index c079bd04..a88c8582 100644
> --- a/lib/igt_dummyload.c
> +++ b/lib/igt_dummyload.c
> @@ -130,7 +130,15 @@ emit_recursive_batch(igt_spin_t *spin,
>         spin->poll_handle = poll->handle;
>         execbuf->buffer_count++;
>  
> -       if (opts->flags & IGT_SPIN_POLL_RUN) {
> +       /*
> +        * For gen9+ we use a conditional loop rather than an
> +        * infinite loop, because we are unable to modify the
> +        * BB's once they have been scanned by the cmdparser
> +        * We share the poll buffer for the conditional test
> +        * and is always the first buffer in the batch list
> +        */
> +
> +       if (gen >= 9 || (opts->flags & IGT_SPIN_POLL_RUN)) {

Nah, you could just sample the batch buffer rather than always adding
the poll buffer (since the cmdparser implicitly creates the second buffer
for you). Using the comparison value of MI_BATCH_BUFFER_END you wouldn't
even have to worry about altering callers.
-Chris
Mika Kuoppala Nov. 13, 2019, 5:19 p.m. UTC | #2
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-11-13 15:49:08)
>> From: Jon Bloomfield <jon.bloomfield@intel.com>
>> 
>> gen9+ introduces a cmdparser for the BLT engine which copies the
>> incoming BB to a kmd owned buffer for submission (to prevent changes
>> being made after the bb has been safely scanned). This breaks the
>> spin functionality because it relies on changing the submitted spin
>> buffers in order to terminate them.
>> 
>> Instead, for gen9+, we change the semantics by introducing a COND_BB_END
>> into the infinite loop, to wait until a memory flag (in anothe bo) is
>> cleared.
>> 
>> v2: Correct nop length to avoid overwriting bb_end instr when using
>>     a dependency bo (cork)
>> v3: fix conflicts on igt_dummyload (Mika)
>> v4: s/bool running/uint32_t running, fix r->delta (Mika)
>> v5: remove overzealous assert (Mika)
>> v6: rebase on top of lib changes (Mika)
>> v7: rework on top of public igt lib changes (Mika)
>> v8: rebase
>> 
>> Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com> (v2)
>> Cc: Joonas Lahtinen <joonas.lahtinen@intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
>> ---
>>  lib/i830_reg.h                          |  2 +-
>>  lib/igt_dummyload.c                     | 46 +++++++++++++++++++++++--
>>  lib/igt_dummyload.h                     |  3 ++
>>  lib/intel_reg.h                         |  3 ++
>>  tests/i915/gem_double_irq_loop.c        |  2 --
>>  tests/i915/gem_write_read_ring_switch.c |  2 +-
>>  6 files changed, 52 insertions(+), 6 deletions(-)
>> 
>> diff --git a/lib/i830_reg.h b/lib/i830_reg.h
>> index a57691c7..41020256 100644
>> --- a/lib/i830_reg.h
>> +++ b/lib/i830_reg.h
>> @@ -43,7 +43,7 @@
>>  /* broadwater flush bits */
>>  #define BRW_MI_GLOBAL_SNAPSHOT_RESET   (1 << 3)
>>  
>> -#define MI_COND_BATCH_BUFFER_END       (0x36<<23 | 1)
>> +#define MI_COND_BATCH_BUFFER_END       (0x36 << 23)
>>  #define MI_DO_COMPARE                  (1<<21)
>>  
>>  #define MI_BATCH_BUFFER_END    (0xA << 23)
>> diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
>> index c079bd04..a88c8582 100644
>> --- a/lib/igt_dummyload.c
>> +++ b/lib/igt_dummyload.c
>> @@ -130,7 +130,15 @@ emit_recursive_batch(igt_spin_t *spin,
>>         spin->poll_handle = poll->handle;
>>         execbuf->buffer_count++;
>>  
>> -       if (opts->flags & IGT_SPIN_POLL_RUN) {
>> +       /*
>> +        * For gen9+ we use a conditional loop rather than an
>> +        * infinite loop, because we are unable to modify the
>> +        * BB's once they have been scanned by the cmdparser
>> +        * We share the poll buffer for the conditional test
>> +        * and is always the first buffer in the batch list
>> +        */
>> +
>> +       if (gen >= 9 || (opts->flags & IGT_SPIN_POLL_RUN)) {
>
> Nah, you could just sample the batch buffer rather than always adding
> the poll buffer (since the cmdparser implicitly creates the second buffer
> for you). Using the comparison value of MI_BATCH_BUFFER_END you wouldn't
> even have to worry about altering callers.

That is a neat trick! bb start is greater than bb end so
we have that opportunity. Might lead also to much less fixing
on mutables.

-Mika
diff mbox series

Patch

diff --git a/lib/i830_reg.h b/lib/i830_reg.h
index a57691c7..41020256 100644
--- a/lib/i830_reg.h
+++ b/lib/i830_reg.h
@@ -43,7 +43,7 @@ 
 /* broadwater flush bits */
 #define BRW_MI_GLOBAL_SNAPSHOT_RESET   (1 << 3)
 
-#define MI_COND_BATCH_BUFFER_END	(0x36<<23 | 1)
+#define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
 #define MI_DO_COMPARE			(1<<21)
 
 #define MI_BATCH_BUFFER_END	(0xA << 23)
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
index c079bd04..a88c8582 100644
--- a/lib/igt_dummyload.c
+++ b/lib/igt_dummyload.c
@@ -130,7 +130,15 @@  emit_recursive_batch(igt_spin_t *spin,
 	spin->poll_handle = poll->handle;
 	execbuf->buffer_count++;
 
-	if (opts->flags & IGT_SPIN_POLL_RUN) {
+	/*
+	 * For gen9+ we use a conditional loop rather than an
+	 * infinite loop, because we are unable to modify the
+	 * BB's once they have been scanned by the cmdparser
+	 * We share the poll buffer for the conditional test
+	 * and is always the first buffer in the batch list
+	 */
+
+	if (gen >= 9 || (opts->flags & IGT_SPIN_POLL_RUN)) {
 		r = &relocs[batch->relocation_count++];
 
 		if (gen == 4 || gen == 5) {
@@ -150,6 +158,13 @@  emit_recursive_batch(igt_spin_t *spin,
 
 		igt_assert_eq(spin->poll[SPIN_POLL_START_IDX], 0);
 
+		/*
+		 * 2nd word used to control conditional end, and is cleared
+		 * to terminate batch. Must be >=1 to spin
+		 */
+		spin->poll[SPIN_POLL_END_IDX] = 1;
+		__sync_synchronize();
+
 		/* batch is first */
 		r->presumed_offset = BATCH_SIZE;
 		r->target_handle = poll->handle;
@@ -208,7 +223,26 @@  emit_recursive_batch(igt_spin_t *spin,
 	 * trouble. See https://bugs.freedesktop.org/show_bug.cgi?id=102262
 	 */
 	if (!(opts->flags & IGT_SPIN_FAST))
-		cs += 1000;
+		cs += 960;
+
+	igt_assert_eq(spin->using_cond_end, 0);
+	if (gen >= 9) {
+		r = &relocs[batch->relocation_count++];
+
+		/* batch is first, so poll is second */
+		r->presumed_offset = BATCH_SIZE;
+		r->target_handle = poll->handle;
+		r->offset = (cs + 2 - batch_start) * sizeof(*cs);
+		r->read_domains = I915_GEM_DOMAIN_COMMAND;
+		r->delta = SPIN_POLL_END_IDX * sizeof(*cs);
+
+		*cs++ = MI_COND_BATCH_BUFFER_END | 1 << 21 | 2;
+		*cs++ = 0; /* Reference value */
+		*cs++ = r->presumed_offset + r->delta;
+		*cs++ = 0;
+
+		spin->using_cond_end = 1;
+	}
 
 	/* recurse */
 	r = &relocs[batch->relocation_count++];
@@ -401,6 +435,9 @@  void igt_spin_set_timeout(igt_spin_t *spin, int64_t ns)
  */
 void igt_spin_reset(igt_spin_t *spin)
 {
+	if (spin->using_cond_end)
+		spin->poll[SPIN_POLL_END_IDX] = 1;
+
 	if (igt_spin_has_poll(spin))
 		spin->poll[SPIN_POLL_START_IDX] = 0;
 
@@ -419,7 +456,12 @@  void igt_spin_end(igt_spin_t *spin)
 	if (!spin)
 		return;
 
+	if (spin->using_cond_end) {
+		spin->poll[SPIN_POLL_END_IDX] = 0;
+	} else {
 		*spin->condition = MI_BATCH_BUFFER_END;
+	}
+
 	__sync_synchronize();
 }
 
diff --git a/lib/igt_dummyload.h b/lib/igt_dummyload.h
index de5781d7..e9310b4e 100644
--- a/lib/igt_dummyload.h
+++ b/lib/igt_dummyload.h
@@ -51,6 +51,9 @@  typedef struct igt_spin {
 	uint32_t poll_handle;
 	uint32_t *poll;
 #define SPIN_POLL_START_IDX 0
+#define SPIN_POLL_END_IDX 1
+
+	bool using_cond_end;
 } igt_spin_t;
 
 struct igt_spin_factory {
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index 069440cb..10ca760a 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2593,6 +2593,9 @@  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_BATCH_BUFFER		((0x30 << 23) | 1)
 #define MI_BATCH_BUFFER_START	(0x31 << 23)
 #define MI_BATCH_BUFFER_END	(0xA << 23)
+#define MI_COND_BATCH_BUFFER_END	(0x36 << 23)
+#define MI_DO_COMPARE                   (1<<21)
+
 #define MI_BATCH_NON_SECURE		(1)
 #define MI_BATCH_NON_SECURE_I965	(1 << 8)
 #define MI_BATCH_NON_SECURE_HSW		(1<<13) /* Additional bit for RCS */
diff --git a/tests/i915/gem_double_irq_loop.c b/tests/i915/gem_double_irq_loop.c
index b326fc58..f17f61c1 100644
--- a/tests/i915/gem_double_irq_loop.c
+++ b/tests/i915/gem_double_irq_loop.c
@@ -52,8 +52,6 @@  static drm_intel_bo *target_buffer, *blt_bo;
 IGT_TEST_DESCRIPTION("Basic check for missed IRQs on blt ring.");
 
 
-#define MI_COND_BATCH_BUFFER_END	(0x36<<23 | 1)
-#define MI_DO_COMPARE			(1<<21)
 static void
 dummy_reloc_loop(void)
 {
diff --git a/tests/i915/gem_write_read_ring_switch.c b/tests/i915/gem_write_read_ring_switch.c
index ef229cc5..095c13c3 100644
--- a/tests/i915/gem_write_read_ring_switch.c
+++ b/tests/i915/gem_write_read_ring_switch.c
@@ -115,7 +115,7 @@  static void run_test(int ring)
 	 * otherwise the obj->last_write_seqno will be updated. */
 	if (ring == I915_EXEC_RENDER) {
 		BEGIN_BATCH(4, 1);
-		OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
+		OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | 1);
 		OUT_BATCH(0xffffffff); /* compare dword */
 		OUT_RELOC(target_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		OUT_BATCH(MI_NOOP);