[i-g-t] tests/i915/gem_ctx_switch: Update with engine discovery
diff mbox series

Message ID 20190627102019.9019-1-tvrtko.ursulin@linux.intel.com
State New
Headers show
Series
  • [i-g-t] tests/i915/gem_ctx_switch: Update with engine discovery
Related show

Commit Message

Tvrtko Ursulin June 27, 2019, 10:20 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

I converted this one as an example, or at least to drive the discussion,
how more complex tests can be converted.

I have kept the legacy execbuf abi testing prefixed with "legacy-".

New tests were added to exercise physical engines via engine discovery and
also "all" tests have been updated in the same way.

To keep things simpler and avoid having to create separate contexts legacy
tests have to be first since the __for_each_physical_engine iterator would
otherwise configure the default context and confuse the test.

So legacy tests run on the unconfigured (with engine map) context and use
a new helper gem_eb_flags_to_engine to look up the engine from the
intel_execution_engines2 static list. This is only to enable the core test
code to be shared.

Places where new contexts are created had to be updated to either equally
configure the contexts or not. Another new helper,
gem_context_has_engine_map was added to enable this.

Also beware of drive-by formatting changes.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ramalingam C <ramalingam.c@intel.com>
Cc: Andi Shyti <andi.shyti@intel.com>
---
 lib/i915/gem_engine_topology.c        |  27 +++++
 lib/i915/gem_engine_topology.h        |   5 +
 tests/i915/gem_ctx_switch.c           | 138 ++++++++++++++++++--------
 tests/intel-ci/blacklist.txt          |   2 +-
 tests/intel-ci/fast-feedback.testlist |   3 +-
 5 files changed, 132 insertions(+), 43 deletions(-)

Comments

Chris Wilson June 27, 2019, 10:35 a.m. UTC | #1
Quoting Tvrtko Ursulin (2019-06-27 11:20:19)
> diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
> index 647911d4c42e..de39748344b8 100644
> --- a/tests/i915/gem_ctx_switch.c
> +++ b/tests/i915/gem_ctx_switch.c
> @@ -55,7 +55,7 @@ static double elapsed(const struct timespec *start, const struct timespec *end)
>  
>  static int measure_qlen(int fd,
>                         struct drm_i915_gem_execbuffer2 *execbuf,
> -                       unsigned int *engine, unsigned int nengine,
> +                       const struct intel_engine_data *engines,
>                         int timeout)
>  {
>         const struct drm_i915_gem_exec_object2 * const obj =
> @@ -63,15 +63,17 @@ static int measure_qlen(int fd,
>         uint32_t ctx[64];
>         int min = INT_MAX, max = 0;
>  
> -       for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> +       for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
>                 ctx[i] = gem_context_create(fd);
> +               gem_context_set_all_engines(fd, ctx[i]);
> +       }
>  
> -       for (unsigned int n = 0; n < nengine; n++) {
> +       for (unsigned int n = 0; n < engines->nengines; n++) {
>                 uint64_t saved = execbuf->flags;
>                 struct timespec tv = {};
>                 int q;
>  
> -               execbuf->flags |= engine[n];
> +               execbuf->flags |= engines->engines[n].flags;
>  
>                 for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
>                         execbuf->rsvd1 = ctx[i];
> @@ -90,7 +92,8 @@ static int measure_qlen(int fd,
>                  * Be conservative and aim not to overshoot timeout, so scale
>                  * down by 8 for hopefully a max of 12.5% error.
>                  */
> -               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
> +               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) /
> +                   8 + 1;
>                 if (q < min)
>                         min = q;
>                 if (q > max)
> @@ -107,7 +110,7 @@ static int measure_qlen(int fd,
>  }
>  
>  static void single(int fd, uint32_t handle,
> -                  const struct intel_execution_engine *e,
> +                  const struct intel_execution_engine2 *e2,
>                    unsigned flags,
>                    const int ncpus,
>                    int timeout)
> @@ -125,13 +128,14 @@ static void single(int fd, uint32_t handle,
>         shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
>         igt_assert(shared != MAP_FAILED);
>  
> -       gem_require_ring(fd, e->exec_id | e->flags);
> -
>         for (n = 0; n < 64; n++) {
>                 if (flags & QUEUE)
>                         contexts[n] = gem_queue_create(fd);
>                 else
>                         contexts[n] = gem_context_create(fd);
> +
> +               if (gem_context_has_engine_map(fd, 0))
> +                       gem_context_set_all_engines(fd, contexts[n]);

That was a moment of doubt on how well setting engines on a "queue"
worked :)

>         }
>  
>         memset(&obj, 0, sizeof(obj));
> @@ -152,12 +156,12 @@ static void single(int fd, uint32_t handle,
>         execbuf.buffers_ptr = to_user_pointer(&obj);
>         execbuf.buffer_count = 1;
>         execbuf.rsvd1 = contexts[0];
> -       execbuf.flags = e->exec_id | e->flags;
> +       execbuf.flags = e2->flags;
>         execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
>         execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
>         igt_require(__gem_execbuf(fd, &execbuf) == 0);
>         if (__gem_execbuf(fd, &execbuf)) {
> -               execbuf.flags = e->exec_id | e->flags;
> +               execbuf.flags = e2->flags;
>                 reloc.target_handle = obj.handle;
>                 gem_execbuf(fd, &execbuf);
>         }
> @@ -190,7 +194,8 @@ static void single(int fd, uint32_t handle,
>                 clock_gettime(CLOCK_MONOTONIC, &now);
>  
>                 igt_info("[%d] %s: %'u cycles: %.3fus%s\n",
> -                        child, e->name, count, elapsed(&start, &now)*1e6 / count,
> +                        child, e2->name, count,
> +                        elapsed(&start, &now) * 1e6 / count,
>                          flags & INTERRUPTIBLE ? " (interruptible)" : "");
>  
>                 shared[child].elapsed = elapsed(&start, &now);
> @@ -209,7 +214,7 @@ static void single(int fd, uint32_t handle,
>                 }
>  
>                 igt_info("Total %s: %'lu cycles: %.3fus%s\n",
> -                        e->name, total, max*1e6 / total,
> +                        e2->name, total, max*1e6 / total,
>                          flags & INTERRUPTIBLE ? " (interruptible)" : "");
>         }
>  
> @@ -223,25 +228,20 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>  {
>         struct drm_i915_gem_execbuffer2 execbuf;
>         struct drm_i915_gem_exec_object2 obj[2];
> -       unsigned int engine[16], e;
> -       const char *name[16];
> +       struct intel_engine_data engines = { };
>         uint32_t contexts[65];
> -       unsigned int nengine;
>         int n, qlen;
>  
> -       nengine = 0;
> -       for_each_physical_engine(fd, e) {
> -               engine[nengine] = e;
> -               name[nengine] = e__->name;
> -               nengine++;
> -       }
> -       igt_require(nengine);
> +       engines = intel_init_engine_list(fd, 0);
> +       igt_require(engines.nengines);
>  
>         for (n = 0; n < ARRAY_SIZE(contexts); n++) {
>                 if (flags & QUEUE)
>                         contexts[n] = gem_queue_create(fd);
>                 else
>                         contexts[n] = gem_context_create(fd);
> +
> +               gem_context_set_all_engines(fd, contexts[n]);
>         }
>  
>         memset(obj, 0, sizeof(obj));
> @@ -256,7 +256,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>         igt_require(__gem_execbuf(fd, &execbuf) == 0);
>         gem_sync(fd, handle);
>  
> -       qlen = measure_qlen(fd, &execbuf, engine, nengine, timeout);
> +       qlen = measure_qlen(fd, &execbuf, &engines, timeout);
>         igt_info("Using timing depth of %d batches\n", qlen);
>  
>         execbuf.buffers_ptr = to_user_pointer(obj);
> @@ -264,13 +264,15 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>  
>         for (int pot = 2; pot <= 64; pot *= 2) {
>                 for (int nctx = pot - 1; nctx <= pot + 1; nctx++) {
> -                       igt_fork(child, nengine) {
> +                       igt_fork(child, engines.nengines) {
>                                 struct timespec start, now;
>                                 unsigned int count = 0;
>  
>                                 obj[0].handle = gem_create(fd, 4096);
> -                               execbuf.flags |= engine[child];
> -                               for (int loop = 0; loop < ARRAY_SIZE(contexts); loop++) {
> +                               execbuf.flags |= engines.engines[child].flags;
> +                               for (int loop = 0;
> +                                    loop < ARRAY_SIZE(contexts);
> +                                    loop++) {
>                                         execbuf.rsvd1 = contexts[loop];
>                                         gem_execbuf(fd, &execbuf);
>                                 }
> @@ -279,7 +281,8 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>                                 clock_gettime(CLOCK_MONOTONIC, &start);
>                                 do {
>                                         for (int loop = 0; loop < qlen; loop++) {
> -                                               execbuf.rsvd1 = contexts[loop % nctx];
> +                                               execbuf.rsvd1 =
> +                                                       contexts[loop % nctx];
>                                                 gem_execbuf(fd, &execbuf);
>                                         }
>                                         count += qlen;
> @@ -291,8 +294,11 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>                                 gem_close(fd, obj[0].handle);
>  
>                                 igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
> -                                        nctx, child, name[child], count, elapsed(&start, &now)*1e6 / count,
> -                                        flags & INTERRUPTIBLE ? " (interruptible)" : "",
> +                                        nctx, child,
> +                                        engines.engines[child].name, count,
> +                                        elapsed(&start, &now) * 1e6 / count,
> +                                        flags & INTERRUPTIBLE ?
> +                                        " (interruptible)" : "",
>                                          elapsed(&start, &now));
>                         }
>                         igt_waitchildren();
> @@ -306,6 +312,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>  igt_main
>  {
>         const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
> +       const struct intel_execution_engine2 *e2;
>         const struct intel_execution_engine *e;
>         static const struct {
>                 const char *name;
> @@ -338,7 +345,49 @@ igt_main
>                 igt_fork_hang_detector(fd);
>         }
>  
> +       /* Legacy testing must be first. */
>         for (e = intel_execution_engines; e->name; e++) {
> +               e2 = gem_eb_flags_to_engine(e->exec_id | e->flags);
> +               if (!e2)
> +                       continue; /* I915_EXEC_BSD with no ring selectors */
> +
> +               for (typeof(*phases) *p = phases; p->name; p++) {
> +                       igt_subtest_group {
> +                               igt_fixture {
> +                                       gem_require_ring(fd, e2->flags);
> +                                       if (p->require)
> +                                               igt_require(p->require(fd));
> +                               }
> +
> +                               igt_subtest_f("legacy-%s%s%s",
> +                                             e->name,
> +                                             *p->name ? "-" : "",
> +                                             p->name)
> +                                       single(fd, light, e2, p->flags, 1, 5);
> +
> +                               igt_skip_on_simulation();
> +
> +                               igt_subtest_f("legacy-%s%s-heavy%s",
> +                                             e->exec_id == 0 ? "basic-" : "",
> +                                             e->name,
> +                                             p->name)
> +                                       single(fd, heavy, e2, p->flags, 1, 5);
> +                               igt_subtest_f("legacy-forked-%s%s",
> +                                             e->name,
> +                                             p->name)
> +                                       single(fd, light, e2, p->flags, ncpus,
> +                                              150);
> +                               igt_subtest_f("legacy-forked-%s-heavy%s",
> +                                             e->name,
> +                                             p->name)
> +                                       single(fd, heavy, e2, p->flags, ncpus,
> +                                              150);
> +                       }
> +               }
> +       }
> +
> +       /* Must come after legacy subtests. */
> +       __for_each_physical_engine(fd, e2) {
>                 for (typeof(*phases) *p = phases; p->name; p++) {
>                         igt_subtest_group {
>                                 igt_fixture {
> @@ -346,33 +395,40 @@ igt_main
>                                                 igt_require(p->require(fd));
>                                 }
>  
> -                               igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
> -                                       single(fd, light, e, p->flags, 1, 5);
> +                               igt_subtest_f("%s%s%s",
> +                                             e2->name,
> +                                             *p->name ? "-" : "",
> +                                             p->name)
> +                                       single(fd, light, e2, p->flags, 1, 5);
>  
>                                 igt_skip_on_simulation();
>  
> -                               igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
> -                                       single(fd, heavy, e, p->flags, 1, 5);
> -                               igt_subtest_f("forked-%s%s", e->name, p->name)
> -                                       single(fd, light, e, p->flags, ncpus, 150);
> -                               igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
> -                                       single(fd, heavy, e, p->flags, ncpus, 150);
> +                               igt_subtest_f("%s-heavy%s", e2->name, p->name)
> +                                       single(fd, heavy, e2, p->flags, 1, 5);
> +                               igt_subtest_f("%s-forked%s", e2->name, p->name)
> +                                       single(fd, light, e2, p->flags, ncpus,
> +                                              150);
> +                               igt_subtest_f("%s-forked-heavy%s",
> +                                             e2->name,
> +                                             p->name)
> +                                       single(fd, heavy, e2, p->flags, ncpus,
> +                                              150);
>                         }
>                 }
>         }

Ok, that looks like what I would expect. For crux tests, we iterate over
both the legacy ring selector, and along the engine[] indices.

> -       igt_subtest("basic-all-light")
> +       igt_subtest("all-light")
>                 all(fd, light, 0, 5);
> -       igt_subtest("basic-all-heavy")
> +       igt_subtest("all-heavy")
>                 all(fd, heavy, 0, 5);

And for "all" tests where we are just trying to utilise all engines at
once, we only care about the underlying HW utilisation and so the new
query interface works best.
-Chris
Tvrtko Ursulin June 27, 2019, 10:55 a.m. UTC | #2
On 27/06/2019 11:35, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-06-27 11:20:19)
>> diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
>> index 647911d4c42e..de39748344b8 100644
>> --- a/tests/i915/gem_ctx_switch.c
>> +++ b/tests/i915/gem_ctx_switch.c
>> @@ -55,7 +55,7 @@ static double elapsed(const struct timespec *start, const struct timespec *end)
>>   
>>   static int measure_qlen(int fd,
>>                          struct drm_i915_gem_execbuffer2 *execbuf,
>> -                       unsigned int *engine, unsigned int nengine,
>> +                       const struct intel_engine_data *engines,
>>                          int timeout)
>>   {
>>          const struct drm_i915_gem_exec_object2 * const obj =
>> @@ -63,15 +63,17 @@ static int measure_qlen(int fd,
>>          uint32_t ctx[64];
>>          int min = INT_MAX, max = 0;
>>   
>> -       for (int i = 0; i < ARRAY_SIZE(ctx); i++)
>> +       for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
>>                  ctx[i] = gem_context_create(fd);
>> +               gem_context_set_all_engines(fd, ctx[i]);
>> +       }
>>   
>> -       for (unsigned int n = 0; n < nengine; n++) {
>> +       for (unsigned int n = 0; n < engines->nengines; n++) {
>>                  uint64_t saved = execbuf->flags;
>>                  struct timespec tv = {};
>>                  int q;
>>   
>> -               execbuf->flags |= engine[n];
>> +               execbuf->flags |= engines->engines[n].flags;
>>   
>>                  for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
>>                          execbuf->rsvd1 = ctx[i];
>> @@ -90,7 +92,8 @@ static int measure_qlen(int fd,
>>                   * Be conservative and aim not to overshoot timeout, so scale
>>                   * down by 8 for hopefully a max of 12.5% error.
>>                   */
>> -               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
>> +               q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) /
>> +                   8 + 1;
>>                  if (q < min)
>>                          min = q;
>>                  if (q > max)
>> @@ -107,7 +110,7 @@ static int measure_qlen(int fd,
>>   }
>>   
>>   static void single(int fd, uint32_t handle,
>> -                  const struct intel_execution_engine *e,
>> +                  const struct intel_execution_engine2 *e2,
>>                     unsigned flags,
>>                     const int ncpus,
>>                     int timeout)
>> @@ -125,13 +128,14 @@ static void single(int fd, uint32_t handle,
>>          shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
>>          igt_assert(shared != MAP_FAILED);
>>   
>> -       gem_require_ring(fd, e->exec_id | e->flags);
>> -
>>          for (n = 0; n < 64; n++) {
>>                  if (flags & QUEUE)
>>                          contexts[n] = gem_queue_create(fd);
>>                  else
>>                          contexts[n] = gem_context_create(fd);
>> +
>> +               if (gem_context_has_engine_map(fd, 0))
>> +                       gem_context_set_all_engines(fd, contexts[n]);
> 
> That was a moment of doubt on how well setting engines on a "queue"
> worked :)

Why it wouldn't work?

> 
>>          }
>>   
>>          memset(&obj, 0, sizeof(obj));
>> @@ -152,12 +156,12 @@ static void single(int fd, uint32_t handle,
>>          execbuf.buffers_ptr = to_user_pointer(&obj);
>>          execbuf.buffer_count = 1;
>>          execbuf.rsvd1 = contexts[0];
>> -       execbuf.flags = e->exec_id | e->flags;
>> +       execbuf.flags = e2->flags;
>>          execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
>>          execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
>>          igt_require(__gem_execbuf(fd, &execbuf) == 0);
>>          if (__gem_execbuf(fd, &execbuf)) {
>> -               execbuf.flags = e->exec_id | e->flags;
>> +               execbuf.flags = e2->flags;
>>                  reloc.target_handle = obj.handle;
>>                  gem_execbuf(fd, &execbuf);
>>          }
>> @@ -190,7 +194,8 @@ static void single(int fd, uint32_t handle,
>>                  clock_gettime(CLOCK_MONOTONIC, &now);
>>   
>>                  igt_info("[%d] %s: %'u cycles: %.3fus%s\n",
>> -                        child, e->name, count, elapsed(&start, &now)*1e6 / count,
>> +                        child, e2->name, count,
>> +                        elapsed(&start, &now) * 1e6 / count,
>>                           flags & INTERRUPTIBLE ? " (interruptible)" : "");
>>   
>>                  shared[child].elapsed = elapsed(&start, &now);
>> @@ -209,7 +214,7 @@ static void single(int fd, uint32_t handle,
>>                  }
>>   
>>                  igt_info("Total %s: %'lu cycles: %.3fus%s\n",
>> -                        e->name, total, max*1e6 / total,
>> +                        e2->name, total, max*1e6 / total,
>>                           flags & INTERRUPTIBLE ? " (interruptible)" : "");
>>          }
>>   
>> @@ -223,25 +228,20 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>   {
>>          struct drm_i915_gem_execbuffer2 execbuf;
>>          struct drm_i915_gem_exec_object2 obj[2];
>> -       unsigned int engine[16], e;
>> -       const char *name[16];
>> +       struct intel_engine_data engines = { };
>>          uint32_t contexts[65];
>> -       unsigned int nengine;
>>          int n, qlen;
>>   
>> -       nengine = 0;
>> -       for_each_physical_engine(fd, e) {
>> -               engine[nengine] = e;
>> -               name[nengine] = e__->name;
>> -               nengine++;
>> -       }
>> -       igt_require(nengine);
>> +       engines = intel_init_engine_list(fd, 0);
>> +       igt_require(engines.nengines);
>>   
>>          for (n = 0; n < ARRAY_SIZE(contexts); n++) {
>>                  if (flags & QUEUE)
>>                          contexts[n] = gem_queue_create(fd);
>>                  else
>>                          contexts[n] = gem_context_create(fd);
>> +
>> +               gem_context_set_all_engines(fd, contexts[n]);
>>          }
>>   
>>          memset(obj, 0, sizeof(obj));
>> @@ -256,7 +256,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>          igt_require(__gem_execbuf(fd, &execbuf) == 0);
>>          gem_sync(fd, handle);
>>   
>> -       qlen = measure_qlen(fd, &execbuf, engine, nengine, timeout);
>> +       qlen = measure_qlen(fd, &execbuf, &engines, timeout);
>>          igt_info("Using timing depth of %d batches\n", qlen);
>>   
>>          execbuf.buffers_ptr = to_user_pointer(obj);
>> @@ -264,13 +264,15 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>   
>>          for (int pot = 2; pot <= 64; pot *= 2) {
>>                  for (int nctx = pot - 1; nctx <= pot + 1; nctx++) {
>> -                       igt_fork(child, nengine) {
>> +                       igt_fork(child, engines.nengines) {
>>                                  struct timespec start, now;
>>                                  unsigned int count = 0;
>>   
>>                                  obj[0].handle = gem_create(fd, 4096);
>> -                               execbuf.flags |= engine[child];
>> -                               for (int loop = 0; loop < ARRAY_SIZE(contexts); loop++) {
>> +                               execbuf.flags |= engines.engines[child].flags;
>> +                               for (int loop = 0;
>> +                                    loop < ARRAY_SIZE(contexts);
>> +                                    loop++) {
>>                                          execbuf.rsvd1 = contexts[loop];
>>                                          gem_execbuf(fd, &execbuf);
>>                                  }
>> @@ -279,7 +281,8 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>                                  clock_gettime(CLOCK_MONOTONIC, &start);
>>                                  do {
>>                                          for (int loop = 0; loop < qlen; loop++) {
>> -                                               execbuf.rsvd1 = contexts[loop % nctx];
>> +                                               execbuf.rsvd1 =
>> +                                                       contexts[loop % nctx];
>>                                                  gem_execbuf(fd, &execbuf);
>>                                          }
>>                                          count += qlen;
>> @@ -291,8 +294,11 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>                                  gem_close(fd, obj[0].handle);
>>   
>>                                  igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
>> -                                        nctx, child, name[child], count, elapsed(&start, &now)*1e6 / count,
>> -                                        flags & INTERRUPTIBLE ? " (interruptible)" : "",
>> +                                        nctx, child,
>> +                                        engines.engines[child].name, count,
>> +                                        elapsed(&start, &now) * 1e6 / count,
>> +                                        flags & INTERRUPTIBLE ?
>> +                                        " (interruptible)" : "",
>>                                           elapsed(&start, &now));
>>                          }
>>                          igt_waitchildren();
>> @@ -306,6 +312,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
>>   igt_main
>>   {
>>          const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>> +       const struct intel_execution_engine2 *e2;
>>          const struct intel_execution_engine *e;
>>          static const struct {
>>                  const char *name;
>> @@ -338,7 +345,49 @@ igt_main
>>                  igt_fork_hang_detector(fd);
>>          }
>>   
>> +       /* Legacy testing must be first. */
>>          for (e = intel_execution_engines; e->name; e++) {
>> +               e2 = gem_eb_flags_to_engine(e->exec_id | e->flags);
>> +               if (!e2)
>> +                       continue; /* I915_EXEC_BSD with no ring selectors */
>> +
>> +               for (typeof(*phases) *p = phases; p->name; p++) {
>> +                       igt_subtest_group {
>> +                               igt_fixture {
>> +                                       gem_require_ring(fd, e2->flags);
>> +                                       if (p->require)
>> +                                               igt_require(p->require(fd));
>> +                               }
>> +
>> +                               igt_subtest_f("legacy-%s%s%s",
>> +                                             e->name,
>> +                                             *p->name ? "-" : "",
>> +                                             p->name)
>> +                                       single(fd, light, e2, p->flags, 1, 5);
>> +
>> +                               igt_skip_on_simulation();
>> +
>> +                               igt_subtest_f("legacy-%s%s-heavy%s",
>> +                                             e->exec_id == 0 ? "basic-" : "",
>> +                                             e->name,
>> +                                             p->name)
>> +                                       single(fd, heavy, e2, p->flags, 1, 5);
>> +                               igt_subtest_f("legacy-forked-%s%s",
>> +                                             e->name,
>> +                                             p->name)
>> +                                       single(fd, light, e2, p->flags, ncpus,
>> +                                              150);
>> +                               igt_subtest_f("legacy-forked-%s-heavy%s",
>> +                                             e->name,
>> +                                             p->name)
>> +                                       single(fd, heavy, e2, p->flags, ncpus,
>> +                                              150);
>> +                       }
>> +               }
>> +       }
>> +
>> +       /* Must come after legacy subtests. */
>> +       __for_each_physical_engine(fd, e2) {
>>                  for (typeof(*phases) *p = phases; p->name; p++) {
>>                          igt_subtest_group {
>>                                  igt_fixture {
>> @@ -346,33 +395,40 @@ igt_main
>>                                                  igt_require(p->require(fd));
>>                                  }
>>   
>> -                               igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
>> -                                       single(fd, light, e, p->flags, 1, 5);
>> +                               igt_subtest_f("%s%s%s",
>> +                                             e2->name,
>> +                                             *p->name ? "-" : "",
>> +                                             p->name)
>> +                                       single(fd, light, e2, p->flags, 1, 5);
>>   
>>                                  igt_skip_on_simulation();
>>   
>> -                               igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
>> -                                       single(fd, heavy, e, p->flags, 1, 5);
>> -                               igt_subtest_f("forked-%s%s", e->name, p->name)
>> -                                       single(fd, light, e, p->flags, ncpus, 150);
>> -                               igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
>> -                                       single(fd, heavy, e, p->flags, ncpus, 150);
>> +                               igt_subtest_f("%s-heavy%s", e2->name, p->name)
>> +                                       single(fd, heavy, e2, p->flags, 1, 5);
>> +                               igt_subtest_f("%s-forked%s", e2->name, p->name)
>> +                                       single(fd, light, e2, p->flags, ncpus,
>> +                                              150);
>> +                               igt_subtest_f("%s-forked-heavy%s",
>> +                                             e2->name,
>> +                                             p->name)
>> +                                       single(fd, heavy, e2, p->flags, ncpus,
>> +                                              150);
>>                          }
>>                  }
>>          }
> 
> Ok, that looks like what I would expect. For crux tests, we iterate over
> both the legacy ring selector, and along the engine[] indices.
> 
>> -       igt_subtest("basic-all-light")
>> +       igt_subtest("all-light")
>>                  all(fd, light, 0, 5);
>> -       igt_subtest("basic-all-heavy")
>> +       igt_subtest("all-heavy")
>>                  all(fd, heavy, 0, 5);
> 
> And for "all" tests where we are just trying to utilise all engines at
> once, we only care about the underlying HW utilisation and so the new
> query interface works best.

Cool, so no complaints?

Regards,

Tvrtko
Chris Wilson June 27, 2019, 11:05 a.m. UTC | #3
Quoting Tvrtko Ursulin (2019-06-27 11:55:56)
> 
> On 27/06/2019 11:35, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-06-27 11:20:19)
> >> @@ -125,13 +128,14 @@ static void single(int fd, uint32_t handle,
> >>          shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> >>          igt_assert(shared != MAP_FAILED);
> >>   
> >> -       gem_require_ring(fd, e->exec_id | e->flags);
> >> -
> >>          for (n = 0; n < 64; n++) {
> >>                  if (flags & QUEUE)
> >>                          contexts[n] = gem_queue_create(fd);
> >>                  else
> >>                          contexts[n] = gem_context_create(fd);
> >> +
> >> +               if (gem_context_has_engine_map(fd, 0))
> >> +                       gem_context_set_all_engines(fd, contexts[n]);
> > 
> > That was a moment of doubt on how well setting engines on a "queue"
> > worked :)
> 
> Why it wouldn't work?

Because I expected to have made a mistake :)

> > Ok, that looks like what I would expect. For crux tests, we iterate over
> > both the legacy ring selector, and along the engine[] indices.
> > 
> >> -       igt_subtest("basic-all-light")
> >> +       igt_subtest("all-light")
> >>                  all(fd, light, 0, 5);
> >> -       igt_subtest("basic-all-heavy")
> >> +       igt_subtest("all-heavy")
> >>                  all(fd, heavy, 0, 5);
> > 
> > And for "all" tests where we are just trying to utilise all engines at
> > once, we only care about the underlying HW utilisation and so the new
> > query interface works best.
> 
> Cool, so no complaints?

Strategy works for me. We can argue about which tests need both and
hopefully come up with a minimum set that gives good coverage of active
legacy ABI.
-Chris
Chris Wilson June 29, 2019, 2:11 p.m. UTC | #4
Quoting Tvrtko Ursulin (2019-06-27 11:20:19)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> I converted this one as an example, or at least to drive the discussion,
> how more complex tests can be converted.
> 
> I have kept the legacy execbuf abi testing prefixed with "legacy-".
> 
> New tests were added to exercise physical engines via engine discovery and
> also "all" tests have been updated in the same way.
> 
> To keep things simpler and avoid having to create separate contexts legacy
> tests have to be first since the __for_each_physical_engine iterator would
> otherwise configure the default context and confuse the test.
> 
> So legacy tests run on the unconfigured (with engine map) context and use
> a new helper gem_eb_flags_to_engine to look up the engine from the
> intel_execution_engines2 static list. This is only to enable the core test
> code to be shared.

Note that this misses the I915_EXEC_DEFAULT slot. That a rather
significant oversight.
-Chris
Chris Wilson July 2, 2019, 10:43 a.m. UTC | #5
Quoting Tvrtko Ursulin (2019-06-27 11:20:19)
> +bool gem_context_has_engine_map(int fd, uint32_t ctx)
> +{
> +       struct drm_i915_gem_context_param param = {
> +               .param = I915_CONTEXT_PARAM_ENGINES,
> +               .ctx_id = ctx
> +       };
> +       int ret;
> +
> +       ret = __gem_context_get_param(fd, &param);
> +       igt_assert_eq(ret, 0);

ret is -EINVAL if the parameter doesn't exist on older kernels, where
naturally we have no engine map!

bool result;

result = false;
if (__gem_context_get_param(fd, &param) == 0)
	result = param.size;

return result;

?
-Chris

Patch
diff mbox series

diff --git a/lib/i915/gem_engine_topology.c b/lib/i915/gem_engine_topology.c
index fdd1b951672b..6cfe3468e3d8 100644
--- a/lib/i915/gem_engine_topology.c
+++ b/lib/i915/gem_engine_topology.c
@@ -289,3 +289,30 @@  bool gem_has_engine_topology(int fd)
 
 	return !__gem_context_get_param(fd, &param);
 }
+
+const struct intel_execution_engine2 *
+gem_eb_flags_to_engine(unsigned int flags)
+{
+	const struct intel_execution_engine2 *e2;
+
+	__for_each_static_engine(e2) {
+		if (e2->flags == flags)
+			return e2;
+	}
+
+	return NULL;
+}
+
+bool gem_context_has_engine_map(int fd, uint32_t ctx)
+{
+	struct drm_i915_gem_context_param param = {
+		.param = I915_CONTEXT_PARAM_ENGINES,
+		.ctx_id = ctx
+	};
+	int ret;
+
+	ret = __gem_context_get_param(fd, &param);
+	igt_assert_eq(ret, 0);
+
+	return param.size;
+}
diff --git a/lib/i915/gem_engine_topology.h b/lib/i915/gem_engine_topology.h
index 2415fd1e379b..b175483fac1c 100644
--- a/lib/i915/gem_engine_topology.h
+++ b/lib/i915/gem_engine_topology.h
@@ -53,6 +53,11 @@  int gem_context_lookup_engine(int fd, uint64_t engine, uint32_t ctx_id,
 
 void gem_context_set_all_engines(int fd, uint32_t ctx);
 
+bool gem_context_has_engine_map(int fd, uint32_t ctx);
+
+const struct intel_execution_engine2 *
+gem_eb_flags_to_engine(unsigned int flags);
+
 #define __for_each_static_engine(e__) \
 	for ((e__) = intel_execution_engines2; (e__)->name; (e__)++)
 
diff --git a/tests/i915/gem_ctx_switch.c b/tests/i915/gem_ctx_switch.c
index 647911d4c42e..de39748344b8 100644
--- a/tests/i915/gem_ctx_switch.c
+++ b/tests/i915/gem_ctx_switch.c
@@ -55,7 +55,7 @@  static double elapsed(const struct timespec *start, const struct timespec *end)
 
 static int measure_qlen(int fd,
 			struct drm_i915_gem_execbuffer2 *execbuf,
-			unsigned int *engine, unsigned int nengine,
+			const struct intel_engine_data *engines,
 			int timeout)
 {
 	const struct drm_i915_gem_exec_object2 * const obj =
@@ -63,15 +63,17 @@  static int measure_qlen(int fd,
 	uint32_t ctx[64];
 	int min = INT_MAX, max = 0;
 
-	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
 		ctx[i] = gem_context_create(fd);
+		gem_context_set_all_engines(fd, ctx[i]);
+	}
 
-	for (unsigned int n = 0; n < nengine; n++) {
+	for (unsigned int n = 0; n < engines->nengines; n++) {
 		uint64_t saved = execbuf->flags;
 		struct timespec tv = {};
 		int q;
 
-		execbuf->flags |= engine[n];
+		execbuf->flags |= engines->engines[n].flags;
 
 		for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
 			execbuf->rsvd1 = ctx[i];
@@ -90,7 +92,8 @@  static int measure_qlen(int fd,
 		 * Be conservative and aim not to overshoot timeout, so scale
 		 * down by 8 for hopefully a max of 12.5% error.
 		 */
-		q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
+		q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) /
+		    8 + 1;
 		if (q < min)
 			min = q;
 		if (q > max)
@@ -107,7 +110,7 @@  static int measure_qlen(int fd,
 }
 
 static void single(int fd, uint32_t handle,
-		   const struct intel_execution_engine *e,
+		   const struct intel_execution_engine2 *e2,
 		   unsigned flags,
 		   const int ncpus,
 		   int timeout)
@@ -125,13 +128,14 @@  static void single(int fd, uint32_t handle,
 	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
 	igt_assert(shared != MAP_FAILED);
 
-	gem_require_ring(fd, e->exec_id | e->flags);
-
 	for (n = 0; n < 64; n++) {
 		if (flags & QUEUE)
 			contexts[n] = gem_queue_create(fd);
 		else
 			contexts[n] = gem_context_create(fd);
+
+		if (gem_context_has_engine_map(fd, 0))
+			gem_context_set_all_engines(fd, contexts[n]);
 	}
 
 	memset(&obj, 0, sizeof(obj));
@@ -152,12 +156,12 @@  static void single(int fd, uint32_t handle,
 	execbuf.buffers_ptr = to_user_pointer(&obj);
 	execbuf.buffer_count = 1;
 	execbuf.rsvd1 = contexts[0];
-	execbuf.flags = e->exec_id | e->flags;
+	execbuf.flags = e2->flags;
 	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
 	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
 	igt_require(__gem_execbuf(fd, &execbuf) == 0);
 	if (__gem_execbuf(fd, &execbuf)) {
-		execbuf.flags = e->exec_id | e->flags;
+		execbuf.flags = e2->flags;
 		reloc.target_handle = obj.handle;
 		gem_execbuf(fd, &execbuf);
 	}
@@ -190,7 +194,8 @@  static void single(int fd, uint32_t handle,
 		clock_gettime(CLOCK_MONOTONIC, &now);
 
 		igt_info("[%d] %s: %'u cycles: %.3fus%s\n",
-			 child, e->name, count, elapsed(&start, &now)*1e6 / count,
+			 child, e2->name, count,
+			 elapsed(&start, &now) * 1e6 / count,
 			 flags & INTERRUPTIBLE ? " (interruptible)" : "");
 
 		shared[child].elapsed = elapsed(&start, &now);
@@ -209,7 +214,7 @@  static void single(int fd, uint32_t handle,
 		}
 
 		igt_info("Total %s: %'lu cycles: %.3fus%s\n",
-			 e->name, total, max*1e6 / total,
+			 e2->name, total, max*1e6 / total,
 			 flags & INTERRUPTIBLE ? " (interruptible)" : "");
 	}
 
@@ -223,25 +228,20 @@  static void all(int fd, uint32_t handle, unsigned flags, int timeout)
 {
 	struct drm_i915_gem_execbuffer2 execbuf;
 	struct drm_i915_gem_exec_object2 obj[2];
-	unsigned int engine[16], e;
-	const char *name[16];
+	struct intel_engine_data engines = { };
 	uint32_t contexts[65];
-	unsigned int nengine;
 	int n, qlen;
 
-	nengine = 0;
-	for_each_physical_engine(fd, e) {
-		engine[nengine] = e;
-		name[nengine] = e__->name;
-		nengine++;
-	}
-	igt_require(nengine);
+	engines = intel_init_engine_list(fd, 0);
+	igt_require(engines.nengines);
 
 	for (n = 0; n < ARRAY_SIZE(contexts); n++) {
 		if (flags & QUEUE)
 			contexts[n] = gem_queue_create(fd);
 		else
 			contexts[n] = gem_context_create(fd);
+
+		gem_context_set_all_engines(fd, contexts[n]);
 	}
 
 	memset(obj, 0, sizeof(obj));
@@ -256,7 +256,7 @@  static void all(int fd, uint32_t handle, unsigned flags, int timeout)
 	igt_require(__gem_execbuf(fd, &execbuf) == 0);
 	gem_sync(fd, handle);
 
-	qlen = measure_qlen(fd, &execbuf, engine, nengine, timeout);
+	qlen = measure_qlen(fd, &execbuf, &engines, timeout);
 	igt_info("Using timing depth of %d batches\n", qlen);
 
 	execbuf.buffers_ptr = to_user_pointer(obj);
@@ -264,13 +264,15 @@  static void all(int fd, uint32_t handle, unsigned flags, int timeout)
 
 	for (int pot = 2; pot <= 64; pot *= 2) {
 		for (int nctx = pot - 1; nctx <= pot + 1; nctx++) {
-			igt_fork(child, nengine) {
+			igt_fork(child, engines.nengines) {
 				struct timespec start, now;
 				unsigned int count = 0;
 
 				obj[0].handle = gem_create(fd, 4096);
-				execbuf.flags |= engine[child];
-				for (int loop = 0; loop < ARRAY_SIZE(contexts); loop++) {
+				execbuf.flags |= engines.engines[child].flags;
+				for (int loop = 0;
+				     loop < ARRAY_SIZE(contexts);
+				     loop++) {
 					execbuf.rsvd1 = contexts[loop];
 					gem_execbuf(fd, &execbuf);
 				}
@@ -279,7 +281,8 @@  static void all(int fd, uint32_t handle, unsigned flags, int timeout)
 				clock_gettime(CLOCK_MONOTONIC, &start);
 				do {
 					for (int loop = 0; loop < qlen; loop++) {
-						execbuf.rsvd1 = contexts[loop % nctx];
+						execbuf.rsvd1 =
+							contexts[loop % nctx];
 						gem_execbuf(fd, &execbuf);
 					}
 					count += qlen;
@@ -291,8 +294,11 @@  static void all(int fd, uint32_t handle, unsigned flags, int timeout)
 				gem_close(fd, obj[0].handle);
 
 				igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
-					 nctx, child, name[child], count, elapsed(&start, &now)*1e6 / count,
-					 flags & INTERRUPTIBLE ? " (interruptible)" : "",
+					 nctx, child,
+					 engines.engines[child].name, count,
+					 elapsed(&start, &now) * 1e6 / count,
+					 flags & INTERRUPTIBLE ?
+					 " (interruptible)" : "",
 					 elapsed(&start, &now));
 			}
 			igt_waitchildren();
@@ -306,6 +312,7 @@  static void all(int fd, uint32_t handle, unsigned flags, int timeout)
 igt_main
 {
 	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	const struct intel_execution_engine2 *e2;
 	const struct intel_execution_engine *e;
 	static const struct {
 		const char *name;
@@ -338,7 +345,49 @@  igt_main
 		igt_fork_hang_detector(fd);
 	}
 
+	/* Legacy testing must be first. */
 	for (e = intel_execution_engines; e->name; e++) {
+		e2 = gem_eb_flags_to_engine(e->exec_id | e->flags);
+		if (!e2)
+			continue; /* I915_EXEC_BSD with no ring selectors */
+
+		for (typeof(*phases) *p = phases; p->name; p++) {
+			igt_subtest_group {
+				igt_fixture {
+					gem_require_ring(fd, e2->flags);
+					if (p->require)
+						igt_require(p->require(fd));
+				}
+
+				igt_subtest_f("legacy-%s%s%s",
+					      e->name,
+					      *p->name ? "-" : "",
+					      p->name)
+					single(fd, light, e2, p->flags, 1, 5);
+
+				igt_skip_on_simulation();
+
+				igt_subtest_f("legacy-%s%s-heavy%s",
+					      e->exec_id == 0 ? "basic-" : "",
+					      e->name,
+					      p->name)
+					single(fd, heavy, e2, p->flags, 1, 5);
+				igt_subtest_f("legacy-forked-%s%s",
+					      e->name,
+					      p->name)
+					single(fd, light, e2, p->flags, ncpus,
+					       150);
+				igt_subtest_f("legacy-forked-%s-heavy%s",
+					      e->name,
+					      p->name)
+					single(fd, heavy, e2, p->flags, ncpus,
+					       150);
+			}
+		}
+	}
+
+	/* Must come after legacy subtests. */
+	__for_each_physical_engine(fd, e2) {
 		for (typeof(*phases) *p = phases; p->name; p++) {
 			igt_subtest_group {
 				igt_fixture {
@@ -346,33 +395,40 @@  igt_main
 						igt_require(p->require(fd));
 				}
 
-				igt_subtest_f("%s%s%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
-					single(fd, light, e, p->flags, 1, 5);
+				igt_subtest_f("%s%s%s",
+					      e2->name,
+					      *p->name ? "-" : "",
+					      p->name)
+					single(fd, light, e2, p->flags, 1, 5);
 
 				igt_skip_on_simulation();
 
-				igt_subtest_f("%s%s-heavy%s", e->exec_id == 0 ? "basic-" : "", e->name, p->name)
-					single(fd, heavy, e, p->flags, 1, 5);
-				igt_subtest_f("forked-%s%s", e->name, p->name)
-					single(fd, light, e, p->flags, ncpus, 150);
-				igt_subtest_f("forked-%s-heavy%s", e->name, p->name)
-					single(fd, heavy, e, p->flags, ncpus, 150);
+				igt_subtest_f("%s-heavy%s", e2->name, p->name)
+					single(fd, heavy, e2, p->flags, 1, 5);
+				igt_subtest_f("%s-forked%s", e2->name, p->name)
+					single(fd, light, e2, p->flags, ncpus,
+					       150);
+				igt_subtest_f("%s-forked-heavy%s",
+					      e2->name,
+					      p->name)
+					single(fd, heavy, e2, p->flags, ncpus,
+					       150);
 			}
 		}
 	}
 
-	igt_subtest("basic-all-light")
+	igt_subtest("all-light")
 		all(fd, light, 0, 5);
-	igt_subtest("basic-all-heavy")
+	igt_subtest("all-heavy")
 		all(fd, heavy, 0, 5);
 
 	igt_subtest_group {
 		igt_fixture {
 			igt_require(gem_has_queues(fd));
 		}
-		igt_subtest("basic-queue-light")
+		igt_subtest("queue-light")
 			all(fd, light, QUEUE, 5);
-		igt_subtest("basic-queue-heavy")
+		igt_subtest("queue-heavy")
 			all(fd, heavy, QUEUE, 5);
 	}
 
diff --git a/tests/intel-ci/blacklist.txt b/tests/intel-ci/blacklist.txt
index 2a5893ce1b37..8e3dafa9836b 100644
--- a/tests/intel-ci/blacklist.txt
+++ b/tests/intel-ci/blacklist.txt
@@ -25,7 +25,7 @@  igt@gem_cs_prefetch(@.*)?
 igt@gem_ctx_create@(?!.*basic).*
 igt@gem_ctx_exec@(?!.*basic).*
 igt@gem_ctx_shared@*exhaust*
-igt@gem_ctx_switch@(?!.*basic).*
+igt@gem_ctx_switch@.*(forked|interruptible).*
 igt@gem_ctx_thrash(@.*)?
 igt@gem_evict_alignment(@.*)?
 igt@gem_evict_everything(@.*)?
diff --git a/tests/intel-ci/fast-feedback.testlist b/tests/intel-ci/fast-feedback.testlist
index 58e6b5c5f882..cfe6102a39f7 100644
--- a/tests/intel-ci/fast-feedback.testlist
+++ b/tests/intel-ci/fast-feedback.testlist
@@ -14,7 +14,8 @@  igt@gem_ctx_create@basic-files
 igt@gem_ctx_exec@basic
 igt@gem_ctx_param@basic
 igt@gem_ctx_param@basic-default
-igt@gem_ctx_switch@basic-default
+igt@gem_ctx_switch@legacy-render
+igt@gem_ctx_switch@rcs0
 igt@gem_exec_basic@basic-all
 igt@gem_exec_create@basic
 igt@gem_exec_fence@basic-busy-default