Message ID: 20180606124907.13139-7-tvrtko.ursulin@linux.intel.com (mailing list archive)
State: New, archived
Just a few suggestions below. Otherwise looks good to me. On 06/06/18 13:49, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Basic tests to cover engine queued/runnable/running metric as reported > by the DRM_I915_QUERY_ENGINE_QUEUES query. > > v2: > * Update ABI for i915 changes. > * Use igt_spin_busywait_until_running. > * Support no hardware contexts. > * More comments. (Lionel Landwerlin) > Chris Wilson: > * Check for sw sync support. > * Multiple contexts queued test. > * Simplify context and bb allocation. > * Fix asserts in the running subtest. > > v3: > * Update for uAPI change. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > tests/i915_query.c | 442 +++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 442 insertions(+) > > diff --git a/tests/i915_query.c b/tests/i915_query.c > index c7de8cbd8371..588428749c50 100644 > --- a/tests/i915_query.c > +++ b/tests/i915_query.c > @@ -22,6 +22,7 @@ > */ > > #include "igt.h" > +#include "sw_sync.h" > > #include <limits.h> > > @@ -477,8 +478,414 @@ test_query_topology_known_pci_ids(int fd, int devid) > free(topo_info); > } > > +#define DRM_I915_QUERY_ENGINE_QUEUES 2 > + > +struct drm_i915_query_engine_queues { > + /** Engine class as in enum drm_i915_gem_engine_class. */ > + __u16 class; > + > + /** Engine instance number. */ > + __u16 instance; > + > + /** Number of requests with unresolved fences and dependencies. */ > + __u32 queued; > + > + /** Number of ready requests waiting on a slot on GPU. */ > + __u32 runnable; > + > + /** Number of requests executing on the GPU. */ > + __u32 running; > + > + __u32 rsvd[6]; > +}; I suppose this above would be in the updated i915_drm.h. I thought you would have put it there as part of your first commit. > + > +static bool query_engine_queues_supported(int fd) > +{ > + struct drm_i915_query_item item = { > + .query_id = DRM_I915_QUERY_ENGINE_QUEUES, > + }; > + > + return __i915_query_items(fd, &item, 1) == 0 && item.length > 0; > +} > + > +static void engine_queues_invalid(int fd) > +{ > + struct drm_i915_query_engine_queues queues; > + struct drm_i915_query_item item; > + unsigned int len; > + unsigned int i; > + > + /* Flags is MBZ. */ > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.flags = 1; > + i915_query_items(fd, &item, 1); > + igt_assert_eq(item.length, -EINVAL); > + > + /* Length not zero and not greater or equal required size. */ > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.length = 1; > + i915_query_items(fd, &item, 1); > + igt_assert_eq(item.length, -EINVAL); > + > + /* Query correct length. */ > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + i915_query_items(fd, &item, 1); > + igt_assert(item.length >= 0); Could be item.length > 0? > + len = item.length; > + > + /* Ivalid pointer. */ > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.length = len; > + i915_query_items(fd, &item, 1); > + igt_assert_eq(item.length, -EFAULT); Maybe verify ULONG_MAX too? > + > + /* Reserved fields are MBZ. 
*/ > + > + for (i = 0; i < ARRAY_SIZE(queues.rsvd); i++) { > + memset(&queues, 0, sizeof(queues)); > + queues.rsvd[i] = 1; > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.length = len; > + item.data_ptr = to_user_pointer(&queues); > + i915_query_items(fd, &item, 1); > + igt_assert_eq(item.length, -EINVAL); > + } > + > + memset(&queues, 0, sizeof(queues)); > + queues.class = -1; class = I915_ENGINE_CLASS_INVALID ? > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.length = len; > + item.data_ptr = to_user_pointer(&queues); > + i915_query_items(fd, &item, 1); > + igt_assert_eq(item.length, -ENOENT); > + > + memset(&queues, 0, sizeof(queues)); > + queues.instance = -1; > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.length = len; > + item.data_ptr = to_user_pointer(&queues); > + i915_query_items(fd, &item, 1); > + igt_assert_eq(item.length, -ENOENT); > +} > + > +static void engine_queues(int fd, const struct intel_execution_engine2 *e) > +{ > + struct drm_i915_query_engine_queues queues; > + struct drm_i915_query_item item; > + unsigned int len; > + > + /* Query required buffer length. */ > + memset(&queues, 0, sizeof(queues)); > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.data_ptr = to_user_pointer(&queues); > + i915_query_items(fd, &item, 1); > + igt_assert(item.length >= 0); item.length > 0? > + igt_assert(item.length <= sizeof(queues)); I guess we can expect item.length == sizeof(queues) here. > + len = item.length; > + > + /* Check length larger than required works and reports same length. */ > + memset(&queues, 0, sizeof(queues)); > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.data_ptr = to_user_pointer(&queues); > + item.length = len + 1; > + i915_query_items(fd, &item, 1); > + igt_assert_eq(item.length, len); > + > + /* Actual query. */ > + memset(&queues, 0, sizeof(queues)); > + queues.class = e->class; > + queues.instance = e->instance; > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.data_ptr = to_user_pointer(&queues); > + item.length = len; > + i915_query_items(fd, &item, 1); > + igt_assert_eq(item.length, len); > +} > + > +static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e) > +{ > + return gem_class_instance_to_eb_flags(gem_fd, e->class, e->instance); > +} > + > +static void > +__query_queues(int fd, const struct intel_execution_engine2 *e, > + struct drm_i915_query_engine_queues *queues) > +{ > + struct drm_i915_query_item item; > + > + memset(queues, 0, sizeof(*queues)); > + queues->class = e->class; > + queues->instance = e->instance; > + memset(&item, 0, sizeof(item)); > + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; > + item.data_ptr = to_user_pointer(queues); > + item.length = sizeof(*queues); > + i915_query_items(fd, &item, 1); > + igt_assert_eq(item.length, sizeof(*queues)); > +} > + > +#define TEST_CONTEXTS (1 << 0) > + > +/* > + * Test that the reported number of queued (not ready for execution due fences > + * or dependencies) requests on an engine is correct. 
> + */ > +static void > +engine_queued(int gem_fd, const struct intel_execution_engine2 *e, > + unsigned int flags) > +{ > + const unsigned long engine = e2ring(gem_fd, e); > + struct drm_i915_query_engine_queues queues; > + const uint32_t bbe = MI_BATCH_BUFFER_END; > + const unsigned int max_rq = 10; > + uint32_t queued[max_rq + 1]; > + unsigned int n, i; > + uint32_t bo; > + > + igt_require_sw_sync(); > + if (flags & TEST_CONTEXTS) > + gem_require_contexts(gem_fd); > + > + memset(queued, 0, sizeof(queued)); > + > + bo = gem_create(gem_fd, 4096); > + gem_write(gem_fd, bo, 4092, &bbe, sizeof(bbe)); > + > + /* Create a specific queue depth of unready requests. */ > + for (n = 0; n <= max_rq; n++) { > + int fence = -1; > + IGT_CORK_FENCE(cork); > + > + gem_quiescent_gpu(gem_fd); > + > + /* Create a cork so we can create a dependency chain. */ > + if (n) > + fence = igt_cork_plug(&cork, -1); > + > + /* Submit n unready requests depending on the cork. */ > + for (i = 0; i < n; i++) { > + struct drm_i915_gem_exec_object2 obj = { }; > + struct drm_i915_gem_execbuffer2 eb = { }; > + > + obj.handle = bo; > + > + eb.buffer_count = 1; > + eb.buffers_ptr = to_user_pointer(&obj); > + > + eb.flags = engine | I915_EXEC_FENCE_IN; > + > + /* > + * In context mode each submission is on a separate > + * context. > + */ > + if (flags & TEST_CONTEXTS) > + eb.rsvd1 = gem_context_create(gem_fd); > + > + eb.rsvd2 = fence; > + > + gem_execbuf(gem_fd, &eb); > + > + if (flags & TEST_CONTEXTS) > + gem_context_destroy(gem_fd, eb.rsvd1); > + } > + > + /* Store reported queue depth to assert against later. */ > + __query_queues(gem_fd, e, &queues); > + queued[n] = queues.queued; > + igt_info("n=%u queued=%u\n", n, queued[n]); > + > + /* Unplug the queue and proceed to the next queue depth. */ > + if (fence >= 0) > + igt_cork_unplug(&cork); > + > + gem_sync(gem_fd, bo); > + } > + > + gem_close(gem_fd, bo); > + > + for (i = 0; i <= max_rq; i++) > + igt_assert_eq(queued[i], i); > +} > + > +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags) > +{ > + if (gem_can_store_dword(fd, flags)) > + return __igt_spin_batch_new_poll(fd, ctx, flags); > + else > + return __igt_spin_batch_new(fd, ctx, flags, 0); > +} > + > +static unsigned long __spin_wait(igt_spin_t *spin, unsigned int n) > +{ > + struct timespec ts = { }; > + unsigned long t; > + > + igt_nsec_elapsed(&ts); > + > + if (spin->running) { > + igt_spin_busywait_until_running(spin); > + } else { > + igt_debug("__spin_wait - usleep mode\n"); > + usleep(500e3); /* Better than nothing! */ > + } > + > + t = igt_nsec_elapsed(&ts); > + > + return spin->running ? t : 500e6 / n; > +} > + > +/* > + * Test that the number of requests ready for execution but waiting on space on > + * GPU is correctly reported. > + */ > +static void > +engine_runnable(int gem_fd, const struct intel_execution_engine2 *e) > +{ > + const unsigned long engine = e2ring(gem_fd, e); > + struct drm_i915_query_engine_queues queues; > + bool contexts = gem_has_contexts(gem_fd); > + const unsigned int max_rq = 10; > + igt_spin_t *spin[max_rq + 1]; > + uint32_t runnable[max_rq + 1]; > + uint32_t ctx[max_rq]; > + unsigned int n, i; > + > + memset(runnable, 0, sizeof(runnable)); > + > + if (contexts) { > + for (i = 0; i < max_rq; i++) > + ctx[i] = gem_context_create(gem_fd); > + } > + > + /* > + * Submit different number of requests, potentially against different > + * contexts, in order to provoke engine runnable metric returning > + * different numbers. 
> + */ > + for (n = 0; n <= max_rq; n++) { > + gem_quiescent_gpu(gem_fd); > + > + for (i = 0; i < n; i++) { > + uint32_t ctx_ = contexts ? ctx[i] : 0; > + > + if (i == 0) > + spin[i] = __spin_poll(gem_fd, ctx_, engine); > + else > + spin[i] = __igt_spin_batch_new(gem_fd, ctx_, > + engine, 0); > + } > + > + if (n) > + usleep(__spin_wait(spin[0], n) * n); > + > + /* Query and store for later checking. */ > + __query_queues(gem_fd, e, &queues); > + runnable[n] = queues.runnable; > + igt_info("n=%u runnable=%u\n", n, runnable[n]); > + > + for (i = 0; i < n; i++) { > + igt_spin_batch_end(spin[i]); > + gem_sync(gem_fd, spin[i]->handle); > + igt_spin_batch_free(gem_fd, spin[i]); > + } > + } > + > + if (contexts) { > + for (i = 0; i < max_rq; i++) > + gem_context_destroy(gem_fd, ctx[i]); > + } > + > + /* > + * Check that the runnable metric is zero when nothing is submitted, > + * and that it is greater than zero on the maximum queue depth. > + * > + * We cannot assert the exact value since we do not know how many > + * requests can the submission backend consume. > + */ > + igt_assert_eq(runnable[0], 0); > + igt_assert(runnable[max_rq] > 0); > + > + /* > + * We can only test that the runnable metric is growing by one if we > + * have context support. > + */ > + if (contexts) > + igt_assert_eq(runnable[max_rq] - runnable[max_rq - 1], 1); > +} > + > +/* > + * Test that the number of requests currently executing on the GPU is correctly > + * reported. > + */ > +static void > +engine_running(int gem_fd, const struct intel_execution_engine2 *e) > +{ > + const unsigned long engine = e2ring(gem_fd, e); > + struct drm_i915_query_engine_queues queues; > + const unsigned int max_rq = 10; > + igt_spin_t *spin[max_rq + 1]; > + uint32_t running[max_rq + 1]; > + unsigned int n, i; > + > + memset(running, 0, sizeof(running)); > + memset(spin, 0, sizeof(spin)); > + > + /* > + * Create various queue depths of requests against the same context to > + * try and get submission backed execute one or more on the GPU. > + */ > + for (n = 0; n <= max_rq; n++) { > + gem_quiescent_gpu(gem_fd); > + > + for (i = 0; i < n; i++) { > + if (i == 0) > + spin[i] = __spin_poll(gem_fd, 0, engine); > + else > + spin[i] = __igt_spin_batch_new(gem_fd, 0, > + engine, 0); > + } > + > + if (n) > + usleep(__spin_wait(spin[0], n) * n); > + > + /* Query and store for later checking. */ > + __query_queues(gem_fd, e, &queues); > + running[n] = queues.running; > + igt_info("n=%u running=%u\n", n, running[n]); > + > + for (i = 0; i < n; i++) { > + igt_spin_batch_end(spin[i]); > + gem_sync(gem_fd, spin[i]->handle); > + igt_spin_batch_free(gem_fd, spin[i]); > + } > + } > + > + /* > + * Check that the running metric is zero when nothing is submitted, > + * one when one request is submitted, and at least one for any greater > + * queue depth. > + * > + * We cannot assert the exact value since we do not know how many > + * requests can the submission backend consume. 
> + */ > + igt_assert_eq(running[0], 0); > + for (i = 1; i <= max_rq; i++) > + igt_assert(running[i] > 0); > +} > + > igt_main > { > + const struct intel_execution_engine2 *e; > int fd = -1; > int devid; > > @@ -524,6 +931,41 @@ igt_main > test_query_topology_known_pci_ids(fd, devid); > } > > + igt_subtest_group { > + igt_fixture { > + igt_require(query_engine_queues_supported(fd)); > + } > + > + igt_subtest("engine-queues-invalid") > + engine_queues_invalid(fd); > + > + __for_each_engine_class_instance(fd, e) { > + igt_subtest_group { > + igt_fixture { > + gem_require_engine(fd, > + e->class, > + e->instance); > + } > + > + igt_subtest_f("engine-queues-%s", e->name) > + engine_queues(fd, e); > + > + igt_subtest_f("engine-queued-%s", e->name) > + engine_queued(fd, e, 0); > + > + igt_subtest_f("engine-queued-contexts-%s", > + e->name) > + engine_queued(fd, e, TEST_CONTEXTS); > + > + igt_subtest_f("engine-runnable-%s", e->name) > + engine_runnable(fd, e); > + > + igt_subtest_f("engine-running-%s", e->name) > + engine_running(fd, e); > + } > + } > + } > + > igt_fixture { > close(fd); > }
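[Editor's note: as a reference for the points raised above, here is a rough sketch, not part of the posted patch, of the two extra invalid-parameter checks being suggested for engine_queues_invalid(). It reuses the helpers the test already relies on (i915_query_items(), to_user_pointer(), igt_assert_eq()) and assumes I915_ENGINE_CLASS_INVALID is visible via the updated i915_drm.h copy.]

	/* Bogus data_ptr variant: ULONG_MAX is assumed to never be a valid
	 * user mapping, so the query should be rejected with -EFAULT.
	 */
	memset(&item, 0, sizeof(item));
	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
	item.length = len;
	item.data_ptr = ULONG_MAX;
	i915_query_items(fd, &item, 1);
	igt_assert_eq(item.length, -EFAULT);

	/* Unknown engine class spelled via the uAPI constant instead of a bare -1. */
	memset(&queues, 0, sizeof(queues));
	queues.class = I915_ENGINE_CLASS_INVALID;
	memset(&item, 0, sizeof(item));
	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
	item.length = len;
	item.data_ptr = to_user_pointer(&queues);
	i915_query_items(fd, &item, 1);
	igt_assert_eq(item.length, -ENOENT);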
On 06/06/2018 15:33, Lionel Landwerlin wrote: > Just a few suggestions below. Otherwise looks good to me. > > On 06/06/18 13:49, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> Basic tests to cover engine queued/runnable/running metric as reported >> by the DRM_I915_QUERY_ENGINE_QUEUES query. >> >> v2: >> * Update ABI for i915 changes. >> * Use igt_spin_busywait_until_running. >> * Support no hardware contexts. >> * More comments. (Lionel Landwerlin) >> Chris Wilson: >> * Check for sw sync support. >> * Multiple contexts queued test. >> * Simplify context and bb allocation. >> * Fix asserts in the running subtest. >> >> v3: >> * Update for uAPI change. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> --- >> tests/i915_query.c | 442 +++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 442 insertions(+) >> >> diff --git a/tests/i915_query.c b/tests/i915_query.c >> index c7de8cbd8371..588428749c50 100644 >> --- a/tests/i915_query.c >> +++ b/tests/i915_query.c >> @@ -22,6 +22,7 @@ >> */ >> #include "igt.h" >> +#include "sw_sync.h" >> #include <limits.h> >> @@ -477,8 +478,414 @@ test_query_topology_known_pci_ids(int fd, int >> devid) >> free(topo_info); >> } >> +#define DRM_I915_QUERY_ENGINE_QUEUES 2 >> + >> +struct drm_i915_query_engine_queues { >> + /** Engine class as in enum drm_i915_gem_engine_class. */ >> + __u16 class; >> + >> + /** Engine instance number. */ >> + __u16 instance; >> + >> + /** Number of requests with unresolved fences and dependencies. */ >> + __u32 queued; >> + >> + /** Number of ready requests waiting on a slot on GPU. */ >> + __u32 runnable; >> + >> + /** Number of requests executing on the GPU. */ >> + __u32 running; >> + >> + __u32 rsvd[6]; >> +}; > > I suppose this above would be in the updated i915_drm.h. > I thought you would have put it there as part of your first commit. Yes, or I should prefix the copy with local_ as we usually do. >> + >> +static bool query_engine_queues_supported(int fd) >> +{ >> + struct drm_i915_query_item item = { >> + .query_id = DRM_I915_QUERY_ENGINE_QUEUES, >> + }; >> + >> + return __i915_query_items(fd, &item, 1) == 0 && item.length > 0; >> +} >> + >> +static void engine_queues_invalid(int fd) >> +{ >> + struct drm_i915_query_engine_queues queues; >> + struct drm_i915_query_item item; >> + unsigned int len; >> + unsigned int i; >> + >> + /* Flags is MBZ. */ >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.flags = 1; >> + i915_query_items(fd, &item, 1); >> + igt_assert_eq(item.length, -EINVAL); >> + >> + /* Length not zero and not greater or equal required size. */ >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.length = 1; >> + i915_query_items(fd, &item, 1); >> + igt_assert_eq(item.length, -EINVAL); >> + >> + /* Query correct length. */ >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + i915_query_items(fd, &item, 1); >> + igt_assert(item.length >= 0); > > Could be item.length > 0? True, should be even. :) > >> + len = item.length; >> + >> + /* Ivalid pointer. */ >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.length = len; >> + i915_query_items(fd, &item, 1); >> + igt_assert_eq(item.length, -EFAULT); > > Maybe verify ULONG_MAX too? item.data_ptr = ULONG_MAX? Can do. > >> + >> + /* Reserved fields are MBZ. 
*/ >> + >> + for (i = 0; i < ARRAY_SIZE(queues.rsvd); i++) { >> + memset(&queues, 0, sizeof(queues)); >> + queues.rsvd[i] = 1; >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.length = len; >> + item.data_ptr = to_user_pointer(&queues); >> + i915_query_items(fd, &item, 1); >> + igt_assert_eq(item.length, -EINVAL); >> + } >> + >> + memset(&queues, 0, sizeof(queues)); >> + queues.class = -1; > > class = I915_ENGINE_CLASS_INVALID ? Yes thanks! > >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.length = len; >> + item.data_ptr = to_user_pointer(&queues); >> + i915_query_items(fd, &item, 1); >> + igt_assert_eq(item.length, -ENOENT); >> + >> + memset(&queues, 0, sizeof(queues)); >> + queues.instance = -1; >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.length = len; >> + item.data_ptr = to_user_pointer(&queues); >> + i915_query_items(fd, &item, 1); >> + igt_assert_eq(item.length, -ENOENT); >> +} >> + >> +static void engine_queues(int fd, const struct >> intel_execution_engine2 *e) >> +{ >> + struct drm_i915_query_engine_queues queues; >> + struct drm_i915_query_item item; >> + unsigned int len; >> + >> + /* Query required buffer length. */ >> + memset(&queues, 0, sizeof(queues)); >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.data_ptr = to_user_pointer(&queues); >> + i915_query_items(fd, &item, 1); >> + igt_assert(item.length >= 0); > > item.length > 0? Marking on TODO.. > >> + igt_assert(item.length <= sizeof(queues)); > > I guess we can expect item.length == sizeof(queues) here. Hmm yes, no idea what I was thinking there. :I > > >> + len = item.length; >> + >> + /* Check length larger than required works and reports same >> length. */ >> + memset(&queues, 0, sizeof(queues)); >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.data_ptr = to_user_pointer(&queues); >> + item.length = len + 1; >> + i915_query_items(fd, &item, 1); >> + igt_assert_eq(item.length, len); >> + >> + /* Actual query. */ >> + memset(&queues, 0, sizeof(queues)); >> + queues.class = e->class; >> + queues.instance = e->instance; >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.data_ptr = to_user_pointer(&queues); >> + item.length = len; >> + i915_query_items(fd, &item, 1); >> + igt_assert_eq(item.length, len); >> +} >> + >> +static unsigned int e2ring(int gem_fd, const struct >> intel_execution_engine2 *e) >> +{ >> + return gem_class_instance_to_eb_flags(gem_fd, e->class, >> e->instance); >> +} >> + >> +static void >> +__query_queues(int fd, const struct intel_execution_engine2 *e, >> + struct drm_i915_query_engine_queues *queues) >> +{ >> + struct drm_i915_query_item item; >> + >> + memset(queues, 0, sizeof(*queues)); >> + queues->class = e->class; >> + queues->instance = e->instance; >> + memset(&item, 0, sizeof(item)); >> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >> + item.data_ptr = to_user_pointer(queues); >> + item.length = sizeof(*queues); >> + i915_query_items(fd, &item, 1); >> + igt_assert_eq(item.length, sizeof(*queues)); >> +} >> + >> +#define TEST_CONTEXTS (1 << 0) >> + >> +/* >> + * Test that the reported number of queued (not ready for execution >> due fences >> + * or dependencies) requests on an engine is correct. 
>> + */ >> +static void >> +engine_queued(int gem_fd, const struct intel_execution_engine2 *e, >> + unsigned int flags) >> +{ >> + const unsigned long engine = e2ring(gem_fd, e); >> + struct drm_i915_query_engine_queues queues; >> + const uint32_t bbe = MI_BATCH_BUFFER_END; >> + const unsigned int max_rq = 10; >> + uint32_t queued[max_rq + 1]; >> + unsigned int n, i; >> + uint32_t bo; >> + >> + igt_require_sw_sync(); >> + if (flags & TEST_CONTEXTS) >> + gem_require_contexts(gem_fd); >> + >> + memset(queued, 0, sizeof(queued)); >> + >> + bo = gem_create(gem_fd, 4096); >> + gem_write(gem_fd, bo, 4092, &bbe, sizeof(bbe)); >> + >> + /* Create a specific queue depth of unready requests. */ >> + for (n = 0; n <= max_rq; n++) { >> + int fence = -1; >> + IGT_CORK_FENCE(cork); >> + >> + gem_quiescent_gpu(gem_fd); >> + >> + /* Create a cork so we can create a dependency chain. */ >> + if (n) >> + fence = igt_cork_plug(&cork, -1); >> + >> + /* Submit n unready requests depending on the cork. */ >> + for (i = 0; i < n; i++) { >> + struct drm_i915_gem_exec_object2 obj = { }; >> + struct drm_i915_gem_execbuffer2 eb = { }; >> + >> + obj.handle = bo; >> + >> + eb.buffer_count = 1; >> + eb.buffers_ptr = to_user_pointer(&obj); >> + >> + eb.flags = engine | I915_EXEC_FENCE_IN; >> + >> + /* >> + * In context mode each submission is on a separate >> + * context. >> + */ >> + if (flags & TEST_CONTEXTS) >> + eb.rsvd1 = gem_context_create(gem_fd); >> + >> + eb.rsvd2 = fence; >> + >> + gem_execbuf(gem_fd, &eb); >> + >> + if (flags & TEST_CONTEXTS) >> + gem_context_destroy(gem_fd, eb.rsvd1); >> + } >> + >> + /* Store reported queue depth to assert against later. */ >> + __query_queues(gem_fd, e, &queues); >> + queued[n] = queues.queued; >> + igt_info("n=%u queued=%u\n", n, queued[n]); >> + >> + /* Unplug the queue and proceed to the next queue depth. */ >> + if (fence >= 0) >> + igt_cork_unplug(&cork); >> + >> + gem_sync(gem_fd, bo); >> + } >> + >> + gem_close(gem_fd, bo); >> + >> + for (i = 0; i <= max_rq; i++) >> + igt_assert_eq(queued[i], i); >> +} >> + >> +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long >> flags) >> +{ >> + if (gem_can_store_dword(fd, flags)) >> + return __igt_spin_batch_new_poll(fd, ctx, flags); >> + else >> + return __igt_spin_batch_new(fd, ctx, flags, 0); >> +} >> + >> +static unsigned long __spin_wait(igt_spin_t *spin, unsigned int n) >> +{ >> + struct timespec ts = { }; >> + unsigned long t; >> + >> + igt_nsec_elapsed(&ts); >> + >> + if (spin->running) { >> + igt_spin_busywait_until_running(spin); >> + } else { >> + igt_debug("__spin_wait - usleep mode\n"); >> + usleep(500e3); /* Better than nothing! */ >> + } >> + >> + t = igt_nsec_elapsed(&ts); >> + >> + return spin->running ? t : 500e6 / n; >> +} >> + >> +/* >> + * Test that the number of requests ready for execution but waiting >> on space on >> + * GPU is correctly reported. 
>> + */ >> +static void >> +engine_runnable(int gem_fd, const struct intel_execution_engine2 *e) >> +{ >> + const unsigned long engine = e2ring(gem_fd, e); >> + struct drm_i915_query_engine_queues queues; >> + bool contexts = gem_has_contexts(gem_fd); >> + const unsigned int max_rq = 10; >> + igt_spin_t *spin[max_rq + 1]; >> + uint32_t runnable[max_rq + 1]; >> + uint32_t ctx[max_rq]; >> + unsigned int n, i; >> + >> + memset(runnable, 0, sizeof(runnable)); >> + >> + if (contexts) { >> + for (i = 0; i < max_rq; i++) >> + ctx[i] = gem_context_create(gem_fd); >> + } >> + >> + /* >> + * Submit different number of requests, potentially against >> different >> + * contexts, in order to provoke engine runnable metric returning >> + * different numbers. >> + */ >> + for (n = 0; n <= max_rq; n++) { >> + gem_quiescent_gpu(gem_fd); >> + >> + for (i = 0; i < n; i++) { >> + uint32_t ctx_ = contexts ? ctx[i] : 0; >> + >> + if (i == 0) >> + spin[i] = __spin_poll(gem_fd, ctx_, engine); >> + else >> + spin[i] = __igt_spin_batch_new(gem_fd, ctx_, >> + engine, 0); >> + } >> + >> + if (n) >> + usleep(__spin_wait(spin[0], n) * n); >> + >> + /* Query and store for later checking. */ >> + __query_queues(gem_fd, e, &queues); >> + runnable[n] = queues.runnable; >> + igt_info("n=%u runnable=%u\n", n, runnable[n]); >> + >> + for (i = 0; i < n; i++) { >> + igt_spin_batch_end(spin[i]); >> + gem_sync(gem_fd, spin[i]->handle); >> + igt_spin_batch_free(gem_fd, spin[i]); >> + } >> + } >> + >> + if (contexts) { >> + for (i = 0; i < max_rq; i++) >> + gem_context_destroy(gem_fd, ctx[i]); >> + } >> + >> + /* >> + * Check that the runnable metric is zero when nothing is submitted, >> + * and that it is greater than zero on the maximum queue depth. >> + * >> + * We cannot assert the exact value since we do not know how many >> + * requests can the submission backend consume. >> + */ >> + igt_assert_eq(runnable[0], 0); >> + igt_assert(runnable[max_rq] > 0); >> + >> + /* >> + * We can only test that the runnable metric is growing by one if we >> + * have context support. >> + */ >> + if (contexts) >> + igt_assert_eq(runnable[max_rq] - runnable[max_rq - 1], 1); >> +} >> + >> +/* >> + * Test that the number of requests currently executing on the GPU is >> correctly >> + * reported. >> + */ >> +static void >> +engine_running(int gem_fd, const struct intel_execution_engine2 *e) >> +{ >> + const unsigned long engine = e2ring(gem_fd, e); >> + struct drm_i915_query_engine_queues queues; >> + const unsigned int max_rq = 10; >> + igt_spin_t *spin[max_rq + 1]; >> + uint32_t running[max_rq + 1]; >> + unsigned int n, i; >> + >> + memset(running, 0, sizeof(running)); >> + memset(spin, 0, sizeof(spin)); >> + >> + /* >> + * Create various queue depths of requests against the same >> context to >> + * try and get submission backed execute one or more on the GPU. >> + */ >> + for (n = 0; n <= max_rq; n++) { >> + gem_quiescent_gpu(gem_fd); >> + >> + for (i = 0; i < n; i++) { >> + if (i == 0) >> + spin[i] = __spin_poll(gem_fd, 0, engine); >> + else >> + spin[i] = __igt_spin_batch_new(gem_fd, 0, >> + engine, 0); >> + } >> + >> + if (n) >> + usleep(__spin_wait(spin[0], n) * n); >> + >> + /* Query and store for later checking. 
*/ >> + __query_queues(gem_fd, e, &queues); >> + running[n] = queues.running; >> + igt_info("n=%u running=%u\n", n, running[n]); >> + >> + for (i = 0; i < n; i++) { >> + igt_spin_batch_end(spin[i]); >> + gem_sync(gem_fd, spin[i]->handle); >> + igt_spin_batch_free(gem_fd, spin[i]); >> + } >> + } >> + >> + /* >> + * Check that the running metric is zero when nothing is submitted, >> + * one when one request is submitted, and at least one for any >> greater >> + * queue depth. >> + * >> + * We cannot assert the exact value since we do not know how many >> + * requests can the submission backend consume. >> + */ >> + igt_assert_eq(running[0], 0); >> + for (i = 1; i <= max_rq; i++) >> + igt_assert(running[i] > 0); >> +} >> + >> igt_main >> { >> + const struct intel_execution_engine2 *e; >> int fd = -1; >> int devid; >> @@ -524,6 +931,41 @@ igt_main >> test_query_topology_known_pci_ids(fd, devid); >> } >> + igt_subtest_group { >> + igt_fixture { >> + igt_require(query_engine_queues_supported(fd)); >> + } >> + >> + igt_subtest("engine-queues-invalid") >> + engine_queues_invalid(fd); >> + >> + __for_each_engine_class_instance(fd, e) { >> + igt_subtest_group { >> + igt_fixture { >> + gem_require_engine(fd, >> + e->class, >> + e->instance); >> + } >> + >> + igt_subtest_f("engine-queues-%s", e->name) >> + engine_queues(fd, e); >> + >> + igt_subtest_f("engine-queued-%s", e->name) >> + engine_queued(fd, e, 0); >> + >> + igt_subtest_f("engine-queued-contexts-%s", >> + e->name) >> + engine_queued(fd, e, TEST_CONTEXTS); >> + >> + igt_subtest_f("engine-runnable-%s", e->name) >> + engine_runnable(fd, e); >> + >> + igt_subtest_f("engine-running-%s", e->name) >> + engine_running(fd, e); >> + } >> + } >> + } >> + >> igt_fixture { >> close(fd); >> } Thanks for reading it through. I have marked the tweaks for the next resend. Problem with this query is that userspace is AWOL so until it materializes it will be just for reference. Regards, Tvrtko
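[Editor's note: a minimal sketch of the tightened length probe for engine_queues() agreed in the exchange above, i.e. asserting a strictly positive length that is exactly the size of the struct rather than merely non-negative and not larger; the surrounding flow is unchanged from the posted code.]

	/* Query required buffer length; for this query it should be exactly
	 * the size of struct drm_i915_query_engine_queues.
	 */
	memset(&queues, 0, sizeof(queues));
	memset(&item, 0, sizeof(item));
	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
	item.data_ptr = to_user_pointer(&queues);
	i915_query_items(fd, &item, 1);
	igt_assert(item.length > 0);
	igt_assert_eq(item.length, sizeof(queues));
	len = item.length;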
On 08/06/18 11:02, Tvrtko Ursulin wrote: > > On 06/06/2018 15:33, Lionel Landwerlin wrote: >> Just a few suggestions below. Otherwise looks good to me. >> >> On 06/06/18 13:49, Tvrtko Ursulin wrote: >>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>> >>> Basic tests to cover engine queued/runnable/running metric as reported >>> by the DRM_I915_QUERY_ENGINE_QUEUES query. >>> >>> v2: >>> * Update ABI for i915 changes. >>> * Use igt_spin_busywait_until_running. >>> * Support no hardware contexts. >>> * More comments. (Lionel Landwerlin) >>> Chris Wilson: >>> * Check for sw sync support. >>> * Multiple contexts queued test. >>> * Simplify context and bb allocation. >>> * Fix asserts in the running subtest. >>> >>> v3: >>> * Update for uAPI change. >>> >>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >>> --- >>> tests/i915_query.c | 442 >>> +++++++++++++++++++++++++++++++++++++++++++++ >>> 1 file changed, 442 insertions(+) >>> >>> diff --git a/tests/i915_query.c b/tests/i915_query.c >>> index c7de8cbd8371..588428749c50 100644 >>> --- a/tests/i915_query.c >>> +++ b/tests/i915_query.c >>> @@ -22,6 +22,7 @@ >>> */ >>> #include "igt.h" >>> +#include "sw_sync.h" >>> #include <limits.h> >>> @@ -477,8 +478,414 @@ test_query_topology_known_pci_ids(int fd, int >>> devid) >>> free(topo_info); >>> } >>> +#define DRM_I915_QUERY_ENGINE_QUEUES 2 >>> + >>> +struct drm_i915_query_engine_queues { >>> + /** Engine class as in enum drm_i915_gem_engine_class. */ >>> + __u16 class; >>> + >>> + /** Engine instance number. */ >>> + __u16 instance; >>> + >>> + /** Number of requests with unresolved fences and dependencies. */ >>> + __u32 queued; >>> + >>> + /** Number of ready requests waiting on a slot on GPU. */ >>> + __u32 runnable; >>> + >>> + /** Number of requests executing on the GPU. */ >>> + __u32 running; >>> + >>> + __u32 rsvd[6]; >>> +}; >> >> I suppose this above would be in the updated i915_drm.h. >> I thought you would have put it there as part of your first commit. > > Yes, or I should prefix the copy with local_ as we usually do. I think the point of copying the headers locally was to get rid of the local_ etc... > >>> + >>> +static bool query_engine_queues_supported(int fd) >>> +{ >>> + struct drm_i915_query_item item = { >>> + .query_id = DRM_I915_QUERY_ENGINE_QUEUES, >>> + }; >>> + >>> + return __i915_query_items(fd, &item, 1) == 0 && item.length > 0; >>> +} >>> + >>> +static void engine_queues_invalid(int fd) >>> +{ >>> + struct drm_i915_query_engine_queues queues; >>> + struct drm_i915_query_item item; >>> + unsigned int len; >>> + unsigned int i; >>> + >>> + /* Flags is MBZ. */ >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.flags = 1; >>> + i915_query_items(fd, &item, 1); >>> + igt_assert_eq(item.length, -EINVAL); >>> + >>> + /* Length not zero and not greater or equal required size. */ >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.length = 1; >>> + i915_query_items(fd, &item, 1); >>> + igt_assert_eq(item.length, -EINVAL); >>> + >>> + /* Query correct length. */ >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + i915_query_items(fd, &item, 1); >>> + igt_assert(item.length >= 0); >> >> Could be item.length > 0? > > True, should be even. :) > >> >>> + len = item.length; >>> + >>> + /* Ivalid pointer. 
*/ >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.length = len; >>> + i915_query_items(fd, &item, 1); >>> + igt_assert_eq(item.length, -EFAULT); >> >> Maybe verify ULONG_MAX too? > > item.data_ptr = ULONG_MAX? Can do. > >> >>> + >>> + /* Reserved fields are MBZ. */ >>> + >>> + for (i = 0; i < ARRAY_SIZE(queues.rsvd); i++) { >>> + memset(&queues, 0, sizeof(queues)); >>> + queues.rsvd[i] = 1; >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.length = len; >>> + item.data_ptr = to_user_pointer(&queues); >>> + i915_query_items(fd, &item, 1); >>> + igt_assert_eq(item.length, -EINVAL); >>> + } >>> + >>> + memset(&queues, 0, sizeof(queues)); >>> + queues.class = -1; >> >> class = I915_ENGINE_CLASS_INVALID ? > > Yes thanks! > >> >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.length = len; >>> + item.data_ptr = to_user_pointer(&queues); >>> + i915_query_items(fd, &item, 1); >>> + igt_assert_eq(item.length, -ENOENT); >>> + >>> + memset(&queues, 0, sizeof(queues)); >>> + queues.instance = -1; >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.length = len; >>> + item.data_ptr = to_user_pointer(&queues); >>> + i915_query_items(fd, &item, 1); >>> + igt_assert_eq(item.length, -ENOENT); >>> +} >>> + >>> +static void engine_queues(int fd, const struct >>> intel_execution_engine2 *e) >>> +{ >>> + struct drm_i915_query_engine_queues queues; >>> + struct drm_i915_query_item item; >>> + unsigned int len; >>> + >>> + /* Query required buffer length. */ >>> + memset(&queues, 0, sizeof(queues)); >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.data_ptr = to_user_pointer(&queues); >>> + i915_query_items(fd, &item, 1); >>> + igt_assert(item.length >= 0); >> >> item.length > 0? > > Marking on TODO.. > >> >>> + igt_assert(item.length <= sizeof(queues)); >> >> I guess we can expect item.length == sizeof(queues) here. > > Hmm yes, no idea what I was thinking there. :I > >> >> >>> + len = item.length; >>> + >>> + /* Check length larger than required works and reports same >>> length. */ >>> + memset(&queues, 0, sizeof(queues)); >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.data_ptr = to_user_pointer(&queues); >>> + item.length = len + 1; >>> + i915_query_items(fd, &item, 1); >>> + igt_assert_eq(item.length, len); >>> + >>> + /* Actual query. 
*/ >>> + memset(&queues, 0, sizeof(queues)); >>> + queues.class = e->class; >>> + queues.instance = e->instance; >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.data_ptr = to_user_pointer(&queues); >>> + item.length = len; >>> + i915_query_items(fd, &item, 1); >>> + igt_assert_eq(item.length, len); >>> +} >>> + >>> +static unsigned int e2ring(int gem_fd, const struct >>> intel_execution_engine2 *e) >>> +{ >>> + return gem_class_instance_to_eb_flags(gem_fd, e->class, >>> e->instance); >>> +} >>> + >>> +static void >>> +__query_queues(int fd, const struct intel_execution_engine2 *e, >>> + struct drm_i915_query_engine_queues *queues) >>> +{ >>> + struct drm_i915_query_item item; >>> + >>> + memset(queues, 0, sizeof(*queues)); >>> + queues->class = e->class; >>> + queues->instance = e->instance; >>> + memset(&item, 0, sizeof(item)); >>> + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; >>> + item.data_ptr = to_user_pointer(queues); >>> + item.length = sizeof(*queues); >>> + i915_query_items(fd, &item, 1); >>> + igt_assert_eq(item.length, sizeof(*queues)); >>> +} >>> + >>> +#define TEST_CONTEXTS (1 << 0) >>> + >>> +/* >>> + * Test that the reported number of queued (not ready for execution >>> due fences >>> + * or dependencies) requests on an engine is correct. >>> + */ >>> +static void >>> +engine_queued(int gem_fd, const struct intel_execution_engine2 *e, >>> + unsigned int flags) >>> +{ >>> + const unsigned long engine = e2ring(gem_fd, e); >>> + struct drm_i915_query_engine_queues queues; >>> + const uint32_t bbe = MI_BATCH_BUFFER_END; >>> + const unsigned int max_rq = 10; >>> + uint32_t queued[max_rq + 1]; >>> + unsigned int n, i; >>> + uint32_t bo; >>> + >>> + igt_require_sw_sync(); >>> + if (flags & TEST_CONTEXTS) >>> + gem_require_contexts(gem_fd); >>> + >>> + memset(queued, 0, sizeof(queued)); >>> + >>> + bo = gem_create(gem_fd, 4096); >>> + gem_write(gem_fd, bo, 4092, &bbe, sizeof(bbe)); >>> + >>> + /* Create a specific queue depth of unready requests. */ >>> + for (n = 0; n <= max_rq; n++) { >>> + int fence = -1; >>> + IGT_CORK_FENCE(cork); >>> + >>> + gem_quiescent_gpu(gem_fd); >>> + >>> + /* Create a cork so we can create a dependency chain. */ >>> + if (n) >>> + fence = igt_cork_plug(&cork, -1); >>> + >>> + /* Submit n unready requests depending on the cork. */ >>> + for (i = 0; i < n; i++) { >>> + struct drm_i915_gem_exec_object2 obj = { }; >>> + struct drm_i915_gem_execbuffer2 eb = { }; >>> + >>> + obj.handle = bo; >>> + >>> + eb.buffer_count = 1; >>> + eb.buffers_ptr = to_user_pointer(&obj); >>> + >>> + eb.flags = engine | I915_EXEC_FENCE_IN; >>> + >>> + /* >>> + * In context mode each submission is on a separate >>> + * context. >>> + */ >>> + if (flags & TEST_CONTEXTS) >>> + eb.rsvd1 = gem_context_create(gem_fd); >>> + >>> + eb.rsvd2 = fence; >>> + >>> + gem_execbuf(gem_fd, &eb); >>> + >>> + if (flags & TEST_CONTEXTS) >>> + gem_context_destroy(gem_fd, eb.rsvd1); >>> + } >>> + >>> + /* Store reported queue depth to assert against later. */ >>> + __query_queues(gem_fd, e, &queues); >>> + queued[n] = queues.queued; >>> + igt_info("n=%u queued=%u\n", n, queued[n]); >>> + >>> + /* Unplug the queue and proceed to the next queue depth. 
*/ >>> + if (fence >= 0) >>> + igt_cork_unplug(&cork); >>> + >>> + gem_sync(gem_fd, bo); >>> + } >>> + >>> + gem_close(gem_fd, bo); >>> + >>> + for (i = 0; i <= max_rq; i++) >>> + igt_assert_eq(queued[i], i); >>> +} >>> + >>> +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long >>> flags) >>> +{ >>> + if (gem_can_store_dword(fd, flags)) >>> + return __igt_spin_batch_new_poll(fd, ctx, flags); >>> + else >>> + return __igt_spin_batch_new(fd, ctx, flags, 0); >>> +} >>> + >>> +static unsigned long __spin_wait(igt_spin_t *spin, unsigned int n) >>> +{ >>> + struct timespec ts = { }; >>> + unsigned long t; >>> + >>> + igt_nsec_elapsed(&ts); >>> + >>> + if (spin->running) { >>> + igt_spin_busywait_until_running(spin); >>> + } else { >>> + igt_debug("__spin_wait - usleep mode\n"); >>> + usleep(500e3); /* Better than nothing! */ >>> + } >>> + >>> + t = igt_nsec_elapsed(&ts); >>> + >>> + return spin->running ? t : 500e6 / n; >>> +} >>> + >>> +/* >>> + * Test that the number of requests ready for execution but waiting >>> on space on >>> + * GPU is correctly reported. >>> + */ >>> +static void >>> +engine_runnable(int gem_fd, const struct intel_execution_engine2 *e) >>> +{ >>> + const unsigned long engine = e2ring(gem_fd, e); >>> + struct drm_i915_query_engine_queues queues; >>> + bool contexts = gem_has_contexts(gem_fd); >>> + const unsigned int max_rq = 10; >>> + igt_spin_t *spin[max_rq + 1]; >>> + uint32_t runnable[max_rq + 1]; >>> + uint32_t ctx[max_rq]; >>> + unsigned int n, i; >>> + >>> + memset(runnable, 0, sizeof(runnable)); >>> + >>> + if (contexts) { >>> + for (i = 0; i < max_rq; i++) >>> + ctx[i] = gem_context_create(gem_fd); >>> + } >>> + >>> + /* >>> + * Submit different number of requests, potentially against >>> different >>> + * contexts, in order to provoke engine runnable metric returning >>> + * different numbers. >>> + */ >>> + for (n = 0; n <= max_rq; n++) { >>> + gem_quiescent_gpu(gem_fd); >>> + >>> + for (i = 0; i < n; i++) { >>> + uint32_t ctx_ = contexts ? ctx[i] : 0; >>> + >>> + if (i == 0) >>> + spin[i] = __spin_poll(gem_fd, ctx_, engine); >>> + else >>> + spin[i] = __igt_spin_batch_new(gem_fd, ctx_, >>> + engine, 0); >>> + } >>> + >>> + if (n) >>> + usleep(__spin_wait(spin[0], n) * n); >>> + >>> + /* Query and store for later checking. */ >>> + __query_queues(gem_fd, e, &queues); >>> + runnable[n] = queues.runnable; >>> + igt_info("n=%u runnable=%u\n", n, runnable[n]); >>> + >>> + for (i = 0; i < n; i++) { >>> + igt_spin_batch_end(spin[i]); >>> + gem_sync(gem_fd, spin[i]->handle); >>> + igt_spin_batch_free(gem_fd, spin[i]); >>> + } >>> + } >>> + >>> + if (contexts) { >>> + for (i = 0; i < max_rq; i++) >>> + gem_context_destroy(gem_fd, ctx[i]); >>> + } >>> + >>> + /* >>> + * Check that the runnable metric is zero when nothing is >>> submitted, >>> + * and that it is greater than zero on the maximum queue depth. >>> + * >>> + * We cannot assert the exact value since we do not know how many >>> + * requests can the submission backend consume. >>> + */ >>> + igt_assert_eq(runnable[0], 0); >>> + igt_assert(runnable[max_rq] > 0); >>> + >>> + /* >>> + * We can only test that the runnable metric is growing by one >>> if we >>> + * have context support. >>> + */ >>> + if (contexts) >>> + igt_assert_eq(runnable[max_rq] - runnable[max_rq - 1], 1); >>> +} >>> + >>> +/* >>> + * Test that the number of requests currently executing on the GPU >>> is correctly >>> + * reported. 
>>> + */ >>> +static void >>> +engine_running(int gem_fd, const struct intel_execution_engine2 *e) >>> +{ >>> + const unsigned long engine = e2ring(gem_fd, e); >>> + struct drm_i915_query_engine_queues queues; >>> + const unsigned int max_rq = 10; >>> + igt_spin_t *spin[max_rq + 1]; >>> + uint32_t running[max_rq + 1]; >>> + unsigned int n, i; >>> + >>> + memset(running, 0, sizeof(running)); >>> + memset(spin, 0, sizeof(spin)); >>> + >>> + /* >>> + * Create various queue depths of requests against the same >>> context to >>> + * try and get submission backed execute one or more on the GPU. >>> + */ >>> + for (n = 0; n <= max_rq; n++) { >>> + gem_quiescent_gpu(gem_fd); >>> + >>> + for (i = 0; i < n; i++) { >>> + if (i == 0) >>> + spin[i] = __spin_poll(gem_fd, 0, engine); >>> + else >>> + spin[i] = __igt_spin_batch_new(gem_fd, 0, >>> + engine, 0); >>> + } >>> + >>> + if (n) >>> + usleep(__spin_wait(spin[0], n) * n); >>> + >>> + /* Query and store for later checking. */ >>> + __query_queues(gem_fd, e, &queues); >>> + running[n] = queues.running; >>> + igt_info("n=%u running=%u\n", n, running[n]); >>> + >>> + for (i = 0; i < n; i++) { >>> + igt_spin_batch_end(spin[i]); >>> + gem_sync(gem_fd, spin[i]->handle); >>> + igt_spin_batch_free(gem_fd, spin[i]); >>> + } >>> + } >>> + >>> + /* >>> + * Check that the running metric is zero when nothing is >>> submitted, >>> + * one when one request is submitted, and at least one for any >>> greater >>> + * queue depth. >>> + * >>> + * We cannot assert the exact value since we do not know how many >>> + * requests can the submission backend consume. >>> + */ >>> + igt_assert_eq(running[0], 0); >>> + for (i = 1; i <= max_rq; i++) >>> + igt_assert(running[i] > 0); >>> +} >>> + >>> igt_main >>> { >>> + const struct intel_execution_engine2 *e; >>> int fd = -1; >>> int devid; >>> @@ -524,6 +931,41 @@ igt_main >>> test_query_topology_known_pci_ids(fd, devid); >>> } >>> + igt_subtest_group { >>> + igt_fixture { >>> + igt_require(query_engine_queues_supported(fd)); >>> + } >>> + >>> + igt_subtest("engine-queues-invalid") >>> + engine_queues_invalid(fd); >>> + >>> + __for_each_engine_class_instance(fd, e) { >>> + igt_subtest_group { >>> + igt_fixture { >>> + gem_require_engine(fd, >>> + e->class, >>> + e->instance); >>> + } >>> + >>> + igt_subtest_f("engine-queues-%s", e->name) >>> + engine_queues(fd, e); >>> + >>> + igt_subtest_f("engine-queued-%s", e->name) >>> + engine_queued(fd, e, 0); >>> + >>> + igt_subtest_f("engine-queued-contexts-%s", >>> + e->name) >>> + engine_queued(fd, e, TEST_CONTEXTS); >>> + >>> + igt_subtest_f("engine-runnable-%s", e->name) >>> + engine_runnable(fd, e); >>> + >>> + igt_subtest_f("engine-running-%s", e->name) >>> + engine_running(fd, e); >>> + } >>> + } >>> + } >>> + >>> igt_fixture { >>> close(fd); >>> } > > Thanks for reading it through. I have marked the tweaks for the next > resend. Problem with this query is that userspace is AWOL so until it > materializes it will be just for reference. > > Regards, > > Tvrtko >
diff --git a/tests/i915_query.c b/tests/i915_query.c index c7de8cbd8371..588428749c50 100644 --- a/tests/i915_query.c +++ b/tests/i915_query.c @@ -22,6 +22,7 @@ */ #include "igt.h" +#include "sw_sync.h" #include <limits.h> @@ -477,8 +478,414 @@ test_query_topology_known_pci_ids(int fd, int devid) free(topo_info); } +#define DRM_I915_QUERY_ENGINE_QUEUES 2 + +struct drm_i915_query_engine_queues { + /** Engine class as in enum drm_i915_gem_engine_class. */ + __u16 class; + + /** Engine instance number. */ + __u16 instance; + + /** Number of requests with unresolved fences and dependencies. */ + __u32 queued; + + /** Number of ready requests waiting on a slot on GPU. */ + __u32 runnable; + + /** Number of requests executing on the GPU. */ + __u32 running; + + __u32 rsvd[6]; +}; + +static bool query_engine_queues_supported(int fd) +{ + struct drm_i915_query_item item = { + .query_id = DRM_I915_QUERY_ENGINE_QUEUES, + }; + + return __i915_query_items(fd, &item, 1) == 0 && item.length > 0; +} + +static void engine_queues_invalid(int fd) +{ + struct drm_i915_query_engine_queues queues; + struct drm_i915_query_item item; + unsigned int len; + unsigned int i; + + /* Flags is MBZ. */ + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.flags = 1; + i915_query_items(fd, &item, 1); + igt_assert_eq(item.length, -EINVAL); + + /* Length not zero and not greater or equal required size. */ + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.length = 1; + i915_query_items(fd, &item, 1); + igt_assert_eq(item.length, -EINVAL); + + /* Query correct length. */ + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + i915_query_items(fd, &item, 1); + igt_assert(item.length >= 0); + len = item.length; + + /* Ivalid pointer. */ + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.length = len; + i915_query_items(fd, &item, 1); + igt_assert_eq(item.length, -EFAULT); + + /* Reserved fields are MBZ. */ + + for (i = 0; i < ARRAY_SIZE(queues.rsvd); i++) { + memset(&queues, 0, sizeof(queues)); + queues.rsvd[i] = 1; + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.length = len; + item.data_ptr = to_user_pointer(&queues); + i915_query_items(fd, &item, 1); + igt_assert_eq(item.length, -EINVAL); + } + + memset(&queues, 0, sizeof(queues)); + queues.class = -1; + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.length = len; + item.data_ptr = to_user_pointer(&queues); + i915_query_items(fd, &item, 1); + igt_assert_eq(item.length, -ENOENT); + + memset(&queues, 0, sizeof(queues)); + queues.instance = -1; + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.length = len; + item.data_ptr = to_user_pointer(&queues); + i915_query_items(fd, &item, 1); + igt_assert_eq(item.length, -ENOENT); +} + +static void engine_queues(int fd, const struct intel_execution_engine2 *e) +{ + struct drm_i915_query_engine_queues queues; + struct drm_i915_query_item item; + unsigned int len; + + /* Query required buffer length. */ + memset(&queues, 0, sizeof(queues)); + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.data_ptr = to_user_pointer(&queues); + i915_query_items(fd, &item, 1); + igt_assert(item.length >= 0); + igt_assert(item.length <= sizeof(queues)); + len = item.length; + + /* Check length larger than required works and reports same length. 
*/ + memset(&queues, 0, sizeof(queues)); + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.data_ptr = to_user_pointer(&queues); + item.length = len + 1; + i915_query_items(fd, &item, 1); + igt_assert_eq(item.length, len); + + /* Actual query. */ + memset(&queues, 0, sizeof(queues)); + queues.class = e->class; + queues.instance = e->instance; + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.data_ptr = to_user_pointer(&queues); + item.length = len; + i915_query_items(fd, &item, 1); + igt_assert_eq(item.length, len); +} + +static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e) +{ + return gem_class_instance_to_eb_flags(gem_fd, e->class, e->instance); +} + +static void +__query_queues(int fd, const struct intel_execution_engine2 *e, + struct drm_i915_query_engine_queues *queues) +{ + struct drm_i915_query_item item; + + memset(queues, 0, sizeof(*queues)); + queues->class = e->class; + queues->instance = e->instance; + memset(&item, 0, sizeof(item)); + item.query_id = DRM_I915_QUERY_ENGINE_QUEUES; + item.data_ptr = to_user_pointer(queues); + item.length = sizeof(*queues); + i915_query_items(fd, &item, 1); + igt_assert_eq(item.length, sizeof(*queues)); +} + +#define TEST_CONTEXTS (1 << 0) + +/* + * Test that the reported number of queued (not ready for execution due fences + * or dependencies) requests on an engine is correct. + */ +static void +engine_queued(int gem_fd, const struct intel_execution_engine2 *e, + unsigned int flags) +{ + const unsigned long engine = e2ring(gem_fd, e); + struct drm_i915_query_engine_queues queues; + const uint32_t bbe = MI_BATCH_BUFFER_END; + const unsigned int max_rq = 10; + uint32_t queued[max_rq + 1]; + unsigned int n, i; + uint32_t bo; + + igt_require_sw_sync(); + if (flags & TEST_CONTEXTS) + gem_require_contexts(gem_fd); + + memset(queued, 0, sizeof(queued)); + + bo = gem_create(gem_fd, 4096); + gem_write(gem_fd, bo, 4092, &bbe, sizeof(bbe)); + + /* Create a specific queue depth of unready requests. */ + for (n = 0; n <= max_rq; n++) { + int fence = -1; + IGT_CORK_FENCE(cork); + + gem_quiescent_gpu(gem_fd); + + /* Create a cork so we can create a dependency chain. */ + if (n) + fence = igt_cork_plug(&cork, -1); + + /* Submit n unready requests depending on the cork. */ + for (i = 0; i < n; i++) { + struct drm_i915_gem_exec_object2 obj = { }; + struct drm_i915_gem_execbuffer2 eb = { }; + + obj.handle = bo; + + eb.buffer_count = 1; + eb.buffers_ptr = to_user_pointer(&obj); + + eb.flags = engine | I915_EXEC_FENCE_IN; + + /* + * In context mode each submission is on a separate + * context. + */ + if (flags & TEST_CONTEXTS) + eb.rsvd1 = gem_context_create(gem_fd); + + eb.rsvd2 = fence; + + gem_execbuf(gem_fd, &eb); + + if (flags & TEST_CONTEXTS) + gem_context_destroy(gem_fd, eb.rsvd1); + } + + /* Store reported queue depth to assert against later. */ + __query_queues(gem_fd, e, &queues); + queued[n] = queues.queued; + igt_info("n=%u queued=%u\n", n, queued[n]); + + /* Unplug the queue and proceed to the next queue depth. 
*/ + if (fence >= 0) + igt_cork_unplug(&cork); + + gem_sync(gem_fd, bo); + } + + gem_close(gem_fd, bo); + + for (i = 0; i <= max_rq; i++) + igt_assert_eq(queued[i], i); +} + +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags) +{ + if (gem_can_store_dword(fd, flags)) + return __igt_spin_batch_new_poll(fd, ctx, flags); + else + return __igt_spin_batch_new(fd, ctx, flags, 0); +} + +static unsigned long __spin_wait(igt_spin_t *spin, unsigned int n) +{ + struct timespec ts = { }; + unsigned long t; + + igt_nsec_elapsed(&ts); + + if (spin->running) { + igt_spin_busywait_until_running(spin); + } else { + igt_debug("__spin_wait - usleep mode\n"); + usleep(500e3); /* Better than nothing! */ + } + + t = igt_nsec_elapsed(&ts); + + return spin->running ? t : 500e6 / n; +} + +/* + * Test that the number of requests ready for execution but waiting on space on + * GPU is correctly reported. + */ +static void +engine_runnable(int gem_fd, const struct intel_execution_engine2 *e) +{ + const unsigned long engine = e2ring(gem_fd, e); + struct drm_i915_query_engine_queues queues; + bool contexts = gem_has_contexts(gem_fd); + const unsigned int max_rq = 10; + igt_spin_t *spin[max_rq + 1]; + uint32_t runnable[max_rq + 1]; + uint32_t ctx[max_rq]; + unsigned int n, i; + + memset(runnable, 0, sizeof(runnable)); + + if (contexts) { + for (i = 0; i < max_rq; i++) + ctx[i] = gem_context_create(gem_fd); + } + + /* + * Submit different number of requests, potentially against different + * contexts, in order to provoke engine runnable metric returning + * different numbers. + */ + for (n = 0; n <= max_rq; n++) { + gem_quiescent_gpu(gem_fd); + + for (i = 0; i < n; i++) { + uint32_t ctx_ = contexts ? ctx[i] : 0; + + if (i == 0) + spin[i] = __spin_poll(gem_fd, ctx_, engine); + else + spin[i] = __igt_spin_batch_new(gem_fd, ctx_, + engine, 0); + } + + if (n) + usleep(__spin_wait(spin[0], n) * n); + + /* Query and store for later checking. */ + __query_queues(gem_fd, e, &queues); + runnable[n] = queues.runnable; + igt_info("n=%u runnable=%u\n", n, runnable[n]); + + for (i = 0; i < n; i++) { + igt_spin_batch_end(spin[i]); + gem_sync(gem_fd, spin[i]->handle); + igt_spin_batch_free(gem_fd, spin[i]); + } + } + + if (contexts) { + for (i = 0; i < max_rq; i++) + gem_context_destroy(gem_fd, ctx[i]); + } + + /* + * Check that the runnable metric is zero when nothing is submitted, + * and that it is greater than zero on the maximum queue depth. + * + * We cannot assert the exact value since we do not know how many + * requests can the submission backend consume. + */ + igt_assert_eq(runnable[0], 0); + igt_assert(runnable[max_rq] > 0); + + /* + * We can only test that the runnable metric is growing by one if we + * have context support. + */ + if (contexts) + igt_assert_eq(runnable[max_rq] - runnable[max_rq - 1], 1); +} + +/* + * Test that the number of requests currently executing on the GPU is correctly + * reported. + */ +static void +engine_running(int gem_fd, const struct intel_execution_engine2 *e) +{ + const unsigned long engine = e2ring(gem_fd, e); + struct drm_i915_query_engine_queues queues; + const unsigned int max_rq = 10; + igt_spin_t *spin[max_rq + 1]; + uint32_t running[max_rq + 1]; + unsigned int n, i; + + memset(running, 0, sizeof(running)); + memset(spin, 0, sizeof(spin)); + + /* + * Create various queue depths of requests against the same context to + * try and get submission backed execute one or more on the GPU. 
+ */ + for (n = 0; n <= max_rq; n++) { + gem_quiescent_gpu(gem_fd); + + for (i = 0; i < n; i++) { + if (i == 0) + spin[i] = __spin_poll(gem_fd, 0, engine); + else + spin[i] = __igt_spin_batch_new(gem_fd, 0, + engine, 0); + } + + if (n) + usleep(__spin_wait(spin[0], n) * n); + + /* Query and store for later checking. */ + __query_queues(gem_fd, e, &queues); + running[n] = queues.running; + igt_info("n=%u running=%u\n", n, running[n]); + + for (i = 0; i < n; i++) { + igt_spin_batch_end(spin[i]); + gem_sync(gem_fd, spin[i]->handle); + igt_spin_batch_free(gem_fd, spin[i]); + } + } + + /* + * Check that the running metric is zero when nothing is submitted, + * one when one request is submitted, and at least one for any greater + * queue depth. + * + * We cannot assert the exact value since we do not know how many + * requests can the submission backend consume. + */ + igt_assert_eq(running[0], 0); + for (i = 1; i <= max_rq; i++) + igt_assert(running[i] > 0); +} + igt_main { + const struct intel_execution_engine2 *e; int fd = -1; int devid; @@ -524,6 +931,41 @@ igt_main test_query_topology_known_pci_ids(fd, devid); } + igt_subtest_group { + igt_fixture { + igt_require(query_engine_queues_supported(fd)); + } + + igt_subtest("engine-queues-invalid") + engine_queues_invalid(fd); + + __for_each_engine_class_instance(fd, e) { + igt_subtest_group { + igt_fixture { + gem_require_engine(fd, + e->class, + e->instance); + } + + igt_subtest_f("engine-queues-%s", e->name) + engine_queues(fd, e); + + igt_subtest_f("engine-queued-%s", e->name) + engine_queued(fd, e, 0); + + igt_subtest_f("engine-queued-contexts-%s", + e->name) + engine_queued(fd, e, TEST_CONTEXTS); + + igt_subtest_f("engine-runnable-%s", e->name) + engine_runnable(fd, e); + + igt_subtest_f("engine-running-%s", e->name) + engine_running(fd, e); + } + } + } + igt_fixture { close(fd); }
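[Editor's note: since the thread observes that no userspace consumer of this query exists yet, below is a hypothetical, minimal sketch of how one could be issued directly through the existing DRM_IOCTL_I915_QUERY ioctl, without IGT's helpers. struct drm_i915_query, struct drm_i915_query_item and the ioctl itself are upstream uAPI; DRM_I915_QUERY_ENGINE_QUEUES and struct drm_i915_query_engine_queues are the proposed additions from this series, and read_engine_queues() is a made-up name for the example.]

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <i915_drm.h>	/* or <drm/i915_drm.h>, wherever the updated header lands */

/* Hypothetical consumer: read the queue depths of one engine, e.g. class 0,
 * instance 0 for the render engine.
 */
static int read_engine_queues(int drm_fd, uint16_t class, uint16_t instance)
{
	struct drm_i915_query_engine_queues info = {
		.class = class,
		.instance = instance,
	};
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_ENGINE_QUEUES,
		.length = sizeof(info),
		.data_ptr = (uintptr_t)&info,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uintptr_t)&item,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query))
		return -errno;	/* ioctl-level failure */
	if (item.length < 0)
		return item.length;	/* per-item error, e.g. -ENOENT for a bad engine */

	printf("queued=%u runnable=%u running=%u\n",
	       info.queued, info.runnable, info.running);
	return 0;
}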