[i-g-t,5/5] tests/i915_query: Engine queues tests

Message ID 20180319182205.29454-6-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived

Commit Message

Tvrtko Ursulin March 19, 2018, 6:22 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

...

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/i915_query.c | 381 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 381 insertions(+)

Comments

Lionel Landwerlin March 22, 2018, 9:22 p.m. UTC | #1
On 19/03/18 18:22, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> ...
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/i915_query.c | 381 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 381 insertions(+)
>
> diff --git a/tests/i915_query.c b/tests/i915_query.c
> index c7de8cbd8371..94e7a3297ebd 100644
> --- a/tests/i915_query.c
> +++ b/tests/i915_query.c
> @@ -477,8 +477,358 @@ test_query_topology_known_pci_ids(int fd, int devid)
>   	free(topo_info);
>   }
>   
> +#define DRM_I915_QUERY_ENGINE_QUEUES	2
> +
> +struct drm_i915_query_engine_queues {
> +	/** Engine class as in enum drm_i915_gem_engine_class. */
> +	__u16 class;
> +
> +	/** Engine instance number. */
> +	__u16 instance;
> +
> +	/** Number of requests with unresolved fences and dependencies. */
> +	__u32 queued;
> +
> +	/** Number of ready requests waiting on a slot on GPU. */
> +	__u32 runnable;
> +
> +	/** Number of requests executing on the GPU. */
> +	__u32 running;
> +
> +	__u32 rsvd[5];
> +};
> +
> +static bool query_engine_queues_supported(int fd)
> +{
> +	struct drm_i915_query_item item = {
> +		.query_id = DRM_I915_QUERY_ENGINE_QUEUES,
> +	};
> +
> +	return __i915_query_items(fd, &item, 1) == 0 && item.length > 0;
> +}
> +
> +static void engine_queues_invalid(int fd)
> +{
> +	struct drm_i915_query_engine_queues queues;
> +	struct drm_i915_query_item item;
> +	unsigned int len;
> +	unsigned int i;
> +
> +	/* Flags is MBZ. */
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	item.flags = 1;
> +	i915_query_items(fd, &item, 1);
> +	igt_assert_eq(item.length, -EINVAL);
> +
> +	/* Length not zero and not greater or equal required size. */
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	item.length = 1;
> +	i915_query_items(fd, &item, 1);
> +	igt_assert_eq(item.length, -ENOSPC);
> +
> +	/* Query correct length. */
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	i915_query_items(fd, &item, 1);
> +	igt_assert(item.length >= 0);
> +	len = item.length;
> +
> +	/* Invalid pointer. */
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	item.length = len;
> +	i915_query_items(fd, &item, 1);
> +	igt_assert_eq(item.length, -EFAULT);
> +
> +	/* Reserved fields are MBZ. */
> +
> +	for (i = 0; i < ARRAY_SIZE(queues.rsvd); i++) {
> +		memset(&queues, 0, sizeof(queues));
> +		queues.rsvd[i] = 1;
> +		memset(&item, 0, sizeof(item));
> +		item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +		item.length = len;
> +		item.data_ptr = to_user_pointer(&queues);
> +		i915_query_items(fd, &item, 1);
> +		igt_assert_eq(item.length, -EINVAL);
> +	}
> +
> +	memset(&queues, 0, sizeof(queues));
> +	queues.class = -1;
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	item.length = len;
> +	item.data_ptr = to_user_pointer(&queues);
> +	i915_query_items(fd, &item, 1);
> +	igt_assert_eq(item.length, -ENOENT);
> +

Looks like you've copied the few lines from above again here.
It seems to be the same test.

> +	memset(&queues, 0, sizeof(queues));
> +	queues.instance = -1;
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	item.length = len;
> +	item.data_ptr = to_user_pointer(&queues);
> +	i915_query_items(fd, &item, 1);
> +	igt_assert_eq(item.length, -ENOENT);
> +}
> +
> +static void engine_queues(int fd, const struct intel_execution_engine2 *e)
> +{
> +	struct drm_i915_query_engine_queues queues;
> +	struct drm_i915_query_item item;
> +	unsigned int len;
> +
> +	/* Query required buffer length. */
> +	memset(&queues, 0, sizeof(queues));
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	item.data_ptr = to_user_pointer(&queues);
> +	i915_query_items(fd, &item, 1);
> +	igt_assert(item.length >= 0);
> +	igt_assert(item.length <= sizeof(queues));
> +	len = item.length;
> +
> +	/* Check length larger than required works and reports same length. */
> +	memset(&queues, 0, sizeof(queues));
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	item.data_ptr = to_user_pointer(&queues);
> +	item.length = len + 1;
> +	i915_query_items(fd, &item, 1);
> +	igt_assert_eq(item.length, len);
> +
> +	/* Actual query. */
> +	memset(&queues, 0, sizeof(queues));
> +	queues.class = e->class;
> +	queues.instance = e->instance;
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	item.data_ptr = to_user_pointer(&queues);
> +	item.length = len;
> +	i915_query_items(fd, &item, 1);
> +	igt_assert_eq(item.length, len);
> +}
> +
> +static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
> +{
> +	return gem_class_instance_to_eb_flags(gem_fd, e->class, e->instance);
> +}
> +
> +static void
> +__query_queues(int fd, const struct intel_execution_engine2 *e,
> +	       struct drm_i915_query_engine_queues *queues)
> +{
> +	struct drm_i915_query_item item;
> +
> +	memset(queues, 0, sizeof(*queues));
> +	queues->class = e->class;
> +	queues->instance = e->instance;
> +	memset(&item, 0, sizeof(item));
> +	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
> +	item.data_ptr = to_user_pointer(queues);
> +	item.length = sizeof(*queues);
> +	i915_query_items(fd, &item, 1);
> +	igt_assert_eq(item.length, sizeof(*queues));
> +}
> +
> +static void
> +engine_queued(int gem_fd, const struct intel_execution_engine2 *e)
> +{
> +	const unsigned long engine = e2ring(gem_fd, e);
> +	struct drm_i915_query_engine_queues queues;
> +	const unsigned int max_rq = 10;
> +	uint32_t queued[max_rq + 1];
> +	uint32_t bo[max_rq + 1];
> +	unsigned int n, i;
> +
> +	memset(queued, 0, sizeof(queued));
> +	memset(bo, 0, sizeof(bo));
> +
> +	for (n = 0; n <= max_rq; n++) {
> +		int fence = -1;
> +		struct igt_cork cork = { .fd = fence, .type = CORK_SYNC_FD };
> +
> +		gem_quiescent_gpu(gem_fd);
> +
> +		if (n)
> +			fence = igt_cork_plug(&cork, -1);
> +
> +		for (i = 0; i < n; i++) {
> +			struct drm_i915_gem_exec_object2 obj = { };
> +			struct drm_i915_gem_execbuffer2 eb = { };
> +
> +			if (!bo[i]) {
> +				const uint32_t bbe = MI_BATCH_BUFFER_END;
> +
> +				bo[i] = gem_create(gem_fd, 4096);
> +				gem_write(gem_fd, bo[i], 4092, &bbe,
> +					  sizeof(bbe));
> +			}
> +
> +			obj.handle = bo[i];
> +
> +			eb.buffer_count = 1;
> +			eb.buffers_ptr = to_user_pointer(&obj);
> +
> +			eb.flags = engine | I915_EXEC_FENCE_IN;
> +			eb.rsvd2 = fence;
> +
> +			gem_execbuf(gem_fd, &eb);
> +		}
> +
> +		__query_queues(gem_fd, e, &queues);
> +		queued[n] = queues.queued;
> +		igt_info("n=%u queued=%u\n", n, queued[n]);
> +
> +		if (fence >= 0)
> +			igt_cork_unplug(&cork);
> +
> +		for (i = 0; i < n; i++)
> +			gem_sync(gem_fd, bo[i]);
> +	}
> +
> +	for (i = 0; i < max_rq; i++) {
> +		if (bo[i])
> +			gem_close(gem_fd, bo[i]);
> +	}
> +
> +	for (i = 0; i <= max_rq; i++)
> +		igt_assert_eq(queued[i], i);
> +}
> +

I'm not sure I understand what the two functions below are meant to do.
Could you add a comment?

__igt_spin_batch_new_poll also appears to be missing.

> +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags)
> +{
> +	if (gem_can_store_dword(fd, flags))
> +		return __igt_spin_batch_new_poll(fd, ctx, flags);
> +	else
> +		return __igt_spin_batch_new(fd, ctx, flags, 0);
> +}
> +
> +static unsigned long __spin_wait(int fd, igt_spin_t *spin)
> +{
> +	struct timespec start = { };
> +
> +	igt_nsec_elapsed(&start);
> +
> +	if (gem_can_store_dword(fd, spin->execbuf.flags)) {
> +		unsigned long timeout = 0;
> +
> +		while (!spin->running) {
> +			unsigned long t = igt_nsec_elapsed(&start);
> +
> +			if ((t - timeout) > 250e6) {
> +				timeout = t;
> +				igt_warn("Spinner not running after %.2fms\n",
> +					 (double)t / 1e6);
> +			}
> +		};
> +	} else {
> +		igt_debug("__spin_wait - usleep mode\n");
> +		usleep(500e3); /* Better than nothing! */
> +	}
> +
> +	return igt_nsec_elapsed(&start);
> +}
> +
> +static void
> +engine_runnable(int gem_fd, const struct intel_execution_engine2 *e)
> +{
> +	const unsigned long engine = e2ring(gem_fd, e);
> +	struct drm_i915_query_engine_queues queues;
> +	const unsigned int max_rq = 10;
> +	igt_spin_t *spin[max_rq + 1];
> +	uint32_t runnable[max_rq + 1];
> +	uint32_t ctx[max_rq];
> +	unsigned int n, i;
> +
> +	memset(runnable, 0, sizeof(runnable));
> +	memset(ctx, 0, sizeof(ctx));
> +
> +	for (n = 0; n <= max_rq; n++) {
> +		gem_quiescent_gpu(gem_fd);
> +
> +		for (i = 0; i < n; i++) {
> +			if (!ctx[i])
> +				ctx[i] = gem_context_create(gem_fd);
> +
> +			if (i == 0)
> +				spin[i] = __spin_poll(gem_fd, ctx[i], engine);
> +			else
> +				spin[i] = __igt_spin_batch_new(gem_fd, ctx[i],
> +							       engine, 0);
> +		}
> +
> +		if (n)
> +			__spin_wait(gem_fd, spin[0]);
> +
> +		__query_queues(gem_fd, e, &queues);
> +		runnable[n] = queues.runnable;
> +		igt_info("n=%u runnable=%u\n", n, runnable[n]);
> +
> +		for (i = 0; i < n; i++) {
> +			igt_spin_batch_end(spin[i]);
> +			gem_sync(gem_fd, spin[i]->handle);
> +			igt_spin_batch_free(gem_fd, spin[i]);
> +		}
> +	}
> +
> +	for (i = 0; i < max_rq; i++) {
> +		if (ctx[i])
> +			gem_context_destroy(gem_fd, ctx[i]);
> +	}
> +
> +	igt_assert_eq(runnable[0], 0);

Why check only the first & last items?
It seems the results should be consistent, no?

> +	igt_assert(runnable[max_rq] > 0);
> +	igt_assert_eq(runnable[max_rq] - runnable[max_rq - 1], 1);
> +}
> +
> +static void
> +engine_running(int gem_fd, const struct intel_execution_engine2 *e)
> +{
> +	const unsigned long engine = e2ring(gem_fd, e);
> +	struct drm_i915_query_engine_queues queues;
> +	const unsigned int max_rq = 10;
> +	igt_spin_t *spin[max_rq + 1];
> +	uint32_t running[max_rq + 1];
> +	unsigned int n, i;
> +
> +	memset(running, 0, sizeof(running));
> +	memset(spin, 0, sizeof(spin));
> +
> +	for (n = 0; n <= max_rq; n++) {
> +		gem_quiescent_gpu(gem_fd);
> +
> +		for (i = 0; i < n; i++) {
> +			if (i == 0)
> +				spin[i] = __spin_poll(gem_fd, 0, engine);
> +			else
> +				spin[i] = __igt_spin_batch_new(gem_fd, 0,
> +							       engine, 0);
> +		}
> +
> +		if (n)
> +			__spin_wait(gem_fd, spin[0]);
> +
> +		__query_queues(gem_fd, e, &queues);
> +		running[n] = queues.running;
> +		igt_info("n=%u running=%u\n", n, running[n]);
> +
> +		for (i = 0; i < n; i++) {
> +			igt_spin_batch_end(spin[i]);
> +			gem_sync(gem_fd, spin[i]->handle);
> +			igt_spin_batch_free(gem_fd, spin[i]);
> +		}
> +	}
> +
> +	for (i = 0; i <= max_rq; i++)
> +		igt_assert_eq(running[i], i);
> +}
> +
>   igt_main
>   {
> +	const struct intel_execution_engine2 *e;
>   	int fd = -1;
>   	int devid;
>   
> @@ -524,6 +874,37 @@ igt_main
>   		test_query_topology_known_pci_ids(fd, devid);
>   	}

I guess we could add a group for the topology too.

>   
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(query_engine_queues_supported(fd));
> +		}
> +
> +		igt_subtest("engine-queues-invalid")
> +			engine_queues_invalid(fd);
> +
> +		for_each_engine_class_instance(fd, e) {
> +			igt_subtest_group {
> +				igt_fixture {
> +					gem_require_engine(fd,
> +							   e->class,
> +							   e->instance);
> +				}
> +
> +				igt_subtest_f("engine-queues-%s", e->name)
> +					engine_queues(fd, e);
> +
> +				igt_subtest_f("engine-queued-%s", e->name)
> +					engine_queued(fd, e);
> +
> +				igt_subtest_f("engine-runnable-%s", e->name)
> +					engine_runnable(fd, e);
> +
> +				igt_subtest_f("engine-running-%s", e->name)
> +					engine_running(fd, e);
> +			}
> +		}
> +	}
> +
>   	igt_fixture {
>   		close(fd);
>   	}
Tvrtko Ursulin March 23, 2018, 10:18 a.m. UTC | #2
On 22/03/2018 21:22, Lionel Landwerlin wrote:
> On 19/03/18 18:22, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> ...
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   tests/i915_query.c | 381 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 381 insertions(+)
>>
>> diff --git a/tests/i915_query.c b/tests/i915_query.c
>> index c7de8cbd8371..94e7a3297ebd 100644
>> --- a/tests/i915_query.c
>> +++ b/tests/i915_query.c
>> @@ -477,8 +477,358 @@ test_query_topology_known_pci_ids(int fd, int 
>> devid)
>>       free(topo_info);
>>   }
>> +#define DRM_I915_QUERY_ENGINE_QUEUES    2
>> +
>> +struct drm_i915_query_engine_queues {
>> +    /** Engine class as in enum drm_i915_gem_engine_class. */
>> +    __u16 class;
>> +
>> +    /** Engine instance number. */
>> +    __u16 instance;
>> +
>> +    /** Number of requests with unresolved fences and dependencies. */
>> +    __u32 queued;
>> +
>> +    /** Number of ready requests waiting on a slot on GPU. */
>> +    __u32 runnable;
>> +
>> +    /** Number of requests executing on the GPU. */
>> +    __u32 running;
>> +
>> +    __u32 rsvd[5];
>> +};
>> +
>> +static bool query_engine_queues_supported(int fd)
>> +{
>> +    struct drm_i915_query_item item = {
>> +        .query_id = DRM_I915_QUERY_ENGINE_QUEUES,
>> +    };
>> +
>> +    return __i915_query_items(fd, &item, 1) == 0 && item.length > 0;
>> +}
>> +
>> +static void engine_queues_invalid(int fd)
>> +{
>> +    struct drm_i915_query_engine_queues queues;
>> +    struct drm_i915_query_item item;
>> +    unsigned int len;
>> +    unsigned int i;
>> +
>> +    /* Flags is MBZ. */
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.flags = 1;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -EINVAL);
>> +
>> +    /* Length not zero and not greater or equal required size. */
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.length = 1;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -ENOSPC);
>> +
>> +    /* Query correct length. */
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert(item.length >= 0);
>> +    len = item.length;
>> +
>> +    /* Invalid pointer. */
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.length = len;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -EFAULT);
>> +
>> +    /* Reserved fields are MBZ. */
>> +
>> +    for (i = 0; i < ARRAY_SIZE(queues.rsvd); i++) {
>> +        memset(&queues, 0, sizeof(queues));
>> +        queues.rsvd[i] = 1;
>> +        memset(&item, 0, sizeof(item));
>> +        item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +        item.length = len;
>> +        item.data_ptr = to_user_pointer(&queues);
>> +        i915_query_items(fd, &item, 1);
>> +        igt_assert_eq(item.length, -EINVAL);
>> +    }
>> +
>> +    memset(&queues, 0, sizeof(queues));
>> +    queues.class = -1;
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.length = len;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -ENOENT);
>> +
> 
> Looks like you've copied the few lines above after.
> It seems to be the same test.

The check above verifies that an invalid class is rejected, the one 
below that an invalid instance is - if I understood correctly what you 
are pointing at?

>> +    memset(&queues, 0, sizeof(queues));
>> +    queues.instance = -1;
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.length = len;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -ENOENT);
>> +}
>> +
>> +static void engine_queues(int fd, const struct 
>> intel_execution_engine2 *e)
>> +{
>> +    struct drm_i915_query_engine_queues queues;
>> +    struct drm_i915_query_item item;
>> +    unsigned int len;
>> +
>> +    /* Query required buffer length. */
>> +    memset(&queues, 0, sizeof(queues));
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert(item.length >= 0);
>> +    igt_assert(item.length <= sizeof(queues));
>> +    len = item.length;
>> +
>> +    /* Check length larger than required works and reports same 
>> length. */
>> +    memset(&queues, 0, sizeof(queues));
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    item.length = len + 1;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, len);
>> +
>> +    /* Actual query. */
>> +    memset(&queues, 0, sizeof(queues));
>> +    queues.class = e->class;
>> +    queues.instance = e->instance;
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    item.length = len;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, len);
>> +}
>> +
>> +static unsigned int e2ring(int gem_fd, const struct 
>> intel_execution_engine2 *e)
>> +{
>> +    return gem_class_instance_to_eb_flags(gem_fd, e->class, 
>> e->instance);
>> +}
>> +
>> +static void
>> +__query_queues(int fd, const struct intel_execution_engine2 *e,
>> +           struct drm_i915_query_engine_queues *queues)
>> +{
>> +    struct drm_i915_query_item item;
>> +
>> +    memset(queues, 0, sizeof(*queues));
>> +    queues->class = e->class;
>> +    queues->instance = e->instance;
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.data_ptr = to_user_pointer(queues);
>> +    item.length = sizeof(*queues);
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, sizeof(*queues));
>> +}
>> +
>> +static void
>> +engine_queued(int gem_fd, const struct intel_execution_engine2 *e)
>> +{
>> +    const unsigned long engine = e2ring(gem_fd, e);
>> +    struct drm_i915_query_engine_queues queues;
>> +    const unsigned int max_rq = 10;
>> +    uint32_t queued[max_rq + 1];
>> +    uint32_t bo[max_rq + 1];
>> +    unsigned int n, i;
>> +
>> +    memset(queued, 0, sizeof(queued));
>> +    memset(bo, 0, sizeof(bo));
>> +
>> +    for (n = 0; n <= max_rq; n++) {
>> +        int fence = -1;
>> +        struct igt_cork cork = { .fd = fence, .type = CORK_SYNC_FD };
>> +
>> +        gem_quiescent_gpu(gem_fd);
>> +
>> +        if (n)
>> +            fence = igt_cork_plug(&cork, -1);
>> +
>> +        for (i = 0; i < n; i++) {
>> +            struct drm_i915_gem_exec_object2 obj = { };
>> +            struct drm_i915_gem_execbuffer2 eb = { };
>> +
>> +            if (!bo[i]) {
>> +                const uint32_t bbe = MI_BATCH_BUFFER_END;
>> +
>> +                bo[i] = gem_create(gem_fd, 4096);
>> +                gem_write(gem_fd, bo[i], 4092, &bbe,
>> +                      sizeof(bbe));
>> +            }
>> +
>> +            obj.handle = bo[i];
>> +
>> +            eb.buffer_count = 1;
>> +            eb.buffers_ptr = to_user_pointer(&obj);
>> +
>> +            eb.flags = engine | I915_EXEC_FENCE_IN;
>> +            eb.rsvd2 = fence;
>> +
>> +            gem_execbuf(gem_fd, &eb);
>> +        }
>> +
>> +        __query_queues(gem_fd, e, &queues);
>> +        queued[n] = queues.queued;
>> +        igt_info("n=%u queued=%u\n", n, queued[n]);
>> +
>> +        if (fence >= 0)
>> +            igt_cork_unplug(&cork);
>> +
>> +        for (i = 0; i < n; i++)
>> +            gem_sync(gem_fd, bo[i]);
>> +    }
>> +
>> +    for (i = 0; i < max_rq; i++) {
>> +        if (bo[i])
>> +            gem_close(gem_fd, bo[i]);
>> +    }
>> +
>> +    for (i = 0; i <= max_rq; i++)
>> +        igt_assert_eq(queued[i], i);
>> +}
>> +
> 
> I'm not sure to understand what the 2 function below are meant to do.
> Could you put a comment?

Yeah, will need more comments.
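
Something along these lines perhaps (just a sketch of the comments, to 
be refined):

	/*
	 * __spin_poll() starts a spinner which, where MI_STORE_DWORD is
	 * usable on the engine, also signals via a memory write once it
	 * has actually started executing, so the caller can poll
	 * spin->running.
	 *
	 * __spin_wait() then busy-waits on that flag (falling back to a
	 * fixed sleep where the poll variant is unavailable) so that the
	 * query afterwards samples a stable state.
	 */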

> __igt_spin_batch_new_poll also appear to be missing.

I think I mentioned in the commit message that it depends on another, 
as yet unmerged, patch so I was sending it only for reference for now.

This particular API has little chance of being merged any time soon due 
to the lack of userspace. This is mostly so the product group interested 
in it can slurp the two (i915 + IGT) from the mailing list for their use.

>> +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long 
>> flags)
>> +{
>> +    if (gem_can_store_dword(fd, flags))
>> +        return __igt_spin_batch_new_poll(fd, ctx, flags);
>> +    else
>> +        return __igt_spin_batch_new(fd, ctx, flags, 0);
>> +}
>> +
>> +static unsigned long __spin_wait(int fd, igt_spin_t *spin)
>> +{
>> +    struct timespec start = { };
>> +
>> +    igt_nsec_elapsed(&start);
>> +
>> +    if (gem_can_store_dword(fd, spin->execbuf.flags)) {
>> +        unsigned long timeout = 0;
>> +
>> +        while (!spin->running) {
>> +            unsigned long t = igt_nsec_elapsed(&start);
>> +
>> +            if ((t - timeout) > 250e6) {
>> +                timeout = t;
>> +                igt_warn("Spinner not running after %.2fms\n",
>> +                     (double)t / 1e6);
>> +            }
>> +        };
>> +    } else {
>> +        igt_debug("__spin_wait - usleep mode\n");
>> +        usleep(500e3); /* Better than nothing! */
>> +    }
>> +
>> +    return igt_nsec_elapsed(&start);
>> +}
>> +
>> +static void
>> +engine_runnable(int gem_fd, const struct intel_execution_engine2 *e)
>> +{
>> +    const unsigned long engine = e2ring(gem_fd, e);
>> +    struct drm_i915_query_engine_queues queues;
>> +    const unsigned int max_rq = 10;
>> +    igt_spin_t *spin[max_rq + 1];
>> +    uint32_t runnable[max_rq + 1];
>> +    uint32_t ctx[max_rq];
>> +    unsigned int n, i;
>> +
>> +    memset(runnable, 0, sizeof(runnable));
>> +    memset(ctx, 0, sizeof(ctx));
>> +
>> +    for (n = 0; n <= max_rq; n++) {
>> +        gem_quiescent_gpu(gem_fd);
>> +
>> +        for (i = 0; i < n; i++) {
>> +            if (!ctx[i])
>> +                ctx[i] = gem_context_create(gem_fd);
>> +
>> +            if (i == 0)
>> +                spin[i] = __spin_poll(gem_fd, ctx[i], engine);
>> +            else
>> +                spin[i] = __igt_spin_batch_new(gem_fd, ctx[i],
>> +                                   engine, 0);
>> +        }
>> +
>> +        if (n)
>> +            __spin_wait(gem_fd, spin[0]);
>> +
>> +        __query_queues(gem_fd, e, &queues);
>> +        runnable[n] = queues.runnable;
>> +        igt_info("n=%u runnable=%u\n", n, runnable[n]);
>> +
>> +        for (i = 0; i < n; i++) {
>> +            igt_spin_batch_end(spin[i]);
>> +            gem_sync(gem_fd, spin[i]->handle);
>> +            igt_spin_batch_free(gem_fd, spin[i]);
>> +        }
>> +    }
>> +
>> +    for (i = 0; i < max_rq; i++) {
>> +        if (ctx[i])
>> +            gem_context_destroy(gem_fd, ctx[i]);
>> +    }
>> +
>> +    igt_assert_eq(runnable[0], 0);
> 
> Why only checking the first & last items?
> It seems that the results should be consistent? no?

Depending on the submission backend (ringbuffer/execlists/GuC) and the 
number of submission ports, the split between the runnable and running 
counters for a given submission pattern will vary.

This series of three asserts is supposed to make it work with any 
backend and any number of ports (as long as there are fewer than ten).

I think I cannot make the assert stronger without embedding this 
knowledge in the test, but I'll have another think about it.
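
For example, something along these lines above the asserts (sketch):

	/*
	 * Depending on the backend and number of submission ports, some
	 * of the n spinners are accounted as running rather than
	 * runnable, so only check that nothing is runnable on an idle
	 * engine, that something is runnable at the maximum queue depth,
	 * and that going from max_rq - 1 to max_rq requests adds exactly
	 * one runnable request.
	 */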

> 
>> +    igt_assert(runnable[max_rq] > 0);
>> +    igt_assert_eq(runnable[max_rq] - runnable[max_rq - 1], 1);
>> +}
>> +
>> +static void
>> +engine_running(int gem_fd, const struct intel_execution_engine2 *e)
>> +{
>> +    const unsigned long engine = e2ring(gem_fd, e);
>> +    struct drm_i915_query_engine_queues queues;
>> +    const unsigned int max_rq = 10;
>> +    igt_spin_t *spin[max_rq + 1];
>> +    uint32_t running[max_rq + 1];
>> +    unsigned int n, i;
>> +
>> +    memset(running, 0, sizeof(running));
>> +    memset(spin, 0, sizeof(spin));
>> +
>> +    for (n = 0; n <= max_rq; n++) {
>> +        gem_quiescent_gpu(gem_fd);
>> +
>> +        for (i = 0; i < n; i++) {
>> +            if (i == 0)
>> +                spin[i] = __spin_poll(gem_fd, 0, engine);
>> +            else
>> +                spin[i] = __igt_spin_batch_new(gem_fd, 0,
>> +                                   engine, 0);
>> +        }
>> +
>> +        if (n)
>> +            __spin_wait(gem_fd, spin[0]);
>> +
>> +        __query_queues(gem_fd, e, &queues);
>> +        running[n] = queues.running;
>> +        igt_info("n=%u running=%u\n", n, running[n]);
>> +
>> +        for (i = 0; i < n; i++) {
>> +            igt_spin_batch_end(spin[i]);
>> +            gem_sync(gem_fd, spin[i]->handle);
>> +            igt_spin_batch_free(gem_fd, spin[i]);
>> +        }
>> +    }
>> +
>> +    for (i = 0; i <= max_rq; i++)
>> +        igt_assert_eq(running[i], i);
>> +}
>> +
>>   igt_main
>>   {
>> +    const struct intel_execution_engine2 *e;
>>       int fd = -1;
>>       int devid;
>> @@ -524,6 +874,37 @@ igt_main
>>           test_query_topology_known_pci_ids(fd, devid);
>>       }
> 
> I guess we could add a group for the topology too.

My dilemma was whether to stuff tests for every query into i915_query.c, 
or split them out into separate binaries.

I can imagine, if/when the number of queries grows, i915_query.c would 
become unmanageable with the former approach. What do you think?

Regards,

Tvrtko

> 
>> +    igt_subtest_group {
>> +        igt_fixture {
>> +            igt_require(query_engine_queues_supported(fd));
>> +        }
>> +
>> +        igt_subtest("engine-queues-invalid")
>> +            engine_queues_invalid(fd);
>> +
>> +        for_each_engine_class_instance(fd, e) {
>> +            igt_subtest_group {
>> +                igt_fixture {
>> +                    gem_require_engine(fd,
>> +                               e->class,
>> +                               e->instance);
>> +                }
>> +
>> +                igt_subtest_f("engine-queues-%s", e->name)
>> +                    engine_queues(fd, e);
>> +
>> +                igt_subtest_f("engine-queued-%s", e->name)
>> +                    engine_queued(fd, e);
>> +
>> +                igt_subtest_f("engine-runnable-%s", e->name)
>> +                    engine_runnable(fd, e);
>> +
>> +                igt_subtest_f("engine-running-%s", e->name)
>> +                    engine_running(fd, e);
>> +            }
>> +        }
>> +    }
>> +
>>       igt_fixture {
>>           close(fd);
>>       }
Lionel Landwerlin March 23, 2018, 10:47 a.m. UTC | #3
On 23/03/18 10:18, Tvrtko Ursulin wrote:
>
> On 22/03/2018 21:22, Lionel Landwerlin wrote:
>> On 19/03/18 18:22, Tvrtko Ursulin wrote:
>>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>
>>> ...
>>>
>>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>   tests/i915_query.c | 381 
>>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 381 insertions(+)
>>>
>>> diff --git a/tests/i915_query.c b/tests/i915_query.c
>>> index c7de8cbd8371..94e7a3297ebd 100644
>>> --- a/tests/i915_query.c
>>> +++ b/tests/i915_query.c
>>> @@ -477,8 +477,358 @@ test_query_topology_known_pci_ids(int fd, int 
>>> devid)
>>>       free(topo_info);
>>>   }
>>> +#define DRM_I915_QUERY_ENGINE_QUEUES    2
>>> +
>>> +struct drm_i915_query_engine_queues {
>>> +    /** Engine class as in enum drm_i915_gem_engine_class. */
>>> +    __u16 class;
>>> +
>>> +    /** Engine instance number. */
>>> +    __u16 instance;
>>> +
>>> +    /** Number of requests with unresolved fences and dependencies. */
>>> +    __u32 queued;
>>> +
>>> +    /** Number of ready requests waiting on a slot on GPU. */
>>> +    __u32 runnable;
>>> +
>>> +    /** Number of requests executing on the GPU. */
>>> +    __u32 running;
>>> +
>>> +    __u32 rsvd[5];
>>> +};
>>> +
>>> +static bool query_engine_queues_supported(int fd)
>>> +{
>>> +    struct drm_i915_query_item item = {
>>> +        .query_id = DRM_I915_QUERY_ENGINE_QUEUES,
>>> +    };
>>> +
>>> +    return __i915_query_items(fd, &item, 1) == 0 && item.length > 0;
>>> +}
>>> +
>>> +static void engine_queues_invalid(int fd)
>>> +{
>>> +    struct drm_i915_query_engine_queues queues;
>>> +    struct drm_i915_query_item item;
>>> +    unsigned int len;
>>> +    unsigned int i;
>>> +
>>> +    /* Flags is MBZ. */
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    item.flags = 1;
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert_eq(item.length, -EINVAL);
>>> +
>>> +    /* Length not zero and not greater or equal required size. */
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    item.length = 1;
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert_eq(item.length, -ENOSPC);
>>> +
>>> +    /* Query correct length. */
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert(item.length >= 0);
>>> +    len = item.length;
>>> +
>>> +    /* Invalid pointer. */
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    item.length = len;
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert_eq(item.length, -EFAULT);
>>> +
>>> +    /* Reserved fields are MBZ. */
>>> +
>>> +    for (i = 0; i < ARRAY_SIZE(queues.rsvd); i++) {
>>> +        memset(&queues, 0, sizeof(queues));
>>> +        queues.rsvd[i] = 1;
>>> +        memset(&item, 0, sizeof(item));
>>> +        item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +        item.length = len;
>>> +        item.data_ptr = to_user_pointer(&queues);
>>> +        i915_query_items(fd, &item, 1);
>>> +        igt_assert_eq(item.length, -EINVAL);
>>> +    }
>>> +
>>> +    memset(&queues, 0, sizeof(queues));
>>> +    queues.class = -1;
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    item.length = len;
>>> +    item.data_ptr = to_user_pointer(&queues);
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert_eq(item.length, -ENOENT);
>>> +
>>
>> Looks like you've copied the few lines above after.
>> It seems to be the same test.
>
> Above is checking invalid class is rejected, below is checking invalid 
> instance - if I understood correctly what you are pointing at?

Oops, missed that.

>
>>> +    memset(&queues, 0, sizeof(queues));
>>> +    queues.instance = -1;
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    item.length = len;
>>> +    item.data_ptr = to_user_pointer(&queues);
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert_eq(item.length, -ENOENT);
>>> +}
>>> +
>>> +static void engine_queues(int fd, const struct 
>>> intel_execution_engine2 *e)
>>> +{
>>> +    struct drm_i915_query_engine_queues queues;
>>> +    struct drm_i915_query_item item;
>>> +    unsigned int len;
>>> +
>>> +    /* Query required buffer length. */
>>> +    memset(&queues, 0, sizeof(queues));
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    item.data_ptr = to_user_pointer(&queues);
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert(item.length >= 0);
>>> +    igt_assert(item.length <= sizeof(queues));
>>> +    len = item.length;
>>> +
>>> +    /* Check length larger than required works and reports same 
>>> length. */
>>> +    memset(&queues, 0, sizeof(queues));
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    item.data_ptr = to_user_pointer(&queues);
>>> +    item.length = len + 1;
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert_eq(item.length, len);
>>> +
>>> +    /* Actual query. */
>>> +    memset(&queues, 0, sizeof(queues));
>>> +    queues.class = e->class;
>>> +    queues.instance = e->instance;
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    item.data_ptr = to_user_pointer(&queues);
>>> +    item.length = len;
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert_eq(item.length, len);
>>> +}
>>> +
>>> +static unsigned int e2ring(int gem_fd, const struct 
>>> intel_execution_engine2 *e)
>>> +{
>>> +    return gem_class_instance_to_eb_flags(gem_fd, e->class, 
>>> e->instance);
>>> +}
>>> +
>>> +static void
>>> +__query_queues(int fd, const struct intel_execution_engine2 *e,
>>> +           struct drm_i915_query_engine_queues *queues)
>>> +{
>>> +    struct drm_i915_query_item item;
>>> +
>>> +    memset(queues, 0, sizeof(*queues));
>>> +    queues->class = e->class;
>>> +    queues->instance = e->instance;
>>> +    memset(&item, 0, sizeof(item));
>>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>>> +    item.data_ptr = to_user_pointer(queues);
>>> +    item.length = sizeof(*queues);
>>> +    i915_query_items(fd, &item, 1);
>>> +    igt_assert_eq(item.length, sizeof(*queues));
>>> +}
>>> +
>>> +static void
>>> +engine_queued(int gem_fd, const struct intel_execution_engine2 *e)
>>> +{
>>> +    const unsigned long engine = e2ring(gem_fd, e);
>>> +    struct drm_i915_query_engine_queues queues;
>>> +    const unsigned int max_rq = 10;
>>> +    uint32_t queued[max_rq + 1];
>>> +    uint32_t bo[max_rq + 1];
>>> +    unsigned int n, i;
>>> +
>>> +    memset(queued, 0, sizeof(queued));
>>> +    memset(bo, 0, sizeof(bo));
>>> +
>>> +    for (n = 0; n <= max_rq; n++) {
>>> +        int fence = -1;
>>> +        struct igt_cork cork = { .fd = fence, .type = CORK_SYNC_FD };
>>> +
>>> +        gem_quiescent_gpu(gem_fd);
>>> +
>>> +        if (n)
>>> +            fence = igt_cork_plug(&cork, -1);
>>> +
>>> +        for (i = 0; i < n; i++) {
>>> +            struct drm_i915_gem_exec_object2 obj = { };
>>> +            struct drm_i915_gem_execbuffer2 eb = { };
>>> +
>>> +            if (!bo[i]) {
>>> +                const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +
>>> +                bo[i] = gem_create(gem_fd, 4096);
>>> +                gem_write(gem_fd, bo[i], 4092, &bbe,
>>> +                      sizeof(bbe));
>>> +            }
>>> +
>>> +            obj.handle = bo[i];
>>> +
>>> +            eb.buffer_count = 1;
>>> +            eb.buffers_ptr = to_user_pointer(&obj);
>>> +
>>> +            eb.flags = engine | I915_EXEC_FENCE_IN;
>>> +            eb.rsvd2 = fence;
>>> +
>>> +            gem_execbuf(gem_fd, &eb);
>>> +        }
>>> +
>>> +        __query_queues(gem_fd, e, &queues);
>>> +        queued[n] = queues.queued;
>>> +        igt_info("n=%u queued=%u\n", n, queued[n]);
>>> +
>>> +        if (fence >= 0)
>>> +            igt_cork_unplug(&cork);
>>> +
>>> +        for (i = 0; i < n; i++)
>>> +            gem_sync(gem_fd, bo[i]);
>>> +    }
>>> +
>>> +    for (i = 0; i < max_rq; i++) {
>>> +        if (bo[i])
>>> +            gem_close(gem_fd, bo[i]);
>>> +    }
>>> +
>>> +    for (i = 0; i <= max_rq; i++)
>>> +        igt_assert_eq(queued[i], i);
>>> +}
>>> +
>>
>> I'm not sure to understand what the 2 function below are meant to do.
>> Could you put a comment?
>
> Yeah, will need more comments.
>
>> __igt_spin_batch_new_poll also appear to be missing.
>
> I think I mentioned in the commit it depends on another yet unmerged 
> patch so I was sending it only for reference for now.
>
> This particular API has little chance of being merged any time soon 
> due lack of userspace. This is mostly so the product group interested 
> in it can slurp the two (i915 + IGT) from the mailing list for their use.
>
>>> +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long 
>>> flags)
>>> +{
>>> +    if (gem_can_store_dword(fd, flags))
>>> +        return __igt_spin_batch_new_poll(fd, ctx, flags);
>>> +    else
>>> +        return __igt_spin_batch_new(fd, ctx, flags, 0);
>>> +}
>>> +
>>> +static unsigned long __spin_wait(int fd, igt_spin_t *spin)
>>> +{
>>> +    struct timespec start = { };
>>> +
>>> +    igt_nsec_elapsed(&start);
>>> +
>>> +    if (gem_can_store_dword(fd, spin->execbuf.flags)) {
>>> +        unsigned long timeout = 0;
>>> +
>>> +        while (!spin->running) {
>>> +            unsigned long t = igt_nsec_elapsed(&start);
>>> +
>>> +            if ((t - timeout) > 250e6) {
>>> +                timeout = t;
>>> +                igt_warn("Spinner not running after %.2fms\n",
>>> +                     (double)t / 1e6);
>>> +            }
>>> +        };
>>> +    } else {
>>> +        igt_debug("__spin_wait - usleep mode\n");
>>> +        usleep(500e3); /* Better than nothing! */
>>> +    }
>>> +
>>> +    return igt_nsec_elapsed(&start);
>>> +}
>>> +
>>> +static void
>>> +engine_runnable(int gem_fd, const struct intel_execution_engine2 *e)
>>> +{
>>> +    const unsigned long engine = e2ring(gem_fd, e);
>>> +    struct drm_i915_query_engine_queues queues;
>>> +    const unsigned int max_rq = 10;
>>> +    igt_spin_t *spin[max_rq + 1];
>>> +    uint32_t runnable[max_rq + 1];
>>> +    uint32_t ctx[max_rq];
>>> +    unsigned int n, i;
>>> +
>>> +    memset(runnable, 0, sizeof(runnable));
>>> +    memset(ctx, 0, sizeof(ctx));
>>> +
>>> +    for (n = 0; n <= max_rq; n++) {
>>> +        gem_quiescent_gpu(gem_fd);
>>> +
>>> +        for (i = 0; i < n; i++) {
>>> +            if (!ctx[i])
>>> +                ctx[i] = gem_context_create(gem_fd);
>>> +
>>> +            if (i == 0)
>>> +                spin[i] = __spin_poll(gem_fd, ctx[i], engine);
>>> +            else
>>> +                spin[i] = __igt_spin_batch_new(gem_fd, ctx[i],
>>> +                                   engine, 0);
>>> +        }
>>> +
>>> +        if (n)
>>> +            __spin_wait(gem_fd, spin[0]);
>>> +
>>> +        __query_queues(gem_fd, e, &queues);
>>> +        runnable[n] = queues.runnable;
>>> +        igt_info("n=%u runnable=%u\n", n, runnable[n]);
>>> +
>>> +        for (i = 0; i < n; i++) {
>>> +            igt_spin_batch_end(spin[i]);
>>> +            gem_sync(gem_fd, spin[i]->handle);
>>> +            igt_spin_batch_free(gem_fd, spin[i]);
>>> +        }
>>> +    }
>>> +
>>> +    for (i = 0; i < max_rq; i++) {
>>> +        if (ctx[i])
>>> +            gem_context_destroy(gem_fd, ctx[i]);
>>> +    }
>>> +
>>> +    igt_assert_eq(runnable[0], 0);
>>
>> Why only checking the first & last items?
>> It seems that the results should be consistent? no?
>
> Depending on the submission backend (ringbuffer/execlists/guc), and 
> number of submission ports, the split between runnable and running 
> counter for a given submission pattern will vary.
>
> This series of three asserts is supposed to make it work with any 
> backend and any number of ports (as long as less than ten).
>
> I think I cannot make the assert stronger without embedding this 
> knowledge in the test, but I'll have another think about it.

Thanks, maybe some comments here would be a good idea.

Is there any verification you can make on runnable + running?
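
Perhaps something like this (just a sketch, assuming that once the 
first spinner is confirmed running every submitted request ends up 
counted as either runnable or running):

	igt_assert_eq(queues.runnable + queues.running, n);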

>
>>
>>> +    igt_assert(runnable[max_rq] > 0);
>>> +    igt_assert_eq(runnable[max_rq] - runnable[max_rq - 1], 1);
>>> +}
>>> +
>>> +static void
>>> +engine_running(int gem_fd, const struct intel_execution_engine2 *e)
>>> +{
>>> +    const unsigned long engine = e2ring(gem_fd, e);
>>> +    struct drm_i915_query_engine_queues queues;
>>> +    const unsigned int max_rq = 10;
>>> +    igt_spin_t *spin[max_rq + 1];
>>> +    uint32_t running[max_rq + 1];
>>> +    unsigned int n, i;
>>> +
>>> +    memset(running, 0, sizeof(running));
>>> +    memset(spin, 0, sizeof(spin));
>>> +
>>> +    for (n = 0; n <= max_rq; n++) {
>>> +        gem_quiescent_gpu(gem_fd);
>>> +
>>> +        for (i = 0; i < n; i++) {
>>> +            if (i == 0)
>>> +                spin[i] = __spin_poll(gem_fd, 0, engine);
>>> +            else
>>> +                spin[i] = __igt_spin_batch_new(gem_fd, 0,
>>> +                                   engine, 0);
>>> +        }
>>> +
>>> +        if (n)
>>> +            __spin_wait(gem_fd, spin[0]);
>>> +
>>> +        __query_queues(gem_fd, e, &queues);
>>> +        running[n] = queues.running;
>>> +        igt_info("n=%u running=%u\n", n, running[n]);
>>> +
>>> +        for (i = 0; i < n; i++) {
>>> +            igt_spin_batch_end(spin[i]);
>>> +            gem_sync(gem_fd, spin[i]->handle);
>>> +            igt_spin_batch_free(gem_fd, spin[i]);
>>> +        }
>>> +    }
>>> +
>>> +    for (i = 0; i <= max_rq; i++)
>>> +        igt_assert_eq(running[i], i);
>>> +}
>>> +
>>>   igt_main
>>>   {
>>> +    const struct intel_execution_engine2 *e;
>>>       int fd = -1;
>>>       int devid;
>>> @@ -524,6 +874,37 @@ igt_main
>>>           test_query_topology_known_pci_ids(fd, devid);
>>>       }
>>
>> I guess we could add a group for the topology too.
>
> My dilemma was whether to stuff tests for any query into i915_query.c, 
> or split out to separate binaries.
>
> I can imagine, if/when the number of queries grows, i915_query.c would 
> become unmanageable with the former approach. What do you think?

It was just a suggestion, no strong feeling either way.

>
> Regards,
>
> Tvrtko
>
>>
>>> +    igt_subtest_group {
>>> +        igt_fixture {
>>> +            igt_require(query_engine_queues_supported(fd));
>>> +        }
>>> +
>>> +        igt_subtest("engine-queues-invalid")
>>> +            engine_queues_invalid(fd);
>>> +
>>> +        for_each_engine_class_instance(fd, e) {
>>> +            igt_subtest_group {
>>> +                igt_fixture {
>>> +                    gem_require_engine(fd,
>>> +                               e->class,
>>> +                               e->instance);
>>> +                }
>>> +
>>> +                igt_subtest_f("engine-queues-%s", e->name)
>>> +                    engine_queues(fd, e);
>>> +
>>> +                igt_subtest_f("engine-queued-%s", e->name)
>>> +                    engine_queued(fd, e);
>>> +
>>> +                igt_subtest_f("engine-runnable-%s", e->name)
>>> +                    engine_runnable(fd, e);
>>> +
>>> +                igt_subtest_f("engine-running-%s", e->name)
>>> +                    engine_running(fd, e);
>>> +            }
>>> +        }
>>> +    }
>>> +
>>>       igt_fixture {
>>>           close(fd);
>>>       }
Chris Wilson March 23, 2018, 10:53 a.m. UTC | #4
Quoting Tvrtko Ursulin (2018-03-23 10:18:37)
> 
> On 22/03/2018 21:22, Lionel Landwerlin wrote:
> > I guess we could add a group for the topology too.
> 
> My dilemma was whether to stuff tests for any query into i915_query.c, 
> or split out to separate binaries.
> 
> I can imagine, if/when the number of queries grows, i915_query.c would 
> become unmanageable with the former approach. What do you think?

#tags ;)
-Chris

Patch

diff --git a/tests/i915_query.c b/tests/i915_query.c
index c7de8cbd8371..94e7a3297ebd 100644
--- a/tests/i915_query.c
+++ b/tests/i915_query.c
@@ -477,8 +477,358 @@  test_query_topology_known_pci_ids(int fd, int devid)
 	free(topo_info);
 }
 
+#define DRM_I915_QUERY_ENGINE_QUEUES	2
+
+struct drm_i915_query_engine_queues {
+	/** Engine class as in enum drm_i915_gem_engine_class. */
+	__u16 class;
+
+	/** Engine instance number. */
+	__u16 instance;
+
+	/** Number of requests with unresolved fences and dependencies. */
+	__u32 queued;
+
+	/** Number of ready requests waiting on a slot on GPU. */
+	__u32 runnable;
+
+	/** Number of requests executing on the GPU. */
+	__u32 running;
+
+	__u32 rsvd[5];
+};
+
+static bool query_engine_queues_supported(int fd)
+{
+	struct drm_i915_query_item item = {
+		.query_id = DRM_I915_QUERY_ENGINE_QUEUES,
+	};
+
+	return __i915_query_items(fd, &item, 1) == 0 && item.length > 0;
+}
+
+static void engine_queues_invalid(int fd)
+{
+	struct drm_i915_query_engine_queues queues;
+	struct drm_i915_query_item item;
+	unsigned int len;
+	unsigned int i;
+
+	/* Flags is MBZ. */
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	item.flags = 1;
+	i915_query_items(fd, &item, 1);
+	igt_assert_eq(item.length, -EINVAL);
+
+	/* Length not zero and not greater or equal required size. */
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	item.length = 1;
+	i915_query_items(fd, &item, 1);
+	igt_assert_eq(item.length, -ENOSPC);
+
+	/* Query correct length. */
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	i915_query_items(fd, &item, 1);
+	igt_assert(item.length >= 0);
+	len = item.length;
+
+	/* Invalid pointer. */
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	item.length = len;
+	i915_query_items(fd, &item, 1);
+	igt_assert_eq(item.length, -EFAULT);
+
+	/* Reserved fields are MBZ. */
+
+	for (i = 0; i < ARRAY_SIZE(queues.rsvd); i++) {
+		memset(&queues, 0, sizeof(queues));
+		queues.rsvd[i] = 1;
+		memset(&item, 0, sizeof(item));
+		item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+		item.length = len;
+		item.data_ptr = to_user_pointer(&queues);
+		i915_query_items(fd, &item, 1);
+		igt_assert_eq(item.length, -EINVAL);
+	}
+
+	memset(&queues, 0, sizeof(queues));
+	queues.class = -1;
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	item.length = len;
+	item.data_ptr = to_user_pointer(&queues);
+	i915_query_items(fd, &item, 1);
+	igt_assert_eq(item.length, -ENOENT);
+
+	memset(&queues, 0, sizeof(queues));
+	queues.instance = -1;
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	item.length = len;
+	item.data_ptr = to_user_pointer(&queues);
+	i915_query_items(fd, &item, 1);
+	igt_assert_eq(item.length, -ENOENT);
+}
+
+static void engine_queues(int fd, const struct intel_execution_engine2 *e)
+{
+	struct drm_i915_query_engine_queues queues;
+	struct drm_i915_query_item item;
+	unsigned int len;
+
+	/* Query required buffer length. */
+	memset(&queues, 0, sizeof(queues));
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	item.data_ptr = to_user_pointer(&queues);
+	i915_query_items(fd, &item, 1);
+	igt_assert(item.length >= 0);
+	igt_assert(item.length <= sizeof(queues));
+	len = item.length;
+
+	/* Check length larger than required works and reports same length. */
+	memset(&queues, 0, sizeof(queues));
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	item.data_ptr = to_user_pointer(&queues);
+	item.length = len + 1;
+	i915_query_items(fd, &item, 1);
+	igt_assert_eq(item.length, len);
+
+	/* Actual query. */
+	memset(&queues, 0, sizeof(queues));
+	queues.class = e->class;
+	queues.instance = e->instance;
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	item.data_ptr = to_user_pointer(&queues);
+	item.length = len;
+	i915_query_items(fd, &item, 1);
+	igt_assert_eq(item.length, len);
+}
+
+static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	return gem_class_instance_to_eb_flags(gem_fd, e->class, e->instance);
+}
+
+static void
+__query_queues(int fd, const struct intel_execution_engine2 *e,
+	       struct drm_i915_query_engine_queues *queues)
+{
+	struct drm_i915_query_item item;
+
+	memset(queues, 0, sizeof(*queues));
+	queues->class = e->class;
+	queues->instance = e->instance;
+	memset(&item, 0, sizeof(item));
+	item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
+	item.data_ptr = to_user_pointer(queues);
+	item.length = sizeof(*queues);
+	i915_query_items(fd, &item, 1);
+	igt_assert_eq(item.length, sizeof(*queues));
+}
+
+static void
+engine_queued(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	const unsigned long engine = e2ring(gem_fd, e);
+	struct drm_i915_query_engine_queues queues;
+	const unsigned int max_rq = 10;
+	uint32_t queued[max_rq + 1];
+	uint32_t bo[max_rq + 1];
+	unsigned int n, i;
+
+	memset(queued, 0, sizeof(queued));
+	memset(bo, 0, sizeof(bo));
+
+	for (n = 0; n <= max_rq; n++) {
+		int fence = -1;
+		struct igt_cork cork = { .fd = fence, .type = CORK_SYNC_FD };
+
+		gem_quiescent_gpu(gem_fd);
+
+		if (n)
+			fence = igt_cork_plug(&cork, -1);
+
+		for (i = 0; i < n; i++) {
+			struct drm_i915_gem_exec_object2 obj = { };
+			struct drm_i915_gem_execbuffer2 eb = { };
+
+			if (!bo[i]) {
+				const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+				bo[i] = gem_create(gem_fd, 4096);
+				gem_write(gem_fd, bo[i], 4092, &bbe,
+					  sizeof(bbe));
+			}
+
+			obj.handle = bo[i];
+
+			eb.buffer_count = 1;
+			eb.buffers_ptr = to_user_pointer(&obj);
+
+			eb.flags = engine | I915_EXEC_FENCE_IN;
+			eb.rsvd2 = fence;
+
+			gem_execbuf(gem_fd, &eb);
+		}
+
+		__query_queues(gem_fd, e, &queues);
+		queued[n] = queues.queued;
+		igt_info("n=%u queued=%u\n", n, queued[n]);
+
+		if (fence >= 0)
+			igt_cork_unplug(&cork);
+
+		for (i = 0; i < n; i++)
+			gem_sync(gem_fd, bo[i]);
+	}
+
+	for (i = 0; i < max_rq; i++) {
+		if (bo[i])
+			gem_close(gem_fd, bo[i]);
+	}
+
+	for (i = 0; i <= max_rq; i++)
+		igt_assert_eq(queued[i], i);
+}
+
+static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags)
+{
+	if (gem_can_store_dword(fd, flags))
+		return __igt_spin_batch_new_poll(fd, ctx, flags);
+	else
+		return __igt_spin_batch_new(fd, ctx, flags, 0);
+}
+
+static unsigned long __spin_wait(int fd, igt_spin_t *spin)
+{
+	struct timespec start = { };
+
+	igt_nsec_elapsed(&start);
+
+	if (gem_can_store_dword(fd, spin->execbuf.flags)) {
+		unsigned long timeout = 0;
+
+		while (!spin->running) {
+			unsigned long t = igt_nsec_elapsed(&start);
+
+			if ((t - timeout) > 250e6) {
+				timeout = t;
+				igt_warn("Spinner not running after %.2fms\n",
+					 (double)t / 1e6);
+			}
+		};
+	} else {
+		igt_debug("__spin_wait - usleep mode\n");
+		usleep(500e3); /* Better than nothing! */
+	}
+
+	return igt_nsec_elapsed(&start);
+}
+
+static void
+engine_runnable(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	const unsigned long engine = e2ring(gem_fd, e);
+	struct drm_i915_query_engine_queues queues;
+	const unsigned int max_rq = 10;
+	igt_spin_t *spin[max_rq + 1];
+	uint32_t runnable[max_rq + 1];
+	uint32_t ctx[max_rq];
+	unsigned int n, i;
+
+	memset(runnable, 0, sizeof(runnable));
+	memset(ctx, 0, sizeof(ctx));
+
+	for (n = 0; n <= max_rq; n++) {
+		gem_quiescent_gpu(gem_fd);
+
+		for (i = 0; i < n; i++) {
+			if (!ctx[i])
+				ctx[i] = gem_context_create(gem_fd);
+
+			if (i == 0)
+				spin[i] = __spin_poll(gem_fd, ctx[i], engine);
+			else
+				spin[i] = __igt_spin_batch_new(gem_fd, ctx[i],
+							       engine, 0);
+		}
+
+		if (n)
+			__spin_wait(gem_fd, spin[0]);
+
+		__query_queues(gem_fd, e, &queues);
+		runnable[n] = queues.runnable;
+		igt_info("n=%u runnable=%u\n", n, runnable[n]);
+
+		for (i = 0; i < n; i++) {
+			igt_spin_batch_end(spin[i]);
+			gem_sync(gem_fd, spin[i]->handle);
+			igt_spin_batch_free(gem_fd, spin[i]);
+		}
+	}
+
+	for (i = 0; i < max_rq; i++) {
+		if (ctx[i])
+			gem_context_destroy(gem_fd, ctx[i]);
+	}
+
+	igt_assert_eq(runnable[0], 0);
+	igt_assert(runnable[max_rq] > 0);
+	igt_assert_eq(runnable[max_rq] - runnable[max_rq - 1], 1);
+}
+
+static void
+engine_running(int gem_fd, const struct intel_execution_engine2 *e)
+{
+	const unsigned long engine = e2ring(gem_fd, e);
+	struct drm_i915_query_engine_queues queues;
+	const unsigned int max_rq = 10;
+	igt_spin_t *spin[max_rq + 1];
+	uint32_t running[max_rq + 1];
+	unsigned int n, i;
+
+	memset(running, 0, sizeof(running));
+	memset(spin, 0, sizeof(spin));
+
+	for (n = 0; n <= max_rq; n++) {
+		gem_quiescent_gpu(gem_fd);
+
+		for (i = 0; i < n; i++) {
+			if (i == 0)
+				spin[i] = __spin_poll(gem_fd, 0, engine);
+			else
+				spin[i] = __igt_spin_batch_new(gem_fd, 0,
+							       engine, 0);
+		}
+
+		if (n)
+			__spin_wait(gem_fd, spin[0]);
+
+		__query_queues(gem_fd, e, &queues);
+		running[n] = queues.running;
+		igt_info("n=%u running=%u\n", n, running[n]);
+
+		for (i = 0; i < n; i++) {
+			igt_spin_batch_end(spin[i]);
+			gem_sync(gem_fd, spin[i]->handle);
+			igt_spin_batch_free(gem_fd, spin[i]);
+		}
+	}
+
+	for (i = 0; i <= max_rq; i++)
+		igt_assert_eq(running[i], i);
+}
+
 igt_main
 {
+	const struct intel_execution_engine2 *e;
 	int fd = -1;
 	int devid;
 
@@ -524,6 +874,37 @@  igt_main
 		test_query_topology_known_pci_ids(fd, devid);
 	}
 
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(query_engine_queues_supported(fd));
+		}
+
+		igt_subtest("engine-queues-invalid")
+			engine_queues_invalid(fd);
+
+		for_each_engine_class_instance(fd, e) {
+			igt_subtest_group {
+				igt_fixture {
+					gem_require_engine(fd,
+							   e->class,
+							   e->instance);
+				}
+
+				igt_subtest_f("engine-queues-%s", e->name)
+					engine_queues(fd, e);
+
+				igt_subtest_f("engine-queued-%s", e->name)
+					engine_queued(fd, e);
+
+				igt_subtest_f("engine-runnable-%s", e->name)
+					engine_runnable(fd, e);
+
+				igt_subtest_f("engine-running-%s", e->name)
+					engine_running(fd, e);
+			}
+		}
+	}
+
 	igt_fixture {
 		close(fd);
 	}