
[i-g-t,06/24] i915/gem_exec_nop: poll-sequential requires ordering between rings

Message ID 20190322092155.1656-6-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Series [i-g-t,01/24] i915/gem_exec_latency: Measure the latency of context switching

Commit Message

Chris Wilson March 22, 2019, 9:21 a.m. UTC
In order to correctly serialise the order of execution between rings, we
need to flag the scratch address as being written. Make it so.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_nop.c | 152 +++++++++++++++++++++++++++++++++-----
 1 file changed, 133 insertions(+), 19 deletions(-)
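
The gist of the change, for readers skimming the diff below: the new poll_sequential() keeps its poll target in a separate scratch buffer and flags that buffer as a GPU write target, so the kernel serialises batches submitted to different engines that touch it. A minimal sketch of that idea, assuming the standard i915 execbuf uAPI from i915_drm.h (the helper name and surrounding setup here are illustrative, not the test's actual code):

/*
 * Sketch only: flag a scratch buffer as written by the GPU so the kernel
 * orders batches from different engines against each other.  Structures
 * and flags come from the upstream i915 execbuf uAPI (i915_drm.h); fd,
 * buffer creation and batch contents are assumed elsewhere.
 */
#include <stdint.h>
#include <drm/i915_drm.h>

static void mark_scratch_as_written(struct drm_i915_gem_exec_object2 *scratch,
				    struct drm_i915_gem_relocation_entry *reloc,
				    uint32_t scratch_handle)
{
	/* Declare the scratch object as a GPU write target ... */
	scratch->handle = scratch_handle;
	scratch->flags |= EXEC_OBJECT_WRITE;

	/* ... and give the relocation into it a write domain, so the store
	 * is tracked as a write rather than a read-only reference. */
	reloc->target_handle = scratch_handle;
	reloc->read_domains = I915_GEM_DOMAIN_RENDER;
	reloc->write_domain = I915_GEM_DOMAIN_RENDER;
}

With the scratch page marked as written, every execbuf that references it counts as a writer in the kernel's dependency tracking, so batches sent to different rings execute in submission order rather than concurrently, which is the ordering poll-sequential relies on as it alternates engines.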

Comments

Tvrtko Ursulin March 26, 2019, 9:38 a.m. UTC | #1
On 22/03/2019 09:21, Chris Wilson wrote:
> In order to correctly serialise the order of execution between rings, we
> need to flag the scratch address as being written. Make it so.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch

diff --git a/tests/i915/gem_exec_nop.c b/tests/i915/gem_exec_nop.c
index 59a08ad08..b91b4d0f6 100644
--- a/tests/i915/gem_exec_nop.c
+++ b/tests/i915/gem_exec_nop.c
@@ -104,7 +104,7 @@ static double nop_on_ring(int fd, uint32_t handle, unsigned ring_id,
 	return elapsed(&start, &now);
 }
 
-static void poll_ring(int fd, unsigned ring, const char *name, int timeout)
+static void poll_ring(int fd, unsigned engine, const char *name, int timeout)
 {
 	const int gen = intel_gen(intel_get_drm_devid(fd));
 	const uint32_t MI_ARB_CHK = 0x5 << 23;
@@ -112,29 +112,17 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout)
 	struct drm_i915_gem_exec_object2 obj;
 	struct drm_i915_gem_relocation_entry reloc[4], *r;
 	uint32_t *bbe[2], *state, *batch;
-	unsigned engines[16], nengine, flags;
 	struct timespec tv = {};
 	unsigned long cycles;
+	unsigned flags;
 	uint64_t elapsed;
 
 	flags = I915_EXEC_NO_RELOC;
 	if (gen == 4 || gen == 5)
 		flags |= I915_EXEC_SECURE;
 
-	nengine = 0;
-	if (ring == ALL_ENGINES) {
-		for_each_physical_engine(fd, ring) {
-			if (!gem_can_store_dword(fd, ring))
-				continue;
-
-			engines[nengine++] = ring;
-		}
-	} else {
-		gem_require_ring(fd, ring);
-		igt_require(gem_can_store_dword(fd, ring));
-		engines[nengine++] = ring;
-	}
-	igt_require(nengine);
+	gem_require_ring(fd, engine);
+	igt_require(gem_can_store_dword(fd, engine));
 
 	memset(&obj, 0, sizeof(obj));
 	obj.handle = gem_create(fd, 4096);
@@ -198,7 +186,7 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout)
 	memset(&execbuf, 0, sizeof(execbuf));
 	execbuf.buffers_ptr = to_user_pointer(&obj);
 	execbuf.buffer_count = 1;
-	execbuf.flags = engines[0];
+	execbuf.flags = engine | flags;
 
 	cycles = 0;
 	do {
@@ -208,7 +196,6 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout)
 		execbuf.batch_start_offset =
 			(bbe[idx] - batch) * sizeof(*batch) - 64;
 
-		execbuf.flags = engines[cycles % nengine] | flags;
 		gem_execbuf(fd, &execbuf);
 
 		*bbe[!idx] = MI_BATCH_BUFFER_END;
@@ -227,6 +214,133 @@ static void poll_ring(int fd, unsigned ring, const char *name, int timeout)
 	gem_close(fd, obj.handle);
 }
 
+static void poll_sequential(int fd, const char *name, int timeout)
+{
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const uint32_t MI_ARB_CHK = 0x5 << 23;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 obj[2];
+	struct drm_i915_gem_relocation_entry reloc[4], *r;
+	uint32_t *bbe[2], *state, *batch;
+	unsigned engines[16], nengine, engine, flags;
+	struct timespec tv = {};
+	unsigned long cycles;
+	uint64_t elapsed;
+	bool cached;
+
+	flags = I915_EXEC_NO_RELOC;
+	if (gen == 4 || gen == 5)
+		flags |= I915_EXEC_SECURE;
+
+	nengine = 0;
+	for_each_physical_engine(fd, engine) {
+		if (!gem_can_store_dword(fd, engine))
+			continue;
+
+		engines[nengine++] = engine;
+	}
+	igt_require(nengine);
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = gem_create(fd, 4096);
+	obj[0].flags = EXEC_OBJECT_WRITE;
+	cached = __gem_set_caching(fd, obj[0].handle, 1) == 0;
+	obj[1].handle = gem_create(fd, 4096);
+	obj[1].relocs_ptr = to_user_pointer(reloc);
+	obj[1].relocation_count = ARRAY_SIZE(reloc);
+
+	r = memset(reloc, 0, sizeof(reloc));
+	batch = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_WRITE);
+
+	for (unsigned int start_offset = 0;
+	     start_offset <= 128;
+	     start_offset += 128) {
+		uint32_t *b = batch + start_offset / sizeof(*batch);
+
+		r->target_handle = obj[0].handle;
+		r->offset = (b - batch + 1) * sizeof(uint32_t);
+		r->delta = 0;
+		r->read_domains = I915_GEM_DOMAIN_RENDER;
+		r->write_domain = I915_GEM_DOMAIN_RENDER;
+
+		*b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+		if (gen >= 8) {
+			*++b = r->delta;
+			*++b = 0;
+		} else if (gen >= 4) {
+			r->offset += sizeof(uint32_t);
+			*++b = 0;
+			*++b = r->delta;
+		} else {
+			*b -= 1;
+			*++b = r->delta;
+		}
+		*++b = start_offset != 0;
+		r++;
+
+		b = batch + (start_offset + 64) / sizeof(*batch);
+		bbe[start_offset != 0] = b;
+		*b++ = MI_ARB_CHK;
+
+		r->target_handle = obj[1].handle;
+		r->offset = (b - batch + 1) * sizeof(uint32_t);
+		r->read_domains = I915_GEM_DOMAIN_COMMAND;
+		r->delta = start_offset + 64;
+		if (gen >= 8) {
+			*b++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
+			*b++ = r->delta;
+			*b++ = 0;
+		} else if (gen >= 6) {
+			*b++ = MI_BATCH_BUFFER_START | 1 << 8;
+			*b++ = r->delta;
+		} else {
+			*b++ = MI_BATCH_BUFFER_START | 2 << 6;
+			if (gen < 4)
+				r->delta |= 1;
+			*b++ = r->delta;
+		}
+		r++;
+	}
+	igt_assert(r == reloc + ARRAY_SIZE(reloc));
+
+	if (cached)
+		state = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_READ);
+	else
+		state = gem_mmap__wc(fd, obj[0].handle, 0, 4096, PROT_READ);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+
+	cycles = 0;
+	do {
+		unsigned int idx = ++cycles & 1;
+
+		*bbe[idx] = MI_ARB_CHK;
+		execbuf.batch_start_offset =
+			(bbe[idx] - batch) * sizeof(*batch) - 64;
+
+		execbuf.flags = engines[cycles % nengine] | flags;
+		gem_execbuf(fd, &execbuf);
+
+		*bbe[!idx] = MI_BATCH_BUFFER_END;
+		__sync_synchronize();
+
+		while (READ_ONCE(*state) != idx)
+			;
+	} while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout);
+	*bbe[cycles & 1] = MI_BATCH_BUFFER_END;
+	gem_sync(fd, obj[1].handle);
+
+	igt_info("%s completed %ld cycles: %.3f us\n",
+		 name, cycles, elapsed*1e-3/cycles);
+
+	munmap(state, 4096);
+	munmap(batch, 4096);
+	gem_close(fd, obj[1].handle);
+	gem_close(fd, obj[0].handle);
+}
+
 static void single(int fd, uint32_t handle,
 		   unsigned ring_id, const char *ring_name)
 {
@@ -813,7 +927,7 @@ igt_main
 		}
 
 		igt_subtest("poll-sequential")
-			poll_ring(device, ALL_ENGINES, "Sequential", 20);
+			poll_sequential(device, "Sequential", 20);
 
 		igt_subtest("headless") {
 			/* Requires master for changing display modes */