diff mbox

[i-g-t,1/3] benchmarks/gem_syslatency: Pass a write hazard around

Message ID 20180522110044.26439-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 22, 2018, 11 a.m. UTC
Extend the i915 load to (optionally) pass a write hazard between
engines, causing us to wait on the interrupt between engines. Thus
adding MI_USER_INTERRUPT irq handling to our list of sins.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 benchmarks/gem_syslatency.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

Comments

Mika Kuoppala May 22, 2018, 11:24 a.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> Extend the i915 load to (optionally) pass a write hazard between
> engines, causing us to wait on the interrupt between engines. Thus
> adding MI_USER_INTERRUPT irq handling to our list of sins.


Is it the eb_move_to_gpu waiting then for the object
due to write?

..and this then arming the interrupts later down the
chain?

-Mika

>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  benchmarks/gem_syslatency.c | 28 ++++++++++++++++++----------
>  1 file changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
> index de59eaf82..9160e2199 100644
> --- a/benchmarks/gem_syslatency.c
> +++ b/benchmarks/gem_syslatency.c
> @@ -53,6 +53,7 @@ struct gem_busyspin {
>  	pthread_t thread;
>  	unsigned long count;
>  	bool leak;
> +	bool interrupts;
>  };
>  
>  struct sys_wait {
> @@ -94,7 +95,7 @@ static void *gem_busyspin(void *arg)
>  	const uint32_t bbe = MI_BATCH_BUFFER_END;
>  	struct gem_busyspin *bs = arg;
>  	struct drm_i915_gem_execbuffer2 execbuf;
> -	struct drm_i915_gem_exec_object2 obj;
> +	struct drm_i915_gem_exec_object2 obj[2];
>  	const unsigned sz = bs->leak ? 16 << 20 : 4 << 10;
>  	unsigned engines[16];
>  	unsigned nengine;
> @@ -107,13 +108,15 @@ static void *gem_busyspin(void *arg)
>  	for_each_engine(fd, engine)
>  		if (!ignore_engine(fd, engine)) engines[nengine++] = engine;
>  
> -	memset(&obj, 0, sizeof(obj));
> -	obj.handle = gem_create(fd, sz);
> -	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = gem_create(fd, 4096);
> +	obj[0].flags = EXEC_OBJECT_WRITE;
> +	obj[1].handle = gem_create(fd, sz);
> +	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
>  
>  	memset(&execbuf, 0, sizeof(execbuf));
> -	execbuf.buffers_ptr = (uintptr_t)&obj;
> -	execbuf.buffer_count = 1;
> +	execbuf.buffers_ptr = (uintptr_t)(obj + !bs->interrupts);
> +	execbuf.buffer_count = 1 + !!bs->interrupts;
>  	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
>  	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
>  	if (__gem_execbuf(fd, &execbuf)) {
> @@ -129,9 +132,9 @@ static void *gem_busyspin(void *arg)
>  		}
>  		bs->count += nengine;
>  		if (bs->leak) {
> -			gem_madvise(fd, obj.handle, I915_MADV_DONTNEED);
> -			obj.handle = gem_create(fd, sz);
> -			gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
> +			gem_madvise(fd, obj[1].handle, I915_MADV_DONTNEED);
> +			obj[1].handle = gem_create(fd, sz);
> +			gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
>  		}
>  	}
>  
> @@ -305,13 +308,17 @@ int main(int argc, char **argv)
>  	int field = -1;
>  	int enable_gem_sysbusy = 1;
>  	bool leak = false;
> +	bool interrupts = false;
>  	int n, c;
>  
> -	while ((c = getopt(argc, argv, "t:f:bmn")) != -1) {
> +	while ((c = getopt(argc, argv, "t:f:bmni")) != -1) {
>  		switch (c) {
>  		case 'n': /* dry run, measure baseline system latency */
>  			enable_gem_sysbusy = 0;
>  			break;
> +		case 'i': /* interrupts ahoy! */
> +			interrupts = true;
> +			break;
>  		case 't':
>  			/* How long to run the benchmark for (seconds) */
>  			time = atoi(optarg);
> @@ -346,6 +353,7 @@ int main(int argc, char **argv)
>  		for (n = 0; n < ncpus; n++) {
>  			bind_cpu(&attr, n);
>  			busy[n].leak = leak;
> +			busy[n].interrupts = interrupts;
>  			pthread_create(&busy[n].thread, &attr,
>  				       gem_busyspin, &busy[n]);
>  		}
> -- 
> 2.17.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson May 22, 2018, 11:28 a.m. UTC | #2
Quoting Mika Kuoppala (2018-05-22 12:24:59)
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > Extend the i915 load to (optionally) pass a write hazard between
> > engines, causing us to wait on the interrupt between engines. Thus
> > adding MI_USER_INTERRUPT irq handling to our list of sins.
> 
> 
> Is it the eb_move_to_gpu waiting then for the object
> due to write?

Don't be silly! That was like 3 years ago :-p
 
> ..and this then arming the interrupts later down the
> chain?

i915_gem_request_await_object adds the callback for the request to be
submitted when its dependencies are complete.
-Chris
Tvrtko Ursulin May 22, 2018, 11:37 a.m. UTC | #3
On 22/05/2018 12:00, Chris Wilson wrote:
> Extend the i915 load to (optionally) pass a write hazard between
> engines, causing us to wait on the interrupt between engines. Thus
> adding MI_USER_INTERRUPT irq handling to our list of sins.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   benchmarks/gem_syslatency.c | 28 ++++++++++++++++++----------
>   1 file changed, 18 insertions(+), 10 deletions(-)
> 
> diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
> index de59eaf82..9160e2199 100644
> --- a/benchmarks/gem_syslatency.c
> +++ b/benchmarks/gem_syslatency.c
> @@ -53,6 +53,7 @@ struct gem_busyspin {
>   	pthread_t thread;
>   	unsigned long count;
>   	bool leak;
> +	bool interrupts;
>   };
>   
>   struct sys_wait {
> @@ -94,7 +95,7 @@ static void *gem_busyspin(void *arg)
>   	const uint32_t bbe = MI_BATCH_BUFFER_END;
>   	struct gem_busyspin *bs = arg;
>   	struct drm_i915_gem_execbuffer2 execbuf;
> -	struct drm_i915_gem_exec_object2 obj;
> +	struct drm_i915_gem_exec_object2 obj[2];
>   	const unsigned sz = bs->leak ? 16 << 20 : 4 << 10;
>   	unsigned engines[16];
>   	unsigned nengine;
> @@ -107,13 +108,15 @@ static void *gem_busyspin(void *arg)
>   	for_each_engine(fd, engine)
>   		if (!ignore_engine(fd, engine)) engines[nengine++] = engine;
>   
> -	memset(&obj, 0, sizeof(obj));
> -	obj.handle = gem_create(fd, sz);
> -	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = gem_create(fd, 4096);
> +	obj[0].flags = EXEC_OBJECT_WRITE;
> +	obj[1].handle = gem_create(fd, sz);
> +	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
>   
>   	memset(&execbuf, 0, sizeof(execbuf));
> -	execbuf.buffers_ptr = (uintptr_t)&obj;
> -	execbuf.buffer_count = 1;
> +	execbuf.buffers_ptr = (uintptr_t)(obj + !bs->interrupts);
> +	execbuf.buffer_count = 1 + !!bs->interrupts;

Above two lines are to hacky. :/ Suggest a more pedestrian approach with 
a ternary or something.

>   	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
>   	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
>   	if (__gem_execbuf(fd, &execbuf)) {
> @@ -129,9 +132,9 @@ static void *gem_busyspin(void *arg)
>   		}
>   		bs->count += nengine;
>   		if (bs->leak) {
> -			gem_madvise(fd, obj.handle, I915_MADV_DONTNEED);
> -			obj.handle = gem_create(fd, sz);
> -			gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
> +			gem_madvise(fd, obj[1].handle, I915_MADV_DONTNEED);
> +			obj[1].handle = gem_create(fd, sz);
> +			gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
>   		}
>   	}
>   
> @@ -305,13 +308,17 @@ int main(int argc, char **argv)
>   	int field = -1;
>   	int enable_gem_sysbusy = 1;
>   	bool leak = false;
> +	bool interrupts = false;
>   	int n, c;
>   
> -	while ((c = getopt(argc, argv, "t:f:bmn")) != -1) {
> +	while ((c = getopt(argc, argv, "t:f:bmni")) != -1) {
>   		switch (c) {
>   		case 'n': /* dry run, measure baseline system latency */
>   			enable_gem_sysbusy = 0;
>   			break;
> +		case 'i': /* interrupts ahoy! */
> +			interrupts = true;
> +			break;
>   		case 't':
>   			/* How long to run the benchmark for (seconds) */
>   			time = atoi(optarg);
> @@ -346,6 +353,7 @@ int main(int argc, char **argv)
>   		for (n = 0; n < ncpus; n++) {
>   			bind_cpu(&attr, n);
>   			busy[n].leak = leak;
> +			busy[n].interrupts = interrupts;
>   			pthread_create(&busy[n].thread, &attr,
>   				       gem_busyspin, &busy[n]);
>   		}
> 

With the hackery eliminated:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
diff mbox

Patch

diff --git a/benchmarks/gem_syslatency.c b/benchmarks/gem_syslatency.c
index de59eaf82..9160e2199 100644
--- a/benchmarks/gem_syslatency.c
+++ b/benchmarks/gem_syslatency.c
@@ -53,6 +53,7 @@  struct gem_busyspin {
 	pthread_t thread;
 	unsigned long count;
 	bool leak;
+	bool interrupts;
 };
 
 struct sys_wait {
@@ -94,7 +95,7 @@  static void *gem_busyspin(void *arg)
 	const uint32_t bbe = MI_BATCH_BUFFER_END;
 	struct gem_busyspin *bs = arg;
 	struct drm_i915_gem_execbuffer2 execbuf;
-	struct drm_i915_gem_exec_object2 obj;
+	struct drm_i915_gem_exec_object2 obj[2];
 	const unsigned sz = bs->leak ? 16 << 20 : 4 << 10;
 	unsigned engines[16];
 	unsigned nengine;
@@ -107,13 +108,15 @@  static void *gem_busyspin(void *arg)
 	for_each_engine(fd, engine)
 		if (!ignore_engine(fd, engine)) engines[nengine++] = engine;
 
-	memset(&obj, 0, sizeof(obj));
-	obj.handle = gem_create(fd, sz);
-	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = gem_create(fd, 4096);
+	obj[0].flags = EXEC_OBJECT_WRITE;
+	obj[1].handle = gem_create(fd, sz);
+	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
 
 	memset(&execbuf, 0, sizeof(execbuf));
-	execbuf.buffers_ptr = (uintptr_t)&obj;
-	execbuf.buffer_count = 1;
+	execbuf.buffers_ptr = (uintptr_t)(obj + !bs->interrupts);
+	execbuf.buffer_count = 1 + !!bs->interrupts;
 	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
 	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
 	if (__gem_execbuf(fd, &execbuf)) {
@@ -129,9 +132,9 @@  static void *gem_busyspin(void *arg)
 		}
 		bs->count += nengine;
 		if (bs->leak) {
-			gem_madvise(fd, obj.handle, I915_MADV_DONTNEED);
-			obj.handle = gem_create(fd, sz);
-			gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));
+			gem_madvise(fd, obj[1].handle, I915_MADV_DONTNEED);
+			obj[1].handle = gem_create(fd, sz);
+			gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
 		}
 	}
 
@@ -305,13 +308,17 @@  int main(int argc, char **argv)
 	int field = -1;
 	int enable_gem_sysbusy = 1;
 	bool leak = false;
+	bool interrupts = false;
 	int n, c;
 
-	while ((c = getopt(argc, argv, "t:f:bmn")) != -1) {
+	while ((c = getopt(argc, argv, "t:f:bmni")) != -1) {
 		switch (c) {
 		case 'n': /* dry run, measure baseline system latency */
 			enable_gem_sysbusy = 0;
 			break;
+		case 'i': /* interrupts ahoy! */
+			interrupts = true;
+			break;
 		case 't':
 			/* How long to run the benchmark for (seconds) */
 			time = atoi(optarg);
@@ -346,6 +353,7 @@  int main(int argc, char **argv)
 		for (n = 0; n < ncpus; n++) {
 			bind_cpu(&attr, n);
 			busy[n].leak = leak;
+			busy[n].interrupts = interrupts;
 			pthread_create(&busy[n].thread, &attr,
 				       gem_busyspin, &busy[n]);
 		}