diff mbox series

[i-g-t,03/24] i915/gem_exec_schedule: Measure semaphore power consumption

Message ID 20190322092155.1656-3-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [i-g-t,01/24] i915/gem_exec_latency: Measure the latency of context switching | expand

Commit Message

Chris Wilson March 22, 2019, 9:21 a.m. UTC
How much energy does spinning on a semaphore consume relative to plain
old spinning?

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/i915/gem_exec_schedule.c | 72 +++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

Comments

Tvrtko Ursulin March 26, 2019, 8:46 a.m. UTC | #1
On 22/03/2019 09:21, Chris Wilson wrote:
> How much energy does spinning on a semaphore consume relative to plain
> old spinning?
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   tests/i915/gem_exec_schedule.c | 72 +++++++++++++++++++++++++++++++++-
>   1 file changed, 71 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
> index a9383000a..4f0577b4e 100644
> --- a/tests/i915/gem_exec_schedule.c
> +++ b/tests/i915/gem_exec_schedule.c
> @@ -29,9 +29,10 @@
>   #include <signal.h>
>   
>   #include "igt.h"
> -#include "igt_vgem.h"
> +#include "igt_gpu_power.h"
>   #include "igt_rand.h"
>   #include "igt_sysfs.h"
> +#include "igt_vgem.h"
>   #include "i915/gem_ring.h"
>   
>   #define LO 0
> @@ -1202,6 +1203,65 @@ static void test_pi_ringfull(int fd, unsigned int engine)
>   	munmap(result, 4096);
>   }
>   
> +static void measure_semaphore_power(int i915)
> +{
> +	struct gpu_power power;
> +	unsigned int engine, signaler;
> +
> +	igt_require(gpu_power_open(&power) == 0);
> +
> +	for_each_physical_engine(i915, signaler) {
> +		struct gpu_power_sample s_spin[2];
> +		struct gpu_power_sample s_sema[2];
> +		double baseline, total;
> +		int64_t jiffie = 1;
> +		igt_spin_t *spin;
> +
> +		spin = __igt_spin_batch_new(i915,
> +					    .engine = signaler,
> +					    .flags = IGT_SPIN_POLL_RUN);

		if (!spin)
			continue;

To skip over !can_store_dword, since you are using the low level 
constructor which doesn't check.

> +		gem_wait(i915, spin->handle, &jiffie); /* waitboost */

Waitboost why? Will it be deterministic either way?

> +		igt_assert(spin->running);
> +		igt_spin_busywait_until_running(spin);
> +
> +		gpu_power_read(&power, &s_spin[0]);
> +		usleep(100*1000);
> +		gpu_power_read(&power, &s_spin[1]);
> +
> +		/* Add a waiter to each engine */
> +		for_each_physical_engine(i915, engine) {
> +			igt_spin_t *sema;
> +
> +			if (engine == signaler)
> +				continue;
> +
> +			sema = __igt_spin_batch_new(i915,
> +						    .engine = engine,
> +						    .dependency = spin->handle);
> +
> +			igt_spin_batch_free(i915, sema);
> +		}
> +		usleep(10); /* just give the tasklets a chance to run */
> +
> +		gpu_power_read(&power, &s_sema[0]);
> +		usleep(100*1000);
> +		gpu_power_read(&power, &s_sema[1]);
> +
> +		igt_spin_batch_free(i915, spin);
> +
> +		baseline = gpu_power_W(&power, &s_spin[0], &s_spin[1]);
> +		total = gpu_power_W(&power, &s_sema[0], &s_sema[1]);
> +
> +		igt_info("%s: %.1fmW + %.1fmW (total %1.fmW)\n",
> +			 e__->name,
> +			 1e3 * baseline,
> +			 1e3 * (total - baseline),
> +			 1e3 * total);
> +	}
> +
> +	gpu_power_close(&power);
> +}
> +
>   igt_main
>   {
>   	const struct intel_execution_engine *e;
> @@ -1362,6 +1422,16 @@ igt_main
>   		}
>   	}
>   
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(gem_scheduler_enabled(fd));
> +			igt_require(gem_scheduler_has_semaphores(fd));
> +		}
> +
> +		igt_subtest("semaphore-power")
> +			measure_semaphore_power(fd);
> +	}
> +
>   	igt_fixture {
>   		igt_stop_hang_detector();
>   		close(fd);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Chris Wilson March 26, 2019, 9:23 a.m. UTC | #2
Quoting Tvrtko Ursulin (2019-03-26 08:46:34)
> 
> On 22/03/2019 09:21, Chris Wilson wrote:
> > +static void measure_semaphore_power(int i915)
> > +{
> > +     struct gpu_power power;
> > +     unsigned int engine, signaler;
> > +
> > +     igt_require(gpu_power_open(&power) == 0);
> > +
> > +     for_each_physical_engine(i915, signaler) {
> > +             struct gpu_power_sample s_spin[2];
> > +             struct gpu_power_sample s_sema[2];
> > +             double baseline, total;
> > +             int64_t jiffie = 1;
> > +             igt_spin_t *spin;
> > +
> > +             spin = __igt_spin_batch_new(i915,
> > +                                         .engine = signaler,
> > +                                         .flags = IGT_SPIN_POLL_RUN);
> 
>                 if (!spin)
>                         continue;
> 
> To skip over !can_store_dword, since you are using the low level 
> constructor which doesn't check.

True, this will need store-dword checking. Too much pain from
spin_batch_new() mysteriously locking up inside loops has caused me to
shy away from using it.

> > +             gem_wait(i915, spin->handle, &jiffie); /* waitboost */
> 
> Waitboost why? Will it be deterministic either way?

There's nothing in here that should trigger a waitboost; the idea was to
put the GPU into as pessimistic a state as possible to measure peak power
consumption. We could set the freq to min/max via sysfs to see if there
is a significant difference. There's always another test waiting in the
corner.
-Chris
diff mbox series

Patch

diff --git a/tests/i915/gem_exec_schedule.c b/tests/i915/gem_exec_schedule.c
index a9383000a..4f0577b4e 100644
--- a/tests/i915/gem_exec_schedule.c
+++ b/tests/i915/gem_exec_schedule.c
@@ -29,9 +29,10 @@ 
 #include <signal.h>
 
 #include "igt.h"
-#include "igt_vgem.h"
+#include "igt_gpu_power.h"
 #include "igt_rand.h"
 #include "igt_sysfs.h"
+#include "igt_vgem.h"
 #include "i915/gem_ring.h"
 
 #define LO 0
@@ -1202,6 +1203,65 @@  static void test_pi_ringfull(int fd, unsigned int engine)
 	munmap(result, 4096);
 }
 
+/* Log per-engine spinner power, alone and with waiters on the other engines */
+static void measure_semaphore_power(int i915)
+{
+	struct gpu_power power;
+	unsigned int engine, signaler;
+
+	igt_require(gpu_power_open(&power) == 0);
+
+	for_each_physical_engine(i915, signaler) {
+		struct gpu_power_sample s_spin[2], s_sema[2];
+		double baseline, total;
+		int64_t jiffie = 1;
+		igt_spin_t *spin;
+
+		/* NOTE(review): POLL_RUN may need a store-dword check here */
+		spin = __igt_spin_batch_new(i915,
+					    .engine = signaler,
+					    .flags = IGT_SPIN_POLL_RUN);
+		gem_wait(i915, spin->handle, &jiffie); /* waitboost */
+		igt_assert(spin->running);
+		igt_spin_busywait_until_running(spin);
+
+		/* Baseline: sample power across 100ms of plain spinning */
+		gpu_power_read(&power, &s_spin[0]);
+		usleep(100*1000);
+		gpu_power_read(&power, &s_spin[1]);
+
+		/* Add a waiter to each engine */
+		for_each_physical_engine(i915, engine) {
+			igt_spin_t *sema;
+
+			if (engine == signaler)
+				continue;
+
+			sema = __igt_spin_batch_new(i915,
+						    .engine = engine,
+						    .dependency = spin->handle);
+			igt_spin_batch_free(i915, sema);
+		}
+		usleep(10); /* just give the tasklets a chance to run */
+
+		/* Sample again across 100ms after adding the waiters */
+		gpu_power_read(&power, &s_sema[0]);
+		usleep(100*1000);
+		gpu_power_read(&power, &s_sema[1]);
+
+		igt_spin_batch_free(i915, spin);
+
+		baseline = gpu_power_W(&power, &s_spin[0], &s_spin[1]);
+		total = gpu_power_W(&power, &s_sema[0], &s_sema[1]);
+
+		igt_info("%s: %.1fmW + %.1fmW (total %.1fmW)\n",
+			 e__->name,
+			 1e3 * baseline, 1e3 * (total - baseline), 1e3 * total);
+	}
+
+	gpu_power_close(&power);
+}
+
 igt_main
 {
 	const struct intel_execution_engine *e;
@@ -1362,6 +1422,16 @@  igt_main
 		}
 	}
 
+	igt_subtest_group {
+		igt_fixture {
+			igt_require(gem_scheduler_enabled(fd));
+			igt_require(gem_scheduler_has_semaphores(fd));
+		}
+
+		igt_subtest("semaphore-power")
+			measure_semaphore_power(fd);
+	}
+
 	igt_fixture {
 		igt_stop_hang_detector();
 		close(fd);