Message ID | 20201217172618.1637044-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [i-g-t] i915/perf_pmu: Exercise I915_PMU_SOFTWARE_GT_AWAKE_TIME | expand |
On 17/12/2020 17:26, Chris Wilson wrote: > Measure the sample gt-awake time while each engine and every engine is > busy. They should all report the same duration, the elapsed runtime of > the batch. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > --- > include/drm-uapi/i915_drm.h | 3 ++- > tests/i915/perf_pmu.c | 42 +++++++++++++++++++++++++++++++++++++ > 2 files changed, 44 insertions(+), 1 deletion(-) > > diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h > index ef696d1a5..bf9ea471c 100644 > --- a/include/drm-uapi/i915_drm.h > +++ b/include/drm-uapi/i915_drm.h > @@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample { > #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) > #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) > #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) > +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) > > -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY > +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY > > /* Each region is a minimum of 16k, and there are at most 255 of them. 
> */ > diff --git a/tests/i915/perf_pmu.c b/tests/i915/perf_pmu.c > index c2b186d39..90182074b 100644 > --- a/tests/i915/perf_pmu.c > +++ b/tests/i915/perf_pmu.c > @@ -883,6 +883,42 @@ sema_busy(int gem_fd, > close(fd[1]); > } > > +static void test_awake(int i915) > +{ > + const struct intel_execution_engine2 *e; > + unsigned long slept; > + uint64_t val; > + int fd; > + > + fd = perf_i915_open(i915, I915_PMU_SOFTWARE_GT_AWAKE_TIME); > + igt_skip_on(fd < 0); > + > + /* Check that each engine is captured by the GT wakeref */ > + __for_each_physical_engine(i915, e) { > + igt_spin_new(i915, .engine = e->flags); > + > + val = pmu_read_single(fd); > + slept = measured_usleep(batch_duration_ns / 1000); > + val = pmu_read_single(fd) - val; > + > + gem_quiescent_gpu(i915); > + assert_within_epsilon(val, slept, tolerance); > + } > + > + /* And that the total GT wakeref matches walltime not summation */ > + __for_each_physical_engine(i915, e) > + igt_spin_new(i915, .engine = e->flags); > + > + val = pmu_read_single(fd); > + slept = measured_usleep(batch_duration_ns / 1000); > + val = pmu_read_single(fd) - val; > + > + gem_quiescent_gpu(i915); > + assert_within_epsilon(val, slept, tolerance); > + Erm who terminates all those spinners? :) Regards, Tvrtko > + close(fd); > +} > + > #define MI_WAIT_FOR_PIPE_C_VBLANK (1<<21) > #define MI_WAIT_FOR_PIPE_B_VBLANK (1<<11) > #define MI_WAIT_FOR_PIPE_A_VBLANK (1<<3) > @@ -2250,6 +2286,12 @@ igt_main > igt_subtest("rc6-suspend") > test_rc6(fd, TEST_S3); > > + /** > + * Test GT wakeref tracking (similar to RC0, opposite of RC6) > + */ > + igt_subtest("gt-awake") > + test_awake(fd); > + > /** > * Check render nodes are counted. > */ >
Quoting Tvrtko Ursulin (2020-12-17 17:33:05) > > On 17/12/2020 17:26, Chris Wilson wrote: > > Measure the sample gt-awake time while each engine and every engine is > > busy. They should all report the same duration, the elapsed runtime of > > the batch. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> > > --- > > include/drm-uapi/i915_drm.h | 3 ++- > > tests/i915/perf_pmu.c | 42 +++++++++++++++++++++++++++++++++++++ > > 2 files changed, 44 insertions(+), 1 deletion(-) > > > > diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h > > index ef696d1a5..bf9ea471c 100644 > > --- a/include/drm-uapi/i915_drm.h > > +++ b/include/drm-uapi/i915_drm.h > > @@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample { > > #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) > > #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) > > #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) > > +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) > > > > -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY > > +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY > > > > /* Each region is a minimum of 16k, and there are at most 255 of them. 
> > */ > > diff --git a/tests/i915/perf_pmu.c b/tests/i915/perf_pmu.c > > index c2b186d39..90182074b 100644 > > --- a/tests/i915/perf_pmu.c > > +++ b/tests/i915/perf_pmu.c > > @@ -883,6 +883,42 @@ sema_busy(int gem_fd, > > close(fd[1]); > > } > > > > +static void test_awake(int i915) > > +{ > > + const struct intel_execution_engine2 *e; > > + unsigned long slept; > > + uint64_t val; > > + int fd; > > + > > + fd = perf_i915_open(i915, I915_PMU_SOFTWARE_GT_AWAKE_TIME); > > + igt_skip_on(fd < 0); > > + > > + /* Check that each engine is captured by the GT wakeref */ > > + __for_each_physical_engine(i915, e) { > > + igt_spin_new(i915, .engine = e->flags); > > + > > + val = pmu_read_single(fd); > > + slept = measured_usleep(batch_duration_ns / 1000); > > + val = pmu_read_single(fd) - val; > > + > > + gem_quiescent_gpu(i915); > > + assert_within_epsilon(val, slept, tolerance); > > + } > > + > > + /* And that the total GT wakeref matches walltime not summation */ > > + __for_each_physical_engine(i915, e) > > + igt_spin_new(i915, .engine = e->flags); > > + > > + val = pmu_read_single(fd); > > + slept = measured_usleep(batch_duration_ns / 1000); > > + val = pmu_read_single(fd) - val; > > + > > + gem_quiescent_gpu(i915); > > + assert_within_epsilon(val, slept, tolerance); > > + > > Erm who terminates all those spinners? :) gem_quiescent_gpu() calls igt_terminate_spins(). Hmm, I actually thought it freed them too, but what's a small leak... -Chris
On 17/12/2020 17:39, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2020-12-17 17:33:05) >> >> On 17/12/2020 17:26, Chris Wilson wrote: >>> Measure the sample gt-awake time while each engine and every engine is >>> busy. They should all report the same duration, the elapsed runtime of >>> the batch. >>> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> >>> --- >>> include/drm-uapi/i915_drm.h | 3 ++- >>> tests/i915/perf_pmu.c | 42 +++++++++++++++++++++++++++++++++++++ >>> 2 files changed, 44 insertions(+), 1 deletion(-) >>> >>> diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h >>> index ef696d1a5..bf9ea471c 100644 >>> --- a/include/drm-uapi/i915_drm.h >>> +++ b/include/drm-uapi/i915_drm.h >>> @@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample { >>> #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) >>> #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) >>> #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) >>> +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) >>> >>> -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY >>> +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY >>> >>> /* Each region is a minimum of 16k, and there are at most 255 of them. 
>>> */ >>> diff --git a/tests/i915/perf_pmu.c b/tests/i915/perf_pmu.c >>> index c2b186d39..90182074b 100644 >>> --- a/tests/i915/perf_pmu.c >>> +++ b/tests/i915/perf_pmu.c >>> @@ -883,6 +883,42 @@ sema_busy(int gem_fd, >>> close(fd[1]); >>> } >>> >>> +static void test_awake(int i915) >>> +{ >>> + const struct intel_execution_engine2 *e; >>> + unsigned long slept; >>> + uint64_t val; >>> + int fd; >>> + >>> + fd = perf_i915_open(i915, I915_PMU_SOFTWARE_GT_AWAKE_TIME); >>> + igt_skip_on(fd < 0); >>> + >>> + /* Check that each engine is captured by the GT wakeref */ >>> + __for_each_physical_engine(i915, e) { >>> + igt_spin_new(i915, .engine = e->flags); >>> + >>> + val = pmu_read_single(fd); >>> + slept = measured_usleep(batch_duration_ns / 1000); >>> + val = pmu_read_single(fd) - val; >>> + >>> + gem_quiescent_gpu(i915); >>> + assert_within_epsilon(val, slept, tolerance); >>> + } >>> + >>> + /* And that the total GT wakeref matches walltime not summation */ >>> + __for_each_physical_engine(i915, e) >>> + igt_spin_new(i915, .engine = e->flags); >>> + >>> + val = pmu_read_single(fd); >>> + slept = measured_usleep(batch_duration_ns / 1000); >>> + val = pmu_read_single(fd) - val; >>> + >>> + gem_quiescent_gpu(i915); >>> + assert_within_epsilon(val, slept, tolerance); >>> + >> >> Erm who terminates all those spinners? :) > > gem_quiescent_gpu() calls igt_terminate_spins(). Hmm, I actually thought > it freed them too, but what's a small leak... Okay, I'm fine with that. Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h index ef696d1a5..bf9ea471c 100644 --- a/include/drm-uapi/i915_drm.h +++ b/include/drm-uapi/i915_drm.h @@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. */ diff --git a/tests/i915/perf_pmu.c b/tests/i915/perf_pmu.c index c2b186d39..90182074b 100644 --- a/tests/i915/perf_pmu.c +++ b/tests/i915/perf_pmu.c @@ -883,6 +883,42 @@ sema_busy(int gem_fd, close(fd[1]); } +static void test_awake(int i915) +{ + const struct intel_execution_engine2 *e; + unsigned long slept; + uint64_t val; + int fd; + + fd = perf_i915_open(i915, I915_PMU_SOFTWARE_GT_AWAKE_TIME); + igt_skip_on(fd < 0); + + /* Check that each engine is captured by the GT wakeref */ + __for_each_physical_engine(i915, e) { + igt_spin_new(i915, .engine = e->flags); + + val = pmu_read_single(fd); + slept = measured_usleep(batch_duration_ns / 1000); + val = pmu_read_single(fd) - val; + + gem_quiescent_gpu(i915); + assert_within_epsilon(val, slept, tolerance); + } + + /* And that the total GT wakeref matches walltime not summation */ + __for_each_physical_engine(i915, e) + igt_spin_new(i915, .engine = e->flags); + + val = pmu_read_single(fd); + slept = measured_usleep(batch_duration_ns / 1000); + val = pmu_read_single(fd) - val; + + gem_quiescent_gpu(i915); + assert_within_epsilon(val, slept, tolerance); + + close(fd); +} + #define MI_WAIT_FOR_PIPE_C_VBLANK (1<<21) #define MI_WAIT_FOR_PIPE_B_VBLANK (1<<11) #define MI_WAIT_FOR_PIPE_A_VBLANK (1<<3) @@ -2250,6 +2286,12 @@ igt_main igt_subtest("rc6-suspend") test_rc6(fd, TEST_S3); + /** + * Test 
GT wakeref tracking (similar to RC0, opposite of RC6) + */ + igt_subtest("gt-awake") + test_awake(fd); + /** * Check render nodes are counted. */
Measure the sample gt-awake time while each engine in turn, and then every engine at once, is busy. They should all report the same duration, the elapsed runtime of the batch. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> --- include/drm-uapi/i915_drm.h | 3 ++- tests/i915/perf_pmu.c | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-)