Message ID | 20180219191251.29766-1-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Tvrtko Ursulin (2018-02-19 19:12:51) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Verify that the reported busyness is in line with what would we expect > from a batch which causes a hang and gets kicked out from the engine. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > --- > tests/perf_pmu.c | 41 +++++++++++++++++++++++++++++++++++------ > 1 file changed, 35 insertions(+), 6 deletions(-) > > diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c > index 7fab73e22c2d..90b6ec4db32d 100644 > --- a/tests/perf_pmu.c > +++ b/tests/perf_pmu.c > @@ -168,6 +168,7 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e) > #define TEST_TRAILING_IDLE (4) > #define TEST_RUNTIME_PM (8) > #define FLAG_LONG (16) > +#define FLAG_HANG (32) > > static void end_spin(int fd, igt_spin_t *spin, unsigned int flags) > { > @@ -186,11 +187,15 @@ static void end_spin(int fd, igt_spin_t *spin, unsigned int flags) > static void > single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags) > { > + const unsigned int hang_us = 10e6; > unsigned long slept; > igt_spin_t *spin; > - uint64_t val; > + uint64_t val[2], ts[2]; > int fd; > > + if (flags & FLAG_HANG) > + gem_quiescent_gpu(gem_fd); > + > fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance)); > > if (flags & TEST_BUSY) > @@ -198,17 +203,36 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags) > else > spin = NULL; > > - val = pmu_read_single(fd); > - slept = measured_usleep(batch_duration_ns / 1000); > + val[0] = __pmu_read_single(fd, &ts[0]); > + slept = measured_usleep(flags & FLAG_HANG ? > + hang_us : batch_duration_ns / 1000); > if (flags & TEST_TRAILING_IDLE) > end_spin(gem_fd, spin, flags); > - val = pmu_read_single(fd) - val; > + val[1] = pmu_read_single(fd); > > end_spin(gem_fd, spin, FLAG_SYNC); > igt_spin_batch_free(gem_fd, spin); > - close(fd); > > - assert_within_epsilon(val, flags & TEST_BUSY ? 
slept : 0.f, tolerance); > + if ((flags & TEST_BUSY) && (flags & FLAG_HANG)) { > + val[1] = __pmu_read_single(fd, &ts[1]); > + close(fd); > + igt_info("sampled with hang %.3fms / %.3fms\n", > + (val[1] - val[0]) / 1e6, (ts[1] - ts[0]) / 1e6); > + /* Check that some busyness was reported. */ > + igt_assert(val[1] - val[0] > 0); > + /* > + * But not more than some reasonable value before which we > + * expected the spinner to be kicked out. > + */

So 120s? And even that carries internal knowledge from across the ages.

I don't think this is a sensible test. What would be reasonable is something like:

    spinner()
    val[0] = pmu()
    sleep()
    igt_force_gpu_reset()
    val[1] = pmu();
    d_busy = val[1] - val[0]
    sleep()
    val[2] = pmu()
    d_idle = val[2] - val[1];

Then d_busy should be d_ts, and d_idle should be 0, i.e. the igt_force_gpu_reset() is just an indirect igt_spin_batch_end().

-Chris
On 19/02/2018 19:21, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-02-19 19:12:51) >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> Verify that the reported busyness is in line with what would we expect >> from a batch which causes a hang and gets kicked out from the engine. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> --- >> tests/perf_pmu.c | 41 +++++++++++++++++++++++++++++++++++------ >> 1 file changed, 35 insertions(+), 6 deletions(-) >> >> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c >> index 7fab73e22c2d..90b6ec4db32d 100644 >> --- a/tests/perf_pmu.c >> +++ b/tests/perf_pmu.c >> @@ -168,6 +168,7 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e) >> #define TEST_TRAILING_IDLE (4) >> #define TEST_RUNTIME_PM (8) >> #define FLAG_LONG (16) >> +#define FLAG_HANG (32) >> >> static void end_spin(int fd, igt_spin_t *spin, unsigned int flags) >> { >> @@ -186,11 +187,15 @@ static void end_spin(int fd, igt_spin_t *spin, unsigned int flags) >> static void >> single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags) >> { >> + const unsigned int hang_us = 10e6; >> unsigned long slept; >> igt_spin_t *spin; >> - uint64_t val; >> + uint64_t val[2], ts[2]; >> int fd; >> >> + if (flags & FLAG_HANG) >> + gem_quiescent_gpu(gem_fd); >> + >> fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance)); >> >> if (flags & TEST_BUSY) >> @@ -198,17 +203,36 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags) >> else >> spin = NULL; >> >> - val = pmu_read_single(fd); >> - slept = measured_usleep(batch_duration_ns / 1000); >> + val[0] = __pmu_read_single(fd, &ts[0]); >> + slept = measured_usleep(flags & FLAG_HANG ? 
>> + hang_us : batch_duration_ns / 1000); >> if (flags & TEST_TRAILING_IDLE) >> end_spin(gem_fd, spin, flags); >> - val = pmu_read_single(fd) - val; >> + val[1] = pmu_read_single(fd); >> >> end_spin(gem_fd, spin, FLAG_SYNC); >> igt_spin_batch_free(gem_fd, spin); >> - close(fd); >> >> - assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance); >> + if ((flags & TEST_BUSY) && (flags & FLAG_HANG)) { >> + val[1] = __pmu_read_single(fd, &ts[1]); >> + close(fd); >> + igt_info("sampled with hang %.3fms / %.3fms\n", >> + (val[1] - val[0]) / 1e6, (ts[1] - ts[0]) / 1e6); >> + /* Check that some busyness was reported. */ >> + igt_assert(val[1] - val[0] > 0); >> + /* >> + * But not more than some reasonable value before which we >> + * expected the spinner to be kicked out. >> + */ > > So 120s? And even that carries internal knowledge from across the ages. > > I don't think this is a sensible test. What would be reasonable is > something like > > spinner() > val[0] = pmu() > sleep() > igt_force_gpu_reset() > val[1] = pmu(); > d_busy = val[1] - val[0] > sleep() > val[2] = pmu() > d_idle = val[2] - val[1]; > > Then d_busy should be d_ts, and d_idle should be 0. i.e. the > igt_force_gpu_reset() is just an indirect igt_spin_batch_end(). Yeah I am not claiming the test is great. I threw it together quickly when I suspected something is going bad. Just want to get some results overnight so I can despair tomorrow. Regards, Tvrtko
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c index 7fab73e22c2d..90b6ec4db32d 100644 --- a/tests/perf_pmu.c +++ b/tests/perf_pmu.c @@ -168,6 +168,7 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e) #define TEST_TRAILING_IDLE (4) #define TEST_RUNTIME_PM (8) #define FLAG_LONG (16) +#define FLAG_HANG (32) static void end_spin(int fd, igt_spin_t *spin, unsigned int flags) { @@ -186,11 +187,15 @@ static void end_spin(int fd, igt_spin_t *spin, unsigned int flags) static void single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags) { + const unsigned int hang_us = 10e6; unsigned long slept; igt_spin_t *spin; - uint64_t val; + uint64_t val[2], ts[2]; int fd; + if (flags & FLAG_HANG) + gem_quiescent_gpu(gem_fd); + fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance)); if (flags & TEST_BUSY) @@ -198,17 +203,36 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags) else spin = NULL; - val = pmu_read_single(fd); - slept = measured_usleep(batch_duration_ns / 1000); + val[0] = __pmu_read_single(fd, &ts[0]); + slept = measured_usleep(flags & FLAG_HANG ? + hang_us : batch_duration_ns / 1000); if (flags & TEST_TRAILING_IDLE) end_spin(gem_fd, spin, flags); - val = pmu_read_single(fd) - val; + val[1] = pmu_read_single(fd); end_spin(gem_fd, spin, FLAG_SYNC); igt_spin_batch_free(gem_fd, spin); - close(fd); - assert_within_epsilon(val, flags & TEST_BUSY ? slept : 0.f, tolerance); + if ((flags & TEST_BUSY) && (flags & FLAG_HANG)) { + val[1] = __pmu_read_single(fd, &ts[1]); + close(fd); + igt_info("sampled with hang %.3fms / %.3fms\n", + (val[1] - val[0]) / 1e6, (ts[1] - ts[0]) / 1e6); + /* Check that some busyness was reported. */ + igt_assert(val[1] - val[0] > 0); + /* + * But not more than some reasonable value before which we + * expected the spinner to be kicked out. 
+ */ + igt_assert((val[1] - val[0]) / 1e3 < (double)hang_us * 0.75); + __assert_within_epsilon(val[1] - val[0], hang_us * 1e3, + 0.02f, 10.0f); + } else { + close(fd); + assert_within_epsilon(val[1] - val[0], + flags & TEST_BUSY ? + slept : 0.f, tolerance); + } gem_quiescent_gpu(gem_fd); } @@ -1695,6 +1719,11 @@ igt_main pct[i], e->name) accuracy(fd, e, pct[i]); } + + igt_subtest_f("busy-hang-%s", e->name) { + single(fd, e, TEST_BUSY | FLAG_HANG); + single(fd, e, TEST_BUSY | FLAG_HANG); + } } /**