@@ -74,35 +74,48 @@ fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
reloc->write_domain = write_domains;
}
-static int emit_recursive_batch(igt_spin_t *spin,
- int fd, uint32_t ctx, unsigned engine,
- uint32_t dep, bool out_fence)
+#define OUT_FENCE (1 << 0)
+#define POLL_RUN (1 << 1)
+
+static int
+emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
+ uint32_t dep, unsigned int flags)
{
#define SCRATCH 0
#define BATCH 1
const int gen = intel_gen(intel_get_drm_devid(fd));
- struct drm_i915_gem_exec_object2 obj[2];
- struct drm_i915_gem_relocation_entry relocs[2];
- struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_relocation_entry relocs[2], *r;
+ struct drm_i915_gem_execbuffer2 *execbuf;
+ struct drm_i915_gem_exec_object2 *obj;
unsigned int engines[16];
unsigned int nengine;
int fence_fd = -1;
- uint32_t *batch;
+ uint32_t *batch, *batch_start;
int i;
nengine = 0;
if (engine == -1) {
- for_each_engine(fd, engine)
- if (engine)
+ for_each_engine(fd, engine) {
+ if (engine) {
+ if (flags & POLL_RUN)
+ igt_require(gem_can_store_dword(fd, engine));
+
engines[nengine++] = engine;
+ }
+ }
} else {
gem_require_ring(fd, engine);
+ igt_require(!(flags & POLL_RUN) ||
+ gem_can_store_dword(fd, engine));
engines[nengine++] = engine;
}
igt_require(nengine);
- memset(&execbuf, 0, sizeof(execbuf));
- memset(obj, 0, sizeof(obj));
+ memset(&spin->execbuf, 0, sizeof(spin->execbuf));
+ execbuf = &spin->execbuf;
+ memset(spin->obj, 0, sizeof(spin->obj));
+ obj = spin->obj;
memset(relocs, 0, sizeof(relocs));
obj[BATCH].handle = gem_create(fd, BATCH_SIZE);
@@ -113,19 +126,66 @@ static int emit_recursive_batch(igt_spin_t *spin,
BATCH_SIZE, PROT_WRITE);
gem_set_domain(fd, obj[BATCH].handle,
I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
- execbuf.buffer_count++;
+ execbuf->buffer_count++;
+ batch_start = batch;
if (dep) {
+ igt_assert(!(flags & POLL_RUN));
+
/* dummy write to dependency */
obj[SCRATCH].handle = dep;
fill_reloc(&relocs[obj[BATCH].relocation_count++],
dep, 1020,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
- execbuf.buffer_count++;
+ execbuf->buffer_count++;
+ } else if (flags & POLL_RUN) {
+ unsigned int offset;
+
+ igt_assert(!dep);
+
+ if (gen == 4 || gen == 5)
+ execbuf->flags |= I915_EXEC_SECURE;
+
+ spin->poll_handle = gem_create(fd, 4096);
+
+ if (__gem_set_caching(fd, spin->poll_handle,
+ I915_CACHING_CACHED) == 0)
+ spin->running = __gem_mmap__cpu(fd, spin->poll_handle,
+ 0, 4096,
+ PROT_READ | PROT_WRITE);
+ else
+ spin->running = __gem_mmap__wc(fd, spin->poll_handle,
+ 0, 4096,
+ PROT_READ | PROT_WRITE);
+ igt_assert(spin->running);
+ igt_assert_eq(*spin->running, 0);
+
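+ /*
+ * Emit a MI_STORE_DWORD_IMM which writes 1 into the poll object
+ * as soon as the batch starts executing. "offset" records the
+ * dword index of the address field for the relocation below:
+ * gen8+ takes a 64-bit address, gen4-7 an extra MBZ dword before
+ * a 32-bit address, and gen2/3 a one dword shorter command with
+ * the address immediately following.
+ */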
+ *batch++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+
+ if (gen >= 8) {
+ offset = 1;
+ *batch++ = 0;
+ *batch++ = 0;
+ } else if (gen >= 4) {
+ offset = 2;
+ *batch++ = 0;
+ *batch++ = 0;
+ } else {
+ offset = 1;
+ batch[-1]--;
+ *batch++ = 0;
+ }
+
+ *batch++ = 1;
+
+ obj[SCRATCH].handle = spin->poll_handle;
+ fill_reloc(&relocs[obj[BATCH].relocation_count++],
+ spin->poll_handle, offset, 0, 0);
+ execbuf->buffer_count++;
}
- spin->batch = batch;
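+ /*
+ * The spin loop proper lives at a fixed 64 byte offset into the batch,
+ * past the optional poll-write preamble above: the recursive
+ * MI_BATCH_BUFFER_START at the end of the batch jumps back here, and
+ * callers resubmitting the spinner can use a batch_start_offset of 64
+ * to skip the preamble entirely.
+ */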
+ spin->batch = batch = batch_start + 64 / sizeof(*batch);
spin->handle = obj[BATCH].handle;
/* Allow ourselves to be preempted */
@@ -145,40 +205,42 @@ static int emit_recursive_batch(igt_spin_t *spin,
batch += 1000;
/* recurse */
- fill_reloc(&relocs[obj[BATCH].relocation_count],
- obj[BATCH].handle, (batch - spin->batch) + 1,
- I915_GEM_DOMAIN_COMMAND, 0);
+ r = &relocs[obj[BATCH].relocation_count++];
+ r->target_handle = obj[BATCH].handle;
+ r->offset = (batch + 1 - batch_start) * sizeof(*batch);
+ r->read_domains = I915_GEM_DOMAIN_COMMAND;
+ r->delta = 64;
if (gen >= 8) {
*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
- *batch++ = 0;
+ *batch++ = r->delta;
*batch++ = 0;
} else if (gen >= 6) {
*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
- *batch++ = 0;
+ *batch++ = r->delta;
} else {
*batch++ = MI_BATCH_BUFFER_START | 2 << 6;
- *batch = 0;
- if (gen < 4) {
- *batch |= 1;
- relocs[obj[BATCH].relocation_count].delta = 1;
- }
+ if (gen < 4)
+ r->delta |= 1;
+ *batch = r->delta;
batch++;
}
- obj[BATCH].relocation_count++;
obj[BATCH].relocs_ptr = to_user_pointer(relocs);
- execbuf.buffers_ptr = to_user_pointer(obj + (2 - execbuf.buffer_count));
- execbuf.rsvd1 = ctx;
+ execbuf->buffers_ptr = to_user_pointer(obj +
+ (2 - execbuf->buffer_count));
+ execbuf->rsvd1 = ctx;
- if (out_fence)
- execbuf.flags |= I915_EXEC_FENCE_OUT;
+ if (flags & OUT_FENCE)
+ execbuf->flags |= I915_EXEC_FENCE_OUT;
for (i = 0; i < nengine; i++) {
- execbuf.flags &= ~ENGINE_MASK;
- execbuf.flags |= engines[i];
- gem_execbuf_wr(fd, &execbuf);
- if (out_fence) {
- int _fd = execbuf.rsvd2 >> 32;
+ execbuf->flags &= ~ENGINE_MASK;
+ execbuf->flags |= engines[i];
+
+ gem_execbuf_wr(fd, execbuf);
+
+ if (flags & OUT_FENCE) {
+ int _fd = execbuf->rsvd2 >> 32;
igt_assert(_fd >= 0);
if (fence_fd == -1) {
@@ -194,12 +256,20 @@ static int emit_recursive_batch(igt_spin_t *spin,
}
}
+ /* Make it easier for callers to resubmit. */
+
+ obj[BATCH].relocation_count = 0;
+ obj[BATCH].relocs_ptr = 0;
+
+ obj[SCRATCH].flags = EXEC_OBJECT_PINNED;
+ obj[BATCH].flags = EXEC_OBJECT_PINNED;
+
return fence_fd;
}
static igt_spin_t *
___igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep,
- int out_fence)
+ unsigned int flags)
{
igt_spin_t *spin;
@@ -207,7 +277,7 @@ ___igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep,
igt_assert(spin);
spin->out_fence = emit_recursive_batch(spin, fd, ctx, engine, dep,
- out_fence);
+ flags);
pthread_mutex_lock(&list_lock);
igt_list_add(&spin->link, &spin_list);
@@ -219,7 +289,7 @@ ___igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep,
igt_spin_t *
__igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep)
{
- return ___igt_spin_batch_new(fd, ctx, engine, dep, false);
+ return ___igt_spin_batch_new(fd, ctx, engine, dep, 0);
}
/**
@@ -253,7 +323,7 @@ igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep)
igt_spin_t *
__igt_spin_batch_new_fence(int fd, uint32_t ctx, unsigned engine)
{
- return ___igt_spin_batch_new(fd, ctx, engine, 0, true);
+ return ___igt_spin_batch_new(fd, ctx, engine, 0, OUT_FENCE);
}
/**
@@ -286,6 +356,42 @@ igt_spin_batch_new_fence(int fd, uint32_t ctx, unsigned engine)
return spin;
}
+igt_spin_t *
+__igt_spin_batch_new_poll(int fd, uint32_t ctx, unsigned engine)
+{
+ return ___igt_spin_batch_new(fd, ctx, engine, 0, POLL_RUN);
+}
+
+/**
+ * igt_spin_batch_new_poll:
+ * @fd: open i915 drm file descriptor
+ * @ctx: GEM context to submit the spinner to, or 0 for the default context
+ * @engine: Ring to execute batch OR'd with execbuf flags. If value is less
+ * than 0, execute on all available rings.
+ *
+ * Start a recursive batch on a ring. Immediately returns a #igt_spin_t that
+ * contains the batch's handle that can be waited upon. The returned structure
+ * must be passed to igt_spin_batch_free() for post-processing.
+ *
+ * igt_spin_t->running will contain a pointer whose target will change from
+ * zero to one once the spinner actually starts executing on the GPU.
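+ *
+ * A minimal usage sketch, assuming the render ring is present; the busy-wait
+ * mirrors the __spin_wait() helper used by the tests:
+ *
+ * |[
+ * igt_spin_t *spin = igt_spin_batch_new_poll(fd, 0, I915_EXEC_RENDER);
+ *
+ * while (!*((volatile bool *)spin->running))
+ * usleep(10);
+ *
+ * igt_spin_batch_free(fd, spin);
+ * ]|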
+ *
+ * Returns:
+ * Structure with helper internal state for igt_spin_batch_free().
+ */
+igt_spin_t *
+igt_spin_batch_new_poll(int fd, uint32_t ctx, unsigned engine)
+{
+ igt_spin_t *spin;
+
+ igt_require_gem(fd);
+ igt_require(gem_mmap__has_wc(fd));
+
+ spin = __igt_spin_batch_new_poll(fd, ctx, engine);
+ igt_assert(gem_bo_busy(fd, spin->handle));
+
+ return spin;
+}
+
static void notify(union sigval arg)
{
igt_spin_t *spin = arg.sival_ptr;
@@ -340,6 +446,8 @@ void igt_spin_batch_end(igt_spin_t *spin)
if (!spin)
return;
+ igt_assert(*spin->batch == MI_ARB_CHK ||
+ *spin->batch == MI_BATCH_BUFFER_END);
*spin->batch = MI_BATCH_BUFFER_END;
__sync_synchronize();
}
@@ -365,7 +473,13 @@ void igt_spin_batch_free(int fd, igt_spin_t *spin)
timer_delete(spin->timer);
igt_spin_batch_end(spin);
- gem_munmap(spin->batch, BATCH_SIZE);
+ gem_munmap((void *)((unsigned long)spin->batch & (~4095UL)),
+ BATCH_SIZE);
+
+ if (spin->running) {
+ gem_munmap(spin->running, 4096);
+ gem_close(fd, spin->poll_handle);
+ }
gem_close(fd, spin->handle);
@@ -36,6 +36,10 @@ typedef struct igt_spin {
struct igt_list link;
uint32_t *batch;
int out_fence;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t poll_handle; /* object the spinner writes to when it starts running */
+ bool *running; /* set to one by the GPU once the spinner starts executing */
} igt_spin_t;
igt_spin_t *__igt_spin_batch_new(int fd,
@@ -55,6 +59,13 @@ igt_spin_t *igt_spin_batch_new_fence(int fd,
uint32_t ctx,
unsigned engine);
+igt_spin_t *__igt_spin_batch_new_poll(int fd,
+ uint32_t ctx,
+ unsigned engine);
+igt_spin_t *igt_spin_batch_new_poll(int fd,
+ uint32_t ctx,
+ unsigned engine);
+
void igt_spin_batch_set_timeout(igt_spin_t *spin, int64_t ns);
void igt_spin_batch_end(igt_spin_t *spin);
void igt_spin_batch_free(int fd, igt_spin_t *spin);
@@ -609,7 +609,7 @@ bool gem_can_store_dword(int fd, unsigned int engine)
if (gen == 3 && (info->is_grantsdale || info->is_alviso))
return false; /* only supports physical addresses */
- if (gen == 6 && (engine & ~(3<<13)) == I915_EXEC_BSD)
+ if (gen == 6 && ((engine & 0x3f) == I915_EXEC_BSD))
return false; /* kills the machine! */
if (info->is_broadwater)
@@ -198,7 +198,7 @@ void gem_set_tiling(int fd, uint32_t handle, uint32_t tiling, uint32_t stride)
igt_assert(__gem_set_tiling(fd, handle, tiling, stride) == 0);
}
-static int __gem_set_caching(int fd, uint32_t handle, uint32_t caching)
+int __gem_set_caching(int fd, uint32_t handle, uint32_t caching)
{
struct drm_i915_gem_caching arg;
int err;
@@ -61,6 +61,7 @@ bool gem_get_tiling(int fd, uint32_t handle, uint32_t *tiling, uint32_t *swizzle
void gem_set_tiling(int fd, uint32_t handle, uint32_t tiling, uint32_t stride);
int __gem_set_tiling(int fd, uint32_t handle, uint32_t tiling, uint32_t stride);
+int __gem_set_caching(int fd, uint32_t handle, uint32_t caching);
void gem_set_caching(int fd, uint32_t handle, uint32_t caching);
uint32_t gem_get_caching(int fd, uint32_t handle);
uint32_t gem_flink(int fd, uint32_t handle);
@@ -170,6 +170,56 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
#define FLAG_LONG (16)
#define FLAG_HANG (32)
+static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags)
+{
+ if (gem_can_store_dword(fd, flags))
+ return __igt_spin_batch_new_poll(fd, ctx, flags);
+ else
+ return __igt_spin_batch_new(fd, ctx, flags, 0);
+}
+
+static unsigned long __spin_wait(int fd, igt_spin_t *spin)
+{
+ struct timespec start = { };
+
+ igt_nsec_elapsed(&start);
+
+ if (spin->running) {
+ unsigned long timeout = 0;
+
+ while (!*((volatile bool *)spin->running)) {
+ unsigned long t = igt_nsec_elapsed(&start);
+
+ if ((t - timeout) > 250e6) {
+ timeout = t;
+ igt_warn("Spinner not running after %.2fms\n",
+ (double)t / 1e6);
+ }
+ }
+ } else {
+ igt_debug("__spin_wait - usleep mode\n");
+ usleep(500e3); /* Better than nothing! */
+ }
+
+ return igt_nsec_elapsed(&start);
+}
+
+static igt_spin_t * __spin_sync(int fd, uint32_t ctx, unsigned long flags)
+{
+ igt_spin_t *spin = __spin_poll(fd, ctx, flags);
+
+ __spin_wait(fd, spin);
+
+ return spin;
+}
+
+static igt_spin_t * spin_sync(int fd, uint32_t ctx, unsigned long flags)
+{
+ igt_require_gem(fd);
+
+ return __spin_sync(fd, ctx, flags);
+}
+
static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
{
if (!spin)
@@ -180,8 +230,25 @@ static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
if (flags & FLAG_SYNC)
gem_sync(fd, spin->handle);
- if (flags & TEST_TRAILING_IDLE)
- usleep(batch_duration_ns / 5000);
+ if (flags & TEST_TRAILING_IDLE) {
+ unsigned long t, timeout = 0;
+ struct timespec start = { };
+
+ igt_nsec_elapsed(&start);
+
+ do {
+ t = igt_nsec_elapsed(&start);
+
+ if (gem_bo_busy(fd, spin->handle) &&
+ (t - timeout) > 10e6) {
+ timeout = t;
+ igt_warn("Spinner not idle after %.2fms\n",
+ (double)t / 1e6);
+ }
+
+ usleep(1e3);
+ } while (t < batch_duration_ns / 5);
+ }
}
static void
@@ -195,7 +262,7 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
if (flags & TEST_BUSY)
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
else
spin = NULL;
@@ -251,13 +318,7 @@ busy_start(int gem_fd, const struct intel_execution_engine2 *e)
*/
sleep(2);
- spin = __igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
-
- /*
- * Sleep for a bit after making the engine busy to make sure the PMU
- * gets enabled when the batch is already running.
- */
- usleep(500e3);
+ spin = __spin_sync(gem_fd, 0, e2ring(gem_fd, e));
fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
@@ -300,7 +361,7 @@ busy_double_start(int gem_fd, const struct intel_execution_engine2 *e)
* re-submission in execlists mode. Make sure busyness is correctly
* reported with the engine busy, and after the engine went idle.
*/
- spin[0] = __igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin[0] = __spin_sync(gem_fd, 0, e2ring(gem_fd, e));
usleep(500e3);
spin[1] = __igt_spin_batch_new(gem_fd, ctx, e2ring(gem_fd, e), 0);
@@ -386,7 +447,7 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
igt_assert_eq(i, num_engines);
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
pmu_read_multi(fd[0], num_engines, tval[0]);
slept = measured_usleep(batch_duration_ns / 1000);
if (flags & TEST_TRAILING_IDLE)
@@ -412,15 +473,15 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
}
static void
-__submit_spin_batch(int gem_fd,
- struct drm_i915_gem_exec_object2 *obj,
- const struct intel_execution_engine2 *e)
+__submit_spin_batch(int gem_fd, igt_spin_t *spin,
+ const struct intel_execution_engine2 *e,
+ int offset)
{
- struct drm_i915_gem_execbuffer2 eb = {
- .buffer_count = 1,
- .buffers_ptr = to_user_pointer(obj),
- .flags = e2ring(gem_fd, e),
- };
+ struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
+
+ eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
+ eb.flags |= e2ring(gem_fd, e) | I915_EXEC_NO_RELOC;
+ eb.batch_start_offset += offset;
gem_execbuf(gem_fd, &eb);
}
@@ -429,7 +490,6 @@ static void
most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
const unsigned int num_engines, unsigned int flags)
{
- struct drm_i915_gem_exec_object2 obj = {};
const struct intel_execution_engine2 *e_;
uint64_t tval[2][num_engines];
uint64_t val[num_engines];
@@ -443,15 +503,12 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
if (!gem_has_engine(gem_fd, e_->class, e_->instance))
continue;
- if (e == e_) {
+ if (e == e_)
idle_idx = i;
- } else if (spin) {
- __submit_spin_batch(gem_fd, &obj, e_);
- } else {
- spin = igt_spin_batch_new(gem_fd, 0,
- e2ring(gem_fd, e_), 0);
- obj.handle = spin->handle;
- }
+ else if (spin)
+ __submit_spin_batch(gem_fd, spin, e_, 64);
+ else
+ spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e_));
val[i++] = I915_PMU_ENGINE_BUSY(e_->class, e_->instance);
}
@@ -461,6 +518,9 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
for (i = 0; i < num_engines; i++)
fd[i] = open_group(val[i], fd[0]);
+ /* Small delay to allow engines to start. */
+ usleep(__spin_wait(gem_fd, spin) * num_engines / 1e3);
+
pmu_read_multi(fd[0], num_engines, tval[0]);
slept = measured_usleep(batch_duration_ns / 1000);
if (flags & TEST_TRAILING_IDLE)
@@ -489,7 +549,6 @@ static void
all_busy_check_all(int gem_fd, const unsigned int num_engines,
unsigned int flags)
{
- struct drm_i915_gem_exec_object2 obj = {};
const struct intel_execution_engine2 *e;
uint64_t tval[2][num_engines];
uint64_t val[num_engines];
@@ -503,13 +562,10 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
if (!gem_has_engine(gem_fd, e->class, e->instance))
continue;
- if (spin) {
- __submit_spin_batch(gem_fd, &obj, e);
- } else {
- spin = igt_spin_batch_new(gem_fd, 0,
- e2ring(gem_fd, e), 0);
- obj.handle = spin->handle;
- }
+ if (spin)
+ __submit_spin_batch(gem_fd, spin, e, 64);
+ else
+ spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e));
val[i++] = I915_PMU_ENGINE_BUSY(e->class, e->instance);
}
@@ -519,6 +575,9 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
for (i = 0; i < num_engines; i++)
fd[i] = open_group(val[i], fd[0]);
+ /* Small delay to allow engines to start. */
+ usleep(__spin_wait(gem_fd, spin) * num_engines / 1e3);
+
pmu_read_multi(fd[0], num_engines, tval[0]);
slept = measured_usleep(batch_duration_ns / 1000);
if (flags & TEST_TRAILING_IDLE)
@@ -550,7 +609,7 @@ no_sema(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance), fd);
if (flags & TEST_BUSY)
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
else
spin = NULL;
@@ -884,7 +943,7 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
*/
fd[1] = open_pmu(config);
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
val[0] = val[1] = __pmu_read_single(fd[0], &ts[0]);
slept[1] = measured_usleep(batch_duration_ns / 1000);
@@ -1248,7 +1307,7 @@ test_frequency(int gem_fd)
igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == min_freq);
gem_quiescent_gpu(gem_fd); /* Idle to be sure the change takes effect */
- spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+ spin = spin_sync(gem_fd, 0, I915_EXEC_RENDER);
slept = pmu_read_multi(fd, 2, start);
measured_usleep(batch_duration_ns / 1000);
@@ -1274,7 +1333,7 @@ test_frequency(int gem_fd)
igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == max_freq);
gem_quiescent_gpu(gem_fd);
- spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+ spin = spin_sync(gem_fd, 0, I915_EXEC_RENDER);
slept = pmu_read_multi(fd, 2, start);
measured_usleep(batch_duration_ns / 1000);
@@ -1455,6 +1514,11 @@ static void __rearm_spin_batch(igt_spin_t *spin)
{
const uint32_t mi_arb_chk = 0x5 << 23;
+ if (spin->running) {
+ igt_assert(*spin->running);
+ *spin->running = 0;
+ }
+ igt_assert_eq(*spin->batch, MI_BATCH_BUFFER_END);
*spin->batch = mi_arb_chk;
__sync_synchronize();
}
@@ -1489,6 +1553,9 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
/* Sampling platforms cannot reach the high accuracy criteria. */
igt_require(gem_has_execlists(gem_fd));
+ /* Need store dword for accurate PWM. */
+ igt_require(gem_can_store_dword(gem_fd, e2ring(gem_fd, e)));
+
while (idle_us < 2500) {
busy_us *= 2;
idle_us *= 2;
@@ -1517,8 +1584,8 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
const unsigned long timeout[] = {
pwm_calibration_us * 1000, test_us * 1000
};
- struct drm_i915_gem_exec_object2 obj = {};
uint64_t total_busy_ns = 0, total_idle_ns = 0;
+ uint64_t target_idle_us = idle_us;
igt_spin_t *spin;
int ret;
@@ -1530,12 +1597,9 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
igt_warn("Failed to set scheduling policy!\n");
/* Allocate our spin batch and idle it. */
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
- obj.handle = spin->handle;
- __submit_spin_batch(gem_fd, &obj, e); /* record its location */
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
igt_spin_batch_end(spin);
- gem_sync(gem_fd, obj.handle);
- obj.flags |= EXEC_OBJECT_PINNED;
+ gem_sync(gem_fd, spin->handle);
/* 1st pass is calibration, second pass is the test. */
for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
@@ -1545,27 +1609,42 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
igt_nsec_elapsed(&test_start);
do {
- unsigned int target_idle_us, t_busy;
+ unsigned long prep_delay_ns, run_delay_ns;
+ struct timespec start = { };
+ double error;
/* Restart the spinbatch. */
+ igt_nsec_elapsed(&start);
+
__rearm_spin_batch(spin);
- __submit_spin_batch(gem_fd, &obj, e);
+ __submit_spin_batch(gem_fd, spin, e, 0);
- /*
- * Note that the submission may be delayed to a
- * tasklet (ksoftirqd) which cannot run until we
- * sleep as we hog the cpu (we are RT).
- */
+ prep_delay_ns = igt_nsec_elapsed(&start);
+ run_delay_ns = __spin_wait(gem_fd, spin);
+
+ /* PWM busy sleep. */
+ measured_usleep(busy_us);
- t_busy = measured_usleep(busy_us);
igt_spin_batch_end(spin);
- gem_sync(gem_fd, obj.handle);
- total_busy_ns += t_busy;
+ total_busy_ns += igt_nsec_elapsed(&start) -
+ prep_delay_ns - run_delay_ns;
+
+ /* PWM idle sleep. */
+ memset(&start, 0, sizeof(start));
+ igt_nsec_elapsed(&start);
+ gem_sync(gem_fd, spin->handle);
+ measured_usleep(target_idle_us -
+ prep_delay_ns / 1000);
+ total_idle_ns += igt_nsec_elapsed(&start);
+
+ /*
+ * Re-calibrate: error is the deviation of the measured busy
+ * fraction from the requested one; grow or shrink the next
+ * idle period accordingly.
+ */
+ error = (double)total_busy_ns /
+ (total_busy_ns + total_idle_ns) -
+ (double)target_busy_pct / 100.0;
- target_idle_us =
- (100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
- total_idle_ns += measured_usleep(target_idle_us);
+ target_idle_us = (double)target_idle_us *
+ (1.0 + error);
} while (igt_nsec_elapsed(&test_start) < timeout[pass]);
busy_ns += total_busy_ns;