@@ -74,16 +74,19 @@ fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
reloc->write_domain = write_domains;
}
-static int emit_recursive_batch(igt_spin_t *spin,
- int fd, uint32_t ctx, unsigned engine,
- uint32_t dep, bool out_fence)
+#define OUT_FENCE (1 << 0)
+#define POLL_RUN (1 << 1)
+
+static int
+emit_recursive_batch(igt_spin_t *spin, int fd, uint32_t ctx, unsigned engine,
+ uint32_t dep, unsigned int flags)
{
#define SCRATCH 0
#define BATCH 1
const int gen = intel_gen(intel_get_drm_devid(fd));
- struct drm_i915_gem_exec_object2 obj[2];
struct drm_i915_gem_relocation_entry relocs[2];
- struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_execbuffer2 *execbuf;
+ struct drm_i915_gem_exec_object2 *obj;
unsigned int engines[16];
unsigned int nengine;
int fence_fd = -1;
@@ -101,8 +104,10 @@ static int emit_recursive_batch(igt_spin_t *spin,
}
igt_require(nengine);
- memset(&execbuf, 0, sizeof(execbuf));
- memset(obj, 0, sizeof(obj));
+ memset(&spin->execbuf, 0, sizeof(spin->execbuf));
+ execbuf = &spin->execbuf;
+ memset(spin->obj, 0, sizeof(spin->obj));
+ obj = spin->obj;
memset(relocs, 0, sizeof(relocs));
obj[BATCH].handle = gem_create(fd, BATCH_SIZE);
@@ -113,16 +118,62 @@ static int emit_recursive_batch(igt_spin_t *spin,
BATCH_SIZE, PROT_WRITE);
gem_set_domain(fd, obj[BATCH].handle,
I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
- execbuf.buffer_count++;
+ execbuf->buffer_count++;
if (dep) {
+ igt_assert(!(flags & POLL_RUN));
+
/* dummy write to dependency */
obj[SCRATCH].handle = dep;
fill_reloc(&relocs[obj[BATCH].relocation_count++],
dep, 1020,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER);
- execbuf.buffer_count++;
+ execbuf->buffer_count++;
+ } else if (flags & POLL_RUN) {
+ unsigned int offset;
+
+ igt_assert(!dep);
+
+ if (gen == 4 || gen == 5)
+ execbuf->flags |= I915_EXEC_SECURE;
+
+ spin->poll_handle = gem_create(fd, 4096);
+
+ if (__gem_set_caching(fd, spin->poll_handle,
+ I915_CACHING_CACHED) == 0)
+ spin->running = __gem_mmap__cpu(fd, spin->poll_handle,
+ 0, 4096,
+ PROT_READ | PROT_WRITE);
+ else
+ spin->running = __gem_mmap__wc(fd, spin->poll_handle,
+ 0, 4096,
+ PROT_READ | PROT_WRITE);
+ igt_assert(spin->running);
+ igt_assert_eq(*spin->running, 0);
+
+ *batch++ = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
+
+ if (gen >= 8) {
+ offset = sizeof(uint32_t);
+ *batch++ = 0;
+ *batch++ = 0;
+ } else if (gen >= 4) {
+ offset = 2 * sizeof(uint32_t);
+ *batch++ = 0;
+ *batch++ = 0;
+ } else {
+ offset = sizeof(uint32_t);
+ batch[-1]--;
+ *batch++ = 0;
+ }
+
+ *batch++ = 1;
+
+ obj[SCRATCH].handle = spin->poll_handle;
+ fill_reloc(&relocs[obj[BATCH].relocation_count++],
+ spin->poll_handle, offset, 0, 0);
+ execbuf->buffer_count++;
}
spin->batch = batch;
@@ -167,18 +218,23 @@ static int emit_recursive_batch(igt_spin_t *spin,
obj[BATCH].relocation_count++;
obj[BATCH].relocs_ptr = to_user_pointer(relocs);
- execbuf.buffers_ptr = to_user_pointer(obj + (2 - execbuf.buffer_count));
- execbuf.rsvd1 = ctx;
+ execbuf->buffers_ptr = to_user_pointer(obj +
+ (2 - execbuf->buffer_count));
+ execbuf->rsvd1 = ctx;
- if (out_fence)
- execbuf.flags |= I915_EXEC_FENCE_OUT;
+ if (flags & OUT_FENCE)
+ execbuf->flags |= I915_EXEC_FENCE_OUT;
for (i = 0; i < nengine; i++) {
- execbuf.flags &= ~ENGINE_MASK;
- execbuf.flags |= engines[i];
- gem_execbuf_wr(fd, &execbuf);
- if (out_fence) {
- int _fd = execbuf.rsvd2 >> 32;
+ execbuf->flags &= ~ENGINE_MASK;
+ execbuf->flags |= engines[i];
+
+ if (flags & POLL_RUN)
+ igt_require(gem_can_store_dword(fd, execbuf->flags));
+
+ gem_execbuf_wr(fd, execbuf);
+ if (flags & OUT_FENCE) {
+ int _fd = execbuf->rsvd2 >> 32;
igt_assert(_fd >= 0);
if (fence_fd == -1) {
@@ -194,12 +250,20 @@ static int emit_recursive_batch(igt_spin_t *spin,
}
}
+ /* Make it easier for callers to resubmit. */
+
+ obj[BATCH].relocation_count = 0;
+ obj[BATCH].relocs_ptr = 0;
+
+ obj[SCRATCH].flags = EXEC_OBJECT_PINNED;
+ obj[BATCH].flags = EXEC_OBJECT_PINNED;
+
return fence_fd;
}
static igt_spin_t *
___igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep,
- int out_fence)
+ unsigned int flags)
{
igt_spin_t *spin;
@@ -207,7 +271,7 @@ ___igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep,
igt_assert(spin);
spin->out_fence = emit_recursive_batch(spin, fd, ctx, engine, dep,
- out_fence);
+ flags);
pthread_mutex_lock(&list_lock);
igt_list_add(&spin->link, &spin_list);
@@ -219,7 +283,7 @@ ___igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep,
igt_spin_t *
__igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep)
{
- return ___igt_spin_batch_new(fd, ctx, engine, dep, false);
+ return ___igt_spin_batch_new(fd, ctx, engine, dep, 0); /* flags == 0: neither OUT_FENCE nor POLL_RUN requested */
}
/**
@@ -253,7 +317,7 @@ igt_spin_batch_new(int fd, uint32_t ctx, unsigned engine, uint32_t dep)
igt_spin_t *
__igt_spin_batch_new_fence(int fd, uint32_t ctx, unsigned engine)
{
- return ___igt_spin_batch_new(fd, ctx, engine, 0, true);
+ return ___igt_spin_batch_new(fd, ctx, engine, 0, OUT_FENCE); /* no dependency; OUT_FENCE makes emit_recursive_batch() set I915_EXEC_FENCE_OUT */
}
/**
@@ -286,6 +350,42 @@ igt_spin_batch_new_fence(int fd, uint32_t ctx, unsigned engine)
return spin;
}
+igt_spin_t *
+__igt_spin_batch_new_poll(int fd, uint32_t ctx, unsigned engine)
+{
+ return ___igt_spin_batch_new(fd, ctx, engine, 0, POLL_RUN); /* no dependency; POLL_RUN makes the batch write 1 into the poll page mapped at spin->running */
+}
+
+/**
+ * igt_spin_batch_new_poll:
+ * @fd: open i915 drm file descriptor
+ * @ctx: context the batch is submitted with (stored in the execbuf's rsvd1)
+ * @engine: Ring to execute batch OR'd with execbuf flags. If value is less
+ * than 0, execute on all available rings.
+ *
+ * Start a recursive batch on a ring. Immediately returns a #igt_spin_t that
+ * contains the batch's handle that can be waited upon. The returned structure
+ * must be passed to igt_spin_batch_free() for post-processing.
+ * igt_spin_t->running will contain a pointer whose target changes from
+ * zero to one once the spinner actually starts executing on the GPU.
+ *
+ * Returns:
+ * Structure with helper internal state for igt_spin_batch_free().
+ */
+igt_spin_t *
+igt_spin_batch_new_poll(int fd, uint32_t ctx, unsigned engine)
+{
+ igt_spin_t *spin;
+
+ igt_require_gem(fd);
+ igt_require(gem_mmap__has_wc(fd)); /* the poll page is mapped WC when cached mapping cannot be set up */
+
+ spin = __igt_spin_batch_new_poll(fd, ctx, engine);
+ igt_assert(gem_bo_busy(fd, spin->handle)); /* the spinner object must already be busy after submission */
+
+ return spin;
+}
+
static void notify(union sigval arg)
{
igt_spin_t *spin = arg.sival_ptr;
@@ -367,6 +467,11 @@ void igt_spin_batch_free(int fd, igt_spin_t *spin)
igt_spin_batch_end(spin);
gem_munmap(spin->batch, BATCH_SIZE);
+ if (spin->running) {
+ gem_munmap(spin->running, 4096);
+ gem_close(fd, spin->poll_handle);
+ }
+
gem_close(fd, spin->handle);
if (spin->out_fence >= 0)
@@ -36,6 +36,10 @@ typedef struct igt_spin {
struct igt_list link;
uint32_t *batch;
int out_fence;
+ struct drm_i915_gem_exec_object2 obj[2];
+ struct drm_i915_gem_execbuffer2 execbuf;
+ uint32_t poll_handle;
+ bool *running;
} igt_spin_t;
igt_spin_t *__igt_spin_batch_new(int fd,
@@ -55,6 +59,13 @@ igt_spin_t *igt_spin_batch_new_fence(int fd,
uint32_t ctx,
unsigned engine);
+igt_spin_t *__igt_spin_batch_new_poll(int fd,
+ uint32_t ctx,
+ unsigned engine);
+igt_spin_t *igt_spin_batch_new_poll(int fd,
+ uint32_t ctx,
+ unsigned engine);
+
void igt_spin_batch_set_timeout(igt_spin_t *spin, int64_t ns);
void igt_spin_batch_end(igt_spin_t *spin);
void igt_spin_batch_free(int fd, igt_spin_t *spin);
@@ -198,7 +198,7 @@ void gem_set_tiling(int fd, uint32_t handle, uint32_t tiling, uint32_t stride)
igt_assert(__gem_set_tiling(fd, handle, tiling, stride) == 0);
}
-static int __gem_set_caching(int fd, uint32_t handle, uint32_t caching)
+int __gem_set_caching(int fd, uint32_t handle, uint32_t caching)
{
struct drm_i915_gem_caching arg;
int err;
@@ -61,6 +61,7 @@ bool gem_get_tiling(int fd, uint32_t handle, uint32_t *tiling, uint32_t *swizzle
void gem_set_tiling(int fd, uint32_t handle, uint32_t tiling, uint32_t stride);
int __gem_set_tiling(int fd, uint32_t handle, uint32_t tiling, uint32_t stride);
+int __gem_set_caching(int fd, uint32_t handle, uint32_t caching);
void gem_set_caching(int fd, uint32_t handle, uint32_t caching);
uint32_t gem_get_caching(int fd, uint32_t handle);
uint32_t gem_flink(int fd, uint32_t handle);
@@ -170,6 +170,46 @@ static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
#define FLAG_LONG (16)
#define FLAG_HANG (32)
+static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags)
+{
+ if (gem_can_store_dword(fd, flags)) /* pollable spinner only where the store-dword check passes for these flags */
+ return __igt_spin_batch_new_poll(fd, ctx, flags);
+ else
+ return __igt_spin_batch_new(fd, ctx, flags, 0); /* fallback: plain spinner, created without POLL_RUN */
+}
+
+static unsigned long __spin_wait(int fd, igt_spin_t *spin)
+{ /* Wait for @spin to start running (fixed delay if it is not pollable); returns the time waited. */
+	struct timespec start = { };
+
+	igt_nsec_elapsed(&start); /* start the clock; the call at the end yields the elapsed ns */
+
+	if (spin->running) { /* poll spinner: ->running maps the flag the batch writes */
+		while (!*(volatile bool *)spin->running)
+			; /* poll the flag's value (not the pointer), re-read on every pass */
+	} else {
+		usleep(500e3); /* Better than nothing! */
+	}
+
+	return igt_nsec_elapsed(&start);
+}
+
+static igt_spin_t * __spin_sync(int fd, uint32_t ctx, unsigned long flags)
+{
+ igt_spin_t *spin = __spin_poll(fd, ctx, flags); /* create the spinner (pollable when supported) */
+
+ __spin_wait(fd, spin); /* give the spinner time to start; the returned delay is ignored here */
+
+ return spin;
+}
+
+static igt_spin_t * spin_sync(int fd, uint32_t ctx, unsigned long flags)
+{
+ igt_require_gem(fd); /* checked variant: igt_require_gem() first, then identical to __spin_sync() */
+
+ return __spin_sync(fd, ctx, flags);
+}
+
static void end_spin(int fd, igt_spin_t *spin, unsigned int flags)
{
if (!spin)
@@ -195,7 +235,7 @@ single(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
if (flags & TEST_BUSY)
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
else
spin = NULL;
@@ -251,13 +291,7 @@ busy_start(int gem_fd, const struct intel_execution_engine2 *e)
*/
sleep(2);
- spin = __igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
-
- /*
- * Sleep for a bit after making the engine busy to make sure the PMU
- * gets enabled when the batch is already running.
- */
- usleep(500e3);
+ spin = __spin_sync(gem_fd, 0, e2ring(gem_fd, e));
fd = open_pmu(I915_PMU_ENGINE_BUSY(e->class, e->instance));
@@ -300,7 +334,7 @@ busy_double_start(int gem_fd, const struct intel_execution_engine2 *e)
* re-submission in execlists mode. Make sure busyness is correctly
* reported with the engine busy, and after the engine went idle.
*/
- spin[0] = __igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin[0] = __spin_sync(gem_fd, 0, e2ring(gem_fd, e));
usleep(500e3);
spin[1] = __igt_spin_batch_new(gem_fd, ctx, e2ring(gem_fd, e), 0);
@@ -386,7 +420,7 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
igt_assert_eq(i, num_engines);
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
pmu_read_multi(fd[0], num_engines, tval[0]);
slept = measured_usleep(batch_duration_ns / 1000);
if (flags & TEST_TRAILING_IDLE)
@@ -412,15 +446,13 @@ busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
}
static void
-__submit_spin_batch(int gem_fd,
- struct drm_i915_gem_exec_object2 *obj,
+__submit_spin_batch(int gem_fd, igt_spin_t *spin,
const struct intel_execution_engine2 *e)
{
- struct drm_i915_gem_execbuffer2 eb = {
- .buffer_count = 1,
- .buffers_ptr = to_user_pointer(obj),
- .flags = e2ring(gem_fd, e),
- };
+ struct drm_i915_gem_execbuffer2 eb = spin->execbuf; /* local copy of the execbuf saved in the spinner, so the saved one is not modified */
+
+ eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK); /* presumably clears the previous engine selection (0x3f looks like the ring mask) -- TODO confirm */
+ eb.flags |= e2ring(gem_fd, e); /* select the engine described by @e */
gem_execbuf(gem_fd, &eb);
}
@@ -429,7 +461,6 @@ static void
most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
const unsigned int num_engines, unsigned int flags)
{
- struct drm_i915_gem_exec_object2 obj = {};
const struct intel_execution_engine2 *e_;
uint64_t tval[2][num_engines];
uint64_t val[num_engines];
@@ -443,15 +474,12 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
if (!gem_has_engine(gem_fd, e_->class, e_->instance))
continue;
- if (e == e_) {
+ if (e == e_)
idle_idx = i;
- } else if (spin) {
- __submit_spin_batch(gem_fd, &obj, e_);
- } else {
- spin = igt_spin_batch_new(gem_fd, 0,
- e2ring(gem_fd, e_), 0);
- obj.handle = spin->handle;
- }
+ else if (spin)
+ __submit_spin_batch(gem_fd, spin, e_);
+ else
+ spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e_));
val[i++] = I915_PMU_ENGINE_BUSY(e_->class, e_->instance);
}
@@ -461,6 +489,9 @@ most_busy_check_all(int gem_fd, const struct intel_execution_engine2 *e,
for (i = 0; i < num_engines; i++)
fd[i] = open_group(val[i], fd[0]);
+ /* Small delay to allow engines to start. */
+ usleep(__spin_wait(gem_fd, spin) * num_engines / 1e3);
+
pmu_read_multi(fd[0], num_engines, tval[0]);
slept = measured_usleep(batch_duration_ns / 1000);
if (flags & TEST_TRAILING_IDLE)
@@ -489,7 +520,6 @@ static void
all_busy_check_all(int gem_fd, const unsigned int num_engines,
unsigned int flags)
{
- struct drm_i915_gem_exec_object2 obj = {};
const struct intel_execution_engine2 *e;
uint64_t tval[2][num_engines];
uint64_t val[num_engines];
@@ -503,13 +533,10 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
if (!gem_has_engine(gem_fd, e->class, e->instance))
continue;
- if (spin) {
- __submit_spin_batch(gem_fd, &obj, e);
- } else {
- spin = igt_spin_batch_new(gem_fd, 0,
- e2ring(gem_fd, e), 0);
- obj.handle = spin->handle;
- }
+ if (spin)
+ __submit_spin_batch(gem_fd, spin, e);
+ else
+ spin = __spin_poll(gem_fd, 0, e2ring(gem_fd, e));
val[i++] = I915_PMU_ENGINE_BUSY(e->class, e->instance);
}
@@ -519,6 +546,9 @@ all_busy_check_all(int gem_fd, const unsigned int num_engines,
for (i = 0; i < num_engines; i++)
fd[i] = open_group(val[i], fd[0]);
+ /* Small delay to allow engines to start. */
+ usleep(__spin_wait(gem_fd, spin) * num_engines / 1e3);
+
pmu_read_multi(fd[0], num_engines, tval[0]);
slept = measured_usleep(batch_duration_ns / 1000);
if (flags & TEST_TRAILING_IDLE)
@@ -550,7 +580,7 @@ no_sema(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
open_group(I915_PMU_ENGINE_WAIT(e->class, e->instance), fd);
if (flags & TEST_BUSY)
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
else
spin = NULL;
@@ -884,7 +914,7 @@ multi_client(int gem_fd, const struct intel_execution_engine2 *e)
*/
fd[1] = open_pmu(config);
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
val[0] = val[1] = __pmu_read_single(fd[0], &ts[0]);
slept[1] = measured_usleep(batch_duration_ns / 1000);
@@ -1248,7 +1278,7 @@ test_frequency(int gem_fd)
igt_require(igt_sysfs_get_u32(sysfs, "gt_boost_freq_mhz") == min_freq);
gem_quiescent_gpu(gem_fd); /* Idle to be sure the change takes effect */
- spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+ spin = spin_sync(gem_fd, 0, I915_EXEC_RENDER);
slept = pmu_read_multi(fd, 2, start);
measured_usleep(batch_duration_ns / 1000);
@@ -1274,7 +1304,7 @@ test_frequency(int gem_fd)
igt_require(igt_sysfs_get_u32(sysfs, "gt_min_freq_mhz") == max_freq);
gem_quiescent_gpu(gem_fd);
- spin = igt_spin_batch_new(gem_fd, 0, I915_EXEC_RENDER, 0);
+ spin = spin_sync(gem_fd, 0, I915_EXEC_RENDER);
slept = pmu_read_multi(fd, 2, start);
measured_usleep(batch_duration_ns / 1000);
@@ -1455,6 +1485,10 @@ static void __rearm_spin_batch(igt_spin_t *spin)
{
const uint32_t mi_arb_chk = 0x5 << 23;
+ if (spin->running) {
+ igt_assert(*spin->running);
+ *spin->running = 0;
+ }
*spin->batch = mi_arb_chk;
__sync_synchronize();
}
@@ -1517,7 +1551,6 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
const unsigned long timeout[] = {
pwm_calibration_us * 1000, test_us * 1000
};
- struct drm_i915_gem_exec_object2 obj = {};
uint64_t total_busy_ns = 0, total_idle_ns = 0;
igt_spin_t *spin;
int ret;
@@ -1530,12 +1563,9 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
igt_warn("Failed to set scheduling policy!\n");
/* Allocate our spin batch and idle it. */
- spin = igt_spin_batch_new(gem_fd, 0, e2ring(gem_fd, e), 0);
- obj.handle = spin->handle;
- __submit_spin_batch(gem_fd, &obj, e); /* record its location */
+ spin = spin_sync(gem_fd, 0, e2ring(gem_fd, e));
igt_spin_batch_end(spin);
- gem_sync(gem_fd, obj.handle);
- obj.flags |= EXEC_OBJECT_PINNED;
+ gem_sync(gem_fd, spin->handle);
/* 1st pass is calibration, second pass is the test. */
for (int pass = 0; pass < ARRAY_SIZE(timeout); pass++) {
@@ -1545,24 +1575,30 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
igt_nsec_elapsed(&test_start);
do {
- unsigned int target_idle_us, t_busy;
+ unsigned int target_idle_us;
+ struct timespec start = { };
+ unsigned long prep_delay_ns;
/* Restart the spinbatch. */
+ igt_nsec_elapsed(&start);
__rearm_spin_batch(spin);
- __submit_spin_batch(gem_fd, &obj, e);
+ __submit_spin_batch(gem_fd, spin, e);
- /*
- * Note that the submission may be delayed to a
- * tasklet (ksoftirqd) which cannot run until we
- * sleep as we hog the cpu (we are RT).
- */
+ /* Wait for batch to start executing. */
+ __spin_wait(gem_fd, spin);
+ prep_delay_ns = igt_nsec_elapsed(&start);
- t_busy = measured_usleep(busy_us);
+ /* PWM busy sleep. */
+ memset(&start, 0, sizeof(start));
+ igt_nsec_elapsed(&start);
+ measured_usleep(busy_us);
igt_spin_batch_end(spin);
- gem_sync(gem_fd, obj.handle);
+ gem_sync(gem_fd, spin->handle);
- total_busy_ns += t_busy;
+ total_busy_ns += igt_nsec_elapsed(&start);
+ total_idle_ns += prep_delay_ns;
+ /* Re-calibrate. */
target_idle_us =
(100 * total_busy_ns / target_busy_pct - (total_busy_ns + total_idle_ns)) / 1000;
total_idle_ns += measured_usleep(target_idle_us);