@@ -23,6 +23,8 @@
#include "config.h"
+#include <fcntl.h>
+#include <sys/ioctl.h>
#include <stdio.h>
#include <unistd.h>
#include <sched.h>
@@ -321,8 +323,309 @@ static void virtual(int i915)
igt_assert_eq(count, expect);
}
+/* Build an MI command dword: opcode shifted to bit 23, OR'ed with flags. */
+#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags))
+
+/* MI_MATH command header; (x) - 1 is placed in the flags field. */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+/* One MI_MATH ALU dword: opcode << 20 | op1 << 10 | op2. */
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+/*
+ * LOAD0/LOAD1 take a single operand. MI_MATH_INSTR needs three arguments,
+ * so the unused op2 field is passed explicitly as 0.
+ */
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1, 0x0)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1, 0x0)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
+/* MI_LOAD_REGISTER_REG command header (opcode 0x2A, flags 1). */
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
+
+/* Return the byte offset of @addr within its 4096-byte page. */
+static unsigned int offset_in_page(void *addr)
+{
+	const uintptr_t page_mask = 4096 - 1;
+	const uintptr_t value = (uintptr_t)addr;
+
+	return value & page_mask;
+}
+
+/*
+ * Divide @x by @y, rounding the result up to the next integer.
+ *
+ * The quotient and remainder are used instead of (x + y - 1) / y so that
+ * large values of @x cannot wrap around before the division. @y must be
+ * non-zero.
+ */
+static uint64_t div64_u64_round_up(uint64_t x, uint64_t y)
+{
+	return x / y + (x % y != 0);
+}
+
+/*
+ * Query I915_PARAM_CS_TIMESTAMP_FREQUENCY through DRM_IOCTL_I915_GETPARAM.
+ *
+ * The ioctl result is not checked; if the value is never filled in, the
+ * initial 0 is returned.
+ */
+static int read_timestamp_frequency(int i915)
+{
+	int freq = 0;
+	drm_i915_getparam_t query = {
+		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
+		.value = &freq,
+	};
+
+	(void)ioctl(i915, DRM_IOCTL_I915_GETPARAM, &query);
+
+	return freq;
+}
+
+/*
+ * Convert @ns nanoseconds into ticks at the frequency reported by
+ * read_timestamp_frequency(), rounding up.
+ */
+static uint64_t ns_to_ticks(int i915, uint64_t ns)
+{
+	const uint64_t hz = read_timestamp_frequency(i915);
+	const uint64_t scaled = ns * hz;
+
+	return div64_u64_round_up(scaled, NSEC_PER_SEC);
+}
+
+/*
+ * Create a GEM object large enough to hold one dword at @offset (size
+ * rounded up to 4096) and write MI_BATCH_BUFFER_END there.
+ *
+ * Returns the handle of the new object.
+ */
+static uint32_t __batch_create(int i915, uint32_t offset)
+{
+	const uint32_t end_cmd = MI_BATCH_BUFFER_END;
+	const uint32_t bo = gem_create(i915, ALIGN(offset + 4, 4096));
+
+	gem_write(i915, bo, offset, &end_cmd, sizeof(end_cmd));
+	return bo;
+}
+
+/* Create a batch object whose first dword is MI_BATCH_BUFFER_END. */
+static uint32_t batch_create(int i915)
+{
+	const uint32_t bbe_offset = 0;
+
+	return __batch_create(i915, bbe_offset);
+}
+
+/*
+ * Fill the first page of @handle with a self-looping batch for engine @e.
+ *
+ * The batch samples the RUNTIME register (engine mmio base + 0x3a8) once
+ * into GPR START_TS, then repeatedly samples it into GPR NOW_TS, computes
+ * the inverted difference with MI_MATH, stores it at @addr + 4000 and uses
+ * MI_COND_BATCH_BUFFER_END against the inverted tick count for @ns to
+ * decide whether to stop or to jump back to the top of the loop.
+ *
+ * @addr is the address the commands use to reach this same object: the
+ * scratch dwords at byte offsets 4000 and 4064 and the loop-back target
+ * are all expressed relative to it.
+ *
+ * Skips the test (igt_require) if no mmio base is known for the engine.
+ *
+ * NOTE(review): the SRM and COND_BBE packets always emit the addr >> 32
+ * dword, even when use_64b is 0 - confirm the pre-gen8 encoding; the only
+ * caller visible here, far_delay(), requires gen >= 8.
+ */
+static void delay(int i915,
+ const struct intel_execution_engine2 *e,
+ uint32_t handle,
+ uint64_t addr,
+ uint64_t ns)
+{
+ const int use_64b = intel_gen(intel_get_drm_devid(i915)) >= 8;
+ const uint32_t base = gem_engine_mmio_base(i915, e->name);
+/* GPRs are spaced 8 bytes apart starting at base + 0x600. */
+#define CS_GPR(x) (base + 0x600 + 8 * (x))
+#define RUNTIME (base + 0x3a8)
+ enum { START_TS, NOW_TS };
+ uint32_t *map, *cs, *jmp;
+
+ igt_require(base);
+
+ /* Loop until CTX_TIMESTAMP - initial > @ns */
+
+ cs = map = gem_mmap__device_coherent(i915, handle, 0, 4096, PROT_WRITE);
+
+ /* START_TS: zero the dword at GPR + 4, then copy RUNTIME into GPR + 0 */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(START_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(START_TS);
+
+ /* Pad with zero dwords to a 64-byte boundary; that is the loop entry */
+ while (offset_in_page(cs) & 63)
+ *cs++ = 0;
+ jmp = cs;
+
+ *cs++ = 0x5 << 23; /* MI_ARB_CHECK */
+
+ /* NOW_TS: same sampling sequence as START_TS, repeated every iteration */
+ *cs++ = MI_LOAD_REGISTER_IMM;
+ *cs++ = CS_GPR(NOW_TS) + 4;
+ *cs++ = 0;
+ *cs++ = MI_LOAD_REGISTER_REG;
+ *cs++ = RUNTIME;
+ *cs++ = CS_GPR(NOW_TS);
+
+ /* delta = now - start; inverted to match COND_BBE */
+ *cs++ = MI_MATH(4);
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+ *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+ *cs++ = MI_MATH_SUB;
+ *cs++ = MI_MATH_STOREINV(MI_MATH_REG(NOW_TS), MI_MATH_REG_ACCU);
+
+ /* Save delta for reading by COND_BBE */
+ *cs++ = 0x24 << 23 | (1 + use_64b); /* SRM */
+ *cs++ = CS_GPR(NOW_TS);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Delay between SRM and COND_BBE to post the writes */
+ for (int n = 0; n < 8; n++) {
+ *cs++ = MI_STORE_DWORD_IMM;
+ if (use_64b) {
+ *cs++ = addr + 4064;
+ *cs++ = addr >> 32;
+ } else {
+ /* Without 64b addressing: a zero dword, then the 32-bit address */
+ *cs++ = 0;
+ *cs++ = addr + 4064;
+ }
+ *cs++ = 0;
+ }
+
+ /* Break if delta > ns */
+ *cs++ = MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE | (1 + use_64b);
+ /* The threshold is inverted as well, mirroring the STOREINV above */
+ *cs++ = ~ns_to_ticks(i915, ns);
+ *cs++ = addr + 4000;
+ *cs++ = addr >> 32;
+
+ /* Otherwise back to recalculating delta */
+ *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | use_64b;
+ *cs++ = addr + offset_in_page(jmp);
+ *cs++ = addr >> 32;
+
+ munmap(map, 4096);
+}
+
+/*
+ * Create an exec object containing a GPU-side delay loop of @target_ns for
+ * engine @e.
+ *
+ * A fresh MI_BATCH_BUFFER_END batch is submitted once on context @ctx with
+ * its offset seeded from the handle, and waited upon. delay() then writes
+ * the loop into the object using whatever offset the exec object holds
+ * after that submission. The returned object has EXEC_OBJECT_PINNED set in
+ * addition to EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
+ *
+ * The caller owns the returned handle and must close it.
+ */
+static struct drm_i915_gem_exec_object2
+delay_create(int i915, uint32_t ctx,
+	     const struct intel_execution_engine2 *e,
+	     uint64_t target_ns)
+{
+	struct drm_i915_gem_exec_object2 obj = {
+		.handle = batch_create(i915),
+		.flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(&obj),
+		.buffer_count = 1,
+		.rsvd1 = ctx,
+		.flags = e->flags,
+	};
+
+	/*
+	 * Widen the 32-bit handle before shifting so that large handle
+	 * values cannot overflow ahead of the store into the 64-bit offset.
+	 */
+	obj.offset = (uint64_t)obj.handle << 12;
+	gem_execbuf(i915, &execbuf);
+	gem_sync(i915, obj.handle);
+
+	delay(i915, e, obj.handle, obj.offset, target_ns);
+
+	obj.flags |= EXEC_OBJECT_PINNED;
+	return obj;
+}
+
+/*
+ * Clone context 0, sharing its VM and engines, with the single-timeline
+ * creation flag.
+ *
+ * The result of __gem_context_clone() is not checked; the id stays 0 if
+ * nothing was written to it.
+ */
+static uint32_t vm_clone(int i915)
+{
+	const unsigned int share =
+		I915_CONTEXT_CLONE_VM | I915_CONTEXT_CLONE_ENGINES;
+	uint32_t clone = 0;
+
+	__gem_context_clone(i915, 0, share,
+			    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
+			    &clone);
+
+	return clone;
+}
+
+/*
+ * Submit @execbuf with DRM_IOCTL_I915_GEM_EXECBUFFER2 without asserting.
+ *
+ * Returns 0 on success or -errno on failure. errno is cleared before
+ * returning in both cases.
+ */
+static int __execbuf(int i915, struct drm_i915_gem_execbuffer2 *execbuf)
+{
+	const int ret = ioctl(i915, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+	int err = 0;
+
+	if (ret) {
+		err = -errno;
+		igt_assume(err);
+	}
+
+	errno = 0;
+
+	return err;
+}
+
+/*
+ * Queue many short delay batches against one shared object, then submit a
+ * final batch that exports an out-fence.
+ *
+ * @delay:  length of each delay batch, in nanoseconds (must be non-zero).
+ * @target: scales the number of submissions: 3 * target * NSEC_PER_SEC /
+ *          (2 * delay).
+ * @e:      engine to submit to.
+ * @fence:  receives the out-fence fd of the final submission.
+ *
+ * Returns the handle of the object written by the final submission. The
+ * caller owns both that handle and the fence fd.
+ *
+ * Skips unless the device is gen8+ and the engine class can store dwords.
+ *
+ * NOTE(review): O_NONBLOCK is left set on @i915 when this returns -
+ * confirm that is intended for whatever runs afterwards.
+ */
+static uint32_t
+far_delay(int i915, unsigned long delay, unsigned int target,
+	  const struct intel_execution_engine2 *e, int *fence)
+{
+	struct drm_i915_gem_exec_object2 obj;
+	struct drm_i915_gem_exec_object2 batch[2] = {
+		{
+			.flags = EXEC_OBJECT_WRITE,
+		}
+	};
+	struct drm_i915_gem_execbuffer2 execbuf = {
+		.buffers_ptr = to_user_pointer(batch),
+		.buffer_count = 2,
+		.flags = e->flags,
+	};
+	unsigned long count, submit;
+	uint32_t handle;
+
+	/* Check requirements before creating objects so a skip leaks none. */
+	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8);
+	igt_require(gem_class_can_store_dword(i915, e->class));
+
+	obj = delay_create(i915, 0, e, delay);
+	batch[0].handle = batch_create(i915);
+	handle = gem_create(i915, 4096);
+
+	/* Make a failing execbuf return instead of blocking in the loop below. */
+	fcntl(i915, F_SETFL, fcntl(i915, F_GETFL) | O_NONBLOCK);
+
+	submit = 3 * target;
+	submit *= NSEC_PER_SEC;
+	submit /= 2 * delay;
+
+	/*
+	 * Submit a few long chains of individually short pieces of work
+	 * against a shared object.
+	 */
+	for (count = 0; count < submit;) {
+		execbuf.rsvd1 = vm_clone(i915);
+		if (!execbuf.rsvd1)
+			break;
+
+		/* Keep submitting on this context until execbuf fails. */
+		batch[1] = obj;
+		while (__execbuf(i915, &execbuf) == 0)
+			count++;
+		gem_context_destroy(i915, execbuf.rsvd1);
+	}
+
+	/*
+	 * Final submission on context 0: the shared object becomes the batch
+	 * (no longer written) and the new object takes the write slot.
+	 */
+	execbuf.flags |= I915_EXEC_FENCE_OUT;
+	execbuf.rsvd1 = 0;
+	batch[1] = batch[0];
+	batch[1].flags &= ~EXEC_OBJECT_WRITE;
+	batch[0].handle = handle;
+	assert(batch[0].flags & EXEC_OBJECT_WRITE);
+	gem_execbuf_wr(i915, &execbuf);
+
+	/* Drop both local handles: the delay batch and the shared object. */
+	gem_close(i915, obj.handle);
+	gem_close(i915, batch[1].handle);
+
+	/* And pass the resulting end fence out. */
+	*fence = execbuf.rsvd2 >> 32;
+
+	return handle;
+}
+
+/*
+ * Queue a long chain of 4ms (NSEC_PER_SEC / 250) delay batches on engine
+ * @e via far_delay(), then check that the resulting out-fence signals
+ * with status 1.
+ */
+static void
+far_fence(int i915, int timeout, const struct intel_execution_engine2 *e)
+{
+	const unsigned long chunk_ns = NSEC_PER_SEC / 250;
+	int fence = -1;
+	uint32_t target_bo;
+
+	target_bo = far_delay(i915, chunk_ns, timeout, e, &fence);
+
+	/* Only the fence is needed from here on. */
+	gem_close(i915, target_bo);
+
+	igt_assert_eq(sync_fence_wait(fence, -1), 0);
+
+	/*
+	 * Many short pieces of work, simulating independent clients that
+	 * produce work for a consumer, should not be interrupted by the
+	 * watchdog.
+	 *
+	 * TODO/FIXME: Open questions:
+	 *
+	 * 1)
+	 * Fence error propagation is missing, so the consumer may fail to
+	 * notice that the work has not actually been executed.
+	 *
+	 * There is also no clear agreement on whether error propagation is
+	 * desired or not.
+	 *
+	 * 2)
+	 * This assert could instead check that the fence status is an
+	 * error, if it is accepted that this kind of workload should
+	 * suddenly start failing. That depends on whether the aim is to
+	 * show that the watchdog could break existing userspace, or whether
+	 * silently not executing workloads is acceptable.
+	 *
+	 * 3)
+	 * Implement a subtest which actually renders to a shared buffer, so
+	 * the watchdog's effect on the rendering result can also be
+	 * demonstrated.
+	 */
+	igt_assert_eq(sync_fence_status(fence), 1);
+
+	close(fence);
+}
+
igt_main
{
+ const struct intel_execution_engine2 *e;
int i915 = -1;
igt_fixture {
@@ -370,6 +673,13 @@ igt_main
virtual(i915);
}
+ igt_subtest_with_dynamic("far-fence") {
+ __for_each_physical_engine(i915, e) {
+ igt_dynamic_f("%s", e->name)
+ far_fence(i915, default_timeout_wait_s * 3, e);
+ }
+ }
+
igt_fixture {
close(i915);
}