@@ -141,7 +141,7 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
const char *name[16];
uint32_t contexts[65];
unsigned int nengine;
- int n;
+ int n, qlen;
nengine = 0;
for_each_physical_engine(fd, e) {
@@ -165,6 +165,25 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
igt_require(__gem_execbuf(fd, &execbuf) == 0);
gem_sync(fd, handle);
+
+ qlen = 64;
+ for (n = 0; n < nengine; n++) {
+ uint64_t saved = execbuf.flags;
+ struct timespec tv = {};
+
+ execbuf.flags |= engine[n];
+
+ igt_nsec_elapsed(&tv);
+ for (int loop = 0; loop < qlen; loop++)
+ gem_execbuf(fd, &execbuf);
+ gem_sync(fd, handle);
+
+ execbuf.flags = saved;
+
+ qlen = qlen * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
+ }
+ igt_info("Using timing depth of %d batches\n", qlen);
+
execbuf.buffers_ptr = to_user_pointer(obj);
execbuf.buffer_count = 2;
@@ -184,11 +203,12 @@ static void all(int fd, uint32_t handle, unsigned flags, int timeout)
clock_gettime(CLOCK_MONOTONIC, &start);
do {
- for (int loop = 0; loop < 1024; loop++) {
+ for (int loop = 0; loop < qlen; loop++) {
execbuf.rsvd1 = contexts[loop % nctx];
gem_execbuf(fd, &execbuf);
}
- count += 1024;
+ count += qlen;
+ gem_sync(fd, obj[0].handle);
clock_gettime(CLOCK_MONOTONIC, &now);
} while (elapsed(&start, &now) < timeout);
gem_sync(fd, obj[0].handle);
Some platforms may execute the heavy workload very slowly, such that a
batch of 1024 takes tens of seconds, immediately overrunning the 5s
timeout on a pass. Added up over a few dozen passes, this turns a 120
second test into 10 minutes. Counter this by doing a warmup loop to
estimate the appropriate queue length for timing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/gem_ctx_switch.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)