Message ID | 20180417084112.8352-1-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Tvrtko Ursulin (2018-04-17 09:41:11) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > We want to make sure RT tasks which use a lot of CPU times can submit > batch buffers with roughly the same latency (and certainly not worse) > compared to normal tasks. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > tests/gem_exec_latency.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 176 insertions(+) > > diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c > index 9498c0921e60..420ede0f83a0 100644 > --- a/tests/gem_exec_latency.c > +++ b/tests/gem_exec_latency.c > @@ -36,11 +36,15 @@ > #include <sys/time.h> > #include <sys/signal.h> > #include <time.h> > +#include <sched.h> > > #include "drm.h" > > #include "igt_sysfs.h" > #include "igt_vgem.h" > +#include "igt_dummyload.h" > +#include "igt_stats.h" > + > #include "i915/gem_ring.h" > > #define LOCAL_I915_EXEC_NO_RELOC (1<<11) > @@ -351,6 +355,172 @@ static void latency_from_ring(int fd, > } > } > > +static void __rearm_spin_batch(igt_spin_t *spin) > +{ > + const uint32_t mi_arb_chk = 0x5 << 23; > + > + *spin->batch = mi_arb_chk; > + *spin->running = 0; > + __sync_synchronize(); > +} > + > +static void > +__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags) > +{ > + struct drm_i915_gem_execbuffer2 eb = spin->execbuf; > + > + eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK); > + eb.flags |= flags | I915_EXEC_NO_RELOC; > + > + gem_execbuf(fd, &eb); > +} > + > +struct rt_pkt > +{ > +#define RT_OK (0) > +#define RT_FAIL (1) > +#define RT_TIMEOUT (2) > + int status; > + struct igt_mean mean; > + double max; > +}; > + > +static void __spin_wait(struct rt_pkt *msg, igt_spin_t *spin, double *t_wait) > +{ > + struct timespec ts = { }; > + uint64_t t_last = 0; > + > + igt_nsec_elapsed(&ts); > + > + while (!READ_ONCE(*spin->running)) { > + uint64_t t = igt_nsec_elapsed(&ts); > + > + if ((t - t_last) > 5UL * NSEC_PER_SEC) { > + /* Absolute timeout to save time. */ > + msg->status = RT_TIMEOUT; > + } else if ((t - t_last) > NSEC_PER_SEC / 10) { > + /* Backoff every 100ms to give it chance to complete. */ > + t_last = t; > + usleep(1); > + } > + } > + > + *t_wait = igt_nsec_elapsed(&ts) / 1e9; (I would keep this as nanonseconds in the double until we need to convert into human readable units.) > + msg->status = RT_OK; > +} > + > +/* > + * Test whether RT thread which hogs the CPU a lot can submit work with > + * reasonable latency. > + */ > +static void > +rthog_latency_on_ring(int fd, unsigned int ring, const char *name) > +{ > + const char *passname[] = { "warmup", "normal", "rt" }; > + struct rt_pkt res[3]; > + unsigned int i; > + int link[2]; > + int ret; > + > + igt_require(gem_can_store_dword(fd, ring)); > + > + igt_assert(pipe(link) == 0); > + > + memset(res, 0, sizeof(res)); > + > + igt_fork(child, 1) { > + unsigned int pass = 0; /* Three passes: warmup, normal, rt. */ > + > + do { > + struct rt_pkt msg = { }; > + igt_spin_t *spin; > + > + igt_mean_init(&msg.mean); > + > + if (pass == 2) { > + struct sched_param rt = > + { .sched_priority = 99 }; > + > + ret = sched_setscheduler(0, > + SCHED_FIFO | SCHED_RESET_ON_FORK, > + &rt); > + if (ret) { > + igt_warn("Failed to set scheduling policy!\n"); > + msg.status = RT_FAIL; > + write(link[1], &msg, sizeof(msg)); > + exit(1); > + } > + } > + > + spin = __igt_spin_batch_new_poll(fd, 0, ring); > + if (!spin) { > + igt_warn("Failed to create spinner! (%s)\n", > + passname[pass]); > + msg.status = RT_FAIL; > + write(link[1], &msg, sizeof(msg)); > + exit(1); > + } > + igt_spin_busywait_until_running(spin); > + > + igt_until_timeout(pass > 0 ? 5 : 2) { > + double t; > + > + igt_spin_batch_end(spin); > + gem_sync(fd, spin->handle); > + > + __rearm_spin_batch(spin); > + __submit_spin_batch(fd, spin, ring); > + > + __spin_wait(&msg, spin, &t); > + if (msg.status != RT_OK) { > + igt_warn("Wait timeout! (%s)\n", > + passname[pass]); > + write(link[1], &msg, sizeof(msg)); > + exit(1); > + } > + > + if (t > msg.max) > + msg.max = t; > + > + igt_mean_add(&msg.mean, t); I think I would include a background load (normal process, submitting overlapping nops from alternate contexts, just so that we know that we should be saturating execlists_submission_tasklet) and then have the RT process compete with a MAX_PRIO timed request. As I understand the problem, it is the priority inversion between the RT hog spinning on the breadcrumb preventing ksoftirqd from running; so the challenge of the test setup is in tricking ksoftirqd into running on the same CPU as the hog. I think we need a cpuset to run both RT and background on the same CPU. > + } > + > + igt_spin_batch_free(fd, spin); > + > + igt_info("%10s: mean=%.2fus variance=%.2fus max=%.2fus (n=%lu)\n", > + passname[pass], > + igt_mean_get(&msg.mean) * 1e6, > + igt_mean_get_variance(&msg.mean) * 1e6, > + msg.max * 1e6, > + msg.mean.count); > + > + write(link[1], &msg, sizeof(msg)); > + } while (++pass < 3); > + > + exit(0); > + } > + > + for (i = 0; i < 3; i++) { > + ret = read(link[0], &res[i], sizeof(res[0])); > + igt_assert_eq(ret, sizeof(res[0])); > + > + igt_assert_eq(res[i].status, RT_OK); > + } > + > + close(link[0]); > + close(link[1]); > + > + igt_waitchildren(); > + > + /* > + * Check that the submission latency variance for a task with RT > + * priority is no larger than three times the same of a normal task. > + */ > + igt_assert(igt_mean_get_variance(&res[2].mean) < > + igt_mean_get_variance(&res[1].mean) * 3); So sample period of 5s, we expect to sample over many many requests, so mean/variance should be reasonably stable even on a noisy system. Please make this an assert_f and explain the failure in the error message. Even a s/2/RT/; s/1/NORMAL/ would help the stringify. -Chris
Quoting Tvrtko Ursulin (2018-04-17 09:41:11) > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > We want to make sure RT tasks which use a lot of CPU times can submit > batch buffers with roughly the same latency (and certainly not worse) > compared to normal tasks. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > +/* > + * Test whether RT thread which hogs the CPU a lot can submit work with > + * reasonable latency. > + */ > +static void > +rthog_latency_on_ring(int fd, unsigned int ring, const char *name) > +{ > + const char *passname[] = { "warmup", "normal", "rt" }; > + struct rt_pkt res[3]; > + unsigned int i; > + int link[2]; > + int ret; > + > + igt_require(gem_can_store_dword(fd, ring)); > + > + igt_assert(pipe(link) == 0); > + > + memset(res, 0, sizeof(res)); > + > + igt_fork(child, 1) { > + unsigned int pass = 0; /* Three passes: warmup, normal, rt. */ > + > + do { > + struct rt_pkt msg = { }; > + igt_spin_t *spin; > + > + igt_mean_init(&msg.mean); > + > + if (pass == 2) { > + struct sched_param rt = > + { .sched_priority = 99 }; > + > + ret = sched_setscheduler(0, > + SCHED_FIFO | SCHED_RESET_ON_FORK, > + &rt); > + if (ret) { > + igt_warn("Failed to set scheduling policy!\n"); > + msg.status = RT_FAIL; > + write(link[1], &msg, sizeof(msg)); > + exit(1); > + } > + } > + > + spin = __igt_spin_batch_new_poll(fd, 0, ring); > + if (!spin) { > + igt_warn("Failed to create spinner! (%s)\n", > + passname[pass]); > + msg.status = RT_FAIL; > + write(link[1], &msg, sizeof(msg)); > + exit(1); > + } > + igt_spin_busywait_until_running(spin); > + > + igt_until_timeout(pass > 0 ? 5 : 2) { > + double t; > + > + igt_spin_batch_end(spin); > + gem_sync(fd, spin->handle); > + > + __rearm_spin_batch(spin); > + __submit_spin_batch(fd, spin, ring); > + > + __spin_wait(&msg, spin, &t); > + if (msg.status != RT_OK) { > + igt_warn("Wait timeout! (%s)\n", > + passname[pass]); > + write(link[1], &msg, sizeof(msg)); > + exit(1); > + } > + > + if (t > msg.max) > + msg.max = t; > + > + igt_mean_add(&msg.mean, t); > + } > + > + igt_spin_batch_free(fd, spin); > + > + igt_info("%10s: mean=%.2fus variance=%.2fus max=%.2fus (n=%lu)\n", > + passname[pass], > + igt_mean_get(&msg.mean) * 1e6, > + igt_mean_get_variance(&msg.mean) * 1e6, > + msg.max * 1e6, > + msg.mean.count); > + > + write(link[1], &msg, sizeof(msg)); > + } while (++pass < 3); > + > + exit(0); > + } > + > + for (i = 0; i < 3; i++) { > + ret = read(link[0], &res[i], sizeof(res[0])); > + igt_assert_eq(ret, sizeof(res[0])); > + > + igt_assert_eq(res[i].status, RT_OK); > + } > + > + close(link[0]); > + close(link[1]); > + > + igt_waitchildren(); > + > + /* > + * Check that the submission latency variance for a task with RT > + * priority is no larger than three times the same of a normal task. > + */ > + igt_assert(igt_mean_get_variance(&res[2].mean) < > + igt_mean_get_variance(&res[1].mean) * 3); > +} Fwiw, I think we should go full on ministat here, use igt_stats_t to record every result then print the overlapping histograms so we can see the distribution. I was pondering a simple metric to warn about long tails. I was wondering if the sum of values above the median would be interesting, but really it boils down to wanting 99% to be within a smidgen of each other. -Chris
diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c index 9498c0921e60..420ede0f83a0 100644 --- a/tests/gem_exec_latency.c +++ b/tests/gem_exec_latency.c @@ -36,11 +36,15 @@ #include <sys/time.h> #include <sys/signal.h> #include <time.h> +#include <sched.h> #include "drm.h" #include "igt_sysfs.h" #include "igt_vgem.h" +#include "igt_dummyload.h" +#include "igt_stats.h" + #include "i915/gem_ring.h" #define LOCAL_I915_EXEC_NO_RELOC (1<<11) @@ -351,6 +355,172 @@ static void latency_from_ring(int fd, } } +static void __rearm_spin_batch(igt_spin_t *spin) +{ + const uint32_t mi_arb_chk = 0x5 << 23; + + *spin->batch = mi_arb_chk; + *spin->running = 0; + __sync_synchronize(); +} + +static void +__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags) +{ + struct drm_i915_gem_execbuffer2 eb = spin->execbuf; + + eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK); + eb.flags |= flags | I915_EXEC_NO_RELOC; + + gem_execbuf(fd, &eb); +} + +struct rt_pkt +{ +#define RT_OK (0) +#define RT_FAIL (1) +#define RT_TIMEOUT (2) + int status; + struct igt_mean mean; + double max; +}; + +static void __spin_wait(struct rt_pkt *msg, igt_spin_t *spin, double *t_wait) +{ + struct timespec ts = { }; + uint64_t t_last = 0; + + igt_nsec_elapsed(&ts); + + while (!READ_ONCE(*spin->running)) { + uint64_t t = igt_nsec_elapsed(&ts); + + if ((t - t_last) > 5UL * NSEC_PER_SEC) { + /* Absolute timeout to save time. */ + msg->status = RT_TIMEOUT; + } else if ((t - t_last) > NSEC_PER_SEC / 10) { + /* Backoff every 100ms to give it chance to complete. */ + t_last = t; + usleep(1); + } + } + + *t_wait = igt_nsec_elapsed(&ts) / 1e9; + + msg->status = RT_OK; +} + +/* + * Test whether RT thread which hogs the CPU a lot can submit work with + * reasonable latency. + */ +static void +rthog_latency_on_ring(int fd, unsigned int ring, const char *name) +{ + const char *passname[] = { "warmup", "normal", "rt" }; + struct rt_pkt res[3]; + unsigned int i; + int link[2]; + int ret; + + igt_require(gem_can_store_dword(fd, ring)); + + igt_assert(pipe(link) == 0); + + memset(res, 0, sizeof(res)); + + igt_fork(child, 1) { + unsigned int pass = 0; /* Three passes: warmup, normal, rt. */ + + do { + struct rt_pkt msg = { }; + igt_spin_t *spin; + + igt_mean_init(&msg.mean); + + if (pass == 2) { + struct sched_param rt = + { .sched_priority = 99 }; + + ret = sched_setscheduler(0, + SCHED_FIFO | SCHED_RESET_ON_FORK, + &rt); + if (ret) { + igt_warn("Failed to set scheduling policy!\n"); + msg.status = RT_FAIL; + write(link[1], &msg, sizeof(msg)); + exit(1); + } + } + + spin = __igt_spin_batch_new_poll(fd, 0, ring); + if (!spin) { + igt_warn("Failed to create spinner! (%s)\n", + passname[pass]); + msg.status = RT_FAIL; + write(link[1], &msg, sizeof(msg)); + exit(1); + } + igt_spin_busywait_until_running(spin); + + igt_until_timeout(pass > 0 ? 5 : 2) { + double t; + + igt_spin_batch_end(spin); + gem_sync(fd, spin->handle); + + __rearm_spin_batch(spin); + __submit_spin_batch(fd, spin, ring); + + __spin_wait(&msg, spin, &t); + if (msg.status != RT_OK) { + igt_warn("Wait timeout! (%s)\n", + passname[pass]); + write(link[1], &msg, sizeof(msg)); + exit(1); + } + + if (t > msg.max) + msg.max = t; + + igt_mean_add(&msg.mean, t); + } + + igt_spin_batch_free(fd, spin); + + igt_info("%10s: mean=%.2fus variance=%.2fus max=%.2fus (n=%lu)\n", + passname[pass], + igt_mean_get(&msg.mean) * 1e6, + igt_mean_get_variance(&msg.mean) * 1e6, + msg.max * 1e6, + msg.mean.count); + + write(link[1], &msg, sizeof(msg)); + } while (++pass < 3); + + exit(0); + } + + for (i = 0; i < 3; i++) { + ret = read(link[0], &res[i], sizeof(res[0])); + igt_assert_eq(ret, sizeof(res[0])); + + igt_assert_eq(res[i].status, RT_OK); + } + + close(link[0]); + close(link[1]); + + igt_waitchildren(); + + /* + * Check that the submission latency variance for a task with RT + * priority is no larger than three times the same of a normal task. + */ + igt_assert(igt_mean_get_variance(&res[2].mean) < + igt_mean_get_variance(&res[1].mean) * 3); +} + igt_main { const struct intel_execution_engine *e; @@ -391,6 +561,12 @@ igt_main e->exec_id | e->flags, e->name, 0); + igt_subtest_f("%s-rthog-submit", e->name) + rthog_latency_on_ring(device, + e->exec_id | + e->flags, + e->name); + igt_subtest_f("%s-dispatch-queued", e->name) latency_on_ring(device, e->exec_id | e->flags,