diff mbox

[CI,i-g-t,1/2] tests/gem_exec_latency: New subtests for checking submission from RT tasks

Message ID 20180417084112.8352-1-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin April 17, 2018, 8:41 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

We want to make sure RT tasks which use a lot of CPU times can submit
batch buffers with roughly the same latency (and certainly not worse)
compared to normal tasks.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/gem_exec_latency.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)

Comments

Chris Wilson April 17, 2018, 10:12 a.m. UTC | #1
Quoting Tvrtko Ursulin (2018-04-17 09:41:11)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We want to make sure RT tasks which use a lot of CPU times can submit
> batch buffers with roughly the same latency (and certainly not worse)
> compared to normal tasks.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  tests/gem_exec_latency.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 176 insertions(+)
> 
> diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c
> index 9498c0921e60..420ede0f83a0 100644
> --- a/tests/gem_exec_latency.c
> +++ b/tests/gem_exec_latency.c
> @@ -36,11 +36,15 @@
>  #include <sys/time.h>
>  #include <sys/signal.h>
>  #include <time.h>
> +#include <sched.h>
>  
>  #include "drm.h"
>  
>  #include "igt_sysfs.h"
>  #include "igt_vgem.h"
> +#include "igt_dummyload.h"
> +#include "igt_stats.h"
> +
>  #include "i915/gem_ring.h"
>  
>  #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
> @@ -351,6 +355,172 @@ static void latency_from_ring(int fd,
>         }
>  }
>  
> +static void __rearm_spin_batch(igt_spin_t *spin)
> +{
> +       const uint32_t mi_arb_chk = 0x5 << 23;
> +
> +       *spin->batch = mi_arb_chk;
> +       *spin->running = 0;
> +       __sync_synchronize();
> +}
> +
> +static void
> +__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags)
> +{
> +       struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
> +
> +       eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
> +       eb.flags |= flags | I915_EXEC_NO_RELOC;
> +
> +       gem_execbuf(fd, &eb);
> +}
> +
> +struct rt_pkt
> +{
> +#define RT_OK (0)
> +#define RT_FAIL (1)
> +#define RT_TIMEOUT (2)
> +       int status;
> +       struct igt_mean mean;
> +       double max;
> +};
> +
> +static void __spin_wait(struct rt_pkt *msg, igt_spin_t *spin, double *t_wait)
> +{
> +       struct timespec ts = { };
> +       uint64_t t_last = 0;
> +
> +       igt_nsec_elapsed(&ts);
> +
> +       while (!READ_ONCE(*spin->running)) {
> +               uint64_t t = igt_nsec_elapsed(&ts);
> +
> +               if ((t - t_last) > 5UL * NSEC_PER_SEC) {
> +                        /* Absolute timeout to save time. */
> +                       msg->status = RT_TIMEOUT;
> +               } else if ((t - t_last) > NSEC_PER_SEC / 10) {
> +                       /* Backoff every 100ms to give it chance to complete. */
> +                       t_last = t;
> +                       usleep(1);
> +               }
> +       }
> +
> +       *t_wait = igt_nsec_elapsed(&ts) / 1e9;

(I would keep this as nanonseconds in the double until we need to
convert into human readable units.)

> +       msg->status = RT_OK;
> +}
> +
> +/*
> + * Test whether RT thread which hogs the CPU a lot can submit work with
> + * reasonable latency.
> + */
> +static void
> +rthog_latency_on_ring(int fd, unsigned int ring, const char *name)
> +{
> +       const char *passname[] = { "warmup", "normal", "rt" };
> +       struct rt_pkt res[3];
> +       unsigned int i;
> +       int link[2];
> +       int ret;
> +
> +       igt_require(gem_can_store_dword(fd, ring));
> +
> +       igt_assert(pipe(link) == 0);
> +
> +       memset(res, 0, sizeof(res));
> +
> +        igt_fork(child, 1) {
> +               unsigned int pass = 0; /* Three passes: warmup, normal, rt. */
> +
> +               do {
> +                       struct rt_pkt msg = { };
> +                       igt_spin_t *spin;
> +
> +                       igt_mean_init(&msg.mean);
> +
> +                       if (pass == 2) {
> +                               struct sched_param rt =
> +                                               { .sched_priority = 99 };
> +
> +                               ret = sched_setscheduler(0,
> +                                                       SCHED_FIFO | SCHED_RESET_ON_FORK,
> +                                                       &rt);
> +                               if (ret) {
> +                                       igt_warn("Failed to set scheduling policy!\n");
> +                                       msg.status = RT_FAIL;
> +                                       write(link[1], &msg, sizeof(msg));
> +                                       exit(1);
> +                               }
> +                       }
> +
> +                       spin = __igt_spin_batch_new_poll(fd, 0, ring);
> +                       if (!spin) {
> +                               igt_warn("Failed to create spinner! (%s)\n",
> +                                        passname[pass]);
> +                               msg.status = RT_FAIL;
> +                               write(link[1], &msg, sizeof(msg));
> +                               exit(1);
> +                       }
> +                       igt_spin_busywait_until_running(spin);
> +
> +                       igt_until_timeout(pass > 0 ? 5 : 2) {
> +                               double t;
> +
> +                               igt_spin_batch_end(spin);
> +                               gem_sync(fd, spin->handle);
> +
> +                               __rearm_spin_batch(spin);
> +                               __submit_spin_batch(fd, spin, ring);
> +
> +                               __spin_wait(&msg, spin, &t);
> +                               if (msg.status != RT_OK) {
> +                                       igt_warn("Wait timeout! (%s)\n",
> +                                                passname[pass]);
> +                                       write(link[1], &msg, sizeof(msg));
> +                                       exit(1);
> +                               }
> +
> +                               if (t > msg.max)
> +                                       msg.max = t;
> +
> +                               igt_mean_add(&msg.mean, t);

I think I would include a background load (normal process, submitting
overlapping nops from alternate contexts, just so that we know that we
should be saturating execlists_submission_tasklet) and then have the RT
process compete with a MAX_PRIO timed request. As I understand the
problem, it is the priority inversion between the RT hog spinning on the
breadcrumb preventing ksoftirqd from running; so the challenge of the
test setup is in tricking ksoftirqd into running on the same CPU as the
hog. I think we need a cpuset to run both RT and background on the same
CPU.

> +                       }
> +
> +                       igt_spin_batch_free(fd, spin);
> +
> +                       igt_info("%10s: mean=%.2fus variance=%.2fus max=%.2fus (n=%lu)\n",
> +                                passname[pass],
> +                                igt_mean_get(&msg.mean) * 1e6,
> +                                igt_mean_get_variance(&msg.mean) * 1e6,
> +                                msg.max * 1e6,
> +                                msg.mean.count);
> +
> +                       write(link[1], &msg, sizeof(msg));
> +               } while (++pass < 3);
> +
> +               exit(0);
> +       }
> +
> +       for (i = 0; i < 3; i++) {
> +               ret = read(link[0], &res[i], sizeof(res[0]));
> +               igt_assert_eq(ret, sizeof(res[0]));
> +
> +               igt_assert_eq(res[i].status, RT_OK);
> +       }
> +
> +       close(link[0]);
> +       close(link[1]);
> +
> +       igt_waitchildren();
> +
> +       /*
> +        * Check that the submission latency variance for a task with RT
> +        * priority is no larger than three times the same of a normal task.
> +        */
> +       igt_assert(igt_mean_get_variance(&res[2].mean) <
> +                  igt_mean_get_variance(&res[1].mean) * 3);

So sample period of 5s, we expect to sample over many many requests, so
mean/variance should be reasonably stable even on a noisy system.

Please make this an assert_f and explain the failure in the error
message. Even a s/2/RT/; s/1/NORMAL/ would help the stringify.
-Chris
Chris Wilson April 18, 2018, 8:17 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-04-17 09:41:11)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> We want to make sure RT tasks which use a lot of CPU times can submit
> batch buffers with roughly the same latency (and certainly not worse)
> compared to normal tasks.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> +/*
> + * Test whether RT thread which hogs the CPU a lot can submit work with
> + * reasonable latency.
> + */
> +static void
> +rthog_latency_on_ring(int fd, unsigned int ring, const char *name)
> +{
> +       const char *passname[] = { "warmup", "normal", "rt" };
> +       struct rt_pkt res[3];
> +       unsigned int i;
> +       int link[2];
> +       int ret;
> +
> +       igt_require(gem_can_store_dword(fd, ring));
> +
> +       igt_assert(pipe(link) == 0);
> +
> +       memset(res, 0, sizeof(res));
> +
> +        igt_fork(child, 1) {
> +               unsigned int pass = 0; /* Three passes: warmup, normal, rt. */
> +
> +               do {
> +                       struct rt_pkt msg = { };
> +                       igt_spin_t *spin;
> +
> +                       igt_mean_init(&msg.mean);
> +
> +                       if (pass == 2) {
> +                               struct sched_param rt =
> +                                               { .sched_priority = 99 };
> +
> +                               ret = sched_setscheduler(0,
> +                                                       SCHED_FIFO | SCHED_RESET_ON_FORK,
> +                                                       &rt);
> +                               if (ret) {
> +                                       igt_warn("Failed to set scheduling policy!\n");
> +                                       msg.status = RT_FAIL;
> +                                       write(link[1], &msg, sizeof(msg));
> +                                       exit(1);
> +                               }
> +                       }
> +
> +                       spin = __igt_spin_batch_new_poll(fd, 0, ring);
> +                       if (!spin) {
> +                               igt_warn("Failed to create spinner! (%s)\n",
> +                                        passname[pass]);
> +                               msg.status = RT_FAIL;
> +                               write(link[1], &msg, sizeof(msg));
> +                               exit(1);
> +                       }
> +                       igt_spin_busywait_until_running(spin);
> +
> +                       igt_until_timeout(pass > 0 ? 5 : 2) {
> +                               double t;
> +
> +                               igt_spin_batch_end(spin);
> +                               gem_sync(fd, spin->handle);
> +
> +                               __rearm_spin_batch(spin);
> +                               __submit_spin_batch(fd, spin, ring);
> +
> +                               __spin_wait(&msg, spin, &t);
> +                               if (msg.status != RT_OK) {
> +                                       igt_warn("Wait timeout! (%s)\n",
> +                                                passname[pass]);
> +                                       write(link[1], &msg, sizeof(msg));
> +                                       exit(1);
> +                               }
> +
> +                               if (t > msg.max)
> +                                       msg.max = t;
> +
> +                               igt_mean_add(&msg.mean, t);
> +                       }
> +
> +                       igt_spin_batch_free(fd, spin);
> +
> +                       igt_info("%10s: mean=%.2fus variance=%.2fus max=%.2fus (n=%lu)\n",
> +                                passname[pass],
> +                                igt_mean_get(&msg.mean) * 1e6,
> +                                igt_mean_get_variance(&msg.mean) * 1e6,
> +                                msg.max * 1e6,
> +                                msg.mean.count);
> +
> +                       write(link[1], &msg, sizeof(msg));
> +               } while (++pass < 3);
> +
> +               exit(0);
> +       }
> +
> +       for (i = 0; i < 3; i++) {
> +               ret = read(link[0], &res[i], sizeof(res[0]));
> +               igt_assert_eq(ret, sizeof(res[0]));
> +
> +               igt_assert_eq(res[i].status, RT_OK);
> +       }
> +
> +       close(link[0]);
> +       close(link[1]);
> +
> +       igt_waitchildren();
> +
> +       /*
> +        * Check that the submission latency variance for a task with RT
> +        * priority is no larger than three times the same of a normal task.
> +        */
> +       igt_assert(igt_mean_get_variance(&res[2].mean) <
> +                  igt_mean_get_variance(&res[1].mean) * 3);
> +}

Fwiw, I think we should go full on ministat here, use igt_stats_t to
record every result then print the overlapping histograms so we can see
the distribution.

I was pondering a simple metric to warn about long tails. I was
wondering if the sum of values above the median would be interesting,
but really it boils down to wanting 99% to be within a smidgen of each
other.
-Chris
diff mbox

Patch

diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c
index 9498c0921e60..420ede0f83a0 100644
--- a/tests/gem_exec_latency.c
+++ b/tests/gem_exec_latency.c
@@ -36,11 +36,15 @@ 
 #include <sys/time.h>
 #include <sys/signal.h>
 #include <time.h>
+#include <sched.h>
 
 #include "drm.h"
 
 #include "igt_sysfs.h"
 #include "igt_vgem.h"
+#include "igt_dummyload.h"
+#include "igt_stats.h"
+
 #include "i915/gem_ring.h"
 
 #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
@@ -351,6 +355,172 @@  static void latency_from_ring(int fd,
 	}
 }
 
+static void __rearm_spin_batch(igt_spin_t *spin)
+{
+	const uint32_t mi_arb_chk = 0x5 << 23;
+
+       *spin->batch = mi_arb_chk;
+       *spin->running = 0;
+       __sync_synchronize();
+}
+
+static void
+__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags)
+{
+	struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
+
+	eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
+	eb.flags |= flags | I915_EXEC_NO_RELOC;
+
+	gem_execbuf(fd, &eb);
+}
+
+struct rt_pkt
+{
+#define RT_OK (0)
+#define RT_FAIL (1)
+#define RT_TIMEOUT (2)
+	int status;
+	struct igt_mean mean;
+	double max;
+};
+
+static void __spin_wait(struct rt_pkt *msg, igt_spin_t *spin, double *t_wait)
+{
+	struct timespec ts = { };
+	uint64_t t_last = 0;
+
+	igt_nsec_elapsed(&ts);
+
+	while (!READ_ONCE(*spin->running)) {
+		uint64_t t = igt_nsec_elapsed(&ts);
+
+		if ((t - t_last) > 5UL * NSEC_PER_SEC) {
+			 /* Absolute timeout to save time. */
+			msg->status = RT_TIMEOUT;
+		} else if ((t - t_last) > NSEC_PER_SEC / 10) {
+			/* Backoff every 100ms to give it chance to complete. */
+			t_last = t;
+			usleep(1);
+		}
+	}
+
+	*t_wait = igt_nsec_elapsed(&ts) / 1e9;
+
+	msg->status = RT_OK;
+}
+
+/*
+ * Test whether RT thread which hogs the CPU a lot can submit work with
+ * reasonable latency.
+ */
+static void
+rthog_latency_on_ring(int fd, unsigned int ring, const char *name)
+{
+	const char *passname[] = { "warmup", "normal", "rt" };
+	struct rt_pkt res[3];
+	unsigned int i;
+	int link[2];
+	int ret;
+
+	igt_require(gem_can_store_dword(fd, ring));
+
+	igt_assert(pipe(link) == 0);
+
+	memset(res, 0, sizeof(res));
+
+        igt_fork(child, 1) {
+		unsigned int pass = 0; /* Three passes: warmup, normal, rt. */
+
+		do {
+			struct rt_pkt msg = { };
+			igt_spin_t *spin;
+
+			igt_mean_init(&msg.mean);
+
+			if (pass == 2) {
+				struct sched_param rt =
+						{ .sched_priority = 99 };
+
+				ret = sched_setscheduler(0,
+							SCHED_FIFO | SCHED_RESET_ON_FORK,
+							&rt);
+				if (ret) {
+					igt_warn("Failed to set scheduling policy!\n");
+					msg.status = RT_FAIL;
+					write(link[1], &msg, sizeof(msg));
+					exit(1);
+				}
+			}
+
+			spin = __igt_spin_batch_new_poll(fd, 0, ring);
+			if (!spin) {
+				igt_warn("Failed to create spinner! (%s)\n",
+					 passname[pass]);
+				msg.status = RT_FAIL;
+				write(link[1], &msg, sizeof(msg));
+				exit(1);
+			}
+			igt_spin_busywait_until_running(spin);
+
+			igt_until_timeout(pass > 0 ? 5 : 2) {
+				double t;
+
+				igt_spin_batch_end(spin);
+				gem_sync(fd, spin->handle);
+
+				__rearm_spin_batch(spin);
+				__submit_spin_batch(fd, spin, ring);
+
+				__spin_wait(&msg, spin, &t);
+				if (msg.status != RT_OK) {
+					igt_warn("Wait timeout! (%s)\n",
+						 passname[pass]);
+					write(link[1], &msg, sizeof(msg));
+					exit(1);
+				}
+
+				if (t > msg.max)
+					msg.max = t;
+
+				igt_mean_add(&msg.mean, t);
+			}
+
+			igt_spin_batch_free(fd, spin);
+
+			igt_info("%10s: mean=%.2fus variance=%.2fus max=%.2fus (n=%lu)\n",
+				 passname[pass],
+				 igt_mean_get(&msg.mean) * 1e6,
+				 igt_mean_get_variance(&msg.mean) * 1e6,
+				 msg.max * 1e6,
+				 msg.mean.count);
+
+			write(link[1], &msg, sizeof(msg));
+		} while (++pass < 3);
+
+		exit(0);
+	}
+
+	for (i = 0; i < 3; i++) {
+		ret = read(link[0], &res[i], sizeof(res[0]));
+		igt_assert_eq(ret, sizeof(res[0]));
+
+		igt_assert_eq(res[i].status, RT_OK);
+	}
+
+	close(link[0]);
+	close(link[1]);
+
+	igt_waitchildren();
+
+	/*
+	 * Check that the submission latency variance for a task with RT
+	 * priority is no larger than three times the same of a normal task.
+	 */
+	igt_assert(igt_mean_get_variance(&res[2].mean) <
+		   igt_mean_get_variance(&res[1].mean) * 3);
+}
+
 igt_main
 {
 	const struct intel_execution_engine *e;
@@ -391,6 +561,12 @@  igt_main
 							e->exec_id | e->flags,
 							e->name, 0);
 
+				igt_subtest_f("%s-rthog-submit", e->name)
+					rthog_latency_on_ring(device,
+							      e->exec_id |
+							      e->flags,
+							      e->name);
+
 				igt_subtest_f("%s-dispatch-queued", e->name)
 					latency_on_ring(device,
 							e->exec_id | e->flags,