From patchwork Tue Jun 19 10:49:19 2018
X-Patchwork-Submitter: Chris Wilson
X-Patchwork-Id: 10474033
From: Chris Wilson
To: intel-gfx@lists.freedesktop.org
Cc: igt-dev@lists.freedesktop.org
Date: Tue, 19 Jun 2018 11:49:19 +0100
Message-Id: <20180619104920.17528-5-chris@chris-wilson.co.uk>
In-Reply-To: <20180619104920.17528-1-chris@chris-wilson.co.uk>
References: <20180619104920.17528-1-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH i-g-t 5/6] tests/gem_exec_latency: New subtests for checking submission from RT tasks

From: Tvrtko Ursulin

We want to make sure RT tasks which use a lot of CPU time can submit
batch buffers with roughly the same latency (and certainly not worse)
compared to normal tasks.

v2: Add tests to run across all engines simultaneously to encourage
    ksoftirqd to kick in even more often.

v3: More passes to improve measurement stability.
Signed-off-by: Tvrtko Ursulin
Reviewed-by: Chris Wilson #v1
Signed-off-by: Chris Wilson
---
 tests/gem_exec_latency.c | 234 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 234 insertions(+)

diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c
index 9498c0921..d64dd73ab 100644
--- a/tests/gem_exec_latency.c
+++ b/tests/gem_exec_latency.c
@@ -36,11 +36,15 @@
 #include
 #include
 #include
+#include <sched.h>
 
 #include "drm.h"
 
 #include "igt_sysfs.h"
 #include "igt_vgem.h"
+#include "igt_dummyload.h"
+#include "igt_stats.h"
+
 #include "i915/gem_ring.h"
 
 #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
@@ -351,6 +355,216 @@ static void latency_from_ring(int fd,
 	}
 }
 
+static void __rearm_spin_batch(igt_spin_t *spin)
+{
+	const uint32_t mi_arb_chk = 0x5 << 23;
+
+	*spin->batch = mi_arb_chk;
+	*spin->running = 0;
+	__sync_synchronize();
+}
+
+static void
+__submit_spin_batch(int fd, igt_spin_t *spin, unsigned int flags)
+{
+	struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
+
+	eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
+	eb.flags |= flags | I915_EXEC_NO_RELOC;
+
+	gem_execbuf(fd, &eb);
+}
+
+struct rt_pkt {
+	struct igt_mean mean;
+	double min, max;
+};
+
+static bool __spin_wait(int fd, igt_spin_t *spin)
+{
+	while (!READ_ONCE(*spin->running)) {
+		if (!gem_bo_busy(fd, spin->handle))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Test whether an RT thread which hogs the CPU a lot can submit work with
+ * reasonable latency.
+ */
+static void
+rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned int flags)
+#define RTIDLE 0x1
+{
+	const char *passname[] = {
+		"warmup",
+		"normal",
+		"rt[0]",
+		"rt[1]",
+		"rt[2]",
+		"rt[3]",
+		"rt[4]",
+		"rt[5]",
+		"rt[6]",
+	};
+#define NPASS ARRAY_SIZE(passname)
+#define MMAP_SZ (64 << 10)
+	struct rt_pkt *results;
+	unsigned int engines[16];
+	const char *names[16];
+	unsigned int nengine;
+	int ret;
+
+	igt_assert(ARRAY_SIZE(engines) * NPASS * sizeof(*results) <= MMAP_SZ);
+	results = mmap(NULL, MMAP_SZ, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_assert(results != MAP_FAILED);
+
+	nengine = 0;
+	if (engine == ALL_ENGINES) {
+		for_each_physical_engine(fd, engine) {
+			if (!gem_can_store_dword(fd, engine))
+				continue;
+
+			engines[nengine] = engine;
+			names[nengine] = e__->name;
+			nengine++;
+		}
+		igt_require(nengine > 1);
+	} else {
+		igt_require(gem_can_store_dword(fd, engine));
+		engines[nengine] = engine;
+		names[nengine] = name;
+		nengine++;
+	}
+
+	gem_quiescent_gpu(fd);
+
+	igt_fork(child, nengine) {
+		unsigned int pass = 0; /* Three phases: warmup, normal, rt. */
+
+		engine = engines[child];
+		do {
+			struct igt_mean mean;
+			double min = HUGE_VAL;
+			double max = -HUGE_VAL;
+			igt_spin_t *spin;
+
+			igt_mean_init(&mean);
+
+			if (pass == 2) {
+				struct sched_param rt =
+					{ .sched_priority = 99 };
+
+				ret = sched_setscheduler(0,
+							 SCHED_FIFO | SCHED_RESET_ON_FORK,
+							 &rt);
+				if (ret) {
+					igt_warn("Failed to set scheduling policy!\n");
+					break;
+				}
+			}
+
+			usleep(250);
+
+			spin = __igt_spin_batch_new_poll(fd, 0, engine);
+			if (!spin) {
+				igt_warn("Failed to create spinner! (%s)\n",
+					 passname[pass]);
+				break;
+			}
+			igt_spin_busywait_until_running(spin);
+
+			igt_until_timeout(pass > 0 ? 5 : 2) {
+				struct timespec ts = { };
+				double t;
+
+				igt_spin_batch_end(spin);
+				gem_sync(fd, spin->handle);
+				if (flags & RTIDLE)
+					igt_drop_caches_set(fd, DROP_IDLE);
+
+				/*
+				 * If we are oversubscribed (more RT hogs than
+				 * cpus) give the others a chance to run;
+				 * otherwise, they will interrupt us in the
+				 * middle of the measurement.
+				 */
+				if (nengine > 1)
+					usleep(10*nengine);
+
+				__rearm_spin_batch(spin);
+
+				igt_nsec_elapsed(&ts);
+				__submit_spin_batch(fd, spin, engine);
+				if (!__spin_wait(fd, spin)) {
+					igt_warn("Wait timeout! (%s)\n",
+						 passname[pass]);
+					break;
+				}
+
+				t = igt_nsec_elapsed(&ts) * 1e-9;
+				if (t > max)
+					max = t;
+				if (t < min)
+					min = t;
+
+				igt_mean_add(&mean, t);
+			}
+
+			igt_spin_batch_free(fd, spin);
+
+			igt_info("%8s %10s: mean=%.2fus stddev=%.3fus [%.2fus, %.2fus] (n=%lu)\n",
+				 names[child],
+				 passname[pass],
+				 igt_mean_get(&mean) * 1e6,
+				 sqrt(igt_mean_get_variance(&mean)) * 1e6,
+				 min * 1e6, max * 1e6,
+				 mean.count);
+
+			results[NPASS * child + pass].mean = mean;
+			results[NPASS * child + pass].min = min;
+			results[NPASS * child + pass].max = max;
+		} while (++pass < NPASS);
+	}
+
+	igt_waitchildren();
+
+	for (unsigned int child = 0; child < nengine; child++) {
+		struct rt_pkt normal = results[NPASS * child + 1];
+		igt_stats_t stats;
+		double variance = 0;
+
+		igt_stats_init_with_size(&stats, NPASS);
+
+		for (unsigned int pass = 2; pass < NPASS; pass++) {
+			struct rt_pkt *rt = &results[NPASS * child + pass];
+
+			igt_assert(rt->max);
+
+			igt_stats_push_float(&stats, igt_mean_get(&rt->mean));
+			variance += igt_mean_get_variance(&rt->mean);
+		}
+		variance /= NPASS - 2;
+
+		igt_info("%8s: normal latency=%.2f±%.3fus, rt latency=%.2f±%.3fus\n",
+			 names[child],
+			 igt_mean_get(&normal.mean) * 1e6,
+			 sqrt(igt_mean_get_variance(&normal.mean)) * 1e6,
+			 igt_stats_get_median(&stats) * 1e6,
+			 sqrt(variance) * 1e6);
+
+		igt_assert(igt_stats_get_median(&stats) <
+			   igt_mean_get(&normal.mean) * 2);
+
+		/* The system is noisy; be conservative when declaring fail. */
+		igt_assert(variance < igt_mean_get_variance(&normal.mean) * 10);
+	}
+
+	munmap(results, MMAP_SZ);
+}
+
 igt_main
 {
 	const struct intel_execution_engine *e;
@@ -373,6 +587,12 @@ igt_main
 		intel_register_access_init(intel_get_pci_device(), false, device);
 	}
 
+	igt_subtest("all-rtidle-submit")
+		rthog_latency_on_ring(device, ALL_ENGINES, "all", RTIDLE);
+
+	igt_subtest("all-rthog-submit")
+		rthog_latency_on_ring(device, ALL_ENGINES, "all", 0);
+
 	igt_subtest_group {
 		igt_fixture
 			igt_require(intel_gen(intel_get_drm_devid(device)) >= 7);
@@ -391,6 +611,20 @@ igt_main
 					e->exec_id | e->flags,
 					e->name, 0);
 
+		igt_subtest_f("%s-rtidle-submit", e->name)
+			rthog_latency_on_ring(device,
+					      e->exec_id |
+					      e->flags,
+					      e->name,
+					      RTIDLE);
+
+		igt_subtest_f("%s-rthog-submit", e->name)
+			rthog_latency_on_ring(device,
+					      e->exec_id |
+					      e->flags,
+					      e->name,
+					      0);
+
 		igt_subtest_f("%s-dispatch-queued", e->name)
 			latency_on_ring(device,
 					e->exec_id | e->flags,