diff mbox

[i-g-t,v6] benchmarks/gem_wsim: Command submission workload simulator

Message ID 20170425123138.GG11432@nuc-i3427.alporthouse.com (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson April 25, 2017, 12:31 p.m. UTC
On Tue, Apr 25, 2017 at 01:25:48PM +0100, Chris Wilson wrote:
> On Tue, Apr 25, 2017 at 01:10:34PM +0100, Tvrtko Ursulin wrote:
> > 
> > On 25/04/2017 12:35, Chris Wilson wrote:
> > >On Tue, Apr 25, 2017 at 12:13:04PM +0100, Tvrtko Ursulin wrote:
> > >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> [snip]
> > >>+static enum intel_engine_id
> > >>+rt_balance(const struct workload_balancer *balancer,
> > >>+	   struct workload *wrk, struct w_step *w)
> > >>+{
> > >>+	enum intel_engine_id engine;
> > >>+	long qd[NUM_ENGINES];
> > >>+	unsigned int n;
> > >>+
> > >>+	igt_assert(w->engine == VCS);
> > >>+
> > >>+	/* Estimate the "speed" of the most recent batch
> > >>+	 *    (finish time - submit time)
> > >>+	 * and use that as an approximate for the total remaining time for
> > >>+	 * all batches on that engine. We try to keep the total remaining
> > >>+	 * balanced between the engines.
> > >>+	 */
> > >
> > >Next steps for this would be to move from an instantaneous speed, to an
> > >average. I'm thinking something like an exponential decay moving average
> > >just to make the estimation more robust.
> > 
> > Do you think it would be OK to merge these two tools at this point
> > and continue improving them in place?
> 
> Yes. Although there's no excuse not to make this NO_RELOC from the start,
> especially if we want to demonstrate how it should be done! Hopefully
> attached the delta.

Which I forgot. Let's try again...
-Chris
diff mbox

Patch

From 985f873f1c9cdaec396c5410738910da04e8f95b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 25 Apr 2017 13:22:39 +0100
Subject: [PATCH] no-reloc

---
 benchmarks/gem_wsim.c | 45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index 1f491a5..f13477a 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -90,9 +90,13 @@  struct w_step
 	struct drm_i915_gem_relocation_entry reloc[3];
 	unsigned long bb_sz;
 	uint32_t bb_handle;
-	uint32_t *mapped_batch, *mapped_seqno;
-	unsigned int mapped_len;
+	uint32_t *mapped_batch;
+	uint32_t *seqno_value;
+	uint32_t *seqno_address;
 	uint32_t *rt0_value;
+	uint32_t *rt0_address;
+	uint32_t *rt1_address;
+	unsigned int mapped_len;
 };
 
 struct workload
@@ -463,9 +467,10 @@  terminate_bb(struct w_step *w, unsigned int flags)
 		batch_start += 4 * sizeof(uint32_t);
 
 		*cs++ = MI_STORE_DWORD_IMM;
+		w->seqno_address = cs;
 		*cs++ = 0;
 		*cs++ = 0;
-		w->mapped_seqno = cs;
+		w->seqno_value = cs;
 		*cs++ = 0;
 	}
 
@@ -474,6 +479,7 @@  terminate_bb(struct w_step *w, unsigned int flags)
 		batch_start += 4 * sizeof(uint32_t);
 
 		*cs++ = MI_STORE_DWORD_IMM;
+		w->rt0_address = cs;
 		*cs++ = 0;
 		*cs++ = 0;
 		w->rt0_value = cs;
@@ -484,6 +490,7 @@  terminate_bb(struct w_step *w, unsigned int flags)
 
 		*cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */
 		*cs++ = RCS_TIMESTAMP;
+		w->rt1_address = cs;
 		*cs++ = 0;
 		*cs++ = 0;
 	}
@@ -500,8 +507,7 @@  eb_update_flags(struct w_step *w, enum intel_engine_id engine,
 {
 	w->eb.flags = eb_engine_map[engine];
 	w->eb.flags |= I915_EXEC_HANDLE_LUT;
-	if (!(flags & SEQNO))
-		w->eb.flags |= I915_EXEC_NO_RELOC;
+	w->eb.flags |= I915_EXEC_NO_RELOC;
 }
 
 static void
@@ -543,10 +549,8 @@  alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
 			w->obj[bb_i].relocation_count = 3;
 		else
 			w->obj[bb_i].relocation_count = 1;
-		for (int i = 0; i < w->obj[bb_i].relocation_count; i++) {
-			w->reloc[i].presumed_offset = -1;
+		for (int i = 0; i < w->obj[bb_i].relocation_count; i++)
 			w->reloc[i].target_handle = 1;
-		}
 	}
 
 	w->eb.buffers_ptr = to_user_pointer(w->obj);
@@ -782,10 +786,14 @@  update_bb_seqno(struct w_step *w, enum intel_engine_id engine, uint32_t seqno)
 	gem_set_domain(fd, w->bb_handle,
 		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
 
-	*w->mapped_seqno = seqno;
-
-	w->reloc[0].presumed_offset = -1;
 	w->reloc[0].delta = VCS_SEQNO_OFFSET(engine);
+
+	*w->seqno_value = seqno;
+	*w->seqno_address = w->reloc[0].presumed_offset + w->reloc[0].delta;
+
+	/* If not using NO_RELOC, force the relocations */
+	if ((w->eb.flags & I915_EXEC_NO_RELOC))
+		w->reloc[0].presumed_offset = -1;
 }
 
 static void
@@ -796,13 +804,18 @@  update_bb_rt(struct w_step *w, enum intel_engine_id engine)
 	gem_set_domain(fd, w->bb_handle,
 		       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
 
-	*w->rt0_value = *REG(RCS_TIMESTAMP);
-
-	w->reloc[1].presumed_offset = -1;
 	w->reloc[1].delta = VCS_SEQNO_OFFSET(engine) + sizeof(uint32_t);
-
-	w->reloc[2].presumed_offset = -1;
 	w->reloc[2].delta = VCS_SEQNO_OFFSET(engine) + 2 * sizeof(uint32_t);
+
+	*w->rt0_value = *REG(RCS_TIMESTAMP);
+	*w->rt0_address = w->reloc[1].presumed_offset + w->reloc[1].delta;
+	*w->rt1_address = w->reloc[1].presumed_offset + w->reloc[1].delta;
+
+	/* If not using NO_RELOC, force the relocations */
+	if ((w->eb.flags & I915_EXEC_NO_RELOC)) {
+		w->reloc[1].presumed_offset = -1;
+		w->reloc[2].presumed_offset = -1;
+	}
 }
 
 static void w_sync_to(struct workload *wrk, struct w_step *w, int target)
-- 
1.9.1