diff mbox series

[i-g-t,20/25] gem_wsim: Per context SSEU control

Message ID 20190517112526.6738-21-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Media scalability tooling | expand

Commit Message

Tvrtko Ursulin May 17, 2019, 11:25 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A new workload command ('S') is added which allows per context slice
(re-)configuration.

v2:
 * Only query device SSEU on first use. (Chris)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 benchmarks/gem_wsim.c  | 83 ++++++++++++++++++++++++++++++++++++------
 benchmarks/wsim/README | 23 +++++++++++-
 2 files changed, 94 insertions(+), 12 deletions(-)

Comments

Chris Wilson May 17, 2019, 7:44 p.m. UTC | #1
Quoting Tvrtko Ursulin (2019-05-17 12:25:21)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> A new workload command ('S') is added which allows per context slice
> (re-)configuration.
> 
> v2:
>  * Only query device SSEU on first use. (Chris)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Fair enough,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
diff mbox series

Patch

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index 875838f65128..feb9650588a1 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -87,6 +87,7 @@  enum w_type
 	LOAD_BALANCE,
 	BOND,
 	TERMINATE,
+	SSEU
 };
 
 struct deps
@@ -136,6 +137,7 @@  struct w_step
 			uint64_t bond_mask;
 			enum intel_engine_id bond_master;
 		};
+		int sseu;
 	};
 
 	/* Implementation details */
@@ -171,6 +173,7 @@  struct ctx {
 	bool targets_instance;
 	bool wants_balance;
 	unsigned int static_vcs;
+	uint64_t sseu;
 };
 
 struct workload
@@ -241,6 +244,9 @@  static unsigned int context_vcs_rr;
 
 static int verbose = 1;
 static int fd;
+static struct drm_i915_gem_context_param_sseu device_sseu = {
+	.slice_mask = -1 /* Force read on first use. */
+};
 
 #define SWAPVCS		(1<<0)
 #define SEQNO		(1<<1)
@@ -482,6 +488,27 @@  parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
 				int_field(SYNC, target,
 					  tmp >= 0 || ((int)nr_steps + tmp) < 0,
 					  "Invalid sync target at step %u!\n");
+			} else if (!strcmp(field, "S")) {
+				unsigned int nr = 0;
+				while ((field = strtok_r(fstart, ".", &fctx))) {
+					tmp = atoi(field);
+					check_arg(tmp <= 0 && nr == 0,
+						  "Invalid context at step %u!\n",
+						  nr_steps);
+					check_arg(nr > 1,
+						  "Invalid SSEU format at step %u!\n",
+						  nr_steps);
+
+					if (nr == 0)
+						step.context = tmp;
+					else if (nr == 1)
+						step.sseu = tmp;
+
+					nr++;
+				}
+
+				step.type = SSEU;
+				goto add_step;
 			} else if (!strcmp(field, "t")) {
 				int_field(THROTTLE, throttle,
 					  tmp < 0,
@@ -1141,24 +1168,38 @@  find_engine(struct i915_engine_class_instance *ci, unsigned int count,
 	return 0;
 }
 
-static void
-set_ctx_sseu(uint32_t ctx)
+static struct drm_i915_gem_context_param_sseu get_device_sseu(void)
 {
-	struct drm_i915_gem_context_param_sseu sseu = { };
 	struct drm_i915_gem_context_param param = { };
 
-	sseu.class = I915_ENGINE_CLASS_RENDER;
-	sseu.instance = 0;
+	if (device_sseu.slice_mask == -1) {
+		param.param = I915_CONTEXT_PARAM_SSEU;
+		param.value = (uintptr_t)&device_sseu;
+
+		gem_context_get_param(fd, &param);
+	}
+
+	return device_sseu;
+}
+
+static uint64_t
+set_ctx_sseu(uint32_t ctx, uint64_t slice_mask)
+{
+	struct drm_i915_gem_context_param_sseu sseu = get_device_sseu();
+	struct drm_i915_gem_context_param param = { };
+
+	if (slice_mask == -1)
+		slice_mask = device_sseu.slice_mask;
+
+	sseu.slice_mask = slice_mask;
 
 	param.ctx_id = ctx;
 	param.param = I915_CONTEXT_PARAM_SSEU;
 	param.value = (uintptr_t)&sseu;
 
-	gem_context_get_param(fd, &param);
-
-	sseu.slice_mask = 1;
-
 	gem_context_set_param(fd, &param);
+
+	return slice_mask;
 }
 
 static int
@@ -1359,6 +1400,7 @@  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 
 		igt_assert(ctx_id);
 		ctx->id = ctx_id;
+		ctx->sseu = device_sseu.slice_mask;
 
 		if (flags & GLOBAL_BALANCE) {
 			ctx->static_vcs = context_vcs_rr;
@@ -1519,8 +1561,10 @@  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 			gem_context_set_param(fd, &param);
 		}
 
-		if (wrk->sseu)
-			set_ctx_sseu(arg.ctx_id);
+		if (wrk->sseu) {
+			/* Set to slice 0 only, one slice. */
+			ctx->sseu = set_ctx_sseu(ctx_id, 1);
+		}
 
 		if (share_vm)
 			vm_destroy(fd, share_vm);
@@ -1557,6 +1601,16 @@  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 		}
 	}
 
+	/*
+	 * Scan for SSEU control steps.
+	 */
+	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+		if (w->type == SSEU) {
+			get_device_sseu();
+			break;
+		}
+	}
+
 	/*
 	 * Allocate batch buffers.
 	 */
@@ -2492,6 +2546,13 @@  static void *run_workload(void *data)
 				   w->type == LOAD_BALANCE ||
 				   w->type == BOND) {
 				continue;
+			} else if (w->type == SSEU) {
+				if (w->sseu != wrk->ctx_list[w->context].sseu) {
+					wrk->ctx_list[w->context].sseu =
+						set_ctx_sseu(wrk->ctx_list[w->context].id,
+							     w->sseu);
+				}
+				continue;
 			}
 
 			if (do_sleep || w->type == PERIOD) {
diff --git a/benchmarks/wsim/README b/benchmarks/wsim/README
index 552d8882010b..eea111ab7704 100644
--- a/benchmarks/wsim/README
+++ b/benchmarks/wsim/README
@@ -5,7 +5,7 @@  ctx.engine.duration_us.dependency.wait,...
 <uint>.<str>.<uint>[-<uint>]|*.<int <= 0>[/<int <= 0>][...].<0|1>,...
 B.<uint>
 M.<uint>.<str>[|<str>]...
-P|X.<uint>.<int>
+P|S|X.<uint>.<int>
 d|p|s|t|q|a|T.<int>,...
 b.<uint>.<str>[|<str>].<str>
 f
@@ -30,6 +30,7 @@  Additional workload steps are also supported:
  'b' - Set up engine bonds.
  'M' - Set up engine map.
  'P' - Context priority.
+ 'S' - Context SSEU configuration.
  'T' - Terminate an infinite batch.
  'X' - Context preemption control.
 
@@ -257,3 +258,23 @@  then look like:
   1.DEFAULT.1000.f-1.0
   2.DEFAULT.1000.s-1.0
   a.-3
+
+Context SSEU configuration
+--------------------------
+
+  S.1.1
+  1.RCS.1000.0.0
+  S.2.-1
+  2.RCS.1000.0.0
+
+Context 1 is configured to run with one enabled slice (slice mask 1) and a batch
+is submitted against it. Context 2 is configured to run with all slices (this is
+the default so the command could also be omitted) and a batch submitted against
+it.
+
+This shows the dynamic SSEU reconfiguration cost between two contexts competing
+for the render engine.
+
+Slice mask of -1 has a special meaning of "all slices". Otherwise any integer
+can be specified as the slice mask, but beware that any value other than 1 or -1
+can make the workload non-portable between different GPUs.