diff mbox series

[i-g-t,16/25] gem_wsim: Engine bond command

Message ID 20190517112526.6738-17-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Media scalability tooling | expand

Commit Message

Tvrtko Ursulin May 17, 2019, 11:25 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Engine bonds are an i915 uAPI applicable to load balanced contexts with
engine map. They allow expressing rules of engine selection between two
contexts when submissions are also tied with submit fences.

Please refer to the README for a more detailed description.

v2:
 * Use list of symbolic engine names instead of the mask. (Chris)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 benchmarks/gem_wsim.c  | 159 +++++++++++++++++++++++++++++++++++++++--
 benchmarks/wsim/README |  50 +++++++++++++
 2 files changed, 202 insertions(+), 7 deletions(-)

Comments

Chris Wilson May 17, 2019, 7:41 p.m. UTC | #1
Quoting Tvrtko Ursulin (2019-05-17 12:25:17)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Engine bonds are an i915 uAPI applicable to load balanced contexts with
> engine map. They allow expression rules of engine selection between two
> contexts when submissions are also tied with submit fences.
> 
> Please refer to the README for a more detailed description.
> 
> v2:
>  * Use list of symbolic engine names instead of the mask. (Chris)
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  benchmarks/gem_wsim.c  | 159 +++++++++++++++++++++++++++++++++++++++--
>  benchmarks/wsim/README |  50 +++++++++++++
>  2 files changed, 202 insertions(+), 7 deletions(-)
> 
> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> index f7f84d05010a..bd9201c2928b 100644
> --- a/benchmarks/gem_wsim.c
> +++ b/benchmarks/gem_wsim.c
> @@ -85,6 +85,7 @@ enum w_type
>         PREEMPTION,
>         ENGINE_MAP,
>         LOAD_BALANCE,
> +       BOND,
>  };
>  
>  struct deps
> @@ -100,6 +101,11 @@ struct w_arg {
>         int prio;
>  };
>  
> +struct bond {
> +       uint64_t mask;
> +       enum intel_engine_id master;
> +};
> +
>  struct w_step
>  {
>         /* Workload step metadata */
> @@ -123,6 +129,10 @@ struct w_step
>                         enum intel_engine_id *engine_map;
>                 };
>                 bool load_balance;
> +               struct {
> +                       uint64_t bond_mask;
> +                       enum intel_engine_id bond_master;
> +               };
>         };
>  
>         /* Implementation details */
> @@ -152,6 +162,8 @@ struct ctx {
>         int priority;
>         unsigned int engine_map_count;
>         enum intel_engine_id *engine_map;
> +       unsigned int bond_count;
> +       struct bond *bonds;
>         bool targets_instance;
>         bool wants_balance;
>         unsigned int static_vcs;
> @@ -378,6 +390,26 @@ static int parse_engine_map(struct w_step *step, const char *_str)
>         return 0;
>  }
>  
> +static uint64_t engine_list_mask(const char *_str)
> +{
> +       uint64_t mask = 0;
> +
> +       char *token, *tctx = NULL, *tstart = (char *)_str;
> +
> +       while ((token = strtok_r(tstart, "|", &tctx))) {
> +               enum intel_engine_id engine = str_to_engine(token);
> +
> +               if ((int)engine < 0 || engine == DEFAULT || engine == VCS)
> +                       return 0;
> +
> +               mask |= 1 << engine;
> +
> +               tstart = NULL;
> +       }
> +
> +       return mask;
> +}
> +
>  #define int_field(_STEP_, _FIELD_, _COND_, _ERR_) \
>         if ((field = strtok_r(fstart, ".", &fctx))) { \
>                 tmp = atoi(field); \
> @@ -528,6 +560,39 @@ parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
>  
>                                 step.type = LOAD_BALANCE;
>                                 goto add_step;
> +                       } else if (!strcmp(field, "b")) {
> +                               unsigned int nr = 0;
> +                               while ((field = strtok_r(fstart, ".", &fctx))) {
> +                                       check_arg(nr > 2,
> +                                                 "Invalid bond format at step %u!\n",
> +                                                 nr_steps);
> +
> +                                       if (nr == 0) {
> +                                               tmp = atoi(field);
> +                                               step.context = tmp;
> +                                               check_arg(tmp <= 0,
> +                                                         "Invalid context at step %u!\n",
> +                                                         nr_steps);
> +                                       } else if (nr == 1) {
> +                                               step.bond_mask = engine_list_mask(field);
> +                                               check_arg(step.bond_mask == 0,
> +                                                       "Invalid siblings list at step %u!\n",
> +                                                       nr_steps);
> +                                       } else if (nr == 2) {
> +                                               tmp = str_to_engine(field);
> +                                               check_arg(tmp <= 0 ||
> +                                                         tmp == VCS ||
> +                                                         tmp == DEFAULT,
> +                                                         "Invalid master engine at step %u!\n",
> +                                                         nr_steps);
> +                                               step.bond_master = tmp;
> +                                       }
> +
> +                                       nr++;
> +                               }
> +
> +                               step.type = BOND;
> +                               goto add_step;
>                         }
>  
>                         if (!field) {
> @@ -1011,6 +1076,31 @@ static void vm_destroy(int i915, uint32_t vm_id)
>         igt_assert_eq(__vm_destroy(i915, vm_id), 0);
>  }
>  
> +static unsigned int
> +find_engine(struct i915_engine_class_instance *ci, unsigned int count,
> +           enum intel_engine_id engine)
> +{
> +       static struct i915_engine_class_instance map[] = {
> +               [RCS] = { I915_ENGINE_CLASS_RENDER, 0 },
> +               [BCS] = { I915_ENGINE_CLASS_COPY, 0 },
> +               [VCS1] = { I915_ENGINE_CLASS_VIDEO, 0 },
> +               [VCS2] = { I915_ENGINE_CLASS_VIDEO, 1 },
> +               [VECS] = { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0 },
> +       };
> +       unsigned int i;
> +
> +       igt_assert(engine < ARRAY_SIZE(map));
> +       igt_assert(engine == RCS || map[engine].engine_class);
> +
> +       for (i = 0; i < count; i++, ci++) {
> +               if (!memcmp(&map[engine], ci, sizeof(*ci)))
> +                       return i;
> +       }
> +
> +       igt_assert(0);
> +       return 0;
> +}
> +
>  static int
>  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>  {
> @@ -1078,6 +1168,8 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>          * Transfer over engine map configuration from the workload step.
>          */
>         for (j = 0; j < wrk->nr_ctxs; j += 2) {
> +               struct ctx *ctx = &wrk->ctx_list[j];
> +
>                 bool targets = false;
>                 bool balance = false;
>  
> @@ -1091,16 +1183,28 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>                                 else
>                                         targets = true;
>                         } else if (w->type == ENGINE_MAP) {
> -                               wrk->ctx_list[j].engine_map = w->engine_map;
> -                               wrk->ctx_list[j].engine_map_count =
> -                                       w->engine_map_count;
> +                               ctx->engine_map = w->engine_map;
> +                               ctx->engine_map_count = w->engine_map_count;
>                         } else if (w->type == LOAD_BALANCE) {
> -                               if (!wrk->ctx_list[j].engine_map) {
> +                               if (!ctx->engine_map) {
>                                         wsim_err("Load balancing needs an engine map!\n");
>                                         return 1;
>                                 }
> -                               wrk->ctx_list[j].wants_balance =
> -                                       w->load_balance;
> +                               ctx->wants_balance = w->load_balance;
> +                       } else if (w->type == BOND) {
> +                               if (!ctx->wants_balance) {
> +                                       wsim_err("Engine bonds need load balancing engine map!\n");
> +                                       return 1;
> +                               }
> +                               ctx->bond_count++;
> +                               ctx->bonds = realloc(ctx->bonds,
> +                                                    ctx->bond_count *
> +                                                    sizeof(struct bond));
> +                               igt_assert(ctx->bonds);
> +                               ctx->bonds[ctx->bond_count - 1].mask =
> +                                       w->bond_mask;
> +                               ctx->bonds[ctx->bond_count - 1].master =
> +                                       w->bond_master;
>                         }
>                 }
>  
> @@ -1281,6 +1385,46 @@ prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
>                                         ctx->engine_map[j - 1] - VCS1; /* FIXME */
>                         }
>  
> +                       for (j = 0; j < ctx->bond_count; j++) {
> +                               unsigned long mask = ctx->bonds[j].mask;
> +                               I915_DEFINE_CONTEXT_ENGINES_BOND(bond,
> +                                                                __builtin_popcount(mask));
> +                               struct i915_context_engines_bond *p = NULL, *prev;
> +                               unsigned int b, e;
> +
> +                               prev = p;
> +                               p = alloca(sizeof(bond));
> +                               assert(p);
> +                               memset(p, 0, sizeof(bond));
> +
> +                               if (j == 0)
> +                                       load_balance.base.next_extension =
> +                                               to_user_pointer(p);
> +                               else if (j < (ctx->bond_count - 1))
> +                                       prev->base.next_extension =
> +                                               to_user_pointer(p);
> +
> +                               p->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
> +                               p->virtual_index = 0;
> +                               p->master.engine_class =
> +                                       I915_ENGINE_CLASS_VIDEO;
> +                               p->master.engine_instance =
> +                                       ctx->bonds[j].master - VCS1;
> +
> +                               for (b = 0, e = 0; mask; e++, mask >>= 1) {
> +                                       unsigned int idx;
> +
> +                                       if (!(mask & 1))
> +                                               continue;
> +
> +                                       idx = find_engine(&set_engines.engines[1],
> +                                                         ctx->engine_map_count,
> +                                                         e);
> +                                       p->engines[b++] =
> +                                               set_engines.engines[1 + idx];
> +                               }
> +                       }

Ok, I was a little nervous of the transport through mask, but it checks
out.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
diff mbox series

Patch

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index f7f84d05010a..bd9201c2928b 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -85,6 +85,7 @@  enum w_type
 	PREEMPTION,
 	ENGINE_MAP,
 	LOAD_BALANCE,
+	BOND,
 };
 
 struct deps
@@ -100,6 +101,11 @@  struct w_arg {
 	int prio;
 };
 
+struct bond {	/* One engine bond of a context (element of ctx->bonds). */
+	uint64_t mask;	/* Bit N is set for each listed engine with id N. */
+	enum intel_engine_id master;	/* Engine parsed from the master field. */
+};
+
 struct w_step
 {
 	/* Workload step metadata */
@@ -123,6 +129,10 @@  struct w_step
 			enum intel_engine_id *engine_map;
 		};
 		bool load_balance;
+		struct {
+			uint64_t bond_mask;
+			enum intel_engine_id bond_master;
+		};
 	};
 
 	/* Implementation details */
@@ -152,6 +162,8 @@  struct ctx {
 	int priority;
 	unsigned int engine_map_count;
 	enum intel_engine_id *engine_map;
+	unsigned int bond_count;
+	struct bond *bonds;
 	bool targets_instance;
 	bool wants_balance;
 	unsigned int static_vcs;
@@ -378,6 +390,26 @@  static int parse_engine_map(struct w_step *step, const char *_str)
 	return 0;
 }
 
+static uint64_t engine_list_mask(const char *_str)
+{
+	uint64_t mask = 0;
+	/* NOTE: strtok_r() writes into _str, so it must be a writable buffer. */
+	char *token, *tctx = NULL, *tstart = (char *)_str;
+	/* Build a mask of the '|' separated engine names; 0 means invalid. */
+	while ((token = strtok_r(tstart, "|", &tctx))) {
+		enum intel_engine_id engine = str_to_engine(token);
+		/* Negative ids and the DEFAULT / VCS specifiers are rejected. */
+		if ((int)engine < 0 || engine == DEFAULT || engine == VCS)
+			return 0;
+		/* Shift a 64-bit one so the shift matches the mask width. */
+		mask |= 1ULL << engine;
+
+		tstart = NULL;
+	}
+
+	return mask;
+}
+
 #define int_field(_STEP_, _FIELD_, _COND_, _ERR_) \
 	if ((field = strtok_r(fstart, ".", &fctx))) { \
 		tmp = atoi(field); \
@@ -528,6 +560,39 @@  parse_workload(struct w_arg *arg, unsigned int flags, struct workload *app_w)
 
 				step.type = LOAD_BALANCE;
 				goto add_step;
+			} else if (!strcmp(field, "b")) {
+				unsigned int nr = 0;
+				while ((field = strtok_r(fstart, ".", &fctx))) {
+					check_arg(nr > 2,
+						  "Invalid bond format at step %u!\n",
+						  nr_steps);
+
+					if (nr == 0) {
+						tmp = atoi(field);
+						step.context = tmp;
+						check_arg(tmp <= 0,
+							  "Invalid context at step %u!\n",
+							  nr_steps);
+					} else if (nr == 1) {
+						step.bond_mask = engine_list_mask(field);
+						check_arg(step.bond_mask == 0,
+							"Invalid siblings list at step %u!\n",
+							nr_steps);
+					} else if (nr == 2) {
+						tmp = str_to_engine(field);
+						check_arg(tmp <= 0 ||
+							  tmp == VCS ||
+							  tmp == DEFAULT,
+							  "Invalid master engine at step %u!\n",
+							  nr_steps);
+						step.bond_master = tmp;
+					}
+
+					nr++;
+				}
+
+				step.type = BOND;
+				goto add_step;
 			}
 
 			if (!field) {
@@ -1011,6 +1076,31 @@  static void vm_destroy(int i915, uint32_t vm_id)
 	igt_assert_eq(__vm_destroy(i915, vm_id), 0);
 }
 
+static unsigned int
+find_engine(struct i915_engine_class_instance *ci, unsigned int count,
+	    enum intel_engine_id engine)
+{
+	static const struct i915_engine_class_instance map[] = {
+		[RCS] = { I915_ENGINE_CLASS_RENDER, 0 },
+		[BCS] = { I915_ENGINE_CLASS_COPY, 0 },
+		[VCS1] = { I915_ENGINE_CLASS_VIDEO, 0 },
+		[VCS2] = { I915_ENGINE_CLASS_VIDEO, 1 },
+		[VECS] = { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0 },
+	};
+	unsigned int i;
+	/* Engine must lie within the table; non-RCS needs a non-zero class. */
+	igt_assert(engine < ARRAY_SIZE(map));
+	igt_assert(engine == RCS || map[engine].engine_class);
+	/* Return the index in ci[0..count) of the entry equal to map[engine]. */
+	for (i = 0; i < count; i++, ci++) {
+		if (!memcmp(&map[engine], ci, sizeof(*ci)))
+			return i;
+	}
+	/* No entry of the supplied array matched: asserted as a failure. */
+	igt_assert(0);
+	return 0;
+}
+
 static int
 prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 {
@@ -1078,6 +1168,8 @@  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 	 * Transfer over engine map configuration from the workload step.
 	 */
 	for (j = 0; j < wrk->nr_ctxs; j += 2) {
+		struct ctx *ctx = &wrk->ctx_list[j];
+
 		bool targets = false;
 		bool balance = false;
 
@@ -1091,16 +1183,28 @@  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 				else
 					targets = true;
 			} else if (w->type == ENGINE_MAP) {
-				wrk->ctx_list[j].engine_map = w->engine_map;
-				wrk->ctx_list[j].engine_map_count =
-					w->engine_map_count;
+				ctx->engine_map = w->engine_map;
+				ctx->engine_map_count = w->engine_map_count;
 			} else if (w->type == LOAD_BALANCE) {
-				if (!wrk->ctx_list[j].engine_map) {
+				if (!ctx->engine_map) {
 					wsim_err("Load balancing needs an engine map!\n");
 					return 1;
 				}
-				wrk->ctx_list[j].wants_balance =
-					w->load_balance;
+				ctx->wants_balance = w->load_balance;
+			} else if (w->type == BOND) {
+				if (!ctx->wants_balance) {
+					wsim_err("Engine bonds need load balancing engine map!\n");
+					return 1;
+				}
+				ctx->bond_count++;
+				ctx->bonds = realloc(ctx->bonds,
+						     ctx->bond_count *
+						     sizeof(struct bond));
+				igt_assert(ctx->bonds);
+				ctx->bonds[ctx->bond_count - 1].mask =
+					w->bond_mask;
+				ctx->bonds[ctx->bond_count - 1].master =
+					w->bond_master;
 			}
 		}
 
@@ -1281,6 +1385,46 @@  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 					ctx->engine_map[j - 1] - VCS1; /* FIXME */
 			}
 
+			for (j = 0; j < ctx->bond_count; j++) {
+				unsigned long mask = ctx->bonds[j].mask;
+				I915_DEFINE_CONTEXT_ENGINES_BOND(bond,
+								 __builtin_popcount(mask));
+				struct i915_context_engines_bond *p = NULL, *prev;
+				unsigned int b, e;
+
+				prev = p;
+				p = alloca(sizeof(bond));
+				assert(p);
+				memset(p, 0, sizeof(bond));
+
+				if (j == 0)
+					load_balance.base.next_extension =
+						to_user_pointer(p);
+				else if (j < (ctx->bond_count - 1))
+					prev->base.next_extension =
+						to_user_pointer(p);
+
+				p->base.name = I915_CONTEXT_ENGINES_EXT_BOND;
+				p->virtual_index = 0;
+				p->master.engine_class =
+					I915_ENGINE_CLASS_VIDEO;
+				p->master.engine_instance =
+					ctx->bonds[j].master - VCS1;
+
+				for (b = 0, e = 0; mask; e++, mask >>= 1) {
+					unsigned int idx;
+
+					if (!(mask & 1))
+						continue;
+
+					idx = find_engine(&set_engines.engines[1],
+							  ctx->engine_map_count,
+							  e);
+					p->engines[b++] =
+						set_engines.engines[1 + idx];
+				}
+			}
+
 			gem_context_set_param(fd, &param);
 		} else if (ctx->wants_balance) {
 			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, 2) = {
@@ -2255,7 +2399,8 @@  static void *run_workload(void *data)
 				continue;
 			} else if (w->type == PREEMPTION ||
 				   w->type == ENGINE_MAP ||
-				   w->type == LOAD_BALANCE) {
+				   w->type == LOAD_BALANCE ||
+				   w->type == BOND) {
 				continue;
 			}
 
diff --git a/benchmarks/wsim/README b/benchmarks/wsim/README
index 7adb3b89ffcc..e5dcf929519e 100644
--- a/benchmarks/wsim/README
+++ b/benchmarks/wsim/README
@@ -7,6 +7,7 @@  B.<uint>
 M.<uint>.<str>[|<str>]...
 P|X.<uint>.<int>
 d|p|s|t|q|a.<int>,...
+b.<uint>.<str>[|<str>].<str>
 f
 
 For duration a range can be given from which a random value will be picked
@@ -26,6 +27,7 @@  Additional workload steps are also supported:
  'f' - Create a sync fence.
  'a' - Advance the previously created sync fence.
  'B' - Turn on context load balancing.
+ 'b' - Set up engine bonds.
  'M' - Set up engine map.
  'P' - Context priority.
  'X' - Context preemption control.
@@ -202,3 +204,51 @@  This enables load balancing for context number one.
 
 Submissions to load balanced contexts are only allowed to use the DEFAULT engine
 specifier.
+
+Engine bonds
+------------
+
+Engine bonds are extensions on load balanced contexts. They allow expressing
+rules of engine selection between two co-operating contexts tied with submit
+fences. In other words, the rule expression is telling the driver: "If you pick
+this engine for context one, then you have to pick that engine for context two".
+
+Syntax is:
+  b.<context>.<engine_list>.<master_engine>
+
+Engine list is a list of one or more sibling engines separated by a pipe
+character (eg. "VCS1|VCS2").
+
+There can be multiple bonds tied to the same context.
+
+Example:
+
+  M.1.RCS|VECS
+  B.1
+  M.2.VCS1|VCS2
+  B.2
+  b.2.VCS1.RCS
+  b.2.VCS2.VECS
+
+This tells the driver that if it picked RCS for context one, it has to pick VCS1
+for context two. And if it picked VECS for context one, it has to pick VCS2 for
+context two.
+
+If we extend the above example with more workload directives:
+
+  1.DEFAULT.1000.0.0
+  2.DEFAULT.1000.s-1.0
+
+We get to a fully functional example where two batch buffers are submitted in a
+load balanced fashion, telling the driver they should run simultaneously and
+that valid engine pairs are either RCS + VCS1 (for two contexts respectively),
+or VECS + VCS2.
+
+This can also be extended using sync fences to improve chances of the first
+submission not getting on the hardware after the second one. Second block would
+then look like:
+
+  f
+  1.DEFAULT.1000.f-1.0
+  2.DEFAULT.1000.s-1.0
+  a.-3