diff mbox series

[i-g-t,05/21] wsim/media-bench: i915 balancing

Message ID 20190508121058.27038-6-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Media scalability tooling | expand

Commit Message

Tvrtko Ursulin May 8, 2019, 12:10 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Support i915 virtual engine from gem_wsim (-b i915) and media-bench.pl

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 benchmarks/gem_wsim.c  | 281 ++++++++++++++++++++++++++++++++++-------
 scripts/media-bench.pl |   9 +-
 2 files changed, 244 insertions(+), 46 deletions(-)

Comments

Chris Wilson May 10, 2019, 1:14 p.m. UTC | #1
Quoting Tvrtko Ursulin (2019-05-08 13:10:42)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Support i915 virtual engine from gem_wsim (-b i915) and media-bench.pl
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> +       /*
> +        * Create and configure contexts.
> +        */
> +       for (i = 0; i < wrk->nr_ctxs; i += 2) {
> +               struct ctx *ctx = &wrk->ctx_list[i];
> +               uint32_t ctx_id, share_vm = 0;
>  
> -                       wrk->ctx_list[w->context].id = arg.ctx_id;
> +               if (ctx->id)
> +                       continue;
>  
> -                       if (flags & GLOBAL_BALANCE) {
> -                               wrk->ctx_list[w->context].static_vcs = context_vcs_rr;
> -                               context_vcs_rr ^= 1;
> -                       } else {
> -                               wrk->ctx_list[w->context].static_vcs = ctx_vcs;
> -                               ctx_vcs ^= 1;
> -                       }
> +               if (flags & I915) {

VM sharing shouldn't be an i915-balancer-only option. For single jobs split
across multiple contexts, I would expect them to want to share a VM.

> +                       struct drm_i915_gem_context_create_ext_setparam ext = {
> +                               .base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +                               .param.param = I915_CONTEXT_PARAM_VM,
> +                       };
> +                       struct drm_i915_gem_context_create_ext args = { };
>  
> -                       if (wrk->prio) {
> +                       /* Find existing context to share ppgtt with. */
> +                       for (j = 0; j < wrk->nr_ctxs; j++) {
>                                 struct drm_i915_gem_context_param param = {
> -                                       .ctx_id = arg.ctx_id,
> -                                       .param = I915_CONTEXT_PARAM_PRIORITY,
> -                                       .value = wrk->prio,
> +                                       .param = I915_CONTEXT_PARAM_VM,
>                                 };
> -                               gem_context_set_param(fd, &param);
> +
> +                               if (!wrk->ctx_list[j].id)
> +                                       continue;
> +
> +                               param.ctx_id = wrk->ctx_list[j].id;
> +
> +                               gem_context_get_param(fd, &param);
> +                               igt_assert(param.value);
> +
> +                               share_vm = param.value;
> +
> +                               ext.param.value = share_vm;
> +                               args.flags =
> +                                   I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS;
> +                               args.extensions = to_user_pointer(&ext);
> +                               break;
>                         }
> +
> +                       if (!ctx->targets_instance)
> +                               args.flags |=
> +                                    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE;
> +
> +                       drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT,
> +                                &args);
> +
> +                       ctx_id = args.ctx_id;
> +               } else {
> +                       struct drm_i915_gem_context_create args = {};
> +
> +                       drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &args);
> +                       ctx_id = args.ctx_id;
> +               }
> +
> +               igt_assert(ctx_id);
> +               ctx->id = ctx_id;
> +
> +               if (flags & GLOBAL_BALANCE) {
> +                       ctx->static_vcs = context_vcs_rr;
> +                       context_vcs_rr ^= 1;
> +               } else {
> +                       ctx->static_vcs = ctx_vcs;
> +                       ctx_vcs ^= 1;
> +               }
> +
> +               __ctx_set_prio(ctx_id, wrk->prio);
> +
> +               /*
> +                * Do we need a separate context to satisfy this workloads which
> +                * both want to target specific engines and be balanced by i915?
> +                */
> +               if ((flags & I915) && ctx->wants_balance &&
> +                   ctx->targets_instance) {
> +                       struct drm_i915_gem_context_create_ext_setparam ext = {
> +                               .base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
> +                               .param.param = I915_CONTEXT_PARAM_VM,
> +                               .param.value = share_vm,
> +                       };
> +                       struct drm_i915_gem_context_create_ext args = {
> +                               .extensions = to_user_pointer(&ext),
> +                               .flags =
> +                                   I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS |
> +                                   I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
> +                       };
> +
> +                       igt_assert(share_vm);
> +
> +                       drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT,
> +                                &args);
> +
> +                       igt_assert(args.ctx_id);
> +                       ctx_id = args.ctx_id;
> +                       wrk->ctx_list[i + 1].id = args.ctx_id;
> +
> +                       __ctx_set_prio(ctx_id, wrk->prio);
> +               }
> +
> +               if (ctx->wants_balance) {
> +                       I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, 2) = {
> +                               .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
> +                               .num_siblings = 2,
> +                               .engines = {
> +                                       { .engine_class = I915_ENGINE_CLASS_VIDEO,
> +                                         .engine_instance = 0 },
> +                                       { .engine_class = I915_ENGINE_CLASS_VIDEO,
> +                                         .engine_instance = 1 },
> +                               },
> +                       };
> +                       I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, 3) = {
> +                               .extensions = to_user_pointer(&load_balance),
> +                               .engines = {
> +                                       { .engine_class = I915_ENGINE_CLASS_INVALID,
> +                                         .engine_instance = I915_ENGINE_CLASS_INVALID_NONE },
> +                                       { .engine_class = I915_ENGINE_CLASS_VIDEO,
> +                                         .engine_instance = 0 },
> +                                       { .engine_class = I915_ENGINE_CLASS_VIDEO,
> +                                         .engine_instance = 1 },
> +                               },
> +                       };
> +
> +                       struct drm_i915_gem_context_param param = {
> +                               .ctx_id = ctx_id,
> +                               .param = I915_CONTEXT_PARAM_ENGINES,
> +                               .size = sizeof(set_engines),
> +                               .value = to_user_pointer(&set_engines),
> +                       };
> +
> +                       gem_context_set_param(fd, &param);
>                 }

if (share_vm)
	gem_vm_destroy(share_vm);

Just to drop the local handle as the context has acquired its own
reference.

Other than that, it does what it sets out to do: create a context with
choice of engines and load balancing amongst them.

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
Chris Wilson May 10, 2019, 1:23 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-05-08 13:10:42)
> @@ -841,7 +846,11 @@ eb_set_engine(struct drm_i915_gem_execbuffer2 *eb,
>         if (engine == VCS2 && (flags & VCS2REMAP))
>                 engine = BCS;
>  
> -       eb->flags = eb_engine_map[engine];
> +       if ((flags & I915) && engine == VCS) {
> +               eb->flags = 0;
> +       } else {
> +               eb->flags = eb_engine_map[engine];
> +       }

You drop these brackets in a later patch.
-Chris
Tvrtko Ursulin May 13, 2019, 12:41 p.m. UTC | #3
On 10/05/2019 14:14, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-08 13:10:42)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Support i915 virtual engine from gem_wsim (-b i915) and media-bench.pl
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>> +       /*
>> +        * Create and configure contexts.
>> +        */
>> +       for (i = 0; i < wrk->nr_ctxs; i += 2) {
>> +               struct ctx *ctx = &wrk->ctx_list[i];
>> +               uint32_t ctx_id, share_vm = 0;
>>   
>> -                       wrk->ctx_list[w->context].id = arg.ctx_id;
>> +               if (ctx->id)
>> +                       continue;
>>   
>> -                       if (flags & GLOBAL_BALANCE) {
>> -                               wrk->ctx_list[w->context].static_vcs = context_vcs_rr;
>> -                               context_vcs_rr ^= 1;
>> -                       } else {
>> -                               wrk->ctx_list[w->context].static_vcs = ctx_vcs;
>> -                               ctx_vcs ^= 1;
>> -                       }
>> +               if (flags & I915) {
> 
> VM sharing shouldn't be an i915-balancer-only option. For single jobs split
> across multiple contexts, I would expect them to want to share a VM.

Could do but I wanted to limit the new features to new features. :) 
Pencil in for later okay?

>> +                       struct drm_i915_gem_context_create_ext_setparam ext = {
>> +                               .base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
>> +                               .param.param = I915_CONTEXT_PARAM_VM,
>> +                       };
>> +                       struct drm_i915_gem_context_create_ext args = { };
>>   
>> -                       if (wrk->prio) {
>> +                       /* Find existing context to share ppgtt with. */
>> +                       for (j = 0; j < wrk->nr_ctxs; j++) {
>>                                  struct drm_i915_gem_context_param param = {
>> -                                       .ctx_id = arg.ctx_id,
>> -                                       .param = I915_CONTEXT_PARAM_PRIORITY,
>> -                                       .value = wrk->prio,
>> +                                       .param = I915_CONTEXT_PARAM_VM,
>>                                  };
>> -                               gem_context_set_param(fd, &param);
>> +
>> +                               if (!wrk->ctx_list[j].id)
>> +                                       continue;
>> +
>> +                               param.ctx_id = wrk->ctx_list[j].id;
>> +
>> +                               gem_context_get_param(fd, &param);
>> +                               igt_assert(param.value);
>> +
>> +                               share_vm = param.value;
>> +
>> +                               ext.param.value = share_vm;
>> +                               args.flags =
>> +                                   I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS;
>> +                               args.extensions = to_user_pointer(&ext);
>> +                               break;
>>                          }
>> +
>> +                       if (!ctx->targets_instance)
>> +                               args.flags |=
>> +                                    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE;
>> +
>> +                       drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT,
>> +                                &args);
>> +
>> +                       ctx_id = args.ctx_id;
>> +               } else {
>> +                       struct drm_i915_gem_context_create args = {};
>> +
>> +                       drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &args);
>> +                       ctx_id = args.ctx_id;
>> +               }
>> +
>> +               igt_assert(ctx_id);
>> +               ctx->id = ctx_id;
>> +
>> +               if (flags & GLOBAL_BALANCE) {
>> +                       ctx->static_vcs = context_vcs_rr;
>> +                       context_vcs_rr ^= 1;
>> +               } else {
>> +                       ctx->static_vcs = ctx_vcs;
>> +                       ctx_vcs ^= 1;
>> +               }
>> +
>> +               __ctx_set_prio(ctx_id, wrk->prio);
>> +
>> +               /*
>> +                * Do we need a separate context to satisfy this workloads which
>> +                * both want to target specific engines and be balanced by i915?
>> +                */
>> +               if ((flags & I915) && ctx->wants_balance &&
>> +                   ctx->targets_instance) {
>> +                       struct drm_i915_gem_context_create_ext_setparam ext = {
>> +                               .base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
>> +                               .param.param = I915_CONTEXT_PARAM_VM,
>> +                               .param.value = share_vm,
>> +                       };
>> +                       struct drm_i915_gem_context_create_ext args = {
>> +                               .extensions = to_user_pointer(&ext),
>> +                               .flags =
>> +                                   I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS |
>> +                                   I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
>> +                       };
>> +
>> +                       igt_assert(share_vm);
>> +
>> +                       drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT,
>> +                                &args);
>> +
>> +                       igt_assert(args.ctx_id);
>> +                       ctx_id = args.ctx_id;
>> +                       wrk->ctx_list[i + 1].id = args.ctx_id;
>> +
>> +                       __ctx_set_prio(ctx_id, wrk->prio);
>> +               }
>> +
>> +               if (ctx->wants_balance) {
>> +                       I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, 2) = {
>> +                               .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
>> +                               .num_siblings = 2,
>> +                               .engines = {
>> +                                       { .engine_class = I915_ENGINE_CLASS_VIDEO,
>> +                                         .engine_instance = 0 },
>> +                                       { .engine_class = I915_ENGINE_CLASS_VIDEO,
>> +                                         .engine_instance = 1 },
>> +                               },
>> +                       };
>> +                       I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, 3) = {
>> +                               .extensions = to_user_pointer(&load_balance),
>> +                               .engines = {
>> +                                       { .engine_class = I915_ENGINE_CLASS_INVALID,
>> +                                         .engine_instance = I915_ENGINE_CLASS_INVALID_NONE },
>> +                                       { .engine_class = I915_ENGINE_CLASS_VIDEO,
>> +                                         .engine_instance = 0 },
>> +                                       { .engine_class = I915_ENGINE_CLASS_VIDEO,
>> +                                         .engine_instance = 1 },
>> +                               },
>> +                       };
>> +
>> +                       struct drm_i915_gem_context_param param = {
>> +                               .ctx_id = ctx_id,
>> +                               .param = I915_CONTEXT_PARAM_ENGINES,
>> +                               .size = sizeof(set_engines),
>> +                               .value = to_user_pointer(&set_engines),
>> +                       };
>> +
>> +                       gem_context_set_param(fd, &param);
>>                  }
> 
> if (share_vm)
> 	gem_vm_destroy(share_vm);
> 
> Just to drop the local handle as the context has acquired its own
> reference.

Well spotted!

> Other than that, it does what it sets out to do: create a context with
> choice of engines and load balancing amongst them.
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Thanks,

Tvrtko
Chris Wilson May 13, 2019, 12:54 p.m. UTC | #4
Quoting Tvrtko Ursulin (2019-05-13 13:41:47)
> 
> On 10/05/2019 14:14, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-05-08 13:10:42)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >> Support i915 virtual engine from gem_wsim (-b i915) and media-bench.pl
> >>
> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> ---
> >> +       /*
> >> +        * Create and configure contexts.
> >> +        */
> >> +       for (i = 0; i < wrk->nr_ctxs; i += 2) {
> >> +               struct ctx *ctx = &wrk->ctx_list[i];
> >> +               uint32_t ctx_id, share_vm = 0;
> >>   
> >> -                       wrk->ctx_list[w->context].id = arg.ctx_id;
> >> +               if (ctx->id)
> >> +                       continue;
> >>   
> >> -                       if (flags & GLOBAL_BALANCE) {
> >> -                               wrk->ctx_list[w->context].static_vcs = context_vcs_rr;
> >> -                               context_vcs_rr ^= 1;
> >> -                       } else {
> >> -                               wrk->ctx_list[w->context].static_vcs = ctx_vcs;
> >> -                               ctx_vcs ^= 1;
> >> -                       }
> >> +               if (flags & I915) {
> > 
> > VM sharing shouldn't be an i915-balancer-only option. For single jobs split
> > across multiple contexts, I would expect them to want to share a VM.
> 
> Could do but I wanted to limit the new features to new features. :) 
> Pencil in for later okay?

Sure. Just checking that my understanding is in the same ballpark. I
did hope to enable VM sharing here by default -- in reality, I doubt
these wsim workloads are impacted by VM switches, as they are tiny.
However, I don't have any measurements for a shared VM, and had better
start somewhere.
-Chris
diff mbox series

Patch

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index afb9644dd7f0..1084e95fa8df 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -142,6 +142,14 @@  struct w_step
 
 DECLARE_EWMA(uint64_t, rt, 4, 2)
 
+struct ctx {
+	uint32_t id;
+	int priority;
+	bool targets_instance;
+	bool wants_balance;
+	unsigned int static_vcs;
+};
+
 struct workload
 {
 	unsigned int id;
@@ -163,11 +171,7 @@  struct workload
 	struct timespec repeat_start;
 
 	unsigned int nr_ctxs;
-	struct {
-		uint32_t id;
-		int priority;
-		unsigned int static_vcs;
-	} *ctx_list;
+	struct ctx *ctx_list;
 
 	int sync_timeline;
 	uint32_t sync_seqno;
@@ -224,6 +228,7 @@  static int fd;
 #define HEARTBEAT	(1<<7)
 #define GLOBAL_BALANCE	(1<<8)
 #define DEPSYNC		(1<<9)
+#define I915		(1<<10)
 
 #define SEQNO_IDX(engine) ((engine) * 16)
 #define SEQNO_OFFSET(engine) (SEQNO_IDX(engine) * sizeof(uint32_t))
@@ -841,7 +846,11 @@  eb_set_engine(struct drm_i915_gem_execbuffer2 *eb,
 	if (engine == VCS2 && (flags & VCS2REMAP))
 		engine = BCS;
 
-	eb->flags = eb_engine_map[engine];
+	if ((flags & I915) && engine == VCS) {
+		eb->flags = 0;
+	} else {
+		eb->flags = eb_engine_map[engine];
+	}
 }
 
 static void
@@ -867,6 +876,23 @@  get_status_objects(struct workload *wrk)
 		return wrk->status_object;
 }
 
+static struct ctx *
+__get_ctx(struct workload *wrk, struct w_step *w)
+{
+	return &wrk->ctx_list[w->context * 2];
+}
+
+static uint32_t
+get_ctxid(struct workload *wrk, struct w_step *w)
+{
+	struct ctx *ctx = __get_ctx(wrk, w);
+
+	if (ctx->targets_instance && ctx->wants_balance && w->engine == VCS)
+		return wrk->ctx_list[w->context * 2 + 1].id;
+	else
+		return wrk->ctx_list[w->context * 2].id;
+}
+
 static void
 alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
 {
@@ -919,7 +945,7 @@  alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
 
 	w->eb.buffers_ptr = to_user_pointer(w->obj);
 	w->eb.buffer_count = j + 1;
-	w->eb.rsvd1 = wrk->ctx_list[w->context].id;
+	w->eb.rsvd1 = get_ctxid(wrk, w);
 
 	if (flags & SWAPVCS && engine == VCS1)
 		engine = VCS2;
@@ -932,17 +958,29 @@  alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
 		printf("%x|", w->obj[i].handle);
 	printf(" %10lu flags=%llx bb=%x[%u] ctx[%u]=%u\n",
 		w->bb_sz, w->eb.flags, w->bb_handle, j, w->context,
-		wrk->ctx_list[w->context].id);
+		get_ctxid(wrk, w));
 #endif
 }
 
+static void __ctx_set_prio(uint32_t ctx_id, unsigned int prio)
+{
+	struct drm_i915_gem_context_param param = {
+		.ctx_id = ctx_id,
+		.param = I915_CONTEXT_PARAM_PRIORITY,
+		.value = prio,
+	};
+
+	if (prio)
+		gem_context_set_param(fd, &param);
+}
+
 static void
 prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 {
 	unsigned int ctx_vcs = 0;
 	int max_ctx = -1;
 	struct w_step *w;
-	int i;
+	int i, j;
 
 	wrk->id = id;
 	wrk->prng = rand();
@@ -973,44 +1011,183 @@  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 		}
 	}
 
+	/*
+	 * Pre-scan workload steps to allocate context list storage.
+	 */
 	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
-		if ((int)w->context > max_ctx) {
-			int delta = w->context + 1 - wrk->nr_ctxs;
+		int ctx = w->context * 2 + 1; /* Odd slots are special. */
+		int delta;
+
+		if (ctx <= max_ctx)
+			continue;
+
+		delta = ctx + 1 - wrk->nr_ctxs;
 
-			wrk->nr_ctxs += delta;
-			wrk->ctx_list = realloc(wrk->ctx_list,
-						wrk->nr_ctxs *
-						sizeof(*wrk->ctx_list));
-			memset(&wrk->ctx_list[wrk->nr_ctxs - delta], 0,
-			       delta * sizeof(*wrk->ctx_list));
+		wrk->nr_ctxs += delta;
+		wrk->ctx_list = realloc(wrk->ctx_list,
+					wrk->nr_ctxs * sizeof(*wrk->ctx_list));
+		memset(&wrk->ctx_list[wrk->nr_ctxs - delta], 0,
+			delta * sizeof(*wrk->ctx_list));
+
+		max_ctx = ctx;
+	}
+
+	/*
+	 * Identify if contexts target specific engine instances and if they
+	 * want to be balanced.
+	 */
+	for (j = 0; j < wrk->nr_ctxs; j += 2) {
+		bool targets = false;
+		bool balance = false;
 
-			max_ctx = w->context;
+		for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
+			if (w->type != BATCH)
+				continue;
+
+			if (w->context != (j / 2))
+				continue;
+
+			if (w->engine == VCS)
+				balance = true;
+			else
+				targets = true;
 		}
 
-		if (!wrk->ctx_list[w->context].id) {
-			struct drm_i915_gem_context_create arg = {};
+		if (flags & I915) {
+			wrk->ctx_list[j].targets_instance = targets;
+			wrk->ctx_list[j].wants_balance = balance;
+		}
+	}
 
-			drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg);
-			igt_assert(arg.ctx_id);
+	/*
+	 * Create and configure contexts.
+	 */
+	for (i = 0; i < wrk->nr_ctxs; i += 2) {
+		struct ctx *ctx = &wrk->ctx_list[i];
+		uint32_t ctx_id, share_vm = 0;
 
-			wrk->ctx_list[w->context].id = arg.ctx_id;
+		if (ctx->id)
+			continue;
 
-			if (flags & GLOBAL_BALANCE) {
-				wrk->ctx_list[w->context].static_vcs = context_vcs_rr;
-				context_vcs_rr ^= 1;
-			} else {
-				wrk->ctx_list[w->context].static_vcs = ctx_vcs;
-				ctx_vcs ^= 1;
-			}
+		if (flags & I915) {
+			struct drm_i915_gem_context_create_ext_setparam ext = {
+				.base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+				.param.param = I915_CONTEXT_PARAM_VM,
+			};
+			struct drm_i915_gem_context_create_ext args = { };
 
-			if (wrk->prio) {
+			/* Find existing context to share ppgtt with. */
+			for (j = 0; j < wrk->nr_ctxs; j++) {
 				struct drm_i915_gem_context_param param = {
-					.ctx_id = arg.ctx_id,
-					.param = I915_CONTEXT_PARAM_PRIORITY,
-					.value = wrk->prio,
+					.param = I915_CONTEXT_PARAM_VM,
 				};
-				gem_context_set_param(fd, &param);
+
+				if (!wrk->ctx_list[j].id)
+					continue;
+
+				param.ctx_id = wrk->ctx_list[j].id;
+
+				gem_context_get_param(fd, &param);
+				igt_assert(param.value);
+
+				share_vm = param.value;
+
+				ext.param.value = share_vm;
+				args.flags =
+				    I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS;
+				args.extensions = to_user_pointer(&ext);
+				break;
 			}
+
+			if (!ctx->targets_instance)
+				args.flags |=
+				     I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE;
+
+			drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT,
+				 &args);
+
+			ctx_id = args.ctx_id;
+		} else {
+			struct drm_i915_gem_context_create args = {};
+
+			drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &args);
+			ctx_id = args.ctx_id;
+		}
+
+		igt_assert(ctx_id);
+		ctx->id = ctx_id;
+
+		if (flags & GLOBAL_BALANCE) {
+			ctx->static_vcs = context_vcs_rr;
+			context_vcs_rr ^= 1;
+		} else {
+			ctx->static_vcs = ctx_vcs;
+			ctx_vcs ^= 1;
+		}
+
+		__ctx_set_prio(ctx_id, wrk->prio);
+
+		/*
+		 * Do we need a separate context to satisfy workloads which both
+		 * want to target specific engines and be balanced by i915?
+		 */
+		if ((flags & I915) && ctx->wants_balance &&
+		    ctx->targets_instance) {
+			struct drm_i915_gem_context_create_ext_setparam ext = {
+				.base.name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+				.param.param = I915_CONTEXT_PARAM_VM,
+				.param.value = share_vm,
+			};
+			struct drm_i915_gem_context_create_ext args = {
+				.extensions = to_user_pointer(&ext),
+				.flags =
+				    I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS |
+				    I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
+			};
+
+			igt_assert(share_vm);
+
+			drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT,
+				 &args);
+
+			igt_assert(args.ctx_id);
+			ctx_id = args.ctx_id;
+			wrk->ctx_list[i + 1].id = args.ctx_id;
+
+			__ctx_set_prio(ctx_id, wrk->prio);
+		}
+
+		if (ctx->wants_balance) {
+			I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(load_balance, 2) = {
+				.base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE,
+				.num_siblings = 2,
+				.engines = {
+					{ .engine_class = I915_ENGINE_CLASS_VIDEO,
+					  .engine_instance = 0 },
+					{ .engine_class = I915_ENGINE_CLASS_VIDEO,
+					  .engine_instance = 1 },
+				},
+			};
+			I915_DEFINE_CONTEXT_PARAM_ENGINES(set_engines, 3) = {
+				.extensions = to_user_pointer(&load_balance),
+				.engines = {
+					{ .engine_class = I915_ENGINE_CLASS_INVALID,
+					  .engine_instance = I915_ENGINE_CLASS_INVALID_NONE },
+					{ .engine_class = I915_ENGINE_CLASS_VIDEO,
+					  .engine_instance = 0 },
+					{ .engine_class = I915_ENGINE_CLASS_VIDEO,
+					  .engine_instance = 1 },
+				},
+			};
+
+			struct drm_i915_gem_context_param param = {
+				.ctx_id = ctx_id,
+				.param = I915_CONTEXT_PARAM_ENGINES,
+				.size = sizeof(set_engines),
+				.value = to_user_pointer(&set_engines),
+			};
+
+			gem_context_set_param(fd, &param);
 		}
 	}
 
@@ -1027,7 +1204,6 @@  prepare_workload(unsigned int id, struct workload *wrk, unsigned int flags)
 	 */
 	for (i = 0, w = wrk->steps; i < wrk->nr_steps; i++, w++) {
 		struct w_step *w2;
-		int j;
 
 		if (w->type != PREEMPTION)
 			continue;
@@ -1385,7 +1561,7 @@  static enum intel_engine_id
 context_balance(const struct workload_balancer *balancer,
 		struct workload *wrk, struct w_step *w)
 {
-	return get_vcs_engine(wrk->ctx_list[w->context].static_vcs);
+	return get_vcs_engine(__get_ctx(wrk, w)->static_vcs);
 }
 
 static unsigned int
@@ -1579,6 +1755,12 @@  static const struct workload_balancer all_balancers[] = {
 		.get_qd = get_engine_busy,
 		.balance = busy_avg_balance,
 	},
+	{
+		.id = 11,
+		.name = "i915",
+		.desc = "i915 balancing.",
+		.flags = I915,
+	},
 };
 
 static unsigned int
@@ -1957,7 +2139,8 @@  static void *run_workload(void *data)
 			last_sync = false;
 
 			wrk->nr_bb[engine]++;
-			if (engine == VCS && wrk->balancer) {
+			if (engine == VCS && wrk->balancer &&
+			    wrk->balancer->balance) {
 				engine = wrk->balancer->balance(wrk->balancer,
 								wrk, w);
 				wrk->nr_bb[engine]++;
@@ -2384,6 +2567,12 @@  int main(int argc, char **argv)
 		return 1;
 	}
 
+	if ((flags & VCS2REMAP) && (flags & I915)) {
+		if (verbose)
+			fprintf(stderr, "VCS remapping not supported with i915 balancing!\n");
+		return 1;
+	}
+
 	if (!nop_calibration) {
 		if (verbose > 1)
 			printf("Calibrating nop delay with %u%% tolerance...\n",
@@ -2469,11 +2658,17 @@  int main(int argc, char **argv)
 		printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
 		if (flags & SWAPVCS)
 			printf("Swapping VCS rings between clients.\n");
-		if (flags & GLOBAL_BALANCE)
-			printf("Using %s balancer in global mode.\n",
-			       balancer->name);
-		else if (balancer)
+		if (flags & GLOBAL_BALANCE) {
+			if (flags & I915) {
+				printf("Ignoring global balancing with i915!\n");
+				flags &= ~GLOBAL_BALANCE;
+			} else {
+				printf("Using %s balancer in global mode.\n",
+				       balancer->name);
+			}
+		} else if (balancer) {
 			printf("Using %s balancer.\n", balancer->name);
+		}
 	}
 
 	if (master_workload >= 0 && clients == 1)
@@ -2490,7 +2685,7 @@  int main(int argc, char **argv)
 		if (flags & SWAPVCS && i & 1)
 			flags_ &= ~SWAPVCS;
 
-		if (flags & GLOBAL_BALANCE) {
+		if ((flags & GLOBAL_BALANCE) && !(flags & I915)) {
 			w[i]->balancer = &global_balancer;
 			w[i]->global_wrk = w[0];
 			w[i]->global_balancer = balancer;
diff --git a/scripts/media-bench.pl b/scripts/media-bench.pl
index 066b542f95df..ddf9c0ec05c8 100755
--- a/scripts/media-bench.pl
+++ b/scripts/media-bench.pl
@@ -49,10 +49,11 @@  my $nop;
 my %opts;
 
 my @balancers = ( 'rr', 'rand', 'qd', 'qdr', 'qdavg', 'rt', 'rtr', 'rtavg',
-		  'context', 'busy', 'busy-avg' );
+		  'context', 'busy', 'busy-avg', 'i915' );
 my %bal_skip_H = ( 'rr' => 1, 'rand' => 1, 'context' => 1, , 'busy' => 1,
-		   'busy-avg' => 1 );
-my %bal_skip_R = ( 'context' => 1 );
+		   'busy-avg' => 1, 'i915' => 1 );
+my %bal_skip_R = ( 'context' => 1, 'i915' => 1 );
+my %bal_skip_G = ( 'i915' => 1 );
 
 my @workloads = (
 	'media_load_balance_17i7.wsim',
@@ -498,6 +499,8 @@  foreach my $wrk (@saturation_workloads) {
 				my $bid;
 
 				if ($bal ne '') {
+					next GBAL if $G =~ '-G' and exists $bal_skip_G{$bal};
+
 					push @xargs, "-b $bal";
 					push @xargs, '-R' unless exists $bal_skip_R{$bal};
 					push @xargs, $G if $G ne '';